Search code examples
solr, solr4, dataimporthandler

Solr delta-import erases index


I'm having trouble with Solr delta-import from a MySQL database. I am able to do a full import with no problem. When I try to do a delta-import, it imports the changed records (as expected) but wipes out the rest of the index, so that only the updated records remain in the index. There are no errors in the log. Am I missing something in my configuration? I'm running Solr 5.4 on an Ubuntu server and using the admin UI.

<dataConfig>
    <dataSource driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost/ibnet" user="xxxx" password="xxxxx" />
    <document>
        <!--
            Entity "profile": selects rows from the profile table, LEFT JOINed to the
            profile each row links to (profile.linked_id).
              query            : SELECT used for a full import.
              deltaImportQuery : the same SELECT, restricted to the single id in ${dih.delta.id}.
              deltaQuery       : ids of rows whose last_modified is later than ${dih.last_index_time}.
            The "/* Other fields */" markers stand in for columns omitted from this listing;
            block comments are used because line breaks inside an attribute value are
            turned into spaces by the XML parser.
        -->
        <entity name="profile" pk="profile.id" query="
            SELECT
                profile.id AS id,
                profile.profile_status AS profile_status,
                /* Other fields */
                linkedProfile.org_name AS linked_org_name,
                linkedProfile.org_city AS linked_org_city,
                linkedProfile.org_st_prov_reg AS linked_org_st_prov_reg,
                linkedProfile.org_country AS linked_org_country
            FROM profile AS profile
            LEFT JOIN profile AS linkedProfile ON linkedProfile.id = profile.linked_id"
            deltaImportQuery="
            SELECT
                profile.id AS id,
                profile.profile_status AS profile_status,
                /* Other fields */
                linkedProfile.org_name AS linked_org_name,
                linkedProfile.org_city AS linked_org_city,
                linkedProfile.org_st_prov_reg AS linked_org_st_prov_reg,
                linkedProfile.org_country AS linked_org_country
            FROM profile AS profile
            LEFT JOIN profile AS linkedProfile ON linkedProfile.id = profile.linked_id
            WHERE profile.id = '${dih.delta.id}'"
            deltaQuery="SELECT profile.id FROM profile WHERE last_modified > '${dih.last_index_time}'"
            onError="skip" >
        </entity>
    </document>
</dataConfig>

EDIT: I've changed dih.delta.id to dataimporter.delta.id and the same for last_index_time, but that hasn't changed the results.

Here is the response:

{
  "responseHeader": {
    "status": 0,
    "QTime": 0
  },
  "initArgs": [
    "defaults",
    [
      "config",
      "data-config.xml"
    ]
  ],
  "command": "status",
  "status": "idle",
  "importResponse": "",
  "statusMessages": {
    "Total Requests made to DataSource": "4",
    "Total Rows Fetched": "6",
    "Total Documents Processed": "3",
    "Total Documents Skipped": "0",
    "Delta Dump started": "2016-05-01 02:38:03",
    "Identifying Delta": "2016-05-01 02:38:03",
    "Deltas Obtained": "2016-05-01 02:38:03",
    "Building documents": "2016-05-01 02:38:03",
    "Total Changed Documents": "3",
    "": "Indexing completed. Added/Updated: 3 documents. Deleted 0 documents.",
    "Committed": "2016-05-01 02:38:03",
    "Time taken": "0:0:0.317"
  }
}

Solution

  • In Solr admin -> your core -> Dataimport, there is a Clean option. If it is checked, Solr cleans (deletes) the existing index data before importing, for both full-import and delta-import. Uncheck it when running a delta-import.

    Another tip: Solr DIH always uses UTC for the import timestamp, so check what your timezone is. Convert the datetime columns in your database to UTC before comparing them to dih.last_index_time.