Search code examples
sql-serverdtsearch

Fastest way to record all DocIds and FileNames from dtSearch in SQL database


I am using dtSearch in combination with a SQL database and would like to maintain a table that includes all DocIds and their related FileNames. From there, I will add a column with my foreign key to allow me to combine text and database searches.

I have code to simply return all the records in the index and add them one by one to the DB. This, however, takes FOREVER, and doesn't address the issue of how to simply append new records as they are added to the index. But just in case it helps:

// Naive approach: load every document of the index into a SearchResults list
// and insert them into the database one row at a time.
MyDatabaseContext db = new StateScapeEntities();
IndexJob ij = new dtSearch.Engine.IndexJob();

ij.IndexPath = @"d:\myindex";

// Index metadata; only DocCount is read from it below.
IndexInfo indexInfo = dtSearch.Engine.IndexJob.GetIndexInfo(@"d:\myindex");

// The result of Execute() is stored but never checked afterwards.
bool jobDone =   ij.Execute();

SearchResults sr = new SearchResults();

uint n = indexInfo.DocCount;

// Adds the documents with ids 1..n to the result list.
// NOTE(review): this assumes DocIds are contiguous from 1 to DocCount - TODO confirm.
for (int i = 1; i <= n; i++)
{
    sr.AddDoc(ij.IndexPath, i, null);
}

// Walks the result list; the loop counter starts at 1, so i - 1 is passed as the position.
for (int i = 1; i <= n; i++)
{
    sr.GetNthDoc(i - 1);
        // IndexDocument is the entity class, defined elsewhere
        IndexDocument id = new IndexDocument();
        id.DocId = sr.CurrentItem.DocId;
        id.FilePath = sr.CurrentItem.Filename;

        // Entries without a file name are skipped.
        if (id.FilePath != null)
        {
            db.IndexDocuments.Add(id);
            // SaveChanges() runs once per document inside the loop;
            // presumably this per-row save is what makes the approach so slow.
            db.SaveChanges();           
        }   
}

Solution

  • So, I used part of user2172986's response, but combined it with some additional code to get the solution to my question. I did indeed have to set the dtsIndexKeepExistingDocIds flag in my index update routine. From there, I only wanted to add the newly created DocIds to my SQL database. For that, I used the following code:

    // Incremental sync: update the dtSearch index, then bulk-copy only the
    // documents whose DocId is greater than the highest DocId already in SQL.
    string indexPath = @"d:\myindex";

    using (IndexJob ij = new dtSearch.Engine.IndexJob())
    {
        // Make sure the updated index doesn't change existing DocIds;
        // otherwise the DocIds already stored in SQL would no longer match.
        ij.IndexingFlags = IndexingFlags.dtsIndexKeepExistingDocIds;
        ij.IndexPath = indexPath;
        ij.ActionAdd = true;
        ij.FoldersToIndex.Add(indexPath + "<+>");
        ij.IncludeFilters.Add("*");

        // Execute() returns a bool; report a false result instead of silently dropping it.
        if (!ij.Execute())
        {
            Console.WriteLine("IndexJob.Execute returned false; the index may not be up to date.");
        }
    }

    // DataTable that buffers the new rows. MakeTempIndexDocTable is a custom
    // method not included in this example; it just creates a DataTable with
    // the appropriate columns ("DocId" and "FileName" are the ones filled below).
    DataTable newIndexDoc = MakeTempIndexDocTable();

    // Connect to the DB (custom Entity Framework context, not included here).
    // NOTE(review): assumes MyDataBase is a DbContext and therefore disposable.
    using (MyDataBase db = new MyDataBase())
    {
        // Highest DocId already stored in SQL. Projecting to int? makes Max()
        // return null for an empty table (first run) instead of failing, in
        // which case 0 is used so the selection below starts at DocId 1.
        int lastDbDocId = db.IndexDocuments.Max(d => (int?)d.DocId) ?? 0;

        // Highest DocId in the index itself. LastDocId is a uint while
        // SelectItems takes ints, so convert with an overflow check.
        IndexInfo indexInfo = dtSearch.Engine.IndexJob.GetIndexInfo(indexPath);
        int latestIndexDocId = checked((int)indexInfo.LastDocId);

        // Nothing new in the index: avoid running a search over an empty or inverted range.
        if (lastDbDocId >= latestIndexDocId)
        {
            return;
        }

        // The filter restricts the search to the new DocIds only.
        using (SearchFilter sf = new SearchFilter())
        {
            int indexId = sf.AddIndex(indexPath);

            // Only select new records: from one greater than the last DocId in
            // the DB up to the last DocId in the index itself.
            sf.SelectItems(indexId, lastDbDocId + 1, latestIndexDocId, true);

            using (SearchJob sj = new dtSearch.Engine.SearchJob())
            {
                sj.SetFilter(sf);
                // Return every document in the specified range (using xfirstword).
                sj.Request = "xfirstword";
                // Specify the path to the index to search here.
                sj.IndexesToSearch.Add(indexPath);

                // additional flags and limits redacted for clarity

                sj.Execute();

                // Store the error message in the status
                // redacted for clarity

                SearchResults results = sj.Results;
                int count = results.Count;
                if (count == 0)
                {
                    return;
                }

                for (int i = 0; i < count; i++)
                {
                    // GetNthDoc moves CurrentItem to the i-th result.
                    results.GetNthDoc(i);

                    string fileName = results.CurrentItem.Filename;
                    if (fileName != null)
                    {
                        DataRow row = newIndexDoc.NewRow();
                        row["DocId"] = results.CurrentItem.DocId;
                        row["FileName"] = fileName;
                        newIndexDoc.Rows.Add(row);
                    }
                }

                newIndexDoc.AcceptChanges();

                // Push all buffered rows to SQL Server in one bulk operation
                // instead of one INSERT per document.
                using (SqlConnection connection =
                       new SqlConnection(db.Database.Connection.ConnectionString))
                {
                    connection.Open();

                    using (SqlBulkCopy bulkCopy = new SqlBulkCopy(connection))
                    {
                        bulkCopy.DestinationTableName = "dbo.IndexDocument";

                        try
                        {
                            // Write from the source to the destination.
                            bulkCopy.WriteToServer(newIndexDoc);
                        }
                        catch (Exception ex)
                        {
                            // NOTE(review): a failed bulk copy is only logged and
                            // execution continues; consider rethrowing if callers
                            // must know that the rows were not written.
                            Console.WriteLine(ex.Message);
                        }
                    }
                }

                newIndexDoc.Clear();
                // Custom method on the context, not included in this example.
                db.UpdateIndexDocument();
            }
        }
    }