Search code examples
elasticsearchnest

Is there bulk partial update in elasticsearch?


I have a model with 50 properties for Elasticsearch and I am transferring data to Elasticsearch. However, I have approximately 150,000 documents in my Elasticsearch alias and I want to update 3 properties of these documents with a bulk partial update. I know that there is bulk update and partial update separately, but is there a partial bulk update in Elasticsearch?


Solution

  • You can send partial updates using the bulk API. Here's an example

    /// <summary>
    /// Demonstrates partial updates through the bulk API: removes the
    /// "documents" index if it already exists, bulk indexes ten documents, then
    /// sends a second bulk request that mixes scripted, partial-document and
    /// full-document update operations.
    /// </summary>
    private static void Main()
    {
        var defaultIndex = "documents";
        var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));

        var settings = new ConnectionSettings(pool)
            .DefaultIndex(defaultIndex);

        var client = new ElasticClient(settings);

        // remove any index left over from an earlier run before indexing
        if (client.IndexExists(defaultIndex).Exists)
        {
            client.DeleteIndex(defaultIndex);
        }

        // Materialize once: the sequence is enumerated by both bulk requests
        // below, and a deferred query would re-run the projection (allocating
        // new MyDocument instances) on every enumeration.
        var docs = Enumerable.Range(1, 10)
            .Select(i => new MyDocument(i)
            {
                Message = $"message {i}"
            })
            .ToList();

        // bulk index the documents; refresh=wait_for so the search that follows
        // can see them
        var bulkResponse = client.Bulk(b => b
            .IndexMany(docs)
            .Refresh(Refresh.WaitFor)
        );

        // state before the updates; not used further in this method, kept so
        // the response can be inspected
        var searchResponse = client.Search<MyDocument>(s => s
            .Sort(so => so.Ascending("_id"))
        );

        // update the documents, choosing the kind of update per document
        bulkResponse = client.Bulk(b => b
            .UpdateMany<MyDocument, object>(docs, (bu, doc) =>
            {
                if (doc.Id % 3 == 0)
                {
                    // use script to update: message becomes "message <_id * 2>"
                    bu.Id(doc.Id).Script(s => s
                        .Source("ctx._source.message = 'message ' + (Integer.parseInt(ctx._id) * 2);")
                    );
                }
                else if (doc.Id % 2 == 0)
                {
                    // use partial document to update: only the message field is sent
                    bu.Id(doc.Id).Doc(new { message = "updated message" });
                }
                else
                {
                    // send the original, unchanged document to update
                    bu.Doc(doc);
                }

                return bu;
            })
            .Refresh(Refresh.WaitFor)
        );

        // state after the updates, sorted by _id
        searchResponse = client.Search<MyDocument>(s => s
            .Sort(so => so.Ascending("_id"))
        );
    }
    
    
    /// <summary>
    /// Document type indexed by the example: an integer id plus a message.
    /// </summary>
    public class MyDocument
    {
        /// <summary>
        /// Creates a document with the given id; <see cref="Message"/> is left unset.
        /// </summary>
        /// <param name="id">Value assigned to <see cref="Id"/>.</param>
        public MyDocument(int id)
        {
            this.Id = id;
        }

        /// <summary>Identifier of the document.</summary>
        public int Id { get; set; }

        /// <summary>Message text carried by the document.</summary>
        public string Message { get; set; }
    }
    

    The final search response returns

    {
      "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 10,
        "max_score" : null,
        "hits" : [
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "1",
            "_score" : null,
            "_source" : {
              "id" : 1,
              "message" : "message 1"
            },
            "sort" : [
              "1"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "10",
            "_score" : null,
            "_source" : {
              "id" : 10,
              "message" : "updated message"
            },
            "sort" : [
              "10"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "2",
            "_score" : null,
            "_source" : {
              "id" : 2,
              "message" : "updated message"
            },
            "sort" : [
              "2"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "3",
            "_score" : null,
            "_source" : {
              "id" : 3,
              "message" : "message 6"
            },
            "sort" : [
              "3"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "4",
            "_score" : null,
            "_source" : {
              "id" : 4,
              "message" : "updated message"
            },
            "sort" : [
              "4"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "5",
            "_score" : null,
            "_source" : {
              "id" : 5,
              "message" : "message 5"
            },
            "sort" : [
              "5"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "6",
            "_score" : null,
            "_source" : {
              "id" : 6,
              "message" : "message 12"
            },
            "sort" : [
              "6"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "7",
            "_score" : null,
            "_source" : {
              "id" : 7,
              "message" : "message 7"
            },
            "sort" : [
              "7"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "8",
            "_score" : null,
            "_source" : {
              "id" : 8,
              "message" : "updated message"
            },
            "sort" : [
              "8"
            ]
          },
          {
            "_index" : "documents",
            "_type" : "mydocument",
            "_id" : "9",
            "_score" : null,
            "_source" : {
              "id" : 9,
              "message" : "message 18"
            },
            "sort" : [
              "9"
            ]
          }
        ]
      }
    }
    

    Observe that the source documents have been updated

    1. documents with an _id divisible by 3 have updated the document using a scripted update
    2. documents with an _id divisible by 2 (and not by 3) have updated the document using a partial update.
    3. Remaining documents have been updated by passing the original document; this results in a noop in the bulk response.