Search code examples
elasticsearch elastic-stack

Change field type in index without reindex


First, I had this index template

GET localhost:9200/_index_template/document

And this is the output

{
  "index_templates": [
    {
      "name": "document",
      "index_template": {
        "index_patterns": [
          "v*-documents-*"
        ],
        "template": {
          "settings": {
            "index": {
              "number_of_shards": "1"
            }
          },
          "mappings": {
            "properties": {
              "firstOperationAtUtc": {
                "format": "epoch_millis",
                "ignore_malformed": true,
                "type": "date"
              },
              "firstOperationAtUtcDate": {
                "ignore_malformed": true,
                "type": "date"
              }
            }
          },
          "aliases": {
            "documents-": {}
          }
        },
        "composed_of": [],
        "priority": 501,
        "version": 1
      }
    }
  ]
}

And my data is indexed, for example

GET localhost:9200/v2-documents-2021-11-20/_search
{
"query": {
    "bool": {
      "should": [
        {
          "exists": {
            "field": "firstOperationAtUtc"
          }
        }
      ]
    }
  }
}

Output is

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.0,
        "hits": [
            {
                "_index": "v2-documents-2021-11-20",
                "_type": "_doc",
                "_id": "9b46d6fe78735274342d1bc539b084510000000455",
                "_score": 1.0,
                "_source": {
                    "firstOperationAtUtc": 1556868952000,
                    "firstOperationAtUtcDate": "2019-05-03T13:35:52.000Z"
                }
            }
        ]
    }
}

Next, I needed to update the mapping of the field firstOperationAtUtc to remove the epoch_millis format, so I updated the template:

PUT localhost:9200/_index_template/document
{
  "index_patterns": [
    "v*-documents-*"
  ],
  "template": {
    "settings": {
      "index": {
        "number_of_shards": "1"
      }
    },
    "mappings": {
      "properties": {
        "firstOperationAtUtc": {
          "ignore_malformed": true,
          "type": "date"
        },
        "firstOperationAtUtcDate": {
          "ignore_malformed": true,
          "type": "date"
        }
      }
    },
    "aliases": {
      "documents-": {}
    }
  },
  "version": 1
}

After that, if I run the previous search request again, I still get the indexed data.

But now I need to update the field firstOperationAtUtc and set its value from firstOperationAtUtcDate

POST localhost:9200/v2-documents-2021-11-20/_update_by_query
{
  "script": {
    "source": "if (ctx._source.firstOperationAtUtcDate != null) { ctx._source.firstOperationAtUtc = ctx._source.firstOperationAtUtcDate }",
    "lang": "painless"
  },
  "query": {
    "match": {
      "_id": "9b46d6fe78735274342d1bc539b084510000000455"
    }
  }
}

After that, if I run the previous search request again

GET localhost:9200/v2-documents-2021-11-20/_search
{
"query": {
    "bool": {
      "should": [
        {
          "exists": {
            "field": "firstOperationAtUtc"
          }
        }
      ]
    }
  }
}

I get no hits, as if the field were no longer indexed

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 0,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    }
}

But if I search by id, I get the document with the modified data, and my field is listed as ignored

GET localhost:9200/v2-documents-2021-11-20/_search

{
    "query": {
    "terms": {
      "_id": [ "9b46d6fe78735274342d1bc539b084510000000455" ] 
    }
  }
}

Output is

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.0,
        "hits": [
            {
                "_index": "v2-documents-2021-11-20",
                "_type": "_doc",
                "_id": "9b46d6fe78735274342d1bc539b084510000000455",
                "_score": 1.0,
                "_ignored": [
                    "firstOperationAtUtc"
                ],
                "_source": {
                    "firstOperationAtUtc": "2019-05-03T13:35:52.000Z",
                    "firstOperationAtUtcDate": "2019-05-03T13:35:52.000Z"
                }
            }
        ]
    }
}

How can I get this field indexed without reindexing? I have about a billion documents in the index, and a reindex could cause significant downtime in production.


Solution

  • What you changed is the index template, but not your index mapping. The index template is used only when a new index that matches the name pattern is created.

    What you want to do is to modify the actual mapping of your index, like this:

    PUT test/_mapping
    {
      "properties": {
        "firstOperationAtUtc": {
          "ignore_malformed": true,
          "type": "date"
        }
      }
    }
    

    However, this won't be possible and you will get the following error, which makes sense as you cannot modify an existing field mapping.

    Mapper for [firstOperationAtUtc] conflicts with existing mapper:
    Cannot update parameter [format] from [epoch_millis] to [strict_date_optional_time||epoch_millis]
    

    The only reason your update by query seemed to work is that you have "ignore_malformed": true in your mapping. If you remove that parameter and run your update by query again, you will see the following error:

    "type" : "mapper_parsing_exception",
    "reason" : "failed to parse field [firstOperationAtUtc] of type [date] in document with id '2'. Preview of field's value: '2019-05-03T13:35:52.000Z'",
    "caused_by" : {
      "type" : "illegal_argument_exception",
      "reason" : "failed to parse date field [2019-05-03T13:35:52.000Z] with format [epoch_millis]",
      "caused_by" : {
        "type" : "date_time_parse_exception",
        "reason" : "date_time_parse_exception: Failed to parse with all enclosed parsers"
      }
    }
    

    So, to wrap it up, you have two options:

    1. Create a new index with the right mapping and reindex your old index into it, but that doesn't seem like an option for you.
    2. Create a new field in your existing index mapping (e.g. firstOperationAtUtcTime) and discard the use of firstOperationAtUtc

    The steps would be:

    1. Modify the index template to add the new field
    2. Modify the actual index mapping to add the new field
    3. Run your update by query by modifying the script to write your new field

    In short:

    # 1. Modify your index template
    
    # 2. modify your actual index mapping
    PUT v2-documents-2021-11-20/_mapping
    {
      "properties": {
        "firstOperationAtUtcTime": {
          "ignore_malformed": true,
          "type": "date"
        }
      }
    }
    
    # 3. Run update by query again
    POST v2-documents-2021-11-20/_update_by_query
    {
      "script": {
        "source": "if (ctx._source.firstOperationAtUtcDate != null) { ctx._source.firstOperationAtUtcTime = ctx._source.firstOperationAtUtcDate; ctx._source.remove('firstOperationAtUtc')}",
        "lang": "painless"
      },
      "query": {
        "match": {
          "_id": "9b46d6fe78735274342d1bc539b084510000000455"
        }
      }
    }