Search code examples
elasticsearch elastic-stack elasticsearch-5

Re-indexing problem with suggesters when migrating from Elasticsearch v1.7 to Elasticsearch 7.x


I have a problem with reindexing. I'm not 100% sure that the solution I found is the best one, as we are having issues working with the index from the Java client. So I want to confirm whether the re-indexing steps are correct:

Problem Description: I have an index that uses context suggesters on an old Elasticsearch v1.7 cluster, and I need to migrate it to Elasticsearch v7.x.

GET /autocompleteterms_v20/_search?pretty 
{
 "took" : 2,
 "timed_out" : false,
 "_shards" : {
 "total" : 1,
 "successful" : 1,
 "failed" : 0
},
 "hits" : {
 "total" : 4130,
 "max_score" : 1.0,
 "hits" : [ {
 "_index" : "autocompleteterms_v20",
 "_type" : "autoitem",
 "_id" : "en_incorporated_by_reference",
 "_score" : 1.0,
 "_source":{ "name": "incorporated by reference", "name_suggest": { "input": "incorporated by reference", "context": { "lang": "en" }, "weight": 393 } }
 }, {
 "_index" : "autocompleteterms_v20",
 "_type" : "autoitem",
 "_id" : "en_double_gaming",
 "_score" : 1.0,
 "_source":{ "name": "double gaming", "name_suggest": { "input": "double gaming", "context": { 
 "lang": "en" }, "weight": 371 } }
 }, {
 "_index" : "autocompleteterms_v20",
"_type" : "autoitem",
"_id" : "en_checking_for_players",
"_score" : 1.0,
"_source":{ "name": "checking for players", "name_suggest": { "input": "checking for players", 
 "context": { "lang": "en" }, "weight": 2 } }
},

GET /autocompleteterms_v20/_mapping?pretty
{
"autocompleteterms_v20" : {
"mappings" : {
"autoitem" : {
"properties" : {
"name" : {
"type" : "string"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"payloads" : true,
"preserve_separators" : false,
"preserve_position_increments" : false,
"max_input_length" : 50,
 "context" : {
 "lang" : {
 "type" : "category",
 "path" : "lang_field",
 "default" : [ "en" ]
  }
 }
 }
 }
 }
 }
 }
 }

To reindex to the new cluster, I found that there were some breaking changes in v5 for the suggesters, so I tried to transform the fields of the index during the reindex:

  1. First I create the mapping:

    PUT autocompleteterms_v20 {
     "settings": {
     "index": {
     "number_of_shards": 2,
     "number_of_replicas": 0
     }
    },
    "mappings": {
     "properties": {
     "name": {
      "type": "text"
    },
     "name_suggest": {
    "type": "completion",
    "analyzer": "standard",
    "preserve_separators": false,
    "preserve_position_increments": false,
    "max_input_length": 50,
    "contexts": [
    {
     "name": "lang",
     "type": "category",
     "path": "lang_field"
      }
    ]
    }
    }
    }
    }
    {
     "acknowledged" : true,
     "shards_acknowledged" : true,
     "index" : "autocompleteterms_v20"
    }
    
  2. After that I reindex changing the field context to contexts:

     POST _reindex {
     "source": {
     "remote": {
      "host": "http://x.x.x.x:9200",
      "username": "user",
      "password": "password"
     },
     "index": "autocompleteterms_v20"
     },
     "dest": {
     "index": "autocompleteterms_v20"
    },
    "script": {
    "source": """ 
    ctx._source.name_suggest.contexts = ctx._source.name_suggest.context;
    ctx._source['name_suggest'].remove('context');
    """,
    "lang": "painless"
    }
    }
    {
    "took" : 366,
    "timed_out" : false,
    "total" : 4130,
    "updated" : 0,
    "created" : 4130,
    "deleted" : 0,
    "batches" : 5,
    "version_conflicts" : 0,
    "noops" : 0,
    "retries" : {
    "bulk" : 0,
    "search" : 0
    },
    "throttled_millis" : 0,
    "requests_per_second" : -1.0,
    "throttled_until_millis" : 0,
    "failures" : [ ]
    }
    
  3. When I make a query using explicit context I get results:

     POST /autocompleteterms_v20/_search {
     "suggest": {
     "completeMe": {
     "text": "spirit",
     "completion": {
     "field": "name_suggest",
     "contexts": {
     "lang": [ 
        { "context": "en" }
       ]
       }
      }
      }
      }
      }
      {
      "took" : 5,
      "timed_out" : false,
      "_shards" : {
      "total" : 2,
      "successful" : 2,
      "skipped" : 0,
      "failed" : 0
      },
      "hits" : {
      "total" : {
      "value" : 0,
      "relation" : "eq"
      },
      "max_score" : null,
      "hits" : [ ]
      },
      "suggest" : {
      "completeMe" : [
      {
      "text" : "spirit",
      "offset" : 0,
      "length" : 6,
      "options" : [
      {
       "text" : "spirit",
       "_index" : "autocompleteterms_v20",
       "_type" : "_doc",
       "_id" : "en_spirit",
       "_score" : 290.0,
       "_source" : {
       "name_suggest" : {
       "input" : "spirit",
       "weight" : 290,
       "contexts" : {
       "lang" : "en"
        }
       },
       "name" : "spirit"
       },
       "contexts" : {
       "lang" : [
       "en"
      ]
      }
      },
      {
     "text" : "spirit of the game",
     "_index" : "autocompleteterms_v20",
     "_type" : "_doc",
     "_id" : "en_spirit_of_the_game",
     "_score" : 34.0,
     "_source" : {
     "name_suggest" : {
      "input" : "spirit of the game",
     "weight" : 34,
     "contexts" : {
     "lang" : "en"
     }
    },
    "name" : "spirit of the game"
     },
    "contexts" : {
     "lang" : [
        "en"
       ]
       }
     }
    
  4. But when I make a query without context I get an error:

     POST /autocompleteterms_v20/_search
     {
     "suggest": {
      "completeMe": {
      "text": "spirit",
      "completion": {
      "field": "name_suggest"
      }
     }
    }
    }
     {
      "error" : {
       "root_cause" : [
      {
       "type" : "illegal_argument_exception",
       "reason" : "Missing mandatory contexts in context query"
      }
      ],
      "type" : "search_phase_execution_exception",
     "reason" : "all shards failed",
     "phase" : "query",
     "grouped" : true,
     "failed_shards" : [
     {
     "shard" : 0,
     "index" : "autocompleteterms_v20",
     "node" : "B7EpKWRVRzGkLCnl7CZapQ",
     "reason" : {
     "type" : "illegal_argument_exception",
     "reason" : "Missing mandatory contexts in context query"
     }
     }
      ],
     "caused_by" : {
    "type" : "illegal_argument_exception",
    "reason" : "Missing mandatory contexts in context query",
    "caused_by" : {
    "type" : "illegal_argument_exception",
    "reason" : "Missing mandatory contexts in context query"
     }
     }
      },
      "status" : 400
    }
    

However, the breaking changes documentation for v5.0 (https://www.elastic.co/guide/en/elasticsearch/reference/5.0/breaking_50_suggester.html#_completion_mapping_with_multiple_contexts) states:

"New queries with no context against a context-enabled completion field yields results from all indexed suggestions"

In this context I have two main questions:

  1. Is the reindexing of the old index done correctly?
  2. How can I do queries without choosing context against that index?

Solution

    1. Yes you did everything correctly!

    2. You can't, because another suggester change in ES 7.0 made providing a context mandatory; without it, search performance was degraded too much. From the 7.0 breaking changes:

    Context Completion Suggester

    The ability to query and index context enabled suggestions without context, deprecated in 6.x, has been removed. Context enabled suggestion queries without contexts have to visit every suggestion, which degrades the search performance considerably.