Elasticsearch 6.4 does not apply search_analyzer in fields


I have a problem with an Elasticsearch mapping. For example, the mapping for the field name is:

{
    "name": {
        "type": "keyword",
        "fields": {
            "ngram": {
                "type": "text",
                "analyzer": "ngram_analyzer",
                "search_analyzer": "ngram_analyzer"
            },
            "word": {
                "type": "text",
                "analyzer": "word_analyzer",
                "search_analyzer": "word_analyzer"
            }
        }
    }
}

The whole mapping works except for search_analyzer, which Elasticsearch seems to ignore.
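One way to see what Elasticsearch actually stored is to read the mapping back; a minimal sketch, assuming the index is named suggestions as in the solution below:

GET /suggestions/_mapping HTTP/1.1
Host: localhost:9200

If search_analyzer was dropped, the sub-fields come back with only analyzer set.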

The analysis settings:

{
   "analysis":{
      "analyzer":{
         "ngram_analyzer":{
            "type":"custom",
            "char_filter":[
               "number_char_filter_map",
               "remove_duplicates"
            ],
            "tokenizer":"ngram_tokenizer_whitespace",
            "filter":[
               "lowercase",
               "english_stop"
            ]
         },
         "word_analyzer":{
            "type":"custom",
            "char_filter":[
               "number_char_filter_map",
               "remove_duplicates"
            ],
            "tokenizer":"word_tokenizer",
            "filter":[
               "lowercase",
               "english_stop"
            ]
         }
      },
      "char_filter":{
         "remove_duplicates":{
            "type":"pattern_replace",
            "pattern":"(.)(?=\\1)",
            "replacement":""
         },
         "remove_white_spaces":{
            "type":"pattern_replace",
            "pattern":"(\s)",
            "replacement":""
         }
      },
      "filter":{
         "english_stop":{
            "type":"stop",
            "ignore_case":true,
            "stopwords":"_english_"
         }
      },
      "tokenizer":{
         "ngram_tokenizer":{
            "type":"ngram",
            "min_gram":2,
            "max_gram":7
         },
         "ngram_tokenizer_whitespace":{
            "type":"ngram",
            "min_gram":2,
            "max_gram":7,
            "token_chars":[
               "letter",
               "digit",
               "punctuation",
               "symbol"
            ]
         },
         "word_tokenizer":{
            "type":"standard"
         }
      }
   }
}
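Either analyzer can also be exercised directly with the _analyze API, which is a quick way to confirm the char filter, tokenizer, and filter chain behave as intended; a minimal sketch, assuming the settings above are applied to an index named suggestions:

POST /suggestions/_analyze HTTP/1.1
Host: localhost:9200
Content-Type: application/json

{
   "analyzer": "ngram_analyzer",
   "text": "hello world"
}

The response lists the tokens the analyzer emits, so the n-gram output can be inspected before any documents are indexed.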

In the Elasticsearch documentation I couldn't find search_analyzer documented as an option under fields. If this approach does not work, is there an alternative structure for specifying a search analyzer?


Solution

  • Problem solved after a couple of days... The problem was using the same analyzer as the search_analyzer. I simply defined another analyzer with a different name in the settings (using the same key for both analyzer and search_analyzer seems to cause the problem and makes search_analyzer get ignored).

    Cloned ngram_analyzer to ngram_search_analyzer, and word_analyzer to word_search_analyzer.

    And the create mapping request:

    PUT /suggestions HTTP/1.1
    Host: localhost:9200
    Content-Type: application/json
    
    {
       "mappings":{
          "doc":{
             "properties":{
                "caption":{
                   "type":"keyword",
                   "fields":{
                      "ngram":{
                         "type":"text",
                         "analyzer":"ngram_analyzer",
                         "search_analyzer":"ngram_search_analyzer"
                      },
                      "word":{
                         "type":"text",
                         "analyzer":"word_analyzer",
                         "search_analyzer":"word_search_analyzer"
                      }
                   }
                }
             }
          }
       },
       "settings":{
          "number_of_shards":1,
          "number_of_replicas":1,
          "routing_partition_size":1,
          "analysis":{
             "analyzer":{
                "ngram_analyzer":{
                   "type":"custom",
                   "char_filter":[
                      "number_char_filter_map",
                      "remove_duplicates"
                   ],
                   "tokenizer":"ngram_tokenizer_whitespace",
                   "filter":[
                      "lowercase",
                      "english_stop"
                   ]
                },
                "ngram_search_analyzer":{
                   "type":"custom",
                   "char_filter":[
                      "number_char_filter_map",
                      "remove_duplicates"
                   ],
                   "tokenizer":"ngram_tokenizer_whitespace",
                   "filter":[
                      "lowercase",
                      "english_stop"
                   ]
                },
                "word_analyzer":{
                   "type":"custom",
                   "char_filter":[
                      "number_char_filter_map",
                      "remove_duplicates"
                   ],
                   "tokenizer":"word_tokenizer",
                   "filter":[
                      "lowercase",
                      "english_stop"
                   ]
                },
                "word_search_analyzer":{
                   "type":"custom",
                   "char_filter":[
                      "number_char_filter_map",
                      "remove_duplicates"
                   ],
                   "tokenizer":"word_tokenizer",
                   "filter":[
                      "lowercase",
                      "english_stop"
                   ]
                }
             },
             "char_filter":{
                "number_char_filter_map":{
                   "type":"mapping",
                   "mappings":[
                      "\u0660 => 0",
                      "\u0661 => 1",
                      "\u0662 => 2",
                      "\u0663 => 3",
                      "\u0664 => 4",
                      "\u0665 => 5",
                      "\u0666 => 6",
                      "\u0667 => 7",
                      "\u0668 => 8",
                      "\u0669 => 9",
                      "\u06f0 => 0",
                      "\u06f1 => 1",
                      "\u06f2 => 2",
                      "\u06f3 => 3",
                      "\u06f4 => 4",
                      "\u06f5 => 5",
                      "\u06f6 => 6",
                      "\u06f7 => 7",
                      "\u06f8 => 8",
                      "\u06f9 => 9"
                   ]
                },
                "remove_duplicates":{
                   "type":"pattern_replace",
                   "pattern":"(.)(?=\\1)",
                   "replacement":""
                },
                "remove_white_spaces":{
                   "type":"pattern_replace",
                   "pattern":"(\\s)",
                   "replacement":""
                }
             },
             "filter":{
                "english_stop":{
                   "type":"stop",
                   "ignore_case":true,
                   "stopwords":"_english_"
                }
             },
             "tokenizer":{
                "ngram_tokenizer":{
                   "type":"ngram",
                   "min_gram":2,
                   "max_gram":7
                },
                "ngram_tokenizer_whitespace":{
                   "type":"ngram",
                   "min_gram":2,
                   "max_gram":7,
                   "token_chars":[
                      "letter",
                      "digit",
                      "punctuation",
                      "symbol"
                   ]
                },
                "word_tokenizer":{
                   "type":"standard",
                   "token_chars":[
    
                   ]
                }
             }
          }
       }
    }
    

    Now I see the search analyzers in the mapping :]
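    To double-check, reading the mapping back should now show both analyzer names on each sub-field; a sketch of the request:

    GET /suggestions/_mapping HTTP/1.1
    Host: localhost:9200

    One likely explanation for the original behaviour: when search_analyzer is identical to analyzer, Elasticsearch treats it as the default and omits it from the returned mapping, so the setting looks ignored even though it is in effect.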

    I also think it is a good idea to have separate analyzers and search analyzers anyway, since it leaves room for customizing each side later.
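    For completeness, a query against one of the sub-fields is analyzed with that sub-field's search_analyzer; a minimal sketch (the query text is arbitrary):

    POST /suggestions/_search HTTP/1.1
    Host: localhost:9200
    Content-Type: application/json

    {
       "query":{
          "match":{
             "caption.ngram":"example"
          }
       }
    }

    Here the query string goes through ngram_search_analyzer, while the parent caption field remains a keyword field for exact matches.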