Search code examples
elasticsearch, elasticsearch-marvel

elasticsearch phrase_prefix expected results


I'm having some weird behavior with Elasticsearch. I'm using a custom analyzer with a custom tokenizer which splits words on space, +, and -.

when I'm searching

{
  "query": {
    "match_phrase_prefix": {
      "name.default": {
        "query": "paris oly"
      }
    }
  }
}

I get results as expected (paris olympia, etc.), but when I'm searching

{
  "query": {
    "match_phrase_prefix": {
      "name.default": {
        "query": "paris ol"
      }
    }
  }
}

I get no results at all.

settings:

     "analysis": {
           "analyzer": {
              "customAnalyzer": {
                 "type": "custom",
                 "filter": "lowercase",
                 "tokenizer": "customTokenizer"
              },
           "tokenizer": {
              "customTokenizer": {
                 "pattern": "[\\+\\s-]",
                 "type": "pattern"
              }
           }
        }
     }

field mapping:

{
    "name": {
              "properties": {
                        "default": {
                        "type": "string",
                        "analyzer": "customAnalyzer"
                 }
            }
        }
}

sample of part of the doc (the requested field):

 { 
"name": {
              "jp": "パリ オリンピア (劇場)",
              "default": "Paris Olympia",
              }
}

{    
    "TYPE_NAME": {
      "dynamic_templates": [
        {
          "name": {
            "path_match": "*name.*",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "analyzer": "customAnalyzer"
            }
          }
        }
      ],
      "properties": {
        "point": {
          "type": "geo_point"
        }
      }
     }
}

Solution

  • When I tried testing what you posted, it worked for me. I'll post what I did and you can look at it and see if you can figure out what's different about your setup, and if you have further questions I'll try to help.

    I created an index using the mapping and analyzer/tokenizer you posted, then added the doc you posted:

    DELETE /test_index
    
    PUT /test_index
    {
       "settings": {
          "number_of_shards": 1,
          "number_of_replicas": 0,
          "analysis": {
             "tokenizer": {
                "customTokenizer": {
                   "pattern": "[\\+\\s-]",
                   "type": "pattern"
                }
             },
             "analyzer": {
                "customAnalyzer": {
                   "type": "custom",
                   "filter": "lowercase",
                   "tokenizer": "customTokenizer"
                }
             }
          }
       },
       "mappings": {
          "doc": {
             "properties": {
                "name": {
                   "properties": {
                      "default": {
                         "type": "string",
                         "analyzer": "customAnalyzer"
                      }
                   }
                }
             }
          }
       }
    }
    
    PUT /test_index/doc/1
    {
       "name": {
          "jp": "パリ オリンピア (劇場)",
          "default": "Paris Olympia"
       }
    }
    

    Then either of the queries you posted returned the document for me:

    POST /test_index/_search
    {
       "query": {
          "match_phrase_prefix": {
             "name.default": {
                "query": "paris oly"
             }
          }
       }
    }
    ...
    {
       "took": 1,
       "timed_out": false,
       "_shards": {
          "total": 1,
          "successful": 1,
          "failed": 0
       },
       "hits": {
          "total": 1,
          "max_score": 0.38356602,
          "hits": [
             {
                "_index": "test_index",
                "_type": "doc",
                "_id": "1",
                "_score": 0.38356602,
                "_source": {
                   "name": {
                      "jp": "パリ オリンピア (劇場)",
                      "default": "Paris Olympia"
                   }
                }
             }
          ]
       }
    }
    

    or

    POST /test_index/_search
    {
       "query": {
          "match_phrase_prefix": {
             "name.default": {
                "query": "paris ol "
             }
          }
       }
    }
    ...
    {
       "took": 1,
       "timed_out": false,
       "_shards": {
          "total": 1,
          "successful": 1,
          "failed": 0
       },
       "hits": {
          "total": 1,
          "max_score": 0.38356602,
          "hits": [
             {
                "_index": "test_index",
                "_type": "doc",
                "_id": "1",
                "_score": 0.38356602,
                "_source": {
                   "name": {
                      "jp": "パリ オリンピア (劇場)",
                      "default": "Paris Olympia"
                   }
                }
             }
          ]
       }
    }
    

    Here is the code I used, for convenience:

    http://sense.qbox.io/gist/4e58344580dcf01299f7cc2199d0fb7694d2a051

    So there must be something else going on. Can you tell what is different about your setup from what I tried?

    Edit: I did have to switch the order of the tokenizer and analyzer, because I got an error otherwise. So you might want to look into that.