Search code examples
elasticsearch, autocomplete, fuzzy-search, exact-match

Exact match and fuzziness...What is the good way?


I have invested many hours trying to find the best way to create an autocomplete that supports multi-language search of cities (ES/EN), fuzziness, and priority for exact matches (showing them at the top of the results), but I can't find a good way to accomplish this.

My current solution works very well in many cases, but when I search for Roma the first option is "Iasi-East Romania, romania" and Roma, Italy is only in the third position (even though it is an exact match).

Result Json:

[{"_index":"destinations","_type":"doc","_id":"_X80XWcBn2nzTu98N7_F","_score":75.50012,"_source":{"destination_name_en":"Iasi-East Romania","destination_name_es":"Iasi-East Romania","destination_name_pt":"Iasi-East Romania","country_code":"RO","country_name":"ROMANIA","destination_id":7953,"popularity":"0"}},{"_index":"destinations","_type":"doc","_id":"7380XWcBn2nzTu98OMZl","_score":73.116455,"_source":{"destination_name_en":"La Romana","destination_name_es":"La Romana","destination_name_pt":"La Romana","country_code":"DO","country_name":"DOMINICAN REPUBLIC","destination_id":2816,"popularity":"0"}},{"_index":"destinations","_type":"doc","_id":"1X80XWcBn2nzTu98OMZl","_score":71.4391,"_source":{"_index":"destinations","_type":"doc","_id":"8H80XWcBn2nzTu98OMZl","_score":52.018818,"_source":{"destination_name_en":"Rome","destination_name_es":"Roma","destination_name_pt":"Roma","country_code":"IT","country_name":"ITALY","destination_id":6338,"popularity":"0"}}]

Right now this is my best solution..

Mapping:

// Index settings: a custom analysis chain used to build autocomplete tokens.
'settings' => [
    'analysis' => [
        'filter' => [
            // Edge n-gram token filter emitting prefixes of 1 to 20 characters.
            'autocomplete_filter' => [
                'type'     => 'edge_ngram',
                'min_gram' => 1,
                'max_gram' => 20,
            ],
        ],
        'analyzer' => [
            // Standard tokenizer, then lowercase, ASCII folding and edge n-grams.
            'autocomplete' => [
                'type'      => 'custom',
                'tokenizer' => 'standard',
                'filter'    => ['lowercase', 'asciifolding', 'autocomplete_filter'],
            ],
        ],
    ],
],
// Field mappings for the "doc" type: one name field per language, each
// indexed with the autocomplete analyzer and searched with the standard one.
'mappings' => [
    'doc' => [
        'properties' => [
            'destination_name_en' => [
                'type'            => 'text',
                'analyzer'        => 'autocomplete',
                'search_analyzer' => 'standard',
            ],
            'destination_name_es' => [
                'type'            => 'text',
                'analyzer'        => 'autocomplete',
                'search_analyzer' => 'standard',
            ],
            'destination_name_pt' => [
                'type'            => 'text',
                'analyzer'        => 'autocomplete',
                'search_analyzer' => 'standard',
            ],
            'popularity' => [
                'type' => 'integer',
            ],
        ],
    ],
]

Search:

// Bool query with two "should" clauses over every destination_name_* field.
'query' => [
    'bool' => [
        'should' => [
            // Clause 1: non-fuzzy most_fields match, boosted by 2.
            [
                'multi_match' => [
                    'query'  => $text,
                    'fields' => ['destination_name_*'],
                    'type'   => 'most_fields',
                    'boost'  => 2,
                ],
            ],
            // Clause 2: fuzzy match (edit distance 1) that requires the
            // first 2 characters to match exactly.
            [
                'multi_match' => [
                    'query'         => $text,
                    'fields'        => ['destination_name_*'],
                    'fuzziness'     => '1',
                    'prefix_length' => 2,
                ],
            ],
        ],
    ],
]

Also, I want to add a boost to specific destinations using their popularity value.

I hope that someone can guide me with an example or direction of which way to go.

I would appreciate it very much


Solution

  • The problem is that when you search for roma, Iasi-East Romania is the first result since it contains roma in all languages, whereas roma only matches Rome in ES/PT/IT and not in EN.

    So if you want to boost exact matches, you need to index your city name in another field without autocompletion (for all languages) and add a new clause in the should on those fields.

    example for the mapping :

     "properties"=> [
            "destination_name_en"=> [
                    "type"=> "text",
                    "analyzer"=> "autocomplete",
                    "search_analyzer"=> "standard",
                    "fields": => [
                        "exact" => [
                            "type"=> "text",
                            "analyzer"=> "standard", // you could use a more fancy analyzer here
                        ]
    
                    ]
            ],
    ....
    

    and in the query :

    // Bool query with three "should" clauses: the two original clauses plus
    // one that targets the non-autocomplete ".exact" sub-fields.
    'query' => [
        "bool" => [
            "should" => [
                // Clause 1: non-fuzzy most_fields match on the autocomplete fields.
                [
                    "multi_match" => [
                        "query"  => $text,
                        "fields" => [
                            "destination_name_*"
                        ],
                        "type"   => "most_fields",
                        "boost"  => 2
                    ]
                ],
                // Clause 2: fuzzy match (edit distance 1, first 2 characters fixed).
                [
                    "multi_match" => [
                        "query"         => $text,
                        "fields"        => [
                            "destination_name_*"
                        ],
                        "fuzziness"     => "1",
                        "prefix_length" => 2
                    ]
                ],
                // Clause 3: match on the ".exact" sub-fields, boosted by 2 so
                // that whole-word hits add to the score.
                [
                    "multi_match" => [
                        "query"  => $text,
                        "type"   => "most_fields",
                        "fields" => [
                            "destination_name_*.exact"
                        ],
                        "boost"  => 2
                    ]
                ]
            ]
        ]
    ]
    

    Could you try something like that and keep us posted ?