Search code examples
elasticsearch, words, n-gram, startswith

Elasticsearch starts with, multiple words


I'm trying to implement an autocomplete feature from phrases that contain multiple words.

I want to be able to match only the beginning of words (edgeNGram?), but for every word searched.

For example, if I search for "monitor", I should receive all phrases that contain the word monitor, but if I search for "onitor", I should get no matches (from the dataset below). Likewise, a search for "mon ap" should return, for example, "APNEA MONITOR- SCHULTE Vital Signs Monitor", while "mon rrr" should return no results.

So my question is: how should I go about implementing it?

So in short: the matching phrases should contain words that start with the terms searched for.

Here is my mapping:

{
    "quicksearch2" : {
        "results" : {
            "properties" : {       
                "phrase" : {
                    "type" : "string",
                    "index_analyzer" : "quicksearch_index_analyzer",
                    "search_analyzer" : "quicksearch_search_analyzer"
                }        
            }
        }
    }
}

And here are my settings:

{
    "quicksearch2" : {
        "settings" : {
            "index.analysis.analyzer.quicksearch_index_analyzer.filter.4" : "left_ngram",
            "index.analysis.analyzer.quicksearch_search_analyzer.filter.3" : "unique",
            "index.analysis.analyzer.quicksearch_index_analyzer.filter.3" : "unique",
            "index.analysis.filter.left_ngram.max_gram" : "20",
            "index.analysis.analyzer.quicksearch_search_analyzer.filter.2" : "asciifolding",
            "index.analysis.analyzer.quicksearch_search_analyzer.tokenizer" : "keyword",
            "index.analysis.analyzer.quicksearch_search_analyzer.filter.1" : "lowercase",
            "index.number_of_replicas" : "0",
            "index.analysis.analyzer.quicksearch_search_analyzer.filter.0" : "trim",
            "index.analysis.filter.left_ngram.type" : "edgeNGram",
            "index.analysis.analyzer.quicksearch_search_analyzer.type" : "custom",
            "index.analysis.analyzer.quicksearch_index_analyzer.filter.0" : "trim",
            "index.analysis.analyzer.quicksearch_index_analyzer.filter.2" : "asciifolding",
            "index.analysis.analyzer.quicksearch_index_analyzer.filter.1" : "lowercase",
            "index.analysis.analyzer.quicksearch_index_analyzer.type" : "custom",
            "index.analysis.filter.left_ngram.side" : "front",
            "index.analysis.analyzer.quicksearch_index_analyzer.tokenizer" : "keyword",
            "index.number_of_shards" : "1",
            "index.version.created" : "900899",
            "index.uuid" : "Lb7vC-eHQB-u_Okm3ERLow"
        }
    }
}

Here is my query:

query: {
    match: {
        phrase: {
            query: term,
            operator: 'and'
        }
}

Some sample data:

{
    "took" : 133,
    "timed_out" : false,
    "_shards" : {
        "total" : 1,
        "successful" : 1,
        "failed" : 0
    },
    "hits" : {
        "total" : 6197,
        "max_score" : 1.491863,
        "hits" : [ {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "emCydgTfQwuKkl4sSZoosQ",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "AXCO5rUxRwC9SebXcQxXeQ",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor, Neonatal"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "tjJq3klPTsmP8akOc18Htw",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor, Recording"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "-FjKWxl9Rm6-byn-wlpoIw",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Cardiorespiratory Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "Q19k6V6VQ6ulZOLCfESQ6w",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Impedance Pneumograph Bedside Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "YLI1er3cRjSyGumWNVi0pg",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Impedance Pneumograph Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "n5j1SaXeS2W6NymaYAYD6A",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Neonatal Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "U7Q5XrrHRbKOIwfRWO6RTQ",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Pulmonary Function Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "aF_THiCKRIyzunCbBxJTEg",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "APNEA MONITOR- SCHULTE Vital Signs Monitor"
            }
        }, {
            "_index" : "quicksearch2",
            "_type" : "results",
            "_id" : "8BAjZfwMQjWmrkqCO7o6gg",
            "_score" : 1.491863,
            "fields" : {
                "phrase" : "P.P.M. - PORTABLE PRECISION MONITOR Gas Monitor, Atmospheric"
            }
        } ]
    }
}

Solution

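  • Changing the tokenizers (both index and search) from keyword to standard seems to have done the trick. The keyword tokenizer emits the whole phrase as a single token, so the edgeNGram filter only produced prefixes of the entire phrase and a multi-word search was treated as one term; the standard tokenizer splits the text into words, so each word gets its own edge n-grams and each search word is matched separately.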