Search code examples
Tags: elasticsearch, elastic-stack, elasticsearch-query, elasticsearch-7

Elasticsearch partial query


I am working with Elasticsearch v7.3.1 and trying to implement a partial search. Most searches work as expected, but when I query "John Oxford", the document is returned because "John" matches, even though "Oxford" does not appear anywhere in the document. I expect an empty result in that case.

How can I change this so that the document is not returned when I query "John Oxford"?

My mapping, settings, sample doc and query of student data are below.

Mappings

PUT student
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "autocomplete": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "autocomplete_filter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "DOB": {
        "type": "text"
      },
      "email": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "first_name": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "home_phone": {
        "type": "text"
      },
      "last_name": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "student_id": {
        "type": "text"
      }
    }
  }
}

Sample Document

POST student/_doc
{
  "DOB": "1983-12-04",
  "email": "johndoe@example.com",
  "first_name": "john",
  "home_phone": 1242432,
  "last_name": "doe",
  "student_id": 28
}

Query

GET student/_search
{
  "query": {
    "multi_match": {
      "type": "bool_prefix",
      "analyzer": "standard",
      "query": "john oxford",
      "fields": ["first_name", "last_name", "email", "DOB", "home_phone", "student_id"]
    }
  }
}

These are the queries that should match the sample document:

  • 1242 - partially matches home_phone
  • joh do - partial match against "John" and "Doe"
  • 1983-12-04 - matches the DOB
  • johndoe - partial match on email
  • doe - match last name

Solution

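  • To implement partial search, assign the autocomplete analyzer to the text fields that need it in the mapping, and give those fields a separate search_analyzer. The separate search analyzer is needed because the index-time analyzer uses an edge_ngram filter: if the same analyzer were applied at search time, the query text would also be split into prefixes and would match far too many documents (see the Elasticsearch documentation on the search_analyzer mapping parameter and on the edge n-gram token filter for details). Configuring this in the mapping is also more convenient than specifying an analyzer at query time, as you have done. Try: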

    PUT student
    {
      "settings": {
        "analysis": {
          "filter": {
            "autocomplete_filter": {
              "type": "edge_ngram",
              "min_gram": 1,
              "max_gram": 20
            }
          },
          "analyzer": {
            "autocomplete": {
              "type": "custom",
              "tokenizer": "standard",
              "filter": ["lowercase", "autocomplete_filter"]
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "DOB": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard"
          },
          "email": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "first_name": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "home_phone": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard"
          },
          "last_name": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "student_id": {
            "type": "text"
          }
        }
      }
    }
    

    Then, when you query with two or more terms, combine them with the "and" operator so that every term has to match. For your use case the cross_fields type is the best fit, because the terms may be spread across different fields:

    GET student/_search
    {
      "query": {
        "multi_match": {
          "query": "John Oxford",
          "type": "cross_fields",
          "operator": "and",
          "fields": ["first_name", "last_name", "email", "DOB", "home_phone", "student_id"]
        }
      }
    }