elasticsearch opensearch

Fix fuzziness for ElasticSearch/OpenSearch query


I'm having trouble with a simple fuzzy query.

Given this data:

POST test/_doc/1
{
  "id": 1,
  "title": "Test Name"
}

POST test/_doc/2
{
  "id": 2,
  "title": "TestName"
}

And this query:

GET test/_search
{
  "query": {
    "match": {
      "title": {
        "query": "TestName",
        "fuzziness": "AUTO"
      }
    }
  }
}

With this output:

{
  ...
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1.605183,
    "hits": [
      {
        "_index": "test",
        "_id": "2",
        "_score": 1.605183,
        "_source": {
          "id": 2,
          "title": "TestName"
        }
      }
    ]
  }
}

Why doesn't the output return both records?

How can I fix it?


Solution

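  • The solution was to use an Elasticsearch edge n-gram tokenizer; I also had to add the lowercase filter to the analyzer. With the default (standard) analyzer, "Test Name" is indexed as the two terms "test" and "name", and neither is within the two edits that fuzziness AUTO allows of the query term "testname", so document 1 could not match.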

    Thanks, @paulo!

    PUT test
    {
      "mappings": {
        "properties": {
          "title": {
            "type": "text",
            "fields": {
              "edge_ngram": {
                "type": "text",
                "analyzer": "edge_ngram_analyzer"
              }
            }
          }
        }
      },
      "settings": {
        "analysis": {
          "analyzer": {
            "edge_ngram_analyzer": {
              "tokenizer": "edge_ngram_tokenizer",
              "filter": [
                "lowercase"
              ]
            }
          },
          "tokenizer": {
            "edge_ngram_tokenizer": {
              "type": "edge_ngram",
              "min_gram": 3,
              "max_gram": 10,
              "token_chars": [
                "letter",
                "digit"
              ]
            }
          }
        }
      }
    }
    
    POST test/_doc/1
    {
      "title": "Test Name"
    }
    
    POST test/_doc/2
    {
      "title": "TestName"
    }
    
    GET test/_search
    {
      "query": {
        "match": {
          "title.edge_ngram": {
            "query": "Test Name",
            "fuzziness": "AUTO"
          }
        }
      }
    }
    
    

    Now it returns the expected output:

    {
      ...
      "hits": {
        "total": {
          "value": 2,
          "relation": "eq"
        },
        "max_score": 3.1782691,
        "hits": [
          {
            "_index": "test",
            "_id": "1",
            "_score": 3.1782691,
            "_source": {
              "title": "Test Name"
            }
          },
          {
            "_index": "test",
            "_id": "2",
            "_score": 0.68817455,
            "_source": {
              "title": "TestName"
            }
          }
        ]
      }
    }