Search code examples
elasticsearchnest

How to handle multi-word synonyms


I'm trying to understand the results that I'm getting in Elasticsearch under a couple of conditions. I have this list of synonyms defined:

"product insert, product inserts, qc package, qc package inserts, qc package insert, package insert => package inserts"

My hope was that all of the terms to the left of the arrow would be treated as the term to the right. Here is my index setting:

PUT /test_index
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_syn_filt": {
            "tokenizer": "keyword",
            "type": "synonym",
            "synonyms": [
              "product insert, product inserts, package inserts, qc package, qc packages, qc insert, qc inserts, package insert, qc package insert, qc package inserts => package inserts"
            ]
          }
        },
        "analyzer": {
          "my_synonyms": {
            "filter": [
              "lowercase",
              "my_syn_filt"
            ],
            "tokenizer": "keyword"
          }
        }
      }
    }
  }
}

My problem is that when I search for some of the terms, such as "product insert", I don't get the results I expect. But "product inserts" works just fine. Is there something wrong with my configuration? Am I missing a step?


Solution

  • I've tested your settings, and my guess is that you haven't assigned the my_synonyms analyzer to your field.

    Without knowing how you defined your mappings, I will show you a working example:

    Assuming your mapping and settings look like:

    PUT /my_index
    {
      "mappings": {
        "properties": {
          "data": {
            "type": "text",
            "analyzer": "my_synonyms",  => my guess
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "settings": {
        "index": {
          "analysis": {
            "filter": {
              "my_syn_filt": {
                "tokenizer": "keyword",
                "type": "synonym",
                "synonyms": [
                  "product insert, product inserts, package inserts, qc package, qc packages, qc insert, qc inserts, package insert, qc package insert, qc package inserts => package inserts"
                ]
              }
            },
            "analyzer": {
              "my_synonyms": {
                "filter": [
                  "lowercase",
                  "my_syn_filt"
                ],
                "tokenizer": "keyword"
              }
            }
          }
        }
      }
    }
    

    Indexing some data:

    POST my_index/_doc/1
    {
      "data":"package inserts"
    }
    

    Query which utilizes the synonyms:

    GET my_index/_search
    {
      "query": {
          "match": {
            "data": "product insert"
          }
      }
    }
    

    Results:

    {
     "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 1,
          "relation" : "eq"
        },
        "max_score" : 0.2876821,
        "hits" : [
          {
            "_index" : "my_index",
            "_type" : "_doc",
            "_id" : "1",
            "_score" : 0.2876821,
            "_source" : {
              "data" : "package inserts"
            }
          }
        ]
      }
    }
    

    Without assigning the analyzer to your field, you will get results only if one of the words "package" or "inserts" is included in your search query. That is because, without the analyzer, you are executing a simple match query, which uses Elasticsearch's default standard analyzer.

    Hope this helps