Search code examples
elasticsearch field aggregation synonym

Synonyms aggregation in elasticsearch 7 - term based


I am trying to aggregate on fields whose values are similar, like Med and Medium. I don't want both to appear in my aggregation results; only one of them should. I tried using synonyms, but it doesn't seem to work. My question is: how can I merge or unify similar aggregation results when the aggregation is term based?

Below is my work.

Mappings and settings

{
"settings": {
    "index" : {
        "analysis" : {
            "filter" : {
                "synonym_filter" : {
                    "type" : "synonym",
                    "synonyms" : [
                        "medium, m, med",
                        "large, l",
                        "extra small, xs, x small"
                    ]
                }
            },
            "analyzer" : {
                "synonym_analyzer" : {
                    "tokenizer" : "standard",
                    "filter" : ["lowercase", "synonym_filter"] 
                }
            }
        }
    }
},
"mappings": {
    "properties": {
            "skus": {
                "type": "nested",
                "properties": {
                    "labels": {
                        "dynamic": "true",
                        "properties": {
                            "Color": {
                                "type": "text",
                                "fields": {
                                    "synonym": {
                                        "analyzer": "synonym_analyzer",
                                        "type": "text",
                                        "fielddata":true
                                    }
                                }
                            },
                            "Size": {
                                "type": "text",
                                "fields": {
                                    "synonym": {
                                        "analyzer": "synonym_analyzer",
                                        "type": "text",
                                        "fielddata":true
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
}}

Aggregation

{   
"aggs":{
    "sizesFilter": {
                "aggs": {
                    "sizes": {
                        "terms": {
                            "field": "skus.labels.Size.synonym"
                        }
                    }
                },
                "nested": {
                    "path": "skus"
                }
            }
}}

With only one document indexed, my aggregation result is:

"aggregations": {
    "sizesFilter": {
        "doc_count": 1,
        "sizes": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "m",
                    "doc_count": 1
                },
                {
                    "key": "med",
                    "doc_count": 1
                },
                {
                    "key": "medium",
                    "doc_count": 1
                }
            ]
        }
    }
}

Solution

  • I got it working by setting the tokenizer in the analyzer to "keyword":

    {
      "analyzer" : {
                    "synonym_analyzer" : {
                        "tokenizer" : "keyword",
                        "filter" : ["lowercase", "synonym_filter"] 
                    }
                  }
    }