Search code examples
elasticsearch elasticsearch-aggregation

ElasticSearch aggregation with filter on array item


I want to run an aggregation request with some additional logic, but I'm not sure whether it's possible or how to do it. Given the following documents, how can I find the locations in the array that do not have a "type" defined? I tried adding a sub-aggregation with a filter on type, but then location "20" also gets a doc_count of 1 in that sub-aggregation, even though it has no "type".

How can I apply the filter only to the array item that matches each bucket in this case?

Documents:

    // Document 1
    {
        "locations": [{
                "code": "20",
                "names": [{
                        "languageCode": "en-GB",
                        "value": "Amsterdam"
                    }
                ]
            }, {
                "type": {
                    "id": 25,
                    "names": [{
                            "languageCode": "en-GB",
                            "value": "area"
                        }
                    ]
                },
                "code": "21",
                "names": [{
                        "languageCode": "en-GB",
                        "value": "Amsterdam-South"
                    }
                ]
            }
        ]
    }
    // Document 2
    {
        "locations": [{
                "code": "22",
                "names": [{
                        "languageCode": "en-GB",
                        "value": "DenHague"
                    }, {
                        "languageCode": "nl-NL",
                        "value": "DenHaag"
                    }
                ]
            }
        ]
    }

Request:

{
    "aggs": {
        "Filter_Location": {
            "aggs": {
                "SubType": {
                    "filter": {
                        "exists": {
                            "field": "locations.type"
                        }
                    }
                }
            },
            "terms": {
                "field": "locations.code.keyword"
            }
        }
    },
    "size": 0
}

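Result (note: the "groupByAccoId" values are not produced by the request shown above; they presumably come from another sub-aggregation that was omitted from it):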

{
    "aggregations": {
        "Filter_Location": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                    "key": "20",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 1
                    },
                    "groupByAccoId": {
                        "value": 1
                    }
                }, {
                    "key": "21",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 1
                    },
                    "groupByAccoId": {
                        "value": 1
                    }
                }, {
                    "key": "22",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 0
                    },
                    "groupByAccoId": {
                        "value": 1
                    }
                }
            ]
        }
    }
}

Expected Result:

{
    "aggregations": {
        "Filter_Location": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                    "key": "20",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 0
                    }
                }, {
                    "key": "21",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 1
                    }
                }, {
                    "key": "22",
                    "doc_count": 1,
                    "SubType": {
                        "doc_count": 0
                    }
                }
            ]
        }
    }
}

Solution

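  • By default, Elasticsearch flattens arrays of objects, so the association between each location's "code" and its "type" is lost, which is why every location in document 1 matches the exists filter. To prevent the locations array from being flattened, you'll need to map the field as nested in your index mapping: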

    PUT ind
    {
      "mappings": {
        "properties": {
          "locations": {
            "type": "nested"
          }
        }
      }
    }
    

    After indexing the documents with this mapping, wrap the terms aggregation in a nested aggregation; this query will return the desired results:

    GET ind/_search
    {
      "size": 0,
      "aggs": {
        "Filter_Location_parent": {
          "nested": {
            "path": "locations"
          },
          "aggs": {
            "Filter_Location": {
              "terms": {
                "field": "locations.code.keyword"
              },
              "aggs": {
                "SubType": {
                  "filter": {
                    "exists": {
                      "field": "locations.type"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }