Search code examples
elasticsearch, aggregation

Elasticsearch exclude key from composite aggregation


I need to exclude some keys from a composite aggregation. Here is one document from my index as an example:

{
    "end_date": 1230314400000,
    "parameter_codes": [28, 35, 30],
    "platform_code": "41012",
    "start_date": 1230314400000,
    "station_id": 7833246
}

I run a search request that returns one result for each platform_code/parameter_codes pair, along with the corresponding station_id values, and supports paging over the buckets.

Here is the request:

{
    "size": 0,
    "query": {
        "match_all": {
            "boost": 1.0
        }
    },
    "_source": false,
    "aggregations": {
        "compositeAgg": {
            "composite": {
                "size": 10,
                "sources": [{
                        "platform_code": {
                            "terms": {
                                "field": "platform_code",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }, {
                        "parameter_codes": {
                            "terms": {
                                "field": "parameter_codes",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }]
            },
            "aggregations": {
                "aggstation_id": {
                    "terms": {
                        "field": "station_id",
                        "size": 2147483647,
                        "min_doc_count": 1,
                        "shard_min_doc_count": 0,
                        "show_term_doc_count_error": false,
                        "order": {
                            "_key": "asc"
                        }
                    }
                },
                "pipe": {
                    "bucket_sort": {
                        "sort": [{
                                "_key": {
                                    "order": "asc"
                                }
                            }],
                        "from": 0,
                        "size": 10,
                        "gap_policy": "SKIP"
                    }
                }
            }
        }
    }
}

This request gives me the following results:

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 8,
        "successful": 8,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 3,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "composite#compositeAgg": {
            "after_key": {
                "platform_code": "41012",
                "parameter_codes": 60
            },
            "buckets": [{
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 28
                    },
                    "doc_count": 1,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 30
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 35
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }]
        }
    }
}

This works very well, but I need to exclude one or more parameter_codes values. For example, when excluding 35, I want only these keys:

{
   "platform_code": "41012",
   "parameter_codes": 28
}

and

{
   "platform_code": "41012",
   "parameter_codes": 30
}

I have tried many options but have not managed to achieve this. Does anybody know how I can do that?


Solution

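  • A script can be used in the composite aggregation's terms source (in place of "field") to return only specific values of the array; the example below drops the value 35. Note that it references "platform_code.keyword", which assumes platform_code is a text field with a keyword sub-field; if the field is mapped directly as keyword, as the question's request suggests, use "platform_code" instead.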

    {
      "size": 0,
      "query": {
        "match_all": {
          "boost": 1
        }
      },
      "_source": false,
      "aggregations": {
        "compositeAgg": {
          "composite": {
            "size": 10,
            "sources": [
              {
                "platform_code": {
                  "terms": {
                    "field": "platform_code.keyword",
                    "missing_bucket": false,
                    "order": "asc"
                  }
                }
              },
              {
                "parameter_codes": {
                  "terms": {
                    "script": {
                      "source": """
                       def arr=[];
                       for (item in doc['parameter_codes']) {
                           if(item !=35)
                           {
                              arr.add(item);
                           }
                        }
                      return arr"""
                    }
                  }
                }
              }
            ]
          },
          "aggregations": {
            "aggstation_id": {
              "terms": {
                "field": "station_id",
                "size": 2147483647,
                "min_doc_count": 1,
                "shard_min_doc_count": 0,
                "show_term_doc_count_error": false,
                "order": {
                  "_key": "asc"
                }
              }
            },
            "pipe": {
              "bucket_sort": {
                "sort": [
                  {
                    "_key": {
                      "order": "asc"
                    }
                  }
                ],
                "from": 0,
                "size": 10,
                "gap_policy": "SKIP"
              }
            }
          }
        }
      }
    }