Search code examples
elasticsearch, elastic-stack

Ordering the Buckets in ElasticSearch on the basis of doc_count


I am new to Elasticsearch and am currently trying to write an Elasticsearch query involving aggregations that will fetch the top 5 buckets for a combination of fields (the number of fields involved in the query is dynamic; it can be anywhere between 2 and 5).

The problem I am facing is that the Elasticsearch version my team currently uses doesn't support multi-terms (the multi_terms aggregation), so I fell back to a composite aggregation with top_hits as a sub-aggregation in order to get the top 5 buckets. However, the buckets are not returned in sorted order by doc_count / hits.

Request:

{
  "query": {
    "bool": {
      "must": [
        {
          "exists": { "field": "uuid" }
        },
        {
          "query_string": {
            "query": "*",
            "lowercase_expanded_terms": false
          }
        }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ]
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 5,
            "_source": {
              "includes": ["uuid"]
            }
          }
        }
      }
    }
  },
  "size": 0
}

Response:

{
  "took": 310,
  "timed_out": false,
  "num_reduce_phases": 2,
  "_shards": {
    "total": 140,
    "successful": 140,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 8400,
    "max_score": 0.0,
    "hits": []
  },
  "aggregations": {
    "test_aggregation": {
      "after_key": {
        "zipCode": "someRandomZipCode",
        "routeCode": "someRandomRouteCode"
      },
      "buckets": [
        {
          "key": {
            "zipCode": "someRandomValue-1",
            "routeCode": "someRandomRouteCode-1"
          },
          "doc_count": 36,
          "test_aggregation_hits": {
            "hits": {
              "total": 36,
              "max_score": 11.5650015,
              "hits": [
                {
                  .... // some data
                }
              ]
            }
          }
        },
        {
          "key": {
            "zipCode": "someRandomValue-2",
            "routeCode": "someRandomRouteCode-2"
          },
          "doc_count": 40,
          "test_aggregation_hits": {
            "hits": {
              "total": 40,
              "max_score": 11.5658015,
              "hits": [
                {
                  .... // some data
                }
              ]
            }
          }
        },
        {
          "key": {
            "zipCode": "someRandomValue-3",
            "routeCode": "someRandomRouteCode-3"
          },
          "doc_count": 13,
          "test_aggregation_hits": {
            "hits": {
              "total": 13,
              "max_score": 11.5750015,
              "hits": [
                {
                  .... // some data
                }
              ]
            }
          }
        }
        ....
        ....
      ]
    }
  }
}
  1. Is the approach I am taking correct?
  2. If the approach is not correct, how should I go about solving this problem?
  3. If the approach is correct, what am I doing wrong, and how can I order the buckets by doc_count?

Solution

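  • I was able to solve this by adding Elasticsearch's Bucket Sort (bucket_sort) pipeline aggregation as a sub-aggregation of the composite aggregation. Note that bucket_sort only orders the buckets returned in the current composite page (at most the composite "size"), so that size must be large enough to cover every key combination, or the remaining pages must be fetched with after_key.

    The final query looked something like this: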

    {
      "query": {
        "bool": {
          "must": [
            {
              "exists": { "field": "uuid" }
            },
            {
              "query_string": {
                "query": "*",
                "lowercase_expanded_terms": false
              }
            }
          ]
        }
      },
      "aggs": {
        "test_aggregation": {
          "composite": {
            "sources": [
              { "zipCode": { "terms": { "field": "zipCode" } } },
              { "routeCode": { "terms": { "field": "routeCode" } } }
            ],
            "size": 100
          },
          "aggs": {
            "test_aggregation_hits": {
              "top_hits": {
                "size": 25,
                "_source": {
                  "includes": ["uuid"]
                }
              }
            },
            "sort_buckets": {
              "bucket_sort": {
                "sort": [
                  { "_count": { "order": "desc" } }
                ],
                "size": 10
              }
            }
          }
        }
      },
      "size": 0
    }