Search code examples
elasticsearchkibana

How do I count the number of buckets that match a condition in Elastic Search?


I have a collection of documents describing the scores of users. The same user will have multiple scores.

My data is structured like so:

[
  { "user_id" : 3, "score" : 10 },
  { "user_id" : 1, "score" : 20 },
  { "user_id" : 2, "score" : 60 },
  { "user_id" : 1, "score" : 10 },
  ...
]

I am trying to determine each user's max score. The elastic search query that I am using looks like this:

{
  "size": 0,
  "aggs": {
    "users": {
      "terms": {
        "field": "user_id",
        "size": 9999
      },
      "aggs": {
        "max_score": {
          "max": {
            "field": "score"
          }
        }
      }
    }
  }
}

The response looks like this:

  "aggregations": {
    "users": {
      "buckets": [
        {
          "key": "1",
          "doc_count": 10,
          "max_score": {
            "value": 10
          }
        },
        {
          "key": "2",
          "doc_count": 10,
          "max_score": {
            "value": 20
          }
        },
        ...
      ]
    }
  }
}

How can I find the number of buckets where max_score > 20, max_score > 50, and max_score > 100?

Is there any way to make the response look like below?

  "aggregations": {
    "users": {
      "buckets": [
        {
          "key": "1",
          "doc_count": 10,
          "max_score": {
            "value": 10
          }
        },
        ...
      ],
      "scoresGreaterThan20": {
         "value": 10
      },
      "scoresGreaterThan50": {
         "value": 5
      },
      "scoresGreaterThan100": {
         "value": 2
      },
    }
  }
}

Solution

  • You can achieve your use case by repeating the same terms and max aggregation along with bucket selector aggregation, for different conditions you need. Adding a working example -

    Index Data:

      { "user_id" : 3, "score" : 10 }
      { "user_id" : 1, "score" : 20 }
      { "user_id" : 2, "score" : 60 }
      { "user_id" : 1, "score" : 10 }
    

    Search Query:

    You can use stats bucket aggregation to get the count of buckets after performing the bucket selector aggregation.

    {
      "size": 0,
      "aggs": {
        "user_gt20": {
          "terms": {
            "field": "user_id",
            "size": 9999
          },
          "aggs": {
            "max_score": {
              "max": {
                "field": "score"
              }
            },
            "scoresGreaterThan20": {
              "bucket_selector": {
                "buckets_path": {
                  "values": "max_score"
                },
                "script": "params.values > 20"
              }
            }
          }
        },
        "user_gt20_count": {
          "stats_bucket": {
            "buckets_path": "user_gt20._count"
          }
        },
        "user_gt50": {
          "terms": {
            "field": "user_id",
            "size": 9999
          },
          "aggs": {
            "max_score": {
              "max": {
                "field": "score"
              }
            },
            "scoresGreaterThan50": {
              "bucket_selector": {
                "buckets_path": {
                  "values": "max_score"
                },
                "script": "params.values > 50"
              }
            }
          }
        },
        "user_gt50_count": {
          "stats_bucket": {
            "buckets_path": "user_gt50._count"
          }
        },
        "user_gt100": {
          "terms": {
            "field": "user_id",
            "size": 9999
          },
          "aggs": {
            "max_score": {
              "max": {
                "field": "score"
              }
            },
            "scoresGreaterThan100": {
              "bucket_selector": {
                "buckets_path": {
                  "values": "max_score"
                },
                "script": "params.values > 100"
              }
            }
          }
        },
        "user_gt100_count": {
          "stats_bucket": {
            "buckets_path": "user_gt100._count"
          }
        }
      }
    }
    

    Search Result:

     "aggregations": {
        "user_gt100": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": []
        },
        "user_gt20": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": 2,
              "doc_count": 1,
              "max_score": {
                "value": 60.0
              }
            }
          ]
        },
        "user_gt50": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": 2,
              "doc_count": 1,
              "max_score": {
                "value": 60.0
              }
            }
          ]
        },
        "user_gt20_count": {
          "count": 1,            // note this
          "min": 1.0,
          "max": 1.0,
          "avg": 1.0,
          "sum": 1.0
        },
        "user_gt50_count": {
          "count": 1,             // note this
          "min": 1.0,
          "max": 1.0,
          "avg": 1.0,
          "sum": 1.0
        },
        "user_gt100_count": {
          "count": 0,             // note this
          "min": null,
          "max": null,
          "avg": null,
          "sum": 0.0
        }
      }