I have a collection of documents describing the scores of users. The same user will have multiple scores.
My data is structured like so:
[
{ "user_id" : 3, "score" : 10 },
{ "user_id" : 1, "score" : 20 },
{ "user_id" : 2, "score" : 60 },
{ "user_id" : 1, "score" : 10 },
...
]
I am trying to determine each user's max score. The elastic search query that I am using looks like this:
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "user_id",
"size": 9999
},
"aggs": {
"max_score": {
"max": {
"field": "score"
}
}
}
}
}
}
The response looks like this:
"aggregations": {
"users": {
"buckets": [
{
"key": "1",
"doc_count": 10,
"max_score": {
"value": 10
}
},
{
"key": "2",
"doc_count": 10,
"max_score": {
"value": 20
}
},
...
]
}
}
}
How can I find the number of buckets where max_score > 20
, max_score > 50
, and max_score > 100
?
Is there any way to make the response look like below?
"aggregations": {
"users": {
"buckets": [
{
"key": "1",
"doc_count": 10,
"max_score": {
"value": 10
}
},
...
],
"scoresGreaterThan20": {
"value": 10
},
"scoresGreaterThan50": {
"value": 5
},
"scoresGreaterThan100": {
"value": 2
},
}
}
}
You can achieve your use case by repeating the same terms and max aggregation along with bucket selector aggregation, for different conditions you need. Adding a working example -
Index Data:
{ "user_id" : 3, "score" : 10 }
{ "user_id" : 1, "score" : 20 }
{ "user_id" : 2, "score" : 60 }
{ "user_id" : 1, "score" : 10 }
Search Query:
You can use stats bucket aggregation to get the count of buckets after performing the bucket selector aggregation.
{
"size": 0,
"aggs": {
"user_gt20": {
"terms": {
"field": "user_id",
"size": 9999
},
"aggs": {
"max_score": {
"max": {
"field": "score"
}
},
"scoresGreaterThan20": {
"bucket_selector": {
"buckets_path": {
"values": "max_score"
},
"script": "params.values > 20"
}
}
}
},
"user_gt20_count": {
"stats_bucket": {
"buckets_path": "user_gt20._count"
}
},
"user_gt50": {
"terms": {
"field": "user_id",
"size": 9999
},
"aggs": {
"max_score": {
"max": {
"field": "score"
}
},
"scoresGreaterThan50": {
"bucket_selector": {
"buckets_path": {
"values": "max_score"
},
"script": "params.values > 50"
}
}
}
},
"user_gt50_count": {
"stats_bucket": {
"buckets_path": "user_gt50._count"
}
},
"user_gt100": {
"terms": {
"field": "user_id",
"size": 9999
},
"aggs": {
"max_score": {
"max": {
"field": "score"
}
},
"scoresGreaterThan100": {
"bucket_selector": {
"buckets_path": {
"values": "max_score"
},
"script": "params.values > 100"
}
}
}
},
"user_gt100_count": {
"stats_bucket": {
"buckets_path": "user_gt100._count"
}
}
}
}
Search Result:
"aggregations": {
"user_gt100": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
},
"user_gt20": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 1,
"max_score": {
"value": 60.0
}
}
]
},
"user_gt50": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 1,
"max_score": {
"value": 60.0
}
}
]
},
"user_gt20_count": {
"count": 1, // note this
"min": 1.0,
"max": 1.0,
"avg": 1.0,
"sum": 1.0
},
"user_gt50_count": {
"count": 1, // note this
"min": 1.0,
"max": 1.0,
"avg": 1.0,
"sum": 1.0
},
"user_gt100_count": {
"count": 0, // note this
"min": null,
"max": null,
"avg": null,
"sum": 0.0
}
}