I want to query in my elasticsearch index similar to below query on postgres
select count(distinct(candidate_id)) from candidate_ranking cr
where badge='1'
Please consider below as sample index with few documents
{
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
{
"id": 271179,
"candidate_id": 29492,
"created_at": "2021-03-30T01:19:59.803999+00:00",
"badge": "1"
}
{
"id": 247169,
"candidate_id": 29492,
"created_at": "2021-03-30T00:16:04.077245+00:00",
"badge": "1"
}
{
"id": 247156,
"candidate_id": 29332,
"created_at": "2021-03-30T00:17:04.077245+00:00",
"badge": "1"
}
{
"id": 225434,
"candidate_id": 24493,
"created_at": "2021-03-29T23:13:59.266074+00:00",
"badge": null
}
{
"id": 192999,
"candidate_id": 24493,
"created_at": "2021-03-29T22:20:24.942116+00:00",
"badge": null
}
{
"id": 177712,
"candidate_id": 24493,
"created_at": "2021-03-29T21:33:32.596613+00:00",
"badge": null
}
{
"id": 162916,
"candidate_id": 24493,
"created_at": "2021-03-29T21:05:03.985032+00:00",
"badge": null
}
{
"id": 148136,
"candidate_id": 23422,
"created_at": "2021-03-29T20:20:36.482066+00:00",
"badge": "2"
}
{
"id": 118558,
"candidate_id": 23422,
"created_at": "2021-03-27T01:34:29.628550+00:00",
"badge": "2"
}
{
"id": 133354,
"candidate_id": 23422,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
for above case my answer count should be 2, as candidate_id=29492, 29332 have badge 1. My es index contains numerous documents with same candidate_id but different created_at field
You need to use multiple combinations of aggregation - terms, top_hits, max aggregation
Then you need to use stats_bucket aggregation, to get the count of buckets
{
"size": 0,
"aggs": {
"badge_1": {
"terms": {
"field": "badge.keyword",
"include": [
"1"
],
"size": 10
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"bucketcount": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
}
}
}
Search Result will be
"aggregations": {
"badge_1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 4,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617071022077E12,
"value_as_string": "2021-03-30T02:23:42.077000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67162554",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
}
]
}
}
},
{
"key": 29332,
"doc_count": 1,
"latestOrder": {
"value": 1.617063424077E12,
"value_as_string": "2021-03-30T00:17:04.077000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67162554",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"id": 247156,
"candidate_id": 29332,
"created_at": "2021-03-30T00:17:04.077245+00:00",
"badge": "1"
}
}
]
}
}
}
]
},
"bucketcount": {
"count": 2, // note this
"min": 1.0,
"max": 3.0,
"avg": 2.0,
"sum": 4.0
}
}
]
}
}