elasticsearch query for count of distinct field value with where condition on another field

I want to query in my elasticsearch index similar to below query on postgres

    select count(distinct(candidate_id)) from candidate_ranking cr 
where badge='1'

Please consider below as sample index with few documents

{
  "id": 295537,
  "candidate_id": 29492,
  "created_at": "2021-03-30T02:23:42.077149+00:00",
  "badge": "1"
}
{
  "id": 271179,
  "candidate_id": 29492,
  "created_at": "2021-03-30T01:19:59.803999+00:00",
  "badge": "1"
}
{
  "id": 247169,
  "candidate_id": 29492,
  "created_at": "2021-03-30T00:16:04.077245+00:00",
  "badge": "1"
}
{
  "id": 247156,
  "candidate_id": 29332,
  "created_at": "2021-03-30T00:17:04.077245+00:00",
  "badge": "1"
}
{
  "id": 225434,
  "candidate_id": 24493,
  "created_at": "2021-03-29T23:13:59.266074+00:00",
  "badge": null
}
{
  "id": 192999,
  "candidate_id": 24493,
  "created_at": "2021-03-29T22:20:24.942116+00:00",
  "badge": null
}
{
  "id": 177712,
  "candidate_id": 24493,
  "created_at": "2021-03-29T21:33:32.596613+00:00",
  "badge": null
}
{
  "id": 162916,
  "candidate_id": 24493,
  "created_at": "2021-03-29T21:05:03.985032+00:00",
  "badge": null
}
{
  "id": 148136,
  "candidate_id": 23422,
  "created_at": "2021-03-29T20:20:36.482066+00:00",
  "badge": "2"
}
{
  "id": 118558,
  "candidate_id": 23422,
  "created_at": "2021-03-27T01:34:29.628550+00:00",
  "badge": "2"
}
{
  "id": 133354,
  "candidate_id": 23422,
  "created_at": "2021-03-27T02:11:35.811420+00:00",
  "badge": "2"
}

for above case my answer count should be 2, as candidate_id=29492, 29332 have badge 1. My es index contains numerous documents with same candidate_id but different created_at field

Solution

You need to use multiple combinations of aggregation - terms, top_hits, max aggregation

Then you need to use stats_bucket aggregation, to get the count of buckets

{
  "size": 0,
  "aggs": {
    "badge_1": {
      "terms": {
        "field": "badge.keyword",
        "include": [
          "1"
        ],
        "size": 10
      },
      "aggs": {
        "unique_id": {
          "terms": {
            "field": "candidate_id",
            "size": 10,
            "order": {
              "latestOrder": "desc"
            }
          },
          "aggs": {
            "top_doc": {
              "top_hits": {
                "size": 1
              }
            },
            "latestOrder": {
              "max": {
                "field": "created_at"
              }
            }
          }
        },
        "bucketcount": {
          "stats_bucket": {
            "buckets_path": "unique_id._count"
          }
        }
      }
    }
  }
}

Search Result will be

    "aggregations": {
    "badge_1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "1",
          "doc_count": 4,
          "unique_id": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": 29492,
                "doc_count": 3,
                "latestOrder": {
                  "value": 1.617071022077E12,
                  "value_as_string": "2021-03-30T02:23:42.077000Z"
                },
                "top_doc": {
                  "hits": {
                    "total": {
                      "value": 3,
                      "relation": "eq"
                    },
                    "max_score": 1.0,
                    "hits": [
                      {
                        "_index": "67162554",
                        "_type": "_doc",
                        "_id": "1",
                        "_score": 1.0,
                        "_source": {
                          "id": 295537,
                          "candidate_id": 29492,
                          "created_at": "2021-03-30T02:23:42.077149+00:00",
                          "badge": "1"
                        }
                      }
                    ]
                  }
                }
              },
              {
                "key": 29332,
                "doc_count": 1,
                "latestOrder": {
                  "value": 1.617063424077E12,
                  "value_as_string": "2021-03-30T00:17:04.077000Z"
                },
                "top_doc": {
                  "hits": {
                    "total": {
                      "value": 1,
                      "relation": "eq"
                    },
                    "max_score": 1.0,
                    "hits": [
                      {
                        "_index": "67162554",
                        "_type": "_doc",
                        "_id": "4",
                        "_score": 1.0,
                        "_source": {
                          "id": 247156,
                          "candidate_id": 29332,
                          "created_at": "2021-03-30T00:17:04.077245+00:00",
                          "badge": "1"
                        }
                      }
                    ]
                  }
                }
              }
            ]
          },
          "bucketcount": {
            "count": 2,        // note this
            "min": 1.0,
            "max": 3.0,
            "avg": 2.0,
            "sum": 4.0
          }
        }
      ]
    }
  }