Search code examples
elasticsearchelasticsearch-aggregation

elasticsearch query for count of distinct field value with where condition on another field


I want to query in my elasticsearch index similar to below query on postgres

    select count(distinct(candidate_id)) from candidate_ranking cr 
where badge='1'

Please consider below as sample index with few documents

{
  "id": 295537,
  "candidate_id": 29492,
  "created_at": "2021-03-30T02:23:42.077149+00:00",
  "badge": "1"
}
{
  "id": 271179,
  "candidate_id": 29492,
  "created_at": "2021-03-30T01:19:59.803999+00:00",
  "badge": "1"
}
{
  "id": 247169,
  "candidate_id": 29492,
  "created_at": "2021-03-30T00:16:04.077245+00:00",
  "badge": "1"
}
{
  "id": 247156,
  "candidate_id": 29332,
  "created_at": "2021-03-30T00:17:04.077245+00:00",
  "badge": "1"
}
{
  "id": 225434,
  "candidate_id": 24493,
  "created_at": "2021-03-29T23:13:59.266074+00:00",
  "badge": null
}
{
  "id": 192999,
  "candidate_id": 24493,
  "created_at": "2021-03-29T22:20:24.942116+00:00",
  "badge": null
}
{
  "id": 177712,
  "candidate_id": 24493,
  "created_at": "2021-03-29T21:33:32.596613+00:00",
  "badge": null
}
{
  "id": 162916,
  "candidate_id": 24493,
  "created_at": "2021-03-29T21:05:03.985032+00:00",
  "badge": null
}
{
  "id": 148136,
  "candidate_id": 23422,
  "created_at": "2021-03-29T20:20:36.482066+00:00",
  "badge": "2"
}
{
  "id": 118558,
  "candidate_id": 23422,
  "created_at": "2021-03-27T01:34:29.628550+00:00",
  "badge": "2"
}
{
  "id": 133354,
  "candidate_id": 23422,
  "created_at": "2021-03-27T02:11:35.811420+00:00",
  "badge": "2"
}

for above case my answer count should be 2, as candidate_id=29492, 29332 have badge 1. My es index contains numerous documents with same candidate_id but different created_at field


Solution

  • You need to use multiple combinations of aggregation - terms, top_hits, max aggregation

    Then you need to use stats_bucket aggregation, to get the count of buckets

    {
      "size": 0,
      "aggs": {
        "badge_1": {
          "terms": {
            "field": "badge.keyword",
            "include": [
              "1"
            ],
            "size": 10
          },
          "aggs": {
            "unique_id": {
              "terms": {
                "field": "candidate_id",
                "size": 10,
                "order": {
                  "latestOrder": "desc"
                }
              },
              "aggs": {
                "top_doc": {
                  "top_hits": {
                    "size": 1
                  }
                },
                "latestOrder": {
                  "max": {
                    "field": "created_at"
                  }
                }
              }
            },
            "bucketcount": {
              "stats_bucket": {
                "buckets_path": "unique_id._count"
              }
            }
          }
        }
      }
    }
    

    Search Result will be

        "aggregations": {
        "badge_1": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "1",
              "doc_count": 4,
              "unique_id": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": 29492,
                    "doc_count": 3,
                    "latestOrder": {
                      "value": 1.617071022077E12,
                      "value_as_string": "2021-03-30T02:23:42.077000Z"
                    },
                    "top_doc": {
                      "hits": {
                        "total": {
                          "value": 3,
                          "relation": "eq"
                        },
                        "max_score": 1.0,
                        "hits": [
                          {
                            "_index": "67162554",
                            "_type": "_doc",
                            "_id": "1",
                            "_score": 1.0,
                            "_source": {
                              "id": 295537,
                              "candidate_id": 29492,
                              "created_at": "2021-03-30T02:23:42.077149+00:00",
                              "badge": "1"
                            }
                          }
                        ]
                      }
                    }
                  },
                  {
                    "key": 29332,
                    "doc_count": 1,
                    "latestOrder": {
                      "value": 1.617063424077E12,
                      "value_as_string": "2021-03-30T00:17:04.077000Z"
                    },
                    "top_doc": {
                      "hits": {
                        "total": {
                          "value": 1,
                          "relation": "eq"
                        },
                        "max_score": 1.0,
                        "hits": [
                          {
                            "_index": "67162554",
                            "_type": "_doc",
                            "_id": "4",
                            "_score": 1.0,
                            "_source": {
                              "id": 247156,
                              "candidate_id": 29332,
                              "created_at": "2021-03-30T00:17:04.077245+00:00",
                              "badge": "1"
                            }
                          }
                        ]
                      }
                    }
                  }
                ]
              },
              "bucketcount": {
                "count": 2,        // note this
                "min": 1.0,
                "max": 3.0,
                "avg": 2.0,
                "sum": 4.0
              }
            }
          ]
        }
      }