Search code examples
elasticsearch, elasticsearch-aggregation, elasticsearch-8

How to limit the response size in a terms aggregation in Elasticsearch


I am using the Elasticsearch query below and getting a response, but how can I limit the number of results returned for each itemDetails.itemId specified in the query?

Elasticsearch version: 8.7.1

Sample Elasticsearch document:

{
  "itemDetails": {
    "itemId": "3076",
    "usecase": "habc",
    "usecaseId": "xyz"
  },
  "metaData": {
    "cId": "96ff54507c2d018e5c767785c705a5b2",
    "date1": "2023-09-29T12:29:54",
    "date2": "2023-09-29T12:30:09"
  }
other properties....
}

Input query:

POST /index/_search?typed_keys=true
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "itemDetails.itemId": [
              "3076",
              "1111",
              "2222"
            ]
          }
        },
        {
          "terms": {
            "itemDetails.usecase": [
              "habc"
            ]
          }
        },
        {
          "range": {
            "metaData.date1": {
              "lte": "2023-09-30T19:55:54.611Z",
              "gte": "2023-09-27T19:55:54.611Z"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "distinct_cIds": {
      "terms": {
        "field": "metaData.cId",
        "order": { "max_date1": "desc" }
      },
      "aggs": {
        "max_date1": { 
          "max": { "field": "metaData.date1" } 
        },
        "top_doc": {
          "top_hits": {
            "size": 1,
            "sort": [
               {"metaData.date1": "desc"},          
               {"metaData.date2": "desc"}          
            ],
            "_source": {
              "includes": [
                        "itemDetails.itemId",
                        "itemDetails.usecase",
                        "metaData.cId",
                        "metaData.date1"
              ]
            }
          }
        }
      }
    }
  }
}

Expected result: if size is 2, the response should contain 2 records for each itemDetails.itemId.

Reference question: this query is a continuation of the reference question.

Attaching the response for the query mentioned in the answer below:

"buckets": [
  {
    "key": "f8d79b2856e894c9e88852f65120e9e3",
    "doc_count": 1,
    "sterms#distinct_itemId": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "3076",
          "doc_count": 1,
          "top_hits#top_doc": {
            "hits": {
              "total": {
                "value": 1,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [
                {}
                    ]
                  }
                }
              }
            ]
          },
          "max#max_date1": {
            "value": 1697385179000,
            "value_as_string": "2023-10-15T15:52:59.000Z"
          }
        },
        {
          "key": "153c067041341dc4d2052313d3f0891f",
          "doc_count": 1,
          "sterms#distinct_itemId": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "3076",
                "doc_count": 1,
                "top_hits#top_doc": {
                  "hits": {
                    "total": {
                      "value": 1,
                      "relation": "eq"
                    },
                    "max_score": null,
                    "hits": [
                      {}
                    ]
                  }
                }
              }
            ]
          },
          "max#max_date1": {
            "value": 1697303179000,
            "value_as_string": "2023-10-14T17:06:19.000Z"
          }
        }
... (the response continues; I think it is returning results for all distinct cIds)
           

Solution

  • If you want to have two results per itemId, you need to add another terms sub-aggregation for the itemDetails.itemId field, as shown below:

    POST /index/_search?typed_keys=true
    {
      "size": 0,
      "query": {
        "bool": {
          "must": [
            {
              "terms": {
                "itemDetails.itemId": [
                  "3076",
                  "1111",
                  "2222"
                ]
              }
            },
            {
              "terms": {
                "itemDetails.usecase": [
                  "habc"
                ]
              }
            },
            {
              "range": {
                "metaData.date1": {
                  "lte": "2023-09-30T19:55:54.611Z",
                  "gte": "2023-09-27T19:55:54.611Z"
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "distinct_cIds": {
          "terms": {
            "field": "metaData.cId",
            "order": {
              "max_date1": "desc"
            }
          },
          "aggs": {
            "max_date1": {
              "max": {
                "field": "metaData.date1"
              }
            },
            "distinct_itemId": {
              "terms": {
                "field": "itemDetails.itemId"
              },
              "aggs": {
                "top_doc": {
                  "top_hits": {
                    "size": 2,
                    "sort": [
                      {
                        "metaData.date1": "desc"
                      },
                      {
                        "metaData.date2": "desc"
                      }
                    ],
                    "_source": {
                      "includes": [
                        "itemDetails.itemId",
                        "itemDetails.usecase",
                        "metaData.cId",
                        "metaData.date1"
                      ]
                    }
                  }
                }
              }
            }
          }
        }
      }
    }