I am using the Elasticsearch query below and getting a response, but how can I limit the number of results returned for each itemDetails.itemId specified in the query?
Elasticsearch version: 8.7.1
Sample Elasticsearch document:
{
"itemDetails": {
"itemId": "3076",
"usecase": "habc",
"usecaseId": "xyz"
},
"metaData": {
"cId": "96ff54507c2d018e5c767785c705a5b2",
"date1": "2023-09-29T12:29:54",
"date2": "2023-09-29T12:30:09"
}
other properties....
}
Input query:
POST /index/_search?typed_keys=true
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"itemDetails.itemId": [
"3076",
"1111",
"2222"
]
}
},
{
"terms": {
"itemDetails.usecase": [
"habc"
]
}
},
{
"range": {
"metaData.date1": {
"lte": "2023-09-30T19:55:54.611Z",
"gte": "2023-09-27T19:55:54.611Z"
}
}
}
]
}
},
"aggs": {
"distinct_cIds": {
"terms": {
"field": "metaData.cId",
"order": { "max_date1": "desc" }
},
"aggs": {
"max_date1": {
"max": { "field": "metaData.date1" }
},
"top_doc": {
"top_hits": {
"size": 1,
"sort": [
{"metaData.date1": "desc"},
{"metaData.date2": "desc"}
],
"_source": {
"includes": [
"itemDetails.itemId",
"itemDetails.usecase",
"metaData.cId",
"metaData.date1"
]
}
}
}
}
}
}
}
Expected result: if size is 2, the response should contain 2 records for each itemDetails.itemId.
Reference question: this query is a continuation of the reference question.
Attaching the response for the query mentioned in the answer below:
"buckets": [
{
"key": "f8d79b2856e894c9e88852f65120e9e3",
"doc_count": 1,
"sterms#distinct_itemId": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3076",
"doc_count": 1,
"top_hits#top_doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": null,
"hits": [
{}
]
}
}
}
]
},
"max#max_date1": {
"value": 1697385179000,
"value_as_string": "2023-10-15T15:52:59.000Z"
}
},
{
"key": "153c067041341dc4d2052313d3f0891f",
"doc_count": 1,
"sterms#distinct_itemId": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3076",
"doc_count": 1,
"top_hits#top_doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": null,
"hits": [
{}
]
}
}
}
]
},
"max#max_date1": {
"value": 1697303179000,
"value_as_string": "2023-10-14T17:06:19.000Z"
}
}
...the response still continues; I think it is returning results for all distinct crawlIds.
If you want to have two results per itemId, you need to add another terms sub-aggregation on the itemDetails.itemId field, as shown below:
POST /index/_search?typed_keys=true
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"itemDetails.itemId": [
"3076",
"1111",
"2222"
]
}
},
{
"terms": {
"itemDetails.usecase": [
"habc"
]
}
},
{
"range": {
"metaData.date1": {
"lte": "2023-09-30T19:55:54.611Z",
"gte": "2023-09-27T19:55:54.611Z"
}
}
}
]
}
},
"aggs": {
"distinct_cIds": {
"terms": {
"field": "metaData.cId",
"order": {
"max_date1": "desc"
}
},
"aggs": {
"max_date1": {
"max": {
"field": "metaData.date1"
}
},
"distinct_itemId": {
"terms": {
"field": "itemDetails.itemId"
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 2,
"sort": [
{
"metaData.date1": "desc"
},
{
"metaData.date2": "desc"
}
],
"_source": {
"includes": [
"itemDetails.itemId",
"itemDetails.usecase",
"metaData.cId",
"metaData.date1"
]
}
}
}
}
}
}
}
}
}