In elastisearch I have an key that is an array I need to count the unique items in the array. Without overlapping items of other documents. So I made this:
The scripted_metric works when it isn't in the date_histogram
'aggs' => [
'groupByWeek' => [
'date_histogram' => [
'field' => 'date',
'calendar_interval' => '1w',
],
'aggs' => [
'count_unique_locations' => [
'scripted_metric' => [
'init_script' => 'state.locations = []',
'map_script' => 'state.locations.addAll(doc.unique_locations_with_error)',
'combine_script' => 'return state.locations',
'reduce_script' => "
def locations = [];
for (state in states) {
for(location in state) {
if(!locations.contains(location) && location != '' ) {
locations.add(location);
}
}
}
return locations.length;
",
],
],
],
],
],
When I run the query I get this error:
{
"error": {
"root_cause":[],
"type":"search_phase_execution_exception",
"reason":"",
"phase":"fetch",
"grouped":true,
"failed_shards":[],
"caused_by":{
"type":"script_exception",
"reason":"runtime error",
"script_stack":[
"locations = []; "," ^---- HERE"],
"script":"def locations = []; for (state in states) { for(location in state){if(!locations.contains(location) && location != '' ) {locations.add(location); }}} return locations.length;",
"lang":"painless",
"position":{
"offset":16,
"start":4,
"end":20
},
"caused_by":{
"type":"null_pointer_exception",
"reason":"cannot access method/field [iterator] from a null def reference"
}
}
},
"status":400
}
I think it has something to do with that doc now null is but I don't know why of how to fix it.
If you want to count unique array items in documents from a date range, you can use an unscripted aggregation
Mapping
PUT /unique_array_item
{
"mappings": {
"properties": {
"text": {
"type": "keyword"
},
"date": {
"type": "date"
}
}
}
}
Documents
PUT /unique_array_item/_bulk
{"create":{"_id":1}}
{"date":"2024-03-28","text":["banana","banana"]}
{"create":{"_id":2}}
{"date":"2024-03-28","text":["apple","apple","apple","apple","apple","apple","banana"]}
{"create":{"_id":3}}
{"date":"2024-03-14","text":["cherry","banana","apple"]}
{"create":{"_id":4}}
{"date":"2024-03-14","text":["pineapple"]}
Query with aggregations
GET /unique_array_item/_search?filter_path=aggregations
{
"aggs": {
"by_week": {
"date_histogram": {
"field": "date",
"calendar_interval": "1w",
"min_doc_count": 1
},
"aggs": {
"date_interval_unique_array_item_count": {
"cardinality": {
"field": "text"
}
}
}
}
}
}
Response
{
"aggregations" : {
"by_week" : {
"buckets" : [
{
"key_as_string" : "2024-03-11T00:00:00.000Z",
"key" : 1710115200000,
"doc_count" : 2,
"date_interval_unique_array_item_count" : {
"value" : 4
}
},
{
"key_as_string" : "2024-03-25T00:00:00.000Z",
"key" : 1711324800000,
"doc_count" : 2,
"date_interval_unique_array_item_count" : {
"value" : 2
}
}
]
}
}
}
Test it:
["cherry","banana","apple","pineapple"]
= 4["apple","banana"]
= 2Response is correct