I want to compute some aggregations (using Elasticsearch 6.2) on products that have criteria. All the criteria are flattened and I want to reuse some aggregation results to reaggregate by a specific criterion.
Here is my index mapping:
PUT my_index
{
"mappings" : {
"_doc" : {
"properties" : {
"contract": {
"properties": {
"products": {
"type": "nested",
"properties": {
"productKey": {
"type": "keyword"
},
"criteria": {
"type": "nested",
"properties": {
"criterionKey": {
"type": "keyword"
},
"criterionValue": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
}
I populated my index with the following data:
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "above_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "all"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0002",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
I am able to count the occurrences of all criterion values per product. To do so, I use the following aggregation request:
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB", "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
It returns:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
},
{
"key": "all",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
},
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
}
]
}
}
}
}
Now I would like to aggregate by criterion values of a specified criterion key, in order to get something like this:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "all",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
}
]
}
}
}
}
What should be the corresponding aggregation request?
Finally I found a solution using a reverse_nested
aggregation.
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
},
"aggs": {
"agg_back_to_root": {
"reverse_nested": {},
"aggs": {
"agg_by_product_crit2": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key2": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value2": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}