Search code examples
elasticsearchelasticsearch-aggregation

Aggregation by field and then sum value of another field


I need to aggregate by one field and then, in the same aggregation, calculate sum with another field value. But when execute the query the first aggregation is ok but the sum is always 0.

Example index:


{
    "mappings": {
        "transaction": {
            "dynamic": "strict",
            "properties": {
                "transaction": {
                    "properties": {
                        "amount": {
                            "type": "double"
                        }
                    }
                },
                "infrastructureElement": {
                    "type": "nested",
                    "properties": {
                        "infrastructureElementSubType": {
                            "type": "keyword"
                        }
                    }
                }
            }
        }
    }
}

In the query below, aggregate by infrastructureElement.infrastructureElementSubType and then sum the value transactionPurchase.amount in another aggs:

{
    "aggs": {
        "group_by_infrastructure_element": {
            "nested": {
                "path": "infrastructureElement"
            },
            "aggs": {
                "group_by_ie_subtype": {
                    "terms": {
                        "field": "infrastructureElement.infrastructureElementSubType"
                    },
                    "aggs": {
                        "revenue": {
                            "sum": {
                                "field": "transactionPurchase.amount"
                            }
                        }
                    }
                }
            }
        }
    }
}

Current result:

{
    "took": 6,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
    ...
    },
    "aggregations": {
        "group_by_infrastructure_element": {
            "doc_count": 365,
            "group_by_ie_subtype": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "MOBILE",
                        "doc_count": 1,
                        "revenue": {
                            "value": 0
                        }
                    }
                ]
            }
        }
    }
}

Thanks in advance!


Solution

  • You would need to make use of Reverse Nested Aggregation and then chain in the Sum Aggregation for calculating what you are looking for:

    Aggregation Query:

    POST <your_index_name>/_search
    {  
       "size":0,
       "aggs":{  
          "myterms":{  
             "nested":{  
                "path":"infrastructureElement"
             },
             "aggs":{  
                "myterms":{  
                   "terms":{  
                      "field":"infrastructureElement.infrastructureElementSubType",
                      "size":10
                   },
                   "aggs":{  
                      "reverse":{  
                         "reverse_nested":{},
                         "aggs":{  
                            "revenue":{  
                               "sum":{  
                                  "field":"transaction.amount"
                               }
                            }
                         }
                      }
                   }
                }
             }
          }
       }
    }
    

    Also notice how your mapping is structured, the field transaction is not a Nested Type but a simple Object Type. Now, if you are inside the nested aggregation, you would need to traverse back to the root and then perform the metric aggregation for e.g. sum in order to calculate the amount

    Note the below response for the sample documents I've created.

    POST someaggregation/_doc/1
    {
      "transaction":{
        "amount": 100
      },
      "infrastructureElement": [
        {
          "infrastructureElementSubType": "type1"
        },
        {
          "infrastructureElementSubType": "type2"
        }
      ]
    }
    
    POST someaggregation/_doc/2
    {
      "transaction":{
        "amount": 100
      },
      "infrastructureElement": [
        {
          "infrastructureElementSubType": "type1"
        },
        {
          "infrastructureElementSubType": "type2"
        }
      ]
    }
    
    POST someaggregation/_doc/3
    {
      "transaction":{
        "amount": 100
      },
      "infrastructureElement": [
        {
          "infrastructureElementSubType": "type3"
        },
        {
          "infrastructureElementSubType": "type4"
        }
      ]
    }
    

    Response:

    {
      "took" : 519,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 3,
          "relation" : "eq"
        },
        "max_score" : null,
        "hits" : [ ]
      },
      "aggregations" : {
        "myterms" : {
          "doc_count" : 6,
          "myterms" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "type1",
                "doc_count" : 2,
                "reverse" : {
                  "doc_count" : 2,
                  "revenue" : {
                    "value" : 200.0
                  }
                }
              },
              {
                "key" : "type2",
                "doc_count" : 2,
                "reverse" : {
                  "doc_count" : 2,
                  "revenue" : {
                    "value" : 200.0
                  }
                }
              },
              {
                "key" : "type3",
                "doc_count" : 1,
                "reverse" : {
                  "doc_count" : 1,
                  "revenue" : {
                    "value" : 100.0
                  }
                }
              },
              {
                "key" : "type4",
                "doc_count" : 1,
                "reverse" : {
                  "doc_count" : 1,
                  "revenue" : {
                    "value" : 100.0
                  }
                }
              }
            ]
          }
        }
      }
    }
    

    Hope this helps!

    Feel free to upvote and/or accept this answer if you think this solves your problem :)