Search code examples
elasticsearchelasticsearch-aggregationelasticsearch-java-api

Elasticsearch get average


I'm trying to average aggregate data on elasticsearch. This is the structure of my data:

document 1

{
   "groupId":"TEST_01",
   "lag":10,
   "detectionDate":"2021-02-26T21:42:30.010Z",
   "tipo":"uno",
   "topics":[
      {
         "name":"topic_01",
         "valore":2
      },
      {
         "name":"topic_02",
         "valore":4
      }
   ]
}

document 2

{
   "groupId":"TEST_01",
   "lag":10,
   "detectionDate":"2021-02-26T21:42:30.010Z",
   "tipo":"uno",
   "topics":[
      {
         "name":"topic_01",
         "valore":4
      },
      {
         "name":"topic_02",
         "valore":8
      }
   ]
}

I have to create an aggregation by groupId and by topic name and on this aggregation calculate the average of the value field. But trying with the source code the result of the obtained average is wrong.

With the above data of documents one and two the expected result should be:

groupId topicName average
TEST_01 topic_01 3
TEST_01 topic_02 6
TermsAggregationBuilder aggregation = AggregationBuilders
    .terms("groupId")
    .field("groupId.keyword")
        .subAggregation(AggregationBuilders
            .terms("topicName")
            .field("topics.name.keyword").subAggregation(AggregationBuilders
                .avg("avg").field("topics.valore")));

Solution

  • First of all make sure you topics field is type "nested", because if it is "object" the topicName and valores will be flattened. This mean you will end up with a set of valores and topicNames without relation between them.

    Mappings

    {
      "test_ynsanity" : {
        "mappings" : {
          "properties" : {
            "detectionDate" : {
              "type" : "date"
            },
            "groupId" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "lag" : {
              "type" : "long"
            },
            "tipo" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "topics" : {
              "type" : "nested",
              "properties" : {
                "name" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "valore" : {
                  "type" : "long"
                }
              }
            }
          }
        }
      }
    }
    

    Ingesting data

    POST test_ynsanity/_doc
    {
       "groupId":"TEST_01",
       "lag":10,
       "detectionDate":"2021-02-26T21:42:30.010Z",
       "tipo":"uno",
       "topics":[
          {
             "name":"topic_01",
             "valore":2
          },
          {
             "name":"topic_02",
             "valore":4
          }
       ]
    }
    
    POST test_ynsanity/_doc
    {
       "groupId":"TEST_01",
       "lag":10,
       "detectionDate":"2021-02-26T21:42:30.010Z",
       "tipo":"uno",
       "topics":[
          {
             "name":"topic_01",
             "valore":4
          },
          {
             "name":"topic_02",
             "valore":8
          }
       ]
    }
    

    Query

    POST test_ynsanity/_search
    {
      "size": 0, 
      "aggs": {
        "groups": {
          "terms": {
            "field": "groupId.keyword",
            "size": 10
          },
          "aggs": {
            "topics": {
              "nested": {
                "path": "topics"
              },
              "aggs": {
                "topic_names": {
                  "terms": {
                    "field": "topics.name.keyword"
                  },
                  "aggs": {
                    "topic_avg": {
                      "avg": {
                        "field": "topics.valore"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    

    Response

    {
      "took" : 1,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 2,
          "relation" : "eq"
        },
        "max_score" : null,
        "hits" : [ ]
      },
      "aggregations" : {
        "groups" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "TEST_01",
              "doc_count" : 2,
              "topics" : {
                "doc_count" : 4,
                "topic_names" : {
                  "doc_count_error_upper_bound" : 0,
                  "sum_other_doc_count" : 0,
                  "buckets" : [
                    {
                      "key" : "topic_01",
                      "doc_count" : 2,
                      "NAME" : {
                        "value" : 3.0
                      }
                    },
                    {
                      "key" : "topic_02",
                      "doc_count" : 2,
                      "NAME" : {
                        "value" : 6.0
                      }
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    }
    

    I have no access to the Java DSL right now, but the query should look something like this:

    TermsAggregationBuilder aggregation = AggregationBuilders
        .terms("groupId")
        .field("groupId.keyword")
            .subAggregation(AggregationBuilders
                .nested("agg", "topics")
                .terms("topic_names")
                .field("topics.name.keyword").subAggregation(AggregationBuilders
                    .avg("avg").field("topics.valore")));