Search code examples
elasticsearchkibana

Elasticsearch Aggregation not giving desirable otput


I have an object, which contains mills of drugs given to a patient. More than one drug can be administered to a patient. I am trying to sum the total individual mills of drugs administered to a patient within a specified time.

Here is a sample of my Object.

{
    "_uid" : "953a4af9901847c3b206dac7cee5b298",
    "_fullName" : "Test Patient",
    "_created": "2021-12-18 22:48:45",
    "_treatment" : {
        "_created" : "2021-12-18 22:48:45",
        "_drugs" : [
            {
                "_name" : "Another Tablet",
                "_uid" : "5a09f6a9c415465a84a8661f35ac621d",
                "_mils" : "500"
              },
              {
                "_name" : "Test Drug",
                "_uid" : "36c7fcf048c743078ca4c80d187d86c9",
                "_mils" : "300"
           }
        ]
    }
}

In Kibana, i did the following

{
  "query": {
    "bool": {
      "filter": {
         "range": {
             "_created": {
                 "gte": "2021-01-01 00:00:00",
                 "lte": "2021-12-31 00:00:00"
             }
         }
      }
    }
  },
  "size": 0,
  "aggs" : {
      "men" : {
        "terms": {
          "field": "_treatment._drugs._name.keyword"
        },
        "aggs": {
          "milsUsed": { "sum": { "field": "_treatment._drugs._mils" } }
        }
      }
    }
}

Presently kibana is adding all the mills together and not separating them. Below is the response from Kibana.

"aggregations" : {
    "men" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Another Tablet",
          "doc_count" : 2,
          "milsUsed" : {
            "value" : 1100.0
          }
        },
        {
          "key" : "Test Drug",
          "doc_count" : 2,
          "milsUsed" : {
            "value" : 1100.0
          }
        }
      ]
    }
  }

Expected response i am looking to get

"aggregations" : {
    "men" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Another Tablet",
          "doc_count" : 1,
          "milsUsed" : {
            "value" : 500.0
          }
        },
        {
          "key" : "Test Drug",
          "doc_count" : 1,
          "milsUsed" : {
            "value" : 300.0
          }
        }
      ]
    }
  }

Index mapping

{
    "patients" : {
        "mappings" : {
            "properties" : {
                "_fullName" : {
                    "type" : "text",
                    "fields" : {
                        "keyword" : {
                            "type" : "keyword",
                            "ignore_above" : 256
                        }
                    }
                },
                "_treatment" : {
                    "properties": {
                        "_drugs": {
                            "properties": {
                                "_mils" : {
                                    "type" : "long"
                                },
                                "_name" : {
                                    "type" : "text",
                                    "fields" : {
                                        "keyword" : {
                                            "type" : "keyword",
                                             "ignore_above" : 256
                                        }
                                    }
                                },,
                                "_uid" : {
                                    "type" : "text",
                                    "fields" : {
                                        "keyword" : {
                                            "type" : "keyword",
                                             "ignore_above" : 256
                                        }
                                    }
                                },
                            }
                        }
                    }
                }
            }
        }
    }
}

Solution

  • TLDR;

    Have you heard about nested fields in elastic search ? Internally Elastic search flatten nested object in your documents.

    So if you have

    {
      "group" : "fans",
      "user" : [ 
        {
          "first" : "John",
          "last" :  "Smith"
        },
        {
          "first" : "Alice",
          "last" :  "White"
        }
      ]
    }
    

    The internal representation of the json documents in the index will be

    {
      "group" :        "fans",
      "user.first" : [ "alice", "john" ],
      "user.last" :  [ "smith", "white" ]
    }
    

    In you case when you perform the aggregation it does the same. And all of a sudden, because of the flattening operation. You lose the "relationship" between _drugs._name et _drugs._mils

    Below is a pet project that solve you use example.

    Example

    1. Set Up
    PUT /so_agg_sum_drugs/
    {
      "mappings": {
        "properties": {
          "_fullName": {
            "type": "keyword"
          },
          "_treatment": {
            "properties": {
              "_drugs": {
                "type": "nested",   <- nested field type !!
                "properties": {
                  "_mils": {
                    "type": "long"
                  },
                  "_name": {
                    "type": "keyword"
                  },
                  "_uid": {
                    "type": "keyword"
                  }
                }
              }
            }
          }
        }
      }
    }
    
    POST /so_agg_sum_drugs/_doc
    {
      "_fullName" : "Test Patient",
      "_treatment" : {
        "_drugs" : [
          {
              "_name" : "Another Tablet",
              "_uid" : "5a09f6a9c415465a84a8661f35ac621d",
              "_mils" : "500"
            },
            {
              "_name" : "Test Drug",
              "_uid" : "36c7fcf048c743078ca4c80d187d86c9",
              "_mils" : "300"
          }
        ]
      }
    }
    
    POST /so_agg_sum_drugs/_doc
    {
      "_fullName" : "Test Patient 2",
      "_treatment" : {
        "_drugs" : [
          {
            "_name" : "Another Tablet",
            "_uid" : "5a09f6a9c415465a84a8661f35ac621d",
            "_mils" : "500"
          },
          {
            "_name" : "Test Drug",
            "_uid" : "36c7fcf048c743078ca4c80d187d86c9",
            "_mils" : "400"
          },
          {
            "_name" : "Test Drug",
            "_uid" : "36c7fcf048c743078ca4c80d187d86c9",
            "_mils" : "300"
          }
        ]
      }
    }
    
    
    1. Solution

    Your aggregation was mostly right, except for the nested field type. You can find some documentation about aggregation on nested fields here. [doc]

    GET /so_agg_sum_drugs/_search
    {
      "size": 0,
      "query": {
        "match_all": {}
      },
      "aggs": {
        "patients": {
          "terms": {
            "field": "_fullName"
          },
          "aggs": {
            "drugs": {
              "nested": {
                "path": "_treatment._drugs". <- wrap you agg on the drugs objects in a nested type agg.
              },
              "aggs": {
                "per_drug": {
                  "terms": {
                    "field": "_treatment._drugs._name"
                  },
                  "aggs": {
                    "quantity": {
                      "sum": {
                        "field": "_treatment._drugs._mils"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    
    {
      "took" : 350,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 2,
          "relation" : "eq"
        },
        "max_score" : null,
        "hits" : [ ]
      },
      "aggregations" : {
        "patients" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "Test Patient",
              "doc_count" : 1,
              "drugs" : {
                "doc_count" : 2,
                "per_drug" : {
                  "doc_count_error_upper_bound" : 0,
                  "sum_other_doc_count" : 0,
                  "buckets" : [
                    {
                      "key" : "Another Tablet",
                      "doc_count" : 1,
                      "quantity" : {
                        "value" : 500.0
                      }
                    },
                    {
                      "key" : "Test Drug",
                      "doc_count" : 1,
                      "quantity" : {
                        "value" : 300.0
                      }
                    }
                  ]
                }
              }
            },
            {
              "key" : "Test Patient 2",
              "doc_count" : 1,
              "drugs" : {
                "doc_count" : 3,
                "per_drug" : {
                  "doc_count_error_upper_bound" : 0,
                  "sum_other_doc_count" : 0,
                  "buckets" : [
                    {
                      "key" : "Test Drug",
                      "doc_count" : 2,
                      "quantity" : {
                        "value" : 700.0
                      }
                    },
                    {
                      "key" : "Another Tablet",
                      "doc_count" : 1,
                      "quantity" : {
                        "value" : 500.0
                      }
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    }