Search code examples
elasticsearchelastic-stackelasticsearch-aggregationelasticsearch-dsl

Stats Aggregation with Min Mode in ElasticSearch


I have the below mapping in ElasticSearch

{
       "properties":{
          "Costs":{
             "type":"nested",
             "properties":{
                "price":{
                   "type":"integer"
                }
             }
          }
       }
    }

So every document has an Array field Costs, which contains many elements and each element has price in it. I want to find the min and max price with the condition being - that from each array the element with the minimum price should be considered. So it is basically min/max among the minimum value of each array.

Lets say I have 2 documents with the Costs field as

Costs: [
 {
  "price": 100,
 },
 {
  "price": 200,
 }
]

and

Costs: [
 {
  "price": 300,
 },
 {
  "price": 400,
 }
]

So I need to find the stats This is the query I am currently using

{
   "costs_stats":{
      "nested":{
         "path":"Costs"
      },
      "aggs":{
         "price_stats_new":{
            "stats":{
               "field":"Costs.price"
            }
         }
      }
   }
}

And it gives me this:

"min" : 100,
"max" : 400

But I need to find stats after taking minimum elements of each array for consideration. So this is what i need:

"min" : 100,
"max" : 300

Like we have a "mode" option in sort, is there something similar in stats aggregation also, or any other way of achieving this, maybe using a script or something. Please suggest. I am really stuck here.

Let me know if anything is required

Update 1:

Query for finding min/max among minimums

{
   "_source":false,
   "timeout":"5s",
   "from":0,
   "size":0,
   "aggs":{
      "price_1":{
         "terms":{
            "field":"id"
         },
         "aggs":{
            "price_2":{
               "nested":{
                  "path":"Costs"
               },
               "aggs":{
                  "filtered":{
                     "aggs":{
                        "price_3":{
                           "min":{
                              "field":"Costs.price"
                           }
                        }
                     },
                     "filter":{
                        "bool":{
                           "filter":{
                              "range":{
                                 "Costs.price":{
                                    "gte":100
                                 }
                              }
                           }
                        }
                     }
                  }
               }
            }
         }
      },
      "minValue":{
         "min_bucket":{
            "buckets_path":"price_1>price_2>filtered>price_3"
         }
      }
   }
}

Only few buckets are coming and hence the min/max is coming among those, which is not correct. Is there any size limit.


Solution

  • One way to achieve your use case is to add one more field id, in each document. With the help of id field terms aggregation can be performed, and so buckets will be dynamically built - one per unique value.

    Then, we can apply min aggregation, which will return the minimum value among numeric values extracted from the aggregated documents.

    Adding a working example with index data, mapping, search query, and search result

    Index Mapping:

    {
      "mappings": {
        "properties": {
          "Costs": {
            "type": "nested"
          }
        }
      }
    }
    

    Index Data:

    {
      "id":1,
      "Costs": [
        {
          "price": 100
        },
        {
          "price": 200
        }
      ]
    }
    {
      "id":2,
      "Costs": [
        {
          "price": 300
        },
        {
          "price": 400
        }
      ]
    }
    

    Search Query:

    {
        "size": 0,
        "aggs": {
            "id_terms": {
                "terms": {
                    "field": "id",
                    "size": 15              <-- note this
                },
                "aggs": {
                    "nested_entries": {
                        "nested": {
                            "path": "Costs"
                        },
                        "aggs": {
                            "min_position": {
                                "min": {
                                    "field": "Costs.price"
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    

    Search Result:

    "aggregations": {
        "id_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": 1,
              "doc_count": 1,
              "nested_entries": {
                "doc_count": 2,
                "min_position": {
                  "value": 100.0
                }
              }
            },
            {
              "key": 2,
              "doc_count": 1,
              "nested_entries": {
                "doc_count": 2,
                "min_position": {
                  "value": 300.0
                }
              }
            }
          ]
        }
    

    Using stats aggregation also, it can be achieved (if you add one more field id that uniquely identifies your document)

    {
      "size": 0,
      "aggs": {
        "id_terms": {
          "terms": {
            "field": "id",
            "size": 15              <-- note this
          },
          "aggs": {
            "costs_stats": {
              "nested": {
                "path": "Costs"
              },
              "aggs": {
                "price_stats_new": {
                  "stats": {
                    "field": "Costs.price"
                  }
                }
              }
            }
          }
        }
      }
    }
    

    Update 1:

    To find the maximum value among those minimums (as seen in the above query), you can use max bucket aggregation

    {
      "size": 0,
      "aggs": {
        "id_terms": {
          "terms": {
            "field": "id",
            "size": 15         <-- note this
          },
          "aggs": {
            "nested_entries": {
              "nested": {
                "path": "Costs"
              },
              "aggs": {
                "min_position": {
                  "min": {
                    "field": "Costs.price"
                  }
                }
              }
            }
          }
        },
        "maxValue": {
          "max_bucket": {
            "buckets_path": "id_terms>nested_entries>min_position"
          }
        }
      }
    }
    

    Search Result:

    "aggregations": {
        "id_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": 1,
              "doc_count": 1,
              "nested_entries": {
                "doc_count": 2,
                "min_position": {
                  "value": 100.0
                }
              }
            },
            {
              "key": 2,
              "doc_count": 1,
              "nested_entries": {
                "doc_count": 2,
                "min_position": {
                  "value": 300.0
                }
              }
            }
          ]
        },
        "maxValue": {
          "value": 300.0,
          "keys": [
            "2"
          ]
        }
      }