Search code examples
sortingelasticsearchnestedaggregation

How to sort an Elasticsearch query result by a nested calculated field


We are using Elasticsearch 7.9

Our index, called method_info_tree, contains two-level nested fields:

  • The top level described a java method,
  • the nested level below it describes a thread that ran the method, and
  • the third nested level within a thread describes the states of that thread over time.

Below is the mapping of the index in Elasticsearch:

{
  "mappings": {
    "properties": {
      "method_id" : {
        "type" : "long"
      },
      "threads": {
        "type": "nested",
        "properties": {
          "thread_id": {
            "type": "long"
          },
          "states": {
            "type": "nested",
            "properties": {
              "collect_time": {
                "type": "date"
              },
              "state": {
                "type": "keyword"
              },
              "elapsed_time": {
                "type" : "integer"
              }
            }
          }

        }
      }
    }
  }
}

Here is a sample document in the index:

{
  "took" : 13,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 5198,
      "relation" : "eq"
    },
    "max_score" : 0.0,
    "hits" : [
      {
        "_index" : "method_info_tree-000001",
        "_type" : "_doc",
        "_id" : "WiHCCYQBhPdvF01n3kp1",
        "_score" : 0.0,
        "_routing" : "86163-d5c064d0-55a3-44b9-88fb-c44b7233cfa4",
        "_source" : {
          "timestamp" : 1666610993800,
          "method_id" : 140280075031760,

          "threads" : [
            {
              "thread_id" : 1,

              "states_hit" : [
                {
                  "state" : "RUNNABLE",
                  "collect_time" : 1666610994750,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610994800,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610994850,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610994900,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610994950,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610995000,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610995050,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610995100,
                  "elapsed_time" : 50
                },
                {
                  "state" : "IO",
                  "collect_time" : 1666610995150,
                  "elapsed_time" : 50
                }
              ]

            }
          ]
        }
      }
    ]
  }
}

Note that for each method_id we have many documents for various thread_ids.

I would like, for each method_id, to calculate the sum of elapsed_time field per state (for all threads), something like:

method_id -> 
    [
      {
        "state" : "IO",
        "elapsed_time" : 566622.0
      },
      {
        "state" : "RUNNABLE",
        "elapsed_time" : 566572.0
      },
      {
        "state" : "BLOCKED",
        "elapsed_time" : 50.0
      }
    ]

Below is my Elasticsearch query:

GET method_info_tree/_search
{
  "from": 0,
  "size": 0,
  "track_total_hits": true,
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "session_id": "86163-d5c064d0-55a3-44b9-88fb-c44b7233cfa4"
          }
        },
        {
          "nested": {
            "path": "threads.states_hit",
            "query": {
              "bool": {
                "filter": [
                  {
                    "range": {
                      "threads.states_hit.collect_time": {
                        "gte": 0,
                        "lte": 2000000000000
                      }
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "top_methods_agg": {
      "terms": {
        "field": "method_id",
        "size": 20
      },
      "aggs": {
        "elapsed_time_agg": {
          "nested": {
            "path": "threads.states_hit"
          },
          "aggs": {
            "states_range": {
              "range": {
                "field": "threads.states_hit.collect_time",
                "ranges": [
                  {
                    "from": 0,
                    "to": 2000000000001
                  }
                ]
              },
              "aggs": {
                "elapsed_time_per_state_agg": {
                  "terms": {
                    "field": "threads.states_hit.state",
                    "size": 10
                  },
                  "aggs": {
                    "elapsed_time": {
                      "sum": {
                        "field": "threads.states_hit.elapsed_time"
                      }
                    }
                  }
                },
                "total_self_elapsed_time": {
                  "sum": {
                    "field": "threads.states_hit.elapsed_time"
                  }
                },
                "wasted_elapsed_time": {
                  "filter": {
                    "terms": {
                      "threads.states_hit.state": [
                        "BLOCKED",
                        "IO"
                      ]
                    }
                  },
                  "aggs": {
                    "total_wasted": {
                      "sum": {
                        "field": "threads.states_hit.elapsed_time"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

A sample result would be:

{
  "took" : 218,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 5727,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "top_methods_agg" : {
      "doc_count_error_upper_bound" : 1,
      "sum_other_doc_count" : 73,
      "buckets" : [
        {
          "key" : 140280074341584,
          "doc_count" : 728,
          "elapsed_time_agg" : {
            "doc_count" : 21838,
            "states_range" : {
              "buckets" : [
                {
                  "key" : "1970-01-01T00:00:00.000Z-2033-05-18T03:33:20.001Z",
                  "from" : 0.0,
                  "from_as_string" : "1970-01-01T00:00:00.000Z",
                  "to" : 2.000000000001E12,
                  "to_as_string" : "2033-05-18T03:33:20.001Z",
                  "doc_count" : 21838,
                  "total_self_elapsed_time" : {
                    "value" : 1133244.0
                  },
                  "wasted_elapsed_time" : {
                    "doc_count" : 1,
                    "total_wasted" : {
                      "value" : 50.0
                    }
                  },
                  "elapsed_time_per_state_agg" : {
                    "doc_count_error_upper_bound" : 0,
                    "sum_other_doc_count" : 0,
                    "buckets" : [
                      {
                        "key" : "IO",
                        "doc_count" : 10919,
                        "elapsed_time" : {
                          "value" : 566622.0
                        }
                      },
                      {
                        "key" : "RUNNABLE",
                        "doc_count" : 10918,
                        "elapsed_time" : {
                          "value" : 566572.0
                        }
                      },
                      {
                        "key" : "BLOCKED",
                        "doc_count" : 1,
                        "elapsed_time" : {
                          "value" : 50.0
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        },
        {
          "key" : 140282650318928,
          "doc_count" : 3,
          "elapsed_time_agg" : {
            "doc_count" : 3,
            "states_range" : {
              "buckets" : [
                {
                  "key" : "1970-01-01T00:00:00.000Z-2033-05-18T03:33:20.001Z",
                  "from" : 0.0,
                  "from_as_string" : "1970-01-01T00:00:00.000Z",
                  "to" : 2.000000000001E12,
                  "to_as_string" : "2033-05-18T03:33:20.001Z",
                  "doc_count" : 3,
                  "total_self_elapsed_time" : {
                    "value" : 150.0
                  },
                  "wasted_elapsed_time" : {
                    "doc_count" : 0,
                    "total_wasted" : {
                      "value" : 0.0
                    }
                  },
                  "elapsed_time_per_state_agg" : {
                    "doc_count_error_upper_bound" : 0,
                    "sum_other_doc_count" : 0,
                    "buckets" : [
                      {
                        "key" : "RUNNABLE",
                        "doc_count" : 3,
                        "elapsed_time" : {
                          "value" : 150.0
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }
}

Note that I deleted some of the result buckets for better coherence of the example.

My problem: I need to sort the results by 'total_self_elapsed_time' and return only the top 5 results. Since the result is nested, I cannot access the calculated field 'total_self_elapsed_time'. Can you please direct me to how I can add to my query sorting by this aggregated field?


Solution

  • I have replaced states_range range aggregation with filter aggregation. Range aggregation will generate buckets for each ranges specified. So you cannot sort terms based on sub multi buckets.

    To sort I have used "order" in terms aggregation.

    Query

      "aggs": {
        "top_methods_agg": {
          "terms": {
            "field": "method_id",
            "size": 20,
            "order": {
              "elapsed_time_agg>states_range>total_self_elapsed_time": "asc"
            }
          },
          "aggs": {
            "elapsed_time_agg": {
              "nested": {
                "path": "threads.states_hit"
              },
              "aggs": {
                "states_range": {
                  "filter": {
                    "range": {
                      "threads.states_hit.collect_time": {
                        "gte": 0,
                        "lte": 2000000000000
                      }
                    }
                  },
                  "aggs": {
                    "elapsed_time_per_state_agg": {
                      "terms": {
                        "field": "threads.states_hit.state",
                        "size": 10
                      },
                      "aggs": {
                        "elapsed_time": {
                          "sum": {
                            "field": "threads.states_hit.elapsed_time"
                          }
                        }
                      }
                    },
                    "total_self_elapsed_time": {
                      "sum": {
                        "field": "threads.states_hit.elapsed_time"
                      }
                    },
                    "wasted_elapsed_time": {
                      "filter": {
                        "terms": {
                          "threads.states_hit.state": [
                            "BLOCKED",
                            "IO"
                          ]
                        }
                      },
                      "aggs": {
                        "total_wasted": {
                          "sum": {
                            "field": "threads.states_hit.elapsed_time"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    

    Try above , see if it works out for you.