Search code examples
elasticsearch

Elasticsearch sort term aggregation by sum total of 2 nested term aggregations


I have an index which stores all the books and articles read by users. The books and articles are of nested type. I am trying to get the top x users who read the highest number of books and articles which match the specified genre filters.

I was able to get the total number of books read for each user (see aggregation below named books_total_reads)

I was also able to get the total number of articles read by each user (see aggregation below named articles_total_reads)

What i can't figure out is how to sort the the top x users by highest sum of: books_total_reads + articles_total_reads.

So in the example below, the query should return top 2 users in following order:
john (total 23)
alice (total 19)

To reproduce this you can run commands below:

PUT my-index
{
  "mappings": {
    "properties": {
      "user": {
        "type": "keyword"
      },
      "books": {
        "type": "nested",
        "properties": {
          "genre": { "type": "keyword"},
          "count": { "type": "integer" }
        }
      },
      "articles": {
        "type": "nested",
        "properties": {
          "genre": { "type": "keyword"},
          "count": { "type": "integer" }
        }
      }
    }
  }
}

PUT my-index/_doc/1
{
  "user" : "mark",
  "books" : [
    {
      "genre" : "1",
      "count" :  3
    },
    {
      "genre" : "2",
      "count" :  5
    }
  ],
  "articles" : [
    {
      "genre" : "10",
      "count" :  5
    },
    {
      "genre" : "11",
      "count" :  5
    }
  ]
}

PUT my-index/_doc/2
{
  "user" : "john",
  "books" : [
    {
      "genre" : "1",
      "count" :  1
    }
  ],
  "articles" : [
    {
      "genre" : "10",
      "count" :  2
    },
    {
      "genre" : "12",
      "count" :  20
    }
  ]
}

PUT my-index/_doc/3
{
  "user" : "alice",
  "books" : [
    {
      "genre" : "1",
      "count" :  4
    },
    {
      "genre" : "2",
      "count" :  5
    }
  ],
  "articles" : [
    {
      "genre" : "10",
      "count" :  5
    },
    {
      "genre" : "11",
      "count" :  5
    }
  ]
}


POST /my-index/_search
{
  "size": 0,
  "query": {
    "bool": {
      "should": [
        {
          "nested": {
            "path": "books",
            "query": {
              "bool": {
                "filter": [
                  {
                    "terms": {
                      "books.genre": [
                        "1",
                        "2"
                      ]
                    }
                  }
                ]
              }
            }
          }
        },
        {
          "nested": {
            "path": "articles",
            "query": {
              "bool": {
                "filter": [
                  {
                    "terms": {
                      "articles.genre": [
                        "10",
                        "11",
                        "12"
                      ]
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "users": {
      "terms": {
        "field": "user",
        "size": 2
      },
      "aggs": {
        "books_root_agg": {
          "nested": {
            "path": "books"
          },
          "aggs": {
            "books": {
              "terms": {
                "field": "books.genre",
                "include": [
                  "1",
                  "2"
                ],
                "size": 10,
                "order": {
                  "sum_reads": "desc"
                }
              },
              "aggs": {
                "sum_reads": {
                  "sum": {
                    "field": "books.count"
                  }
                }
              }
            },
            "books_total_reads": {
              "sum_bucket": {
                "buckets_path": "books>sum_reads"
              }
            }
          }
        },
        "articles_root_agg": {
          "nested": {
            "path": "articles"
          },
          "aggs": {
            "articles": {
              "terms": {
                "field": "articles.genre",
                "include": [
                  "10",
                  "11",
                  "12"
                ],
                "size": 10,
                "order": {
                  "sum_reads": "desc"
                }
              },
              "aggs": {
                "sum_reads": {
                  "sum": {
                    "field": "articles.count"
                  }
                }
              }
            },
            "articles_total_reads": {
              "sum_bucket": {
                "buckets_path": "articles>sum_reads"
              }
            }
          }
        }
      }
    }
  }
}

Solution

  • Answer #1

    You've done a great job. All you have to do is sum the results and sort the baskets. I removed terms aggregations because you already filter with the bool query. The sum_bucket aggregation is redundant

    POST /my-index/_search?filter_path=aggregations
    {
      "query": {
        "bool": {
          "should": [
            {
              "nested": {
                "path": "books",
                "query": {
                  "bool": {
                    "filter": [
                      {
                        "terms": {
                          "books.genre": [
                            "1",
                            "2"
                          ]
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "articles",
                "query": {
                  "bool": {
                    "filter": [
                      {
                        "terms": {
                          "articles.genre": [
                            "10",
                            "11",
                            "12"
                          ]
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "users": {
          "terms": {
            "field": "user",
            "size": 2
          },
          "aggs": {
            "book_total_reads": {
              "nested": {
                "path": "books"
              },
              "aggs": {
                "sum_reads": {
                  "sum": {
                    "field": "books.count"
                  }
                }
              }
            },
            "article_total_reads": {
              "nested": {
                "path": "articles"
              },
              "aggs": {
                "sum_reads": {
                  "sum": {
                    "field": "articles.count"
                  }
                }
              }
            },
            "total_reads": {
              "bucket_script": {
                "buckets_path": {
                  "books_read": "book_total_reads.sum_reads",
                  "article_read": "article_total_reads.sum_reads"
                },
                "script": "params.books_read + params.article_read"
              }
            },
            "sort_total_reads": {
              "bucket_sort": {
                "sort": [
                  {
                    "total_reads": "desc"
                  }
                ]
              }
            }
          }
        }
      }
    }