I have an index which stores all the books and articles read by users. The books and articles are of nested type. I am trying to get the top x users who read the highest number of books and articles which match the specified genre filters.
I was able to get the total number of books read for each user (see aggregation below named books_total_reads)
I was also able to get the total number of articles read by each user (see aggregation below named articles_total_reads)
What i can't figure out is how to sort the the top x users by highest sum of: books_total_reads + articles_total_reads.
So in the example below, the query should return top 2 users in following order:
john (total 23)
alice (total 19)
To reproduce this you can run commands below:
PUT my-index
{
"mappings": {
"properties": {
"user": {
"type": "keyword"
},
"books": {
"type": "nested",
"properties": {
"genre": { "type": "keyword"},
"count": { "type": "integer" }
}
},
"articles": {
"type": "nested",
"properties": {
"genre": { "type": "keyword"},
"count": { "type": "integer" }
}
}
}
}
}
PUT my-index/_doc/1
{
"user" : "mark",
"books" : [
{
"genre" : "1",
"count" : 3
},
{
"genre" : "2",
"count" : 5
}
],
"articles" : [
{
"genre" : "10",
"count" : 5
},
{
"genre" : "11",
"count" : 5
}
]
}
PUT my-index/_doc/2
{
"user" : "john",
"books" : [
{
"genre" : "1",
"count" : 1
}
],
"articles" : [
{
"genre" : "10",
"count" : 2
},
{
"genre" : "12",
"count" : 20
}
]
}
PUT my-index/_doc/3
{
"user" : "alice",
"books" : [
{
"genre" : "1",
"count" : 4
},
{
"genre" : "2",
"count" : 5
}
],
"articles" : [
{
"genre" : "10",
"count" : 5
},
{
"genre" : "11",
"count" : 5
}
]
}
POST /my-index/_search
{
"size": 0,
"query": {
"bool": {
"should": [
{
"nested": {
"path": "books",
"query": {
"bool": {
"filter": [
{
"terms": {
"books.genre": [
"1",
"2"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "articles",
"query": {
"bool": {
"filter": [
{
"terms": {
"articles.genre": [
"10",
"11",
"12"
]
}
}
]
}
}
}
}
]
}
},
"aggs": {
"users": {
"terms": {
"field": "user",
"size": 2
},
"aggs": {
"books_root_agg": {
"nested": {
"path": "books"
},
"aggs": {
"books": {
"terms": {
"field": "books.genre",
"include": [
"1",
"2"
],
"size": 10,
"order": {
"sum_reads": "desc"
}
},
"aggs": {
"sum_reads": {
"sum": {
"field": "books.count"
}
}
}
},
"books_total_reads": {
"sum_bucket": {
"buckets_path": "books>sum_reads"
}
}
}
},
"articles_root_agg": {
"nested": {
"path": "articles"
},
"aggs": {
"articles": {
"terms": {
"field": "articles.genre",
"include": [
"10",
"11",
"12"
],
"size": 10,
"order": {
"sum_reads": "desc"
}
},
"aggs": {
"sum_reads": {
"sum": {
"field": "articles.count"
}
}
}
},
"articles_total_reads": {
"sum_bucket": {
"buckets_path": "articles>sum_reads"
}
}
}
}
}
}
}
}
You've done a great job. All you have to do is sum the results and sort the baskets. I removed terms aggregations because you already filter with the bool query. The sum_bucket aggregation is redundant
POST /my-index/_search?filter_path=aggregations
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "books",
"query": {
"bool": {
"filter": [
{
"terms": {
"books.genre": [
"1",
"2"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "articles",
"query": {
"bool": {
"filter": [
{
"terms": {
"articles.genre": [
"10",
"11",
"12"
]
}
}
]
}
}
}
}
]
}
},
"aggs": {
"users": {
"terms": {
"field": "user",
"size": 2
},
"aggs": {
"book_total_reads": {
"nested": {
"path": "books"
},
"aggs": {
"sum_reads": {
"sum": {
"field": "books.count"
}
}
}
},
"article_total_reads": {
"nested": {
"path": "articles"
},
"aggs": {
"sum_reads": {
"sum": {
"field": "articles.count"
}
}
}
},
"total_reads": {
"bucket_script": {
"buckets_path": {
"books_read": "book_total_reads.sum_reads",
"article_read": "article_total_reads.sum_reads"
},
"script": "params.books_read + params.article_read"
}
},
"sort_total_reads": {
"bucket_sort": {
"sort": [
{
"total_reads": "desc"
}
]
}
}
}
}
}
}