Search code examples
elasticsearch, elasticsearch-aggregation

Elasticsearch order by field in top hits aggregation


I am trying to sort data using the top_hits parameter in an Elasticsearch search query, but somehow it doesn't affect anything. Can anyone please help me with this one?

So I've tried using sort, as some people suggested, like this:

{
    "size" : 0,
    "from" : 0,
    "aggs": {
        "by_filter": {
            "filter": {
                "bool": {
                    "must": [
                    {
                        "range": {
                            "published_at": {
                                "gte": "2019-08-01 00:00:00",
                                "lte": "2023-10-30 23:59:59"
                            }
                        }
                    },
                    {
                        "match": {
                            "status": "published"
                        }
                    }
                    ]
                }
            },
            "aggs": {
                "by_created": {
                    "terms": {
                        "field": "created_by.id",
                        "size": 10
                    },
                    "aggs" : {
                        "count_data": {
                            "terms": {
                                "field": "created_by.id"
                            }
                        },
                        "hits": {
                            "top_hits": {
                                "sort": [                         <---- the sort query that I found
                                    {
                                        "created_by.id": {
                                            "order": "desc"
                                        }
                                    }
                                ],
                                "_source":["created_by.id"],
                                "size": 1
                            }
                        }
                    }
                }
            }
        }
    }
}

But the result didn't change:


"aggregations": {
    "by_filter": {
        "doc_count": 21,
        "by_created": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 3,
            "buckets": [
                {
                    "key": 34,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "53822",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "id": 34
                                        }
                                    },
                                    "sort": [                <--- I think this is the result of the sort
                                        34
                                    ]
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 34,
                                "doc_count": 3
                            }
                        ]
                    }
                },
                {
                    "key": 52,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "338610",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "id": 52
                                        }
                                    },
                                    "sort": [
                                        52
                                    ]
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 52,
                                "doc_count": 3
                            }
                        ]
                    }
                }
            ]
        }
    }
}

What I expected is for the buckets to show key 52 first, then 34, like this:


"aggregations": {
    "by_filter": {
        "doc_count": 21,
        "by_created": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 3,
            "buckets": [
                {
                    "key": 52,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "338610",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "id": 52
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 52,
                                "doc_count": 3
                            }
                        ]
                    }
                },
                {
                    "key": 34,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "53822",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "id": 34
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 34,
                                "doc_count": 3
                            }
                        ]
                    }
                }
            ]
        }
    }
}

I think I picked the wrong example, since there's a new "sort" field in the top_hits result, but that's not really what I'm looking for.


Solution

  • You are close: you just need to move the ordering to the aggregation that you actually want sorted, and change the syntax a bit. The "sort" inside top_hits only sorts the hits returned within each bucket. If you want to sort the bucket keys of the by_created aggregation, you need to add "order" to that terms aggregation:

    {
        "size" : 0,
        "from" : 0,
        "aggs": {
            "by_filter": {
                "filter": {
                    "bool": {
                        "must": [
                        {
                            "range": {
                                "published_at": {
                                    "gte": "2019-08-01 00:00:00",
                                    "lte": "2023-10-30 23:59:59"
                                }
                            }
                        },
                        {
                            "match": {
                                "status": "published"
                            }
                        }
                        ]
                    }
                },
                "aggs": {
                    "by_created": {
                        "terms": {
                            "field": "created_by.id",
                            "size": 10,
                            "order": {                      <--- try applying it here
                              "_key": "desc"
                            }
                        },
                        "aggs" : {
                            "count_data": {
                                "terms": {
                                    "field": "created_by.id"
                                }
                            },
                            "hits": {
                                "top_hits": {
                                    "sort": [                         <---- the sort query that I found
                                        {
                                            "created_by.id": {
                                                "order": "desc"
                                            }
                                        }
                                    ],
                                    "_source":["created_by.id"],
                                    "size": 1
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    

    You can find more options in the terms aggregation docs. If you need to sort other aggregations, you might find the bucket_sort aggregation useful.