Search code examples
elasticsearchelasticsearch-aggregationelasticsearch-nested

How to sort nested aggregation field based on parent document field in elasticsearch?


I have index of stores at various location. With each store I have a nested list of discount coupon.

Now I have query to get list of all unique coupons in a x km of radius sorted by the distance of the nearest applicable coupon on given location

Database :: Elasticsearch

Index Mapping ::

{
"mappings": {
    "car_stores": {
        "properties": {
            "location": {
                "type": "geo_point"
            },
            "discount_coupons": {
                "type": "nested",
                "properties": {
                    "name": {
                        "type": "keyword"
                    }
                }
            }
        }
    }
}
}

Sample Doc ::

{
"_index": "stores",
"_type": "car_stores",
"_id": "1258c81d-b6f2-400f-a448-bd728f524b55",
"_score": 1.0,
"_source": {
    "location": {
        "lat": 36.053757,
        "lon": 139.525482
    },
    "discount_coupons": [
        {
            "name": "c1"
        },
        {
            "name": "c2"
        }
    ]
}
}

Old Query to get unique discount coupon names in x km area for given location ::

{
"size": 0,
"query": {
    "bool": {
        "must": {
            "match_all": {}
        },
        "filter": {
            "geo_distance": {
                "distance": "100km",
                "location": {
                    "lat": 40,
                    "lon": -70
                }
            }
        }
    }
},
"aggs": {
    "coupon": {
        "nested": {
            "path": "discount_coupons"
        },
        "aggs": {
            "name": {
                "terms": {
                    "field": "discount_coupons.name",
                    "order": {
                        "_key": "asc"
                    },
                    "size": 200
                }
            }
        }
    }
}
}

Updated Response ::

{
"took": 60,
"timed_out": false,
"_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
},
"hits": {
    "total": 245328,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "coupon": {
        "doc_count": 657442,
        "name": {
            "doc_count_error_upper_bound": -1,
            "sum_other_doc_count": 641189,
            "buckets": [
                {
                    "key": "local20210211",
                    "doc_count": 1611,
                    "back_to_base": {
                        "doc_count": 1611,
                        "distance_script": {
                            "value": 160.61034409639765
                        }
                    }
                },
                {
                    "key": "local20210117",
                    "doc_count": 1621,
                    "back_to_base": {
                        "doc_count": 1621,
                        "distance_script": {
                            "value": 77.51459886447356
                        }
                    }
                },
                {
                    "key": "local20201220",
                    "doc_count": 1622,
                    "back_to_base": {
                        "doc_count": 1622,
                        "distance_script": {
                            "value": 84.15734462544432
                        }
                    }
                },
                {
                    "key": "kisekae1",
                    "doc_count": 1626,
                    "back_to_base": {
                        "doc_count": 1626,
                        "distance_script": {
                            "value": 88.23770888201268
                        }
                    }
                },
                {
                    "key": "local20210206",
                    "doc_count": 1626,
                    "back_to_base": {
                        "doc_count": 1626,
                        "distance_script": {
                            "value": 86.78376012847237
                        }
                    }
                },
                {
                    "key": "local20210106",
                    "doc_count": 1628,
                    "back_to_base": {
                        "doc_count": 1628,
                        "distance_script": {
                            "value": 384.12156408078397
                        }
                    }
                },
                {
                    "key": "local20210113",
                    "doc_count": 1628,
                    "back_to_base": {
                        "doc_count": 1628,
                        "distance_script": {
                            "value": 153.61681676703674
                        }
                    }
                },
                {
                    "key": "local20",
                    "doc_count": 1629,
                    "back_to_base": {
                        "doc_count": 1629,
                        "distance_script": {
                            "value": 168.74132991524073
                        }
                    }
                },
                {
                    "key": "local20210213",
                    "doc_count": 1630,
                    "back_to_base": {
                        "doc_count": 1630,
                        "distance_script": {
                            "value": 155.8335679860034
                        }
                    }
                },
                {
                    "key": "local20210208",
                    "doc_count": 1632,
                    "back_to_base": {
                        "doc_count": 1632,
                        "distance_script": {
                            "value": 99.58790590445102
                        }
                    }
                }
            ]
        }
    }
}
}

Now the above query will return first 200 discount coupon default sorted by count but I want to return coupons sorted on distance based to given location i.e. the coupon that is nearest applicable should come first.

Is there any way to sort nested aggregations based on a parent key or can I solve this use case using a different data model?

Update Query ::

{
"size": 0,
"query": {
    "bool": {
        "filter": [
            {
                "geo_distance": {
                    "distance": "100km",
                    "location": {
                        "lat": 35.699104,
                        "lon": 139.825211
                    }
                }
            },
            {
                "nested": {
                    "path": "discount_coupons",
                    "query": {
                        "bool": {
                            "filter": {
                                "exists": {
                                    "field": "discount_coupons"
                                }
                            }
                        }
                    }
                }
            }
        ]
    }
},
"aggs": {
    "coupon": {
        "nested": {
            "path": "discount_coupons"
        },
        "aggs": {
            "name": {
                "terms": {
                    "field": "discount_coupons.name",
                    "order": {
                        "back_to_base": "asc"
                    },
                    "size": 10
                },
                "aggs": {
                    "back_to_base": {
                        "reverse_nested": {},
                        "aggs": {
                            "distance_script": {
                                "min": {
                                    "script": {
                                        "source": "doc['location'].arcDistance(35.699104, 139.825211)"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
}

Solution

  • Interesting question. You can always order a terms aggregation by the result of a numeric sub-aggregation. The trick here is to escape the nested context via a reverse_nested aggregation and then calculate the distance from the pivot using a script:

    {
      "size": 0,
      "query": {
        "bool": {
          "must": {
            "match_all": {}
          },
          "filter": {
            "geo_distance": {
              "distance": "100km",
              "location": {
                "lat": 40,
                "lon": -70
              }
            }
          }
        }
      },
      "aggs": {
        "coupon": {
          "nested": {
            "path": "discount_coupons"
          },
          "aggs": {
            "name": {
              "terms": {
                "field": "discount_coupons.name",
                "order": {
                  "back_to_base": "asc"
                },
                "size": 200
              },
              "aggs": {
                "back_to_base": {
                  "reverse_nested": {},
                  "aggs": {
                    "distance_script": {
                      "min": {
                        "script": {
                          "source": "doc['location'].arcDistance(40, -70)"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }