Search code examples
elasticsearchelasticsearch-aggregation

Elasticsearch query with grouping


I have database with products. Each Product is composed of fields: uuid, group_id, title, since, till.

since and till define interval of availability.

Intervals [since, till] are disjoint pairs for each group_id. So there are no 2 products within one group for which intervals intersect.

I need to fetch a list of products that meets the following conditions:

  1. on the list should be at most 1 product from each group
  2. each product matches the given title
  3. each product is current (since <= NOW <= till) OR if current product does not exist within its group, it should be the nearest product from the future (min(since) such that since >= NOW)

ES mapping:

{
    "products": {
        "mappings": {
            "dynamic": "false",
            "properties": {
                "group_id": {
                    "type": "long",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "title": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "since": {
                    "type": "date",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "till": {
                    "type": "date",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                }
            }
        }
    }
}

Is it possible to create such query in Elasticsearch?


Solution

  • Looking at your mapping, I've created sample documents, the query and its response as below:

    Sample Documents:

    POST product_index/_doc/1
    {
      "group_id": 1,
      "title": "nike",
      "since": "2020-01-01",
      "till": "2020-03-31"
    }
    
    POST product_index/_doc/2
    {
      "group_id": 2,
      "title": "nike",
      "since": "2020-01-01",
      "till": "2020-03-31"
    }
    
    POST product_index/_doc/3
    {
      "group_id": 3,
      "title": "nike",
      "since": "2020-03-15",
      "till": "2020-03-31"
    }
    
    POST product_index/_doc/4
    {
      "group_id": 3,
      "title": "nike",
      "since": "2020-03-19",
      "till": "2020-03-31"
    }
    

    As mentioned above, there are like 4 documents in total, group 1 and 2 have one document each while group 3 has two documents with both since >= now

    Query Request:

    The summary of the query is below:

    Bool
     - Must 
       - Match title as nike 
       - Should 
         - clause 1 - since <= now <= till
         - clause 2 - now <= since
    Agg
     - Terms on GroupId
       - Top Hits (retrieve only 1st document as your clause is at most for each group, and sort them by asc order of since)
    

    Below is the actual query:

    POST product_index/_search
    {
      "size": 0,
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "title": "nike"
              }
            },
            {
              "bool": {
                "should": [
                  {                               <--- since <=now <= till
                    "bool": {
                      "must": [
                        {
                          "range": {
                            "till": {
                              "gte": "now"
                            }
                          }
                        },
                        {
                          "range": {
                            "since": {
                              "lte": "now"
                            }
                          }
                        }
                      ]
                    }
                  },
                  {                               <---- since >= now
                    "bool": {
                      "must": [
                        {
                          "range": {
                            "since": {
                              "gte": "now"
                            }
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "my_groups": {
          "terms": {
            "field": "group_id.keyword",
            "size": 10
          },
          "aggs": {
            "my_docs": {
              "top_hits": {
                "size": 1,                           <--- Note this to return at most one document
                "sort": [
                  { "since": { "order": "asc"}       <--- Sort to return the lowest value of since
                  }
                 ]  
              }
            }
          }
        }
      }
    }
    

    Notice that I've made use of Terms Aggregation and Top Hits as its sub-aggregation.

    Response:

    {
      "took" : 7,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 4,
          "relation" : "eq"
        },
        "max_score" : null,
        "hits" : [ ]
      },
      "aggregations" : {
        "my_groups" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "3",
              "doc_count" : 2,
              "my_docs" : {
                "hits" : {
                  "total" : {
                    "value" : 2,
                    "relation" : "eq"
                  },
                  "max_score" : null,
                  "hits" : [
                    {
                      "_index" : "product_index",
                      "_type" : "_doc",
                      "_id" : "3",
                      "_score" : null,
                      "_source" : {
                        "group_id" : 3,
                        "title" : "nike",
                        "since" : "2020-03-15",
                        "till" : "2020-03-31"
                      },
                      "sort" : [
                        1584230400000
                      ]
                    }
                  ]
                }
              }
            },
            {
              "key" : "1",
              "doc_count" : 1,
              "my_docs" : {
                "hits" : {
                  "total" : {
                    "value" : 1,
                    "relation" : "eq"
                  },
                  "max_score" : null,
                  "hits" : [
                    {
                      "_index" : "product_index",
                      "_type" : "_doc",
                      "_id" : "1",
                      "_score" : null,
                      "_source" : {
                        "group_id" : 1,
                        "title" : "nike",
                        "since" : "2020-01-01",
                        "till" : "2020-03-31"
                      },
                      "sort" : [
                        1577836800000
                      ]
                    }
                  ]
                }
              }
            },
            {
              "key" : "2",
              "doc_count" : 1,
              "my_docs" : {
                "hits" : {
                  "total" : {
                    "value" : 1,
                    "relation" : "eq"
                  },
                  "max_score" : null,
                  "hits" : [
                    {
                      "_index" : "product_index",
                      "_type" : "_doc",
                      "_id" : "2",
                      "_score" : null,
                      "_source" : {
                        "group_id" : 2,
                        "title" : "nike",
                        "since" : "2020-01-01",
                        "till" : "2020-03-31"
                      },
                      "sort" : [
                        1577836800000
                      ]
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    }
    

    Let me know if this helps!