Search code examples
elasticsearch, kibana, elasticsearch-aggregation

Elastic-search missing bucket aggregation


Updated

I have the following Elasticsearch query, which gives me the following results with an aggregation.

I tried following Andrey Borisko's example, but for the life of me I cannot get it working.

  1. The main query, filtered by companyId, finds all the full names with the name 'Brenda'.
  2. The companyId agg returns the best-matching companyId for full names matching 'brenda', based on the main filter.

My exact query

 GET employee-index/_search
{
  "aggs": {
    "companyId": {
      "terms": {
        "field": "companyId"
      },
      "aggs": {
        "filtered": {
          "filter": {
            "multi_match": {
              "fields": [
                "fullName.edgengram",
                "number"
              ],
              "query": "brenda"
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "fields": [
              "fullName.edgengram",
              "number"
            ],
            "query": "brenda"
          }
        }
      ],
      "filter": [
        {
          "terms": {
            "companyId": [
              3849,
              3867,
              3884,
              3944,
              3260,
              4187,
              3844,
              2367,
              158,
              3176,
              3165,
              3836,
              4050,
              3280,
              2298,
              3755,
              3854,
              7161,
              3375,
              7596,
              836,
              4616
            ]
          }
        }
      ]
    }
  }
}

My exact result

{
  "took" : 14,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 8.262566,
    "hits" : [
      {
        "_index" : "employee-index",
        "_type" : "_doc",
        "_id" : "67207",
        "_score" : 8.262566,
        "_source" : {
          "companyGroupId" : 1595,
          "companyId" : 158,
          "fullName" : "Brenda Grey",
          "companyTradingName" : "Sky Blue",
        }
      },
      {
        "_index" : "employee-index",
        "_type" : "_doc",
        "_id" : "7061",
        "_score" : 7.868355,
        "_source" : {
          "companyGroupId" : 1595,
          "companyId" : 158,
          "fullName" : "Brenda Eaton",
          "companyTradingName" : "Sky Blue",
        }
      },
      {
        "_index" : "employee-index",
        "_type" : "_doc",
        "_id" : "107223",
        "_score" : 7.5100465,
        "_source" : {
          "companyGroupId" : 1595,
          "companyId" : 3260,
          "fullName" : "Brenda Bently",
          "companyTradingName" : "Green Ice",

        }
      }
    ]
  },
  "aggregations" : {
    "companyId" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "158",
          "doc_count" : 2,
          "filtered" : {
            "doc_count" : 2
          }
        },
        {
          "key" : "3260",
          "doc_count" : 1,
          "filtered" : {
            "doc_count" : 1
          }
        }
      ]
    }
  }
}



**This is how I want the filtered-companies results to look**




 "aggregations": {
    "companyId": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "158",
          "doc_count": 2,
          "filtered": {
            "doc_count": 2 (<- 2 records found of brenda)
          }
        },
        {
          "key": "3260",
          "doc_count": 1,
          "filtered": {
            "doc_count": 1 (<- 1 records found of brenda)
          }
        },
        {
          "key": "4616",
          "doc_count": 0,
          "filtered": {
            "doc_count": 0 (<- 0 records found of brenda)
          }
        },
        ... and so on. Basically, I want all the other companies in the filtered list to be displayed with a doc_count of 0.
      ]
    }

Solution

  • If I understood you correctly, you want to run an aggregation, or part of an aggregation, independently from the query. In this case you should use a Global Aggregation.

    UPDATE after your comment

    In this case you need to use a filter aggregation. For example, this type of query (a simplified version of your example), which you currently have:

    GET indexName/_search
    {
      "size": 0, 
      "query": {
        "match": {
          "firstName": "John"
        }
      },
      "aggs": {
        "by_phone": {
          "terms": {
            "field": "cellPhoneNumber"
          }
        }
      }
    }
    

    becomes this:

    GET indexName/_search
    {
      "size": 0,
      "aggs": {
        "by_phone": {
          "terms": {
            "field": "cellPhoneNumber"
          },
          "aggs": {
            "filtered": {
              "filter": {
                "match": {
                  "firstName": "John"
                }
              }
            }
          }
        }
      }
    }
    

    the output will look slightly different though:

    ...
      "aggregations" : {
        "by_phone" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 260072,
          "buckets" : [
            {
              "key" : "+9649400",
              "doc_count" : 270,
              "filtered" : {
                "doc_count" : 0  <-- not John
              }
            },
            {
              "key" : "+8003000",
              "doc_count" : 184,
              "filtered" : {
                "doc_count" : 3 <-- this is John
              }
            },
            {
              "key" : "+41025026",
              "doc_count" : 168,
              "filtered" : {
                "doc_count" : 0  <-- not John
              }
            }
            ...
    

    And now, if you need the results of the query as well, you have to wrap it in a global aggregation like so:

    GET indexName/_search
    {
      "size": 20,
      "from": 0,
      "query": {
        "match": {
          "firstName": "John"
        }
      },
      "aggs": {
        "all": {
          "global": {},
          "aggs": {
            "by_phone": {
              "terms": {
                "field": "cellPhoneNumber"
              },
              "aggs": {
                "filtered": {
                  "filter": {
                    "match": {
                      "firstName": "John"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    

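    Revised version based on your query. Note that it has no top-level "query": the companyId list is applied through the outer "filtered" filter aggregation, and the 'brenda' match is applied only inside the per-company "testing" sub-aggregation, so a company bucket can be returned with a "testing" doc_count of 0. It also matches on "fullName" only, rather than "fullName.edgengram" and "number" as in your original query: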

    GET employee-index/_search
    {
      "size": 0,
      "aggs": {
        "filtered": {
          "filter": {
            "bool": {
              "filter": [
                {
                  "terms": {
                    "companyId": [
                      3849,
                      3867,
                      3884,
                      3944,
                      3260,
                      4187,
                      3844,
                      2367,
                      158,
                      3176,
                      3165,
                      3836,
                      4050,
                      3280,
                      2298,
                      3755,
                      3854,
                      7161,
                      3375,
                      7596,
                      836,
                      4616
                    ]
                  }
                }
              ]
            }
          },
          "aggs": {
            "by_companyId": {
              "terms": {
                "field": "companyId",
                "size": 1000
              },
              "aggs": {
                "testing": {
                  "filter": {
                    "multi_match": {
                      "fields": [
                        "fullName"
                      ],
                      "query": "brenda"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }