Search code examples
elasticsearchopensearch

Returning a count of documents with id and name using OpenSearch


I'm hoping to return a count of documents grouped by a property in the document, but I need it to return the key (company_id), company_name and the count of documents. I'm able to return a count of documents group by the company_id but I'm unable to find a way to include the company_name field

GET test1/_search
{
  "size": 0,
  "aggs": {
    "group_by_company_id": {
      "terms": {
        "field": "identity.company.company_id",
        "size": 100
      }
    }
  }
}

Example document structure

{
  "_index": "test1",
  "_id": "111111",
  "_score": 1,
  "_source": {
    "id": 111111,
    "identity": {
      "name": "Test User 1",
      "email": "test1@test.com",
      "company": {
        "company_id": 1,
        "company_name": "Test Company 1"
      }
    }
  }
},
{
  "_index": "test1",
  "_id": "222222",
  "_score": 1,
  "_source": {
    "id": 222222,
    "identity": {
      "name": "Test User 2",
      "email": "test2@test.com",
      "company": {
        "company_id": 1,
        "company_name": "Test Company 1"
      }
    }
  }
},
{
  "_index": "test1",
  "_id": "333333",
  "_score": 1,
  "_source": {
    "id": 333333,
    "identity": {
      "name": "Test User 3",
      "email": "test3@test.com",
      "company": {
        "company_id": 2,
        "company_name": "Test Company 2"
      }
    }
  }
},
{
  "_index": "test1",
  "_id": "444444",
  "_score": 1,
  "_source": {
    "id": 444444,
    "identity": {
      "name": "Test User 4",
      "email": "test4@test.com",
      "company": {
        "company_id": 3,
        "company_name": "Test Company 3"
      }
    }
  }
}

Solution

  • You're almost there, in order to also show the name you have the option of adding the top_hits sub-aggregation to show the name field

    GET test1/_search
    {
      "size": 0,
      "aggs": {
        "group_by_company_id": {
          "terms": {
            "field": "identity.company.company_id",
            "size": 100
          },
          "aggs": {
            "name": {
              "top_hits": {
                "size": 1,
                "_source": ["identity.company.company_name"]
              }
            }
          }
        }
      }
    }
    

    Bonus for those using Elasticsearch 7.12+, you can use the multi_terms aggregation

    GET /test1/_search
    {
      "aggs": {
        "group_by_company_id_and_name": {
          "multi_terms": {
            "terms": [{
              "field": "identity.company.company_id" 
            }, {
              "field": "identity.company.company_name"
            }]
          }
        }
      }
    }