Search code examples
elasticsearchelasticsearch-aggregation

Elasticsearch Aggregation (scripted metric aggregation)


I have product collection with contains array of categories. What i need is aggregation(?) - an array of distinct category_id's. Also nice to have - count of matching product number per category_id.

Sample index data

{
  "data": [
    {
      "took": 6,
      "timed_out": false,
      "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 4257,
        "max_score": 1.0,
        "hits": [
          {
            "_index": "index_name",
            "_type": "product",
            "_id": "4",
            "_score": 1.0,
            "_source": {
              "entity_id": "4",
              "attribute_set_id": "4",
              "type_id": "simple",
              "sku": "skuxxx",
              "has_options": false,
              "required_options": false,
              "created_at": "2020-01-29 06:38:52",
              "updated_at": "2020-09-04 12:31:00",
              "visibility": "4"
            },
            "category": [
              {
                "category_id": 2,
                "is_virtual": "false"
              },
              {
                "category_id": 3,
                "is_parent": true,
                "is_virtual": "false",
                "name": "Category name1"
              },
              {
                "category_id": 4,
                "is_parent": true,
                "is_virtual": "false",
                "name": "Category name2"
              },
              {
                "category_id": 7,
                "is_parent": true,
                "is_virtual": "false",
                "name": "Category name3"
              }
            ]
          }
        ]
      }
    }
  ]
}

What i need is array of category ids, sorted by product count and possibility to filter categories ("is_virtual" or "is_parent"):

Expected result

{
  "data": [
    {
      "categories": [
        {
          "category_id": 2,
          "doc_count": 555
        },
        {
          "category_id": 3,
          "doc_count": 150
        },
        {
          "category_id": 56,
          "doc_count": 12
        }
      ]
    }
  ]
}

Index mapping

{
"index": {
    "mappings": {
        "product": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "EAN01": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN02": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN03": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN04": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN05": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN06": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN07": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN08": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN09": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN10": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN11": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN12": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN13": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN14": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN15": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN16": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN17": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN18": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN19": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "EAN20": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "activity": {
                    "type": "integer"
                },
                "attribute_set1_1": {
                    "type": "integer"
                },
                "attribute_set_id": {
                    "type": "integer"
                },
                "attributeset2attribute1": {
                    "type": "integer"
                },
                "attributeset3attribute1": {
                    "type": "integer"
                },
                "autocomplete": {
                    "type": "text",
                    "fields": {
                        "shingle": {
                            "type": "text",
                            "analyzer": "shingle"
                        },
                        "whitespace": {
                            "type": "text",
                            "analyzer": "whitespace"
                        }
                    },
                    "analyzer": "standard"
                },
                "belongs_to_catalog": {
                    "type": "boolean"
                },
                "ca_1_1243545189": {
                    "type": "integer"
                },
                "category": {
                    "type": "nested",
                    "properties": {
                        "category_id": {
                            "type": "integer"
                        },
                        "is_blacklisted": {
                            "type": "boolean"
                        },
                        "is_parent": {
                            "type": "boolean"
                        },
                        "is_virtual": {
                            "type": "boolean"
                        },
                        "name": {
                            "type": "text",
                            "copy_to": [
                                "search",
                                "spelling"
                            ],
                            "analyzer": "standard"
                        },
                        "position": {
                            "type": "integer"
                        }
                    }
                },
                "category_gear": {
                    "type": "integer"
                },
                "children_attributes": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "children_ids": {
                    "type": "integer"
                },
                "color": {
                    "type": "integer"
                },
                "configurable_attributes": {
                    "type": "keyword"
                },
                "configurable_variation": {
                    "type": "integer"
                },
                "created_at": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"
                },
                "custom_price": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "description": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "eco_collection": {
                    "type": "boolean"
                },
                "entity_id": {
                    "type": "integer"
                },
                "erin_recommends": {
                    "type": "boolean"
                },
                "features_bags": {
                    "type": "integer"
                },
                "gender": {
                    "type": "integer"
                },
                "has_options": {
                    "type": "boolean"
                },
                "image": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "indexed_attributes": {
                    "type": "keyword"
                },
                "manufacturer": {
                    "type": "integer"
                },
                "material": {
                    "type": "integer"
                },
                "mycolor": {
                    "type": "integer"
                },
                "mysize": {
                    "type": "integer"
                },
                "name": {
                    "type": "text",
                    "fields": {
                        "shingle": {
                            "type": "text",
                            "analyzer": "shingle"
                        },
                        "sortable": {
                            "type": "text",
                            "analyzer": "sortable",
                            "fielddata": true
                        },
                        "whitespace": {
                            "type": "text",
                            "analyzer": "whitespace"
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "new": {
                    "type": "boolean"
                },
                "option_text_activity": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_attribute_set1_1": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_attributeset2attribute1": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_attributeset3attribute1": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_belongs_to_catalog": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_ca_1_1243545189": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_category_gear": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_color": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_configurable_variation": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_eco_collection": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_erin_recommends": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_features_bags": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_gender": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_manufacturer": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_material": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_mycolor": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_mysize": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_new": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_performance_fabric": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_sale": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_size": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_status": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "option_text_strap_bags": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_style_bags": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "option_text_tax_class_id": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "performance_fabric": {
                    "type": "boolean"
                },
                "price": {
                    "type": "nested",
                    "properties": {
                        "customer_group_id": {
                            "type": "integer"
                        },
                        "final_price": {
                            "type": "double"
                        },
                        "is_discount": {
                            "type": "boolean"
                        },
                        "max_price": {
                            "type": "double"
                        },
                        "min_price": {
                            "type": "double"
                        },
                        "original_price": {
                            "type": "double"
                        },
                        "price": {
                            "type": "double"
                        },
                        "tax_class_id": {
                            "type": "integer"
                        }
                    }
                },
                "required_options": {
                    "type": "boolean"
                },
                "sale": {
                    "type": "boolean"
                },
                "search": {
                    "type": "text",
                    "fields": {
                        "shingle": {
                            "type": "text",
                            "analyzer": "shingle"
                        },
                        "whitespace": {
                            "type": "text",
                            "analyzer": "whitespace"
                        }
                    },
                    "analyzer": "standard"
                },
                "search_query": {
                    "type": "nested",
                    "properties": {
                        "is_blacklisted": {
                            "type": "boolean"
                        },
                        "position": {
                            "type": "integer"
                        },
                        "query_id": {
                            "type": "integer"
                        }
                    }
                },
                "short_description": {
                    "type": "text",
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "size": {
                    "type": "integer"
                },
                "sku": {
                    "type": "text",
                    "fields": {
                        "shingle": {
                            "type": "text",
                            "analyzer": "shingle"
                        },
                        "sortable": {
                            "type": "text",
                            "analyzer": "sortable",
                            "fielddata": true
                        },
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        },
                        "whitespace": {
                            "type": "text",
                            "analyzer": "whitespace"
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "reference"
                },
                "spelling": {
                    "type": "text",
                    "fields": {
                        "phonetic": {
                            "type": "text",
                            "analyzer": "phonetic"
                        },
                        "shingle": {
                            "type": "text",
                            "analyzer": "shingle"
                        },
                        "whitespace": {
                            "type": "text",
                            "analyzer": "whitespace"
                        }
                    },
                    "analyzer": "standard"
                },
                "status": {
                    "type": "integer"
                },
                "stock": {
                    "properties": {
                        "is_in_stock": {
                            "type": "boolean"
                        },
                        "qty": {
                            "type": "integer"
                        }
                    }
                },
                "strap_bags": {
                    "type": "integer"
                },
                "style_bags": {
                    "type": "integer"
                },
                "tax_class_id": {
                    "type": "integer"
                },
                "type_id": {
                    "type": "keyword"
                },
                "updated_at": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"
                },
                "url_key": {
                    "type": "text",
                    "fields": {
                        "untouched": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "copy_to": [
                        "search",
                        "spelling"
                    ],
                    "analyzer": "standard"
                },
                "visibility": {
                    "type": "integer"
                }
            }
        }
    }
}

}


Solution

  • Adding a working example with index data, mapping, search query, and search result

    Index Mapping:

    {
      "mappings": {
        "properties": {
          "category": {
            "type": "nested"
          }
        }
      }
    }
    

    Index Data:

    {
      "entity_id": "4",
      "attribute_set_id": "4",
      "type_id": "simple",
      "sku": "skuxxx",
      "has_options": false,
      "required_options": false,
      "created_at": "2020-01-29 06:38:52",
      "updated_at": "2020-09-04 12:31:00",
      "visibility": "4",
      "category": [
                  {
                    "category_id": 2,
                    "is_virtual": "false"
                  },
                  {
                    "category_id": 3,
                    "is_parent": true,
                    "is_virtual": "false",
                    "name": "Category name3"
                  }
                ]
    }
    
    {
      "entity_id": "4",
      "attribute_set_id": "4",
      "type_id": "simple",
      "sku": "skuxxx",
      "has_options": false,
      "required_options": false,
      "created_at": "2020-01-29 06:38:52",
      "updated_at": "2020-09-04 12:31:00",
      "visibility": "4",
      "category": [
                  {
                    "category_id": 2,
                    "is_parent": true,
                    "is_virtual": "false",
                    "name": "Category name1"
                  }
                ]
    }
    
    {
      "entity_id": "4",
      "attribute_set_id": "4",
      "type_id": "simple",
      "sku": "skuxxx",
      "has_options": false,
      "required_options": false,
      "created_at": "2020-01-29 06:38:52",
      "updated_at": "2020-09-04 12:31:00",
      "visibility": "4",
      "category": [
                  {
                    "category_id": 2,
                    "is_virtual": "false"
                  },
                  {
                    "category_id": 3,
                    "is_parent": true,
                    "is_virtual": "false",
                    "name": "Category name1"
                  },
                  {
                    "category_id": 4,
                    "is_parent": true,
                    "is_virtual": "false",
                    "name": "Category name2"
                  }
                ]
    }
    

    Search Query:

    {
      "size": 0,
      "aggs": {
        "nested_path": {
          "nested": {
            "path": "category"
          },
          "aggs": {
            "distinct_categories": {
              "terms": {
                "field": "category.category_id"
              },
              "aggs": {
                "top_category_hits": {
                  "top_hits": {
                   "_source": {
                      "includes": [
                        "category.category_id"
                      ]
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    

    Search Result:

    "aggregations": {
        "nested_path": {
          "doc_count": 6,
          "distinct_categories": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": 2,
                "doc_count": 3,                <-- note this 
                "top_category_hits": {
                  "hits": {
                    "total": {
                      "value": 3,
                      "relation": "eq"
                    },
                    "max_score": 1.0,
                    "hits": [
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "1",
                        "_nested": {
                          "field": "category",
                          "offset": 0
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 2
                        }
                      },
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "3",
                        "_nested": {
                          "field": "category",
                          "offset": 0
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 2
                        }
                      },
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "2",
                        "_nested": {
                          "field": "category",
                          "offset": 0
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 2            <-- note this 
                        }
                      }
                    ]
                  }
                }
              },
              {
                "key": 3, 
                "doc_count": 2,                <-- note this 
                "top_category_hits": {
                  "hits": {
                    "total": {
                      "value": 2,
                      "relation": "eq"
                    },
                    "max_score": 1.0,
                    "hits": [
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "1",
                        "_nested": {
                          "field": "category",
                          "offset": 1
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 3
                        }
                      },
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "2",
                        "_nested": {
                          "field": "category",
                          "offset": 1
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 3             <-- note this 
                        }
                      }
                    ]
                  }
                }
              },
              {
                "key": 4,
                "doc_count": 1,                <-- note this                
                "top_category_hits": {
                  "hits": {
                    "total": {
                      "value": 1,
                      "relation": "eq"
                    },
                    "max_score": 1.0,
                    "hits": [
                      {
                        "_index": "stof_64259310",
                        "_type": "_doc",
                        "_id": "1",
                        "_nested": {
                          "field": "category",
                          "offset": 2
                        },
                        "_score": 1.0,
                        "_source": {
                          "category_id": 4       <-- note this
                        }
                      }
                    ]
                  }
                }
              }
            ]
          }