Is there a way to get the documents behind a bucket after a composite aggregation? Suppose that after aggregating I get doc_count = 5. I want to retrieve the 5 documents counted in doc_count because I have to analyze them. Alternatively, is there a way to count words, as in my example below?
I want to count how many users [ant, bird, cat, elep] have bought product a. My data looks like this:
{"Date":"20200515","product":["a","a","a","b","c"],"user":"ant","rank":"silver"}
{"Date":"20200515","product":["a","b","c","e","f"],"user":"ant","rank":"silver"}
{"Date":"20200515","product":["a","a","c","c","d"],"user":"bird","rank":"silver"}
{"Date":"20200515","product":["a","a","c","d","e"],"user":"cat","rank":"silver"}
{"Date":"20200515","product":["a","a","a","b","f"],"user":"cat","rank":"silver"}
{"Date":"20200515","product":["a","a","b","c","d"],"user":"elep","rank":"silver"}
And my query looks like this:
{
"aggs":{
"comp":{
"composite":{
"sources":[
{
"log_date":{
"terms":{
"field":"Date.keyword"
}
}
},
{
"product":{
"terms":{
"field":"product.keyword",
"missing_bucket":true
}
}
},
{
"rank":{
"terms":{
"field":"rank.keyword",
"missing_bucket":true
}
}
},
{
"user":{
"terms":{
"field":"user.keyword",
"missing_bucket":true
}
}
}
]
}
}
}
}
And this is my result:
Date user rank product doc_count
20200515 ant silver a 2
20200515 bird silver a 1
20200515 cat silver a 2
20200515 elep silver a 1
...
And this is my expected result:
Date user rank product doc_count amount
20200515 ant silver a 2 4
20200515 bird silver a 1 2
20200515 cat silver a 2 5
20200515 elep silver a 1 2
You need to convert your product field to the nested type.
Mapping:
{
"mappings": {
"properties": {
"product":{
"type": "nested",
"properties": {
"name":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
}
}
}
}
}
Data:
{
"Date": "20200515",
"product": [
{
"name": "a"
},
{
"name": "a"
},
{
"name": "a"
},
{
"name": "a"
},
{
"name": "b"
},
{
"name": "c"
}
],
"user": "ant",
"rank": "silver"
}
Query:
{
"query": {
"bool": {
"filter": {
"nested": { --> use nested type to filter on product
"path": "product",
"query": {
"match": {
"product.name": "a"
}
}
}
}
}
},
"aggs": {
"user_count": { --> total count of users
"cardinality": {
"field": "user.keyword"
}
},
"users": {
"terms": {
"field": "user.keyword",
"size": 10
},
"aggs": {
"product": {
"nested": {
"path": "product"
},
"aggs": {
"product_name": {
"terms": {
"field": "product.name.keyword",
"include":"a", --> include only specific value, accepts array
"size": 10
},
"aggs": {
"amount": {
"value_count": {
"field": "product.name.keyword"
}
}
}
}
}
}
}
}
}
}
Result:
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "index44",
"_type" : "_doc",
"_id" : "WtSYJXIBEIlbGJUZf3Ve",
"_score" : 0.0,
"_source" : {
"Date" : "20200515",
"product" : [
{
"name" : "a"
},
{
"name" : "a"
},
{
"name" : "a"
},
{
"name" : "a"
},
{
"name" : "b"
},
{
"name" : "c"
}
],
"user" : "ant",
"rank" : "silver"
}
}
]
},
"aggregations" : {
"user_count" : {
"value" : 1
},
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "ant",
"doc_count" : 1,
"product" : {
"doc_count" : 6,
"product_name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "a",
"doc_count" : 4,
"amount" : {
"value" : 4
}
}
]
}
}
}
]
}
}