I want to apply some filters on the bucket response generated by the date_histogram
, that filter is dependent on the key of the date_histogram
output buckets.
Suppose I have following data in
{
"entryTime":"",
"soldTime:""
}
the elastic query is something like this
{
"aggs": {
"date": {
"date_histogram": {
"field": "entryTime",
"interval": "month",
"keyed": true
},
"aggs": {
"filter_try": {
"filter": {
"bool": {
"must": [
{
"range": {
"entryTime": {
"lte": 1588840533000
}
}
},
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"exists": {
"field": "soldTime"
}
},
{
"range": {
"soldTime": {
"gt": 1588840533000
}
}
}
]
}
},
{
"bool": {
"must_not": [
{
"exists": {
"field": "soldTime"
}
}
]
}
}
]
}
}
]
}
}
}
}
}
}
}
so here in that bool query, I want to use the date generated for the specific bucket by date_histogram
aggregation in both the range clauses instead of the hardcoded epoch time.
Even if we can access using script then also it's fine.
for further clarification, this is the boolean query and in the query want to replace this "DATE"
with the date_histogram
bucket key.
# (entryTime < DATE)
# AND
# (
# (soldTime != null AND soldTime > DATE)
# OR
# (soldTime == NULL)
# )
Consider below 10 Document I have:
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1577869200000",
"soldTime" : "1578646800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578560400000",
"soldTime" : "1579942800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1579683600000",
"soldTime" : "1581325200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1580893200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582189200000",
"soldTime" : "1582362000000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "7",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582621200000",
"soldTime" : "1584349200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "8",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1583053200000",
"soldTime" : "1583830800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1584262800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "10",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1585472400000"
}
}
]
Now the end of January 2020 in epoch is -> 1580515199000
So if I apply on the above-mentioned bool query,
Will get the output as the
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "4",
"_score" : 3.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1579683600000",
"soldTime" : "1581325200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
As document with ID 4 satisfy (soldTime != null AND soldTime > DATE)
and document with ID 2 satisfy (soldTime == null)
condition from OR part.
Now for the same bool request If I use the date of end February 2020 -> 1583020799000
, will get the hits as follows
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "7",
"_score" : 3.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582621200000",
"soldTime" : "1584349200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1580893200000"
}
}
]
Now the same data required for each end of the month of a whole year to plot the trend.
Thank you
I couldn't find a way using normal queries as parent aggregation key is not available in sub aggregation. I have written a script for this which selects documents where soldTime is either null or doesnot fall in same month as entryTime
Query:
{
"query": {
"script": {
"script": """
ZonedDateTime entry;
ZonedDateTime sold;
if(doc['entryTime'].size()>0)
{
entry= doc['entryTime'].value;
}
if(doc['soldTime'].size()>0)
{
sold = doc['soldTime'].value;
}
if(sold==null || ( entry.getMonthValue()!==sold.getMonthValue()|| entry.getYear()!==sold.getYear()))
{
return true;
}
else false;
"""
}
},
"size": 10,
"aggs": {
"monthly_trend": {
"date_histogram": {
"field": "entryTime",
"interval": "month"
},
"aggs": {
"docs": {
"top_hits": {
"size": 10
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index22",
"_type" : "_doc",
"_id" : "55Kv83EB8a54AbXfngYU",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
},
"aggregations" : {
"monthly_trend" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 1,
"docs" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index22",
"_type" : "_doc",
"_id" : "55Kv83EB8a54AbXfngYU",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
}
}
}
]
}
}