I have a query which gives me the results I want, but I need to filter further so that only records MISSING a specific bucket are shown.
My query is this:
{
"size": 0,
"query":
{
"bool":
{
"must": [{"match_all": {}}],
"filter":
[
{
"bool":
{
"should":
[
{"match_phrase": {"user": "bob_user"}},
{"match_phrase": {"user": "tom_user"}}
],"minimum_should_match": 1
}
},
{
"bool":
{
"should":
[
{"match_phrase": {"result_code": "403"}},
{"match_phrase": {"result_code": "200"}}
],"minimum_should_match": 1
}
},
{
"range": {"time": {"gte": "2021-05-12T18:51:22.512Z","lte": "2021-05-13T18:51:22.512Z","format": "strict_date_optional_time"}}}
]
}
},
"aggs":
{
"stats":
{
"terms": {"field": "host.keyword","order": {"total_distinct_ip_count": "desc"},"size": 10000},
"aggs":
{
"total_distinct_ip_count": {"cardinality": {"field": "ip.keyword"}},
"status_codes":
{
"terms": {"field": "result_code.keyword","order": {"distinct_ip_count_by_status_code": "desc"},"size": 2},
"aggs":
{
"distinct_ip_count_by_status_code": {"cardinality": {"field": "ip.keyword"}}
}
}
}
}
}
}
Which yields the following results:
{
"key" : "dom.com",
"doc_count" : 92974,
"status_codes" : {
"buckets" : [
{
"key" : "200",
"doc_count" : 92965,
"distinct_ip_count_by_status_code" : {"value" : 51269}
},
{
"key" : "403",
"doc_count" : 9,
"distinct_ip_count_by_status_code" : {"value" : 2}
}
]
},
"total_distinct_ip_count" : {"value" : 51269}
},
{
"key" : "dom2.com",
"doc_count" : 1420,
"status_codes" : {
"buckets" : [
{
"key" : "403",
"doc_count" : 1420,
"distinct_ip_count_by_status_code" : {"value" : 5}
}
]
},
"total_distinct_ip_count" : {"value" : 500}
},
{
"key" : "dom3.com",
"doc_count" : 171097,
"status_codes" : {
"buckets" : [
{
"key" : "200",
"doc_count" : 127437,
"distinct_ip_count_by_status_code" : {"value" : 735}
},
{
"key" : "403",
"doc_count" : 43660,
"distinct_ip_count_by_status_code" : {"value" : 73}
}
]
},
"total_distinct_ip_count" : {"value" : 808}
}
I need a way to return only the record that is missing the 200 bucket. In this case, it would be dom2.com record ONLY based on the fact that it has a 403 bucket, but no 200 bucket. I've messed around with a bucket_selector, but that can only exclude a bucket from the results.. I want to exclude the records that have both 200 and 403 records from the entire results.
{
"size": 0,
"query":
{
"bool":
{
"must": [{"match_all": {}}],
"filter":
[
{
"bool":
{
"should":
[
{"match_phrase": {"user": "bob_user"}},
{"match_phrase": {"user": "tom_user"}}
],"minimum_should_match": 1
}
},
{
"bool":
{
"should":
[
{"match_phrase": {"result_code": "403"}},
{"match_phrase": {"result_code": "200"}}
],"minimum_should_match": 1
}
},
{
"range": {"time": {"gte": "2021-05-12T18:51:22.512Z","lte": "2021-05-13T18:51:22.512Z","format": "strict_date_optional_time"}}}
]
}
},
"aggs":
{
"stats":
{
"terms": {"field": "host.keyword","order": {"total_distinct_ip_count": "desc"},"size": 10000},
"aggs":
{
"total_distinct_ip_count": {"cardinality": {"field": "ip.keyword"}},
"status_codes":
{
"terms": {"field": "result_code.keyword","order": {"distinct_ip_count_by_status_code": "desc"},"size": 2},
"aggs":
{
"distinct_ip_count_by_status_code": {"cardinality": {"field": "ip.keyword"}}
}
},
"only_403":
{
"bucket_selector":
{
"buckets_path":
{"var1": "status_codes['200']>_count"},
"script": "params.var1 == null"
}
}
}
}
}