Search code examples
elasticsearchquerydslelasticsearch-aggregationelasticsearch-query

Elasticsearch post aggregation string filter


I have a system with devices that communicate through some gateways, and then in backend metrics are saved in elasticsearch.

I want to know the sensors that are now communicating through a specific gateway_id.

I have a mapping like this one:

{
  "mappings": {
    "properties": {
      "context": {
        "properties": {
          "gateway": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "id": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
}},
      "timeserver": {
        "type": "date"
      },
      "timestamp": {
        "type": "date"
      },
      "type": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "value": {
        "type": "double"
      }
    }
  }
}

In the gateway field is saved, as a string, the id of the gateway used for each metric.

I am able to get the last communication for each device, with this query:

GET _search
{
  "size": 0,
  "aggs": {
    "id_agg": {
      "terms": {
        "field": "context.id.keyword"
        , "size": 10000
      },
      "aggs": {
        "group_docs": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "timestamp": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "_index": "measurements.group.*"
          }
        }
      ]
    }
  }
}

But how can I filter this aggregation result, to get only the sensors that are currently using a specific gateway? Adding something like: "filter": {"term":{"context.gateway": {"value": "request_gateway_serial" }} },

I have searched for bucket_selector aggregation, and pipeline aggregations, but didn't find a way, and looks to me that they only work with numeric values, no strings, like my gateway field.

Query example returns:(A list of the most recent communication for each device)

"aggregations" : {
          {
            "key" : "1234",

                "context" : {
                  "gateway" : "123456",
                  "id" : "1234", 
          },{
            "key" : "12345",
                      "context" : {
                        "gateway" : "1234567",
                        "id" : "12345",
          }, {
             "key" : "12345678",
                     "context" : {
                        "gateway" : "1234567",
                        "id" : "12345678",
}} 

My expected result is then filter for "gateway" : "1234567", and get only "key" : "12345"and "key" : "12345678"


Solution

  • You can use filter aggregation

    GET sensors/_search
    {
      "size": 0,
      "aggs": {
        "filter_gateway": {
          "filter": {
            "term": {
              "context.gateway.keyword": "request_gateway_serial"
            }
          },
          "aggs": {
            "id_agg": {
              "terms": {
                "field": "context.id.keyword",
                "size": 10000
              },
              "aggs": {
                "group_docs": {
                  "top_hits": {
                    "size": 1,
                    "sort": [
                      {
                        "timestamp": {
                          "order": "desc"
                        }
                      }
                    ]
                  }
                }
              }
            }
          }
        }
      },
      "query": {
        "bool": {
          "filter": [
            {
              "term": {
                "_index": "measurements.group.*"
              }
            }
          ]
        }
      }
    }
    

    Depending on you requirement you can also filter documents in query part and then perform aggregation on it.

    EDIT 1:

    In below query I am getting maximum timestamp under a device id and maximum timestamp filtered on given gateway. If both the dates are same it will give the device id which communicated last with the gateway.

    ex.

    Query:

    GET sensors/_search
    {
      "size": 0,
      "aggs": {
        "id_agg": {
          "terms": {
            "field": "context.id.keyword",
            "size": 10000
          },
          "aggs": {
            "maxDate": {
              "max": {
                "field": "context.timestamp"
              }
            },
            "Filter": {
              "filter": {
                "term": {
                  "context.gateway": "1234568"
                }
              },
              "aggs": {
                "filtered_maxdate": {
                  "max": {
                    "field": "context.timestamp"
                  }
                }
              }
            },
            "last_geteway_filter": {
              "bucket_selector": {
                "buckets_path": {
                  "filtereddate": "Filter>filtered_maxdate",
                  "maxDate": "maxDate"
                },
                "script": "params.filtereddate==params.maxDate"
              }
            }
          }
        }
      }
    }
    

    Data:

     [
          {
            "_index" : "sensors",
            "_type" : "_doc",
            "_id" : "eiZ1pW0BcOVYVz455V6s",
            "_score" : 1.0,
            "_source" : {
              "context.gateway" : "1234567",
              "context.id" : 1234,
              "context.timestamp" : "2019-10-02"
            }
          },
          {
            "_index" : "sensors",
            "_type" : "_doc",
            "_id" : "eyZ2pW0BcOVYVz45B14T",
            "_score" : 1.0,
            "_source" : {
              "context.gateway" : "1234568",
              "context.id" : 1234,
              "context.timestamp" : "2019-10-03"
            }
          },
          {
            "_index" : "sensors",
            "_type" : "_doc",
            "_id" : "fCZ2pW0BcOVYVz45Jl6m",
            "_score" : 1.0,
            "_source" : {
              "context.gateway" : "1234569",
              "context.id" : 1234,
              "context.timestamp" : "2019-10-04"
            }
          },
          {
            "_index" : "sensors",
            "_type" : "_doc",
            "_id" : "fSZ2pW0BcOVYVz45dV48",
            "_score" : 1.0,
            "_source" : {
              "context.gateway" : "1234567",
              "context.id" : 1235,
              "context.timestamp" : "2019-10-02"
            }
          },
          {
            "_index" : "sensors",
            "_type" : "_doc",
            "_id" : "fiZ2pW0BcOVYVz45l17A",
            "_score" : 1.0,
            "_source" : {
              "context.gateway" : "1234568",
              "context.id" : 1235,
              "context.timestamp" : "2019-10-03"
            }
          }
        ]
      }
    

    Result:

    Device 12345 had last document under gateway 1234568
    
    "buckets" : [
            {
              "key" : "1235",
              "doc_count" : 2,
              "Filter" : {
                "doc_count" : 1,
                "filtered_maxdate" : {
                  "value" : 1.5700608E12,
                  "value_as_string" : "2019-10-03T00:00:00.000Z"
                }
              },
              "maxDate" : {
                "value" : 1.5700608E12,
                "value_as_string" : "2019-10-03T00:00:00.000Z"
              }
            }
          ]