Search code examples
elasticsearch lucene kibana

Kibana: the same field used twice in one query, joined with the "and not" operator, does not exclude results. "AND" and "AND NOT" precedence


I need to find documents where the text field "Body" includes "Balance for subscriber with SAN" and excludes "was not found after invoking reip-adapter". I created this KQL query in Kibana:

Body : "Balance for subscriber with SAN" and not Body : "was not found after invoking reip-adapter"

But the results include documents that match both conditions, i.e. that contain both "Balance for subscriber with SAN" and "was not found after invoking reip-adapter". Why do my results contain documents with both "Balance for subscriber with SAN" AND "was not found after invoking reip-adapter"?

Inspect KQL Request (the query generated by Kibana):

 "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "bool": {
            "filter": [
              {
                "bool": {
                  "should": [
                    {
                      "match_phrase": {
                        "Body": "Balance for subscriber with SAN"
                      }
                    }
                  ],
                  "minimum_should_match": 1
                }
              },
              {
                "bool": {
                  "must_not": {
                    "bool": {
                      "should": [
                        {
                          "match_phrase": {
                            "Body": "was not found after invoking reip-adapter"
                          }
                        }
                      ],
                      "minimum_should_match": 1
                    }
                  }
                }
              }
            ]
          }
        },
        {
          "range": {
            "Timestamp": {
              "format": "strict_date_optional_time",
              "gte": "2020-08-29T08:24:55.067Z",
              "lte": "2020-08-29T10:24:55.067Z"
            }
          }
        }
      ],
      "should": [],
      "must_not": []
    }
  }

The "and not" condition is not working. Response:

-----omitted--------
        "_source": {
          "prospector": {},
          "Severity": "INFO",
          "uuid": "e71b207a-42a6-4b2c-98d1-b1094c578776",
          "Body": "Balance for subscriber with SAN=0400043102was not found after invoking reip-adapter.",
          "tags": [
            "iptv",
            "beats_input_codec_plain_applied"
          ],
          "source": "/applogs/Iptv/app.log",
          "host": {
            "name": "e38"
          },
          "offset": 23097554,
          "pid": "2473",
          "Configuration": "IptvFacadeBean",
          "Timestamp": "2020-08-29T10:24:50.040Z",
          "@timestamp": "2020-08-29T10:24:50.446Z",
          "input": {}
        }
-----omitted--------

Solution

  • The data you are indexing for the Body field is:

    "Body": "Balance for subscriber with SAN=0400043102was not found after invoking reip-adapter."

    There is no space between the number and the word "was" (0400043102was), so the standard analyzer keeps them together as a single token. The tokens generated are:

    POST /_analyze
    
    {
      "analyzer" : "standard",
      "text" : "Balance for subscriber with SAN=0400043102was not found after invoking reip-adapter."
    }
    

    The tokens are :

    {
        "tokens": [
            {
                "token": "balance",
                "start_offset": 0,
                "end_offset": 7,
                "type": "<ALPHANUM>",
                "position": 0
            },
            {
                "token": "for",
                "start_offset": 8,
                "end_offset": 11,
                "type": "<ALPHANUM>",
                "position": 1
            },
            {
                "token": "subscriber",
                "start_offset": 12,
                "end_offset": 22,
                "type": "<ALPHANUM>",
                "position": 2
            },
            {
                "token": "with",
                "start_offset": 23,
                "end_offset": 27,
                "type": "<ALPHANUM>",
                "position": 3
            },
            {
                "token": "san",
                "start_offset": 28,
                "end_offset": 31,
                "type": "<ALPHANUM>",
                "position": 4
            },
            {
                "token": "0400043102was",       <-- note this
                "start_offset": 32,
                "end_offset": 45,
                "type": "<ALPHANUM>",
                "position": 5
            },
            {
                "token": "not",
                "start_offset": 46,
                "end_offset": 49,
                "type": "<ALPHANUM>",
                "position": 6
            },
            {
                "token": "found",
                "start_offset": 50,
                "end_offset": 55,
                "type": "<ALPHANUM>",
                "position": 7
            },
            {
                "token": "after",
                "start_offset": 56,
                "end_offset": 61,
                "type": "<ALPHANUM>",
                "position": 8
            },
            {
                "token": "invoking",
                "start_offset": 62,
                "end_offset": 70,
                "type": "<ALPHANUM>",
                "position": 9
            },
            {
                "token": "reip",
                "start_offset": 71,
                "end_offset": 75,
                "type": "<ALPHANUM>",
                "position": 10
            },
            {
                "token": "adapter",
                "start_offset": 76,
                "end_offset": 83,
                "type": "<ALPHANUM>",
                "position": 11
            }
        ]
    }
    

    Therefore, when you run a match_phrase query like this:

     "should": [
                            {
                              "match_phrase": {
                                "Body": "was not found after invoking reip-adapter"
                              }
                            }
                          ]
    

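    No standalone token "was" is generated for this document (it is fused into "0400043102was"), so the phrase "was not found after invoking reip-adapter" does not match it. Therefore the must_not clause does not exclude the document, and it still appears in the results.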

    Index Data:

    { "Body":"Balance for subscriber with SAN=0400043102" }
    { "Body":"Balance for subscriber with SAN=0400043102was not found after invoking reip-adapter." }
    

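    Search Query (the leading "was" is dropped from the excluded phrase, so that the phrase matches the tokens that were actually indexed):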

     {
      "query": {
        "bool": {
          "must": {
            "match_phrase": {
              "Body": "Balance for subscriber with SAN"
            }
          },
          "must_not": {
            "match_phrase": {
              "Body": "not found after invoking reip-adapter"
            }
          }
        }
      }
    }
    

    Search Result:

    "hits": [
                {
                    "_index": "my_index",
                    "_type": "_doc",
                    "_id": "2",
                    "_score": 1.055546,
                    "_source": {
                        "Body": "Balance for subscriber with SAN=0400043102"
                    }
                }
            ]