Search code examples
elasticsearch, dsl, querydsl

How to configure elasticsearch regexp query


I am trying to build an Elasticsearch request. I use the query DSL and want to find documents that contain the word "swagger" in the "message" field.

Here is an example of a matching document that I want the query to return:

  {
                "_index": "apiconnect508",
                "_type": "audit",
                "_id": "AWF1us1T4ztincEzswAr",
                "_score": 1,
                "_source": {
                    "consumerOrgId": null,
                    "headers": {
                        "http_accept": "application/json",
                        "content_type": "application/json",
                        "request_path": "/apim-5a7c34e0e4b02e66c60edbb2-2018.02/auditevent",
                        "http_version": "HTTP/1.1",
                        "http_connection": "keep-alive",
                        "request_method": "POST",
                        "http_host": "localhost:9700",
                        "request_uri": "/apim-5a7c34e0e4b02e66c60edbb2-2018.02/auditevent",
                        "content_length": "533",
                        "http_user_agent": "Wink Client v1.1.1"
                    },
                    "nlsMessage": {
                        "resource": "messages",
                        "replacements": [
                            "test",
                            "1.0.0",
                            "ext_mafashagov@rencredit.ru"
                        ],
                        "key": "swagger.import.notification"
                    },
                    "notificationType": "EVENT",
                    "eventType": "AUDIT",
                    "source": null,
                    "envId": null,
                    "message": "API test version 1.0.0 was created from a Swagger document by ext_mafashagov@rencredit.ru.",
                    "userId": "ext_mafashagov@rencredit.ru",
                    "orgId": "5a7c34e0e4b02e66c60edbb2",
                    "assetType": "api",
                    "tags": [
                        "_geoip_lookup_failure"
                    ],
                    "gateway_geoip": {},
                    "datetime": "2018-02-08T14:04:32.731Z",
                    "@timestamp": "2018-02-08T14:04:32.747Z",
                    "assetId": "5a7c58f0e4b02e66c60edc53",
                    "@version": "1",
                    "host": "127.0.0.1",
                    "id": "5a7c58f0e4b02e66c60edc55",
                    "client_geoip": {}
                }
            }

I try to find this document with:

POST myAddress/_search

The following query works when the "regexp" clause is removed. How should I configure the regexp part of my query?

{
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "must": [
            {
              "range": {
               "@timestamp" : {"gte" : "now-100d"}

              }
            },
            {
              "term": {
                "_type": "audit"
              }
            },
            {
                "regexp" : {
                    "message": "*wagger*"
                }
            }

          ]
        }
      }
    }

  },
  "sort": {
    "TraceDateTime": {
      "order": "desc",
      "ignore_unmapped": "true"
    }
  }
}

Solution

  • If the message field is analyzed, this simple match query should work:

    "match":{
        "message":"*swagger*"
    }
    

    However, if it is not analyzed, the following two queries should work for you. Both are case sensitive, so you should consider lowercasing your field if you wish to keep it not analyzed.

    "wildcard":{
        "message":"*swagger*"
    }
    

    or

    "regexp":{
        "message":"swagger"
    }
    

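    Be careful: wildcard and regexp queries, especially patterns that start with a wildcard, can significantly degrade performance. Also note that a regexp pattern is anchored and must match the entire term, so against a not-analyzed field it has to be written as ".*swagger.*" to match text in the middle of the value.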