Search code examples
elasticsearch, elasticsearch-painless

painless: check if an individual document contains a key


I'm using Painless to filter documents in Elasticsearch 5.5.

Problem

Using a Painless script, find only the documents that have a `strings` field.

Expected Results

Only documents that have a `strings` field are returned.

Actual Results

All documents are returned.

Observation

All documents are returned as long as at least one document in the index has a `strings` field. This could be a caching issue of some sort.

TestCase

Fixtures

PUT /test_idx

POST /test_idx/t/1
{
      "strings": ["hello", "world"]
}

POST /test_idx/t/2
{
      "numbers": [1, 2, 3]
}

Query

GET /test_idx/_search
{
   "query": {
      "bool": {
         "filter": [
            {
               "script": {
                  "script": {
                     "lang": "painless",
                     "inline": "return doc.containsKey(params.keypath)",
                     "params": {"keypath": "strings"}
                  }
               }
            }
         ]
      }
   }
}

Actual Response

{
   "took": 5,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 2,
      "max_score": 0,
      "hits": [
         {
            "_index": "test_idx",
            "_type": "t",
            "_id": "2",
            "_score": 0,
            "_source": {
               "numbers": [
                  1,
                  2,
                  3
               ]
            }
         },
         {
            "_index": "test_idx",
            "_type": "t",
            "_id": "1",
            "_score": 0,
            "_source": {
               "strings": [
                  "hello",
                  "world"
               ]
            }
         }
      ]
   }
}

Expected Response

{
   "took": 5,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0,
      "hits": [
         {
            "_index": "test_idx",
            "_type": "t",
            "_id": "1",
            "_score": 0,
            "_source": {
               "strings": [
                  "hello",
                  "world"
               ]
            }
         }
      ]
   }
}

Solution

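  • `doc.containsKey(...)` checks whether the field exists in the index mapping, not in the individual document, so it returns true for every document once any document has introduced the field. Test the document's own doc values instead, as in the query below (`strings.keyword` is used because the analyzed `strings` text field has no doc values by default). Note that overusing Painless scripts in queries is strongly discouraged for performance reasons; if you only need to test for a field's presence, a plain `exists` query is the better choice.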

    GET /test_idx/_search
    {
      "query": {
        "bool": {
          "filter": [
            {
              "script": {
                "script": {
                  "lang": "painless",
                  "inline": "return doc[params.keypath].value != null",
                  "params": {
                    "keypath": "strings.keyword"
                  }
                }
              }
            }
          ]
        }
      }
    }