Search code examples
elasticsearch

Elasticsearch "AND" query on flattened fields


I am trying to do a must query on several fields within a flattened field that has an array of objects. I want to match all the criteria on a single element of the array but it's matching to any element in the array.

I have a set of documents like the following:

[
{
    "_source": {
    "metadata": {
        "name": "banana",
        "seasonalities": [
            {
                "date": "2023-01-01",
                "country": "AA"
            },
            {
                "date": "2023-01-22",
                "country": "BB"
            }
        ]
    }
},
{
    "_source": {
        "metadata": {
            "name": "potato",
            "seasonalities": [
                {
                    "date": "2023-01-01",
                    "country": "AA"
                },
                {
                    "date": "2023-01-01",
                    "country": "BB"
                },
                {
                    "date": "2023-01-01",
                    "country": "CC"
                },
                {
                    "date": "2023-01-22",
                    "country": "DD"
                },
                {
                    "date": "2023-01-23",
                    "country": "EE"
                }
            ]
        }
}
]

The following query works and retrieves a single document:

{
    "query": {
        "bool": {
            "filter": [
                {
                    "bool": {
                        "must": [
                            {
                                "term": {
                                    "metadata.seasonalities.date": {"value":"2023-01-22"}
                                }
                            },
                            {
                                "term": {
                                    "metadata.name": {"value":"banana"}
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

This simpler query also work:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "metadata.seasonalities.date": "2023-01-22"
                    }
                },
                {
                    "term": {
                        "metadata.name": "banana"
                    }
                }
            ]
        }
    }
}

These queries all return a single value as expected but when I try to do something like this:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "metadata.seasonalities.date": "2023-01-22"
                    }
                },
                {
                    "term": {
                        "metadata.seasonalities.country": "BB"
                    }
                }
            ]
        }
    }
}

I get both documents instead of only the first one.

These are the mappings for the index:

        "mappings": {
            "properties": {
                "metadata": {
                    "type": "flattened"
                }
            }
        },

When I run the search in my actual index with explain I get this (I have a few more fields and documents):

{
    "took": 7,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 943,
            "relation": "eq"
        },
...
            {
                "_source": {
                    "metadata": {
                        "name": "XXX",
                        "seasonalities": [
                            {
                                "date": "2023-01-22",
                                "country": "MM",
                            },
                            {
                                "date": "2023-03-01",
                                "country": "BB",
                            },
...
                "_explanation": {
                    "value": 9.256371,
                    "description": "sum of:",
                    "details": [
                        {
                            "value": 5.61615,
                            "description": "weight(metadata._keyed:seasonalities.date\u00002023-01-22 in 1) [PerFieldSimilarity], result of:",
                            "details": [
                                {
                                    "value": 5.61615,
                                    "description": "score(freq=1.0), computed as boost * idf * tf from:",
                                    "details": [
                                        {
                                            "value": 2.2,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 3.3873937,
                                            "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                                            "details": [
                                                {
                                                    "value": 177,
                                                    "description": "n, number of documents containing term",
                                                    "details": []
                                                },
                                                {
                                                    "value": 5251,
                                                    "description": "N, total number of documents with field",
                                                    "details": []
                                                }
                                            ]
                                        },
                                        {
                                            "value": 0.75361645,
                                            "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                                            "details": [
                                                {
                                                    "value": 1,
                                                    "description": "freq, occurrences of term within document",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1.2,
                                                    "description": "k1, term saturation parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 0.75,
                                                    "description": "b, length normalization parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1,
                                                    "description": "dl, length of field",
                                                    "details": []
                                                },
                                                {
                                                    "value": 33.413826,
                                                    "description": "avgdl, average length of field",
                                                    "details": []
                                                }
                                            ]
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "value": 3.640221,
                            "description": "weight(metadata._keyed:seasonalities.country\u0000BB in 1) [PerFieldSimilarity], result of:",
                            "details": [
                                {
                                    "value": 3.640221,
                                    "description": "score(freq=1.0), computed as boost * idf * tf from:",
                                    "details": [
                                        {
                                            "value": 2.2,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 2.1956074,
                                            "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                                            "details": [
                                                {
                                                    "value": 584,
                                                    "description": "n, number of documents containing term",
                                                    "details": []
                                                },
                                                {
                                                    "value": 5251,
                                                    "description": "N, total number of documents with field",
                                                    "details": []
                                                }
                                            ]
                                        },
                                        {
                                            "value": 0.75361645,
                                            "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                                            "details": [
                                                {
                                                    "value": 1,
                                                    "description": "freq, occurrences of term within document",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1.2,
                                                    "description": "k1, term saturation parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 0.75,
                                                    "description": "b, length normalization parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1,
                                                    "description": "dl, length of field",
                                                    "details": []
                                                },
                                                {
                                                    "value": 33.413826,
                                                    "description": "avgdl, average length of field",
                                                    "details": []
                                                }
                                            ]
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
            },

I am getting 943 results but looking at the explain it does say: For 2023-01-22: 177 n, number of documents containing term and then for BB: 584 n, number of documents containing term At the intersection I'd expect one or two documents only, how do I get only those? I tried changing term to match in the query to no avail. I'm in ES 7.13.3.


Solution

  • Whenever you need to match two fields of an object field, you need to resort to using the nested field type and query.

    Your mapping should look like this:

    {
      "mappings": {
        "properties": {
          "metadata": {
            "properties": {
              "name": {
                "type": "keyword"
              },
              "seasonalities": {
                "type": "nested",
                "properties": {
                  "date": {
                    "type": "date"
                  },
                  "country": {
                    "type": "keyword"
                  }
                }
              }
            }
          }
        }
      }
    }
    

    The first query would then return only the first document as expected:

    {
      "query": {
        "bool": {
          "filter": [
            {
              "nested": {
                "path": "metadata.seasonalities",
                "query": {
                  "term": {
                    "metadata.seasonalities.date": "2023-01-22"
                  }
                }
              }
            },
            {
              "term": {
                "metadata.name": "banana"
              }
            }
          ]
        }
      }
    }
    

    And when you have two constraints on a seasonality object, you can do it like this in order to also return only the first document:

    {
      "query": {
        "nested": {
          "path": "metadata.seasonalities",
          "query": {
            "bool": {
              "filter": [
                {
                  "term": {
                    "metadata.seasonalities.date": "2023-01-22"
                  }
                },
                {
                  "term": {
                    "metadata.seasonalities.country": "BB"
                  }
                }
              ]
            }
          }
        }
      }
    }