Search code examples
pythonpython-3.xelasticsearchflaskelasticsearch-dsl

Elasticsearch-dsl with nested filters and AND and OR conditions with exact match


Three parameters come from the front-end:

  1. State - string
  2. Categories - an array of strings. String can consist of several words.
  3. Tags - similar to categories.

All parameters are optional.

If several parameters are sent, they must be combined with AND (the state, the category and the tag must all match). If multiple categories or tags are sent, a match on at least one of them is enough.

That is, if a request arrives with the parameters

{"state": "Alaska", "categories": ["category 1", "category 2"]}

the matching documents will be

  • state = Alaska, categories = category 1;
  • state = Alaska, categories = category 2;
  • state = Alaska, categories = [category 1, category 2];
  • state = Alaska, categories = [category 1, category 3] (has at least one of the requested categories).

and these will not match

  • state = Alabama, categories = category 1
  • state = Alaska, categories = category 3
  • state = Alaska, categories = 1 category (the category name must match exactly: "category 1" != "1 category")

I send the requests to Elasticsearch from Python (3.7), using the elasticsearch-dsl library.

I built three filters from Q objects (using match queries in them).

combined_filter = state_filter & categories_filter & tags_filter

The lists of categories and tags are each split into sub-filters that are combined with OR.

# OR all sub-filters together; `queries` is only read, not modified.
query = queries[0]
for item in queries[1:]:
    query |= item

This produces the following query for Elasticsearch:

Bool(minimum_should_match=1, 
    must=[Match(state='Alaska'), MatchAll()], 
    should=[Match(categories='category 1'), Match(categories='category 2')]
)

Why does this logic find entries whose category / tag names are not an exact match?

from typing import List

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q, Search
from flask import request
from flask.views import MethodView


es = Elasticsearch()


class ArticleSearchAPIView(MethodView):
    """
    Search articles using ElasticSearch

    The optional "state", "categories" and "tags" values of the JSON request
    body are combined with AND; the individual values inside "categories"
    and inside "tags" are combined with OR.

    NOTE: every clause here is a ``match`` query. ``match`` analyzes the
    search text, so when the target field is mapped as analyzed ``text`` a
    multi-word value such as "category 1" is compared term by term and is
    NOT an exact, whole-value comparison.
    """

    @staticmethod
    def filter_create(queries: List[Q]) -> Q:
        """
        Creates Q.OR filter

        The passed list is only read, never modified.

        :param queries: non-empty list of queries to combine
        :return: queries[0] | queries[1] | ... | queries[-1]
        :raises IndexError: if ``queries`` is empty
        """
        query = queries[0]
        for item in queries[1:]:
            query |= item
        return query

    def get(self) -> dict:
        """
        Search article
        First request - with empty params

        :return: {"response": <list of article sources>} when something was
            found, otherwise {"response": "Articles not found."}
        """
        search = Search(using=es, index=ArticleModel.__tablename__)
        # Q() without arguments is the neutral default for a parameter that
        # was not sent (it shows up as MatchAll() in the generated query).
        state_filter = categories_filter = tags_filter = Q()
        result = "Articles not found."

        # get_json() may yield None (e.g. a JSON `null` body); all params
        # are optional, so treat that the same as "no params".
        data = request.get_json() or {}
        categories = data.get("categories")
        tags = data.get("tags")
        state = data.get("state")

        if state:
            state_filter = Q("match", state=state)

        if categories:
            # One sub-query per requested category, OR-ed together.
            queries = [Q("match", categories=value) for value in categories]
            categories_filter = self.filter_create(queries)

        if tags:
            # One sub-query per requested tag, OR-ed together.
            queries = [Q("match", tags=value) for value in tags]
            tags_filter = self.filter_create(queries)

        # AND between the three groups.
        combined_filter = state_filter & categories_filter & tags_filter
        found = (
            search.filter(combined_filter)
            .execute()
            .to_dict()["hits"]
            .get("hits")
        )

        if found:
            result = [article["_source"] for article in found]
        return {"response": result}

Update


The relationships between Article and Category, and between Article and Tag, are many-to-many (MTM).

Mapping

{
  "articles": {
    "mappings": {
      "properties": {
        ...
        "categories": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "state": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "tags": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
        ...
      }
    }
  }
}

Solution

  • I decided that using elasticsearch-dsl is unnecessary here.

    Here is the solution I came to.

    from typing import Dict, List, Tuple, Union

    from elasticsearch import Elasticsearch
    from flask import request
    from flask.views import MethodView

    from .models import ArticleModel  # ArticleModel.__tablename__ == "articles"


    es = Elasticsearch()


    class ArticleSearchAPIView(MethodView):
        """
        Search articles using ElasticSearch

        Filters are exact: the state must equal the requested state, and an
        article must carry at least one of the requested categories and at
        least one of the requested tags. Parameters that are missing or
        empty add no restriction.
        """

        @staticmethod
        def _build_query(state, categories, tags) -> Dict[str, dict]:
            """
            Build the bool query for the given (all optional) parameters.

            ``term`` / ``terms`` are run against the ``.keyword`` sub-fields
            so the whole stored value is compared ("category 1" does not
            match "1 category"). This assumes the index maps ``state``,
            ``categories`` and ``tags`` with a ``keyword`` sub-field.

            :param state: state name, or a falsy value for "any state"
            :param categories: list of category names, or falsy for "any"
            :param tags: list of tag names, or falsy for "any"
            :return: query body fragment to be sent under the "query" key
            """
            clauses = []
            if state:
                clauses.append({"term": {"state.keyword": state}})
            if categories:
                # `terms` matches when ANY of the listed values is present.
                clauses.append({"terms": {"categories.keyword": categories}})
            if tags:
                clauses.append({"terms": {"tags.keyword": tags}})
            # These are yes/no conditions, so they go into the non-scoring
            # `filter` context; all clauses must hold (AND).
            return {"bool": {"filter": clauses}}

        def get(self) -> Dict[str, Union[str, List[dict]]]:
            """
            Search articles

            :return: {"response": <list of article sources>} when something
                was found, otherwise {"response": "Articles not found."}
            """
            # get_json() may yield None (e.g. a JSON `null` body); every
            # parameter is optional, so fall back to "no params".
            data = request.get_json() or {}
            query = self._build_query(
                data.get("state"), data.get("categories"), data.get("tags")
            )

            found = es.search(
                index=ArticleModel.__tablename__, body={"query": query},
            )["hits"].get("hits")

            if not found:
                return {"response": "Articles not found."}
            return {"response": [article["_source"] for article in found]}