Search code examples
elasticsearch, django-haystack

EdgeNgramField min and max letters in django haystack


Is there a way to restrict the size of the edge ngrams in django haystack indexing? For example, I create the ngram as follows:

#search_indexes.py
content_auto = indexes.EdgeNgramField(model_attr='name')

But I don't want to create 2 letter ngrams, I actually want to set the min at 4 or 5.

As background, I am using django-haystack/elasticsearch, with bonsai on heroku.


Solution

  • What you need to do is override the search mapping in Haystack's ElasticSearch backend.

    In brief: extend the ElasticSearch backend and replace its default settings with a new schema mapping, either defined directly in the subclass or imported from settings.py.

    from django.conf import settings
    from haystack.backends.elasticsearch_backend import (ElasticsearchSearchBackend,
        ElasticsearchSearchEngine)
    
    class MyElasticBackend(ElasticsearchSearchBackend):
        """Elasticsearch backend that installs custom n-gram index settings.

        After the parent backend is initialised, ``DEFAULT_SETTINGS`` is
        replaced on the instance with a mapping whose ``haystack_edgengram``
        filter starts at 5 characters, so shorter edge n-grams are not
        generated by the ``edgengram_analyzer``.
        """

        def __init__(self, connection_alias, **connection_options):
            """Initialise the parent backend, then override its settings.

            Args:
                connection_alias: Passed through unchanged to the parent
                    backend's ``__init__``.
                **connection_options: Passed through unchanged to the parent
                    backend's ``__init__``.
            """
            # The class named in super() must be this class; the original
            # referenced an undefined name and raised NameError on creation.
            super(MyElasticBackend, self).__init__(
                connection_alias, **connection_options)
            self.DEFAULT_SETTINGS = {
                'settings': {
                    "analysis": {
                        # Both analyzers split on the built-in "lowercase"
                        # tokenizer and then apply one of the filters below.
                        "analyzer": {
                            "ngram_analyzer": {
                                "type": "custom",
                                "tokenizer": "lowercase",
                                "filter": ["haystack_ngram"],
                            },
                            "edgengram_analyzer": {
                                "type": "custom",
                                "tokenizer": "lowercase",
                                "filter": ["haystack_edgengram"],
                            },
                        },
                        # These tokenizers are defined but not referenced by
                        # either analyzer above; the effective n-gram sizes
                        # come from the "filter" section.
                        "tokenizer": {
                            "haystack_ngram_tokenizer": {
                                "type": "nGram",
                                "min_gram": 3,
                                "max_gram": 15,
                            },
                            "haystack_edgengram_tokenizer": {
                                "type": "edgeNGram",
                                "min_gram": 2,
                                "max_gram": 15,
                                "side": "front",
                            },
                        },
                        "filter": {
                            "haystack_ngram": {
                                "type": "nGram",
                                "min_gram": 3,
                                "max_gram": 15,
                            },
                            # min_gram here is the knob that controls the
                            # shortest edge n-gram that gets indexed.
                            "haystack_edgengram": {
                                "type": "edgeNGram",
                                "min_gram": 5,
                                "max_gram": 15,
                            },
                        },
                    },
                },
            }
    
    
    class ConfigurableElasticSearchEngine(ElasticsearchSearchEngine):
        """Search engine that uses ``MyElasticBackend`` as its backend class."""
        backend = MyElasticBackend
    

    For a fuller explanation, see my write-up about extending the ElasticSearch backend to customize the search mapping.