Search code examples
pythonelasticsearchelasticsearch-analyzers

Elasticsearch 'failed to find analyzer' error & analyzer not shown by Settings API


I'm trying to get my Elasticsearch indices to use the Porter stemming algorithm, but my custom analyzer isn't defined when I test with the _analyze endpoint.

I've looked at the ES documentation and similar questions on SO, and I'm not sure what the problem is. I tried using a separate PUT request for the settings when creating the index, but that had no effect.

This is how I create the mappings:

@staticmethod
    def make_mapping():
        mapping = {
            'settings': {
                'analysis':
                {
                    'analyzer': {
                        'porter_english': {
                            'type': 'custom',
                            'tokenizer': 'standard',
                            'stopwords': '_english_',
                            'filter': ['lowercase', 'porter_stem']
                            }
                        }
                    }
                },
            'mappings': {
                'properties': {
                    'published': {
                        'type': 'boolean'
                    },
                    'title': {
                        'type': 'text',
                        'analyzer': 'english'
                    },
                    'date': {
                        'type': 'date'
                    },
                    'description': {
                        'type': 'text',
                        'analyzer': {
                            'porter_english': {
                                'type': 'custom',
                                'tokenizer': 'standard',
                                'stopwords': '_english_',
                                'filter': ['lowercase', 'porter_stem']
                            }
                        }
                    },
                    'keywords': {
                        'type': 'text',
                        'analyzer': {
                            'porter_english': {
                                'type': 'custom',
                                'tokenizer': 'standard',
                                'stopwords': '_english_',
                                'filter': ['lowercase', 'porter_stem']
                            }
                        }
                    },
                    'price': {
                        'type': 'float'
                    }
                }
            }
        }
        return mapping

This is the function that creates the indexes from the mapping.

def init_elasticsearch():
    es = elasticsearch.Elasticsearch(['http://localhost:9200'])
    # idx_client = elasticsearch.client.IndicesClient(es)
    for i in searchables.included:
        index_name = camelTo_snake(i.__name__)
        index_m = i.make_mapping()
        index_uri = "{}/{}".format(current_app.config['ELASTIC_URI'], index_name)
        create_index = requests.put(index_uri, json=index_m)
        init_settings = requests.put(index_uri, json=index_m['settings'])

And this is all I get if I query the settings:

>>> g = requests.get(e + '/gallery_item/_settings')
>>> g.text
'{
    "gallery_item":{
        "settings":{
            "index":{
                "creation_date":"1564789941204",
                "number_of_shards":"1",
                "number_of_replicas":"1",
                "uuid":"SgkEBN4nTxWUCeSGWMwbGw",
                "version":{"created":"7020099"},
                "provided_name":"gallery_item"
             }
         }
    }
}'

I just need those two fields to use the porter_stem token filter.


Solution

  • I believe this part of the mappings is wrong

    change this

    'description': {
                            'type': 'text',
                            'analyzer': {
                                'porter_english': {
                                    'type': 'custom',
                                    'tokenizer': 'standard',
                                    'stopwords': '_english_',
                                    'filter': ['lowercase', 'porter_stem']
                                }
                            }
                        },
    

    to

     'description': {
                     'type': 'text',
                      'analyzer': 'porter_english'    
    
      },
    

    because you already defined analyzer in your settings. you just have to use it in the mappings