I'm trying to get my Elasticsearch indices to use the Porter stemming algorithm, but my custom analyzer isn't defined when I test with the _analyze
endpoint.
I've looked at the ES documentation and similar questions on SO, and I'm not sure what the problem is. I tried using a separate PUT request for the settings when creating the index, but that had no effect.
This is how I create the mappings:
@staticmethod
def make_mapping():
mapping = {
'settings': {
'analysis':
{
'analyzer': {
'porter_english': {
'type': 'custom',
'tokenizer': 'standard',
'stopwords': '_english_',
'filter': ['lowercase', 'porter_stem']
}
}
}
},
'mappings': {
'properties': {
'published': {
'type': 'boolean'
},
'title': {
'type': 'text',
'analyzer': 'english'
},
'date': {
'type': 'date'
},
'description': {
'type': 'text',
'analyzer': {
'porter_english': {
'type': 'custom',
'tokenizer': 'standard',
'stopwords': '_english_',
'filter': ['lowercase', 'porter_stem']
}
}
},
'keywords': {
'type': 'text',
'analyzer': {
'porter_english': {
'type': 'custom',
'tokenizer': 'standard',
'stopwords': '_english_',
'filter': ['lowercase', 'porter_stem']
}
}
},
'price': {
'type': 'float'
}
}
}
}
return mapping
This is the function that creates the indexes from the mapping.
def init_elasticsearch():
es = elasticsearch.Elasticsearch(['http://localhost:9200'])
# idx_client = elasticsearch.client.IndicesClient(es)
for i in searchables.included:
index_name = camelTo_snake(i.__name__)
index_m = i.make_mapping()
index_uri = "{}/{}".format(current_app.config['ELASTIC_URI'], index_name)
create_index = requests.put(index_uri, json=index_m)
init_settings = requests.put(index_uri, json=index_m['settings'])
And this is all I get if I query the settings:
>>> g = requests.get(e + '/gallery_item/_settings')
>>> g.text
'{
"gallery_item":{
"settings":{
"index":{
"creation_date":"1564789941204",
"number_of_shards":"1",
"number_of_replicas":"1",
"uuid":"SgkEBN4nTxWUCeSGWMwbGw",
"version":{"created":"7020099"},
"provided_name":"gallery_item"
}
}
}
}'
I just need those two fields to use the porter_stem
token filter.
I believe this part of the mappings
is wrong
change this
'description': {
'type': 'text',
'analyzer': {
'porter_english': {
'type': 'custom',
'tokenizer': 'standard',
'stopwords': '_english_',
'filter': ['lowercase', 'porter_stem']
}
}
},
to
'description': {
'type': 'text',
'analyzer': 'porter_english'
},
because you already defined analyzer in your settings
. you just have to use it in the mappings