I have built a small app in Play 2 and Elasticsearch that will provide my other applications with an autocomplete feature. It is time to move my Elasticsearch instance into production.
Here is my mapping:
curl -XPUT 'http://127.0.0.1:9200/auto_complete/?pretty=1' -d '
{
"mappings": {
"search_word": {
"_all": {
"enabled": false
},
"properties": {
"id": {
"type": "string"
},
"word": {
"fields": {
"ngrams": {
"type": "string",
"analyzer": "custom_ngram"
},
"full": {
"type": "string",
"search_analyzer": "custom_full",
"index_analyzer": "custom_full"
}
},
"type": "multi_field"
},
"word_type": {
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"filter": {
"customnGram": {
"max_gram": 50,
"min_gram": 2,
"type": "edgeNGram"
}
},
"analyzer": {
"custom_ngram": {
"filter": [
"standard",
"lowercase",
"customnGram"
],
"type": "custom",
"tokenizer": "standard"
},
"custom_full": {
"filter": [
"standard",
"lowercase"
],
"type": "custom",
"tokenizer": "standard"
}
}
}
}
}
'
Some testing data for you:
curl -XPOST 'http://127.0.0.1:9200/_bulk?pretty=1' -d '
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs", "word_type":"STRONG_SEARCH_WORD"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "och VVS ab", "word_type":"WEAK_SEARCH_WORD"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs och rörjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs & rörjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "rot och vvs", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvsjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs-jouren", "word_type":"NAME"}
'
A test query for you:
curl -XGET 'http://127.0.0.1:9200/auto_complete/search_word/_search?pretty=1' -d '
{
"query": {
"bool": {
"should": [
{
"text": {
"search_word.ngrams": {
"operator": "and",
"query": "vvs"
}
}
},
{
"text": {
"search_word.full": {
"boost": 1,
"query": "vvs"
}
}
}
]
}
}
}
'
I have been running the instance in default mode when testing. Currently I have approximately 1 million docs.
If I do:
curl http://127.0.0.1:9200/auto_complete/_stats?pretty=1
I get:
{
"auto_complete": {
"primaries": {
"docs": {
"count": 971133,
"deleted": 0
},
"store": {
"size": "224.6mb",
"size_in_bytes": 235552784,
"throttle_time": "0s",
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 971126,
"index_time": "4m",
"index_time_in_millis": 242450,
"index_current": 0,
"delete_total": 0,
"delete_time": "0s",
"delete_time_in_millis": 0,
"delete_current": 0
},
"get": {
"total": 0,
"time": "0s",
"time_in_millis": 0,
"exists_total": 0,
"exists_time": "0s",
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time": "0s",
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"query_total": 45,
"query_time": "1.1s",
"query_time_in_millis": 1152,
"query_current": 0,
"fetch_total": 35,
"fetch_time": "50ms",
"fetch_time_in_millis": 50,
"fetch_current": 0
}
},
"total": {
"docs": {
"count": 971133,
"deleted": 0
},
"store": {
"size": "224.6mb",
"size_in_bytes": 235552784,
"throttle_time": "0s",
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 971126,
"index_time": "4m",
"index_time_in_millis": 242450,
"index_current": 0,
"delete_total": 0,
"delete_time": "0s",
"delete_time_in_millis": 0,
"delete_current": 0
},
"get": {
"total": 0,
"time": "0s",
"time_in_millis": 0,
"exists_total": 0,
"exists_time": "0s",
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time": "0s",
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"query_total": 45,
"query_time": "1.1s",
"query_time_in_millis": 1152,
"query_current": 0,
"fetch_total": 35,
"fetch_time": "50ms",
"fetch_time_in_millis": 50,
"fetch_current": 0
}
}
}
}
I have read through the configuration, but what I would like is some sort of checklist.
So what I'm looking for here is: What's your story when moving to production, and what type of configuration did you do to make your index run smoothly? Do you have any tips for me, or for anybody else out there who is moving to production?
You can find an "ELASTICSEARCH PRE-FLIGHT CHECKLIST" in this blog post:
http://asquera.de/opensource/2012/11/25/elasticsearch-pre-flight-checklist/
The checklist covers basic configuration, memory settings, name resolution and much more.