configuration elasticsearch production-environment

My elasticsearch instance is up and running, let's move it into production.

I have built a small app with Play 2 and Elasticsearch that provides my other applications with an autocomplete feature. It is now time to move the Elasticsearch instance behind it into production.

mapping:

# PUT the definition of the auto_complete index: one mapping (search_word) plus analysis settings.
#  - _all is disabled for the search_word type.
#  - "word" is a multi_field with two sub-fields:
#      ngrams -> analyzer custom_ngram (standard tokenizer; standard, lowercase, customnGram filters;
#                customnGram is an edgeNGram filter with min_gram 2 and max_gram 50)
#      full   -> custom_full as both index and search analyzer (standard tokenizer; standard, lowercase filters)
#  - "id" and "word_type" are plain string fields.
curl -XPUT 'http://127.0.0.1:9200/auto_complete/?pretty=1' -d '
{
    "mappings": {
        "search_word": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "id": {
                    "type": "string"
                },
                "word": {
                    "fields": {
                        "ngrams": {
                            "type": "string",
                            "analyzer": "custom_ngram"
                        },
                        "full": {
                            "type": "string",
                            "search_analyzer": "custom_full",
                            "index_analyzer": "custom_full"
                        }
                    },
                    "type": "multi_field"
                },
                "word_type": {
                    "type": "string"
                }
            }
        }
    },
    "settings": {
        "analysis": {
            "filter": {
                "customnGram": {
                    "max_gram": 50,
                    "min_gram": 2,
                    "type": "edgeNGram"
                }
            },
            "analyzer": {
                "custom_ngram": {
                    "filter": [
                        "standard",
                        "lowercase",
                        "customnGram"
                    ],
                    "type": "custom",
                    "tokenizer": "standard"
                },
                "custom_full": {
                    "filter": [
                        "standard",
                        "lowercase"
                    ],
                    "type": "custom",
                    "tokenizer": "standard"
                }
            }
        }
    }
}
'

Some testing data for you:

# Bulk-index seven sample documents of type search_word into the auto_complete index.
# The payload is newline-delimited: each {"index": ...} action line is followed by the
# source line of the document it indexes, so the line breaks must be kept exactly as they are.
curl -XPOST 'http://127.0.0.1:9200/_bulk?pretty=1' -d '
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs", "word_type":"STRONG_SEARCH_WORD"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "och VVS ab", "word_type":"WEAK_SEARCH_WORD"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs och rörjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs & rörjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "rot och vvs", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvsjouren", "word_type":"NAME"}
{"index" : {"_index" : "auto_complete", "_type" : "search_word"}}
{"word" : "vvs-jouren", "word_type":"NAME"}
'

A test query for you:

# Test query: search the search_word type of auto_complete for "vvs" with a bool/should
# of two text queries, one per sub-field of the "word" multi_field:
#   word.ngrams - all query terms must match (operator "and")
#   word.full   - whole-token match, boost 1
# The sub-fields are addressed by their full paths as declared in the mapping
# (word.ngrams / word.full) so the lookup does not depend on short-name resolution.
curl -XGET 'http://127.0.0.1:9200/auto_complete/search_word/_search?pretty=1' -d '
{
    "query": {
        "bool": {
            "should": [
                {
                    "text": {
                        "word.ngrams": {
                            "operator": "and",
                            "query": "vvs"
                        }
                    }
                },
                {
                    "text": {
                        "word.full": {
                            "boost": 1,
                            "query": "vvs"
                        }
                    }
                }
            ]
        }
    }
}
'

While testing, I have been running the instance with its default configuration. Currently I have approximately 1 million docs.

If I do:

# Fetch the statistics of the auto_complete index. The URL is quoted so the shell
# does not treat the "?" in the query string as a glob character.
curl 'http://127.0.0.1:9200/auto_complete/_stats?pretty=1'

I get:

{
    "auto_complete": {
        "primaries": {
            "docs": {
                "count": 971133,
                "deleted": 0
            },
            "store": {
                "size": "224.6mb",
                "size_in_bytes": 235552784,
                "throttle_time": "0s",
                "throttle_time_in_millis": 0
            },
            "indexing": {
                "index_total": 971126,
                "index_time": "4m",
                "index_time_in_millis": 242450,
                "index_current": 0,
                "delete_total": 0,
                "delete_time": "0s",
                "delete_time_in_millis": 0,
                "delete_current": 0
            },
            "get": {
                "total": 0,
                "time": "0s",
                "time_in_millis": 0,
                "exists_total": 0,
                "exists_time": "0s",
                "exists_time_in_millis": 0,
                "missing_total": 0,
                "missing_time": "0s",
                "missing_time_in_millis": 0,
                "current": 0
            },
            "search": {
                "query_total": 45,
                "query_time": "1.1s",
                "query_time_in_millis": 1152,
                "query_current": 0,
                "fetch_total": 35,
                "fetch_time": "50ms",
                "fetch_time_in_millis": 50,
                "fetch_current": 0
            }
        },
        "total": {
            "docs": {
                "count": 971133,
                "deleted": 0
            },
            "store": {
                "size": "224.6mb",
                "size_in_bytes": 235552784,
                "throttle_time": "0s",
                "throttle_time_in_millis": 0
            },
            "indexing": {
                "index_total": 971126,
                "index_time": "4m",
                "index_time_in_millis": 242450,
                "index_current": 0,
                "delete_total": 0,
                "delete_time": "0s",
                "delete_time_in_millis": 0,
                "delete_current": 0
            },
            "get": {
                "total": 0,
                "time": "0s",
                "time_in_millis": 0,
                "exists_total": 0,
                "exists_time": "0s",
                "exists_time_in_millis": 0,
                "missing_total": 0,
                "missing_time": "0s",
                "missing_time_in_millis": 0,
                "current": 0
            },
            "search": {
                "query_total": 45,
                "query_time": "1.1s",
                "query_time_in_millis": 1152,
                "query_current": 0,
                "fetch_total": 35,
                "fetch_time": "50ms",
                "fetch_time_in_millis": 50,
                "fetch_current": 0
            }
        }
    }
}

I have read through the configuration documentation, but what I would really like is some sort of checklist, for example:

Change logfiles path
Since your index looks like X you should set the -Xmx and -Xms to X and Y
Since your index looks like X you should use X nodes and Y replicas
Remove the pretty parameter from all queries
Warm up your most frequently used queries
If you do not use the _all field, set "_all": {"enabled": false}
?

So what I'm looking for here is: what's your story from moving to production, and what configuration changes did you make to get your index running smoothly? Do you have any tips for me, or for anybody else out there who is moving to production?

Solution

You can find an "Elasticsearch Pre-Flight Checklist" in this blog post:

http://asquera.de/opensource/2012/11/25/elasticsearch-pre-flight-checklist/

It covers basic configuration, memory settings, name resolution and much more.