We're in the process of migrating from Elasticsearch 5.6 to 7.9. On 5.6, we have two indices: one with 3.4k documents totaling 111.2 MB, and another with 81.6k documents totaling 845.6 MB. On 7.9 we have the same two indices (written to by the same process) with similar mappings; however, they use 14.3 GB and 15.6 GB respectively.
I don't understand what could be making these indices so much larger on 7.9 vs 5.6.
If you are curious, here are the mappings (I've obfuscated the names of many fields to protect our data). ES 5.6:
{
"blah-state-37c088aea98d4b60ad58fb04abe55aa7": {
"mappings": {
"blahblah": {
"properties": {
"blahStatus": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"blah": {
"type": "integer"
},
"blahblah": {
"type": "long"
},
"blahblahblah": {
"type": "text"
},
"blahblahblahblah": {
"type": "integer"
},
"blahblahblahzzz": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "ngram_suggest"
},
"blahblahblahhh": {
"type": "text",
"index": false,
"store": true
},
"blahblahblaaaa": {
"type": "keyword"
},
"created": {
"type": "text"
},
"ended": {
"type": "text"
},
"blaaaaah": {
"type": "boolean"
},
"blaahaah": {
"type": "integer"
},
"bloop": {
"type": "boolean"
},
"bloopibob": {
"type": "integer"
},
"blabiba": {
"type": "keyword"
},
"blah": {
"type": "long"
},
"bleeeep": {
"type": "boolean"
},
"blahhh": {
"type": "boolean"
},
"blahah": {
"type": "text"
},
"hidden": {
"type": "boolean"
},
"blah1": {
"type": "boolean"
},
"blah2": {
"type": "boolean"
},
"blah3": {
"type": "boolean"
},
"blah4": {
"type": "boolean"
},
"blah5": {
"type": "boolean"
},
"blah6": {
"type": "boolean"
},
"blah7": {
"type": "boolean"
},
"blah8": {
"type": "boolean"
},
"blah9": {
"type": "boolean"
},
"blah10": {
"type": "boolean"
},
"blah11": {
"type": "boolean"
},
"blah12": {
"type": "boolean"
},
"blah13": {
"type": "boolean"
},
"isInvalid": {
"type": "boolean"
},
"blah14": {
"type": "boolean"
},
"isNew": {
"type": "boolean"
},
"blah15": {
"type": "boolean"
},
"keywords": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"languages": {
"type": "keyword"
},
"blah16": {
"type": "integer"
},
"blah17": {
"type": "integer"
},
"blah18": {
"type": "keyword"
},
"maxWait": {
"type": "integer"
},
"minBuyIn": {
"type": "float"
},
"nickname": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"nicknamePartial": {
"type": "text",
"analyzer": "ngram_partial"
},
"nicknameSuggest": {
"type": "text",
"analyzer": "ngram_suggest"
},
"blah19": {
"type": "text"
},
"blah20": {
"type": "boolean"
},
"DocumentID": {
"type": "keyword"
},
"pledgedAmt": {
"type": "float"
},
"preferredLanguage": {
"type": "text"
},
"blah21": {
"type": "integer"
},
"blah22": {
"type": "integer"
},
"rating": {
"type": "integer"
},
"region": {
"type": "keyword"
},
"requestedAmt": {
"type": "float"
},
"showInFreeAreas": {
"type": "boolean"
},
"blah23": {
"type": "boolean"
},
"blah24": {
"type": "text"
},
"blah25": {
"type": "scaled_float",
"scaling_factor": 100000
},
"sortScore": {
"type": "long"
},
"started": {
"type": "text"
},
"statusKey": {
"type": "text"
},
"blah26": {
"type": "long"
},
"blah27": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tagName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"tagNameRaw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"tagNameSuggest": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "ngram_suggest"
},
"blah28": {
"type": "boolean"
},
"traceId": {
"type": "object",
"enabled": false
},
"updated": {
"type": "long"
},
"blah29": {
"type": "boolean"
}
}
}
}
}
}
and 7.9
{
"blah-state-37c088aea98d4b60ad58fb04abe55aa7" : {
"mappings" : {
"properties" : {
"accountStatus" : {
"type" : "keyword"
},
"boost" : {
"type" : "integer"
},
"age" : {
"type" : "integer"
},
"bleeeeeep" : {
"type" : "keyword"
},
"bleeeep" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "ngram_suggest"
},
"bleeep" : {
"type" : "keyword"
},
"bleep" : {
"type" : "keyword"
},
"blah0" : {
"type" : "boolean"
},
"blah1" : {
"type" : "boolean"
},
"blah2" : {
"type" : "text"
},
"hidden" : {
"type" : "boolean"
},
"blah3" : {
"type" : "boolean"
},
"blah4" : {
"type" : "boolean"
},
"blah5" : {
"type" : "boolean"
},
"blah6" : {
"type" : "boolean"
},
"blah7" : {
"type" : "boolean"
},
"blah8" : {
"type" : "boolean"
},
"blah9" : {
"type" : "boolean"
},
"blah10" : {
"type" : "boolean"
},
"blah11" : {
"type" : "boolean"
},
"blah12" : {
"type" : "boolean"
},
"blah13" : {
"type" : "boolean"
},
"blah14" : {
"type" : "boolean"
},
"blah15" : {
"type" : "boolean"
},
"blah16" : {
"type" : "boolean"
},
"isNew" : {
"type" : "boolean"
},
"blah17" : {
"type" : "boolean"
},
"keywords" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"languages" : {
"type" : "keyword"
},
"blah18" : {
"type" : "integer"
},
"blah19" : {
"type" : "integer"
},
"nickname" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"nicknamePartial" : {
"type" : "text",
"analyzer" : "ngram_partial"
},
"nicknameSuggest" : {
"type" : "text",
"analyzer" : "ngram_suggest"
},
"blah20" : {
"type" : "boolean"
},
"blah21" : {
"type" : "boolean"
},
"DocumentId" : {
"type" : "keyword"
},
"preferredLanguage" : {
"type" : "keyword"
},
"rating" : {
"type" : "integer"
},
"region" : {
"type" : "keyword"
},
"blah22" : {
"type" : "boolean"
},
"blah23" : {
"type" : "boolean"
},
"blah24" : {
"type" : "scaled_float",
"scaling_factor" : 100000.0
},
"sortScore" : {
"type" : "integer"
},
"blah25" : {
"type" : "keyword"
},
"tagName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"tagNameRaw" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"tagNameSuggest" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "ngram_suggest"
},
"blah26" : {
"type" : "boolean"
},
"traceId" : {
"type" : "object",
"enabled" : false
},
"updated" : {
"type" : "long"
},
"blah27" : {
"type" : "boolean"
}
}
}
}
}
Edit: here are the settings. 5.6:
"settings": {
"index": {
"analysis": {
"filter": {
"english_stemmer": {
"type": "stemmer",
"language": "english"
}
},
"analyzer": {
"ngram_partial": {
"filter": [
"standard",
"asciifolding",
"lowercase"
],
"tokenizer": "ngram"
},
"ngram_suggest": {
"filter": [
"standard",
"asciifolding",
"lowercase"
],
"tokenizer": "edge_ngram"
},
"normalized": {
"filter": [
"standard",
"asciifolding",
"lowercase",
"english_stemmer"
],
"type": "custom",
"tokenizer": "standard"
}
},
"tokenizer": {
"edge_ngram": {
"token_chars": [
"letter",
"digit",
"punctuation"
],
"min_gram": "1",
"type": "edge_ngram",
"max_gram": "20"
},
"ngram": {
"token_chars": [
"letter",
"digit",
"punctuation"
],
"min_gram": "2",
"type": "ngram",
"max_gram": "20"
}
}
},
"number_of_shards": "12"
}
}
and 7.9:
"settings" : {
"index" : {
"analysis" : {
"filter" : {
"english_stemmer" : {
"type" : "stemmer",
"language" : "english"
}
},
"analyzer" : {
"ngram_partial" : {
"filter" : [
"asciifolding",
"lowercase"
],
"tokenizer" : "ngram"
},
"ngram_suggest" : {
"filter" : [
"asciifolding",
"lowercase"
],
"tokenizer" : "edge_ngram"
},
"normalized" : {
"filter" : [
"asciifolding",
"lowercase",
"english_stemmer"
],
"type" : "custom",
"tokenizer" : "standard"
}
},
"tokenizer" : {
"edge_ngram" : {
"token_chars" : [
"letter",
"digit",
"punctuation"
],
"min_gram" : "1",
"type" : "edge_ngram",
"max_gram" : "20"
},
"ngram" : {
"token_chars" : [
"letter",
"digit",
"punctuation"
],
"min_gram" : "3",
"type" : "ngram",
"max_gram" : "3"
}
}
},
"number_of_shards" : "12"
}
}
Results of `_cat/shards` on 5.6:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 p STARTED 960 8mb 000.00.000.84 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 8mb 000.00.000.89 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 8.1mb 000.00.000.80 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 7.7mb 000.00.000.86 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 9.2mb 000.00.000.90 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 8.9mb 000.00.000.81 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 p STARTED 978 8.7mb 000.00.000.87 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 8.6mb 000.00.000.83 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 p STARTED 990 8.1mb 000.00.000.85 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 7.6mb 000.00.000.91 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 8.5mb 000.00.000.88 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 7.9mb 000.00.000.82 host3
and 7.9:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8 p STARTED 262 673.4mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8 r STARTED 286 667.8mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9 p STARTED 278 754.9mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9 r STARTED 196 729.7mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7 p STARTED 247 654.2mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7 r STARTED 262 645.1mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4 p STARTED 225 719.8mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4 r STARTED 282 660.9mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6 p STARTED 274 715.6mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6 r STARTED 334 706.3mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11 r STARTED 194 691.6mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11 p STARTED 255 713.1mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3 p STARTED 212 716.6mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3 r STARTED 292 709.3mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 p STARTED 249 749.5mb 000.00.000.118 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 289 695.5mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5 p STARTED 243 701.4mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5 r STARTED 204 680.9mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 p STARTED 246 685.8mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 305 676.7mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10 p STARTED 235 701.2mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10 r STARTED 276 690.5mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 p STARTED 245 674.7mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 301 623.5mb 000.00.000.118 host4
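After much experimenting, we've determined that the cause of this was soft deletes (the `index.soft_deletes.enabled` index setting, which defaults to `true` for indices created on 7.x and did not exist in 5.6). Unfortunately, disabling soft deletes is deprecated, so this will be a problem for us in the future.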