Search code examples
database elasticsearch memory lucene upgrade

ElasticSearch 7 Index way too big vs ElasticSearch 5


We're in the process of migrating from ElasticSearch 5.6 to 7.9. On 5.6, we have 2 indices: one with 3.4k documents taking up 111.2 MB, and another with 81.6k documents taking up 845.6 MB. On 7.9 we have the same 2 indices (written to by the same process) with similar mappings; however, they use 14.3 GB and 15.6 GB respectively.

I don't understand what could be making these indices so much larger on 7.9 vs 5.6.

If you are curious, here are the mappings (I've obfuscated the names of many fields to protect our data). ES 5.6:

{
  "blah-state-37c088aea98d4b60ad58fb04abe55aa7": {
    "mappings": {
      "blahblah": {
        "properties": {
          "blahStatus": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "blah": {
            "type": "integer"
          },
          "blahblah": {
            "type": "long"
          },
          "blahblahblah": {
            "type": "text"
          },
          "blahblahblahblah": {
            "type": "integer"
          },
          "blahblahblahzzz": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "ngram_suggest"
          },
          "blahblahblahhh": {
            "type": "text",
            "index": false,
            "store": true
          },
          "blahblahblaaaa": {
            "type": "keyword"
          },
          "created": {
            "type": "text"
          },
          "ended": {
            "type": "text"
          },
          "blaaaaah": {
            "type": "boolean"
          },
          "blaahaah": {
            "type": "integer"
          },
          "bloop": {
            "type": "boolean"
          },
          "bloopibob": {
            "type": "integer"
          },
          "blabiba": {
            "type": "keyword"
          },
          "blah": {
            "type": "long"
          },
          "bleeeep": {
            "type": "boolean"
          },
          "blahhh": {
            "type": "boolean"
          },
          "blahah": {
            "type": "text"
          },
          "hidden": {
            "type": "boolean"
          },
          "blah1": {
            "type": "boolean"
          },
          "blah2": {
            "type": "boolean"
          },
          "blah3": {
            "type": "boolean"
          },
          "blah4": {
            "type": "boolean"
          },
          "blah5": {
            "type": "boolean"
          },
          "blah6": {
            "type": "boolean"
          },
          "blah7": {
            "type": "boolean"
          },
          "blah8": {
            "type": "boolean"
          },
          "blah9": {
            "type": "boolean"
          },
          "blah10": {
            "type": "boolean"
          },
          "blah11": {
            "type": "boolean"
          },
          "blah12": {
            "type": "boolean"
          },
          "blah13": {
            "type": "boolean"
          },
          "isInvalid": {
            "type": "boolean"
          },
          "blah14": {
            "type": "boolean"
          },
          "isNew": {
            "type": "boolean"
          },
          "blah15": {
            "type": "boolean"
          },
          "keywords": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "languages": {
            "type": "keyword"
          },
          "blah16": {
            "type": "integer"
          },
          "blah17": {
            "type": "integer"
          },
          "blah18": {
            "type": "keyword"
          },
          "maxWait": {
            "type": "integer"
          },
          "minBuyIn": {
            "type": "float"
          },
          "nickname": {
            "type": "text",
            "fields": {
              "raw": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "nicknamePartial": {
            "type": "text",
            "analyzer": "ngram_partial"
          },
          "nicknameSuggest": {
            "type": "text",
            "analyzer": "ngram_suggest"
          },
          "blah19": {
            "type": "text"
          },
          "blah20": {
            "type": "boolean"
          },
          "DocumentID": {
            "type": "keyword"
          },
          "pledgedAmt": {
            "type": "float"
          },
          "preferredLanguage": {
            "type": "text"
          },
          "blah21": {
            "type": "integer"
          },
          "blah22": {
            "type": "integer"
          },
          "rating": {
            "type": "integer"
          },
          "region": {
            "type": "keyword"
          },
          "requestedAmt": {
            "type": "float"
          },
          "showInFreeAreas": {
            "type": "boolean"
          },
          "blah23": {
            "type": "boolean"
          },
          "blah24": {
            "type": "text"
          },
          "blah25": {
            "type": "scaled_float",
            "scaling_factor": 100000
          },
          "sortScore": {
            "type": "long"
          },
          "started": {
            "type": "text"
          },
          "statusKey": {
            "type": "text"
          },
          "blah26": {
            "type": "long"
          },
          "blah27": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "tagName": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "tagNameRaw": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "tagNameSuggest": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "ngram_suggest"
          },
          "blah28": {
            "type": "boolean"
          },
          "traceId": {
            "type": "object",
            "enabled": false
          },
          "updated": {
            "type": "long"
          },
          "blah29": {
            "type": "boolean"
          }
        }
      }
    }
  }
}

and 7.9

{
  "blah-state-37c088aea98d4b60ad58fb04abe55aa7" : {
    "mappings" : {
      "properties" : {
        "accountStatus" : {
          "type" : "keyword"
        },
        "boost" : {
          "type" : "integer"
        },
        "age" : {
          "type" : "integer"
        },
        "bleeeeeep" : {
          "type" : "keyword"
        },
        "bleeeep" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ngram_suggest"
        },
        "bleeep" : {
          "type" : "keyword"
        },
        "bleep" : {
          "type" : "keyword"
        },
        "blah0" : {
          "type" : "boolean"
        },
        "blah1" : {
          "type" : "boolean"
        },
        "blah2" : {
          "type" : "text"
        },
        "hidden" : {
          "type" : "boolean"
        },
        "blah3" : {
          "type" : "boolean"
        },
        "blah4" : {
          "type" : "boolean"
        },
        "blah5" : {
          "type" : "boolean"
        },
        "blah6" : {
          "type" : "boolean"
        },
        "blah7" : {
          "type" : "boolean"
        },
        "blah8" : {
          "type" : "boolean"
        },
        "blah9" : {
          "type" : "boolean"
        },
        "blah10" : {
          "type" : "boolean"
        },
        "blah11" : {
          "type" : "boolean"
        },
        "blah12" : {
          "type" : "boolean"
        },
        "blah13" : {
          "type" : "boolean"
        },
        "blah14" : {
          "type" : "boolean"
        },
        "blah15" : {
          "type" : "boolean"
        },
        "blah16" : {
          "type" : "boolean"
        },
        "isNew" : {
          "type" : "boolean"
        },
        "blah17" : {
          "type" : "boolean"
        },
        "keywords" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "languages" : {
          "type" : "keyword"
        },
        "blah18" : {
          "type" : "integer"
        },
        "blah19" : {
          "type" : "integer"
        },
        "nickname" : {
          "type" : "text",
          "fields" : {
            "raw" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "nicknamePartial" : {
          "type" : "text",
          "analyzer" : "ngram_partial"
        },
        "nicknameSuggest" : {
          "type" : "text",
          "analyzer" : "ngram_suggest"
        },
        "blah20" : {
          "type" : "boolean"
        },
        "blah21" : {
          "type" : "boolean"
        },
        "DocumentId" : {
          "type" : "keyword"
        },
        "preferredLanguage" : {
          "type" : "keyword"
        },
        "rating" : {
          "type" : "integer"
        },
        "region" : {
          "type" : "keyword"
        },
        "blah22" : {
          "type" : "boolean"
        },
        "blah23" : {
          "type" : "boolean"
        },
        "blah24" : {
          "type" : "scaled_float",
          "scaling_factor" : 100000.0
        },
        "sortScore" : {
          "type" : "integer"
        },
        "blah25" : {
          "type" : "keyword"
        },
        "tagName" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "tagNameRaw" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "tagNameSuggest" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ngram_suggest"
        },
        "blah26" : {
          "type" : "boolean"
        },
        "traceId" : {
          "type" : "object",
          "enabled" : false
        },
        "updated" : {
          "type" : "long"
        },
        "blah27" : {
          "type" : "boolean"
        }
      } 
    }
  }
}

Edit: here are the settings. ES 5.6:

"settings": {
      "index": {
        "analysis": {
          "filter": {
            "english_stemmer": {
              "type": "stemmer",
              "language": "english"
            }
          },
          "analyzer": {
            "ngram_partial": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase"
              ],
              "tokenizer": "ngram"
            },
            "ngram_suggest": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase"
              ],
              "tokenizer": "edge_ngram"
            },
            "normalized": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "english_stemmer"
              ],
              "type": "custom",
              "tokenizer": "standard"
            }
          },
          "tokenizer": {
            "edge_ngram": {
              "token_chars": [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram": "1",
              "type": "edge_ngram",
              "max_gram": "20"
            },
            "ngram": {
              "token_chars": [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram": "2",
              "type": "ngram",
              "max_gram": "20"
            }
          }
        },
        "number_of_shards": "12"
      }
    }

and 7.9:

"settings" : {
      "index" : {
        "analysis" : {
          "filter" : {
            "english_stemmer" : {
              "type" : "stemmer",
              "language" : "english"
            }
          },
          "analyzer" : {
            "ngram_partial" : {
              "filter" : [
                "asciifolding",
                "lowercase"
              ],
              "tokenizer" : "ngram"
            },
            "ngram_suggest" : {
              "filter" : [
                "asciifolding",
                "lowercase"
              ],
              "tokenizer" : "edge_ngram"
            },
            "normalized" : {
              "filter" : [
                "asciifolding",
                "lowercase",
                "english_stemmer"
              ],
              "type" : "custom",
              "tokenizer" : "standard"
            }
          },
          "tokenizer" : {
            "edge_ngram" : {
              "token_chars" : [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram" : "1",
              "type" : "edge_ngram",
              "max_gram" : "20"
            },
            "ngram" : {
              "token_chars" : [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram" : "3",
              "type" : "ngram",
              "max_gram" : "3"
            }
          }
        },
        "number_of_shards" : "12"
      }
    }

Results of _cat/shards on 5.6:

redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     p      STARTED  960   8mb 000.00.000.84 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960   8mb 000.00.000.89 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960 8.1mb 000.00.000.80 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960 7.7mb 000.00.000.86 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 9.2mb 000.00.000.90 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 8.9mb 000.00.000.81 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     p      STARTED  978 8.7mb 000.00.000.87 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 8.6mb 000.00.000.83 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     p      STARTED  990 8.1mb 000.00.000.85 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 7.6mb 000.00.000.91 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 8.5mb 000.00.000.88 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 7.9mb 000.00.000.82 host3

and 7.9:

redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8     p      STARTED  262 673.4mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8     r      STARTED  286 667.8mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9     p      STARTED  278 754.9mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9     r      STARTED  196 729.7mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7     p      STARTED  247 654.2mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7     r      STARTED  262 645.1mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4     p      STARTED  225 719.8mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4     r      STARTED  282 660.9mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6     p      STARTED  274 715.6mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6     r      STARTED  334 706.3mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11    r      STARTED  194 691.6mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11    p      STARTED  255 713.1mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3     p      STARTED  212 716.6mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3     r      STARTED  292 709.3mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     p      STARTED  249 749.5mb 000.00.000.118 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  289 695.5mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5     p      STARTED  243 701.4mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5     r      STARTED  204 680.9mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     p      STARTED  246 685.8mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  305 676.7mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10    p      STARTED  235 701.2mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10    r      STARTED  276 690.5mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     p      STARTED  245 674.7mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  301 623.5mb 000.00.000.118 host4

Solution

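  • After much experimenting, we've determined that the cause of this was soft deletes (the `index.soft_deletes.enabled` index setting, which is enabled by default on indices created in 7.x). Unfortunately, disabling soft deletes is deprecated, so this will be a problem for us in the future.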