Search code examples
elasticsearch, reindex, elasticsearch-2.0

Elasticsearch Reindex API does not work


I am trying to use the Reindex API for Elasticsearch:

https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html

Here is the mapping of my source index:

"testtype": {
        "_all": {
          "enabled": false
        },
        "dynamic_templates": [
          {
            "message_field": {
              "mapping": {
                "fielddata": {
                  "format": "disabled"
                },
                "index": "analyzed",
                "omit_norms": true,
                "type": "string"
              },
              "match": "message",
              "match_mapping_type": "string"
            }
          },
          {
            "string_fields": {
              "mapping": {
                "fielddata": {
                  "format": "disabled"
                },
                "index": "analyzed",
                "omit_norms": true,
                "type": "string",
                "fields": {
                  "raw": {
                    "ignore_above": 256,
                    "index": "not_analyzed",
                    "type": "string"
                  }
                }
              },
              "match": "*",
              "match_mapping_type": "string"
            }
          }
        ],
        "properties": {
          "@timestamp": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          },
          "@version": {
            "type": "string",
            "index": "not_analyzed"
          },
          "app_code": {
            "type": "string"
          },
          "data": {
            "properties": {
              "action": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "level": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "message": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                }
              },
              "timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              }
            }
          },
          "header": {
            "properties": {
              "@timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              },
              "_id": {
                "type": "long"
              },
              "app_code": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "host": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "meta_host": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "name": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "pid": {
                "type": "long"
              },
              "source_id": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "source_name": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              },
              "timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              },
              "user": {
                "type": "string",
                "norms": {
                  "enabled": false
                },
                "fielddata": {
                  "format": "disabled"
                },
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "source_id": {
            "type": "string"
          },
          "timestamp": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          }
        }
      }

So it has some string fields that also have corresponding raw sub-fields. The problem is that the default fields are analyzed, so I want the new index to be:

{
  "mappings": {
    "test": {
        "dynamic_templates": [
            { "notanalyzed": {
                  "match": "*",
                  "path_unmatch":"data.message",
                  "match_mapping_type": "string",
                  "mapping": {
                      "type":        "string",
                      "index":       "not_analyzed",
                      "fielddata": {
                      "format": "disabled"
                    },
                    "fields": {
                      "raw": {
                        "ignore_above": 256,
                        "index": "not_analyzed",
                        "type": "string"
                      }
                    }
                  }
               }
            }
          ]
       }
   }
}

The old index already has some data, so I tried to reindex it with:

POST /_reindex
{
  "source": {
    "index": "oldindex",
    "type": ["testtype"]
  },
  "dest": {
    "index": "newindex"
  }
}

After I do this, I see that the new index has been converted to:

{
  "newindex": {
    "aliases": {},
    "mappings": {
      "testtype": {
        "properties": {
          "data": {
            "properties": {
              "action": {
                "type": "string"
              },
              "level": {
                "type": "string"
              },
              "message": {
                "type": "string"
              },
              "timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              }
            }
          },
          "header": {
            "properties": {
              "@timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              },
              "_id": {
                "type": "long"
              },
              "app_code": {
                "type": "string"
              },
              "host": {
                "type": "string"
              },
              "meta_host": {
                "type": "string"
              },
              "name": {
                "type": "string"
              },
              "pid": {
                "type": "long"
              },
              "source_id": {
                "type": "string"
              },
              "source_name": {
                "type": "string"
              },
              "timestamp": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
              },
              "user": {
                "type": "string"
              }
            }
          }
        }
      },
      "test": {
        "dynamic_templates": [
          {
            "notanalyzed": {
              "mapping": {
                "fielddata": {
                  "format": "disabled"
                },
                "index": "not_analyzed",
                "type": "string",
                "fields": {
                  "raw": {
                    "ignore_above": 256,
                    "index": "not_analyzed",
                    "type": "string"
                  }
                }
              },
              "match": "*",
              "match_mapping_type": "string",
              "path_unmatch": "data.message"
            }
          }
        ]
      }
    },
    "settings": {
      "index": {
        "creation_date": "1461792130202",
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "uuid": "nho7V2PpTbqzfsUVWVdLkA",
        "version": {
          "created": "2030099"
        }
      }
    },
    "warmers": {}
  }
}

I can't understand what happened here! It looks like the new data was just auto-indexed with a dynamically generated mapping. This is not what I intended at all.

I even tried:

POST /_reindex
{
  "source": {
    "index": "oldindex",
    "type": ["testtype"]
  },
  "dest": {
    "index": "newindex",
    "type": ["test"]
  }
}

But now I get:

{
   "error": "org.elasticsearch.ElasticsearchParseException: Unknown array field [type]"
}

What am I doing wrong? I can't use elasticdump or knapsack, as they are third-party plugins.


Solution

  • You're almost there.

    • Delete the wrongly created index: DELETE newindex
    • Create the newindex index with the desired mapping:
    PUT /newindex
    {
      "mappings": {
        "test": {
          "dynamic_templates": [
            {
              "notanalyzed": {
                "match": "*",
                "path_unmatch": "data.message",
                "match_mapping_type": "string",
                "mapping": {
                  "type": "string",
                  "index": "not_analyzed",
                  "fielddata": {
                    "format": "disabled"
                  },
                  "fields": {
                    "raw": {
                      "ignore_above": 256,
                      "index": "not_analyzed",
                      "type": "string"
                    }
                  }
                }
              }
            }
          ]
        }
      }
    }
    
    • Then use this slightly modified _reindex command:
    POST /_reindex
    {
      "source": {
        "index": "oldindex",
        "type": [
          "testtype"
        ]
      },
      "dest": {
        "index": "newindex"
      },
      "script": {
        "inline": "ctx._type='test'"
      }
    }
    

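    The important bit is the script, which tells the _reindex API to change the _type of the documents written to newindex from testtype to test. Without it, the documents keep their original testtype type, so the dynamic template defined on the test type never applies and Elasticsearch dynamically generates a default mapping for testtype instead.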