Tags: elasticsearch, search, elasticsearch-analyzers

Elasticsearch 6.4 throwing an error upon creating a custom character filter


So I am pretty sure I am missing something in the syntax, but I can't seem to figure out what exactly. I am trying to create the phone number pattern capture token filter defined here. It says to define a keyword tokenizer and then apply the pattern capture token filter on top of it. So that's what I did:

{
    "mappings": {
        "_doc": {
            "properties": {
                "phone": {
                    "type": "text",
                    "analyzer": "my_phone_analyzer"
                }
            }
        }
    },
    "settings": {
        "analysis": {
            "analyzer": {
                "my_phone_analyzer": {
                    "type": "custom",
                    "tokenizer": "keyword",
                    "char_filter": [
                        "phone_number"
                    ]
                }
            }
        },
        "char_filter": {
            "phone_number": {
                "type": "pattern_capture",
                "preserve_original": 1,
                "patterns": [
                    "1(\\d{3}(\\d+))"
                ]
            }
        }
    }
}

This is causing the following error:

{
    "error": {
        "root_cause": [
            {
                "type": "illegal_argument_exception",
                "reason": "unknown setting [index.char_filter.phone_number.patterns] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
            }
        ],
        "type": "illegal_argument_exception",
        "reason": "unknown setting [index.char_filter.phone_number.patterns] please check that any required plugins are installed, or check the breaking changes documentation for removed settings",
        "suppressed": [
            {
                "type": "illegal_argument_exception",
                "reason": "unknown setting [index.char_filter.phone_number.preserve_original] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
            },
            {
                "type": "illegal_argument_exception",
                "reason": "unknown setting [index.char_filter.phone_number.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
            }
        ]
    },
    "status": 400
}

If anyone can point out what I am doing wrong, that'd be great!


Solution

  • The link you've mentioned looks quite old.

    The pattern_capture filter no longer applies as a char_filter but only as a token filter.

    Below is how your mapping would look if you are using a version of Elasticsearch above 5.x:

    PUT <your_index_name>
    {
      "mappings": {
        "_doc": {
          "properties": {
            "phone": {
              "type": "text",
              "analyzer": "my_phone_analyzer"
            }
          }
        }
      },
      "settings": {
        "analysis": {
          "analyzer": {
            "my_phone_analyzer": {
              "type": "custom",
              "tokenizer": "keyword",
              "filter": [
                "phone_number"
              ]
            }
          },
          "filter": {
            "phone_number": {
              "type": "pattern_capture",
              "preserve_original": true,
              "patterns": [
                "1(\\d{3}(\\d+))"
              ]
            }
          }
        }
      }
    }
    

    You can use the Analyze API to see which tokens are generated, as shown below:

    POST <your_index_name>/_analyze
    {
      "analyzer": "my_phone_analyzer",
      "text": "19195557321"
    }
    

    Tokens:

    {
      "tokens" : [
        {
          "token" : "19195557321",
          "start_offset" : 0,
          "end_offset" : 11,
          "type" : "word",
          "position" : 0
        },
        {
          "token" : "9195557321",
          "start_offset" : 0,
          "end_offset" : 11,
          "type" : "word",
          "position" : 0
        },
        {
          "token" : "5557321",
          "start_offset" : 0,
          "end_offset" : 11,
          "type" : "word",
          "position" : 0
        }
      ]
    }
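
    With those tokens in the index, partial phone numbers should match at search time. Below is a minimal sketch (the index name and document id are placeholders, and it assumes no separate search_analyzer is configured on the field) that indexes one document and then searches for the last seven digits:

    PUT <your_index_name>/_doc/1
    {
      "phone": "19195557321"
    }

    GET <your_index_name>/_search
    {
      "query": {
        "match": {
          "phone": "5557321"
        }
      }
    }

    Since the match query runs the query text through the same analyzer, searching for "19195557321", "9195557321", or "5557321" should all return this document.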
    

    Hope that helps!