Search code examples
elasticsearch nest stemming snowball snowballanalyzer

Snowball stemmer is not working


I created an index for an attachment using Elasticsearch 2.3.3 and NEST 2.3.2. My index is shown below. I am searching with singular words for documents that contain the plural forms. I read that the snowball stemmer performs this kind of conversion, but no records are returned by the search. Does any extra plugin or word collection need to be added to the application to make it work? Can anyone help?

 {  
 "mydocs":{  
   "aliases":{  

  },
  "mappings":{  
     "indexdocument":{  
        "properties":{  
           "docLocation":{  
              "type":"string",
              "index":"not_analyzed",
              "store":true
           },
           "documentType":{  
              "type":"string",
              "store":true
           },
           "file":{  
              "type":"attachment",
              "fields":{  
                 "content":{  
                    "type":"string"
                 },
                 "author":{  
                    "type":"string"
                 },
                 "title":{  
                    "type":"string",
                    "term_vector":"with_positions_offsets",
                    "analyzer":"full"
                 },
                 "name":{  
                    "type":"string"
                 },
                 "date":{  
                    "type":"date",
                    "format":"strict_date_optional_time||epoch_millis"
                 },
                 "keywords":{  
                    "type":"string"
                 },
                 "content_type":{  
                    "type":"string"
                 },
                 "content_length":{  
                    "type":"integer"
                 },
                 "language":{  
                    "type":"string"
                 }
              }
           },
           "id":{  
              "type":"double",
              "store":true
           },
           "lastModifiedDate":{  
              "type":"date",
              "store":true,
              "format":"strict_date_optional_time||epoch_millis"
           },
           "title":{  
              "type":"string",
              "store":true,
              "term_vector":"with_positions_offsets"
           }
        }
     }
  },
  "settings":{  
     "index":{  
        "creation_date":"1466180794989",
        "analysis":{  
           "filter":{  
              "nGram":{  
                 "min_gram":"2",
                 "side":"front",
                 "type":"edge_ngram",
                 "max_gram":"20"
              }
           },
           "analyzer":{  
              "partial":{  
                 "filter":[  
                    "standard",
                    "asciifolding",
                    "lowercase",
                    "snowball"
                 ],
                 "type":"custom",
                 "tokenizer":"nGram"
              },
              "full":{  
                 "filter":[  
                    "standard",
                    "asciifolding",
                    "lowercase",
                    "snowball",
                    "nGram"
                 ],
                 "type":"custom",
                 "tokenizer":"standard"
              }
           }
        },
        "number_of_shards":"5",
        "number_of_replicas":"1",
        "uuid":"tc-yTpbIQGeGDMOOMspf_g",
        "version":{  
           "created":"2030399"
        }
     }
  },
  "warmers":{  

  }
 }
 }

I also tried stemmer_override, but with no luck.


Solution

  • I found the issue: the analyzer was not set on the attachment field.

     // Names of the token filters used by the custom "full" analyzer defined below.
     var fullNameFilters = new List<string> { "lowercase", "snowball" };
            // Create the "mydocs" index with its analysis settings and the IndexDocument mapping.
            client.CreateIndex("mydocs", c => c
                  .Settings(st => st
                            .Analysis(anl => anl
                            .Analyzers(h => h
                                // Custom analyzer "full": "standard" tokenizer plus the filters listed in fullNameFilters.
                                .Custom("full", ff => ff
                                     .Filters(fullNameFilters)
                                     .Tokenizer("standard"))
                                )
                                // Register a snowball token filter under the name "snowball", configured for English.
                                .TokenFilters(ba => ba
                                    .Snowball("snowball", sn => sn
                                        .Language(SnowballLanguage.English)))                    
                                 ))
                             .Mappings(mp => mp
                             .Map<IndexDocument>(ms => ms
                             .AutoMap()
                             // NOTE(review): File is mapped here as a nested Attachment and then mapped again
                             // as an attachment in the second Properties call; the mapping returned by the
                             // server (shown after this snippet) reports "type":"attachment" for "file".
                             // Confirm whether this nested mapping is needed at all.
                             .Properties(ps => ps
                                 .Nested<Attachment>(n => n
                                     .Name(sc => sc.File)
                                 .AutoMap()
                                 ))
                            // The fix: map File as an attachment and set the "full" analyzer on its
                            // file field and its title field, so the snowball stemming is applied to them.
                            .Properties(at => at
                            .Attachment(a => a.Name(o => o.File)
                            .FileField(fl=>fl.Analyzer("full"))
                            .TitleField(t => t.Name(x => x.Title)
                            .Analyzer("full")
                            .TermVector(TermVectorOption.WithPositionsOffsets)
                            )))
    
                            ))                        
                            );
    

    and http://localhost:9200/mydocs returns

    {  
     "mydocs":{  
        "aliases":{  
    
      },
      "mappings":{  
         "indexdocument":{  
            "properties":{  
               "docLocation":{  
                  "type":"string",
                  "index":"not_analyzed",
                  "store":true
               },
               "documentType":{  
                  "type":"string",
                  "store":true
               },
               "file":{  
                  "type":"attachment",
                  "fields":{  
                     "content":{  
                        "type":"string",
                        "analyzer":"full"
                     },
                     "author":{  
                        "type":"string"
                     },
                     "title":{  
                        "type":"string",
                        "term_vector":"with_positions_offsets",
                        "analyzer":"full"
                     },
                     "name":{  
                        "type":"string"
                     },
                     "date":{  
                        "type":"date",
                        "format":"strict_date_optional_time||epoch_millis"
                     },
                     "keywords":{  
                        "type":"string"
                     },
                     "content_type":{  
                        "type":"string"
                     },
                     "content_length":{  
                        "type":"integer"
                     },
                     "language":{  
                        "type":"string"
                     }
                  }
               },
               "filePermissionInfo":{  
                  "properties":{  
                     "accessControlType":{  
                        "type":"string",
                        "store":true
                     },
                     "accountValue":{  
                        "type":"string",
                        "store":true
                     },
                     "fileSystemRights":{  
                        "type":"string",
                        "store":true
                     },
                     "isInherited":{  
                        "type":"string",
                        "store":true
                     }
                  }
               },
               "id":{  
                  "type":"double",
                  "store":true
               },
               "lastModifiedDate":{  
                  "type":"date",
                  "store":true,
                  "format":"strict_date_optional_time||epoch_millis"
               },
               "title":{  
                  "type":"string",
                  "store":true,
                  "term_vector":"with_positions_offsets"
               }
            }
         }
      },
      "settings":{  
         "index":{  
            "creation_date":"1466482894271",
            "analysis":{  
               "filter":{  
                  "snowball":{  
                     "type":"snowball",
                     "language":"English"
                  }
               },
               "analyzer":{  
                  "full":{  
                     "filter":[  
                        "lowercase",
                        "snowball"
                     ],
                     "type":"custom",
                     "tokenizer":"standard"
                  }
               }
            },
            "number_of_shards":"5",
            "number_of_replicas":"1",
            "uuid":"PpxcRl29QTCPtFcsd3PHtw",
            "version":{  
               "created":"2030399"
            }
         }
      },
      "warmers":{  
    
      }
      }
     }