I just implemented apache's solr search
in our e-commerce application. I need to fine tune the search list for better user experience. I need guidance since I am new to solr search.
What I need to do is more relevant result or exact result. Say example if user types ‘pen’, it should list
But its displaying
First 3 is OK but dispensers, the word pen is present in middle of the word (dis pen sers) so no need to list dispensers with the pen lists. How to achieve this?
Update-1:
schema.xml
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true" />
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
<fieldtype name="binary" class="solr.BinaryField" />
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<!-- A Trie based date field for faster date range queries and date faceting. -->
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0" />
<fieldType name="pint" class="solr.IntField" omitNorms="true" />
<fieldType name="plong" class="solr.LongField" omitNorms="true" />
<fieldType name="pfloat" class="solr.FloatField" omitNorms="true" />
<fieldType name="pdouble" class="solr.DoubleField" omitNorms="true" />
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true" />
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true" />
<fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true" />
<fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true" />
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true" />
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<!-- Text keyword -->
<fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
<!-- Text path -->
<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/" />
</analyzer>
</fieldType>
<!--FOR AUTO SUGGESTION FIELD -->
<fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="50"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<!-- Text Shingle -->
<fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory" />
<filter class="solr.ShingleFilterFactory" maxShingleSize="7" outputUnigrams="true"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
</types>
<!-- Fields definition -->
<fields>
<field name="unique_id" type="string" indexed="true" required="true" />
<field name="products_id" type="string" indexed="true" stored="true" required="true" />
<field name="sku" type="text_keyword" indexed="true" stored="true" omitNorms="true" />
<field name="category_id" type="int" indexed="true" stored="true" multiValued="true" />
<field name="store_id" type="int" indexed="true" stored="true" />
<field name="website_id" type="int" indexed="true" stored="true" />
<field name="product_status" type="int" indexed="true" stored="true" />
<field name="category_path" type="text_path" indexed="true" stored="true" multiValued="true" omitNorms="true" />
<field name="textSpell" type="text_ws" stored="true" indexed="true" multiValued="true" omitNorms="true" />
<field name="textSearch" type="edgytext" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
<field name="textSearchText" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
<field name="_version_" type="long" indexed="true" stored="true" />
<!--Dynamic fields -->
<dynamicField name="*_int" type="int" indexed="true" stored="true" />
<dynamicField name="*_varchar" type="text_keyword" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true" />
<dynamicField name="*_text" type="edgytext" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
<dynamicField name="*_decimal" type="float" indexed="true" stored="true" />
<dynamicField name="*_datetime" type="date" indexed="true" stored="true" />
<dynamicField name="*_static" type="string" indexed="true" stored="true" />
<dynamicField name="*_boost" type="edgytext" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_boost_exact" type="text_ws" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_facet" type="text_keyword" indexed="true" stored="true" multiValued="true" omitNorms="true" />
</fields>
Update-2:
Your textSearch field of type edgytext is based on the letter-level ngrams, i.e.
In: "bicycle"
Out: "bicy", "bicyc", "icyc", "icycl", "cycl", "cycle", "ycle"
example taken from: https://cwiki.apache.org/confluence/display/solr/Tokenizers#Tokenizers-N-GramTokenizer
This will allow for inter-token search results, that you see. If you would like to have token-level n-grams define you search field based on text_shingle. Btw, be careful with the maxShingleSize. Value 7 seems a bit too high, it will significantly contribute to the index size.