Search code examples
solr solrcloud

Does indexing all fields have a negative effect on Solr?


I have a SolrCloud cluster (version 7.4) with 2 nodes and 10 collections. This is the managed-schema of one of the collections:

<!-- Solr managed schema - automatically generated - DO NOT EDIT -->
<schema name="example" version="1.6">
<!-- Names the field that uniquely identifies a document; it is declared below as a required string field. -->
<uniqueKey>total_post_id</uniqueKey>
<!-- Primitive (non-analyzed) field types. Dates and numbers use the Point-based implementations. -->
<fieldType name="boolean" class="solr.BoolField" positionIncrementGap="0"/>
<fieldType name="date" class="solr.DatePointField"/>
<fieldType name="double" class="solr.DoublePointField"/>
<fieldType name="float" class="solr.FloatPointField"/>
<fieldType name="int" class="solr.IntPointField"/>
<fieldType name="long" class="solr.LongPointField"/>
<!--
  Case-insensitive exact-match type: the keyword tokenizer keeps the whole value as one token,
  which is then lowercased. Values that sort as "missing" are placed last.
-->
<fieldType name="lowerCase_text" class="solr.TextField" sortMissingLast="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"></tokenizer>
    <filter class="solr.LowerCaseFilterFactory"></filter>
  </analyzer>
</fieldType>
<!-- Pseudo-field type used only for random ordering of results. -->
<fieldType name="random"
           class="solr.RandomSortField"/>
<!-- Verbatim (non-analyzed) single string value. -->
<fieldType name="string"
           class="solr.StrField"
           sortMissingLast="true"/>
<!-- Verbatim strings, multi-valued, with docValues enabled at the type level. -->
<fieldType name="strings"
           class="solr.StrField"
           indexed="true"
           sortMissingLast="true"
           docValues="true"
           multiValued="true"/>
<!--
  General-purpose analyzed text: whitespace tokenization, word-delimiter splitting, lowercasing.
  The index analyzer additionally emits catenated words and numbers; the query analyzer does not.

  solr.WordDelimiterFilterFactory is deprecated, so the graph-aware
  solr.WordDelimiterGraphFilterFactory is used instead with the same options. At index time it
  must be followed by solr.FlattenGraphFilterFactory, because an index cannot hold a token graph.

  NOTE(review): this changes index-time analysis; reindex existing documents after deploying it.
-->
<fieldType name="text_general" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <!-- Required after any graph filter in an index analyzer. -->
    <filter class="solr.FlattenGraphFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
  Explicit field declarations, in alphabetical order.
  Unless a comment says otherwise, a field here is declared indexed="true" and stored="true".
-->
<field name="ExtractedHashtag" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="ExtractedIP" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="RobotSeeDate_dt" type="date" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<!-- NOTE(review): presumably Solr's internal document-version field; confirm before changing its attributes. -->
<field name="_version_" type="long" docValues="true" indexed="true" stored="true"/>
<field name="address" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="category" type="string" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="channel_id" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="channel_id_real" type="long" docValues="true" indexed="true" stored="true"/>
<!-- Analyzed with lowerCase_text (whole value, lowercased); no docValues attribute is set on this field. -->
<field name="channel_username" type="lowerCase_text" indexed="true" stored="true"/>
<field name="create_date" type="date" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="edit_date" type="date" docValues="true" indexed="true" stored="true"/>
<field name="fax" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="format" type="string" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="forwarded_channel" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<!-- Analyzed with lowerCase_text (whole value, lowercased); no docValues attribute is set on this field. -->
<field name="forwarded_channel_username" type="lowerCase_text" indexed="true" stored="true"/>
<field name="forwarded_post_id" type="long" docValues="true" multiValued="false" indexed="true" stored="true"/>
<field name="forwarded_user" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="grouped_id" type="string" docValues="true" indexed="true" stored="true"/>
<field name="id" type="long" docValues="true" multiValued="false" indexed="true" stored="true"/>
<field name="is_silent" type="boolean" docValues="true" indexed="true" stored="true"/>
<field name="keywords" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="langD_s" type="string" docValues="true" indexed="true" stored="true"/>
<field name="location" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="media_id" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<field name="media_unread" type="boolean" docValues="true" indexed="true" stored="true"/>
<field name="mentioned" type="boolean" docValues="true" indexed="true" stored="true"/>
<field name="message_number" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="mime_type" type="string" docValues="true" indexed="true" stored="true"/>
<field name="norm_ExtractedEmail" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedMention" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedMobile" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedNationalCode" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedPhone" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedPostalCode" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="norm_ExtractedURL" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<!-- Full-text fields analyzed with text_general; no docValues attribute is set on them. -->
<field name="norm_channel_title" type="text_general" omitNorms="true" indexed="true" stored="true"/>
<field name="norm_media_caption" type="text_general" indexed="true" stored="true"/>
<field name="norm_post_author" type="string" docValues="true" indexed="true" stored="true"/>
<field name="organization" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="other" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="out" type="boolean" docValues="true" indexed="true" stored="true"/>
<field name="person" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="position" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="post" type="boolean" docValues="true" indexed="true" stored="true"/>
<field name="post_id" type="int" docValues="true" multiValued="false" indexed="true" stored="true"/>
<!-- Stored only (indexed="false"): can be returned in results but cannot be searched. -->
<field name="processed_text" type="text_general" indexed="false" stored="true"/>
<field name="producer_name" type="string" docValues="true" indexed="true" stored="true"/>
<field name="product" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<field name="province" type="string" docValues="true" indexed="true" stored="true"/>
<!-- The raw_* fields below are stored only (indexed="false"): returned in results, not searchable. -->
<field name="raw_channel_title" type="string" omitNorms="true" docValues="false" indexed="false" stored="true"/>
<field name="raw_media_caption" type="string" indexed="false" stored="true"/>
<field name="raw_post_author" type="string" indexed="false" stored="true"/>
<field name="reply_markup" type="string" docValues="true" indexed="true" stored="true"/>
<field name="signatureField" type="string" docValues="true" multiValued="false" indexed="true" stored="true"/>
<!-- Full-text field analyzed with text_general; no docValues attribute is set on it. -->
<field name="text" type="text_general" omitNorms="true" indexed="true" stored="true"/>
<field name="time" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
<!-- The schema's uniqueKey field: single-valued and required on every document. -->
<field name="total_post_id" type="string" docValues="true" multiValued="false" indexed="true" required="true" stored="true"/>
<field name="view_num" type="int" omitNorms="true" docValues="true" indexed="true" stored="true"/>
<!-- Dynamic field rules: applied to field names that match the pattern and have no explicit declaration. -->
<!-- random_<anything>: fields of the RandomSortField-backed "random" type. -->
<dynamicField name="random_*" type="random" indexed="true" stored="true"/>
<!-- <anything>_raw: stored only, neither indexed nor docValues. -->
<dynamicField name="*_raw" type="string" docValues="false" indexed="false" stored="true"/>
<!-- <anything>_ss: multi-valued strings (via the "strings" type), indexed, stored and with docValues. -->
<dynamicField name="*_ss" type="strings" docValues="true" indexed="true" stored="true"/>
</schema>

My collections hold 1 billion documents. Can I enable indexing and docValues on all fields? Does this have a negative impact on read or write performance?


Solution

  • Can I enable indexing and docValues on all fields?

    Yes, sure you can.

    Does this have a negative impact in read or write query?

    Every field you index or enable docValues on has a cost. Enabling indexing/docValues makes the index bigger and indexing (writes) slower. On the query (read) side you will typically be faster: you cannot search on a field that is not indexed, and although docValues are not strictly required for many operations, they can improve the performance of operations such as sorting, faceting and grouping.

    As always, you need to find a balance. If your collections are big (and 1 billion documents is big), you typically enable indexing/docValues only on the fields you absolutely need. If everything works fine, you can then enable them on a handful of additional fields, test again, and repeat iteratively.