Search code examples
solr lucene

Solr Nested Documents


I am trying to figure out how to index nested documents in Solr 8. I have found examples that describe how to do this, but none of them provide a schema.xml.

schema.xml

<schema name="example-data-driven-schema" version="1.6">
  <!-- Declares the fields of the parent posts and of their child documents
       (comments, contributors), plus the nested-document bookkeeping fields
       _root_, _nest_path_ and _nest_parent_. -->
  
  <fields>
        <!-- Bookkeeping fields. _root_ is indexed only here (stored="false",
             docValues="false"). -->
        <field name="_version_" type="long" indexed="true" stored="true" required="true"/> 
        <field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
        <field name="id" type="string" indexed="true" stored="true" required="true"/>
        <!-- Content fields: title/author are sent on the parent posts,
             comment/commenter and contributor_name/contributor_role on the children. -->
        <field name="title" type="text_general" indexed="true" stored="true"/>
        <field name="author" type="text_general" indexed="true" stored="true"/>
        <field name="comment" type="text_general" indexed="true" stored="true"/>
        <field name="commenter" type="text_general" indexed="true" stored="true"/>
        <field name="contributor_name" type="text_general" indexed="true" stored="true"/>
        <field name="contributor_role" type="text_general" indexed="true" stored="true"/>
        <!-- Nested-document fields: _nest_path_ uses the solr.NestPathField type
             declared below; _nest_parent_ is an indexed, stored string. -->
        <field name="_nest_path_" type="_nest_path_" />
        <field name="_nest_parent_" type="string" indexed="true" stored="true" />
    
    <!-- Catch-all: any field name not declared above maps to the "ignored" type,
         which is neither indexed nor stored. -->
    <dynamicField name="*" type="ignored"/>

  </fields>  
  
  <!-- The "id" field is the unique key of every document. -->
  <uniqueKey>id</uniqueKey>
 
  
  <!-- Field types. Only ignored, _nest_path_, long, string and text_general are
       referenced by the fields above; booleans, tdates, tdoubles and tlongs are
       declared but not used in this snippet. -->
  <!-- NOTE(review): the Trie* classes are reported as deprecated in favour of the
       *PointField classes in recent Solr releases; confirm against the target
       Solr version before reusing this schema. -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="_nest_path_" class="solr.NestPathField" />
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
  <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="0"/>
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
  <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="6"/>
  <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"/>
  <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>

</schema>

I am adding documents with this command:

# POST two parent posts as a JSON array to the update handler of the
# nested_documents_example collection; commit=true requests a commit as part
# of the same update.
# Each parent lists its children under the anonymous "_childDocuments_" key and
# sets "_nest_parent_" to its own id.
# curl flags: -k disables TLS certificate verification (irrelevant for this
# http:// URL), -i includes the response headers in the output.
curl -X POST -k -H 'Content-Type: application/json' -i 'http://localhost:8983/solr/nested_documents_example/update?commit=true' --data '[
  {
    "id": "post101",
    "title": "How to Optimize Solr Queries",
    "author": "Mike Johnson",
    "_nest_parent_": "post101",
    "_childDocuments_": [
      {
        "id": "comment101",
        "comment": "This article helped me a lot!",
        "commenter": "Sophie"
      },
      {
        "id": "contributor101",
        "contributor_name": "Karen",
        "contributor_role": "Reviewer"
      }
    ]
  },
  {
    "id": "post102",
    "title": "Advanced Solr Schema Design",
    "author": "Sarah Brown",
    "_nest_parent_": "post102",
    "_childDocuments_": [
      {
        "id": "comment102",
        "comment": "Great schema design tips!",
        "commenter": "James"
      }
    ]
  }
]'

This is what I get when I query Solr (http://localhost:8983/solr/nested_documents_example/select?q=*:*):

{

      "responseHeader": {
            "zkConnected": true,
            "status": 0,
            "QTime": 0,
            "params": {
                  "q": "*:*",
                  "indent": "true",
                  "q.op": "OR"
            }
      },
      "response": {
            "numFound": 5,
            "start": 0,
            "numFoundExact": true,
            "docs": [
                  {
                        "id": "comment101",
                        "comment": "This article helped me a lot!",
                        "commenter": "Sophie",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "contributor101",
                        "contributor_name": "Karen",
                        "contributor_role": "Reviewer",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "post101",
                        "title": "How to Optimize Solr Queries",
                        "author": "Mike Johnson",
                        "_nest_parent_": "post101",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "comment102",
                        "comment": "Great schema design tips!",
                        "commenter": "James",
                        "_version_": 1820876636939092000
                  },
                  {
                        "id": "post102",
                        "title": "Advanced Solr Schema Design",
                        "author": "Sarah Brown",
                        "_nest_parent_": "post102",
                        "_version_": 1820876636939092000
                  }
            ]
      }

}

Instead of nested results, everything is at the top level (flattened). Can anyone shed light on what I'm doing wrong?


Solution

  • Here's what I came up with; it seems to work:

    schema.xml

    <schema name="example-data-driven-schema" version="1.6">
      <!-- Declares the fields of the parent posts and of their child documents,
           plus the nested-document bookkeeping fields _root_, _nest_path_ and
           _nest_parent_. -->
      <fields>
            <!-- Bookkeeping fields. _root_ and _nest_parent_ (below) set few or no
                 properties of their own; presumably the remaining ones come from the
                 "string" type (docValues="true") and Solr's defaults. -->
            <field name="_version_" type="long" indexed="true" stored="true" required="true"/> 
            <field name="_root_" type="string" indexed="true" />
            <field name="id" type="string" indexed="true" stored="true" required="true"/>
            <!-- Content fields: title/author are sent on the parent posts,
                 comment/commenter and contributor_name/contributor_role on the children. -->
            <field name="title" type="text_general" indexed="true" stored="true"/>
            <field name="author" type="text_general" indexed="true" stored="true"/>
            <field name="comment" type="text_general" indexed="true" stored="true"/>
            <field name="commenter" type="text_general" indexed="true" stored="true"/>
            <field name="contributor_name" type="text_general" indexed="true" stored="true"/>
            <field name="contributor_role" type="text_general" indexed="true" stored="true"/>
            <!-- Nested-document fields: _nest_path_ uses the solr.NestPathField type
                 declared below; _nest_parent_ is a plain string field. -->
            <field name="_nest_path_" type="_nest_path_" />
            <field name="_nest_parent_" type="string"/>
            <!-- Catch-all: any field name not declared above maps to the "ignored"
                 type, which is neither indexed nor stored. -->
            <dynamicField name="*" type="ignored"/>
      </fields>  
      <!-- The "id" field is the unique key of every document. -->
      <uniqueKey>id</uniqueKey>
      <!-- Field types. Only ignored, _nest_path_, long, string and text_general are
           referenced by the fields above; booleans, tdates, tdoubles and tlongs are
           declared but not used in this snippet. -->
      <!-- NOTE(review): the Trie* classes are reported as deprecated in favour of
           the *PointField classes in recent Solr releases; confirm against the
           target Solr version before reusing this schema. -->
      <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
      <fieldType name="_nest_path_" class="solr.NestPathField" />
      <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
      <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="0"/>
      <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
      <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="6"/>
      <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
      <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"/>
      <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
    </schema>
    

    Insert these documents:

    [
      {
        "id": "post101",
        "title": "How to Optimize Solr Queries",
        "author": "Mike Johnson",
        "comments": [
          {
            "id": "comment101",
            "comment": "This article helped me a lot!",
            "commenter": "Sophie"
          }],
          "contributors": [{
            "id": "contributor101",
            "contributor_name": "Karen",
            "contributor_role": "Reviewer"
          }
        ]
      },
      {
        "id": "post102",
        "title": "Advanced Solr Schema Design",
        "author": "Sarah Brown",
        "comments": [
          {
            "id": "comment102",
            "comment": "Great schema design tips!",
            "commenter": "James"
          }
        ]
      }
    ]
    

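    The select that I did was correct in returning everything "flattened": a plain query lists parent and child documents side by side. The problem was that I needed to add fl=*,[child] to the request (in addition to indexing the children under the named fields comments and contributors instead of _childDocuments_, as shown above). After doing that, my results are: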

    .
    .
    .
    "docs": [
    
              {
                    "id": "comment101",
                    "comment": "This article helped me a lot!",
                    "commenter": "Sophie",
                    "_nest_parent_": "post101",
                    "_root_": "post101",
                    "_version_": 1820881500429615000
              },
              {
                    "id": "contributor101",
                    "contributor_name": "Karen",
                    "contributor_role": "Reviewer",
                    "_nest_parent_": "post101",
                    "_root_": "post101",
                    "_version_": 1820881500429615000
              },
              {
                    "id": "post101",
                    "title": "How to Optimize Solr Queries",
                    "author": "Mike Johnson",
                    "_version_": 1820881500429615000,
                    "_root_": "post101",
                    "comments": [
                          {
                                "id": "comment101",
                                "comment": "This article helped me a lot!",
                                "commenter": "Sophie",
                                "_nest_parent_": "post101",
                                "_root_": "post101",
                                "_version_": 1820881500429615000
                          }
                    ],
                    "contributors": [
                          {
                                "id": "contributor101",
                                "contributor_name": "Karen",
                                "contributor_role": "Reviewer",
                                "_nest_parent_": "post101",
                                "_root_": "post101",
                                "_version_": 1820881500429615000
                          }
                    ]
              },
              {
                    "id": "comment102",
                    "comment": "Great schema design tips!",
                    "commenter": "James",
                    "_nest_parent_": "post102",
                    "_root_": "post102",
                    "_version_": 1820881500430663700
              },
              {
                    "id": "post102",
                    "title": "Advanced Solr Schema Design",
                    "author": "Sarah Brown",
                    "_version_": 1820881500430663700,
                    "_root_": "post102",
                    "comments": [
                          {
                                "id": "comment102",
                                "comment": "Great schema design tips!",
                                "commenter": "James",
                                "_nest_parent_": "post102",
                                "_root_": "post102",
                                "_version_": 1820881500430663700
                          }
                    ]
              }
    
        ]