Search code examples
solrdih

Solr: DIH for mysql query with multiValued field?


I am trying to set up a multivalued field in Solr, but it is not working in my case.

DB Query result (sample)

|id  | another_id    | name          | phone       | type        |
|----------------------------------------------------------------|
|'1' | '11'          | 'F. Brown'    | '112233440' | 'employee'  |
|'2' | '22'          | 'Jhon Smith'  | '123123123' | 'guest'     |
|'2' | '22'          | 'Jhon Smith'  | '321321321' | 'guest'     |

Solr-data-config.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Solr DIH data-config: one JDBC data source and one entity whose result columns are mapped to Solr fields. -->
<dataConfig>
  <!-- JDBC connection to the local MySQL database servme_prd. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/servme_prd"
              user="root"
              password="root"/>
  <document>
    <!-- Joins table1 with table2 on table1.id = table2.fk_id; the selected columns are mapped to fields below. -->
    <entity name="person_cards" query="SELECT table1.id, table2.id AS another_id, table1.name, table2.phone, table1.type 
        FROM table1
        INNER JOIN table2 ON table1.id = table2.fk_id">
      <!-- column = name in the SQL result, name = target Solr field. -->
      <field name="uid" column="id"/>
      <field name="pid" column="another_id"/>
      <field name="name" column="name"/>
      <field name="phone" column="phone"/>
      <field name="type" column="type"/>
    </entity>
  </document>
</dataConfig>

managed-schema.xml

<!-- uid is the unique key of each document. -->
<uniqueKey>uid</uniqueKey>
<field name="_version_" type="plong" indexed="false" stored="false"/>
<!-- Required, single-valued identifiers. -->
<field name="uid" type="string" indexed="true" stored="true" required="true" multiValued="false" docValues="false"/>
<field name="pid" type="string" indexed="true" stored="true" required="true" multiValued="false" docValues="false"/>
<field name="name" type="string" indexed="true" stored="true"/>
<!-- phone is the only field declared multiValued. -->
<field name="phone" type="string" indexed="true" stored="true" multiValued="true" docValues="false"/>
<field name="type" type="string" indexed="true" stored="true"/>

Whenever I do a full-import, I do not get phone as a multivalued field. Sample Solr query response:

{
    "name":"F. Brown",
    "uid":"1",
    "pid":"11",
    "phone":["112233440"],
    "type":"employee",
    "_version_":1608065390436417536
},
{
    "name":"Jhon Smith",
    "uid":"2",
    "pid":"22",
    "phone":["123123123"],
    "type":"guest",
    "_version_":1608065390436417536
},
{
    "name":"Jhon Smith",
    "uid":"2",
    "pid":"22",
    "phone":["321321321"],
    "type":"guest",
    "_version_":1608065390436417536
}

Whereas I want to get the following response from the Solr query:

{
    "name":"F. Brown",
    "uid":"1",
    "pid":"11",
    "phone":["112233440"],
    "type":"employee",
    "_version_":1608065390436417536
},
{
    "name":"Jhon Smith",
    "uid":"2",
    "pid":"22",
    "phone":["123123123", "321321321"],
    "type":"guest",
    "_version_":1608065390436417536
}

Is anything missing from the Solr config that would explain why the multivalued field is not working as expected?

By the way, I am using Solr 7.4 installed on an Ubuntu 14 server. Thanks.


Solution

  • Since you're using MySQL, a quick fix is to use GROUP_CONCAT and then split the column with DIH's RegexTransformer:

    <!--
      Collapses the joined rows to one row per table1.id. GROUP_CONCAT joins the
      phone values of each group into a single comma-separated string, and
      splitBy="," on the phone field (handled by RegexTransformer) splits that
      string back into separate values for the multiValued Solr field.

      The grouping must use the SQL column table1.id: "uid" is only the Solr
      field name and does not exist as a column or alias in this query.

      NOTE(review): table2.id is selected without an aggregate. If the MySQL
      server has ONLY_FULL_GROUP_BY enabled the query may be rejected; confirm,
      and wrap it in MIN() or ANY_VALUE() if needed.
      NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len;
      verify the limit is large enough for the longest phone list.
    -->
    <entity transformer="RegexTransformer" name="person_cards" query="SELECT 
            table1.id, 
            table2.id AS another_id, 
            table1.name, 
            GROUP_CONCAT(table2.phone) AS phone, table1.type 
        FROM table1
        INNER JOIN table2 ON table1.id = table2.fk_id
        GROUP BY table1.id
        ">
        ...
        <field column="phone" name="phone" splitBy="," />
        ...
    </entity>