Search code examples
json xpath talend

Uniting multiple JSON entries in Talend


I am trying to use tExtractJSONFields. In the JSON text there are multiple authors, and each author's FirstName and LastName are stored under separate keys. I would like to combine the two into a single name per author, with the authors separated by a delimiter, so that the result can be seen in tLogRow.

I ticked the Array option for the fields in the "columns" section of the tExtractJSONFields component:

Job configuration

I am getting the output from tLogRow as [Han, Kamber], [Jiawei, Micheline]:

Output

I want the output as Han, Jiawei; Kamber, Micheline.

   {  
    "d":{  
        "__type":"Response:http:\/\/research.microsoft.com",
        "Author":null,
        "Conference":null,
        "Domain":null,
        "Journal":null,
        "Keyword":null,
        "Organization":null,
        "Publication":{  
            "__type":"PublicationResponse:http:\/\/research.microsoft.com",
            "EndIdx":1,
            "StartIdx":1,
            "TotalItem":112686,
            "Result":[  
                {  
                    "__type":"Publication:http:\/\/research.microsoft.com",
                    "Abstract":null,
                    "Author":[  
                        {  
                            "__type":"Author:http:\/\/research.microsoft.com",
                            "Affiliation":null,
                            "CitationCount":0,
                            "DisplayPhotoURL":null,
                            "FirstName":"Jiawei",
                            "GIndex":0,
                            "HIndex":0,
                            "HomepageURL":null,
                            "ID":594572,
                            "LastName":"Han",
                            "MiddleName":"",
                            "NativeName":null,
                            "PublicationCount":0,
                            "ResearchInterestDomain":null
                        },
                        {  
                            "__type":"Author:http:\/\/research.microsoft.com",
                            "Affiliation":null,
                            "CitationCount":0,
                            "DisplayPhotoURL":null,
                            "FirstName":"Micheline",
                            "GIndex":0,
                            "HIndex":0,
                            "HomepageURL":null,
                            "ID":2331044,
                            "LastName":"Kamber",
                            "MiddleName":"",
                            "NativeName":null,
                            "PublicationCount":0,
                            "ResearchInterestDomain":null
                        }
                    ],
                    "CitationContext":null,
                    "CitationCount":5979,
                    "Conference":null,
                    "DOI":"",
                    "FullVersionURL":null,
                    "ID":694978,
                    "Journal":null,
                    "Keyword":[  
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":9033,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":9972,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":22078,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":35009,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":36239,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":38375,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":40483,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":41259,
                            "Name":null,
                            "PublicationCount":0
                        },
                        {  
                            "__type":"Keyword:http:\/\/research.microsoft.com",
                            "CitationCount":0,
                            "ID":73998,
                            "Name":null,
                            "PublicationCount":0
                        }
                    ],
                    "ReferenceCount":160,
                    "Title":"Data Mining: Concepts and Techniques",
                    "Type":1,
                    "Year":2000
                }
            ]
        },
        "ResultCode":0,
        "Trend":null,
        "Version":"1.1"
    }
}

Solution

  • I could have used tJavaRow, splitting the inputs (since they are arrays) and then combining the elements at matching indexes. That approach is quite complicated, and in the end I didn't need it: the information I was looking for already existed in the source, and I just had to reroute it instead.