Search code examples
sql-serversql-server-2016open-json

A concatenation OPENJSON in a SQL Server 2016 stored procedure


I need to combine all of the authors for a particular UID. The basic fields are working from the code at another post.

DECLARE @json NVARCHAR(MAX)

SET @json = '{
        "header": {
            "type": "esummary",
            "version": "0.3"
        },
        "result": {
            "uids": [
                "17784783",
                "19505939",
                "30166592"
            ],
            "17784783": {
                "uid": "17784783",
                "pubdate": "2007 Aug",
                "epubdate": "2007 Jul 20",
                "source": "PLoS Comput Biol",          
                "sortpubdate": "2007/08/01 00:00",
                   "authors": [
                {
                    "name": "Yu Y",
                    "authtype": "Author",
                    "clusterid": ""
                },
                {
                    "name": "Wang G",
                    "authtype": "Author",
                    "clusterid": ""
                },
                {
                    "name": "Simha R",
                    "authtype": "Author",
                    "clusterid": ""
                }
                 ]          
            ,
            "19505939": {
                "uid": "19505939",
                "pubdate": "2009 Aug 1",
                "epubdate": "2009 Jun 8",
                "source": "Bioinformatics",          
                "sortpubdate": "2009/08/01 00:00"
                  },
                "authors": [
                {
                    "name": "Zang C",
                    "authtype": "Author",
                    "clusterid": ""
                }],

        "30166592": {
            "uid": "30166592",
            "pubdate": "2019 Jan",
            "epubdate": "2018 Aug 30",
            "source": "Oncogene",
             "sortpubdate": "2019/01/01 00:00",
            "authors": [
            {
                "name": "Sun J",
                "authtype": "Author",
                "clusterid": ""
            },
            {
                "name": "Cai X",
                "authtype": "Author",
                "clusterid": ""
            }],

        }
    }
}'

and I would like to end up with

uid         sortpubdate         epubdate         Authors
-----------------------------------------------------------------------
17784783    2007/08/01 00:00    2007 Jul 20      Yu Y,Wang G,Simha R
19505939    2009/08/01 00:00    2009 Jun 8       Simha R   
30166592    2019/01/01 00:00    2018 Aug 30      Sun J, Cai, X  

I got a great answer from Zohar Peled at OPENJSON syntax in a stored procedure in SQL Server 2016 that help with the first part now I need to see if I can finish this up.

SELECT [uid], [sortpubdate], [epubdate]
FROM OPENJSON(@json, N'$.result') AS items
CROSS APPLY
-- parse each object in the array
OPENJSON(items.[value])
WITH(
    [uid] nvarchar(max) N'$.uid' ,
    [sortpubdate] nvarchar(max) N'$.sortpubdate',
    [epubdate] nvarchar(max) N'$.epubdate'
) As content
WHERE [key] <> 'uids' -- Get only the relevant content

Solution

  • The json you've posted is a bit messy, so I've edited it.

    This next query should get you the results including the author names using a common table expression and a combination of stuff and for xml.

    Fixed json:

    DECLARE @json NVARCHAR(MAX) = 
    '{
      "header": {
        "type": "esummary",
        "version": "0.3"
      },
      "result": {
        "17784783": {
          "uid": "17784783",
          "pubdate": "2007 Aug",
          "epubdate": "2007 Jul 20",
          "source": "PLoS Comput Biol",
          "sortpubdate": "2007/08/01 00:00",
          "authors": [
            {
              "name": "Yu Y",
              "authtype": "Author",
              "clusterid": ""
            },
            {
              "name": "Wang G",
              "authtype": "Author",
              "clusterid": ""
            },
            {
              "name": "Simha R",
              "authtype": "Author",
              "clusterid": ""
            }
          ]
        },
        "19505939": {
          "uid": "19505939",
          "pubdate": "2009 Aug 1",
          "epubdate": "2009 Jun 8",
          "source": "Bioinformatics",
          "sortpubdate": "2009/08/01 00:00",
          "authors": [
            {
              "name": "Zang C",
              "authtype": "Author",
              "clusterid": ""
            }
          ]
        },
        "30166592": {
          "uid": "30166592",
          "pubdate": "2019 Jan",
          "epubdate": "2018 Aug 30",
          "source": "Oncogene",
          "sortpubdate": "2019/01/01 00:00",
          "authors": [
            {
              "name": "Sun J",
              "authtype": "Author",
              "clusterid": ""
            },
            {
              "name": "Cai X",
              "authtype": "Author",
              "clusterid": ""
            }
          ]
        },
        "uids": [
          "17784783",
          "19505939",
          "30166592"
        ]
      }
    }'; 
    

    Use a common table expression to extract the values from the json:

    WITH CTE AS
    (
        SELECT  [uid], 
                [sortpubdate], 
                [epubdate], 
                [name]
        FROM OPENJSON(@json, N'$.result') AS items
        CROSS APPLY
        -- parse each object in the array
        OPENJSON(items.[value])
        WITH(
            [uid] nvarchar(max) N'$.uid' ,
            [sortpubdate] nvarchar(max) N'$.sortpubdate',
            [epubdate] nvarchar(max) N'$.epubdate',
            -- Note the AS JSON on the next row - will not work without it!
            [authors] nvarchar(max) N'$.authors'  AS JSON 
        ) As content
        CROSS APPLY 
        OPENJSON([authors])
        WITH ([name] nvarchar(max) N'$.name') 
        As authorsNames
        WHERE items.[key] <> 'uids' -- Get only the relevant content
    )
    

    And query the cte with stuff and for xml:

    SELECT  DISTINCT [uid], 
            [sortpubdate], 
            [epubdate], 
            STUFF((
                SELECT ',' + [name] 
                FROM CTE As t1
                WHERE t1.[uid] = t0.[uid]
                FOR XML PATH('')
            ), 1, 1, '') As authors
    FROM CTE As t0
    

    Results:

    uid         sortpubdate         epubdate        authors
    17784783    2007/08/01 00:00    2007 Jul 20     Yu Y,Wang G,Simha R
    19505939    2009/08/01 00:00    2009 Jun 8      Zang C
    30166592    2019/01/01 00:00    2018 Aug 30     Sun J,Cai X