Search code examples
db2db2-luw

DB2 Avoid Duplicates in JSON_ARRAY


I am using DB2LUW 11.5. I build a json and gets an output like this

{
        "ID": 1,
        "NAME": "a",
        "B_OBJECTS": [{
                "ID": 1,
                "SIZE": 10
            }, {
                "ID": 1,
                "SIZE": 20
            }
        ]
    }

But I want the id from B_OBJECTS listed only once.

{
        "ID": 1,
        "NAME": "a",
        "B_OBJECTS": [{
                "ID": 1,
                "SIZE": 10
            }
        ]
    }

Here is my query...

WITH TABLE_A(ID,NAME) AS (
        VALUES (1, 'a')),
TABLE_B(ID, ID_A, SIZE) AS (
        VALUES (1, 1, 10), (1, 1, 20)),
JSON_STEP_1 AS (
        SELECT A.ID AS A_ID, A.NAME AS A_NAME, B.ID AS B_ID,
        JSON_OBJECT('ID' VALUE B.ID, 'SIZE' VALUE B.SIZE) B_JSON
        FROM TABLE_A A
        JOIN TABLE_B B ON B.ID_A = A.ID       
        GROUP BY A.ID, A.NAME, B.ID, B.SIZE),
JSON_STEP_2 AS (
 SELECT JSON_OBJECT ('ID' VALUE A_ID,
                     'NAME' VALUE A_NAME,
                     'B_OBJECTS' VALUE JSON_ARRAY (LISTAGG(B_JSON, ', ') WITHIN GROUP (ORDER BY B_ID) FORMAT JSON) FORMAT JSON
                    ) JSON_OBJS
 FROM JSON_STEP_1
 GROUP BY A_ID, A_NAME
) 
SELECT JSON_ARRAY (SELECT JSON_OBJS FROM JSON_STEP_2 FORMAT JSON) FROM SYSIBM.SYSDUMMY1;

I just updated the query with additional table TABLE_C

WITH 
  TABLE_A(ID,NAME) AS 
(
        VALUES (1, 'a')
)
, TABLE_B(ID, ID_A, SIZE) AS 
(
        VALUES (1, 1, 10), (1, 1, 20), (2, 1, 10), (2, 1, 20)
), TABLE_C(ID, ID_A, SIZE) AS
(
        VALUES (1, 1, 5), (2,1,10), (3,1,15)
)
, JSON_STEP_1 AS 
(
  SELECT A_ID, A_NAME, B_ID
  , JSON_OBJECT('ID' VALUE B_ID, 'SIZE' VALUE B_SIZE) B_JSON
  , JSON_OBJECT('ID' VALUE C_ID, 'SIZE' VALUE C_SIZE) C_JSON
  FROM
  (
        SELECT 
          A.ID AS A_ID, A.NAME AS A_NAME, B.ID AS B_ID, B.SIZE AS B_SIZE, C.ID AS C_ID, C.SIZE AS C_SIZE
        , ROW_NUMBER () OVER (PARTITION BY B.ID, B.ID_A, B.SIZE) AS RN_
        , ROW_NUMBER () OVER (PARTITION BY C.ID, C.ID_A, C.SIZE) AS RN1_
        
        FROM TABLE_A A
        JOIN TABLE_B B ON B.ID_A = A.ID      
        JOIN TABLE_C C ON C.ID_A = A.ID     
   )     
   WHERE RN_ = 1 AND RN1_ = 1
   GROUP BY A_ID, A_NAME, B_ID, B_SIZE, B_ID, B_SIZE, C_ID, C_SIZE
)
, JSON_STEP_2 AS 
(
 SELECT 
 JSON_OBJECT 
  (
    'ID' VALUE A_ID,
    'NAME' VALUE A_NAME,
    'B_OBJECTS' VALUE JSON_ARRAY (LISTAGG(B_JSON, ', ') WITHIN GROUP (ORDER BY B_ID) FORMAT JSON) FORMAT JSON,
    'C_OBJECTS' VALUE JSON_ARRAY (LISTAGG(C_JSON, ', ') WITHIN GROUP (ORDER BY B_ID) FORMAT JSON) FORMAT JSON
  ) JSON_OBJS
 FROM JSON_STEP_1
 GROUP BY A_ID, A_NAME
) 
SELECT JSON_ARRAY (SELECT JSON_OBJS FROM JSON_STEP_2 FORMAT JSON) FROM SYSIBM.SYSDUMMY1

The output should be like

{
        "ID": 1,
        "NAME": "a",
        "B_OBJECTS": [{
                "ID": 1,
                "SIZE": 10
            },
            {
                "ID": 1,
                "SIZE": 20
            },
            {
                "ID": 2,
                "SIZE": 10
            },
            {
                "ID": 2,
                "SIZE": 20
            }
        ],
        "C_OBJECTS": [{
                "ID": 1,
                "SIZE": 5
            },
            {
                "ID": 2,
                "SIZE": 10
            },
            {
                "ID": 3,
                "SIZE": 15
            }
        ]
    }

Solution

  • WITH 
      TABLE_A (ID,NAME) AS 
    (
      VALUES (1, 'a')
    )
    , TABLE_B (ID, ID_A, SIZE) AS 
    (
      VALUES (1, 1, 10), (1, 1, 10), (2, 1, 10), (2, 1, 20)
    )
    , TABLE_C (ID, ID_A, SIZE) AS
    (
      VALUES (1, 1, 5), (2, 1, 10), (3, 1, 15)
    )
    SELECT 
    JSON_OBJECT 
    (
        'ID' VALUE A.ID
      , 'NAME' VALUE A.NAME
      , 'B_OBJECTS' VALUE 
        JSON_ARRAY 
        (
          (
            SELECT JSON_OBJECT ('ID' VALUE ID, 'SIZE' VALUE SIZE FORMAT JSON)
            FROM TABLE (SELECT DISTINCT ID, SIZE FROM TABLE_B B WHERE B.ID_A = A.ID ORDER BY ID)
          ) 
          FORMAT JSON
        ) FORMAT JSON
      , 'C_OBJECTS' VALUE 
        JSON_ARRAY 
        (
          (
            SELECT JSON_OBJECT ('ID' VALUE ID, 'SIZE' VALUE SIZE FORMAT JSON)
            FROM TABLE (SELECT DISTINCT ID, SIZE FROM TABLE_C C WHERE C.ID_A = A.ID ORDER BY ID)
          ) 
          FORMAT JSON
        ) FORMAT JSON
    ) JSON_OBJ
    FROM TABLE_A A
    
    {
     "ID":1,"NAME":"a"
    ,"B_OBJECTS":
      [
        {"ID":1,"SIZE":10},
        {"ID":2,"SIZE":10},
        {"ID":2,"SIZE":20}
      ]
    ,"C_OBJECTS":
      [
        {"ID":1,"SIZE":5},
        {"ID":2,"SIZE":10},
        {"ID":3,"SIZE":15}
      ]
    }