Search code examples
jsonazurestreamazure-stream-analytics

How to flatten nested json data in Azure stream analytics


I have a problem writing a query to extract a table out of the arrays from a JSON file. I want to flatten the three arrays i.e. case_Time, details & others, and make them all in a normal SQL table.

Sample JSON data:

{
    "case_Time": [
        {
            "v1": "1",
            "v2": "0",
            "v3": "0",
            "date": "30 January ",
            "dateymd": "2020-01-30",
            "v4": "1",
            "v5": "0",
            "v6": "0"
        },
        {
            "v1": "1",
            "v2": "0",
            "v3": "0",
            "date": "31 January ",
            "dateymd": "2020-01-31",
            "v4": "1",
            "v5": "0",
            "v6": "0"
        }],
      "details": [
        {
            "d1": "281844",
            "d2": "10124024",
            "d3": "146791",
            "d4": "0",
            "d5": "0",
            "d6": "0",
            "lastupdatedtime": "24/12/2020 09:12:24",
            "d7": "2746",
            "d8": "9692643",
            "d9": "Total",           
            "notes": "some text"
        },
        {
            "d1": "281944",
            "d2": "1012",
            "d3": "1791",
            "d4": "0",
            "d5": "0",
            "d6": "0",
            "lastupdatedtime": "25/12/2020 09:12:24",
            "d7": "2746",
            "d8": "96643",
            "d9": "Total",           
            "notes": "some text"
        }],
    "others": [
        {
            "p1": "",
            "p2": "75.64",
            "p3": "",
            "p4": "",
            "p5": "",
            "p6": "",
            "date": "13/03/2020",
            "p7": "",
            "p8": "1.20%",
            "p9": "",
            "p10": "83.33",
            "p11": "5",
            "p12": "5900",
            "p13": "78"
                    },
        {
             "p1": "",
            "p2": "75.64",
            "p3": "",
            "p4": "",
            "p5": "",
            "p6": "",
            "date": "14/03/2020",
            "p7": "",
            "p8": "1.20%",
            "p9": "",
            "p10": "81.33",
            "p11": "5",
            "p12": "500",
            "p13": "78"
        }
]
}

I tried the below query but getting first array data only, how to flatten remaining array :

WITH Cases AS
(
   SELECT   
   arrayElement.ArrayIndex,  
   arrayElement.ArrayValue as av  
   FROM input as event  
   CROSS APPLY GetArrayElements(event.case_Time) AS arrayElement 
)
SELECT av.v1, av.v2, av.v3,av.date,av.dateymd, av.v4,av.v5,av.v6
INTO powerbi
FROM Cases

Appreciate any help :)


Solution

  • You can Cross APPLY all your array, try something like this:

    WITH Cases AS
        (
           SELECT   
           arrayElement.ArrayIndex as ai,  
           arrayElement.ArrayValue as av,
           y.ArrayIndex as yi,
           y.ArrayValue as dt,
           z.ArrayIndex as zi,
           z.ArrayValue as ot
           FROM input as event  
           CROSS APPLY GetArrayElements(event.case_Time) AS arrayElement
           CROSS APPLY GetArrayElements(event.details) AS y
           CROSS APPLY GetArrayElements(event.others) AS z
        )
    
    SELECT av.v1, av.v2, av.v3,av.date,av.dateymd,av.v4,av.v5,av.v6,dt.d1,dt.d2,dt.d3,dt.d4,dt.d5,dt.d6,dt.lastupdatedtime,dt.d7,dt.d8,dt.d9,dt.notes,ot.p1,ot.p2,ot.p3,ot.p4,ot.p5,ot.p6,ot.p7,ot.p8,ot.p9,ot.p10,ot.p11,ot.p12,ot.p13,ot.date as tdate
    FROM Cases
    INTO powerbi
    

    This query will result in a complete cross product, so you will get 8 rows. If you only want to get 2 rows(correspond index), you can add Where ai = yi and yi = zi