Search code examples
azureazure-data-lakeu-sql

Query JSON nested objects using U-SQL


I am trying to get Country and Category from below. I am able to get country but not Category.

Example input:

[{
    "context": {
        "location": {
            "clientip": "0.0.0.0",
            "continent": "Asia",
            "country": "Singapore"
        },
        "custom": {
            "dimensions": [{
                "Category": "Noah Version"
            }]
        }
    }
}]

My Query:

@json =
EXTRACT
    [location] string,
    [device] string,
    [custom.dimensions] string
FROM
    @InputFile
USING new JsonExtractor("context");

@CreateJSONTuple = 
SELECT 
JsonFunctions.JsonTuple([location]) AS LocationData,
JsonFunctions.JsonTuple([device]) AS DeviceData,
JsonFunctions.JsonTuple([custom.dimensions]) AS DimensionData
FROM @json;

@Dataset =
SELECT
LocationData["country"] ?? "NA" AS Country
,LocationData["province"] ?? "NA" AS Province
,LocationData["city"] ?? "N/A" AS City
,DeviceData["osVersion"] ?? "N/A" AS OsVersion
,DeviceData["roleName"] ?? "N/A" AS RoleName
,DimensionData["Category"] ?? "N/A" AS Category
FROM @CreateJSONTuple;


OUTPUT @Dataset
TO @transBlobOutputFile
USING Outputters.Csv(outputHeader:true,quoting:true);

Solution

  • You have to take in account that the array consists of complex types as well. Also, you cannot just use a notation like EXTRACT [custom.dimensions] string, you have to add itermediate steps for that as well like this:

    @json  =
        EXTRACT location String,
                device String,
                custom String
        FROM @InputFile
        USING new Microsoft.Analytics.Samples.Formats.Json.JsonExtractor("context");
    
    @CreateJSONTuple =
        SELECT JsonFunctions.JsonTuple(location) AS LocationData,
               JsonFunctions.JsonTuple(device) AS DeviceData,
               JsonFunctions.JsonTuple(custom, "dimensions[0]") AS DimensionData
        FROM @json;
    
    @result =
        SELECT LocationData["country"] ?? "N/A" AS Country,
               LocationData["province"] ?? "N/A" AS Province,
               LocationData["city"] ?? "N/A" AS City,
               DeviceData["osVersion"] ?? "N/A" AS OsVersion,
               DeviceData["roleName"] ?? "N/A" AS RoleName,
               DimensionData["dimensions[0]"] AS CategoryDimension
        FROM @CreateJSONTuple;
    
    @result =
        SELECT Country,
               Province,
               City,
               OsVersion,
               RoleName,
               JsonFunctions.JsonTuple(CategoryDimension) AS CategoryDimension
        FROM @result;
    
    @result =
        SELECT Country,
               Province,
               City,
               OsVersion,
               RoleName,
               CategoryDimension["Category"] AS Category
        FROM @result;
    
    OUTPUT @result
    TO @output
    USING Outputters.Csv(outputHeader : true, quoting : true);
    

    This outputs:

    "Country" "Province" "City" "OsVersion" "RoleName" "Category"
    "Singapore" "N/A" "N/A" "Microsoft Windows NT 6.1.7601 Service Pack 1" "Connexx" "Noah Version"