Search code examples
jsonazureazure-stream-analyticsazure-application-insights

Export Custom Event Dimensions to SQL from Application Insights using Stream Analytics


I'm following the example walkthrough Export to SQL from Application Insights using Stream Analytics. I am trying to export custom event dimensions (context.custom.dimensions in the JSON example below) which get added as a nested JSON array in the data file. How do I flatten the dimensions array at context.custom.dimensions for export to SQL?

JSON...

{
  "event": [
    {
      "name": "50_DistanceSelect",
      "count": 1
    }
  ],
  "internal": {
    "data": {
      "id": "aad2627b-60c5-48e8-aa35-197cae30a0cf",
      "documentVersion": "1.5"
    }
  },
  "context": {
    "device": {
      "os": "Windows",
      "osVersion": "Windows 8.1",
      "type": "PC",
      "browser": "Chrome",
      "browserVersion": "Chrome 43.0",
      "screenResolution": {
        "value": "1920X1080"
      },
      "locale": "unknown",
      "id": "browser",
      "userAgent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
    },
    "application": {},
    "location": {
      "continent": "North America",
      "country": "United States",
      "point": {
        "lat": 38.0,
        "lon": -97.0
      },
      "clientip": "0.115.6.185",
      "province": "",
      "city": ""
    },
    "data": {
      "isSynthetic": false,
      "eventTime": "2015-07-15T23:43:27.595Z",
      "samplingRate": 0.0
    },
    "operation": {
      "id": "2474EE6F-5F6F-48C3-BA43-51636928075A"
    },
    "user": {
      "anonId": "BA05C4BE-1C42-482F-9836-D79008E78A9D",
      "anonAcquisitionDate": "0001-01-01T00:00:00Z",
      "authAcquisitionDate": "0001-01-01T00:00:00Z",
      "accountAcquisitionDate": "0001-01-01T00:00:00Z"
    },
    "custom": {
      "dimensions": [
        {
          "CategoryAction": "click"
        },
        {
          "SessionId": "73ef454d-fa39-4125-b4d0-44486933533b"
        },
        {
          "WebsiteVersion": "3.0"
        },
        {
          "PageSection": "FilterFind"
        },
        {
          "Category": "EventCategory1"
        },
        {
          "Page": "/page-in-question"
        }
      ],
      "metrics": []
    },
    "session": {
      "id": "062703E5-5E15-491A-AC75-2FE54EF03623",
      "isFirst": false
    }
  }
}

Solution

  • A slightly more dynamic solution is to set up a temp table:

    WITH ATable AS (
    SELECT
         temp.internal.data.id as ID
        ,dimensions.ArrayValue.CategoryAction as CategoryAction
        ,dimensions.ArrayValue.SessionId as SessionId 
        ,dimensions.ArrayValue.WebsiteVersion as WebsiteVersion 
        ,dimensions.ArrayValue.PageSection as PageSection 
        ,dimensions.ArrayValue.Category as Category 
        ,dimensions.ArrayValue.Page as Page  
    FROM [analyticseventinputs] temp 
    CROSS APPLY GetElements(temp.[context].[custom].[dimensions]) as dimensions)
    

    and then doing joins based on a unique key

    FROM [analyticseventinputs] Input 
    Left JOIN ATable CategoryAction on 
        Input.internal.data.id = CategoryAction.ID AND
        CategoryAction.CategoryAction <> "" AND
         DATEDIFF(day, Input, CategoryAction) BETWEEN 0 AND 5 
    

    The rather annoying bit is the requirement for the datediff, because the joins are intended to combine 2 streams of data but in this case you are only joining on the unique key. So I set it to a large value of 5 days. This really only protects against the custom params not coming in ordered compared to the other solution.