Search code examples
snapshotazure-data-explorerkql

How to convert multiple record updates into periodic snapshot in kusto


I have a mechanism that is posting an update to Azure Data Explorer each time a record changes at source. So the data end up as a series of versions of the record in ADX. I would like to turn it into a daily snapshot with the most recent version being used at the snapshot time. I have managed to do something close with

let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
    "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
    "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
    "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
    "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
    "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5
];
let binsize = 1d;
let min_date_time = toscalar(visits | summarize startofday(min(timestamp)));
let max_date_time = toscalar(visits | summarize endofday(max(timestamp)));
//
range hour from min_date_time to max_date_time step binsize
| join kind=leftouter (
    visits
    | summarize arg_max(timestamp, *) by id, bin(timestamp, binsize)
    | extend hour = bin(timestamp, binsize)
) on hour
| project-away hour1

This gives the following:

hour id timestamp timestamp1 category start end row
2021-10-01 00:00:00.0000000 b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-01 00:00:00.0000000 2021-10-01 01:02:03.0000000 SRU 2021-09-30 01:02:03.0000000 1
2021-10-02 00:00:00.0000000 b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-02 00:00:00.0000000 2021-10-02 02:05:03.0000000 SRU 2021-09-30 01:02:03.0000000 3
2021-10-03 00:00:00.0000000
2021-10-04 00:00:00.0000000 b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-04 00:00:00.0000000 2021-10-04 04:05:03.0000000 SRU 2021-09-30 01:02:03.0000000 4
2021-10-05 00:00:00.0000000 b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-05 00:00:00.0000000 2021-10-05 07:05:03.0000000 SRU 2021-09-30 01:02:03.0000000 2021-10-01 07:00:00.0000000 5

This is sort of right as it correctly picks the latest value (row 3) for 2021-10-02, however it doesn't pick row 3 for the following time period and we get blanks.

I'm stumped on the last part.

If it helps, the next part of the puzzle is to aggregate grouped on category resulting in something like

day category total started ended
2021-10-01 SRU 1 1 0
2021-10-02 SRU 1 1 0
2021-10-03 SRU 1 1 0
2021-10-04 SRU 1 1 0
2021-10-05 SRU 1 0 1

Solution

  • Here is the solution for the first table, uncomment the last line to get the second table:

    let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
    [
        "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
        "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
        "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
        "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
        "b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5,
        "8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 6, 
        "8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-02T07:00:00), 7
    ];
    let binsize = 1d;
    let StartDate = datetime(2021-10-01);
    let EndDate = datetime(2021-10-06);
    visits
    | summarize arg_max(timestamp, *) by ['id'], Day = bin(timestamp,1d)
    | partition hint.strategy=native  by ['id'] 
    (
        make-series  timestamp = take_any(tolong(timestamp)) default=long(null),
                     start = take_any(tolong(start)) default=long(null),
                     end = take_any(tolong(end)) default=long(null),
                     row = take_any(row) default=long(null),total = count() default=long(null), 
                     started=countif(isnull(end)) default=long(null), 
                     ended=countif(isnotnull(end)) 
                     on Day from StartDate to EndDate step 1d  by category, ['id']
        | extend timestamp = series_fill_forward(timestamp), 
                 start = series_fill_forward(start), 
                 end = series_fill_forward(end),
                 row = series_fill_forward(row),
                 total = series_fill_forward(total), 
                 started=series_fill_forward(started), 
                 ended=series_fill_forward(ended)
    )
    | mv-expand timestamp to typeof(long), start to typeof(long), end to typeof(long), Day to typeof(datetime), row to typeof(int), total to typeof(int), started to typeof(int), ended to typeof(int)
    | extend timestamp = todatetime(timestamp), start=todatetime(start), end=todatetime(end)
    | project-reorder  id, Day, timestamp, start, end, row, category
    //| summarize Total = sum(total), sum(started), sum(ended) by Day
    
    id Day timestamp start end row category total started ended
    b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-01 00:00:00.0000000 2021-10-01 01:02:03.0000000 2021-09-30 01:02:03.0000000 1 SRU 1 1 0
    8acaffa4-3ab8-479c-8f13-191c016bff70 2021-10-01 00:00:00.0000000 2021-10-01 01:02:03.0000000 2021-09-30 01:02:03.0000000 6 SRU 1 1 0
    8acaffa4-3ab8-479c-8f13-191c016bff70 2021-10-02 00:00:00.0000000 2021-10-02 02:02:03.0000000 2021-09-30 01:02:03.0000000 2021-10-02 07:00:00.0000000 7 SRU 1 0 1
    b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-02 00:00:00.0000000 2021-10-02 02:05:03.0000000 2021-09-30 01:02:03.0000000 3 SRU 1 1 0
    8acaffa4-3ab8-479c-8f13-191c016bff70 2021-10-03 00:00:00.0000000 2021-10-02 02:02:03.0000000 2021-09-30 01:02:03.0000000 2021-10-02 07:00:00.0000000 7 SRU 1 0 1
    b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-03 00:00:00.0000000 2021-10-02 02:05:03.0000000 2021-09-30 01:02:03.0000000 3 SRU 1 1 0
    8acaffa4-3ab8-479c-8f13-191c016bff70 2021-10-04 00:00:00.0000000 2021-10-02 02:02:03.0000000 2021-09-30 01:02:03.0000000 2021-10-02 07:00:00.0000000 7 SRU 1 0 1
    b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-04 00:00:00.0000000 2021-10-04 04:05:03.0000000 2021-09-30 01:02:03.0000000 4 SRU 1 1 0
    8acaffa4-3ab8-479c-8f13-191c016bff70 2021-10-05 00:00:00.0000000 2021-10-02 02:02:03.0000000 2021-09-30 01:02:03.0000000 2021-10-02 07:00:00.0000000 7 SRU 1 0 1
    b5ce180e-ce11-4936-b3f1-c817a261622e 2021-10-05 00:00:00.0000000 2021-10-05 07:05:03.0000000 2021-09-30 01:02:03.0000000 2021-10-01 07:00:00.0000000 5 SRU 1 0 1