I have a mechanism that is posting an update to Azure Data Explorer each time a record changes at source. So the data end up as a series of versions of the record in ADX. I would like to turn it into a daily snapshot with the most recent version being used at the snapshot time. I have managed to do something close with
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5
];
let binsize = 1d;
let min_date_time = toscalar(visits | summarize startofday(min(timestamp)));
let max_date_time = toscalar(visits | summarize endofday(max(timestamp)));
//
range hour from min_date_time to max_date_time step binsize
| join kind=leftouter (
visits
| summarize arg_max(timestamp, *) by id, bin(timestamp, binsize)
| extend hour = bin(timestamp, binsize)
) on hour
| project-away hour1
This gives the following:
hour | id | timestamp | timestamp1 | category | start | end | row |
---|---|---|---|---|---|---|---|
2021-10-01 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 1 | |
2021-10-02 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 3 | |
2021-10-03 00:00:00.0000000 | |||||||
2021-10-04 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 4 | |
2021-10-05 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 |
This is sort of right as it correctly picks the latest value (row 3) for 2021-10-02, however it doesn't pick row 3 for the following time period and we get blanks.
I'm stumped on the last part.
If it helps, the next part of the puzzle is to aggregate grouped on category resulting in something like
day | category | total | started | ended |
---|---|---|---|---|
2021-10-01 | SRU | 1 | 1 | 0 |
2021-10-02 | SRU | 1 | 1 | 0 |
2021-10-03 | SRU | 1 | 1 | 0 |
2021-10-04 | SRU | 1 | 1 | 0 |
2021-10-05 | SRU | 1 | 0 | 1 |
Here is the solution for the first table, uncomment the last line to get the second table:
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 6,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-02T07:00:00), 7
];
let binsize = 1d;
let StartDate = datetime(2021-10-01);
let EndDate = datetime(2021-10-06);
visits
| summarize arg_max(timestamp, *) by ['id'], Day = bin(timestamp,1d)
| partition hint.strategy=native by ['id']
(
make-series timestamp = take_any(tolong(timestamp)) default=long(null),
start = take_any(tolong(start)) default=long(null),
end = take_any(tolong(end)) default=long(null),
row = take_any(row) default=long(null),total = count() default=long(null),
started=countif(isnull(end)) default=long(null),
ended=countif(isnotnull(end))
on Day from StartDate to EndDate step 1d by category, ['id']
| extend timestamp = series_fill_forward(timestamp),
start = series_fill_forward(start),
end = series_fill_forward(end),
row = series_fill_forward(row),
total = series_fill_forward(total),
started=series_fill_forward(started),
ended=series_fill_forward(ended)
)
| mv-expand timestamp to typeof(long), start to typeof(long), end to typeof(long), Day to typeof(datetime), row to typeof(int), total to typeof(int), started to typeof(int), ended to typeof(int)
| extend timestamp = todatetime(timestamp), start=todatetime(start), end=todatetime(end)
| project-reorder id, Day, timestamp, start, end, row, category
//| summarize Total = sum(total), sum(started), sum(ended) by Day
id | Day | timestamp | start | end | row | category | total | started | ended |
---|---|---|---|---|---|---|---|---|---|
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 1 | SRU | 1 | 1 | 0 | |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 6 | SRU | 1 | 1 | 0 | |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 3 | SRU | 1 | 1 | 0 | |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 3 | SRU | 1 | 1 | 0 | |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-04 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 4 | SRU | 1 | 1 | 0 | |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-05 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 | SRU | 1 | 0 | 1 |