Below is Percentile at week level or whatever available dates in table
let MySourceTable = datatable(col1:int, col2:int, Value:int, EventUTCDateTime:datetime)
[
1,2,10,'2022-01-01 01:00:01',
1,2,20,'2022-01-01 02:01:01',
1,2,15,'2022-01-02 03:05:01',
1,2,40,'2022-01-03 01:30:01',
1,2,20,'2022-01-03 02:30:01',
1,2,50,'2022-01-04 07:30:01',
1,2,70,'2022-01-05 06:30:01',
1,2,45,'2022-01-05 07:30:01',
1,2,90,'2022-01-05 07:30:01',
1,2,40,'2022-01-06 07:30:01'
];
MySourceTable
| extend EventUTCDate = bin(EventUTCDateTime,1d)
| project col1,col2,Value,EventUTCDate
| summarize Median=percentile(Value,50)
Below is Percentile at day level
let MySourceTable = datatable(col1:int, col2:int, Value:int, EventUTCDateTime:datetime)
[
1,2,10,'2022-01-01 01:00:01',
1,2,20,'2022-01-01 02:01:01',
1,2,15,'2022-01-02 03:05:01',
1,2,40,'2022-01-03 01:30:01',
1,2,20,'2022-01-03 02:30:01',
1,2,50,'2022-01-04 07:30:01',
1,2,70,'2022-01-05 06:30:01',
1,2,45,'2022-01-05 07:30:01',
1,2,90,'2022-01-05 07:30:01',
1,2,40,'2022-01-06 07:30:01'
];
MySourceTable
| extend EventUTCDate = bin(EventUTCDateTime,1d)
| project col1,col2,Value,EventUTCDate
| summarize Median=percentile(Value,50) by col1,col2,EventUTCDate
| project col1,col2, Median,EventUTCDate
Below is Percentile on top of pre-calculated percentile at day level
let MySourceTable = datatable(col1:int, col2:int, Value:int, EventUTCDateTime:datetime)
[
1,2,10,'2022-01-01 01:00:01',
1,2,20,'2022-01-01 02:01:01',
1,2,15,'2022-01-02 03:05:01',
1,2,40,'2022-01-03 01:30:01',
1,2,20,'2022-01-03 02:30:01',
1,2,50,'2022-01-04 07:30:01',
1,2,70,'2022-01-05 06:30:01',
1,2,45,'2022-01-05 07:30:01',
1,2,90,'2022-01-05 07:30:01',
1,2,40,'2022-01-06 07:30:01'
];
MySourceTable
| extend EventUTCDate = bin(EventUTCDateTime,1d)
| project col1,col2,Value,EventUTCDate
| summarize Median=percentile(Value,50) by col1,col2,EventUTCDate
| project col1,col2, Median,EventUTCDate
| summarize Median02=percentile(Median,50)
Can you please help me how to apply hll() and tdigest(), I have tried as per document but got different result.
My goal is Percentile value in 1st query and last query should be same
tdigest
are the intermediate results for computing percentiles
. You can compute the tdigest
per day, and based on that compute the overall percentile:
let MySourceTable = datatable(col1:int, col2:int, Value:int, EventUTCDateTime:datetime)
[
1,2,10,'2022-01-01 01:00:01',
1,2,20,'2022-01-01 02:01:01',
1,2,15,'2022-01-02 03:05:01',
1,2,40,'2022-01-03 01:30:01',
1,2,20,'2022-01-03 02:30:01',
1,2,50,'2022-01-04 07:30:01',
1,2,70,'2022-01-05 06:30:01',
1,2,45,'2022-01-05 07:30:01',
1,2,90,'2022-01-05 07:30:01',
1,2,40,'2022-01-06 07:30:01'
];
MySourceTable
| extend EventUTCDate = bin(EventUTCDateTime,1d)
| project col1,col2,Value,EventUTCDate
| summarize Median=percentile(Value,50), tdigest = tdigest(Value) by col1,col2,EventUTCDate
| project col1,col2, Median, MedianFromTdigest = percentile_tdigest(tdigest, 50), EventUTCDate
col1 | col2 | Median | MedianFromTdigest | EventUTCDate |
---|---|---|---|---|
1 | 2 | 10 | 10 | 2022-01-01 00:00:00.0000000 |
1 | 2 | 15 | 15 | 2022-01-02 00:00:00.0000000 |
1 | 2 | 20 | 20 | 2022-01-03 00:00:00.0000000 |
1 | 2 | 50 | 50 | 2022-01-04 00:00:00.0000000 |
1 | 2 | 70 | 70 | 2022-01-05 00:00:00.0000000 |
1 | 2 | 40 | 40 | 2022-01-06 00:00:00.0000000 |
let MySourceTable = datatable(col1:int, col2:int, Value:int, EventUTCDateTime:datetime)
[
1,2,10,'2022-01-01 01:00:01',
1,2,20,'2022-01-01 02:01:01',
1,2,15,'2022-01-02 03:05:01',
1,2,40,'2022-01-03 01:30:01',
1,2,20,'2022-01-03 02:30:01',
1,2,50,'2022-01-04 07:30:01',
1,2,70,'2022-01-05 06:30:01',
1,2,45,'2022-01-05 07:30:01',
1,2,90,'2022-01-05 07:30:01',
1,2,40,'2022-01-06 07:30:01'
];
MySourceTable
| extend EventUTCDate = bin(EventUTCDateTime,1d)
| project col1,col2,Value,EventUTCDate
| summarize tdigest = tdigest(Value) by col1,col2,EventUTCDate
| summarize Median = percentile_tdigest(tdigest_merge(tdigest), 50)
Median |
---|
40 |