I have Application sights table with around million rows and want equal split data 4 datasets /queries and then merge them all in report. I have the insertDate which can be used to partition to different datasets without overlap of data between the queries. using insertDate>=ago(180d) and insertDate < ago(180d) will give me only 2 partitions of this data , how can i split this data equally ,lets say 5 datasets meaning 5 queries each of which returns 2 months data?
Note I don't know the underlying dates so cannot hard code dates like between dates operator . This is sample data , some months have more data than other months but need to split the data equally to avoid the app insights rest API's max rows limit of 500K rows
Context: Application Insights rest API connector to powerBI does not support more than 500K rows so above is one workaround I am thinking of.
let T = datatable(release:string, insertDate:datetime )
[
"2205", datetime(2022-01-05),
"2205", datetime(2022-01-10),
"2205", datetime(2022-02-10),
"2204", datetime(2022-03-25),
"2203", datetime(2022-04-15),
"2205", datetime(2022-04-12),
"2205", datetime(2022-05-12),
"2206", datetime(2022-06-23),
"2207", datetime(2022-07-27),
"2209", datetime(2022-08-12),
"2201", datetime(2022-09-26),
"2201", datetime(2022-10-08),
];
T
|take 100;
let T = datatable(release:string, insertDate:datetime )
[
"2205", datetime(2022-01-05),
"2205", datetime(2022-01-10),
"2205", datetime(2022-02-10),
"2204", datetime(2022-03-25),
"2203", datetime(2022-04-15),
"2205", datetime(2022-04-12),
"2205", datetime(2022-05-12),
"2206", datetime(2022-06-23),
"2207", datetime(2022-07-27),
"2209", datetime(2022-08-12),
"2201", datetime(2022-09-26),
"2201", datetime(2022-10-08),
];
let groups = 5;
let total_rows = toscalar(T | count);
let group_rows = 1.0 * total_rows / groups;
let edge_rows = toscalar
(
range x from group_rows to total_rows - group_rows step group_rows
| extend x = round(x)
| summarize make_list(x)
);
union (print insertDate = datetime(0001))
,(T
| order by insertDate asc
| where row_number() in (edge_rows)
| project insertDate
)
| order by insertDate asc
| project fromDate = insertDate
,toDate = next(insertDate, 1, datetime(3000))
fromDate | toDate |
---|---|
0001-01-01T00:00:00Z | 2022-01-10T00:00:00Z |
2022-01-10T00:00:00Z | 2022-04-12T00:00:00Z |
2022-04-12T00:00:00Z | 2022-05-12T00:00:00Z |
2022-05-12T00:00:00Z | 2022-08-12T00:00:00Z |
2022-08-12T00:00:00Z | 3000-01-01T00:00:00Z |