I am trying to get the time differences per id and status change with the following data:
create table Table1(idversion text, id text, status text, dt datetime);
insert into Table1 values
("1_1", "1", "OK",'2020-05-14T01:00:00')
,("1_2", "1", "OK",'2020-05-14T01:00:10')
,("1_3", "1", "NOT OK",'2020-05-14T01:00:20')
,("2_1", "2", "OK",'2020-05-14T01:00:00')
,("2_2", "2", "NOT OK", '2020-05-14T01:00:10')
,("1_4", "1", "OK",'2020-05-14T01:00:30')
,("1_5", "1", "OK",'2020-05-14T01:00:40')
,("1_6", "1", "OK",'2020-05-14T01:00:50')
;
Select * FROM Table1 ORDER BY idversion;
idversion | id | status | dt |
---|---|---|---|
1_1 | 1 | OK | 2020-05-14 01:00:00 |
1_2 | 1 | OK | 2020-05-14 01:00:10 |
1_3 | 1 | NOT OK | 2020-05-14 01:00:20 |
1_4 | 1 | OK | 2020-05-14 01:00:30 |
1_5 | 1 | OK | 2020-05-14 01:00:40 |
1_6 | 1 | OK | 2020-05-14 01:00:50 |
2_1 | 2 | OK | 2020-05-14 01:00:00 |
2_2 | 2 | NOT OK | 2020-05-14 01:00:10 |
I can get the time_diff between each id as such
SELECT *,
dt - LAG(dt) OVER ( partition by id order by idversion )as lag
FROM Table1
ORDER BY idversion;
But what I am looking to do is get all the time differences for the same ID and when there is a status change.
So the expected output would be
idversion | id | status | time_diff |
---|---|---|---|
"1_1" | "1" | "OK" | 20 |
"1_3" | "1" | "NOT OK" | 10 |
"2_1" | "2" | "OK" | 10 |
Fixing the table first as I do have those fields, I just omitted them
create table Table1(idversion text, id text, version int, status text, dt datetime);
insert into Table1 values
("1_1", "1", 1, "OK",'2020-05-14T01:00:00')
,("1_2", "1", 2, "OK",'2020-05-14T01:00:10')
,("1_3", "1", 3, "NOT OK",'2020-05-14T01:00:20')
,("2_1", "2", 1, "OK",'2020-05-14T01:00:00')
,("2_2", "2", 2, "NOT OK", '2020-05-14T01:00:10')
,("1_4", "1", 4, "OK",'2020-05-14T01:00:30')
,("1_5", "1", 5, "OK",'2020-05-14T01:00:40')
,("1_6", "1", 6, "OK",'2020-05-14T01:00:50')
;
Records: 8 Duplicates: 0 Warnings: 0
Select * FROM Table1 ORDER BY id, version;
idversion | id | version | status | dt |
---|---|---|---|---|
1_1 | 1 | 1 | OK | 2020-05-14 01:00:00 |
1_2 | 1 | 2 | OK | 2020-05-14 01:00:10 |
1_3 | 1 | 3 | NOT OK | 2020-05-14 01:00:20 |
1_4 | 1 | 4 | OK | 2020-05-14 01:00:30 |
1_5 | 1 | 5 | OK | 2020-05-14 01:00:40 |
1_6 | 1 | 6 | OK | 2020-05-14 01:00:50 |
2_1 | 2 | 1 | OK | 2020-05-14 01:00:00 |
2_2 | 2 | 2 | NOT OK | 2020-05-14 01:00:10 |
And with the following CTEs, I have the desired output, for ID changes and state changes only being shown
WITH cte AS (SELECT *,
COALESCE(lag(status) OVER (partition by id order by id, version), "init") as last_status
FROM Table1
ORDER BY id, version
),
cte2 AS (
SELECT *,
LEAD(dt) OVER (partition by id order by id, version) - dt as time_diff FROM cte
WHERE status != last_status
)
SELECT * FROM cte2 WHERE time_diff IS NOT NULL;
idversion | id | version | status | dt | last_status | time_diff |
---|---|---|---|---|---|---|
1_1 | 1 | 1 | OK | 2020-05-14 01:00:00 | init | 20 |
1_3 | 1 | 3 | NOT OK | 2020-05-14 01:00:20 | OK | 10 |
2_1 | 2 | 1 | OK | 2020-05-14 01:00:00 | init | 10 |