I am trying to figure out how to fill NULLS in the upward direction in Microsoft SQL Server. This is super simple in Oracle but the boss didn't go for it. After six hours of google searching I finally decided to post.
In the result set....
VALUE is our time stamped data with all possible date times.
VALUE2 is our fill nulls downward method.
VALUE3 is our fill nulls upward method.
Given this fake crappy Table.
-- Test Data & Table
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
TagA integer,
TagB integer,
TagC integer)
-- Insert test Values
INSERT INTO @TEST001
VALUES
('2017-01-21 00:01:00.042', NULL, NULL, 87),
('2017-01-21 00:04:10.155', NULL, 1239, NULL),
('2017-01-21 00:04:10.959', NULL, NULL, 86),
('2017-01-21 00:06:49.401', NULL, 1240, NULL),
('2017-01-21 00:06:59.301', NULL, 1239, NULL),
('2017-01-21 00:07:10.124', 108, NULL, NULL),
('2017-01-21 00:12:11.789', 109, NULL, NULL),
('2017-01-21 00:16:12.190', 108, NULL, NULL),
('2017-01-21 00:16:13.987', 107, NULL, NULL),
('2017-01-21 00:17:31.410', NULL, 1260, NULL),
('2017-01-21 00:17:32.511', NULL, 1261, 87),
('2017-01-21 00:17:32.966', NULL, 1262, NULL)
Given this Query which queries TagA in our fake crappy table.
-- Start of Query used in VBS ADODB CONN.
declare @s datetime
declare @e datetime
set @s = '2017-01-21 00:00:00'
set @e = '2017-01-21 23:59:59'
;
-- We need to get all intervals between our two dates.
WITH ALL_INTERVALS AS (
SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
-- We need to include both our real data and all possible intervals.
ALL_TIMES AS (
SELECT
Time_Stamp as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE Time_Stamp BETWEEN @s and @e
UNION ALL
SELECT
TIMES AS TIMES,
NULL AS VALUE
FROM ALL_INTERVALS
),
-- We need to find the real first value and fill all nulls with this value until we hit a new value.
FILL_DOWN AS ( SELECT
TIMES,
VALUE,
ISNULL(VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE2
FROM ALL_TIMES AT
),
-- Our fill up method does not work if our first set of values is null. UGH, this does not work either....crap. It fills our first set
-- With our last real value....
FILL_UP AS (
SELECT
TIMES,
VALUE,
VALUE2,
ISNULL(VALUE2, (SELECT TOP 1 VALUE2 FROM FILL_DOWN WHERE TIMES > FD.TIMES AND VALUE2 IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
FROM FILL_DOWN FD
)
SELECT *
FROM FILL_UP
ORDER BY TIMES ASC
RESULTS
TIMES VALUE VALUE2 VALUE3
2017-01-2100:01:00.000 NULL NULL 107 <---------- This should be 108 from our fake crappy table.
2017-01-2100:01:00.043 NULL NULL 107
2017-01-2100:02:00.000 NULL NULL 107
2017-01-2100:03:00.000 NULL NULL 107
2017-01-2100:04:00.000 NULL NULL 107
2017-01-2100:04:10.157 NULL NULL 107
2017-01-2100:04:10.960 NULL NULL 107
2017-01-2100:05:00.000 NULL NULL 107
2017-01-2100:06:00.000 NULL NULL 107
2017-01-2100:06:49.400 NULL NULL 107
2017-01-2100:06:59.300 NULL NULL 107
2017-01-2100:07:00.000 NULL NULL 107
2017-01-2100:07:10.123 108 108 108
2017-01-2100:08:00.000 NULL 108 108
2017-01-2100:09:00.000 NULL 108 108
2017-01-2100:10:00.000 NULL 108 108
2017-01-2100:11:00.000 NULL 108 108
2017-01-2100:12:00.000 NULL 108 108
2017-01-2100:12:11.790 109 109 109
2017-01-2100:13:00.000 NULL 109 109
2017-01-2100:14:00.000 NULL 109 109
2017-01-2100:15:00.000 NULL 109 109
2017-01-2100:16:00.000 NULL 109 109
2017-01-2100:16:12.190 108 108 108
2017-01-2100:16:13.987 107 107 107
2017-01-2100:17:00.000 NULL 107 107
2017-01-2100:17:31.410 NULL 107 107
2017-01-2100:17:32.510 NULL 107 107
2017-01-2100:17:32.967 NULL 107 107
2017-01-2100:18:00.000 NULL 107 107
2017-01-2100:19:00.000 NULL 107 107
2017-01-2100:20:00.000 NULL 107 107
2017-01-2100:21:00.000 NULL 107 107
2017-01-2100:22:00.000 NULL 107 107
2017-01-2100:23:00.000 NULL 107 107
2017-01-2100:24:00.000 NULL 107 107
2017-01-2100:25:00.000 NULL 107 107
2017-01-2100:26:00.000 NULL 107 107
EDIT:
I played around with very large datasets today and found this to be the fastest thing ever. It does exactly what I want just over a second. You can not see the speed difference in this small table but with large datasets 1 second vs 24 Minutes is a lot! Thank you for all that helped, Best wishes!
-- Test Data & Table
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
TagA integer,
TagB integer,
TagC integer)
-- Insert test Values
INSERT INTO @TEST001
VALUES
('2017-01-21 00:01:00.042', NULL, NULL, 87),
('2017-01-21 00:04:10.155', NULL, 1239, NULL),
('2017-01-21 00:04:10.959', NULL, NULL, 86),
('2017-01-21 00:06:49.401', NULL, 1240, NULL),
('2017-01-21 00:06:59.301', NULL, 1239, NULL),
('2017-01-21 00:07:10.124', 108, NULL, NULL),
('2017-01-21 00:12:11.789', 109, NULL, NULL),
('2017-01-21 00:16:12.190', 108, NULL, NULL),
('2017-01-21 00:16:13.987', 107, NULL, NULL),
('2017-01-21 00:17:31.410', NULL, 1260, NULL),
('2017-01-21 00:17:32.511', NULL, 1261, 87),
('2017-01-21 00:17:32.966', NULL, 1262, NULL)
-- Start of Query used in VBS ADODB CONNN
declare @s datetime
declare @e datetime
set @s = '2017-01-01 00:00:00'
set @e = '2017-01-31 23:59:59'
;
WITH ALL_INTERVALS
AS ( SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s),
NULL AS VALUE
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
ALL_TIMES
AS ( SELECT
H.TIME_STAMP as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE TIME_STAMP BETWEEN @s and @e
UNION ALL
SELECT
AI.TIMES AS TIMES,
AI.VALUE AS VALUE
FROM ALL_INTERVALS AI
),
-- JUST INCASE OUR REAL TIME == INTERVAL TIME exactly.
FILL_ACCROSS AS( SELECT TIMES AS TIMES,
ISNULL(AT.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES = AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE
FROM ALL_TIMES AT
),
-- FILL UP AND FILL DOWN.
FILL_UP_DOWN AS ( SELECT
TIMES,
VALUE,
ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES > FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE2,
ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
FROM FILL_ACCROSS FA
)
--Just a nice display of what is going on.
select FD.TIMES,
FD.VALUE,
FD.VALUE2,
FD.VALUE3,
CASE
WHEN FD.VALUE3 IS NULL THEN (FD.VALUE2)
ELSE (FD.VALUE3)
END AS VALUE4
FROM FILL_UP_DOWN FD order by TIMES ASC
If you are looking for an alternative, try this grouping
-- Start of Query used in VBS ADODB CONN.
declare @s datetime
declare @e datetime
set @s = '2017-01-21 00:00:00.001'
set @e = '2017-01-21 23:59:59'
;
--select datepart(millisecond,@s);
-- We need to get all intervals between our two dates.
WITH ALL_INTERVALS AS (
SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
-- We need to include both our real data and all possible intervals.
ALL_TIMES AS (
SELECT
Time_Stamp as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE Time_Stamp BETWEEN @s and @e
UNION ALL
SELECT
TIMES AS TIMES,
NULL AS VALUE
FROM ALL_INTERVALS
),
g1 AS (
SELECT
TIMES,
VALUE,
diff = case
when value=lag(value,1) over(order by TIMES)
or coalesce (value, lag(value,1) over(order by TIMES)) is null
then 0 else 1 end
FROM ALL_TIMES AT
),
g2 as (
select TIMES, VALUE
, grp = sum(diff) over(order by TIMES)
, direction = case sum(diff) over(order by TIMES) when 0 then 1 else -1 end
from g1
)
select
TIMES,
VALUE,
grp,
ISNULL(VALUE,
(SELECT TOP 1 VALUE FROM g2 repl WHERE grp = g2.grp + g2.direction AND VALUE IS NOT NULL ORDER BY TIMES )
) AS VALUE3
from g2;