Search code examples
sqlsql-serversql-server-2014

In Microsoft SQL Server is there a better way to fill null values in both directions than what I am currently doing?


I am trying to figure out how to fill NULLS in the upward direction in Microsoft SQL Server. This is super simple in Oracle but the boss didn't go for it. After six hours of google searching I finally decided to post.

In the result set....

VALUE is our time stamped data with all possible date times.

VALUE2 is our fill nulls downward method.

VALUE3 is our fill nulls upward method.

Given this fake crappy Table.

-- Test Data & Table 
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
    TagA integer,
    TagB integer,
    TagC integer) 
-- Insert test Values       
 INSERT INTO @TEST001
 VALUES
    ('2017-01-21 00:01:00.042', NULL, NULL, 87),
    ('2017-01-21 00:04:10.155', NULL, 1239, NULL),
    ('2017-01-21 00:04:10.959', NULL, NULL, 86),
    ('2017-01-21 00:06:49.401', NULL, 1240, NULL),
    ('2017-01-21 00:06:59.301', NULL, 1239, NULL),
    ('2017-01-21 00:07:10.124', 108, NULL, NULL),
    ('2017-01-21 00:12:11.789', 109, NULL, NULL),
    ('2017-01-21 00:16:12.190', 108, NULL, NULL),
    ('2017-01-21 00:16:13.987', 107, NULL, NULL),
    ('2017-01-21 00:17:31.410', NULL, 1260, NULL),
    ('2017-01-21 00:17:32.511', NULL, 1261, 87),
    ('2017-01-21 00:17:32.966', NULL, 1262, NULL)

Given this Query which queries TagA in our fake crappy table.

-- Start of Query used in VBS ADODB CONN.
declare @s datetime
declare @e datetime
set @s = '2017-01-21 00:00:00'
set @e = '2017-01-21 23:59:59'
;
-- We need to get all intervals between our two dates.
WITH ALL_INTERVALS AS (
    SELECT TOP (datediff(mi,@s,@e))
    TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
        FROM sys.all_objects AS s1
        CROSS JOIN
        sys.all_objects AS s2
),
-- We need to include both our real data and all possible intervals.
ALL_TIMES AS (
SELECT
    Time_Stamp as TIMES,
    TagA AS VALUE
FROM @TEST001 H
WHERE Time_Stamp BETWEEN @s and @e
UNION ALL
SELECT
    TIMES AS TIMES,
    NULL AS VALUE
    FROM ALL_INTERVALS
),
-- We need to find the real first value and fill all nulls with this value until we hit a new value.
FILL_DOWN AS ( SELECT 
            TIMES, 
            VALUE,
            ISNULL(VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE2
            FROM ALL_TIMES AT
),
-- Our fill up method does not work if our first set of values is null. UGH, this does not work either....crap. It fills our first set
-- With our last real value....
FILL_UP AS  ( 
        SELECT 
            TIMES, 
            VALUE, 
            VALUE2,
            ISNULL(VALUE2, (SELECT TOP 1 VALUE2 FROM FILL_DOWN WHERE TIMES > FD.TIMES AND VALUE2 IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
            FROM FILL_DOWN FD
)
SELECT * 
FROM FILL_UP 
ORDER BY TIMES ASC

RESULTS

TIMES                   VALUE   VALUE2  VALUE3
2017-01-2100:01:00.000  NULL    NULL    107 <---------- This should be 108 from our fake crappy table.
2017-01-2100:01:00.043  NULL    NULL    107 
2017-01-2100:02:00.000  NULL    NULL    107 
2017-01-2100:03:00.000  NULL    NULL    107 
2017-01-2100:04:00.000  NULL    NULL    107 
2017-01-2100:04:10.157  NULL    NULL    107 
2017-01-2100:04:10.960  NULL    NULL    107 
2017-01-2100:05:00.000  NULL    NULL    107 
2017-01-2100:06:00.000  NULL    NULL    107 
2017-01-2100:06:49.400  NULL    NULL    107 
2017-01-2100:06:59.300  NULL    NULL    107 
2017-01-2100:07:00.000  NULL    NULL    107 
2017-01-2100:07:10.123  108     108     108 
2017-01-2100:08:00.000  NULL    108     108 
2017-01-2100:09:00.000  NULL    108     108 
2017-01-2100:10:00.000  NULL    108     108 
2017-01-2100:11:00.000  NULL    108     108 
2017-01-2100:12:00.000  NULL    108     108 
2017-01-2100:12:11.790  109     109     109 
2017-01-2100:13:00.000  NULL    109     109 
2017-01-2100:14:00.000  NULL    109     109 
2017-01-2100:15:00.000  NULL    109     109 
2017-01-2100:16:00.000  NULL    109     109 
2017-01-2100:16:12.190  108     108     108 
2017-01-2100:16:13.987  107     107     107 
2017-01-2100:17:00.000  NULL    107     107 
2017-01-2100:17:31.410  NULL    107     107 
2017-01-2100:17:32.510  NULL    107     107 
2017-01-2100:17:32.967  NULL    107     107 
2017-01-2100:18:00.000  NULL    107     107 
2017-01-2100:19:00.000  NULL    107     107 
2017-01-2100:20:00.000  NULL    107     107 
2017-01-2100:21:00.000  NULL    107     107 
2017-01-2100:22:00.000  NULL    107     107 
2017-01-2100:23:00.000  NULL    107     107 
2017-01-2100:24:00.000  NULL    107     107 
2017-01-2100:25:00.000  NULL    107     107 
2017-01-2100:26:00.000  NULL    107     107

EDIT:

I played around with very large datasets today and found this to be the fastest thing ever. It does exactly what I want just over a second. You can not see the speed difference in this small table but with large datasets 1 second vs 24 Minutes is a lot! Thank you for all that helped, Best wishes!

-- Test Data & Table 
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
    TagA integer,
    TagB integer,
    TagC integer) 
-- Insert test Values       
 INSERT INTO @TEST001
 VALUES
    ('2017-01-21 00:01:00.042', NULL, NULL, 87),
    ('2017-01-21 00:04:10.155', NULL, 1239, NULL),
    ('2017-01-21 00:04:10.959', NULL, NULL, 86),
    ('2017-01-21 00:06:49.401', NULL, 1240, NULL),
    ('2017-01-21 00:06:59.301', NULL, 1239, NULL),
    ('2017-01-21 00:07:10.124', 108, NULL, NULL),
    ('2017-01-21 00:12:11.789', 109, NULL, NULL),
    ('2017-01-21 00:16:12.190', 108, NULL, NULL),
    ('2017-01-21 00:16:13.987', 107, NULL, NULL),
    ('2017-01-21 00:17:31.410', NULL, 1260, NULL),
    ('2017-01-21 00:17:32.511', NULL, 1261, 87),
    ('2017-01-21 00:17:32.966', NULL, 1262, NULL)
-- Start of Query used in VBS ADODB CONNN
declare @s datetime
declare  @e datetime
set @s = '2017-01-01 00:00:00'
set @e = '2017-01-31 23:59:59'
;
WITH ALL_INTERVALS 
AS ( SELECT TOP (datediff(mi,@s,@e))
    TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s),
    NULL AS VALUE
        FROM sys.all_objects AS s1
        CROSS JOIN
        sys.all_objects AS s2
),
ALL_TIMES 
AS ( SELECT
    H.TIME_STAMP as TIMES,
    TagA AS VALUE
    FROM @TEST001 H
    WHERE TIME_STAMP BETWEEN @s and @e
    UNION ALL
    SELECT 
        AI.TIMES AS TIMES, 
        AI.VALUE AS VALUE
        FROM ALL_INTERVALS AI
),
-- JUST INCASE OUR REAL TIME == INTERVAL TIME exactly.
FILL_ACCROSS AS( SELECT TIMES AS TIMES,
                ISNULL(AT.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES = AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE
                FROM ALL_TIMES AT

),
-- FILL UP AND FILL DOWN.
FILL_UP_DOWN AS ( SELECT 
                TIMES, 
                VALUE,
                ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES > FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE2,
                ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
                FROM FILL_ACCROSS FA
)
--Just a nice display of what is going on.
select FD.TIMES,
       FD.VALUE,
       FD.VALUE2,
       FD.VALUE3,
       CASE
          WHEN FD.VALUE3 IS NULL THEN (FD.VALUE2)
          ELSE (FD.VALUE3)
          END AS VALUE4
  FROM FILL_UP_DOWN FD order by TIMES ASC

Solution

  • If you are looking for an alternative, try this grouping

    -- Start of Query used in VBS ADODB CONN.
    declare @s datetime
    declare @e datetime
    set @s = '2017-01-21 00:00:00.001'
    set @e = '2017-01-21 23:59:59'
    ;
    --select datepart(millisecond,@s);
    -- We need to get all intervals between our two dates.
    WITH ALL_INTERVALS AS (
        SELECT TOP (datediff(mi,@s,@e))
        TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
            FROM sys.all_objects AS s1
            CROSS JOIN
            sys.all_objects AS s2
    ),
    -- We need to include both our real data and all possible intervals.
    ALL_TIMES AS (
    SELECT
        Time_Stamp as TIMES,
        TagA AS VALUE
    FROM @TEST001 H
    WHERE Time_Stamp BETWEEN @s and @e
    UNION ALL
    SELECT
        TIMES AS TIMES,
        NULL AS VALUE
        FROM ALL_INTERVALS
    ),
    g1 AS ( 
        SELECT 
            TIMES, 
            VALUE,
            diff = case 
                   when value=lag(value,1) over(order by TIMES) 
                      or coalesce (value, lag(value,1) over(order by TIMES)) is null
                   then 0 else 1 end
         FROM ALL_TIMES AT
    ),
    g2 as ( 
        select TIMES, VALUE
             , grp = sum(diff) over(order by TIMES)
             , direction = case sum(diff) over(order by TIMES) when 0 then 1 else -1 end 
        from g1
    )
    select 
        TIMES, 
        VALUE, 
        grp,
        ISNULL(VALUE,
               (SELECT TOP 1 VALUE FROM g2 repl WHERE grp = g2.grp + g2.direction AND VALUE IS NOT NULL ORDER BY TIMES ) 
               )  AS VALUE3
    from g2;