Search code examples
sqlsql-serverdate-range

adding values based on date range sql


Stuck on the problem below.

Current data: table1

|emp_id | date     | day_one |week_num |
 --------------------------------------
|    100|2021-01-01|     TRUE|        1|
|    100|2021-01-02|     NULL|     NULL|
|    100|2021-01-03|     NULL|     NULL|
|    100|2021-01-04|     NULL|     NULL|
|    100|2021-01-05|     NULL|     NULL|
|    100|2021-01-06|     NULL|     NULL|
|    100|2021-01-07|     NULL|     NULL|
|    100|2021-01-08|     NULL|     NULL|
|    100|2021-01-09|     NULL|     NULL|
|    100|2021-01-10|     NULL|     NULL|
|    100|2021-01-11|     NULL|     NULL|
|    100|2021-01-12|     NULL|     NULL|
|    100|2021-01-13|     NULL|     NULL|
|    100|2021-01-14|     NULL|     NULL|
 --------------------------------------

Desired output:

|emp_id | dates    | day_one |week_num |
 --------------------------------------
|    100|2021-01-01|     TRUE|        1|
|    100|2021-01-02|     NULL|        1|
|    100|2021-01-03|     NULL|        1|
|    100|2021-01-04|     NULL|        1|
|    100|2021-01-05|     NULL|        1|
|    100|2021-01-06|     NULL|        1|
|    100|2021-01-07|     NULL|        1|
|    100|2021-01-08|     NULL|        2|
|    100|2021-01-09|     NULL|        2|
|    100|2021-01-10|     NULL|        2|
|    100|2021-01-11|     NULL|        2|
|    100|2021-01-12|     NULL|        2|
|    100|2021-01-13|     NULL|        2|
|    100|2021-01-14|     NULL|        2|
 --------------------------------------

The data has many employees across thousands of dates so it needs be done in a way where weeks are added based off the day_one column. I have used a lag/lead but if an employee has an 8 week cycle it becomes very inefficient. The code below gives the week_num =1, repeating gives 2 etc.

with t1 as (
select 
    *
    ,
    LAG(week_num) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as second_day,
    LAG(week_num, 2) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as third_day,
    LAG(week_num, 3) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as forth_day,
    LAG(week_num, 4) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as fifth_day,
    LAG(week_num, 5) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as sixth_day,
    LAG(week_num, 6) OVER(PARTITION BY emp_id ORDER BY emp_id, dates) as seventh_day
from table1 
)
select 
    emp_id,
    dates,
    day_one,
    case when week_num = 1 then 1 
         when second_day = 1 then 1 
         when third_day = 1 then 1 
         when forth_day = 1 then 1 
         when fifth_day = 1 then 1 
         when sixth_day = 1 then 1 
         when seventh_day = 1 then 1 
         else NULL 
    end as week_num
from table1
order by emp_id, dates

And level 2 to the problem. Current data:

|emp_id | dates    | day_one |week_num |
 --------------------------------------
|    100|2021-01-01|     TRUE|        1|
|    100|2021-01-02|     NULL|     NULL|
|    100|2021-01-03|     NULL|     NULL|
|    100|2021-01-04|     NULL|     NULL|
|    100|2021-01-05|     NULL|     NULL|
|    100|2021-01-06|     NULL|     NULL|
|    100|2021-01-07|     NULL|     NULL|
|    100|2021-01-08|     NULL|     NULL|
|    100|2021-01-09|     NULL|     NULL|
|    100|2021-01-10|     NULL|     NULL|
|    100|2021-01-11|     NULL|     NULL|
|    100|2021-01-12|     NULL|     NULL|
|    100|2021-01-13|     NULL|     NULL|
|    100|2021-01-14|     NULL|     NULL|
|    100|2021-01-15|     NULL|     NULL|
|    100|2021-01-16|     NULL|     NULL|
|    100|2021-01-17|     NULL|     NULL|
|    100|2021-01-18|     NULL|     NULL|
|    100|2021-01-19|     NULL|     NULL|
|    100|2021-01-20|     NULL|     NULL|
|    100|2021-01-21|     NULL|     NULL|
|    100|2021-01-22|     NULL|     NULL|
|    100|2021-01-23|     NULL|     NULL|
|    100|2021-01-24|     NULL|     NULL|
|    100|2021-01-25|     NULL|     NULL|
|    100|2021-01-26|     NULL|     NULL|
|    100|2021-01-27|     NULL|     NULL|
|    100|2021-01-28|     NULL|     NULL|
 --------------------------------------

Desired output (repeating cycles):

|emp_id | dates    | day_one |week_num |
 --------------------------------------
|    100|2021-01-01|     TRUE|        1|
|    100|2021-01-02|     NULL|        1|
|    100|2021-01-03|     NULL|        1|
|    100|2021-01-04|     NULL|        1|
|    100|2021-01-05|     NULL|        1|
|    100|2021-01-06|     NULL|        1|
|    100|2021-01-07|     NULL|        1|
|    100|2021-01-08|     NULL|        2|
|    100|2021-01-09|     NULL|        2|
|    100|2021-01-10|     NULL|        2|
|    100|2021-01-11|     NULL|        2|
|    100|2021-01-12|     NULL|        2|
|    100|2021-01-13|     NULL|        2|
|    100|2021-01-14|     NULL|        2|
|    100|2021-01-15|     NULL|        1|
|    100|2021-01-16|     NULL|        1|
|    100|2021-01-17|     NULL|        1|
|    100|2021-01-18|     NULL|        1|
|    100|2021-01-19|     NULL|        1|
|    100|2021-01-20|     NULL|        1|
|    100|2021-01-21|     NULL|        1|
|    100|2021-01-22|     NULL|        2|
|    100|2021-01-23|     NULL|        2|
|    100|2021-01-24|     NULL|        2|
|    100|2021-01-25|     NULL|        2|
|    100|2021-01-26|     NULL|        2|
|    100|2021-01-27|     NULL|        2|
|    100|2021-01-28|     NULL|        2|
 --------------------------------------

Solution

  • -- PLEASE PROVIDE SET UP SCRIPT FOR SAMPLE DATA IN FUTURE POSTS
    
    CREATE TABLE #table1
    (
       emp_id   INT
      ,dates    DATE 
      ,day_one  VARCHAR(9)
      ,week_num_expected INT
    );
    
    INSERT INTO #table1(emp_id,dates,day_one,week_num_expected) VALUES
     (100,'2021-01-01','TRUE',1),(100,'2021-01-02',NULL,1),(100,'2021-01-03',NULL,1),(100,'2021-01-04',NULL,1)
    ,(100,'2021-01-05',NULL,1),(100,'2021-01-06',NULL,1),(100,'2021-01-07',NULL,1),(100,'2021-01-08',NULL,2)
    ,(100,'2021-01-09',NULL,2),(100,'2021-01-10',NULL,2),(100,'2021-01-11',NULL,2),(100,'2021-01-12',NULL,2)
    ,(100,'2021-01-13',NULL,2),(100,'2021-01-14',NULL,2),(100,'2021-01-15',NULL,1),(100,'2021-01-16',NULL,1)
    ,(100,'2021-01-17',NULL,1),(100,'2021-01-18',NULL,1),(100,'2021-01-19',NULL,1),(100,'2021-01-20',NULL,1)
    ,(100,'2021-01-21',NULL,1),(100,'2021-01-22',NULL,2),(100,'2021-01-23',NULL,2),(100,'2021-01-24',NULL,2)
    ,(100,'2021-01-25',NULL,2),(100,'2021-01-26',NULL,2),(100,'2021-01-27',NULL,2),(100,'2021-01-28',NULL,2);
    
    
    -- MAYBE ??
    SELECT
      emp_id,
      dates,
      day_one,
      week_num_expected,
      CASE
        WHEN (ROW_NUMBER() OVER (ORDER BY dates ASC) - 1) / 7 = 0
        OR
        ((ROW_NUMBER() OVER (ORDER BY dates ASC) - 1) / 7) % 2 = 0 
        THEN 1
        ELSE 2
      END AS week_num_by_code
    
    FROM #table1
    
    DROP TABLE #table1