Search code examples
sqlsql-serverdatabaseforecasting

SQL - Distribute Amount Between Dates


I'm trying to forecast an amount between two dates. The amount is evenly spread between the dates - see tables below for reference. Should (hopefully) be simple - any ideas?

The source table looks like this:

+-----------+---------------+---------------+------+--------------+----------------+
|  Project  |  Start Date   |   End Date    | Days | Total Budget | Budget Per Day |
+-----------+---------------+---------------+------+--------------+----------------+
| Project 1 | Jan. 01, 2017 | Apr. 11, 2017 |  100 |          100 |  1.00          |
| Project 2 | Feb. 05, 2017 | Apr. 06, 2017 |   60 |          200 |  3.33          |
| Project 3 | Feb. 03, 2017 | May. 03, 2017 |   89 |           50 |  0.56          |
| Project 4 | Jan. 01, 2017 | Aug. 04, 2017 |  215 |          300 |  1.40          |
+-----------+---------------+---------------+------+--------------+----------------+

The resulting table should look like this:

+-----------+---------------+--------+
|  Project  |      Day      | Budget |
+-----------+---------------+--------+
| Project 1 | Jan. 01, 2017 |  1.00  |
| Project 1 | Jan. 02, 2017 |  1.00  |
| Project 1 | Jan. 03, 2017 |  1.00  |
| Project 1 | Jan. 04, 2017 |  1.00  |
| Project 1 | Jan. 05, 2017 |  1.00  |
| …         | …             | …      |
+-----------+---------------+--------+

Side note: The goal is to then easily do different aggregations: by month, by year, etc. And eventually different distributions such as 10% of budget in first 10% of the days, etc. Any suggestions for how to best do this would be greatly appreciated.


Solution

  • Firstly, generate for yourself a Calendar table. They are extremely useful!

    CREATE TABLE dbo.tCalendar(
        Date_Value DATE PRIMARY KEY NOT NULL,
        Year AS (DATEPART(YEAR, Date_Value)) PERSISTED,
        Month AS (DATEPART(MONTH, Date_Value)) PERSISTED,
        Day AS (DATEPART(DAY, Date_Value)) PERSISTED,
        Day_Of_Year AS (DATEPART(DY, Date_Value)) PERSISTED,
    )
    
    INSERT INTO dbo.tCalendar
    SELECT *
    FROM (
        SELECT DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) - 1, '2000-01-01') AS d
        FROM        (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1))v(n)
        CROSS JOIN (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1))w(n)
        CROSS JOIN (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1))x(n)
        CROSS JOIN (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1))y(n)
    ) cal
    WHERE cal.d <= '2027-12-31'
    
    --SELECT * FROM  dbo.tCalendar
    

    Now you can simply JOIN the calendar table to get the result you require:

    CREATE TABLE #Project(
        Project VARCHAR(100) PRIMARY KEY NOT NULL,
        Start_Date DATE,
        End_Date DATE,
        Days AS (DATEDIFF(DAY, Start_Date,End_Date) + 1) PERSISTED,
        Total_Budget DECIMAL(19,2),
        Budget_Per_Day AS (CAST(Total_Budget / (DATEDIFF(DAY, Start_Date,End_Date) + 1) AS DECIMAL(19,2))) PERSISTED
    )
    
    INSERT INTO #Project (Project, Start_Date, End_Date, Total_Budget)
    VALUES
    ('Project 1', '2017-01-01', '2017-04-10', 100),
    ('Project 2', '2017-02-05', '2017-04-05', 200),
    ('Project 3', '2017-02-03', '2017-05-02', 50),
    ('Project 4', '2017-01-01', '2017-08-03', 300)
    
    SELECT *
    FROM #Project
    
    SELECT   p.Project
            ,cal.Date_Value
            ,p.Budget_Per_Day
    FROM #Project p
        INNER JOIN dbo.tCalendar cal
            ON cal.Date_Value BETWEEN p.Start_Date AND p.End_Date
    

    For more complex distributions, one solution is to have a table to store budget allocation by a date range, eg:

    CREATE TABLE #Project_Budget_Range(
        Project VARCHAR(100) NOT NULL,
        Range_Start_Date DATE NOT NULL,
        Range_End_Date DATE NOT NULL,
        Range_Days AS (DATEDIFF(DAY, Range_Start_Date, Range_End_Date) + 1) PERSISTED,
        Budget_Allocation_Value DECIMAL(19,0) NULL,
        Budget_Allocation_Pct DECIMAL(9,4) NULL,
        PRIMARY KEY (Project, Range_Start_Date),
        -- End Date >= Start Date
        CONSTRAINT CK_Range_Start_End_Date CHECK (Range_End_Date>=Range_Start_Date),
        -- Either Budget_Allocation_Value or Budget_Allocation_Pct must have a value (but not both)
        CONSTRAINT CK_Allocation_Type CHECK (Budget_Allocation_Value + Budget_Allocation_Pct IS NULL AND COALESCE(Budget_Allocation_Value,Budget_Allocation_Pct) IS NOT NULL)
    )
    
    INSERT INTO #Project_Budget_Range
    VALUES
    ('Project 1', '2017-01-01', '2017-01-31', NULL, 0.5),
    ('Project 1', '2017-02-01', '2017-02-28', 30, NULL),
    ('Project 1', '2017-03-01', '2017-04-10', 20, NULL)
    
    SELECT * FROM #Project_Budget_Range
    
    SELECT   p.Project
            ,p.Total_Budget
            ,cal.Date_Value
            ,CAST(COALESCE( 
                (b.Budget_Allocation_Pct * p.Total_Budget) / b.Range_Days, -- Allocation by Pct
                b.Budget_Allocation_Value / b.Range_Days -- Allocation by value
            ) AS DECIMAL(19,8)) AS Budget_Allocation
    FROM #Project p
        INNER JOIN #Project_Budget_Range b
            ON p.Project = b.Project
        INNER JOIN dbo.tCalendar cal
            ON cal.Date_Value BETWEEN b.Range_Start_Date AND Range_End_Date
    WHERE p.Project = 'Project 1'
    ;
    

    Sample output showing budget allocation by day