Search code examples
sql-servert-sqltimeline

Creating a timeline from 3 tables in SQL Server


I have 3 tables with event dates that I am trying to combine to end up with a timeline table. I'm not sure the best way to implement what I require and was hoping to get a little help.

My first table is the employees table. It contains four columns

EmployeeId, HiredDate, CommencedDate, ResignDate, and FinishDate.

When an employee is hired they get a record in the table with the EmployeeId and HiredDate columns. The table is continually updated when required.

CREATE TABLE Employees (EmployeeId INT NOT NULL, HiredDate DATE NOT NULL, CommencedDate DATE NULL, ResignDate DATE NULL, FinishDate DATE NULL)
Insert Employees Values (111, '2012-01-02', '2012-01-05', '2012-03-15', '2012-04-06')
Insert Employees Values (112, '2012-01-05', '2012-01-10', NULL, NULL)

The second table called ReportingCycles which contains reportingCycleId, startDate, and endDate columns. The number of days in a reporting cycle varies.

CREATE TABLE ReportingCycles (reportingCycleId INT NOT NULL, startDate DATE NOT NULL, endDate DATE NOT NULL)
INSERT ReportingCycles Values (1,'2011-12-03','2011-12-31')
INSERT ReportingCycles Values (2,'2012-01-01','2012-01-30')
INSERT ReportingCycles Values (3,'2012-01-31','2012-02-27')
INSERT ReportingCycles Values (4,'2012-02-28','2012-04-02')
INSERT ReportingCycles Values (5,'2012-04-03','2012-04-28')
INSERT ReportingCycles Values (6,'2012-04-29','2012-05-28')
INSERT ReportingCycles Values (7,'2012-05-29','2012-07-01')
INSERT ReportingCycles Values (8,'2012-07-02','2012-08-01')
INSERT ReportingCycles Values (9,'2012-08-02','2012-08-28')
INSERT ReportingCycles Values (10,'2012-08-29','2012-09-29')
INSERT ReportingCycles Values (11,'2012-09-30','2012-10-28')
INSERT ReportingCycles Values (12,'2012-10-29','2012-11-26')
INSERT ReportingCycles Values (13,'2012-11-27','2012-12-26')
INSERT ReportingCycles Values (14,'2012-12-27','2013-01-21')
INSERT ReportingCycles Values (15,'2013-01-22','2013-02-21')

My third table is called Duties which for each employee shows the location of the employee at certain dates. This is populated with a dummy location in the event where an employee has been hired but hasn't commenced yet. It is also possible for an employee to be in more than 1 location within a date range so an assignement value is also given. The assignment value for a date range will always sum to 1 for a single employee.

CREATE TABLE Duties (EmployeeId INT NOT NULL, LocationId INT NOT NULL, AssignmentStartDate DATE NOT NULL, AssignmentEndDate DATE NULL, AssignmentValue Decimal(18,2) NOT NULL)
INSERT Duties Values (111, 0, '2012-01-02', '2012-01-04', 1.00)
INSERT Duties Values (111, 25, '2012-01-05', '2012-01-10', 1.00)
INSERT Duties Values (111, 30, '2012-01-11', '2012-04-06', 0.25)
INSERT Duties Values (111, 31, '2012-01-11', '2012-04-06', 0.25)
INSERT Duties Values (111, 32, '2012-01-11', '2012-04-06', 0.25)
INSERT Duties Values (111, 33, '2012-01-11', '2012-04-06', 0.25)
INSERT Duties Values (112, 0, '2012-01-05', '2012-01-09', 1)
INSERT Duties Values (112, 70, '2012-01-10', NULL, 1)

Now, what I want to do is join all these tables together and end up with a timeline for each employee. The resulting table would have the following columns:

CREATE TABLE EmployeeTimelime (
fromDate DATE NOT NULL, 
toDate DATE NOT NULL, 
reportingCycleId INT NOT NULL, 
employeeId INT NOT NULL, 
LocationId INT NOT NULL, 
AssignmentValue Decimal(18,2),
Hired INT NOT NULL, 
Commenced INT NOT NULL, 
Resigned INT NOT NULL, 
Finished Int NOT NULL, 
startCycle INT NOT NULL, 
endCycle INT NOT NULL)

where the Hired, Commenced, Resigned, Finished, startCycle, and endCycle columns contain a 0 or 1 value showing what type of record it is. For employee 111, the table would look like this:

Insert EmployeeTimelime Values ('2012-01-02','2012-01-04',2,111,0,1,1,0,0,0,0,0)
Insert EmployeeTimelime Values ('2012-01-05','2012-01-10',2,111,25,1,0,1,0,0,0,0)
Insert EmployeeTimelime Values ('2012-01-11','2012-01-30',2,111,30,0.25,0,0,0,0,0,1)
Insert EmployeeTimelime Values ('2012-01-11','2012-01-30',2,111,31,0.25,0,0,0,0,0,1)
Insert EmployeeTimelime Values ('2012-01-11','2012-01-30',2,111,32,0.25,0,0,0,0,0,1)
Insert EmployeeTimelime Values ('2012-01-11','2012-01-30',2,111,33,0.25,0,0,0,0,0,1)
Insert EmployeeTimelime Values ('2012-01-31','2012-02-27',3,111,30,0.25,0,0,0,0,1,1)
Insert EmployeeTimelime Values ('2012-01-31','2012-02-27',3,111,31,0.25,0,0,0,0,1,1)
Insert EmployeeTimelime Values ('2012-01-31','2012-02-27',3,111,32,0.25,0,0,0,0,1,1)
Insert EmployeeTimelime Values ('2012-01-31','2012-02-27',3,111,33,0.25,0,0,0,0,1,1)
Insert EmployeeTimelime Values ('2012-02-28','2012-03-14',4,111,30,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-02-28','2012-03-14',4,111,31,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-02-28','2012-03-14',4,111,32,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-02-28','2012-03-14',4,111,33,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-03-15','2012-04-02',4,111,30,0.25,0,0,1,0,0,1)
Insert EmployeeTimelime Values ('2012-03-15','2012-04-02',4,111,31,0.25,0,0,1,0,0,1)
Insert EmployeeTimelime Values ('2012-03-15','2012-04-02',4,111,32,0.25,0,0,1,0,0,1)
Insert EmployeeTimelime Values ('2012-03-15','2012-04-02',4,111,33,0.25,0,0,1,0,0,1)
Insert EmployeeTimelime Values ('2012-04-03','2012-04-05',5,111,30,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-04-03','2012-04-05',5,111,31,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-04-03','2012-04-05',5,111,32,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-04-03','2012-04-05',5,111,33,0.25,0,0,0,0,1,0)
Insert EmployeeTimelime Values ('2012-04-06','2012-04-06',5,111,30,0.25,0,0,0,1,0,0)
Insert EmployeeTimelime Values ('2012-04-06','2012-04-06',5,111,31,0.25,0,0,0,1,0,0)
Insert EmployeeTimelime Values ('2012-04-06','2012-04-06',5,111,32,0.25,0,0,0,1,0,0)
Insert EmployeeTimelime Values ('2012-04-06','2012-04-06',5,111,33,0.25,0,0,0,1,0,0)

I started out with this query but it's not correct:

SELECT *
FROM Employees x

Join ReportingCycles y
on (y.startDate between x.HiredDate and Coalesce(x.FinishDate, '9999-12-31') 
or y.endDate between x.HiredDate and Coalesce(x.FinishDate, '9999-12-31') )

Left Join Duties z
on z.EmployeeId = x.EmployeeId 
and (z.AssignmentStartDate  between y.startDate and y.endDate 
or Coalesce(z.AssignmentEndDate, '9999-12-31') between y.startDate and y.endDate 
)

Order by x.EmployeeId, y.reportingCycleId

Thanks to anyone who can help.


Solution

  • ---------------------------------------------------
    -- Preparation of Data
    ---------------------------------------------------
    DECLARE @EmployeeTimelime table (
        fromDate DATE NOT NULL, 
        toDate DATE NOT NULL, 
        reportingCycleId INT NOT NULL, 
        employeeId INT NOT NULL, 
        LocationId INT NOT NULL, 
        AssignmentValue Decimal(18,2),
    
        -- Should be using bit data type for boolean values
        Hired INT NOT NULL,
        Commenced INT NOT NULL, 
        Resigned INT NOT NULL, 
        Finished Int NOT NULL, 
        startCycle INT NOT NULL, 
        endCycle INT NOT NULL 
    )
    
    DECLARE @Employees table (employeeId INT NOT NULL, HiredDate DATE NOT NULL, CommencedDate DATE NULL, ResignDate DATE NULL, FinishDate DATE NULL)
    INSERT @Employees Values 
    (111, '2012-01-02', '2012-01-05', '2012-03-15', '2012-04-06')
    ,(112, '2012-01-05', '2012-01-10', NULL, NULL)
    
    DECLARE @ReportingCycles table (reportingCycleId INT NOT NULL, startDate DATE NOT NULL, endDate DATE NOT NULL)
    INSERT @ReportingCycles Values 
    (1,'2011-12-03','2011-12-31')
    ,(2,'2012-01-01','2012-01-30')
    ,(3,'2012-01-31','2012-02-27')
    ,(4,'2012-02-28','2012-04-02')
    ,(5,'2012-04-03','2012-04-28')
    ,(6,'2012-04-29','2012-05-28')
    ,(7,'2012-05-29','2012-07-01')
    ,(8,'2012-07-02','2012-08-01')
    ,(9,'2012-08-02','2012-08-28')
    ,(10,'2012-08-29','2012-09-29')
    ,(11,'2012-09-30','2012-10-28')
    ,(12,'2012-10-29','2012-11-26')
    ,(13,'2012-11-27','2012-12-26')
    ,(14,'2012-12-27','2013-01-21')
    ,(15,'2013-01-22','2013-02-21')
    
    DECLARE @Duties table (EmployeeId INT NOT NULL, LocationId INT NOT NULL, AssignmentStartDate DATE NOT NULL, AssignmentEndDate DATE NULL, AssignmentValue Decimal(18,2) NOT NULL)
    INSERT @Duties Values 
    (111, 0, '2012-01-02', '2012-01-04', 1.00)
    ,(111, 25, '2012-01-05', '2012-01-10', 1.00)
    ,(111, 30, '2012-01-11', '2012-04-06', 0.25)
    ,(111, 31, '2012-01-11', '2012-04-06', 0.25)
    ,(111, 32, '2012-01-11', '2012-04-06', 0.25)
    ,(111, 33, '2012-01-11', '2012-04-06', 0.25)
    ,(112, 0, '2012-01-05', '2012-01-09', 1)
    ,(112, 70, '2012-01-10', NULL, 1)
    
    
    ---------------------------------------------------
    -- The logic lays below
    ---------------------------------------------------
    
    ;WITH dates(Value, EndDate) AS -- tally table of dates
    (
        SELECT MIN(HiredDate), MAX(ISNULL(ResignDate, GETDATE())) FROM @Employees
        UNION ALL
        SELECT DATEADD(DAY, 1, Value), EndDate FROM dates WHERE DATEADD(DAY, 1, Value) <= EndDate
    ), data AS -- Prepare the non-grouped data, with well declared relations
    (
        SELECT 
            d.Value, e.employeeId, rc.reportingCycleId, du.LocationId, du.AssignmentValue, 
            CASE WHEN e.HiredDate = d.Value THEN 1 ELSE 0 END AS HiredFlag,
            CASE WHEN e.CommencedDate = d.Value THEN 1 ELSE 0 END AS CommencedFlag,
            CASE WHEN e.ResignDate >= d.Value THEN 1 ELSE 0 END AS ResignFlag,
            CASE WHEN e.FinishDate = d.Value THEN 1 ELSE 0 END AS FinishFlag, 
            CASE WHEN rc.startDate = d.Value THEN 1 ELSE 0 END AS CycleStartFlag, 
            CASE WHEN rc.endDate = d.Value THEN 1 ELSE 0 END AS CycleEndFlag, 
            -- Magically group by continuous dates by employee and location
            ROW_NUMBER() OVER (PARTITION BY e.employeeId, du.LocationId ORDER BY Value) rn_loc, 
            DENSE_RANK() OVER (PARTITION BY e.employeeId ORDER BY Value) rn
            -- End Magic
        FROM 
            @Employees e 
            CROSS JOIN dates d
            INNER JOIN @Duties du ON d.Value BETWEEN du.AssignmentStartDate AND ISNULL(du.AssignmentEndDate, '9999-12-31') AND du.employeeId = e.employeeId
            INNER JOIN @ReportingCycles rc ON d.Value BETWEEN rc.startDate AND rc.endDate
    )
    INSERT @EmployeeTimelime
    SELECT 
        MIN(Value) AS fromDate, MAX(Value) AS toDate, reportingCycleId, employeeId, locationId, assignmentValue, 
        MAX(HiredFlag) AS Hired,
        MAX(CommencedFlag) AS Commenced,
        ResignFlag AS Resigned,
        FinishFlag AS Finished,
        MAX(CycleStartFlag) AS startCycle,
        MAX(CycleEndFlag) AS endCycle
    FROM data
    GROUP BY EmployeeId, reportingCycleId, locationId, assignmentValue, ResignFlag, FinishFlag, rn - rn_loc
    --ORDER BY fromDate, toDate, employeeId, reportingCycleId, LocationId
    OPTION (MAXRECURSION 0)
    
    SELECT * FROM @EmployeeTimelime WHERE employeeId = '111'
    

    SQL Fiddle