Search code examples
sqlsql-serversql-server-2022

How to write this windowing query in T-SQL?


I have some tables to store flying data:

CREATE TABLE [dbo].[wings]
(
    [Id] [int] NOT NULL,
    [Manufacturer] [varchar](50) NOT NULL,
    [Model] [varchar](50) NULL,
    [Size] [decimal](3, 1) NULL,
    [hoursWhenBought] [tinyint] NULL,
    [purchaseDate] [date] NULL,

    CONSTRAINT [PK__wings__3214EC07E42B45BC] 
        PRIMARY KEY CLUSTERED ([Id] ASC)
)

CREATE TABLE [dbo].[wingServiceHistory]
(
    [wingId] [int] NOT NULL,
    [date] [date] NOT NULL,
    [servicedBy] [varchar](100) NOT NULL,
    [comments] [varchar](200) NULL,

    CONSTRAINT [PK_wingServiceHistory] 
        PRIMARY KEY CLUSTERED ([wingId] ASC, [date] ASC)
)

CREATE TABLE [dbo].[flights]
(
    [Id] [int] NOT NULL,
    [Date] [date] NOT NULL,
    [TakeOffTime] [time](7) NOT NULL,
    [LandingTime] [time](7) NOT NULL,
    [WingId] [int] NULL
)

CREATE CLUSTERED INDEX [ClusteredIndex-Date] 
ON [dbo].[flights] ([Date] ASC)

-- Sample data
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (1, CAST(N'2019-09-02' AS Date), CAST(N'10:00:00' AS Time), CAST(N'12:00:00' AS Time), 3)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (2, CAST(N'2019-09-03' AS Date), CAST(N'09:30:00' AS Time), CAST(N'12:30:00' AS Time), 3)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (3, CAST(N'2020-05-05' AS Date), CAST(N'07:00:00' AS Time), CAST(N'08:45:00' AS Time), 3)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (4, CAST(N'2020-09-28' AS Date), CAST(N'13:00:00' AS Time), CAST(N'15:00:00' AS Time), 3)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (5, CAST(N'2021-01-03' AS Date), CAST(N'17:00:00' AS Time), CAST(N'19:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (6, CAST(N'2021-01-05' AS Date), CAST(N'15:30:00' AS Time), CAST(N'17:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (7, CAST(N'2021-08-25' AS Date), CAST(N'06:00:00' AS Time), CAST(N'08:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (8, CAST(N'2021-08-26' AS Date), CAST(N'07:00:00' AS Time), CAST(N'09:30:00' AS Time), 3)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (9, CAST(N'2021-09-01' AS Date), CAST(N'06:00:00' AS Time), CAST(N'07:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (10, CAST(N'2022-08-10' AS Date), CAST(N'07:00:00' AS Time), CAST(N'09:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (11, CAST(N'2022-10-17' AS Date), CAST(N'15:00:00' AS Time), CAST(N'17:00:00' AS Time), 13)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (12, CAST(N'2022-10-19' AS Date), CAST(N'16:00:00' AS Time), CAST(N'18:00:00' AS Time), 8)
INSERT INTO [dbo].[flights] ([Id], [Date], [TakeOffTime], [LandingTime], [WingId]) 
VALUES (13, CAST(N'2022-12-21' AS Date), CAST(N'13:00:00' AS Time), CAST(N'15:30:00' AS Time), 13)

INSERT INTO [dbo].[wings] ([Id], [Manufacturer], [Model], [Size], [hoursWhenBought], [purchaseDate]) 
VALUES (2, N'Dudek', N'Synthesis LT', CAST(31.0 AS Decimal(3, 1)), 45, CAST(N'2017-11-04' AS Date))
INSERT INTO [dbo].[wings] ([Id], [Manufacturer], [Model], [Size], [hoursWhenBought], [purchaseDate]) 
VALUES (3, N'Dudek', N'Universal 1.1', CAST(28.0 AS Decimal(3, 1)), 0, CAST(N'2019-08-23' AS Date))
INSERT INTO [dbo].[wings] ([Id], [Manufacturer], [Model], [Size], [hoursWhenBought], [purchaseDate]) 
VALUES (8, N'Dudek', N'Nucleon XX', CAST(24.0 AS Decimal(3, 1)), 150, CAST(N'2021-01-02' AS Date))
INSERT INTO [dbo].[wings] ([Id], [Manufacturer], [Model], [Size], [hoursWhenBought], [purchaseDate]) 
VALUES (13, N'Dudek', N'Hadron 3', CAST(20.0 AS Decimal(3, 1)), 3, CAST(N'2022-10-16' AS Date))

INSERT INTO [dbo].[wingServiceHistory] ([wingId], [date], [servicedBy], [comments]) 
VALUES (3, CAST(N'2020-09-21' AS Date), N'Joe Blogs', N'full trim service')
INSERT INTO [dbo].[wingServiceHistory] ([wingId], [date], [servicedBy], [comments]) 
VALUES (8, CAST(N'2021-08-24' AS Date), N'Joe Blogs', N'full trim service')
INSERT INTO [dbo].[wingServiceHistory] ([wingId], [date], [servicedBy], [comments]) 
VALUES (8, CAST(N'2022-08-03' AS Date), N'Joe Blogs', N'full trim service')

This query returns the flight duration and cumulative hours for all wings across all flights:

SELECT
    Id,
    [Date],
    CAST(DATEADD(minute, DATEDIFF(minute, [TakeOffTime], [LandingTime]), 0) AS time) AS Duration,
    CAST ((SUM(DATEDIFF(minute, [TakeOffTime], [LandingTime])) OVER (ORDER BY [Id]) / 60.0) AS DECIMAL(10, 1)) AS CumulativeHours
FROM  
    flights

I need a query that, for each flight in the flights table, ordered by flights.Id, the cumulative hours for each wingId since the last service date from wingServiceHistory or since the purchase date in wings whichever is sooner.

Or, in English, for every flight, how many hours has this wing been flown since the last service, or the number of hours since it was purchased including the hours when bought.

Results set desired is every column in the flights table plus duration, cumulativeHours, cumulativeHoursSinceService

I would expect results to look like

Id Date TakeOffTime LandingTime WingId duration (HH:MM) cumulativeHours cumulativeHoursSinceService
1 2019-09-02 10:00 12:00 3 02:00 2.0 2.0
2 2019-09-03 09:30 12:30 3 03:00 5.0 5.0
3 2020-05-05 07:00 08:45 3 01:45 6.75 1.75
4 2020-09-28 13:00 15:00 3 02:00 8.75 2.0
5 2021-01-03 17:00 19:00 8 02:00 10.75 152.0
6 2021-01-05 15:30 17:00 8 01:30 12.25 153.5
7 2021-08-25 06:00 08:00 8 02:00 14.25 2.0
8 2021-08-26 07:00 09:30 3 02:30 16.75 4.75
9 2021-09-01 06:00 07:00 8 01:00 17.75 3.0
10 2022-08-10 07:00 09:00 8 02:00 19.75 2.0
11 2022-10-17 15:00 17:00 13 02:00 21.75 5.0
12 2022-10-19 16:00 18:00 8 02:00 23.75 4.0
13 2022-12-21 13:00 15:30 13 02:30 26.25 7.5

Solution

  • A good approach for this is a couple of windowed functions to grab what you need. In this case you're also looking for a specific correlated row in the service history, so I opted for an outer apply for that.

    SELECT f.Id, f.Date, f.TakeOffTime, f.LandingTime, w.id AS WingID, CAST(DATEADD(MINUTE,DATEDIFF(MINUTE,TakeOffTime,LandingTime),'00:00:00') AS TIME) AS Duration,
    CAST(SUM(DATEDIFF(MINUTE,TakeOffTime,LandingTime)) OVER (ORDER BY f.id)/60.0 AS DECIMAL(4,2)) AS CumulativeHours,
    CAST(SUM(DATEDIFF(MINUTE,TakeOffTime,LandingTime)) OVER (PARTITION BY w.id, wsh.date ORDER BY f.id)/60.0 AS DECIMAL(4,2))+CASE WHEN wsh.date IS NULL THEN hoursWhenBought ELSE 0 END AS CumulativeHoursSinceService 
      FROM @flights f
        INNER JOIN @wings w
          ON f.WingId = w.Id
        OUTER APPLY (SELECT TOP 1 * FROM @wingServiceHistory wsh WHERE wingid = w.id and date <= f.Date ORDER BY date) wsh
     ORDER BY f.id
    
    Id Date TakeOffTime LandingTime WingID Duration CumulativeHours CumulativeHoursSinceService
    1 2019-09-02 10:00:00.0000000 12:00:00.0000000 3 02:00:00.0000000 2.00 2.00
    2 2019-09-03 09:30:00.0000000 12:30:00.0000000 3 03:00:00.0000000 5.00 5.00
    3 2020-05-05 07:00:00.0000000 08:45:00.0000000 3 01:45:00.0000000 6.75 6.75
    4 2020-09-28 13:00:00.0000000 15:00:00.0000000 3 02:00:00.0000000 8.75 2.00
    5 2021-01-03 17:00:00.0000000 19:00:00.0000000 8 02:00:00.0000000 10.75 152.00
    6 2021-01-05 15:30:00.0000000 17:00:00.0000000 8 01:30:00.0000000 12.25 153.50
    7 2021-08-25 06:00:00.0000000 08:00:00.0000000 8 02:00:00.0000000 14.25 2.00
    8 2021-08-26 07:00:00.0000000 09:30:00.0000000 3 02:30:00.0000000 16.75 4.50
    9 2021-09-01 06:00:00.0000000 07:00:00.0000000 8 01:00:00.0000000 17.75 3.00
    10 2022-08-10 07:00:00.0000000 09:00:00.0000000 8 02:00:00.0000000 19.75 5.00
    11 2022-10-17 15:00:00.0000000 17:00:00.0000000 13 02:00:00.0000000 21.75 5.00
    12 2022-10-19 16:00:00.0000000 18:00:00.0000000 8 02:00:00.0000000 23.75 7.00
    13 2022-12-21 13:00:00.0000000 15:30:00.0000000 13 02:30:00.0000000 26.25 7.50

    Windowed functions allow you to define the window they use by specifying them in the OVER clause:

    PARTITION BY - group by these things ORDER BY - in this order

    With the SUM for the cumulative hours we're asking for the running total, ordered by flight id.

    With the SUM for the hours since service we're again asking for the running total, but this time grouped by the wing id and the service date (so it starts over for each new one) and ordered by flight id.