I'm running the script below on SQL Server 2019 Developer edition. You can run this on your own SQL Server, any database is fine (no specific schema required).
At the bottom of my script I have a query with 2 grouping sets in the GROUP BY clause. The second grouping set contains a single expression as the only column in the grouping set, and I output that same expression in the SELECT clause as a column named 'Month'. The rows that are rendered as a result of this Grouping Set output this Month column as NULL.
I do not expect this. I anticipate that any column who's output is identical to a column specified in a grouping set should contain the value by which the rows of that set were grouped. In short, every row in my result set should have a value in the Month column since that column's expression is in both of my Grouping Sets.
Am I thinking about this incorrectly? Try it on your SQL Server by running this script...
The Query:
IF OBJECT_ID('tempdb..#InventoryItemBatch') IS NOT NULL
DROP TABLE #InventoryItemBatch
CREATE TABLE #InventoryItemBatch
(
[Id] [bigint] IDENTITY(1,1) NOT NULL,
[Quantity] [int] NOT NULL,
[BatchNumber] [varchar](200) NULL,
[InventoryDate] [date] NOT NULL,
[QuantityRemoved] [int] NOT NULL,
[QuantityRemaining] AS ([Quantity]-[QuantityRemoved]),
[PricePerUnit] [decimal](18, 2) NULL,
)
SET NOCOUNT ON
SET IDENTITY_INSERT #InventoryItemBatch ON
MERGE INTO #InventoryItemBatch AS Target
USING (VALUES
(144,543,'Q','1980-11-17',543,751.24)
,(1274,1,'S1','1980-08-17',0,0.00)
,(1469,609,'QA','1980-06-27',609,778.05)
,(1470,95,'QC','1980-11-24',95,887.99)
,(1473,120,'QB','1980-11-14',120,867.20)
,(1475,231,'QE','1980-05-15',231,783.09)
,(1476,109,'Q','1980-04-10',109,778.05)
,(1552,1,'S1','1980-05-28',0,3465.00)
,(3082,1,'S1','1980-05-30',0,20020.00)
,(4396,1,'S1','1980-05-31',0,3465.00)
,(4702,1,'S1','1980-08-17',0,0.00)
,(5114,1,'S1','1980-08-17',0,0.00)
,(5337,1,'S1','1978-04-07',0,1250.00)
,(5549,1,'S1','1980-08-17',0,0.00)
,(6152,1,'S1','1980-08-17',0,0.00)
,(6233,1,'S1','1980-07-22',0,0.00)
,(7616,1,'S1','1980-08-17',0,0.00)
,(9444,1,'S1','1980-04-30',0,3465.00)
,(10821,1,'S1','1980-05-21',0,1480.00)
,(11299,1,'S1','1980-09-14',0,1700.00)
,(17432,1,'S1','1980-02-18',0,1222.00)
,(20492,1,'S1','1980-02-12',0,2040.00)
,(22588,1,'S1','1980-01-01',0,0.00)
,(26842,1,'S1','1980-04-30',0,1593.00)
,(34323,1,'S1','1980-03-27',0,1219.00)
,(34334,1,'S1','1980-05-31',0,1219.00)
,(35223,1,'S1','1980-01-01',0,0.00)
,(35379,1,'S1','1980-02-18',0,1222.00)
,(36384,1,'S1','1980-04-30',0,3465.00)
,(36395,1,'S1','1980-07-13',0,1219.00)
,(43278,1,'S1','1978-06-09',0,850.00)
,(43998,1,'S1','1978-08-18',0,935.00)
,(47460,1,'S1','1978-03-15',0,2276.00)
,(47944,1,'S1','1978-04-07',0,0.00)
,(48926,1,'S1','1978-09-28',0,1350.00)
,(49166,1,'S1','1978-05-12',0,1602.00)
,(49248,1,'S1','1978-09-28',0,0.00)
,(49300,1,'S1','1978-05-12',0,0.00)
,(57492,1,'S1','1978-06-09',0,850.00)
,(57499,1,'S1','1979-06-30',0,963.00)
,(57524,1,'S1','1980-03-03',0,0.00)
) AS Source ([Id],[Quantity],[BatchNumber],[InventoryDate],[QuantityRemoved],[PricePerUnit])
ON (Target.[Id] = Source.[Id])
WHEN NOT MATCHED BY TARGET THEN
INSERT([Id],[Quantity],[BatchNumber],[InventoryDate],[QuantityRemoved],[PricePerUnit])
VALUES(Source.[Id],Source.[Quantity],Source.[BatchNumber],Source.[InventoryDate],Source.[QuantityRemoved],Source.[PricePerUnit])
;
SET IDENTITY_INSERT #InventoryItemBatch OFF
SET NOCOUNT OFF
-- offending query
SELECT
'Grouping-Month' = GROUPING(CONVERT(VARCHAR,YEAR(iib.InventoryDate)) + '.' + CONVERT(VARCHAR,FORMAT(iib.InventoryDate, 'MM')))
, 'Grouping-Id' = GROUPING(iib.Id)
, 'Grouping-DatePurchased' = GROUPING(iib.InventoryDate)
-- this column should have a value in *eveny* row but is missing values for rows produced by the second grouping set
, [Month] = CONVERT(VARCHAR,YEAR(iib.InventoryDate)) + '.' + CONVERT(VARCHAR,FORMAT(iib.InventoryDate, 'MM'))
, Id = iib.Id
, DatePurchased = iib.InventoryDate
, SumPurchaseAmt = Sum(iib.PricePerUnit * iib.Quantity)
FROM #InventoryItemBatch iib
GROUP BY
GROUPING SETS (
(
CONVERT(VARCHAR,YEAR(iib.InventoryDate)) + '.' + CONVERT(VARCHAR,FORMAT(iib.InventoryDate, 'MM'))
, iib.Id
, iib.InventoryDate
)
, (
-- this is the grouping expression that I'm not seeing a value for
CONVERT(VARCHAR,YEAR(iib.InventoryDate)) + '.' + CONVERT(VARCHAR,FORMAT(iib.InventoryDate, 'MM'))
)
)
The Partial Output:
Additional Info:
I also notice that, if you remove the "iib.InventoryDate" column from the first grouping set (and also remove it from the SELECT clause), it actually fixes the issue that I mention here... the rows that are being output by the second grouping set actually DO now contain those missing values in the Month column. Weird!! (right?)
This feels like a bug to me. Am I misunderstanding?
Rewrite the query so that [Month]
is introduced explicitly
with cte as (
select [Id] ,
[Quantity] ,
[BatchNumber] ,
[InventoryDate] ,
[QuantityRemoved] ,
[QuantityRemaining] ,
[PricePerUnit],
[Month] = CONVERT(VARCHAR,YEAR(InventoryDate)) + '.' + CONVERT(VARCHAR,FORMAT(InventoryDate, 'MM'))
from #InventoryItemBatch
)
-- offending query
SELECT
'Grouping-Month' = GROUPING([Month])
, 'Grouping-Id' = GROUPING(iib.Id)
, 'Grouping-DatePurchased' = GROUPING(iib.InventoryDate)
, [Month]
, Id = iib.Id
, DatePurchased = InventoryDate
, SumPurchaseAmt = Sum(iib.PricePerUnit * iib.Quantity)
FROM cte iib
GROUP BY
GROUPING SETS (
(
iib.Id
, iib.InventoryDate
, [Month]
)
, (
[Month]
)
);