Search code examples
sqlsql-servert-sqlpivot

Why does the pivot-function in SQL Server output multiple rows of data, if a distinct value in non-pivoted data is present?


I'm trying to Pivot a table. Can somebody explain me why this works as expected (results in one row of data):

-- This is what I would expect to get 
;WITH My_Data AS 
(
    SELECT  
        '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' AS ENTRY_UID  
        ,'Entry 1' AS ENTRY_Lang_DE
        ,'' AS ENTRY_Lang_EN

    UNION ALL 

    SELECT  
        '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' AS ENTRY_UID  
        ,'Entry 2' AS ENTRY_Lang_DE
        ,NULL AS ENTRY_Lang_EN
)
-- SELECT * FROM My_Data 


    SELECT
        MAX(CASE WHEN ENTRY_UID = '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' THEN ENTRY_Lang_DE END) AS [4A8C72D8-F02A-44E4-8E5A-23451CB436B1],
        MAX(CASE WHEN ENTRY_UID = '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' THEN ENTRY_Lang_DE END) AS [5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E]
    FROM 
    (
        SELECT
             ENTRY_UID,
             ENTRY_Lang_DE
            ,ENTRY_Lang_EN
        FROM My_Data 
        -- WHERE ENTRY_UID <> '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E'
        -- WHERE ENTRY_UID = '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E'
        -- ORDER BY ENTRY_Lang_DE
    ) AS T_My_Data 
    ;

But the query below using the pivot-keyword creates two rows, one with NULL values, and another that is the correct output...
Why?
I mean obviously it's because ENTRY_Lang_EN is NULL and not string.empty, but why?
It creates one entry for every distinct ENTRY_Lang_EN?
ENTRY_Lang_EN is not subject to the pivotization process, why does it do that?
As one can see with the above query, the result is clearly not equal...

-- This is what PIVOT actually does ... 
;WITH My_Data AS 
(
    SELECT  
        '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' AS ENTRY_UID  
        ,'Entry 1' AS ENTRY_Lang_DE
        ,'' AS ENTRY_Lang_EN

    UNION ALL 

    SELECT  
        '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' AS ENTRY_UID  
        ,'Entry 2' AS ENTRY_Lang_DE
        ,NULL AS ENTRY_Lang_EN
)
-- SELECT * FROM My_Data 

SELECT pvt.* FROM My_Data 

PIVOT 
( 
    MAX( ENTRY_Lang_DE ) 
    FOR ENTRY_UID IN 
    ( 
         [4A8C72D8-F02A-44E4-8E5A-23451CB436B1] 
        ,[5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E]
    ) 
) AS pvt 

Somehow it must be grouping on other fields, right?

I would imagine something along the lines of

-- This is what it would do if it would group 
;WITH My_Data AS 
(
    SELECT  
        '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' AS ENTRY_UID  
        ,'Entry 1' AS ENTRY_Lang_DE
        ,'' AS ENTRY_Lang_EN

    UNION ALL 

    SELECT  
        '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' AS ENTRY_UID  
        ,'Entry 2' AS ENTRY_Lang_DE
        ,'a' AS ENTRY_Lang_EN
)
SELECT
     CASE WHEN ENTRY_UID = '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' THEN MAX(ENTRY_Lang_DE) END AS A
    ,CASE WHEN ENTRY_UID = '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' THEN MAX(ENTRY_Lang_DE) END AS B 
FROM My_Data
GROUP BY ENTRY_UID
;

Except that it groups by all fields?
But if that is so, then why does it not output a row for every UID?

What exactly does the pivot-command do behind the scenes?
Or in other words, what would be the required SQL without using the pivot command (aka group by with case and max) to get to this strange result?


Solution

  • Ah, got it myselfs. What it does is NOT grouping by the columns contained in the AGGREGATE OR the FOR clause selection-source, but by all other columns.

    ;WITH My_Data AS 
    (
        SELECT  
            '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' AS ENTRY_UID  
            ,'Entry 1' AS ENTRY_Lang_DE
            ,'' AS ENTRY_Lang_EN
            ,'' AS ENTRY_Lang_FR 
    
        UNION ALL 
        
        SELECT  
            '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' AS ENTRY_UID  
            ,'Entry 2' AS ENTRY_Lang_DE
            ,NULL AS ENTRY_Lang_EN
            ,'' AS ENTRY_Lang_FR 
    
        UNION ALL 
    
        SELECT 
            '88908037-DB5A-4F43-ACAA-3863550A49B1' AS ENTRY_UID  
            ,'Entry 3' AS ENTRY_Lang_DE
            ,NULL AS ENTRY_Lang_EN
            ,'b' AS ENTRY_Lang_FR 
    
        UNION ALL 
    
        SELECT 
             '00225A88-4EF1-46D2-819D-DC130FAFEFEC' AS ENTRY_UID 
            ,'Entry 4' AS ENTRY_Lang_DE 
            ,NULL AS ENTRY_Lang_EN 
            ,'a' AS ENTRY_Lang_FR 
    )
    
    SELECT 
         ENTRY_Lang_EN 
        ,ENTRY_Lang_FR 
        ,MAX(CASE WHEN ENTRY_UID = '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' THEN ENTRY_Lang_DE END) AS [4A8C72D8-F02A-44E4-8E5A-23451CB436B1] 
        ,MAX(CASE WHEN ENTRY_UID = '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' THEN ENTRY_Lang_DE END) AS [5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E]
    FROM My_Data
    GROUP BY 
        -- do not group by ENTRY_UID or ENTRY_Lang_DE 
        ENTRY_Lang_EN, ENTRY_Lang_FR 
    

    is the equivalent of

    ;WITH My_Data AS 
    (
        SELECT  
            '4A8C72D8-F02A-44E4-8E5A-23451CB436B1' AS ENTRY_UID  
            ,'Entry 1' AS ENTRY_Lang_DE
            ,'' AS ENTRY_Lang_EN
            ,'' AS ENTRY_Lang_FR 
    
        UNION ALL 
        
        SELECT  
            '5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E' AS ENTRY_UID  
            ,'Entry 2' AS ENTRY_Lang_DE
            ,NULL AS ENTRY_Lang_EN
            ,'' AS ENTRY_Lang_FR 
    
        UNION ALL 
    
        SELECT 
            '88908037-DB5A-4F43-ACAA-3863550A49B1' AS ENTRY_UID  
            ,'Entry 3' AS ENTRY_Lang_DE
            ,NULL AS ENTRY_Lang_EN
            ,'b' AS ENTRY_Lang_FR 
    
        UNION ALL 
    
        SELECT 
             '00225A88-4EF1-46D2-819D-DC130FAFEFEC' AS ENTRY_UID 
            ,'Entry 4' AS ENTRY_Lang_DE 
            ,NULL AS ENTRY_Lang_EN 
            ,'a' AS ENTRY_Lang_FR 
    )
    SELECT pvt.* FROM My_Data 
    
    PIVOT 
    ( 
        MAX( ENTRY_Lang_DE ) 
        FOR ENTRY_UID IN 
        ( 
             [4A8C72D8-F02A-44E4-8E5A-23451CB436B1] 
            ,[5688ABC1-D2C5-4FE7-9E0A-6195CFF0563E]
        ) 
    ) AS pvt