Search code examples
sql-servert-sqlnullinformation-schema

T SQL Top 10 list of NULL Columns from a table


I'm using SQL Server 2014. I have a table with multiple columns in. Is it possible to dynamically list the top 10 columns within the table and the percent of the records in each column that is NULL? I do not want to hard code each column name in.

So the result set would be like:

Column       Percent_Null
ABC           100
QWE           75
REW           65
TRW           50

I've started with the below, but how I do get this in a list? Ultimately the results need to go into a table for reporting.

DECLARE @SQL NVARCHAR(MAX)

SELECT @SQL = STUFF((

SELECT ', CAST(SUM(CASE WHEN ' + Quotename(C.COLUMN_NAME) + ' IS NULL THEN 1 ELSE 0 END) * 100.00 
/@TotalCount AS INT) AS [' + C.COLUMN_NAME + ' NULL %]
'
            FROM INFORMATION_SCHEMA.COLUMNS C
            WHERE TABLE_NAME = 'tblMYTable'
                AND TABLE_SCHEMA = 'dbo'
            ORDER BY C.ORDINAL_POSITION
            FOR XML PATH('')
                ,type
            ).value('.', 'nvarchar(max)'), 1, 2, '')


            PRINT @SQL

Solution

  • Blargh, this is ugly, but it appears to do what you're after:

    DECLARE @Table sysname = N'icp_yyclient',
            @Schema sysname = N'dbo';
    
    DECLARE @SQL nvarchar(MAX),
            @CRLF nchar(2) = NCHAR(13) + NCHAR(10);
    
    --First get a Count, which will be one column per column    
    SET @SQL = N'WITH Counts AS(' + @CRLF +
               N'    SELECT ' + 
               STUFF((SELECT N',' + @CRLF +
                             N'           (COUNT(CASE WHEN ' + QUOTENAME(C.[name]) + N' IS NULL THEN 1 END) * 1.) / COUNT(*) AS ' + QUOTENAME(c.[name] + N'_PercNULL')
                      FROM sys.schemas s
                           JOIN sys.tables t ON s.schema_id = t.schema_id
                           JOIN sys.columns c ON t.object_id = c.object_id
                      WHERE s.name = @Schema
                        AND t.name = @Table
                      ORDER BY c.column_id
                      FOR XML PATH(N''),TYPE).value('.','nvarchar(MAX)'),1,14,N'') + @CRLF +
               N'    FROM ' + QUOTENAME(@Schema) + N'.' + QUOTENAME(@Table) + N')' + @CRLF +
    --Now that we have a COUNT of column, we need to unpivot the data
               N'SELECT TOP(10) V.*' + @CRLF +
               N'FROM Counts C' + @CRLF +
               N'     CROSS APPLY(VALUES' +
               STUFF((SELECT N',' + @CRLF +
                             N'                       (N' + QUOTENAME(C.[name],'''') + N', C.' + QUOTENAME(c.[name] + N'_PercNULL') + N')'
                      FROM sys.schemas s
                           JOIN sys.tables t ON s.schema_id = t.schema_id
                           JOIN sys.columns c ON t.object_id = c.object_id
                      WHERE s.name = @Schema
                        AND t.name = @Table
                      ORDER BY c.column_id
                      FOR XML PATH(N''),TYPE).value('.','nvarchar(MAX)'),1,26,N'') + N')V(ColumnName,ColumnPercNULL)' + @CRLF +
              N'ORDER BY V.ColumnPercNULL DESC;';
    
    --Uncommon to see the "mess". I strongly suggest ensuring you have retain CR/LF on in SSMS
    --SELECT @SQL; --PRINT is limited to 4,000 characters, so probably too small.
    
    EXEC sp_executesql @SQL;
    

    For a table I have, (called Asset) this creates the following statement:

    WITH Counts AS(
        SELECT (COUNT(CASE WHEN [AssetID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [AssetID_PercNULL],
               (COUNT(CASE WHEN [AssetName] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [AssetName_PercNULL],
               (COUNT(CASE WHEN [TypeID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [TypeID_PercNULL],
               (COUNT(CASE WHEN [SerialNumber] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [SerialNumber_PercNULL],
               (COUNT(CASE WHEN [BarcodeNumber] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [BarcodeNumber_PercNULL],
               (COUNT(CASE WHEN [Manufacturer] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [Manufacturer_PercNULL],
               (COUNT(CASE WHEN [Model] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [Model_PercNULL],
               (COUNT(CASE WHEN [LocationID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [LocationID_PercNULL],
               (COUNT(CASE WHEN [IPAddress] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [IPAddress_PercNULL],
               (COUNT(CASE WHEN [PurchaseDate] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [PurchaseDate_PercNULL],
               (COUNT(CASE WHEN [SetupDate] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [SetupDate_PercNULL],
               (COUNT(CASE WHEN [DecommissionDate] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [DecommissionDate_PercNULL],
               (COUNT(CASE WHEN [ReasonID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [ReasonID_PercNULL],
               (COUNT(CASE WHEN [DecommissionBy] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [DecommissionBy_PercNULL],
               (COUNT(CASE WHEN [InvoiceID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [InvoiceID_PercNULL],
               (COUNT(CASE WHEN [IsVirtual] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [IsVirtual_PercNULL],
               (COUNT(CASE WHEN [HostAssetID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [HostAssetID_PercNULL],
               (COUNT(CASE WHEN [DestroyDate] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [DestroyDate_PercNULL],
               (COUNT(CASE WHEN [DestroyID] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [DestroyID_PercNULL],
               (COUNT(CASE WHEN [DestroyedBy] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [DestroyedBy_PercNULL],
               (COUNT(CASE WHEN [ValueExVAT] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [ValueExVAT_PercNULL],
               (COUNT(CASE WHEN [VATValue] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [VATValue_PercNULL],
               (COUNT(CASE WHEN [ValueIncVAT] IS NULL THEN 1 END) * 1.) / COUNT(*) AS [ValueIncVAT_PercNULL]
        FROM [app].[Asset])
    SELECT TOP(10) V.*
    FROM Counts C
         CROSS APPLY(VALUES(N'AssetID', C.[AssetID_PercNULL]),
                           (N'AssetName', C.[AssetName_PercNULL]),
                           (N'TypeID', C.[TypeID_PercNULL]),
                           (N'SerialNumber', C.[SerialNumber_PercNULL]),
                           (N'BarcodeNumber', C.[BarcodeNumber_PercNULL]),
                           (N'Manufacturer', C.[Manufacturer_PercNULL]),
                           (N'Model', C.[Model_PercNULL]),
                           (N'LocationID', C.[LocationID_PercNULL]),
                           (N'IPAddress', C.[IPAddress_PercNULL]),
                           (N'PurchaseDate', C.[PurchaseDate_PercNULL]),
                           (N'SetupDate', C.[SetupDate_PercNULL]),
                           (N'DecommissionDate', C.[DecommissionDate_PercNULL]),
                           (N'ReasonID', C.[ReasonID_PercNULL]),
                           (N'DecommissionBy', C.[DecommissionBy_PercNULL]),
                           (N'InvoiceID', C.[InvoiceID_PercNULL]),
                           (N'IsVirtual', C.[IsVirtual_PercNULL]),
                           (N'HostAssetID', C.[HostAssetID_PercNULL]),
                           (N'DestroyDate', C.[DestroyDate_PercNULL]),
                           (N'DestroyID', C.[DestroyID_PercNULL]),
                           (N'DestroyedBy', C.[DestroyedBy_PercNULL]),
                           (N'ValueExVAT', C.[ValueExVAT_PercNULL]),
                           (N'VATValue', C.[VATValue_PercNULL]),
                           (N'ValueIncVAT', C.[ValueIncVAT_PercNULL]))V(ColumnName,ColumnPercNULL)
    ORDER BY V.ColumnPercNULL DESC;
    

    This should give you some insight into how it works out the result.