Search code examples
mysqlloopsdynamicfrequency

Loop through columns and run frequency (count*) on each column in MySQL


I'm trying to write a script that takes a simple query like this:

select col1, count(*)
from city
group by col1
order by 2 desc

And does it for every column. Ideally it'd be nice if the results were side-by-side (can you union columns?)

EDIT:

I figured it out. It took 3.5 hours but I got there. First time using stored procedures and dynamic SQL so it was a good learning experience.

---- This stored procedure takes the database name and table name and will run a frequency on each column.

DELIMITER $$
 DROP PROCEDURE IF EXISTS Data_Audit$$
 CREATE PROCEDURE Data_Audit(db_name varchar(100), tbl_name varchar(100))
       BEGIN         
                -- set up variables
               DECLARE col_number  int;
                    DECLARE sql_code varchar(255);            
               SET col_number =  0;
               SET @total_rows = 0;
               SET @col_count = 0;


               -- Set variable to number of columns. Used for number of loops
               SELECT COUNT(*)
               into @col_count
                    FROM INFORMATION_SCHEMA.COLUMNS
                    WHERE table_schema = db_name 
                    AND table_name = tbl_name;


                    -- Set variable to number of rows           
                    set @row_count_code = concat ('set @row_count = (select count(*) FROM ',tbl_name,')');                  

                    PREPARE stmt FROM @row_count_code;
                    EXECUTE stmt;
                    DEALLOCATE PREPARE stmt; 


                    set @drop_new_table = concat('
                    drop table if exists ',
                    db_name,
                    '.',
                    tbl_name,
                    '_frequency;');


                    PREPARE stmt FROM @drop_new_table;
                    EXECUTE stmt;
                    DEALLOCATE PREPARE stmt; 

                    -- Create basic table structure 

                    set @create_new_table = concat('
                    create table ',
                    db_name,
                    '.',
                    tbl_name,
                    '_frequency',
                    ' (Column_Value varchar(100), Frequency varchar(100),Frequency_percent varchar(50))'
                    ';');   


                    -- select @create_new_table                 
                    PREPARE stmt FROM @create_new_table;
                    EXECUTE stmt;
                    DEALLOCATE PREPARE stmt;      


                    -- Loop through columns
               WHILE col_number  < @col_count DO
                            SET  col_number = col_number + 1;
                           -- SET @sql = NULL;
                          --  set @col_num = col_number;
                                    SELECT column_name 
                                     INTO @sql_code
                                      FROM INFORMATION_SCHEMA.COLUMNS
                                     WHERE table_schema = db_name
                                       AND table_name = tbl_name
                                       AND ordinal_position = col_number;


                                     -- This is the main query. Inserts each time into the new table 
                                     SET @sql = CONCAT('insert into ',
                                                             db_name,
                                                              '.',
                                                              tbl_name,
                                                              '_frequency Select ''', upper(@sql_code), ''',''Frequency'',''Frequency_percent'' UNION ALL (SELECT `',
                                                             @sql_code,
                                                             '` , count(*), concat(truncate((count(*) / ',
                                                             @row_count,
                                                             ') *100,2),''%'') as Frequency FROM ',
                                                             tbl_name,
                                                             ' group by `',
                                                             @sql_code,
                                                             '` order by 2 desc limit 30) UNION ALL Select '''','''',''''');                                


                                    PREPARE stmt FROM @sql;
                                    EXECUTE stmt;
                                    DEALLOCATE PREPARE stmt;                


              END WHILE;       

     END$$
   DELIMITER ;


call Data_Audit('world','country');

Solution

  • I figured it out. It took 3.5 hours but I got there. First time using stored procedures and dynamic SQL so it was a good learning experience.

    ---- This stored procedure takes the database name and table name and will run a frequency on each column.
    
    DELIMITER $$
     DROP PROCEDURE IF EXISTS Data_Audit$$
     CREATE PROCEDURE Data_Audit(db_name varchar(100), tbl_name varchar(100))
           BEGIN         
                    -- set up variables
                   DECLARE col_number  int;
                        DECLARE sql_code varchar(255);            
                   SET col_number =  0;
                   SET @total_rows = 0;
                   SET @col_count = 0;
    
    
                   -- Set variable to number of columns. Used for number of loops
                   SELECT COUNT(*)
                   into @col_count
                        FROM INFORMATION_SCHEMA.COLUMNS
                        WHERE table_schema = db_name 
                        AND table_name = tbl_name;
    
    
                        -- Set variable to number of rows           
                        set @row_count_code = concat ('set @row_count = (select count(*) FROM ',tbl_name,')');                  
    
                        PREPARE stmt FROM @row_count_code;
                        EXECUTE stmt;
                        DEALLOCATE PREPARE stmt; 
    
    
                        set @drop_new_table = concat('
                        drop table if exists ',
                        db_name,
                        '.',
                        tbl_name,
                        '_frequency;');
    
    
                        PREPARE stmt FROM @drop_new_table;
                        EXECUTE stmt;
                        DEALLOCATE PREPARE stmt; 
    
                        -- Create basic table structure 
    
                        set @create_new_table = concat('
                        create table ',
                        db_name,
                        '.',
                        tbl_name,
                        '_frequency',
                        ' (Column_Value varchar(100), Frequency varchar(100),Frequency_percent varchar(50))'
                        ';');   
    
    
                        -- select @create_new_table                 
                        PREPARE stmt FROM @create_new_table;
                        EXECUTE stmt;
                        DEALLOCATE PREPARE stmt;      
    
    
                        -- Loop through columns
                   WHILE col_number  < @col_count DO
                                SET  col_number = col_number + 1;
                               -- SET @sql = NULL;
                              --  set @col_num = col_number;
                                        SELECT column_name 
                                         INTO @sql_code
                                          FROM INFORMATION_SCHEMA.COLUMNS
                                         WHERE table_schema = db_name
                                           AND table_name = tbl_name
                                           AND ordinal_position = col_number;
    
    
                                         -- This is the main query. Inserts each time into the new table 
                                         SET @sql = CONCAT('insert into ',
                                                                 db_name,
                                                                  '.',
                                                                  tbl_name,
                                                                  '_frequency Select ''', upper(@sql_code), ''',''Frequency'',''Frequency_percent'' UNION ALL (SELECT `',
                                                                 @sql_code,
                                                                 '` , count(*), concat(truncate((count(*) / ',
                                                                 @row_count,
                                                                 ') *100,2),''%'') as Frequency FROM ',
                                                                 tbl_name,
                                                                 ' group by `',
                                                                 @sql_code,
                                                                 '` order by 2 desc limit 30) UNION ALL Select '''','''',''''');                                
    
    
                                        PREPARE stmt FROM @sql;
                                        EXECUTE stmt;
                                        DEALLOCATE PREPARE stmt;                
    
    
                  END WHILE;       
    
         END$$
       DELIMITER ;
    
    
    call Data_Audit('world','country');