Search code examples
r · dataframe · typeof

Identify the classes of columns of a dataset and count how many columns are included in each class


Given a dataset like the one below, I want to identify the class() of each variable and then create a data frame that holds, for every class, the number of columns of that class.

# Example data: 5 rows and 24 columns, stored as a tibble-classed data frame.
# Columns that hold one repeated value are written with rep(); the all-NA
# columns at the end are plain logical NA, while "Zirconia Crowns" is a
# numeric (double) NA column.
df <- structure(
  list(
    "Patient MRN" = c(20238782, 20290640, 20159980, 20176641, 20302197),
    "Stainless Steel Crowns" = c(8, 8, 8, 7, 6),
    "Zirconia Crowns" = rep(NA_real_, 5),
    "Canine Zirconia" = c(2, 0, 0, 0, 3),
    "Overbite...5" = c(0.5, 0, 1, 0, 0.1),
    "Overjet...6" = c("1mm", "0mm", "1mm", "1mm", "1mm"),
    "Canine overlap" = c("2mm", "1mm", "2mm", "0mm", "1mm"),
    "Posterior open bite" = rep("N", 5),
    "Overbite...9" = c(0, 0, 0.5, -0.3, -0.2),
    "Overjet...10" = c("2mm", "0mm", "1mm", "1mm", "1mm"),
    "Canine Overlap...11" = c("1mm", "-1mm", "1mm", "-1mm", "1mm"),
    "Posterior Open Bite...12" = rep("N", 5),
    "Overbite...13" = c(0, 0, 1, -0.2, 0.1),
    "Overjet...14" = c("0", "1mm", "1mm", "1mm", "1mm"),
    "Canine Overlap...15" = c("2mm", "1mm", "2mm", "-1mm", "1mm"),
    "Posterior Open Bite...16" = rep("N", 5),
    "Change in Overbite" = c(0, 0, 0.5, 0.1, 0.3),
    "Change in Canine Overlap (mm)" = c(1, 2, 1, 0, 0),
    "Change Score" = c(1, 1, 2, 1, 1),
    "Date" = c("45139", "45145", "45162", "45146", "45148"),
    "Overbite...21" = rep(NA, 5),
    "Overjet...22" = rep(NA, 5),
    "Canine Overlap...23" = rep(NA, 5),
    "Posterior Open Bite...24" = rep(NA, 5)
  ),
  # Compact form of automatic row names 1..5.
  row.names = c(NA_integer_, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Summary of variable classes: a named character vector with one entry per
# column of df. summary() reports per-column statistics rather than classes,
# so class() is queried directly. Only the first class is kept so that a
# column carrying several classes still contributes exactly one label.
summary_data <- vapply(df, function(column) class(column)[1L], character(1))

# Creating a dataframe with counts: one row per class, where `variable` is
# the class name and `count` is the number of columns having that class.
counts_dataframe <- as.data.frame(
  table(variable = summary_data),
  responseName = "count",
  stringsAsFactors = FALSE
)

Solution

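  • How about this? It records the class of every column, reshapes the result to one row per column, and then counts the rows per class: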

    library(tidyverse)
    
    # Count how many columns of df belong to each class.
    df %>%
      # class() may return several values (e.g. "POSIXct" "POSIXt"); collapse
      # them into one label so each column yields exactly one value.
      summarise(across(everything(), ~ paste(class(.x), collapse = "/"))) %>%
      # One row per column: its name in `Column`, its class label in `Type`.
      pivot_longer(everything(), names_to = "Column", values_to = "Type") %>%
      # Number of columns per class; the result is already ungrouped.
      count(Type, name = "N")
    # A tibble: 3 × 2
      Type          N
      <chr>     <int>
    1 character    10
    2 logical       4
    3 numeric      10