Search code examples
rdataframedplyrsummarize

Conditional dplyr::summarize() of a data.frame in R


In my DATA below, I wonder how to summarize() the number of 6 different Ethnicities (Hispanic, AmIndian, Asian, White, Pacific, AsiaPacific) chosen ("Y") when Ethinc_overall!="B"?

library(tidyverse)

DATA <- read.table(h=TRUE,text=
"EL_Type Language Black Hispanic AmIndian Asian White Pacific AsiaPacific Ethinc_overall
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     Y     N       N           Y              M
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    Y     N     Y       N           N              H
 Current  English Black        Y    Y     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    Y     N     Y       N           N              H
 Current  English Black        Y    Y     N     Y       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     Y       N           N              M
 Current  English Black        N    Y     N     N       N           N              M
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     Y     N       N           Y              M
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    Y     N     N       N           N              H
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        Y    N     N     Y       N           N              H
 Current  English Black        Y    Y     N     N       Y           Y              H
 Current  English Black        Y    Y     N     N       Y           Y              H
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    Y     Y     Y       Y           Y              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     Y       N           N              H
 Current  English Black        Y    N     N     Y       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     Y       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     Y       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        N    N     N     N       N           N              B
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        Y    N     N     N       N           N              H
 Current  English Black        N    N     N     Y       N           N              M
 Current  English Black        N    N     N     N       N           N              B")

Solution

  • We can summarize across the wanted columns by group, and then convert the results from wide to long format:

    DATA %>% 
      filter(Ethinc_overall != "B") %>% 
      summarise(across(Hispanic:AsiaPacific, 
                       list(Y = ~ sum(. == "Y"))), .by = Ethinc_overall) %>% 
      pivot_longer(-Ethinc_overall, values_to = "count") %>% 
      separate(name, into = c("ethnicity", "Y")) %>% 
      # filter(count > 0) %>% 
      select(-Y)
    
    #> # A tibble: 12 x 3
    #>    Ethinc_overall ethnicity   count
    #>    <chr>          <chr>       <int>
    #>  1 H              Hispanic       23
    #>  2 H              AmIndian        8
    #>  3 H              Asian           1
    #>  4 H              White           9
    #>  5 H              Pacific         3
    #>  6 H              AsiaPacific     3
    #>  7 M              Hispanic        0
    #>  8 M              AmIndian        1
    #>  9 M              Asian           2
    #> 10 M              White           2
    #> 11 M              Pacific         0
    #> 12 M              AsiaPacific     2
    

    or better, start with pivot_longer and then summarise:

    DATA %>% 
      filter(Ethinc_overall != "B") %>% 
      pivot_longer(Hispanic:AsiaPacific, names_to = "ethnicity") %>% 
      summarise(count = sum(value =="Y"), .by = c(Ethinc_overall, ethnicity))
    
    #> # A tibble: 12 x 3
    #>    Ethinc_overall ethnicity   count
    #>    <chr>          <chr>       <int>
    #>  1 H              Hispanic       23
    #>  2 H              AmIndian        8
    #>  3 H              Asian           1
    #>  4 H              White           9
    #>  5 H              Pacific         3
    #>  6 H              AsiaPacific     3
    #>  7 M              Hispanic        0
    #>  8 M              AmIndian        1
    #>  9 M              Asian           2
    #> 10 M              White           2
    #> 11 M              Pacific         0
    #> 12 M              AsiaPacific     2
    

    Created on 2024-02-09 with reprex v2.0.2