Search code examples
r, dplyr, group-by, nest

Using dplyr to nest or group two variables, then perform the Cronbach's alpha function or other statistics to the data


In psychology, this kind of dataset presented below is pretty common

original ds

I would like to group the data by age (variable = quest), then by scale (com_a4_1:com_a4_6, gm_a4_1:gm_a4_6, etc.), and then apply a reliability function (psych::alpha) to each group.

I successfully wrote this code:

# Cronbach's alpha (psych::alpha) per age interval, with every "_a4_" item
# pooled together -- the items are not yet split by scale prefix here.
# NOTE(review): inside do(), `.` presumably still carries the `quest` column,
# so alpha() would also see it as an item -- confirm.
d %>% 
  select(quest,contains("_a4_")) %>% # keep the age variable plus every column whose name contains "_a4_"
  group_by(quest) %>%  # one group per age interval (quest)
  do(alpha(.)$total) # run alpha() on each group's rows and keep only its `total` element

result

However, I have not been able to "sub-nest" the data by the scales' items.

As far as I can tell, I'll have to pivot my data and then group or nest it. However, I haven't had any success so far. My expected result is something similar to the image below. There are "two nested results": the first is grouped by scale (e.g. com_a4_1:com_a4_6) and the second is grouped by age (quest).

expected output

Fake data and code are below

library(psych)
library(tidyverse)
# Cronbach's alpha (psych::alpha) per age interval, with every "_a4_" item
# pooled together -- the items are not yet split by scale prefix here.
# `d` is only created by the structure() assignment further down, so that
# assignment has to be run before this pipeline.
d %>% 
  select(quest,contains("_a4_")) %>% # keep the age variable plus every column whose name contains "_a4_"
  group_by(quest) %>%  # one group per age interval (quest)
  do(alpha(.)$total) # run alpha() on each group's rows and keep only its `total` element


# Fake example data: 41 rows. `quest` holds the age interval (2, 4 or 6);
# every other column is one item named <scale>_a4_<item number>, with
# responses coded 0, 5 or 10. Five scales (com, gm, fm, cg, ps) x six items.
d <- structure(
  list(
    quest = c(
      6, 4, 2, 4, 2, 6, 2, 4, 2, 2, 4, 2,
      6, 4, 4, 2, 2, 4, 2, 6, 2, 2, 4, 6, 6, 4, 4, 4, 2, 6, 4, 2, 6,
      4, 6, 2, 2, 4, 6, 4, 2
    ),
    com_a4_1 = c(
      10, 0, 10, 10, 5, 10, 5,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 0, 10,
      10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10,
      10, 10
    ),
    com_a4_2 = c(
      10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 10, 5,
      10, 10, 10, 5, 0, 10, 10, 10, 10, 0, 10, 10, 10, 10
    ),
    com_a4_3 = c(
      10,
      5, 0, 5, 10, 5, 5, 10, 10, 10, 10, 10, 5, 5, 10, 10, 5, 10, 10,
      10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10,
      0, 10, 5, 10, 10
    ),
    com_a4_4 = c(
      10, 0, 0, 10, 5, 10, 10, 10,
      10, 5, 5, 10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10,
      10, 0, 10, 5, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10
    ),
    com_a4_5 = c(
      10, 0, 0, 5, 0, 10, 5, 10, 10, 5, 10, 10, 0,
      10, 10, 10, 0, 10, 5, 10, 0, 0, 10, 0, 10, 10, 10, 10, 5,
      0, 10, 5, 5, 10, 10, 10, 0, 10, 10, 10, 10
    ),
    com_a4_6 = c(
      5,
      10, 0, 10, 10, 5, 10, 10, 10, 0, 10, 10, 5, 10, 10, 10, 10,
      10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 5, 10,
      5, 10, 5, 10, 0, 10, 5, 10, 10
    ),
    gm_a4_1 = c(
      10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10
    ),
    gm_a4_2 = c(
      10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 0, 10, 10,
      5, 10, 10, 5, 10, 10, 10, 10
    ),
    gm_a4_3 = c(
      10, 10, 10, 10,
      10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 10, 10,
      10, 10, 5, 10, 10, 10, 10
    ),
    gm_a4_4 = c(
      0, 5, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5,
      10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 5, 5, 5, 10,
      10, 10, 10, 10, 10
    ),
    gm_a4_5 = c(
      10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 5, 10,
      5, 10, 10, 10, 10
    ),
    gm_a4_6 = c(
      0, 10, 5, 5, 10, 5, 5, 10,
      10, 5, 10, 10, 0, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 0,
      10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 0, 10, 0, 10, 10,
      10, 10
    ),
    fm_a4_1 = c(
      10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 10, 10, 0, 5,
      10, 10, 10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 10
    ),
    fm_a4_2 = c(
      10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 5,
      10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10
    ),
    fm_a4_3 = c(
      0,
      5, 10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 5, 10, 5, 5, 5,
      10, 10, 5, 0, 10, 5, 10, 5, 10, 10, 0, 10, 10, 5, 10, 10,
      10, 0, 10, 0, 10, 10, 10, 10
    ),
    fm_a4_4 = c(
      10, 5, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 0, 10, 10, 10,
      10, 10, 0, 10, 10, 10, 10
    ),
    fm_a4_5 = c(
      0, 5, 10, 10, 10,
      0, 10, 10, 10, 10, 10, 10, 0, 10, 10, 5, 10, 10, 5, 0, 10,
      10, 10, 10, 10, 10, 5, 10, 10, 0, 5, 10, 0, 10, 0, 5, 5,
      5, 10, 10, 10
    ),
    fm_a4_6 = c(
      10, 5, 5, 0, 0, 5, 10, 10, 10,
      0, 10, 10, 5, 10, 10, 10, 0, 10, 0, 10, 10, 0, 10, 10, 5,
      0, 0, 10, 10, 10, 0, 10, 10, 5, 5, 10, 0, 0, 10, 10, 5
    ),
    cg_a4_1 = c(
      10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 5, 0,
      10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10, 10, 10
    ),
    cg_a4_2 = c(
      5,
      10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10,
      10, 10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10
    ),
    cg_a4_3 = c(
      10,
      10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 5, 10, 10, 10,
      5, 10, 10, 10, 10, 0, 10, 10, 5, 10, 5, 10, 10, 10, 5, 10,
      10, 10, 10, 10, 5, 10, 10, 10, 10
    ),
    cg_a4_4 = c(
      10, 10, 0,
      5, 5, 5, 10, 10, 10, 5, 10, 10, 0, 5, 10, 10, 5, 10, 10,
      10, 10, 0, 5, 10, 10, 5, 0, 0, 10, 10, 0, 10, 0, 10, 10,
      5, 0, 5, 5, 10, 10
    ),
    cg_a4_5 = c(
      5, 0, 0, 5, 0, 10, 5, 10,
      10, 0, 10, 10, 10, 10, 5, 10, 0, 10, 0, 10, 0, 0, 10, 10,
      5, 10, 5, 10, 5, 5, 5, 0, 10, 10, 5, 10, 0, 10, 10, 10, 10
    ),
    cg_a4_6 = c(
      0, 0, 5, 10, 10, 10, 10, 10, 0, 10, 5, 10,
      10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10,
      5, 10, 0, 10, 10, 5, 5, 10, 5, 10, 10, 10, 10
    ),
    ps_a4_1 = c(
      10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 10, 5, 5, 10, 5, 10, 10, 10, 10
    ),
    ps_a4_2 = c(
      0, 10,
      10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      10, 5, 10, 5, 10, 10, 10, 5, 10, 10, 10, 5, 0, 10, 10, 10,
      5, 0, 10, 5, 10, 10, 10, 10
    ),
    ps_a4_3 = c(
      10, 0, 10, 5, 5,
      10, 5, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
      5, 10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 5,
      0, 10, 5, 10, 10
    ),
    ps_a4_4 = c(
      10, 10, 10, 10, 5, 10, 5,
      10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10,
      10, 10, 10, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 5, 5, 10,
      10, 10, 10
    ),
    ps_a4_5 = c(
      5, 5, 10, 5, 10, 5, 10, 10, 0, 0,
      10, 10, 5, 10, 10, 10, 10, 10, 0, 10, 5, 5, 5, 10, 0, 10,
      5, 10, 5, 0, 10, 10, 10, 10, 0, 5, 0, 5, 10, 10, 5
    ),
    ps_a4_6 = c(
      5,
      5, 0, 5, 0, 10, 0, 10, 5, 5, 10, 10, 5, 10, 10, 10, 0, 10,
      5, 10, 5, 0, 5, 10, 5, 10, 5, 0, 5, 10, 0, 0, 10, 5, 0, 5,
      0, 10, 10, 10, 10
    )
  ),
  row.names = c(NA, -41L),
  class = "data.frame"
)

Solution

  • I followed your idea of pivoting longer, using pivot_longer() from tidyr to put the scale groups in rows but leave the items in columns. (The last two examples in the documentation for pivot_longer() are my go-to when trying to remember how to do this.)

    However, this relies on you having the same number of items for each scale; I'm not sure how it will hold up for varying items per scale.

    Once things are in a longer form, use a nest_by() on quest and the scales variable followed by mutate() to nest and calculate the alpha for each row.

    I didn't paste all the warnings and messages here, but there were loads. You can also remove the data column at the end if you don't need it any longer.

    library(psych)
    library(dplyr)
    library(tidyr)
    
    # Cronbach's alpha for every age interval (quest) x scale combination.
    #
    # pivot_longer() splits each item name into a scale prefix and an item
    # number, e.g. "com_a4_1" -> scale "com_a4_", item "1". Because of ".value",
    # the item numbers become the new column names, so each row is one
    # respondent on one scale. The pattern is anchored on the trailing digits so
    # that multi-digit items (e.g. "com_a4_10") stay distinct instead of
    # collapsing onto their first digit.
    d %>%
        pivot_longer(cols = -quest,
                     names_to = c("scale", ".value"),
                     names_pattern = "(.+_)(\\d+)$") %>%
        # One row per quest x scale; that group's item columns are nested in
        # the list-column `data`.
        nest_by(quest, scale) %>%
        # nest_by() returns a rowwise tibble, so `data` is the current row's
        # data frame. The unnamed data frame in alpha()$total is spread into
        # new columns (raw_alpha, std.alpha, ...).
        mutate(alpha(data)$total)
    
    #> # A tibble: 15 x 12
    #> # Rowwise:  quest, scale
    #>    quest scale      data raw_alpha std.alpha `G6(smc)` average_r    `S/N`    ase
    #>    <dbl> <chr>  <list<t>     <dbl>     <dbl>     <dbl>     <dbl>    <dbl>  <dbl>
    #>  1     2 cg_a4_ [16 x 6]     0.619   0.594      0.728    0.226    1.46    0.141 
    #>  2     2 com_a~ [16 x 6]     0.810   0.808      0.881    0.412    4.20    0.0719
    #>  3     2 fm_a4_ [16 x 6]     0.400   0.421      0.546    0.108    0.728   0.221 
    #>  4     2 gm_a4_ [16 x 6]     0.842   0.952      0.745    0.831   19.7     0.0592
    #>  5     2 ps_a4_ [16 x 6]     0.684   0.753      0.870    0.337    3.05    0.123 
    #>  6     4 cg_a4_ [15 x 6]     0.677   0.696      0.807    0.276    2.29    0.126 
    #>  7     4 com_a~ [15 x 6]     0.673   0.613      0.842    0.209    1.58    0.110 
    #>  8     4 fm_a4_ [15 x 6]     0.669   0.714      0.811    0.294    2.50    0.124 
    #>  9     4 gm_a4_ [15 x 6]     0.811   0.759      0.873    0.386    3.15    0.0389
    #> 10     4 ps_a4_ [15 x 6]     0.533   0.551      0.605    0.170    1.23    0.161 
    #> 11     6 cg_a4_ [10 x 6]    -0.168  -0.00601    0.550   -0.00120 -0.00597 0.621 
    #> 12     6 com_a~ [10 x 6]    -0.184   0.228      0.486    0.0686   0.295   0.644 
    #> 13     6 fm_a4_ [10 x 6]     0.508   0.542      0.727    0.191    1.18    0.248 
    #> 14     6 gm_a4_ [10 x 6]    -0.075  -0.492     -0.0806  -0.0582  -0.330   0.398 
    #> 15     6 ps_a4_ [10 x 6]     0.844   0.879      0.903    0.592    7.26    0.0710
    #> # ... with 3 more variables: mean <dbl>, sd <dbl>, median_r <dbl>
    

    Created on 2021-09-23 by the reprex package (v2.0.0)