In psychology, this kind of dataset presented below is pretty common
I would like to group
all age (variable = quest
), than group all scales (com_a4_1:com_a4_6
; and gm_a4_1:gm_a4_6
, etc) and then apply a reliability function to the data (psych::alpha
).
I successfully create this syntax
d %>%
select(quest,contains("_a4_")) %>% #get the data
group_by(quest) %>% #group by all age interval
do(alpha(.)$total)
However, I'm not being able to "sub" nest using the scales' items.
As far as I imagine, I'll have to pivot my data and then group or nest. However, I'm not having any success at this point. My expected result is something similar to this image below. There are "two nested results". The first result is grouped with the scale (ex: com_a4_1:com_a4_6
) and the second is grouped with the age (quest
)
Fake data and codes are below
library(psych)
library(tidyverse)
d %>%
select(quest,contains("_a4_")) %>% #get the data
group_by(quest) %>% #group by all age interval
do(alpha(.)$total)
d <-structure(list(quest = c(6, 4, 2, 4, 2, 6, 2, 4, 2, 2, 4, 2,
6, 4, 4, 2, 2, 4, 2, 6, 2, 2, 4, 6, 6, 4, 4, 4, 2, 6, 4, 2, 6,
4, 6, 2, 2, 4, 6, 4, 2), com_a4_1 = c(10, 0, 10, 10, 5, 10, 5,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 0, 10,
10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10,
10, 10), com_a4_2 = c(10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 10, 5,
10, 10, 10, 5, 0, 10, 10, 10, 10, 0, 10, 10, 10, 10), com_a4_3 = c(10,
5, 0, 5, 10, 5, 5, 10, 10, 10, 10, 10, 5, 5, 10, 10, 5, 10, 10,
10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10,
0, 10, 5, 10, 10), com_a4_4 = c(10, 0, 0, 10, 5, 10, 10, 10,
10, 5, 5, 10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10,
10, 0, 10, 5, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10),
com_a4_5 = c(10, 0, 0, 5, 0, 10, 5, 10, 10, 5, 10, 10, 0,
10, 10, 10, 0, 10, 5, 10, 0, 0, 10, 0, 10, 10, 10, 10, 5,
0, 10, 5, 5, 10, 10, 10, 0, 10, 10, 10, 10), com_a4_6 = c(5,
10, 0, 10, 10, 5, 10, 10, 10, 0, 10, 10, 5, 10, 10, 10, 10,
10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 5, 10,
5, 10, 5, 10, 0, 10, 5, 10, 10), gm_a4_1 = c(10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10), gm_a4_2 = c(10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 0, 10, 10,
5, 10, 10, 5, 10, 10, 10, 10), gm_a4_3 = c(10, 10, 10, 10,
10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 10, 10,
10, 10, 5, 10, 10, 10, 10), gm_a4_4 = c(0, 5, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5,
10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 5, 5, 5, 10,
10, 10, 10, 10, 10), gm_a4_5 = c(10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 5, 10,
5, 10, 10, 10, 10), gm_a4_6 = c(0, 10, 5, 5, 10, 5, 5, 10,
10, 5, 10, 10, 0, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 0,
10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 0, 10, 0, 10, 10,
10, 10), fm_a4_1 = c(10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 10, 10, 0, 5,
10, 10, 10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 10),
fm_a4_2 = c(10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 5,
10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10), fm_a4_3 = c(0,
5, 10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 5, 10, 5, 5, 5,
10, 10, 5, 0, 10, 5, 10, 5, 10, 10, 0, 10, 10, 5, 10, 10,
10, 0, 10, 0, 10, 10, 10, 10), fm_a4_4 = c(10, 5, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 0, 10, 10, 10,
10, 10, 0, 10, 10, 10, 10), fm_a4_5 = c(0, 5, 10, 10, 10,
0, 10, 10, 10, 10, 10, 10, 0, 10, 10, 5, 10, 10, 5, 0, 10,
10, 10, 10, 10, 10, 5, 10, 10, 0, 5, 10, 0, 10, 0, 5, 5,
5, 10, 10, 10), fm_a4_6 = c(10, 5, 5, 0, 0, 5, 10, 10, 10,
0, 10, 10, 5, 10, 10, 10, 0, 10, 0, 10, 10, 0, 10, 10, 5,
0, 0, 10, 10, 10, 0, 10, 10, 5, 5, 10, 0, 0, 10, 10, 5),
cg_a4_1 = c(10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 5, 0,
10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10, 10, 10), cg_a4_2 = c(5,
10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10,
10, 10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10), cg_a4_3 = c(10,
10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 5, 10, 10, 10,
5, 10, 10, 10, 10, 0, 10, 10, 5, 10, 5, 10, 10, 10, 5, 10,
10, 10, 10, 10, 5, 10, 10, 10, 10), cg_a4_4 = c(10, 10, 0,
5, 5, 5, 10, 10, 10, 5, 10, 10, 0, 5, 10, 10, 5, 10, 10,
10, 10, 0, 5, 10, 10, 5, 0, 0, 10, 10, 0, 10, 0, 10, 10,
5, 0, 5, 5, 10, 10), cg_a4_5 = c(5, 0, 0, 5, 0, 10, 5, 10,
10, 0, 10, 10, 10, 10, 5, 10, 0, 10, 0, 10, 0, 0, 10, 10,
5, 10, 5, 10, 5, 5, 5, 0, 10, 10, 5, 10, 0, 10, 10, 10, 10
), cg_a4_6 = c(0, 0, 5, 10, 10, 10, 10, 10, 0, 10, 5, 10,
10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10,
5, 10, 0, 10, 10, 5, 5, 10, 5, 10, 10, 10, 10), ps_a4_1 = c(10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 5, 5, 10, 5, 10, 10, 10, 10), ps_a4_2 = c(0, 10,
10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 5, 10, 5, 10, 10, 10, 5, 10, 10, 10, 5, 0, 10, 10, 10,
5, 0, 10, 5, 10, 10, 10, 10), ps_a4_3 = c(10, 0, 10, 5, 5,
10, 5, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
5, 10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 5,
0, 10, 5, 10, 10), ps_a4_4 = c(10, 10, 10, 10, 5, 10, 5,
10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10,
10, 10, 10, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 5, 5, 10,
10, 10, 10), ps_a4_5 = c(5, 5, 10, 5, 10, 5, 10, 10, 0, 0,
10, 10, 5, 10, 10, 10, 10, 10, 0, 10, 5, 5, 5, 10, 0, 10,
5, 10, 5, 0, 10, 10, 10, 10, 0, 5, 0, 5, 10, 10, 5), ps_a4_6 = c(5,
5, 0, 5, 0, 10, 0, 10, 5, 5, 10, 10, 5, 10, 10, 10, 0, 10,
5, 10, 5, 0, 5, 10, 5, 10, 5, 0, 5, 10, 0, 0, 10, 5, 0, 5,
0, 10, 10, 10, 10)), row.names = c(NA, -41L), class = "data.frame")
I followed your idea of pivoting longer, using pivot_longer()
from tidyr to put the scale groups in rows but leave the items in columns. (The last two examples in the documentation for pivot_longer()
are my go-to when trying to remember how to do this.)
However, this relies on you having the same number of items for each scale; I'm not sure how it will hold up for varying items per scale.
Once things are in a longer form, use a nest_by()
on quest
and the scales variable followed by mutate()
to nest and calculate the alpha for each row.
I didn't paste all the warnings and messages here, but there were loads. You can also remove the data
column at the end if you don't need it any longer.
library(psych)
library(dplyr)
library(tidyr)
d %>%
pivot_longer(cols = -quest,
names_to = c("scale", ".value"),
names_pattern = "(\\w+_\\w+_)(.)") %>%
nest_by(quest, scale) %>%
mutate(alpha(data)$total)
#> # A tibble: 15 x 12
#> # Rowwise: quest, name
#> quest name data raw_alpha std.alpha `G6(smc)` average_r `S/N` ase
#> <dbl> <chr> <list<t> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2 cg_a4_ [16 x 6] 0.619 0.594 0.728 0.226 1.46 0.141
#> 2 2 com_a~ [16 x 6] 0.810 0.808 0.881 0.412 4.20 0.0719
#> 3 2 fm_a4_ [16 x 6] 0.400 0.421 0.546 0.108 0.728 0.221
#> 4 2 gm_a4_ [16 x 6] 0.842 0.952 0.745 0.831 19.7 0.0592
#> 5 2 ps_a4_ [16 x 6] 0.684 0.753 0.870 0.337 3.05 0.123
#> 6 4 cg_a4_ [15 x 6] 0.677 0.696 0.807 0.276 2.29 0.126
#> 7 4 com_a~ [15 x 6] 0.673 0.613 0.842 0.209 1.58 0.110
#> 8 4 fm_a4_ [15 x 6] 0.669 0.714 0.811 0.294 2.50 0.124
#> 9 4 gm_a4_ [15 x 6] 0.811 0.759 0.873 0.386 3.15 0.0389
#> 10 4 ps_a4_ [15 x 6] 0.533 0.551 0.605 0.170 1.23 0.161
#> 11 6 cg_a4_ [10 x 6] -0.168 -0.00601 0.550 -0.00120 -0.00597 0.621
#> 12 6 com_a~ [10 x 6] -0.184 0.228 0.486 0.0686 0.295 0.644
#> 13 6 fm_a4_ [10 x 6] 0.508 0.542 0.727 0.191 1.18 0.248
#> 14 6 gm_a4_ [10 x 6] -0.075 -0.492 -0.0806 -0.0582 -0.330 0.398
#> 15 6 ps_a4_ [10 x 6] 0.844 0.879 0.903 0.592 7.26 0.0710
#> # ... with 3 more variables: mean <dbl>, sd <dbl>, median_r <dbl>
Created on 2021-09-23 by the reprex package (v2.0.0)