Search code examples
r dplyr psych

Psych alpha() function giving error when fed dplyr results


I have a large dataset that I am trying to trim down for focus. As part of it I have three variables that are each measured at five time points. I averaged the three variables at each time point, so I ended up with five variables (one per time point) whose internal consistency I want to assess. When I try to calculate Cronbach's alpha using psych::alpha() I get an error.

25 rows of reproducible data:

# Reproducible sample: 25 rows x 15 integer columns (items a/b/c at time
# points 1-5). Bound to `test`, the name the snippets below read from.
test <- structure(list(catme_satis1a = c(4L, 4L, 5L, 5L, 5L, NA, 1L, 
4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 5L, 3L, 4L, 
4L, 5L), catme_satis1b = c(4L, 4L, 4L, 5L, 5L, NA, 1L, 4L, 5L, 
5L, 4L, 2L, 5L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 5L, 3L, 4L, 4L, 5L
), catme_satis1c = c(3L, 4L, 5L, 5L, 5L, NA, 1L, 4L, 3L, 4L, 
4L, 2L, 4L, 5L, 3L, 4L, 4L, 5L, 3L, 4L, 5L, 3L, 4L, 4L, 5L), 
    catme_satis2a = c(4L, 4L, 4L, 5L, 5L, NA, 5L, 4L, 5L, NA, 
    NA, 3L, NA, 4L, 3L, 4L, 4L, 5L, 3L, NA, 5L, 5L, 4L, 4L, 5L
    ), catme_satis2b = c(4L, 4L, 5L, 5L, 5L, NA, 5L, 4L, 5L, 
    NA, NA, 3L, NA, 4L, 3L, 4L, 3L, 5L, 2L, NA, 5L, 5L, 4L, 4L, 
    5L), catme_satis2c = c(4L, 4L, 5L, 5L, 5L, NA, 5L, 4L, 5L, 
    NA, NA, 3L, NA, 4L, 3L, 4L, 3L, 5L, 3L, NA, 5L, 5L, 4L, 4L, 
    5L), catme_satis3a = c(4L, 4L, 4L, 5L, 5L, 5L, 4L, 4L, 5L, 
    5L, 3L, NA, 3L, 4L, 3L, NA, 4L, 5L, 3L, 5L, 5L, 5L, 4L, 5L, 
    5L), catme_satis3b = c(4L, 4L, 4L, 5L, 5L, 5L, 3L, 4L, 5L, 
    5L, 3L, NA, 3L, 4L, 3L, NA, 4L, 5L, 3L, 5L, 5L, 5L, 4L, 5L, 
    5L), catme_satis3c = c(4L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 5L, 
    5L, 3L, NA, 3L, 4L, 3L, NA, 4L, 5L, 4L, 5L, 5L, 5L, 4L, 4L, 
    5L), catme_satis4a = c(4L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 
    4L, 3L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, NA, 5L, 5L, 5L, 5L, 
    5L), catme_satis4b = c(4L, 4L, 5L, 5L, 5L, 4L, 4L, 4L, 5L, 
    4L, 3L, 3L, 2L, 4L, 3L, 4L, 5L, 5L, 4L, NA, 5L, 5L, 5L, 5L, 
    5L), catme_satis4c = c(4L, 4L, 5L, 5L, 5L, 4L, 4L, 4L, 5L, 
    3L, 3L, 3L, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 5L, 5L, 5L, 5L, 
    5L), catme_satis5a = c(5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 
    4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 5L, 1L, 5L, 
    5L), catme_satis5b = c(5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 
    4L, 3L, 3L, 3L, 4L, 4L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 1L, 5L, 
    5L), catme_satis5c = c(5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 
    4L, 3L, 3L, 2L, 4L, 4L, 5L, 4L, 3L, 5L, 4L, 5L, 5L, 1L, 5L, 
    5L)), class = "data.frame", row.names = c(NA, -25L), .Names = c("catme_satis1a", 
"catme_satis1b", "catme_satis1c", "catme_satis2a", "catme_satis2b", 
"catme_satis2c", "catme_satis3a", "catme_satis3b", "catme_satis3c", 
"catme_satis4a", "catme_satis4b", "catme_satis4c", "catme_satis5a", 
"catme_satis5b", "catme_satis5c"))

Next, I tried this to get the mean for columns 1:3, 4:6, etc. (but by name):

library(dplyr)

# Average the three items (a, b, c) of each time point row by row.
# transmute() keeps only the newly created columns, and mean() is called
# without na.rm, so a row with any missing item yields NA for that time point.
# The result is still a rowwise tibble (see class(df1) further down).
df1 <- transmute(
  rowwise(test),
  catme_satis1 = mean(c(catme_satis1a, catme_satis1b, catme_satis1c)),
  catme_satis2 = mean(c(catme_satis2a, catme_satis2b, catme_satis2c)),
  catme_satis3 = mean(c(catme_satis3a, catme_satis3b, catme_satis3c)),
  catme_satis4 = mean(c(catme_satis4a, catme_satis4b, catme_satis4c)),
  catme_satis5 = mean(c(catme_satis5a, catme_satis5b, catme_satis5c))
)

Finally, I want to know the consistency of these variables using the psych package:

library(psych)
# Cronbach's alpha over the five per-time-point mean columns in df1.
psych::alpha(df1)

Which gives this error:

> alpha(df1)
Error in sort.list(y) : 'x' must be atomic for 'sort.list'
Have you called 'sort' on a list?

My data frame looks correct when I print it, and I should be able to get the internal consistency of these values. Why is R throwing this error?


Solution

  • After doing some exploration I found a way to make this work. The problem is that the dplyr output carries additional classes beyond data.frame. I created the mean columns in a different manner, without dplyr, using the following code (note that this one is named df2 to facilitate comparison later on):

    # Per-time-point means built with base R only, so df2 is a plain
    # data.frame without the tibble/rowwise classes. rowMeans() is the
    # vectorised equivalent of apply(x, 1, mean); with its default
    # na.rm = FALSE, a row containing any NA yields NA.
    # Columns are picked by position: 1:3 = time 1, 4:6 = time 2, and so on.
    df2 <- data.frame(
      catme_satis1 = rowMeans(test[, 1:3]),
      catme_satis2 = rowMeans(test[, 4:6]),
      catme_satis3 = rowMeans(test[, 7:9]),
      catme_satis4 = rowMeans(test[, 10:12]),
      catme_satis5 = rowMeans(test[, 13:15])
    )
    

    The alpha(df2) command worked just fine. This inspired me to check a few things about the dataframes. The class of df1 from my original post, and df2 here is different:

    > class(df1)
    [1] "rowwise_df" "tbl_df"     "tbl"        "data.frame"
    > class(df2)
    [1] "data.frame"
    

    Also, the two objects are not recognized as identical unless I coerce the dplyr output to a plain data frame:

    > identical(df1, df2)
    [1] FALSE
    > identical(as.data.frame(df1), df2)
    [1] TRUE
    

    Running the command alpha(as.data.frame(df1)) works and produces identical results. There are two solutions here:

    1. Use non dplyr methods to get the mean data. This keeps the data as a data.frame classed object.
    2. Use as.data.frame() to coerce the object into the right class when running the alpha() function, or add %>% as.data.frame() to the end of the dplyr transmute() pipeline.