Search code examples
r plyr

Object not found using ddply


I have data like this:

> head(dat_oe)
    subjects PHENO        AGE SEX  mean_HBA1C DDuration REN_INSF          C1
1 0_1_K05914     1 -1.0912233   2  0.15392621 2.5936581        0 -0.00389484
2 0_2_K06757     2 -0.2053317   1 -0.30112172 0.8075640        0  0.00760754
3 0_3_K06768    NA  0.0000000  NA -4.54101273 0.0000000       NA  0.00268124
4 0_4_K07479     1  1.2711544   1 -0.09165522 1.6917690        0 -0.00302735
5 0_5_K05811     1 -0.6482775   1 -0.19277698 0.8606163        0  0.00546419
6 0_6_K06786     1  0.8282086   1  0.24782498 0.3300932        0  0.00246136
        C2           C3          C4           C5           C6           C7
1  5.64273e-04  0.010587900  0.00523132 -0.006524870  0.000303767 -0.002924090
2 -2.54041e-04  0.000225929  0.00701527  0.003650010 -0.001307740  0.004370730
3  1.02475e-03  0.003974150  0.01028650 -0.000763843  0.002334700 -0.009816260
4  1.64945e-03 -0.005755190  0.00229313  0.002421120 -0.002145760 -0.005601280
5 -2.87371e-03 -0.013499400  0.00587083  0.013513000  0.010422300  0.004475680
6 -6.49155e-05  0.002050810 -0.00726134 -0.002068480  0.005923370  0.000567228

and I am doing this:

library(plyr)
perPart <- ddply(dat_oe, .(subjects), plyr::summarise,
             c.C1 = mean(c.C1),
             c.C2 = mean(c.C2))

And I am getting this error:

Error in mean(c.C1) : object 'c.C1' not found

I am using R 3.6.3. Any idea how I can rewrite this code so that it works?


Solution

  • The column names are 'C1' and 'C2', not 'c.C1' and 'c.C2'

    plyr::ddply(dat_oe, c("subjects"), plyr::summarise,
                             C1 = mean(C1), C2 = mean(C2))
    

    -output

    #  subjects          C1           C2
    #1 0_1_K05914 -0.00389484  5.64273e-04
    #2 0_2_K06757  0.00760754 -2.54041e-04
    #3 0_3_K06768  0.00268124  1.02475e-03
    #4 0_4_K07479 -0.00302735  1.64945e-03
    #5 0_5_K05811  0.00546419 -2.87371e-03
    #6 0_6_K06786  0.00246136 -6.49155e-05
    

    For multiple columns, we can use colwise

    plyr::ddply(dat_oe, c("subjects"), plyr::colwise(mean, c("C1", "C2")))
    

    -output

    # subjects          C1           C2
    #1 0_1_K05914 -0.00389484  5.64273e-04
    #2 0_2_K06757  0.00760754 -2.54041e-04
    #3 0_3_K06768  0.00268124  1.02475e-03
    #4 0_4_K07479 -0.00302735  1.64945e-03
    #5 0_5_K05811  0.00546419 -2.87371e-03
    #6 0_6_K06786  0.00246136 -6.49155e-05
    

    With dplyr (version >= 1.0), we can use across

    library(dplyr)
    dat_oe %>%
          group_by(subjects) %>%
          summarise(across(c(C1, C2), mean), .groups = 'drop')
    

    The .(subjects) and .(C1, C2) forms should also work. We had dplyr loaded as well, so we used c("subjects") and c("C1", "C2") instead.

    data

    dat_oe <- structure(list(subjects = c("0_1_K05914", "0_2_K06757", "0_3_K06768", 
    "0_4_K07479", "0_5_K05811", "0_6_K06786"), PHENO = c(1L, 2L, 
    NA, 1L, 1L, 1L), AGE = c(-1.0912233, -0.2053317, 0, 1.2711544, 
    -0.6482775, 0.8282086), SEX = c(2L, 1L, NA, 1L, 1L, 1L), mean_HBA1C = c(0.15392621, 
    -0.30112172, -4.54101273, -0.09165522, -0.19277698, 0.24782498
    ), DDuration = c(2.5936581, 0.807564, 0, 1.691769, 0.8606163, 
    0.3300932), REN_INSF = c(0L, 0L, NA, 0L, 0L, 0L), C1 = c(-0.00389484, 
    0.00760754, 0.00268124, -0.00302735, 0.00546419, 0.00246136), 
        C2 = c(0.000564273, -0.000254041, 0.00102475, 0.00164945, 
        -0.00287371, -6.49155e-05), C3 = c(0.0105879, 0.000225929, 
        0.00397415, -0.00575519, -0.0134994, 0.00205081), C4 = c(0.00523132, 
        0.00701527, 0.0102865, 0.00229313, 0.00587083, -0.00726134
        ), C5 = c(-0.00652487, 0.00365001, -0.000763843, 0.00242112, 
        0.013513, -0.00206848), C6 = c(0.000303767, -0.00130774, 
        0.0023347, -0.00214576, 0.0104223, 0.00592337), C7 = c(-0.00292409, 
        0.00437073, -0.00981626, -0.00560128, 0.00447568, 0.000567228
        )), class = "data.frame", row.names = c("1", "2", "3", "4", 
    "5", "6"))