I have data like this:
> head(dat_oe)
subjects PHENO AGE SEX mean_HBA1C DDuration REN_INSF C1
1 0_1_K05914 1 -1.0912233 2 0.15392621 2.5936581 0 -0.00389484
2 0_2_K06757 2 -0.2053317 1 -0.30112172 0.8075640 0 0.00760754
3 0_3_K06768 NA 0.0000000 NA -4.54101273 0.0000000 NA 0.00268124
4 0_4_K07479 1 1.2711544 1 -0.09165522 1.6917690 0 -0.00302735
5 0_5_K05811 1 -0.6482775 1 -0.19277698 0.8606163 0 0.00546419
6 0_6_K06786 1 0.8282086 1 0.24782498 0.3300932 0 0.00246136
C2 C3 C4 C5 C6 C7
1 5.64273e-04 0.010587900 0.00523132 -0.006524870 0.000303767 -0.002924090
2 -2.54041e-04 0.000225929 0.00701527 0.003650010 -0.001307740 0.004370730
3 1.02475e-03 0.003974150 0.01028650 -0.000763843 0.002334700 -0.009816260
4 1.64945e-03 -0.005755190 0.00229313 0.002421120 -0.002145760 -0.005601280
5 -2.87371e-03 -0.013499400 0.00587083 0.013513000 0.010422300 0.004475680
6 -6.49155e-05 0.002050810 -0.00726134 -0.002068480 0.005923370 0.000567228
and I am doing this:
library(plyr)
# Attempt to compute per-subject means with plyr::ddply().
# NOTE: `c.C1` and `c.C2` do not appear among the columns printed by
# head(dat_oe) above; the columns shown there are named `C1` and `C2`.
perPart <- ddply(dat_oe, .(subjects), plyr::summarise,
c.C1 = mean(c.C1),
c.C2 = mean(c.C2))
And I am getting this error:
Error in mean(c.C1) : object 'c.C1' not found
I am using R 3.6.3. Any idea how I can rewrite this code so that it works?
The column names are 'C1' and 'C2', not 'c.C1' and 'c.C2'.
# Mean of C1 and C2 within each `subjects` group, one output row per subject.
plyr::ddply(
  .data = dat_oe,
  .variables = c("subjects"),
  .fun = plyr::summarise,
  C1 = mean(C1),
  C2 = mean(C2)
)
-output
# subjects C1 C2
#1 0_1_K05914 -0.00389484 5.64273e-04
#2 0_2_K06757 0.00760754 -2.54041e-04
#3 0_3_K06768 0.00268124 1.02475e-03
#4 0_4_K07479 -0.00302735 1.64945e-03
#5 0_5_K05811 0.00546419 -2.87371e-03
#6 0_6_K06786 0.00246136 -6.49155e-05
For multiple columns, we can use colwise
# colwise() turns mean() into a function applied to each of the listed columns,
# so the column names only have to be given once.
plyr::ddply(
  .data = dat_oe,
  .variables = c("subjects"),
  .fun = plyr::colwise(.fun = mean, .cols = c("C1", "C2"))
)
-output
# subjects C1 C2
#1 0_1_K05914 -0.00389484 5.64273e-04
#2 0_2_K06757 0.00760754 -2.54041e-04
#3 0_3_K06768 0.00268124 1.02475e-03
#4 0_4_K07479 -0.00302735 1.64945e-03
#5 0_5_K05811 0.00546419 -2.87371e-03
#6 0_6_K06786 0.00246136 -6.49155e-05
With dplyr (version >= 1.0), we can use across():
library(dplyr)
# Group by subject, take the mean of C1 and C2, and return an ungrouped result.
dat_oe %>%
  group_by(subjects) %>%
  summarise(
    across(.cols = c(C1, C2), .fns = mean),
    .groups = "drop"
  )
The .(subjects) and .(C1, C2) forms should also work. We loaded dplyr as well,
so we used c("subjects") and c("C1", "C2") instead.
# Reproducible definition of the example data frame `dat_oe`
# (six rows, fourteen columns; row names are the strings "1" to "6").
dat_oe <- structure(
  list(
    subjects = c(
      "0_1_K05914", "0_2_K06757", "0_3_K06768",
      "0_4_K07479", "0_5_K05811", "0_6_K06786"
    ),
    PHENO = c(1L, 2L, NA, 1L, 1L, 1L),
    AGE = c(-1.0912233, -0.2053317, 0, 1.2711544, -0.6482775, 0.8282086),
    SEX = c(2L, 1L, NA, 1L, 1L, 1L),
    mean_HBA1C = c(
      0.15392621, -0.30112172, -4.54101273,
      -0.09165522, -0.19277698, 0.24782498
    ),
    DDuration = c(2.5936581, 0.807564, 0, 1.691769, 0.8606163, 0.3300932),
    REN_INSF = c(0L, 0L, NA, 0L, 0L, 0L),
    C1 = c(
      -0.00389484, 0.00760754, 0.00268124,
      -0.00302735, 0.00546419, 0.00246136
    ),
    C2 = c(
      0.000564273, -0.000254041, 0.00102475,
      0.00164945, -0.00287371, -6.49155e-05
    ),
    C3 = c(
      0.0105879, 0.000225929, 0.00397415,
      -0.00575519, -0.0134994, 0.00205081
    ),
    C4 = c(
      0.00523132, 0.00701527, 0.0102865,
      0.00229313, 0.00587083, -0.00726134
    ),
    C5 = c(
      -0.00652487, 0.00365001, -0.000763843,
      0.00242112, 0.013513, -0.00206848
    ),
    C6 = c(
      0.000303767, -0.00130774, 0.0023347,
      -0.00214576, 0.0104223, 0.00592337
    ),
    C7 = c(
      -0.00292409, 0.00437073, -0.00981626,
      -0.00560128, 0.00447568, 0.000567228
    )
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6")
)