I am trying to take the output from my ICC calculation and create a data frame (see sample data and code below). Everything appears to work perfectly when I view the new data frame, but if you use str() it reveals that all variables are factors and my results are actually factor levels with the actual data counts for each factor level. This is particularly a problem because I would like to manipulate the new data frame.
I am relatively new to R and I cannot figure out how to modify my code to correct this problem.
Dataset:
# Sample data: one row per individual (17 rows) with an Individual_ID, a
# Region label ("South" or "North"), and nine numeric columns P1-P9.
WW_Wing_13C_Summary <- structure(list(Individual_ID = c("WW_08A_02", "WW_08A_03",
"WW_08A_04", "WW_08A_05", "WW_08A_06", "WW_08A_08", "WW_08A_09", "WW_08A_13",
"WW_08B_02", "WW_08G_01", "WW_08G_02", "WW_08G_05", "WW_08G_07",
"WW_08I_01", "WW_08I_03", "WW_08I_07", "WW_08I_12"), Region = c("South",
"South", "South", "South", "South", "South", "South", "South",
"South", "North", "North", "North", "North", "North", "North",
"North", "North"), P1 = c(-18.67, -20.06, -16.54, -20.33, -21.28,
-23.86, -21.3, -21.34, -20.87, -20.32, -19.35, -21.2, -21.61,
-18.3, -22.3, -21.6, -24.12), P2 = c(-19.16, -20.3, -15.6, -20.28,
-21.24, -23.95, -21.44, -24.13, -20.95, -20.02, -19.38, -21.29,
-21.42, -18.53, -22.2, -21.77, -24.08), P3 = c(-20.38, -21.21,
-16.61, -20.58, -21.22, -24, -21.49, -23.03, -20.76, -19.92,
-19.7, -21.85, -21.84, -19.55, -22.18, -22.17, -24), P4 = c(-20.96,
-22.9, -19.65, -20.8, -21.2, -24.16, -21.49, -21.77, -20.9, -20.05,
-19.94, -22.22, -21.68, -20.18, -22.14, -22.21, -24.2), P5 = c(-21.61,
-22.87, -20.98, -21.24, -21.47, -24.93, -21.1, -21.4, -21.02,
-20.23, -20.43, -22.34, -21.79, -20.96, -21.55, -22.24, -24.16
), P6 = c(-21.65, -21.13, -21.18, -20.94, -21.23, -24.93, -20.84,
-21.57, -20.84, -20.73, -20.08, -22.42, -21.49, -21.08, -20.85,
-22.47, -22.87), P7 = c(-21.31, -20.68, -21.7, -20.54, -21.89,
-24.48, -20.78, -21.45, -21.11, -20.91, -20.81, -22.69, -21.88,
-21.5, -23.1, -22.19, -22.51), P8 = c(-20.8, -20.58, -21.18,
-21.04, -21.89, -24.17, -21.58, -21.32, -20.64, -19.87, -20.9,
-22.75, -21.62, -17.42, -20.75, -21.89, -22.12), P9 = c(-21.28,
-20.69, -21.33, -20.42, -21.6, -23.1, -20.76, -21.59, -20.11,
-19.58, -19.24, -22.73, -21.54, -13.18, -20.9, -21.89, -22.3)),
.Names = c("Individual_ID", "Region", "P1", "P2", "P3", "P4", "P5",
"P6", "P7", "P8", "P9"), class = "data.frame", row.names = c(NA, -17L))
Code:
## split the complete dataset by regions
## (produces a list holding one data frame per distinct Region value)
WW_Wing_13C_Summary_Region <- split(WW_Wing_13C_Summary, WW_Wing_13C_Summary$Region)
library(psych)
# calculate ICC for delta-13C and create a data frame with results for each region
# For each region: run ICC() on columns 3:11 (P1-P9), keep the "results" and
# "summary" components, and flatten them with unlist() into one vector.
# sapply() + t() then gives one row per region, and the trailing index keeps
# eight positions of that flattened vector, selected purely by position.
# NOTE(review): unlist() yields a single atomic vector, so every value shares
# one type; check str() of the result before relying on numeric columns.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region,
function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)]
# View results
WW_Wing_13C_ICC
# View structure of dataframe
str(WW_Wing_13C_ICC)
It happens because in your function here:
# The unlist() call below flattens the "results" and "summary" components of
# each ICC() fit into a single atomic vector before the columns are selected.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region,
function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)]
...(I'd probably clean that up a bit) you use unlist. When you do that you mix character and numeric classes, and R automatically converts everything to character. data.frame() then turns those character columns into factors. So the pieces you select appear numeric but are actually character/factor.
Two fixes:
1. Rewrite your anonymous function above to pull only what you want and make sure everything is numeric
As in...
# Pull the wanted pieces out of one region's ICC fit.
#
# temp: a data frame whose columns 3 to 11 are passed to ICC().
# Returns a list with two one-row data frames:
#   sum - the first row of the first table stored under the fit's "summary"
#   res - row 3 of the fit's "results" table, restricted to the columns
#         "ICC", "lower bound" and "upper bound"
SUM <- function(temp) {
  icc_fit <- ICC(temp[, 3:11])
  summary_table <- icc_fit[["summary"]][[1]]
  results_table <- icc_fit[["results"]]
  # check.names = FALSE keeps column names that contain spaces untouched
  first_summary_row <- data.frame(summary_table, check.names = FALSE)[1, ]
  third_result_row <- results_table[3, c("ICC", "lower bound", "upper bound")]
  list(sum = first_summary_row, res = third_result_row)
}
# Run SUM() on every region. Flattening one level turns the per-region
# list(sum, res) pairs into a single list that alternates sum, res, sum, ...
sumres <- unlist(lapply(WW_Wing_13C_Summary_Region, SUM), recursive = FALSE)
# Explicit full-length mask (TRUE marks the "sum" entries) instead of relying
# on recycling of the reassignable T/F shorthands.
is_sum <- rep(c(TRUE, FALSE), length.out = length(sumres))
# Stack the "sum" rows and the "res" rows separately, then bind them side by
# side; check.names = FALSE keeps names such as "lower bound" intact.
DF <- data.frame(do.call(rbind, sumres[is_sum]),
  do.call(rbind, sumres[!is_sum]), check.names = FALSE)
# One row per region, in the order of the split list. Taking the labels from
# the list itself also works when a region name contains a ".".
rownames(DF) <- names(WW_Wing_13C_Summary_Region)
DF
2. Change the classes manually:
As in...
# Convert every column to numeric in place. Going through as.character()
# turns a factor into its printed labels rather than its integer codes, and
# assigning into NEW[] keeps the original row names (the regions) and column
# names, which apply() would drop or collapse for a single-row input.
NEW <- WW_Wing_13C_ICC
NEW[] <- lapply(NEW, function(col) as.numeric(as.character(col)))
str(NEW)