Here is the data:
df<-structure(list(Subject.ID = c(168236L, 168236L, 168236L, 168236L,
168236L, 168236L, 168236L, 168236L, 168236L, 168695L, 168695L,
168695L, 168695L, 168695L, 168695L, 168695L, 168695L, 168695L,
168695L, 168695L, 168695L, 168695L, 168695L, 168695L, 168695L,
168695L, 168695L, 168695L, 168695L, 168695L, 168695L, 168695L,
168695L, 168695L, 168695L, 168695L, 168695L, 168695L, 168695L,
168696L, 168696L, 168696L, 168696L, 168696L, 168696L, 168696L,
168696L, 168696L, 168696L, 168696L, 168696L, 168696L, 168696L,
168696L, 168696L, 168696L, 168696L, 168696L, 168696L, 168914L,
168914L, 168914L, 168914L, 168914L, 168914L, 168914L, 168914L,
168914L, 169242L, 169242L, 169242L, 169242L, 169242L, 169242L,
169242L, 169242L, 169242L, 169242L, 169242L, 169242L, 169242L,
169242L, 169242L, 169242L, 169242L, 169242L, 169242L, 169242L,
169242L, 169242L, 169242L, 169242L, 169242L, 169242L, 169242L,
169242L, 169242L, 169242L, 169728L, 169728L, 169728L, 169728L,
169728L, 169728L, 169728L, 169728L, 169728L, 169728L, 169728L,
169728L, 169728L, 169728L, 169728L, 169728L, 169728L, 169728L,
169728L, 169728L, 170992L, 170992L, 170992L, 170992L, 170992L,
170992L, 170992L, 170992L, 170992L, 170992L, 170992L, 170992L,
170992L, 170992L, 170992L, 170992L, 170992L, 170992L, 170992L,
170992L, 170992L, 170992L, 170992L, 170992L, 170992L, 170992L,
170992L, 170992L, 170992L, 170992L, 172482L, 172482L, 172482L,
172482L, 172482L, 172482L, 172482L, 172482L, 172482L, 172482L,
172482L, 172482L, 172482L, 172482L, 172482L, 172482L, 172482L,
172482L, 172482L, 172482L, 172483L, 172483L, 172483L, 172483L,
172483L, 172483L, 172483L, 172483L, 172483L, 172483L, 172483L,
172483L, 172483L, 172483L, 172483L, 172483L, 172483L, 172483L,
172483L, 172483L, 172483L, 172483L, 172483L, 172483L, 172483L,
172483L, 172483L, 172483L, 172483L, 172483L, 172490L, 172490L,
172490L, 172490L, 172490L, 172490L, 172490L, 172490L, 172490L,
172490L, 172490L, 172490L, 172490L, 172490L, 172490L, 172490L,
172490L, 172490L, 172490L, 172490L, 172490L, 172490L, 172490L,
172490L, 172490L, 172490L, 172490L, 172490L, 172490L, 172490L
), Phase = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("Run-in",
"Control", "TRE"), class = "factor"), FastingGlucose = c(83,
NA, NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 91, NA, NA, NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 99, NA, NA, NA, NA, NA, NA, NA, NA, NA,
98, NA, NA, NA, NA, NA, NA, NA, NA, NA, 95, NA, NA, NA, NA, NA,
NA, NA, NA, 84, NA, NA, NA, NA, NA, NA, NA, NA, NA, 88, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 93, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 90, NA, NA, NA, NA, NA, NA, NA, NA, NA, 93, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 102, NA, NA, NA, NA, NA, NA, NA, NA, NA,
101, NA, NA, NA, NA, NA, NA, NA, NA, NA, 106, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 87, NA, NA, NA, NA, NA, NA, NA, NA, NA, 86,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 122, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 107, NA, NA, NA, NA, NA, NA, NA, NA, NA, 111,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 104, NA, NA, NA, NA, NA, NA, NA, NA, NA, 98,
NA, NA, NA, NA, NA, NA, NA, NA, NA), Chol = c(NA, 137, NA, NA,
NA, NA, NA, NA, NA, NA, 149, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 149, NA, NA, NA, NA, NA, NA, NA, NA, NA, 140, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 203, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 181, NA, NA, NA, NA, NA, NA, NA, NA, NA, 274, NA, NA, NA,
NA, NA, NA, NA, NA, 141, NA, NA, NA, NA, NA, NA, NA, NA, NA,
123, NA, NA, NA, NA, NA, NA, NA, NA, NA, 137, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 163, NA, NA, NA, NA, NA, NA, NA, NA, NA,
152, NA, NA, NA, NA, NA, NA, NA, NA, NA, 177, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 172, NA, NA, NA, NA, NA, NA, NA, NA, NA,
167, NA, NA, NA, NA, NA, NA, NA, NA, NA, 175, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 165, NA, NA, NA, NA, NA, NA, NA, NA, NA,
207, NA, NA, NA, NA, NA, NA, NA, NA, NA, 188, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 201, NA, NA, NA, NA, NA, NA, NA, NA, NA,
186, NA, NA, NA, NA, NA, NA, NA, NA, NA, 188, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 200, NA, NA, NA, NA, NA, NA, NA, NA), Trig = c(NA,
NA, 68, NA, NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 80, NA, NA, NA, NA, NA, NA, NA, NA, NA, 59, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 68, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, NA, NA, NA, NA, NA, 228, NA, NA,
NA, NA, NA, NA, NA, NA, 119, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 82, NA, NA, NA, NA, NA, NA, NA, NA, NA, 79, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 77, NA, NA, NA, NA, NA, NA, NA, NA, NA, 60,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 126, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 83, NA, NA, NA, NA, NA, NA, NA, NA, NA, 90, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 127, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 112, NA, NA, NA, NA, NA, NA, NA, NA, NA, 144, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 104, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 94, NA, NA, NA, NA, NA, NA, NA, NA, NA, 247, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 403, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 344, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
228L), class = "data.frame")
Here is an example of when the type of mapping I am trying to do works:
#Creating an object with the biomarkers names in it
## Find first biomarker column
bio.start <- which(colnames(df) == "FastingGlucose"); bio.start #Note in R, two "=="
signs are used to denote equals to
## Find last biomarker column
bio.stop <- which(colnames(df) == "Trig"); bio.stop
#select all biomarker column names between the first and last biomarker
bio <- colnames(df)[bio.start:bio.stop]
df[bio] %>% split(dfsub$Phase) %>% map(summary)
It shows all 3 variables 3 times (once for each Phase- the grouping variable).
However, when I try to do something similar with a written function instead of summary, it doesn't map things the same way
Here is the function:
summarystat <- function(x, na.rm = TRUE) {
#it takes two areguments x, and na.rm
if(na.rm){ #if na.rm is false it means no need to remove NA values. Default is TRUE
x = x[!is.na(x)]
}
my_list <- list("mean" = mean(x), "median" = median(x), "sd" = sd(x))
return(my_list)
}
I try to use it in a similar way:
df[bio] %>% split(dfsub$Phase) %>% map(summarystat)
But instead of returning 9 SETS of summary statistics (as summary does- 3 variables with 3 grouping categories- 3x3), it only returns 3 sets. No mention of the variable names. How can I alter my function to return things in a similar way
Your x
is a dataframe. This means it actually compute statistics across all values in x. We may use lapply
to avoid this:
summarystat <- function(x, na.rm = TRUE) {
#it takes two areguments x, and na.rm
my_list <- lapply(as.list(x),function(v) setNames(c(mean(v,na.rm=na.rm),median(v,na.rm=na.rm),sd(v,na.rm=na.rm)),c("mean","median","sd")))
return(my_list)
}
df[bio] %>% split(df$Phase) %>% map(summarystat)
$`Run-in`
$`Run-in`$FastingGlucose
mean median sd
97.00000 97.50000 12.40967
$`Run-in`$Chol
mean median sd
170.37500 170.00000 27.20786
$`Run-in`$Trig
mean median sd
118.12500 107.50000 59.21737
$Control
$Control$FastingGlucose
mean median sd
97.857143 96.000000 7.946248
$Control$Chol
mean median sd
162.42857 165.00000 20.79148
$Control$Trig
mean median sd
129.5714 90.0000 122.2495
$TRE
$TRE$FastingGlucose
mean median sd
96.125000 96.500000 7.827379
$TRE$Chol
mean median sd
184.37500 178.00000 44.43916
$TRE$Trig
mean median sd
139.75000 88.50000 96.70094
Or we can modify the code in summary.data.frame
:
mysummary2 <- function (object, na.rm=TRUE,...)
{
maxsum = 7L
digits = max(3L, getOption("digits") -
3L)
ncw <- function(x) {
z <- nchar(x, type = "w", allowNA = TRUE)
if (any(na <- is.na(z))) {
z[na] <- nchar(encodeString(z[na]), "b")
}
z
}
z <- lapply(X = as.list(object), FUN = function(v) setNames(c(mean(v,na.rm=na.rm),median(v,na.rm=na.rm),sd(v,na.rm=na.rm)),c("mean","median","sd")), ...)
nv <- length(object)
nm <- names(object)
lw <- numeric(nv)
nr <- if (nv)
max(vapply(z, function(x) NROW(x) + !is.null(attr(x,
"NAs")), integer(1)))
else 0
for (i in seq_len(nv)) {
sms <- z[[i]]
if (is.matrix(sms)) {
cn <- paste(nm[i], gsub("^ +", "", colnames(sms),
useBytes = TRUE), sep = ".")
tmp <- format(sms)
if (nrow(sms) < nr)
tmp <- rbind(tmp, matrix("", nr - nrow(sms),
ncol(sms)))
sms <- apply(tmp, 1L, function(x) paste(x, collapse = " "))
wid <- sapply(tmp[1L, ], nchar, type = "w")
blanks <- paste(character(max(wid)), collapse = " ")
wcn <- ncw(cn)
pad0 <- floor((wid - wcn)/2)
pad1 <- wid - wcn - pad0
cn <- paste0(substring(blanks, 1L, pad0), cn, substring(blanks,
1L, pad1))
nm[i] <- paste(cn, collapse = " ")
}
else {
sms <- format(sms, digits = digits)
lbs <- format(names(sms))
sms <- paste0(lbs, ":", sms, " ")
lw[i] <- ncw(lbs[1L])
length(sms) <- nr
}
z[[i]] <- sms
}
if (nv) {
z <- unlist(z, use.names = TRUE)
dim(z) <- c(nr, nv)
if (anyNA(lw))
warning("probably wrong encoding in names(.) of column ",
paste(which(is.na(lw)), collapse = ", "))
blanks <- paste(character(max(lw, na.rm = TRUE) + 2L),
collapse = " ")
pad <- floor(lw - ncw(nm)/2)
nm <- paste0(substring(blanks, 1, pad), nm)
dimnames(z) <- list(rep.int("", nr), nm)
}
else {
z <- character()
dim(z) <- c(nr, nv)
}
attr(z, "class") <- c("table")
z
}
df[bio] %>% split(df$Phase) %>% map(mysummary2)
$`Run-in`
FastingGlucose Chol Trig
mean :97.00 mean :170.38 mean :118.12
median:97.50 median:170.00 median:107.50
sd :12.41 sd : 27.21 sd : 59.22
$Control
FastingGlucose Chol Trig
mean :97.857 mean :162.43 mean :129.6
median:96.000 median:165.00 median: 90.0
sd : 7.946 sd : 20.79 sd :122.2
$TRE
FastingGlucose Chol Trig
mean :96.125 mean :184.38 mean :139.8
median:96.500 median:178.00 median: 88.5
sd : 7.827 sd : 44.44 sd : 96.7