I have a dataframe called 'my_data' that looks like so:
Injury BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB Acute.BBB Chronic.BBB
1 Moderate 0.35 1.10 0.60 1.84 0.150 21 11 18
2 Severe 0.42 1.47 0.86 3.04 0.420 21 3 14
3 Moderate 0.45 1.02 0.65 1.80 0.150 21 11 18
4 Moderate 0.42 0.97 0.70 1.76 0.000 21 8 17
5 Severe 0.40 1.55 0.80 3.12 0.370 21 4 11
6 Moderate 0.37 0.96 0.65 1.65 0.240 21 9 14
7 Severe 0.32 1.64 0.75 3.34 0.400 21 2 12
8 Severe 0.40 1.44 0.90 3.24 0.298 21 1 9
9 Moderate 0.47 1.01 0.82 1.77 0.140 21 7 17
10 Moderate 0.41 0.90 0.67 1.51 0.190 21 9 15
11 Moderate 0.38 0.86 0.57 1.78 0.100 21 10 17
12 Severe 0.39 1.59 0.70 3.27 0.360 21 2 12
I would like to format the Injury factors (Moderate and Severe) to columns with its assocaited measurement per column (time delay, area deficit, etc). There are also three time points (BL, Acute, Chronic) which I would also like to factor into my final dataframe.
I ran this code for basic stats but it is not grouped by injury group (Moderate vs Severe)
updated_df <- sapply(my_data[, c(2:9)], function(my_data) c("Mean"= mean(my_data,na.rm=TRUE),
"Stand dev" = sd(my_data),
"Stand Err" = sd(my_data)/sqrt(length(my_data)),
"Median" = median(my_data),
"CoeffofVariation" = sd(my_data)/mean(my_data,na.rm=TRUE),
"Minimum" = min(my_data),
"Maximun" = max(my_data),
"Upper Quantile" = quantile(my_data,.75),
"LowerQuartile" = quantile(my_data,.25),
"n" = length(my_data)
)
)
How to run the statistics shown above and group by Injury Type (Mod vs Sev) with time points factored in?
WIth the current set up, an option is to split
by 'Injury Type', loop over the list
with lapply
and apply the code
lst1 <- split(my_data, my_data$Injury, drop = TRUE)
lst2 <- lapply(lst1, function(dat)
sapply(dat[, 2:9], function(my_data1) c("Mean"= mean(my_data1,na.rm=TRUE),
"Stand dev" = sd(my_data1),
"Stand Err" = sd(my_data1)/sqrt(length(my_data1)),
"Median" = median(my_data1),
"CoeffofVariation" = sd(my_data1)/mean(my_data1,na.rm=TRUE),
"Minimum" = min(my_data1),
"Maximun" = max(my_data1),
"Upper Quantile" = quantile(my_data1,.75),
"LowerQuartile" = quantile(my_data1,.25),
"n" = length(my_data1))))
-output
lst2
#$Moderate
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.40714286 0.97428571 0.66571429 1.73000000 0.13857143 21
# Stand dev 0.04347961 0.07955232 0.08059304 0.11313708 0.07515064 0
# Stand Err 0.01643375 0.03006795 0.03046131 0.04276180 0.02840427 0
# Median 0.41000000 0.97000000 0.65000000 1.77000000 0.15000000 21
# CoeffofVariation 0.10679202 0.08165194 0.12106251 0.06539716 0.54232422 0
# Minimum 0.35000000 0.86000000 0.57000000 1.51000000 0.00000000 21
# Maximun 0.47000000 1.10000000 0.82000000 1.84000000 0.24000000 21
# Upper Quantile.75% 0.43500000 1.01500000 0.68500000 1.79000000 0.17000000 21
# LowerQuartile.25% 0.37500000 0.93000000 0.62500000 1.70500000 0.12000000 21
# n 7.00000000 7.00000000 7.00000000 7.00000000 7.00000000 7
# Acute.BBB Chronic.BBB
# Mean 9.2857143 16.5714286
# Stand dev 1.4960265 1.5118579
# Stand Err 0.5654449 0.5714286
# Median 9.0000000 17.0000000
# CoeffofVariation 0.1611105 0.0912328
# Minimum 7.0000000 14.0000000
# Maximun 11.0000000 18.0000000
# Upper Quantile.75% 10.5000000 17.5000000
# LowerQuartile.25% 8.5000000 16.0000000
# n 7.0000000 7.0000000
# $Severe
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.38600000 1.53800000 0.80200000 3.20200000 0.36960000 21
# Stand dev 0.03847077 0.08288546 0.08074652 0.12049896 0.04659184 0
# Stand Err 0.01720465 0.03706751 0.03611094 0.05388877 0.02083651 0
# Median 0.40000000 1.55000000 0.80000000 3.24000000 0.37000000 21
# CoeffofVariation 0.09966520 0.05389172 0.10068144 0.03763241 0.12606019 0
# Minimum 0.32000000 1.44000000 0.70000000 3.04000000 0.29800000 21
# Maximun 0.42000000 1.64000000 0.90000000 3.34000000 0.42000000 21
# Upper Quantile.75% 0.40000000 1.59000000 0.86000000 3.27000000 0.40000000 21
# LowerQuartile.25% 0.39000000 1.47000000 0.75000000 3.12000000 0.36000000 21
# n 5.00000000 5.00000000 5.00000000 5.00000000 5.00000000 5
# Acute.BBB Chronic.BBB
# Mean 2.4000000 11.6000000
# Stand dev 1.1401754 1.8165902
# Stand Err 0.5099020 0.8124038
# Median 2.0000000 12.0000000
# CoeffofVariation 0.4750731 0.1566026
# Minimum 1.0000000 9.0000000
# Maximun 4.0000000 14.0000000
# Upper Quantile.75% 3.0000000 12.0000000
# LowerQuartile.25% 2.0000000 11.0000000
# n 5.0000000 5.0000000
We can convert to a single dataset by rbind
ing
out <- do.call(rbind, unname(Map(function(x, y)
`row.names<-`(transform(data.frame(Injury = x, y,
stringsAsFactors = FALSE), rn = row.names(y)), NULL), names(lst2), lst2)))
my_data <- structure(list(Injury = c("Moderate", "Severe", "Moderate", "Moderate",
"Severe", "Moderate", "Severe", "Severe", "Moderate", "Moderate",
"Moderate", "Severe"), BL.Time.Delay = c(0.35, 0.42, 0.45, 0.42,
0.4, 0.37, 0.32, 0.4, 0.47, 0.41, 0.38, 0.39), Acute.Time.Delay = c(1.1,
1.47, 1.02, 0.97, 1.55, 0.96, 1.64, 1.44, 1.01, 0.9, 0.86, 1.59
), Chronic.Time.Delay = c(0.6, 0.86, 0.65, 0.7, 0.8, 0.65, 0.75,
0.9, 0.82, 0.67, 0.57, 0.7), Acute.Area.Def = c(1.84, 3.04, 1.8,
1.76, 3.12, 1.65, 3.34, 3.24, 1.77, 1.51, 1.78, 3.27), Chronic.Area.Def = c(0.15,
0.42, 0.15, 0, 0.37, 0.24, 0.4, 0.298, 0.14, 0.19, 0.1, 0.36),
BL.BBB = c(21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L), Acute.BBB = c(11L, 3L, 11L, 8L, 4L, 9L, 2L, 1L,
7L, 9L, 10L, 2L), Chronic.BBB = c(18L, 14L, 18L, 17L, 11L,
14L, 12L, 9L, 17L, 15L, 17L, 12L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))