I have the following summary statistics computed from my data frame:
war_3 a1_1_area_mean a1_2_area_mean a1_3_area_mean a1_4_area_mean a1_5_area_mean a1_6_area_mean
1 1 0.23827851 0.07843460 0.02531607 0.1193928 0.7635068 0.02333938
2 2 0.23162416 0.05949285 0.01422585 0.3565457 0.8593997 0.06895526
3 3 0.09187454 0.07274503 0.10357251 0.2821142 0.5929178 0.02455053
a1_7_area_mean a1_8_area_mean a1_t_area_mean a2_1_area_mean a2_2_area_mean a2_3_area_mean
1 0.005387169 0.2725867 1.526242 0.107725394 0.19406917 0.02213419
2 0.016701786 0.2222106 1.829156 0.073991405 0.03504120 0.00815826
3 0.028382414 0.1997225 1.395880 0.003634443 0.03508602 0.00000000
a2_4_area_mean a2_5_area_mean a2_t_area_mean a1_1_area_var a1_2_area_var a1_3_area_var a1_4_area_var
1 0.02024704 0.0040841950 0.34826000 1.2730028 0.13048871 0.05165589 0.1851353
2 0.07621595 0.0005078053 0.19391462 0.6114136 0.09287735 0.05697542 0.7284144
3 0.00000000 0.0000000000 0.03872046 0.1171754 0.07581946 0.35349703 0.3883895
a1_5_area_var a1_6_area_var a1_7_area_var a1_8_area_var a1_t_area_var a2_1_area_var a2_2_area_var
1 2.7640424 0.01688505 0.001459156 0.8844626 7.940393 0.57992528 1.41104857
2 2.6797714 0.05490461 0.003428341 0.5725653 8.190389 0.18087732 0.11406984
3 0.9938991 0.01801805 0.006360622 0.3405592 3.460435 0.00306776 0.06579978
a2_3_area_var a2_4_area_var a2_5_area_var a2_t_area_var a1_1_area_sd a1_2_area_sd a1_3_area_sd
1 0.067049470 0.06260921 0.0045015472 2.10734089 1.1282743 0.3612322 0.2272793
2 0.009580693 0.29505206 0.0005616327 0.85060972 0.7819294 0.3047579 0.2386952
3 0.000000000 0.00000000 0.0000000000 0.06861217 0.3423089 0.2753533 0.5945562
a1_4_area_sd a1_5_area_sd a1_6_area_sd a1_7_area_sd a1_8_area_sd a1_t_area_sd a2_1_area_sd
1 0.4302735 1.6625410 0.1299425 0.03819890 0.9404587 2.817870 0.76152825
2 0.8534719 1.6370007 0.2343173 0.05855204 0.7566805 2.861886 0.42529674
3 0.6232090 0.9969449 0.1342313 0.07975351 0.5835745 1.860224 0.05538736
a2_2_area_sd a2_3_area_sd a2_4_area_sd a2_5_area_sd a2_t_area_sd
1 1.1878757 0.25893912 0.2502183 0.06709357 1.4516683
2 0.3377423 0.09788102 0.5431869 0.02369879 0.9222851
3 0.2565147 0.00000000 0.0000000 0.00000000 0.2619392
The summary table above was produced by the script further below from the original data frame, the first rows of which are:
uid war_3 a1_1_area a1_2_area a1_3_area a1_4_area a1_5_area a1_6_area a1_7_area a1_8_area a1_t_area
1 1001 1 0 0.00000 0 0.67048 0.0000 0.02088 0 0.00000 0.69136
2 1002 2 0 0.00000 0 0.00000 0.9019 0.14493 0 0.00000 1.04683
3 1003 2 0 0.00000 0 0.00000 0.9019 0.00000 0 0.00000 0.90190
4 1004 2 0 1.09322 0 0.00000 0.0000 0.00000 0 0.00000 1.09322
5 1005 3 0 1.75000 0 0.00000 0.0000 0.00000 0 0.00000 1.75000
6 1006 2 0 2.43442 0 0.32223 0.0000 0.00000 0 0.76801 3.52466
a2_1_area a2_2_area a2_3_area a2_4_area a2_5_area a2_t_area
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
# Per-group summary statistics: drop the record id, group by war_3, then apply
# every statistic to every remaining column.  Output columns are named
# <column>_<statistic>, e.g. a1_1_area_mean.
# (The original snippet had an unbalanced closing parenthesis and used the
# deprecated summarize_each()/funs() interface.)
summary <- df.anov %>%
  select(-uid) %>%
  group_by(war_3) %>%
  summarise_all(
    list(
      min = min, max = max, mean = mean,
      median = median, var = var, sd = sd
    )
  )
However, in this wide layout it is difficult to compare the mean, variance and s.d. of each variable across the war_3 groups,
so I would like to transform it into the following format:
variable war_3 mean variance s.d.
a1_1_area, 1 , x , x , x
a1_1_area, 2 , x , x , x
a1_1_area, 3 , x , x , x
a1_2_area, 1 , x , x , x
a1_2_area, 2 , x , x , x
a1_2_area, 3 , x , x , x
a1_3_area, 1 , x , x , x
a1_3_area, 2 , x , x , x
a1_3_area, 3 , x , x , x
a1_4_area, 1 , x , x , x
a1_4_area, 2 , x , x , x
a1_4_area, 3 , x , x , x
(it continues until `a2_5_area` in `variable`)
I usually use gather
from tidyr
to rearrange a simple data frame from wide format into long format; however, this data frame requires a more complicated operation, which may need repeated calls to select(matches())
or something similar.
The variables are:
war_3
: the variable used to group the records (the data were already grouped by group_by(war_3) %>% summarize_each(funs(mean,var,sd))
in the previous operation)
aX_Y_area_Z
: where X takes the two values 1 and 2; Y ranges over 1-8 for X=1 and over 1-5 for X=2; and Z is one of the three statistics mean, variance (var) and s.d. (sd).
Could you help me achieve this?
I would prefer a solution that uses dplyr
piping rather than a data.table()
solution.
The following script is a very manual approach, but each gather()
call produces duplicated records, and I do not want to specify either the column numbers or the column names manually.
# Manual reshaping attempt: the columns of each statistic are listed by hand
# and stacked with their own gather() call.  Every gather() repeats all rows
# produced by the previous one once per gathered column, which is where the
# duplicated records come from.
summary2 <- summary %>%
  gather(
    key1, mean,
    a1_1_area_mean, a1_2_area_mean, a1_3_area_mean,
    a1_4_area_mean, a1_5_area_mean, a1_6_area_mean,
    a1_7_area_mean, a1_8_area_mean, a1_t_area_mean,
    a2_1_area_mean, a2_2_area_mean, a2_3_area_mean,
    a2_4_area_mean, a2_5_area_mean, a2_t_area_mean
  ) %>%
  gather(
    key2, var,
    a1_1_area_var, a1_2_area_var, a1_3_area_var,
    a1_4_area_var, a1_5_area_var, a1_6_area_var,
    a1_7_area_var, a1_8_area_var, a1_t_area_var,
    a2_1_area_var, a2_2_area_var, a2_3_area_var,
    a2_4_area_var, a2_5_area_var, a2_t_area_var
  ) %>%
  gather(
    key3, sd,
    a1_1_area_sd, a1_2_area_sd, a1_3_area_sd,
    a1_4_area_sd, a1_5_area_sd, a1_6_area_sd,
    a1_7_area_sd, a1_8_area_sd, a1_t_area_sd,
    a2_1_area_sd, a2_2_area_sd, a2_3_area_sd,
    a2_4_area_sd, a2_5_area_sd, a2_t_area_sd
  ) %>%
  # Keep only the first 9 characters of key1 (e.g. "a1_1_area") as the label;
  # key2 and key3 are dropped without being matched against key1.
  mutate(key1 = str_sub(key1, 1, 9)) %>%
  select(-key2, -key3) %>%
  rename(key = key1)
Since you did not provide sample data that is easy to copy and paste, I created some of my own:
library(tidyverse)
# Build a small wide summary table from mtcars that mimics the question's
# layout: duplicate disp, mpg and drat into <name>_1 / <name>_2 columns, drop
# the originals, then compute mean and sd per cyl group.  The result has
# columns named <name>_<index>_<statistic>, e.g. mpg_1_mean.
data <- mtcars %>%
  mutate(
    disp_1 = disp, disp_2 = disp,
    mpg_1 = mpg, mpg_2 = mpg,
    drat_1 = drat, drat_2 = drat
  ) %>%
  select(-disp, -mpg, -drat) %>%
  group_by(cyl) %>%
  summarise_at(
    vars(starts_with("mpg"), starts_with("disp"), starts_with("drat")),
    list(mean = mean, sd = sd)
  )
data
# A tibble: 3 x 13
cyl mpg_1_mean mpg_2_mean disp_1_mean disp_2_mean drat_1_mean drat_2_mean mpg_1_sd
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 26.7 26.7 105. 105. 4.07 4.07 4.51
2 6 19.7 19.7 183. 183. 3.59 3.59 1.45
3 8 15.1 15.1 353. 353. 3.23 3.23 2.56
# ... with 5 more variables: mpg_2_sd <dbl>, disp_1_sd <dbl>, disp_2_sd <dbl>,
# drat_1_sd <dbl>, drat_2_sd <dbl>
Then simply gather, separate and spread:
# Reshape the wide summary into one row per (cyl, variable, index) with one
# column per statistic.  This performs the gather() + separate() + spread()
# steps in a single pivot_longer() call: the column names are split at "_"
# into three pieces, the first two become the columns a and b, and the third
# (".value") supplies the names of the output columns (mean, sd).
# For names with a different number of pieces (e.g. a1_1_area_mean), split at
# the last underscore instead:
#   names_to = c("variable", ".value"), names_pattern = "(.*)_(.*)"
data %>%
  pivot_longer(
    -cyl,
    names_to = c("a", "b", ".value"),
    names_sep = "_"
  ) %>%
  # spread() returned rows sorted by the key columns; keep that ordering.
  arrange(cyl, a, b)
# A tibble: 18 x 5
cyl a b mean sd
<dbl> <chr> <chr> <dbl> <dbl>
1 4 disp 1 105. 26.9
2 4 disp 2 105. 26.9
3 4 drat 1 4.07 0.365
4 4 drat 2 4.07 0.365
5 4 mpg 1 26.7 4.51
6 4 mpg 2 26.7 4.51
7 6 disp 1 183. 41.6
8 6 disp 2 183. 41.6
9 6 drat 1 3.59 0.476
10 6 drat 2 3.59 0.476
11 6 mpg 1 19.7 1.45
12 6 mpg 2 19.7 1.45
13 8 disp 1 353. 67.8
14 8 disp 2 353. 67.8
15 8 drat 1 3.23 0.372
16 8 drat 2 3.23 0.372
17 8 mpg 1 15.1 2.56
18 8 mpg 2 15.1 2.56