Search code examples
rdataframemergeflextable

merge dataframes for flextable


I want to merge (cbind or similar) a number of flextable objects from this:

enter image description here

to this:

enter image description here

Set up:

library(tidyverse)
library(flextable)
model1 <- data.frame("variable" = c(letters[1:3]),
                     "coeff" = c(20.3, 12.2, 10.1),
                     "ci" = c("19-20", "10-14", "10-11"))
model2 <- data.frame("variable" = c(letters[1:4]),
                     "coeff" = c(17, 10.1, 189, 200),
                     "ci" = c("12-20", "0.1-1.2", "100-200", "150-250"))
model3 <- data.frame("variable" = c(letters[1:5]),
                     "coeff" = c(1:5),
                     "ci" = c("1-3", "0.1-2.2", "2-4", "3-6", "2-9"))
t1 <- flextable(model1) %>% 
  add_header_row(values = c("", "model 1", "model 1"), top = T) %>% merge_at(i = 1:1, j = 2:3, part = "header")
t2 <- flextable(model2) %>% 
  add_header_row(values = c("", "model 2", "model 2"), top = T) %>% merge_at(i = 1:1, j = 2:3, part = "header")
t3 <- flextable(model3) %>% 
  add_header_row(values = c("", "model 3", "model 3"), top = T) %>% merge_at(i = 1:1, j = 2:3, part = "header")

I'm not sure how to merge these efficiently, I could make the datasets the same size so that I could at least use cbind but this means it would be messy to drop and reorder columns with the same names:

model1[nrow(model1) + (nrow(model3) - nrow(model1)), ] <- NA
model2[nrow(model2) + (nrow(model3) - nrow(model2)), ] <- NA
df_bind <- cbind(model1, model2, model3)
df_bind

#   variable coeff    ci variable coeff      ci variable coeff      ci
# 1        a  20.3 19-20        a  17.0   12-20        a     1     1-3
# 2        b  12.2 10-14        b  10.1 0.1-1.2        b     2 0.1-2.2
# 3        c  10.1 10-11        c 189.0 100-200        c     3     2-4
# 4     <NA>    NA  <NA>        d 200.0 150-250        d     4     3-6
# 5     <NA>    NA  <NA>     <NA>    NA    <NA>        e     5     2-9

Then I would like to use flextable command:

flextable(df_bind)
#Error in flextable(df_bind) : duplicated col_keys

This wont work as they have the same column names.

The only way I can think of is to rename each dataframe column (e.g. rename to variable1, variable2, variable3 etc.) and then merge and then rename back to the original names using add_header_row but this is very inefficient.

Is there a simpler workaround?

thanks

edit

I cant seem to replicate @Ben solution. When I run:

bind_rows(list(model_1 = model1, model_2 = model2, model_3 = model3), .id = 'model') %>%
  pivot_wider(id_cols = variable, names_from = model, values_from = c(coeff, ci)) %>%
  select(variable, coeff_model_1, ci_model_1, 
         coeff_model_2, ci_model_2, coeff_model_3, ci_model_3) %>%
  flextable(col_keys = my_header$col_keys) 

I get the following which looks fine so far:

enter image description here

then when I add the next two lines:

bind_rows(list(model_1 = model1, model_2 = model2, model_3 = model3), .id = 'model') %>%
  pivot_wider(id_cols = variable, names_from = model, values_from = c(coeff, ci)) %>%
  select(variable, coeff_model_1, ci_model_1, 
         coeff_model_2, ci_model_2, coeff_model_3, ci_model_3) %>%
  flextable(col_keys = my_header$col_keys) %>%
  set_header_df(mapping = my_header, key = "col_keys") %>%
  theme_booktabs() 

enter image description here

and then

bind_rows(list(model_1 = model1, model_2 = model2, model_3 = model3), .id = 'model') %>%
  pivot_wider(id_cols = variable, names_from = model, values_from = c(coeff, ci)) %>%
  select(variable, coeff_model_1, ci_model_1, 
         coeff_model_2, ci_model_2, coeff_model_3, ci_model_3) %>%
  flextable(col_keys = my_header$col_keys) %>%
  set_header_df(mapping = my_header, key = "col_keys") %>%
  theme_booktabs() %>% 
  merge_v(part = "header") %>%
  merge_h(part = "header") 

enter image description here

and finally

bind_rows(list(model_1 = model1, model_2 = model2, model_3 = model3), .id = 'model') %>%
  pivot_wider(id_cols = variable, names_from = model, values_from = c(coeff, ci)) %>%
  select(variable, coeff_model_1, ci_model_1, 
         coeff_model_2, ci_model_2, coeff_model_3, ci_model_3) %>%
  flextable(col_keys = my_header$col_keys) %>%
  set_header_df(mapping = my_header, key = "col_keys") %>%
  theme_booktabs() %>% 
  merge_v(part = "header") %>%
  merge_h(part = "header") %>%
  align(align = "center", part = "all") %>%
  colformat_num(na_str = "")

enter image description here

R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default
    
attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] flextable_0.5.11 forcats_0.5.0    stringr_1.4.0    dplyr_1.0.2      purrr_0.3.4      readr_1.3.1      tidyr_1.1.2      tibble_3.0.3    
 [9] ggplot2_3.3.2    tidyverse_1.3.0 

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.3        lubridate_1.7.9   lattice_0.20-38   assertthat_0.2.1  digest_0.6.25     utf8_1.1.4        R6_2.4.1          cellranger_1.1.0 
 [9] backports_1.1.5   reprex_0.3.0      evaluate_0.14     httr_1.4.1        pillar_1.4.3      gdtools_0.2.1     rlang_0.4.8       readxl_1.3.1     
[17] uuid_0.1-4        rstudioapi_0.11   data.table_1.12.8 rmarkdown_2.1     munsell_0.5.0     tinytex_0.20      broom_0.5.5       compiler_3.6.3   
[25] modelr_0.1.6      xfun_0.12         pkgconfig_2.0.3   systemfonts_0.1.1 base64enc_0.1-3   htmltools_0.4.0   tidyselect_1.1.0  fansi_0.4.1      
[33] crayon_1.3.4      dbplyr_1.4.2      withr_2.1.2       grid_3.6.3        nlme_3.1-144      jsonlite_1.6.1    gtable_0.3.0      lifecycle_0.2.0  
[41] DBI_1.1.0         magrittr_1.5      scales_1.1.0      zip_2.1.1         cli_2.0.2         stringi_1.4.6     fs_1.3.2          xml2_1.2.5       
[49] ellipsis_0.3.0    generics_0.0.2    vctrs_0.3.5       tools_3.6.3       glue_1.4.2        officer_0.3.14    hms_0.5.3         yaml_2.2.1       
[57] colorspace_1.4-1  rvest_0.3.5       knitr_1.28        haven_2.2.0

I cant see much difference in sessionInfo apart from the version of R. I also updated officer to version officer_0.3.14 but it didnt make any difference. It seems set_header_df and colformat_num dont seem to be working correctly.

Any ideas?


Solution

  • Perhaps this might be helpful.

    Setup the header with names of columns, as well as header labels.

    Use bind_rows to put all your models into long form, with an addition column representing which model the results belong to.

    Use pivot_wider to put into wide form for presentation. The select afterwards will order your columns.

    The flextable set_header_df will provide the header labels based on column names (taken from my_header keys).

    library(tidyverse)
    library(stringr)
    library(flextable)
    
    my_header <- data.frame(
      col_keys = c("variable", "coeff_model_1", "ci_model_1", 
                   "coeff_model_2", "ci_model_2", "coeff_model_3", "ci_model_3"),
      line1 = "Table 1",
      line2 = c("", rep("Model 1", 2), rep("Model 2", 2), rep("Model 3", 2)),
      line3 = c("Variable", "Coeff", "CI", "Coeff", "CI", "Coeff", "CI"),
      stringsAsFactors = FALSE
    )
    
    bind_rows(list(model_1 = model1, model_2 = model2, model_3 = model3), .id = 'model') %>%
      pivot_wider(id_cols = variable, names_from = model, values_from = c(coeff, ci)) %>%
      select(variable, coeff_model_1, ci_model_1, 
             coeff_model_2, ci_model_2, coeff_model_3, ci_model_3) %>%
      flextable(col_keys = my_header$col_keys) %>%
      set_header_df(mapping = my_header, key = "col_keys") %>%
      theme_booktabs() %>% 
      merge_v(part = "header") %>%
      merge_h(part = "header") %>%
      align(align = "center", part = "all") %>%
      colformat_num(na_str = "")
    

    Output

    flextable output

    Session Info

    R> sessionInfo()
    R version 4.0.2 (2020-06-22)
    Platform: x86_64-apple-darwin17.0 (64-bit)
    Running under: macOS  10.16
    
    Matrix products: default
    LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
    
    locale:
    [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
    
    attached base packages:
    [1] stats     graphics  grDevices utils     datasets  methods   base     
    
    other attached packages:
     [1] flextable_0.5.11 forcats_0.5.0    stringr_1.4.0    dplyr_1.0.2      purrr_0.3.4      readr_1.3.1     
     [7] tidyr_1.1.2      tibble_3.0.3     ggplot2_3.3.2    tidyverse_1.3.0 
    
    loaded via a namespace (and not attached):
     [1] tidyselect_1.1.0  xfun_0.16         haven_2.3.1       colorspace_1.4-1  vctrs_0.3.2       generics_0.0.2   
     [7] htmltools_0.5.0   viridisLite_0.3.0 yaml_2.2.1        base64enc_0.1-3   utf8_1.1.4        plotly_4.9.2.1   
    [13] rlang_0.4.7       pillar_1.4.4      glue_1.4.2        withr_2.2.0       DBI_1.1.0         gdtools_0.2.2    
    [19] dbplyr_1.4.3      uuid_0.1-4        modelr_0.1.7      readxl_1.3.1      lifecycle_0.2.0   munsell_0.5.0    
    [25] gtable_0.3.0      cellranger_1.1.0  zip_2.1.1         rvest_0.3.5       htmlwidgets_1.5.1 evaluate_0.14    
    [31] knitr_1.30        fansi_0.4.1       broom_0.7.0       Rcpp_1.0.5        backports_1.1.6   scales_1.1.1     
    [37] jsonlite_1.7.1    fs_1.4.1          systemfonts_0.2.1 hms_0.5.3         digest_0.6.25     stringi_1.4.6    
    [43] grid_4.0.2        cli_2.0.2         tools_4.0.2       magrittr_1.5      lazyeval_0.2.2    crayon_1.3.4     
    [49] pkgconfig_2.0.3   ellipsis_0.3.1    data.table_1.13.0 xml2_1.3.2        reprex_0.3.0      lubridate_1.7.9  
    [55] rmarkdown_2.2     officer_0.3.14    assertthat_0.2.1  httr_1.4.2        rstudioapi_0.11   R6_2.4.1         
    [61] compiler_4.0.2