Search code examples
r, ggplot2, r-forestplot

Forest Plot, ordering and summarizing multiple variables


I have the following data:

# Example input for the forest plot, as produced by dput(): 61 rows.
# Columns:
#   ref      factor, study label (27 levels)
#   yi       numeric; registered as the "yi" column via the yi.names attribute
#   ci.lb    numeric; registered via ci.lb.names (lower bound, going by the name)
#   ci.ub    numeric; registered via ci.ub.names (upper bound, going by the name)
#   TypeTwo  factor with levels "BAR" and "FOO"
#   Variable character; "Death" (26 rows), "Vein Problems" (19), "Gas" (16)
# The object has class c("escalc", "data.frame") -- presumably it came from
# metafor::escalc(); confirm before relying on escalc-specific methods.
# NOTE(review): rows 1 and 2 are both "Hunter 2015" / "Death" but with
# different TypeTwo (FOO, then BAR) and different yi values.
DF<-structure(list(ref = structure(c(15L, 15L, 16L, 19L, 2L, 12L, 
11L, 23L, 6L, 21L, 5L, 13L, 8L, 22L, 26L, 27L, 20L, 17L, 9L, 
7L, 24L, 25L, 18L, 1L, 3L, 14L, 16L, 12L, 23L, 6L, 21L, 8L, 22L, 
26L, 27L, 20L, 17L, 9L, 7L, 24L, 25L, 18L, 4L, 1L, 14L, 16L, 
19L, 2L, 11L, 23L, 21L, 8L, 26L, 27L, 17L, 9L, 7L, 24L, 1L, 10L, 
14L), .Label = c("Bob 2012", "Bob 2016", "Arnez 2004", 
"Smithy 2013", "Smithy 2014", "Smithy 2016", "Carole 2011", "Craig 2014", 
"Fansa 2008", "Johnson 2010", "Joel 2017", "Joelo 2016", 
"Bob2 2017", "Bob2 2020", "Hunter 2015", "Hush 2016", 
"Lock 2012", "Mcdoo 2012", "Nick 2018", "Park 2015", "Joe 2012", 
"Sour 2017", "Shoe 2008", "Vega 2009", "West 2004", 
"West2016", "Smith 2016"), class = "factor"), yi = c(1, 
0.909090909090909, 1, 1, 0.98780487804878, 0.933333333333333, 
0.882352941176471, 0.980519480519481, 0.977272727272727, 1, 1, 
0.98019801980198, 0.959183673469388, 1, 1, 0.982758620689655, 
0.96969696969697, 0.6875, 1, 1, 1, 1, 1, 1, 0.75, 0.969811320754717, 
0, 0.0333333333333333, 0.064935064935065, 0.0227272727272727, 
0, 0.0204081632653061, 0.142857142857143, 0.0384615384615384, 
0.120689655172414, 0.0303030303030303, 0.0625, 0, 0.0625000000000001, 
0.148148148148148, 0.333333333333333, 0.0322580645161291, 0.0625, 
0, 0.0150943396226415, 0, 0.027027027027027, 0.0182926829268293, 
0.0588235294117647, 0.0324675324675325, 0.0416666666666667, 0.0408163265306122, 
0.192307692307692, 0.103448275862069, 0.0625, 0.03125, 0, 0, 
0.037037037037037, 0.0526315789473685, 0.0264150943396226), ci.lb = c(0.968401784273333, 
0.745137584391619, 0.957452087056599, 0.954039295289784, 0.963597464688465, 
0.809439442909756, 0.67719312002544, 0.951199930155904, 0.905001120558666, 
0.929555376052338, 0.880663089089027, 0.941246506281999, 0.880901216198665, 
0.880663089089027, 0.934891169467222, 0.927453022366531, 0.874486623056924, 
0.435962472420225, 0.946947080517241, 0.946947080517241, 0.937267052265125, 
0.861434988827223, 0.945257646596841, 0.937267052265125, 0.384687131024181, 
0.945252480837292, 0, 0, 0.0306637200529119, 0, 0, 0, 0.00329369106613314, 
0, 0.0474215778277017, 0, 0, 0, 0.00112833931883988, 0.0347070885129207, 
0.0895601878163022, 0, 0, 0, 0.00321663954072449, 0, 0, 0.00226571557474109, 
0, 0.00919930409127839, 0, 0.000687698884629828, 0.0597984369364536, 
0.0359775093204114, 0, 0, 0, 0, 0, 0, 0.00995385402759386), ci.ub = c(1, 
0.998207039140277, 1, 1, 0.999812850010077, 0.998780552481617, 
0.997483360196549, 0.997584224395838, 1, 1, 1, 0.999688395336243, 
0.99931230111537, 1, 1, 1, 1, 0.895437964404381, 1, 1, 1, 1, 
1, 1, 0.992197756884658, 0.987557818958737, 0.0425479129434015, 
0.137561603224075, 0.11002605111172, 0.0949988794413338, 0.070444623947662, 
0.0855442043005818, 0.384071178226987, 0.157747957353967, 0.21886405934029, 
0.125513376943076, 0.249041832299857, 0.0530529194827593, 0.179245839830917, 
0.311107006224451, 0.628630049521593, 0.133296666133586, 0.249041832299857, 
0.062732947734875, 0.0340807242282984, 0.0425479129434015, 0.112387275591248, 
0.0458586358986253, 0.235428911558493, 0.0674429497369029, 0.170235847270992, 
0.119098783801335, 0.36946549674197, 0.197004260641943, 0.249041832299857, 
0.129288064181111, 0.0530529194827593, 0.062732947734875, 0.152166113984736, 
0.212219059832308, 0.0497506289906541), TypeTwo = structure(c(2L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("BAR", 
"FOO"), class = "factor"), Variable = c("Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Gas", "Gas", "Gas", 
"Gas", "Gas", "Gas", "Gas", "Gas", "Gas", 
"Gas", "Gas", "Gas", "Gas", "Gas", "Gas", 
"Gas")), row.names = c(NA, -61L), yi.names = "yi", ci.lb.names = "ci.lb", ci.ub.names = "ci.ub", digits = c(est = 4, 
se = 4, test = 4, pval = 4, ci = 4, var = 4, sevar = 4, fit = 4, 
het = 4), class = c("escalc", "data.frame"))

And I've used this code to create a forest plot:

# Forest plot of DF: one point plus a horizontal interval (ci.lb to ci.ub)
# per row, coloured by TypeTwo, with one facet row per Variable.
ggplot(
  DF,
  aes(
    x = yi, y = ref,
    xmin = ci.lb, xmax = ci.ub,
    colour = TypeTwo, group = TypeTwo
  )
) +
  geom_point() +
  geom_errorbarh(height = 0.1, size = 0.5) +
  # Dashed reference line at an effect size of zero.
  geom_vline(xintercept = 0, colour = "black", linetype = "dashed", alpha = 0.5) +
  # Free scales and free space so each facet only shows (and is sized by)
  # the studies it actually contains.
  facet_grid(Variable ~ ., scales = "free", space = "free") +
  labs(title = "Forest Plot Combined", x = "Effect Size", y = "Study")

That looks like so: enter image description here

I'd like to reorder the rows so that the "FOO" studies and the "BAR" studies are grouped next to each other (pictured below). I also have separate "summary" data in a different data set, and I'd like to add a summary line at the bottom of each facet, for each group, if that makes sense. How do I add that extra line while keeping it inside the facet?

enter image description here

I'm totally lost on how to do that, please help!


Solution

  • There are quite a lot of ways to go about this, but here is one. Note that since the same study appears in multiple facets, and its TypeTwo is not always consistent, we have to do some trickery to be able to sort within each facet.

    I also sorted by effect size, since that's pleasing and quite commonly done.

    There is a mistake in your data for Hunter 2015 which has two effect sizes for Death, so that's why there's a red bar in with the green bars.

    Using some random data for a mean effect:

    # Attaches dplyr (bind_rows, arrange, mutate), forcats (fct_inorder)
    # and ggplot2.
    library(tidyverse)
    
    # Made-up summary estimates, one row per outcome, using the same column
    # names as DF so the two can be stacked. Swap in the real summary data.
    avg <- data.frame(
      Variable = c("Death", "Gas", "Vein Problems"),
      yi       = c(0.9, 0.1, 0.1),
      ci.lb    = c(0.5, 0, 0),
      ci.ub    = c(1, 0.5, 0.5),
      TypeTwo  = "mean effect",
      ref      = ""
    )
    
    # Stack studies and summaries, then order rows by TypeTwo (descending)
    # and, within that, by effect size.
    DF2 <- bind_rows(DF, avg)
    DF2 <- arrange(DF2, desc(TypeTwo), yi)
    # The same ref occurs in several facets, so build a key from study +
    # outcome and freeze the sorted row order into its factor levels.
    DF2 <- mutate(DF2, ref2 = fct_inorder(paste(ref, Variable)))
    
    # Plot against the composite key, but label the axis with the plain
    # study name.
    ggplot(
      DF2,
      aes(
        x = yi, y = ref2,
        xmin = ci.lb, xmax = ci.ub,
        colour = TypeTwo, group = TypeTwo
      )
    ) +
      geom_point() +
      geom_errorbarh(height = 0.1, size = 0.5) +
      geom_vline(xintercept = 0, colour = "black", linetype = "dashed", alpha = 0.5) +
      facet_grid(Variable ~ ., scales = "free", space = "free") +
      scale_y_discrete(breaks = DF2$ref2, labels = DF2$ref) +
      labs(title = "Forest Plot Combined", x = "Effect Size", y = "Study")
    

    enter image description here