Search code examples
rggplot2facet

Bar plot and scatterplot in GGPLOT facets


I have some data showing a response to a treatment (a categorical variable). Each replicate also has a known value assigned to it based on the magnitude of the treatment relative to the individual (a continuous variable). What I'd like to do is to show the response to both the treatment effect (a bar plot) and the continuous variable (a scatterplot / regression) in a single plot using ggplot2, in a similar way that the facet_wrap or facet_grid commands can be used. Basically, I'd like to recreate the plot below using ggplot2.

enter image description here

Here is the code which I used to generate the sample data and to create the plot

        ## GENERATE CONTINUOUS VARIABLES FOR EACH TREATMENT (A - D)

        A <- abs(norm(10, 1, 1))
        B <- abs(rnorm(10, 3, 1))
        C <- abs(rnorm(10, 5, 1))
        D <- abs(rnorm(10, 7, 1))

        ## GENERATE RESPONSE TO TREATMENTS

        res_A<-rnorm(10, 28, 3)
        res_B<-rnorm(10, 22, 3)
        res_C<-rnorm(10, 18, 3)
        res_D<-rnorm(10, 12, 3)

        ## ESTABLISH DATA FRAMES FOR TREATMENTS AND RESPONSE

        treatments<-data.frame(A, B, C, D)
        response<-data.frame(res_A, res_B, res_C, res_D)

        ## CONVERT EACH DATA FRAME TO LONG FORM

        library(reshape2)

        treatments <-treatments %>% gather(Treatment, cont_x, A:D)
        response <-response %>% gather(Treatment, Response, res_A:res_D)

        ## CREATE FINAL DATA FRAME WITH REQUIRED DATA

        data<-data.frame(treatments$Treatment, treatments$cont_x, response$Response)
        colnames(data) <- c("Treatment", "X", "Response")

        ## ESTABLISH MEANS AND STANDARD ERROR FOR TREATMENT EFFECTS

        means<-tapply(data$Response,list(data$Treatment),mean,na.rm=T)
        ER<-tapply(data$Response,list(data$Treatment),sd,na.rm=T)/sqrt(tapply(data$Response,list(data$Treatment),length))

        ## SET AESTHETICS AND LABEL VALUES

        cols<-c("darkcyan","olivedrab3", "palevioletred3","brown3")
        labs<-c("A", "B", "C", "D")

        ## GENERATE PLOT CANVASS

        par(mfrow=c(1,2))
        par(mar=c(3.5,3,2,1))

        ## GENEATE BAR PLOT

        graph<-tapply(data$Response,data$Treatment,mean,na.rm=T)
        plot<-barplot(graph,col=cols,las=1,xaxt='n',yaxt='n',
                      xlab=NA,ylab =NA,font.lab=2,
                      cex=0.6,cex.lab=0.6,font.lab=2,font.axis=2,
                      cex.axis=0.6,ylim=c(0,40), main="TREATMENT EFFECTS")
        box()
        arrows(x0=plot,y0=means-ER,x1=plot,
               y1=means+ER,code=3,angle=90,length=0.02,lwd=1)

        axis(side=1,line=0,at=plot,labels=labs,
             cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)

        axis(side=2,line=0,at=seq(0,40,10),las=1,cex.axis=0.8,
             labels=seq(0,40,10),cex=0.6,mgp=c(0,0.6,0))

        xlab<-c("Treatment")
        ylab<-c("Response")

        mtext(xlab, side=1, cex=1.2, line=2)
        mtext(ylab, side=2, cex=1.2, line=1.75)

        mark<-(means+ER)+2
        text(0.7,mark[1],"a",font=1,cex=1.2)
        text(1.9,mark[2],"b",font=1,cex=1.2)
        text(3.1,mark[3],"c",font=1,cex=1.2)
        text(4.3,mark[4],"d",font=1,cex=1.2)

        ## GENERATE SCATTERPLOT

        par(mar=c(3.5, 2, 2, 2))
        plot(data$X, data$Response,type='n',ylim = c(0, 40), xlim=c(0,9),pch=21, col='black', cex=1.5, xaxt='n', 
             yaxt='n', xlab=NA, ylab=NA, main = "CONTINUOUS RESPONSE")

        axis(side=1,line=0,tck=NA,at=seq(0,9,3),labels=T,
             cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)

        axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=0.01)
        axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=-0.01)

        xlab<-c("Continuous variable")

        mtext(xlab, side=1, cex=1.2, line=2)

        ## PERFORM REGRESSION AND ADD IN REGRESSION LINE

        model<-lm (Response ~ X, data = data)
        abline(model, lwd=2)

        ## ADD IN CONFIDENCE INTERVAL

        newx <- seq(0,9,length.out=1000)
        preds <- predict(model, newdata = data.frame(X=newx), 
                         interval = 'confidence')
        lines(newx, preds[ ,3], lty = 'dashed', col = "grey36",lwd=1)
        lines(newx, preds[ ,2], lty = 'dashed', col = 'grey36',lwd=1)

        polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)

        ## ADD IN POINTS ONTOP OF CI POLYGON

        points(data$X, data$Response, bg= ifelse(data$Treatment == "A", "darkcyan",
            ifelse(data$Treatment == "B","olivedrab3", ifelse(data$Treatment == "C", "palevioletred3", "brown3"))),pch=21, col='black', cex=1.5)

    ## ADD THE REGRESSION EQ


    eq<-expression(italic("y = 28.54 - 2.16x"))
    rsq<-expression(italic("R"^{2}~"= 0.76 ***"))
    text(5 ,35, eq, cex=1.2)
    text(4.5, 33, rsq, cex=1.2

Is it even possible to do this using ggplot2?


Solution

  • Sure you can use ggplot2 with a little help from ggpubr and ggpmisc.

    library(ggplot2)
    library(ggpubr)
    library(ggpmisc)
    
    a <- data %>% 
           group_by(Treatment) %>% 
           summarise(Response=mean(Response)) %>% 
           mutate(se = sd(Response)/sqrt(length(Response))) %>% ungroup %>%
         ggplot(aes(x=Treatment,y=Response,fill = Treatment)) + 
           geom_col(show.legend = FALSE) + 
           geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.3)) +
           geom_errorbar(aes(ymin = Response - se, ymax = Response + se), width = 0.1) + scale_fill_manual(values = cols) +
           labs(title="TREATMENT EFFECTS")
    
    b <- data %>% 
         ggplot(aes(x=X,y=Response)) + 
          geom_smooth(method="lm", show.legend = FALSE) + 
          geom_point(aes(fill = factor(Treatment)),shape=21,size=3, show.legend = FALSE) +
          scale_fill_manual(values = cols) +
          labs(title = "CONTINUOUS RESPONSE", xlab="Continuous Variable") +
          xlab("Continuous Variable") +
          stat_poly_eq(formula = y ~ x, label.x = 0.9, label.y = 0.95, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE)
    
    ggarrange(a,b,nrow = 1)
    

    Plot Data:

    data <- structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), X = c(0.0267649727104236, 
    1.39488715616046, 0.21355823940511, 0.49907410504292, 0.375279051787701, 
    0.681959942595334, 2.05952354292797, 2.26083984353423, 1.11354591314711, 
    0.34506137947979, 2.07866454079728, 3.20194490569932, 3.26342299162599, 
    2.06754732525705, 4.02484423090347, 3.21831253488128, 3.56925840330762, 
    3.92631915144912, 2.55130407898901, 4.88369094725247, 4.85805706436391, 
    6.06714331089959, 5.05696298595936, 4.28599088092722, 2.64907718621996, 
    5.50017966947343, 5.27853136585637, 5.8694723514342, 4.57774253201089, 
    4.28459862391154, 6.6919479712577, 6.89039252602714, 7.36883429701188, 
    5.01895090471179, 7.66623439220746, 7.27620218490248, 6.44135570941742, 
    7.66409390386461, 8.09858213415943, 4.89114777053612), Response = c(28.647362805403, 
    30.5878855986189, 36.3739824861786, 33.5874379487616, 23.8060926287858, 
    30.8520531077353, 26.5940268747477, 28.8356526462252, 30.4727218173035, 
    26.8151163416507, 17.6391456006427, 19.0921380684935, 21.6950437768534, 
    23.9017396212974, 27.1407090174467, 15.4322366130883, 26.9809942596379, 
    22.7341801522041, 23.6518581209459, 21.8377270248132, 13.2905142368901, 
    19.8951142352182, 17.1400860924093, 16.847732448511, 15.6213812276033, 
    18.3368951001566, 18.7411799795391, 17.5514579276854, 14.2841781950673, 
    21.6044042356051, 11.0037691942103, 13.0260853225773, 10.6862778263241, 
    9.4482751070798, 11.9896873712498, 10.0798146375625, 12.6332310111476, 
    14.4806588768585, 6.89810707498932, 7.55062781781536)), class = "data.frame", row.names = c(NA, 
    -40L))
    
    cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")