I have some data showing a response to a treatment (a categorical variable). Each replicate also has a known value assigned to it based on the magnitude of the treatment relative to the individual (a continuous variable). What I'd like to do is to show the response to both the treatment effect (a bar plot) and the continuous variable (a scatterplot / regression) in a single plot using ggplot2
, in a similar way that the facet_wrap
or facet_grid
commands can be used. Basically, I'd like to recreate the plot below using ggplot2
.
Here is the code which I used to generate the sample data and to create the plot
## GENERATE CONTINUOUS VARIABLES FOR EACH TREATMENT (A - D)
A <- abs(norm(10, 1, 1))
B <- abs(rnorm(10, 3, 1))
C <- abs(rnorm(10, 5, 1))
D <- abs(rnorm(10, 7, 1))
## GENERATE RESPONSE TO TREATMENTS
res_A<-rnorm(10, 28, 3)
res_B<-rnorm(10, 22, 3)
res_C<-rnorm(10, 18, 3)
res_D<-rnorm(10, 12, 3)
## ESTABLISH DATA FRAMES FOR TREATMENTS AND RESPONSE
treatments<-data.frame(A, B, C, D)
response<-data.frame(res_A, res_B, res_C, res_D)
## CONVERT EACH DATA FRAME TO LONG FORM
library(reshape2)
treatments <-treatments %>% gather(Treatment, cont_x, A:D)
response <-response %>% gather(Treatment, Response, res_A:res_D)
## CREATE FINAL DATA FRAME WITH REQUIRED DATA
data<-data.frame(treatments$Treatment, treatments$cont_x, response$Response)
colnames(data) <- c("Treatment", "X", "Response")
## ESTABLISH MEANS AND STANDARD ERROR FOR TREATMENT EFFECTS
means<-tapply(data$Response,list(data$Treatment),mean,na.rm=T)
ER<-tapply(data$Response,list(data$Treatment),sd,na.rm=T)/sqrt(tapply(data$Response,list(data$Treatment),length))
## SET AESTHETICS AND LABEL VALUES
cols<-c("darkcyan","olivedrab3", "palevioletred3","brown3")
labs<-c("A", "B", "C", "D")
## GENERATE PLOT CANVASS
par(mfrow=c(1,2))
par(mar=c(3.5,3,2,1))
## GENEATE BAR PLOT
graph<-tapply(data$Response,data$Treatment,mean,na.rm=T)
plot<-barplot(graph,col=cols,las=1,xaxt='n',yaxt='n',
xlab=NA,ylab =NA,font.lab=2,
cex=0.6,cex.lab=0.6,font.lab=2,font.axis=2,
cex.axis=0.6,ylim=c(0,40), main="TREATMENT EFFECTS")
box()
arrows(x0=plot,y0=means-ER,x1=plot,
y1=means+ER,code=3,angle=90,length=0.02,lwd=1)
axis(side=1,line=0,at=plot,labels=labs,
cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)
axis(side=2,line=0,at=seq(0,40,10),las=1,cex.axis=0.8,
labels=seq(0,40,10),cex=0.6,mgp=c(0,0.6,0))
xlab<-c("Treatment")
ylab<-c("Response")
mtext(xlab, side=1, cex=1.2, line=2)
mtext(ylab, side=2, cex=1.2, line=1.75)
mark<-(means+ER)+2
text(0.7,mark[1],"a",font=1,cex=1.2)
text(1.9,mark[2],"b",font=1,cex=1.2)
text(3.1,mark[3],"c",font=1,cex=1.2)
text(4.3,mark[4],"d",font=1,cex=1.2)
## GENERATE SCATTERPLOT
par(mar=c(3.5, 2, 2, 2))
plot(data$X, data$Response,type='n',ylim = c(0, 40), xlim=c(0,9),pch=21, col='black', cex=1.5, xaxt='n',
yaxt='n', xlab=NA, ylab=NA, main = "CONTINUOUS RESPONSE")
axis(side=1,line=0,tck=NA,at=seq(0,9,3),labels=T,
cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)
axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=0.01)
axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=-0.01)
xlab<-c("Continuous variable")
mtext(xlab, side=1, cex=1.2, line=2)
## PERFORM REGRESSION AND ADD IN REGRESSION LINE
model<-lm (Response ~ X, data = data)
abline(model, lwd=2)
## ADD IN CONFIDENCE INTERVAL
newx <- seq(0,9,length.out=1000)
preds <- predict(model, newdata = data.frame(X=newx),
interval = 'confidence')
lines(newx, preds[ ,3], lty = 'dashed', col = "grey36",lwd=1)
lines(newx, preds[ ,2], lty = 'dashed', col = 'grey36',lwd=1)
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
## ADD IN POINTS ONTOP OF CI POLYGON
points(data$X, data$Response, bg= ifelse(data$Treatment == "A", "darkcyan",
ifelse(data$Treatment == "B","olivedrab3", ifelse(data$Treatment == "C", "palevioletred3", "brown3"))),pch=21, col='black', cex=1.5)
## ADD THE REGRESSION EQ
eq<-expression(italic("y = 28.54 - 2.16x"))
rsq<-expression(italic("R"^{2}~"= 0.76 ***"))
text(5 ,35, eq, cex=1.2)
text(4.5, 33, rsq, cex=1.2
Is it even possible to do this using ggplot2
?
Sure you can use ggplot2
with a little help from ggpubr
and ggpmisc
.
library(ggplot2)
library(ggpubr)
library(ggpmisc)
a <- data %>%
group_by(Treatment) %>%
summarise(Response=mean(Response)) %>%
mutate(se = sd(Response)/sqrt(length(Response))) %>% ungroup %>%
ggplot(aes(x=Treatment,y=Response,fill = Treatment)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.3)) +
geom_errorbar(aes(ymin = Response - se, ymax = Response + se), width = 0.1) + scale_fill_manual(values = cols) +
labs(title="TREATMENT EFFECTS")
b <- data %>%
ggplot(aes(x=X,y=Response)) +
geom_smooth(method="lm", show.legend = FALSE) +
geom_point(aes(fill = factor(Treatment)),shape=21,size=3, show.legend = FALSE) +
scale_fill_manual(values = cols) +
labs(title = "CONTINUOUS RESPONSE", xlab="Continuous Variable") +
xlab("Continuous Variable") +
stat_poly_eq(formula = y ~ x, label.x = 0.9, label.y = 0.95, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE)
ggarrange(a,b,nrow = 1)
data <- structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), X = c(0.0267649727104236,
1.39488715616046, 0.21355823940511, 0.49907410504292, 0.375279051787701,
0.681959942595334, 2.05952354292797, 2.26083984353423, 1.11354591314711,
0.34506137947979, 2.07866454079728, 3.20194490569932, 3.26342299162599,
2.06754732525705, 4.02484423090347, 3.21831253488128, 3.56925840330762,
3.92631915144912, 2.55130407898901, 4.88369094725247, 4.85805706436391,
6.06714331089959, 5.05696298595936, 4.28599088092722, 2.64907718621996,
5.50017966947343, 5.27853136585637, 5.8694723514342, 4.57774253201089,
4.28459862391154, 6.6919479712577, 6.89039252602714, 7.36883429701188,
5.01895090471179, 7.66623439220746, 7.27620218490248, 6.44135570941742,
7.66409390386461, 8.09858213415943, 4.89114777053612), Response = c(28.647362805403,
30.5878855986189, 36.3739824861786, 33.5874379487616, 23.8060926287858,
30.8520531077353, 26.5940268747477, 28.8356526462252, 30.4727218173035,
26.8151163416507, 17.6391456006427, 19.0921380684935, 21.6950437768534,
23.9017396212974, 27.1407090174467, 15.4322366130883, 26.9809942596379,
22.7341801522041, 23.6518581209459, 21.8377270248132, 13.2905142368901,
19.8951142352182, 17.1400860924093, 16.847732448511, 15.6213812276033,
18.3368951001566, 18.7411799795391, 17.5514579276854, 14.2841781950673,
21.6044042356051, 11.0037691942103, 13.0260853225773, 10.6862778263241,
9.4482751070798, 11.9896873712498, 10.0798146375625, 12.6332310111476,
14.4806588768585, 6.89810707498932, 7.55062781781536)), class = "data.frame", row.names = c(NA,
-40L))
cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")