Search code examples
rhistogram

How to add a second x-axis which labels standard deviations from mean to a histogram in R?


I am unsure of how to add a second set of axis labels for the number of standard deviations the confidence intervals are from the mean.

I would like it to look like the example in the image below, only with the values I have calculated.

enter image description here

Here is what I have so far.

mu <- 0.4
sd <- 0.3

dist <- rnorm(100000, mu, sd)

density <- seq(min(dist), max(dist), length = 40)

curve <- dnorm(density, mu, sd)

ci_99 <- c(0.4-2.576*sd, 0.4+2.576*sd)
ci_95 <- c(0.4-1.96*sd, 0.4+1.96*sd)
ci_90 <- c(0.4-1.645*sd, 0.4+1.645*sd)

hist(dist,
     probability = TRUE,
     ylim = c(0, max(curve)),
     xaxt = "n",
     col = "lightblue")
axis(side = 1, at = seq(-1, 1.5, 0.1), labels = seq(-1, 1.5, 0.1))
lines(density, curve, col = "black", lwd = 2.5)
abline(v = c(ci_99[1], ci_99[2]), col = "red", lty = 2, lwd = 2)
abline(v = c(ci_95[1], ci_95[2]), col = "purple", lty = 2, lwd = 2)
abline(v = c(ci_90[1], ci_90[2]), col = "darkgreen", lty = 2, lwd = 2)
abline(v = mu, col = "black", lty = 2, lwd = 2)
legend("topright", 
       legend = c("99%-CI", "95%-CI", "90%-CI", "Mean"),
       fill = c("red", "purple", "darkgreen", "black"),
       cex = 1,
       x.intersp = 1,  
       y.intersp = 1)

Solution

  • A small issue in recreating your desired figure compared to the one you presented is that the values for the axis labels aren't nicely laid out like the one in the figure, so they overlap one another.

    Nevertheless, an attempt at this can get pretty close to the image you provided. I would use segments as opposed to abline and to get the second axis, just add a another axis(..., padj = 2):

    hist(dist,
         probability = TRUE,
         ylim = c(0, max(curve)),
         xaxt = "n",
         col = "lightblue", border = "lightblue", xlab = "", main = "")
    axis(side = 1, at = seq(-1, 2, 0.2), labels = seq(-1, 2, 0.2))
    lines(density, curve, col = "black", lwd = 2.5)
    #abline(v = c(ci_99[1], ci_99[2]), col = "red", lty = 2, lwd = 2)
    segments(x0 = ci_99[1], x1 = ci_99[2], y0 = max(curve[density < ci_99[1]]), col = "red", lty = 2, lwd = 2)
    segments(x0 = c(ci_99[1], ci_99[2]), y0 = 0, y1 = max(curve[density < ci_99[1]]), col = "red", lty = 2, lwd = 2)
    segments(x0 = ci_95[1], x1 = ci_95[2], y0 = max(curve[density < ci_95[1]]), col = "purple", lty = 2, lwd = 2)
    segments(x0 = c(ci_95[1], ci_95[2]), y0 = 0, y1 = max(curve[density < ci_95[1]]), col = "purple", lty = 2, lwd = 2)
    segments(x0 = ci_90[1], x1 = ci_90[2], y0 = max(curve[density < ci_90[1]]), col = "darkgreen", lty = 2, lwd = 2)
    segments(x0 = c(ci_90[1], ci_90[2]), y0 = 0, y1 = max(curve[density < ci_90[1]]), col = "darkgreen", lty = 2, lwd = 2)
    text(x = mean(density), y = max(curve[density < ci_99[1]]) + 0.05, labels = "99.74", cex = 0.7)
    text(x = mean(density), y = max(curve[density < ci_95[1]]) + 0.05, labels = "95.44", cex = 0.7)
    text(x = mean(density), y = max(curve[density < ci_90[1]]) + 0.05, labels = "68.26", cex = 0.7)
    axis(side = 1, at = c(ci_99, ci_95, ci_90), labels = parse(text = paste(c(-3,3,-2,2,-1,1),"~sigma", sep = "")),
           padj = 2, tick = FALSE)
    

    enter image description here

    As you can see, since the SD's are pretty close together, I had to make the figure wider to avoid them overlapping each other.

    Or if you wanted it to look even more like the figure you referenced:

    hist(dist,
         probability = TRUE,
         ylim = c(0, max(curve)),
         xaxt = "n",
         col = NA, border = NA, xlab = "", main = "")
    axis(side = 1, at = seq(-1, 2, 0.2), labels = seq(-1, 2, 0.2))
    lines(density, curve, col = "maroon", lwd = 2.5)
    #abline(v = c(ci_99[1], ci_99[2]), col = "red", lty = 2, lwd = 2)
    segments(x0 = ci_99[1], x1 = ci_99[2], y0 = max(curve[density < ci_99[1]]), col = "darkblue", lty = 2, lwd = 2)
    segments(x0 = c(ci_99[1], ci_99[2]), y0 = 0, y1 = max(curve[density < ci_99[1]]), col = "darkblue", lty = 2, lwd = 2)
    segments(x0 = ci_95[1], x1 = ci_95[2], y0 = max(curve[density < ci_95[1]]), col = "darkblue", lty = 2, lwd = 2)
    segments(x0 = c(ci_95[1], ci_95[2]), y0 = 0, y1 = max(curve[density < ci_95[1]]), col = "darkblue", lty = 2, lwd = 2)
    segments(x0 = ci_90[1], x1 = ci_90[2], y0 = max(curve[density < ci_90[1]]), col = "darkblue", lty = 2, lwd = 2)
    segments(x0 = c(ci_90[1], ci_90[2]), y0 = 0, y1 = max(curve[density < ci_90[1]]), col = "darkblue", lty = 2, lwd = 2)
    text(x = mean(density), y = max(curve[density < ci_99[1]]) + 0.05, labels = "99.74", cex = 0.7)
    text(x = mean(density), y = max(curve[density < ci_95[1]]) + 0.05, labels = "95.44", cex = 0.7)
    text(x = mean(density), y = max(curve[density < ci_90[1]]) + 0.05, labels = "68.26", cex = 0.7)
    axis(side = 1, at = c(ci_99, ci_95, ci_90), labels = parse(text = paste(c(-3,3,-2,2,-1,1),"~sigma", sep = "")),
           padj = 2, tick = FALSE)
    

    enter image description here