Search code examples
r, line, cumulative-distribution-function

Cumulative plot in R - One line showing thicker than others


I wrote the below code to create this cumulative plot but for some reason, the green line is showing thicker than the other lines on the plot.

enter image description here

Below is the code I am using:

# Overlay the empirical CDF of Change_in_PM2.5 for each minority-rate group.
# Everything runs inside local() so the helper names below do not end up in
# the global environment.
local({
  group_names <- c("0-25", "25-50", "50-75", "75-100")
  group_cols <- c("orange", "#CC6666", "#9999CC", "#66CC99")

  # Change_in_PM2.5 values of the rows that belong to one group.
  delta_for <- function(grp) {
    in_group <- data1$Group == grp
    data1[in_group, ]$Change_in_PM2.5
  }

  # The first group opens the plot and fixes the axes, labels and title.
  plot(
    ecdf(delta_for(group_names[1])),
    col = group_cols[1],
    main = "ΔPM2.5 (µg/m³) distribution across Minority rate ranges",
    xlab = "ΔPM2.5 (µg/m³)",
    ylab = "Cumulative Proportion",
    xlim = c(-1, 1)
  )

  # The remaining groups are drawn on top of the existing axes.
  for (k in seq_along(group_names)[-1]) {
    lines(ecdf(delta_for(group_names[k])), col = group_cols[k])
  }

  # Dashed vertical reference line at zero change.
  abline(v = 0, lwd = 1, lty = 2, col = "black")

  # Borderless legend with one filled square (pch = 15) per group.
  legend(
    x = c(0.8, 1), y = c(0, 0.45),
    legend = group_names,
    col = group_cols,
    pch = 15,
    bty = "n",
    cex = 0.77, x.intersp = 0.3, y.intersp = 0.3
  )
})

Any idea why this would be the case?

Sample data looks like this:

enter image description here

Dput output:

structure(list(Minority_rate = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.41, 0, 0, 0, 0.5 ), Change_in_PM2.5 = c(-0.2465, -0.2424, -0.2332, -0.2313, -0.224, -0.2142, -0.2056, -0.1947, -0.1911, -0.1865, -0.1859, -0.1761, -0.1725, -0.1593, -0.1577, -0.1532, -0.1531, -0.1413, -0.1332, -0.1294, -0.119, -0.1159, -0.1153, -0.0993, -0.0962, -0.499, -0.0859, -0.0817, -0.0806, -0.4755), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L), .Label = c("0-25", "25-50", "50-75", "75-100"), class = "factor"), Population = c(29308L, 36379L, 29395L, 29582L, 29588L, 23079L, 29546L, 29608L, 75403L, 36379L, 29629L, 36068L, 77297L, 81857L, 29186L, 34215L, 57007L, 36264L, 79050L, 31984L, 24278L, 59723L, 36197L, 21931L, 21257L, 77362L, 36396L, 67234L, 29504L, 186205L), Population_weighted_ChangeinPM2.5 = c(-5.4e-06, -6.6e-06, -5.13e-06, -5.12e-06, -4.96e-06, -3.7e-06, -4.54e-06, -4.31e-06, -1.08e-05, -5.07e-06, -4.12e-06, -4.75e-06, -9.97e-06, -9.75e-06, -3.44e-06, -3.92e-06, -6.53e-06, -3.83e-06, -7.88e-06, -3.1e-06, -2.16e-06, -5.18e-06, -3.12e-06, -1.63e-06, -1.53e-06, -2.89e-05, -2.34e-06, -4.11e-06, -1.78e-06, -6.62e-05)), row.names = c(NA, 30L), class = "data.frame")


Solution

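  • I am not sure how you originally created the above plot, since the default option just plots markers.
    With do.points left at its default, a marker is drawn at every jump of the ECDF, and closely packed markers can make one curve look thicker than the others.
    It seems that adding "verticals = TRUE" and "do.points = FALSE" to the plot and lines statements creates the lines which you are looking for.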

    Try this:

    # Draw one ECDF step curve per minority-rate group on a shared set of axes.
    # verticals = TRUE joins the steps with vertical segments and
    # do.points = FALSE suppresses the marker at each jump, so every group is
    # drawn as a plain line.
    local({
      group_names <- c("0-25", "25-50", "50-75", "75-100")
      group_cols <- c("orange", "#CC6666", "#9999CC", "#66CC99")

      axes_ready <- FALSE
      for (i in seq_along(group_names)) {
        delta <- data1[data1$Group == group_names[i], ]$Change_in_PM2.5

        # ecdf() stops with an error when it gets no non-missing values, so a
        # group without observations is skipped instead of aborting the plot.
        if (all(is.na(delta))) {
          next
        }

        if (!axes_ready) {
          # The first non-empty group opens the plot and fixes axes and labels.
          plot(ecdf(delta),
               xlim = c(-1, 1),
               xlab = "ΔPM2.5 (µg/m³)",
               ylab = "Cumulative Proportion",
               main = "ΔPM2.5 (µg/m³) distribution across Minority rate ranges",
               col = group_cols[i], verticals = TRUE, do.points = FALSE)
          axes_ready <- TRUE
        } else {
          # Later groups are overlaid on the existing axes.
          lines(ecdf(delta),
                col = group_cols[i], verticals = TRUE, do.points = FALSE)
        }
      }

      # abline() and legend() need an open plot; fail with a clear message.
      if (!axes_ready) {
        stop("no group has any Change_in_PM2.5 values to plot", call. = FALSE)
      }

      # Dashed vertical reference line at zero change.
      abline(v = 0, col = "black", lty = 2, lwd = 1)
      legend(x = c(0.8, 1), y = c(0, 0.45),
             legend = group_names,  # text in the legend
             cex = 0.9, x.intersp = 0.3, y.intersp = 0.99,
             col = group_cols,  # marker colors
             pch = 19, bty = "n")  # pch = 19 is a filled circle; no legend box
    })