Search code examples
r ggplot2 aspect-ratio

How to fix the x and y axes in combination with geom_smooth()?


I am trying to make square plots with identical x- and y-axis tick marks (i.e. aspect ratio = 1). Originally I wanted to use facet_wrap with ggplot, but after reading a number of questions here on Stack Overflow I realized this is not possible. So now I want to draw the plots one by one and use grid.arrange to organize them at the end. However, it is still not working for me: I can get the axes right, but then the confidence interval from geom_smooth() is no longer plotted correctly.

# Example data: a 61-row data.frame with three columns.
#   analyte - factor with ten levels (amino-acid names, "Alanine" ... "Valine")
#   x       - numeric values
#   y       - integer values
# NOTE(review): presumably x and y are paired measurements of the same
# quantity, since they are later drawn on identical axes -- confirm.
dat <- structure(list(analyte = structure(c(2L, 8L, 9L, 5L, 6L, 4L, 
1L, 7L, 10L, 3L, 9L, 10L, 7L, 7L, 10L, 10L, 10L, 10L, 6L, 6L, 
10L, 6L, 4L, 6L, 7L, 4L, 2L, 10L, 10L, 4L, 2L, 6L, 6L, 8L, 10L, 
1L, 1L, 3L, 8L, 2L, 1L, 10L, 7L, 6L, 3L, 3L, 7L, 7L, 6L, 6L, 
9L, 5L, 9L, 7L, 6L, 7L, 8L, 7L, 5L, 7L, 5L), .Label = c("Alanine", 
"Glutamic acid", "Glutamine", "Glycine", "Histidine", "Isoleucine", 
"Leucine", "Phenylalanine", "Tyrosine", "Valine"), class = "factor"), 
    x = c(23.8, 51.5, 68.8, 83.5, 165.8, 178.6, 201.1, 387.4, 
    417.7, 550.1, 101.4, 103.1, 115.5, 119.9, 131.4, 156.9, 157.2, 
    169.9, 170.1, 174.6, 204.3, 21.8, 218.7, 22.2, 220.3, 226, 
    24.3, 259.3, 263.1, 301, 38.7, 39.8, 41.5, 42.4, 428.9, 431.7, 
    437.2, 440.1, 46.7, 47, 462.6, 470.1, 474.5, 51.3, 512.3, 
    516.4, 527.2, 547.3, 57.3, 58.5, 60.6, 63.9, 65.9, 69.9, 
    71.8, 771.9, 81.2, 82.4, 82.6, 823.5, 83.8), y = c(100L, 
    50L, 50L, 80L, 160L, 210L, 240L, 390L, 340L, 620L, 70L, 90L, 
    70L, 90L, 130L, 130L, 160L, 130L, 160L, 150L, 180L, 30L, 
    140L, 30L, 230L, 210L, 60L, 230L, 270L, 250L, 60L, 30L, 50L, 
    50L, 390L, 480L, 460L, 410L, 50L, 290L, 410L, 420L, 440L, 
    50L, 530L, 730L, 530L, 400L, 50L, 40L, 40L, 100L, 50L, 70L, 
    70L, 750L, 50L, 70L, 110L, 800L, 160L)), class = "data.frame", row.names = c(NA, 
-61L))

and the plot:

# Model formula used for the equation / R^2 label.
my.formula <- y ~ x

# First attempt, Alanine only: force both axes onto the same range by setting
# identical limits on the x and y scales.
# NOTE(review): limits set on a scale censor values outside them by default,
# which is the likely reason the geom_smooth() confidence band is not drawn
# correctly here -- confirm against the ggplot2 scale documentation.
p1 <- local({
  alanine <- dat[which(dat$analyte == 'Alanine'), ]

  # Smallest and largest value found in either column.
  shared_limits <- range(alanine$x, alanine$y)

  ggplot(alanine, aes(x = x, y = y)) +
    geom_point() +
    scale_x_continuous(limits = shared_limits) +
    scale_y_continuous(limits = shared_limits) +
    geom_smooth(method = 'lm') +
    stat_poly_eq(
      formula = my.formula,
      aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
      parse = TRUE,
      size = 3
    )
})
p1 

UPDATE: I tried combining the suggested code with some of my own settings, and I am getting closer. But it is driving me crazy: why are the confidence intervals not plotted in some of the plots, and plotted incorrectly in one plot (Alanine)? (See the last picture.)

The updated code:

# Second attempt: one square panel per analyte, all arranged on one page.
dat_split <- split(dat, dat$analyte)

plots <- lapply(dat_split, function(grp) {
  # Common axis range for this analyte: the smaller of the two column minima
  # reduced by one eighth, up to the larger of the two maxima increased by
  # one eighth.
  smallest <- c(min(grp$x), min(grp$y))
  largest <- c(max(grp$x), max(grp$y))
  axis_limits <- c(
    min(smallest - 1 / 8 * smallest),
    max(largest + 1 / 8 * largest)
  )

  # NOTE(review): the limits are still applied to the scales, so anything
  # falling outside them is censored by default; this is the likely cause of
  # the missing / wrong confidence bands -- confirm against the ggplot2 docs.
  ggplot(grp, aes(x = x, y = y)) +
    geom_point() +
    scale_x_continuous(expand = c(0, 0), limits = axis_limits) +
    scale_y_continuous(expand = c(0, 0), limits = axis_limits) +
    theme(aspect.ratio = 1) +
    geom_smooth(method = 'lm', inherit.aes = TRUE, se = TRUE) +
    ggtitle(grp$analyte[1]) +
    ggpmisc::stat_poly_eq(
      formula = my.formula,
      aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
      parse = TRUE,
      size = 3
    )
})


gridExtra::grid.arrange(grobs = plots)

enter image description here enter image description here enter image description here


Solution

  • This seems to do roughly what you're looking for. For some of the analyte factors, the x and y ranges are considerably different, so I'm not sure you really want to show them all with identical axes.

    # Build one plot per analyte. `drop = TRUE` skips factor levels that have
    # no rows, which would otherwise produce empty data frames (and infinite
    # axis limits) below.
    dat_split <- split(dat, dat$analyte, drop = TRUE)
    
    plots <-
      lapply(dat_split, function(df) {
        # Range spanning both columns, used for BOTH axes so that every panel
        # is square and has the same tick marks on x and y.
        lims <- range(df$x, df$y, na.rm = TRUE)

        ggplot(df, aes(x = x, y = y)) +
          geom_point() +
          # Fit with the same formula that the printed equation uses.
          geom_smooth(method = "lm", formula = my.formula) +
          ggpmisc::stat_poly_eq(
            formula = my.formula,
            aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
            parse = TRUE,
            size = 3
          ) +
          # Set the limits on the coordinate system, not on the scales:
          # coord limits only zoom the view, whereas scale limits discard
          # out-of-range values, including parts of the confidence ribbon.
          coord_equal(xlim = lims, ylim = lims) +
          ggtitle(as.character(df$analyte[1]))
      })
    
    gridExtra::grid.arrange(grobs = plots)