Search code examples
rggplot2roundingequationpolynomials

How to specifically round each coefficient of an nth-order polynomial equation on a ggplot2 graph?


I would like to display the equation of a 4th order polynomial regression on a ggplot2 graph.
The lm_equ function proposed here is close to my goal except that all coefficients are rounded with the same number of digits, whereas I would like to be able to round each coefficient specifically.
Is it possible to integrate a list of the different rounding digits into the lm_equ function or is it necessary to create each coefficient with its specific rounding?
Thanks for help.

Initial graph:
enter image description here

Desired graph:
enter image description here

Data:

df <-  
structure(list(x0 = c(70.6, 20.85, 136.5, 110.5, 52.65, 95.05, 
99.05, 26.55, 28.7, 37.9, 124.5, 52.35, 113.5, 129.5, 40.25, 
37.25, 125, 215.5, 70.1, 171.5, 56.35, 44.85, 106.5, 110, 102.5, 
64.5, 85.75, 137, 253, 109.5, 146.5, 159.5, 83.85, 160, 215, 
125.5, 94.95, 150.5, 200.5, 111.5, 207.5, 63.35, 63.1, 94.5, 
143.5, 135, 62.65, 55.95, 161.5, 126.5, 49.55, 43.9, 87, 48.8, 
98, 163, 76, 60.5, 97.35, 62, 84, 54.25, 52.65, 54.2, 175.5, 
75.85, 202.5, 97, 181, 98, 33.7, 72.4, 252.5, 156, 46.9, 243, 
136, 127, 175.5, 169.5, 113, 144.5, 94, 96, 40.35, 60.75, 95.5, 
89.75, 48.35, 18.5, 43.8, 113.5, 27.1, 54.85, 65.25, 57.1, 46.05, 
66.75, 23.55, 94.3, 46.85, 60.75, 40.35, 96, 71, 94, 71.8, 60.95, 
62.35, 85.85, 76.3, 146.5, 128.5, 70.65, 96, 60.5, 67.95, 59.6, 
70.15, 70.8, 59.3, 158.5, 114.5, 30.6, 131.5, 64.25, 86.9, 71.1, 
62.85, 102.5, 51.1, 169.5, 47.15, 73.4, 77.35, 73.35, 99.5, 290.5, 
85.25, 108.5, 236, 74.05, 33.25, 97.5, 114.5, 64.2, 38.1, 59.2, 
124, 92, 165, 112, 113, 154.5, 119, 27.7, 113, 68.45, 50, 130, 
72.75, 18.8, 105.5, 141, 124, 139, 351.5, 137, 114.5, 72.75), 
    y0 = c(-7.2, -9.7, -9, -47, -19.3, -0.0999999999999943, -0.0999999999999943, 
    -9.1, -7.4, -9.8, 5, -6.7, -11, -11, -16.5, -12.5, -36, 3, 
    -18.2, -5, -6.7, -21.7, -11, -6, -9, -13, -15.5, 18, 36, 
    -1, -15, 11, -21.7, 26, 14, -19, -7.90000000000001, -5, 19, 
    -5, 17, -16.7, -4.2, -27, -1, 8, 0.700000000000003, -25.9, 
    7, -11, -13.1, -11.8, -30, -19.6, -12, -10, -28, -9, 3.3, 
    -10, -26, -22.5, -11.3, -20.4, -7, 0.299999999999997, -43, 
    -16, 2, -6, -11.4, -16.8, 15, -2, -13.8, 10, -16, -4, 21, 
    -5, -10, -21, -10, -18, -16.7, -11.5, -15, -11.5, -6.7, -7, 
    -13.6, -13, -10.2, -27.7, -6.5, -16.2, -18.1, -27.5, -13.1, 
    -10.6, -17.7, -11.5, -16.7, -18, -16, -10, -5.59999999999999, 
    -17.9, -14.7, -15.7, -16.6, -17, -5, -17.3, -8, -1, 4.09999999999999, 
    -19.2, -6.3, -9.59999999999999, -8.6, -3, 1, -13.2, 3, -14.5, 
    -11.8, 9.8, -23.7, -13, 3.8, 7, -8.3, -16.8, -14.7, -12.7, 
    -7, 3, -16.5, -15, -30, -10.1, -8.5, -11, 3, -10.4, -34.2, 
    -24.4, 8, -16, 20, -6, -20, -1, -4, -13.4, 0, -14.9, -10, 
    -6, 0.5, -1.6, -1, 8, -4, -10, 11, 0, 3, 0.5)), class = "data.frame", row.names = c(NA, 
-170L))  

Code (initial graph):

library(ggplot2)  
library(dplyr)  

    # equation
lm_eqn <- function(df, degree, raw=TRUE){
  m <- lm(y0 ~ poly(x0, degree, raw=raw), df)  # get the fit
  cf <- round(coef(m), 5)  # round the coefficients
  r2 <- round(summary(m)$r.squared, 4)  # round the r.squared
  powers <- paste0("^", seq(length(cf)-1))  # create the powers for the equation
  powers[1] <- ""  # remove the first one as it's redundant (x^1 = x)
  # first check the sign of the coefficient and assign +/- and paste it with
  # the appropriate *italic(x)^power. collapse the list into a string
  pcf <- paste0(ifelse(sign(cf[-1])==1, " + ", " - "), abs(cf[-1]),
                paste0("*italic(x)", powers), collapse = "")
  # paste the rest of the equation together
  eq <- paste0("italic(y) == ", cf[1], pcf, "*','", "~italic(r)^2==", r2)
  eq
}

# plot
ggplot(df, aes(x0, y0)) +
  geom_point() +
  scale_y_continuous(limits = c(-50, 50), expand=c(0,0)) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 4, raw = TRUE), linewidth = 1) +    
  annotate("text", x = 0, y = 45, label = lm_eqn(df, 4, raw = TRUE),
           hjust = 0, family = "Times", parse = TRUE, size = 4.5)

Solution

  • This doesn't seem to be well covered by the help file for round() but the number of digits can be a vector, rather than a single value. As a result you can modify your function to take this as an argument, for example:

    # equation
    lm_eqn <- function(df, degree, raw=TRUE, digits=rep(5,degree+1)){
      m <- lm(y0 ~ poly(x0, degree, raw=raw), df)  # get the fit
      cf <- round(coef(m), digits)  # round the coefficients, *digits now a function argument*
      r2 <- round(summary(m)$r.squared, 4)  # round the r.squared
      powers <- paste0("^", seq(length(cf)-1))  # create the powers for the equation
      powers[1] <- ""  # remove the first one as it's redundant (x^1 = x)
      # first check the sign of the coefficient and assign +/- and paste it with
      # the appropriate *italic(x)^power. collapse the list into a string
      pcf <- paste0(ifelse(sign(cf[-1])==1, " + ", " - "), abs(cf[-1]),
                    paste0("*italic(x)", powers), collapse = "")
      # paste the rest of the equation together
      eq <- paste0("italic(y) == ", cf[1], pcf, "*','", "~italic(r)^2==", r2)
      eq
    }
    
    # plot
    ggplot(df, aes(x0, y0)) +
      geom_point() +
      scale_y_continuous(limits = c(-50, 50), expand=c(0,0)) +
      stat_smooth(method = "lm", formula = y ~ poly(x, 4, raw = TRUE), linewidth = 1) +    
      annotate("text", x = 0, y = 45, label = lm_eqn(df, 4, raw = TRUE, 
                                                     digits=c(1, 2, 3, 7, 9)),  #Addition, specify digits for each coefficient
               hjust = 0, family = "Times", parse = TRUE, size = 4.5)
    

    Output plot with modified equation digits