Search code examples
r ggplot2 plot conditional-statements model-comparison

Highlight the prediction line of the best fitting model in R?


I have fitted a few models to the same data and have plotted the prediction lines for all models in a single plot.

Now I'd like to highlight (in bold, thicker line) the prediction line of the model with the lowest AIC. Can't seem to find resources on this, so I'm a bit stuck.

library(ggplot2)
# Fix the RNG state so the simulated data are reproducible.
set.seed(101)

# Toy data: a standard-normal predictor and a Weibull(shape = 1) response
# that is generated independently of it.
x <- rnorm(n = 100)
y <- rweibull(n = 100, shape = 1)
d <- data.frame(x = x, y = y)

# Three candidate linear models of increasing flexibility:
# intercept only, linear in x, and linear in |x|.
model1 <- lm(y ~ 1, data = d)
model2 <- lm(y ~ x, data = d)
model3 <- lm(y ~ abs(x), data = d)

# Build a prediction helper for a given predictor vector.
#
# x: numeric vector whose observed range defines the prediction grid.
#
# Returns a function taking a fitted model `m` and returning a data frame
# with columns `x` (100 evenly spaced points spanning the range of the
# original `x`) and `y` (the model's predictions at those points).
newvalsforx <- function(x) {
  limits <- range(x)
  grid <- seq(limits[1], limits[2], length.out = 100)
  function(m) {
    fitted_vals <- predict(m, data.frame(x = grid))
    data.frame(x = grid, y = fitted_vals)
  }
}
# Prediction helper bound to the observed range of the predictor.
pred <- newvalsforx(d[["x"]])

# Scatter plot of the raw data with one fitted curve per model overlaid,
# each drawn in its own fixed colour.
ggplot(data = d, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_line(data = pred(model1), colour = "red") +
  geom_line(data = pred(model2), colour = "blue") +
  geom_line(data = pred(model3), colour = "green")

# AIC of each candidate model; the model with the lowest value is the
# one whose prediction line should be highlighted.
AIC1 <- AIC(model1)
AIC2 <- AIC(model2)
AIC3 <- AIC(model3)

UPDATE

This is the full list of models I work with:

# Derived predictors computed from the free-standing vector `x`:
# its absolute value and an indicator for positive values.
# NOTE(review): these are standalone vectors, not columns added to `d`
# here, so the formulas below presumably resolve them from the
# enclosing environment rather than from `data` -- confirm.
abs.x <- abs(x)
ipos <- x > 0

# Full set of candidate models, keyed by name.
models <- list(
  model1 = lm(y ~ 1, data = d),
  model2 = lm(y ~ x, data = d),
  model3 = lm(y ~ abs(x), data = d),
  model4 = lm(y ~ abs.x + ipos, data = d),
  model5 = lm(y ~ abs.x:ipos, data = d),
  model6 = lm(y ~ abs.x * ipos, data = d),
  model7 = mgcv::gam(y ~ abs.x + ipos, data = d)
)

Solution

  • I made a few adjustments to make it a bit more feasible with ggplot2 (long format etc.). Let me know if you understand. Play with the values 1.5 & 0.5 to get the right thickness of the curve with minimal AIC! Greets

    set.seed(101)
    library(ggplot2)
    # Simulated data: standard-normal predictor, Weibull(shape = 1) response.
    # (The stray `xrng <-` that used to precede this line was removed: it
    # continued onto the next line and silently created an unused global
    # copy of `x`.)
    x <- rnorm(100)
    y <- rweibull(100, 1)
    # Derived predictors, stored as columns of `d` so that predictions on
    # new data can supply them explicitly.
    ipos <- (x > 0)
    abs.x <- abs(x)
    d <- data.frame(x, y, ipos, abs.x)
    
    
    
    # Candidate models, keyed by name. Every predictor used in the
    # formulas (x, abs.x, ipos) is a column of `d`.
    models <- list(
      model1 = lm(y ~ 1, data = d),
      model2 = lm(y ~ x, data = d),
      model3 = lm(y ~ abs.x, data = d),
      model4 = lm(y ~ abs.x + ipos, data = d),
      model5 = lm(y ~ abs.x:ipos, data = d),
      model6 = lm(y ~ abs.x * ipos, data = d),
      model7 = mgcv::gam(y ~ abs.x + ipos, data = d)
    )
    # Build a prediction helper for a given predictor vector.
    #
    # x: numeric vector whose observed range defines the prediction grid.
    #
    # Returns a function taking a fitted model `m` and returning a data
    # frame with columns `x` (100 evenly spaced grid points) and `y` (the
    # model's predictions). The new data handed to predict() also carries
    # the derived predictors `ipos` and `abs.x`, recomputed on the grid.
    newvalsforx <- function(x) {
      limits <- range(x)
      grid <- seq(limits[1], limits[2], length.out = 100)
      grid_df <- data.frame(x = grid, ipos = (grid > 0), abs.x = abs(grid))
      function(m) {
        fitted_vals <- predict(m, newdata = grid_df)
        data.frame(x = grid, y = fitted_vals)
      }
    }
    # Prediction helper bound to the observed range of the predictor.
    pred <- newvalsforx(d$x)

    # One prediction frame per model, stacked into long format for ggplot2.
    pred_list <- lapply(models, pred)
    pred_df <- do.call("rbind", pred_list)
    # Label every row with its model. Each name is repeated by the actual
    # number of prediction rows for that model rather than by length(y),
    # which only lined up because the sample size and the grid length
    # both happened to be 100.
    rows_per_model <- vapply(pred_list, nrow, integer(1))
    pred_df <- cbind(rep(names(models), times = rows_per_model), pred_df)
    names(pred_df) <- c("model", "x", "pred")

    # Name of the model with the lowest AIC.
    which_model_is_min <- names(which.min(vapply(models, AIC, numeric(1))))
    # Thick line (1.5) for the best model, thin line (0.5) for the rest.
    pred_df[["linewidth"]] <- ifelse(
      pred_df$model == which_model_is_min, 1.5, 0.5
    )
    
    # Raw data plus one prediction curve per model, coloured by model.
    # Line thickness comes from the `linewidth` column of `pred_df` and is
    # used as-is via the identity scale, so the minimum-AIC model is drawn
    # thicker. The `linewidth` aesthetic replaces `size` for lines and
    # needs ggplot2 >= 3.4.0; on older versions map `size` and use
    # scale_size_identity() instead.
    ggplot(d, aes(x, y)) +
      geom_point() +
      geom_line(
        data = pred_df,
        aes(x = x, y = pred, color = model, linewidth = linewidth)
      ) +
      scale_linewidth_identity()