Search code examples
r ggplot2 non-linear-regression

How do I reproduce a plot in ggplot based on nonlinear regressions from the R package Growthrates?


I am working with the Growthrates package to generate parameter estimates of growth rate curves for my data. I have made the regressions and looked at the produced plots and I am happy with the data, but I would like to reproduce the following plots in ggplot2.

Figure 1: Multiplot of a regression for each group:treatment combo

I would like a multiplot of the regression lines for each group:treatment combination, but showing all the regressions I performed on it (e.g. logistic, gompertz, gompertz2, etc.). So far I have:

library(growthrates)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)

#### Logistic model: one fit per sample:treatment group
# Start values (p) and lower/upper bounds handed to all_growthmodels()
lower <- c(y0 = 0,   mumax = 0,   K = 20)
upper <- c(y0 = 100, mumax = 5,   K = 400)
p     <- c(y0 = 1,   mumax = 0.5, K = 200)

# NOTE(review): `log = "y"` is forwarded as an extra argument; confirm which
# downstream function actually consumes it.
many_logistics <- all_growthmodels(
  y_data ~ grow_logistic(total_time_days, parms) | sample + treatment,
  data = Alldata, p = p, lower = lower, upper = upper, log = "y"
)
pp <- coef(many_logistics)

# Base-graphics overview of all fits on a 5 x 3 panel grid with tight margins
par(mfrow = c(5, 3), mar = c(2.5, 4, 2, 1))
plot(many_logistics)

# Per-group results table, compared across treatments for each sample
many_logistics_results <- results(many_logistics)
xyplot(mumax ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))

# Predictions at the observed time points (the data behind the curves)
curve_logistics <- predict(many_logistics)
# Predictions on a user-defined time grid (interpolation / extrapolation)
est_logistics <- predict(
  many_logistics,
  newdata = data.frame(time = seq(0, 1, by = 0.1))
)

#### Using Gompertz regression to fit the data across multiple groups
# Start values (p) and lower/upper bounds handed to all_growthmodels()
p     <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0,   K = 20)
upper <- c(y0 = 100, mumax = 5,   K = 400)

# Fit one Gompertz model per sample:treatment combination.
# The response is `y_data`, the same column the logistic fit uses; the former
# `y_datay_data` was a duplicated token and not a column of `Alldata`.
many_gompertz <- all_growthmodels(
  y_data ~ grow_gompertz(total_time_days, parms) | sample + treatment,
  data = Alldata,
  p = p,
  lower = lower,
  upper = upper
)
pp <- coef(many_gompertz)

# Base-graphics overview of all fits on a 5 x 3 panel grid
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_gompertz)

# Per-group results table, compared across treatments for each sample
many_gompertz_results <- results(many_gompertz)
xyplot(mumax ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))

# Prediction for the given data (data for the curve)
curve_gompertz <- predict(many_gompertz)
# Extrapolation/interpolation from the curve on a user-defined time grid
est_gompertz <- predict(many_gompertz, newdata = data.frame(time = seq(0, 1, 0.1)))

# Prepare the data frames

# Stack the per-group predictions of each model into one long tibble. The list
# names of the predictions (presumably of the form "sample:treatment") become
# the `src` column, which is then split into the two grouping columns.
curve_logistics2 <- curve_logistics %>%
  map_df(as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "logistic")

curve_gompertz2 <- curve_gompertz %>%
  map_df(as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "gompertz")

# Observed data in the same layout (sample, treatment, time, y, regression).
# separate() above always returns character columns, so the grouping columns
# are converted here as well; otherwise bind_rows() fails when `sample` or
# `treatment` is numeric in `Alldata`.
alldata2 <- Alldata %>%
  select(sample, treatment, time = total_time_days, y = y_data) %>%
  mutate(
    sample = as.character(sample),
    treatment = as.character(treatment),
    regression = "none"
  )

# Fitted curves and raw observations in one data frame; `regression` tells
# them apart ("logistic", "gompertz", "none")
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2, alldata2)

# Define the function to automatically generate plots
#
# REGRESSION_LINE_PLOT(x) builds one ggplot for a single group.
#   x: data frame with columns `time`, `y` and `regression`; rows with
#      regression == "none" are drawn as points, rows with regression
#      "gompertz" or "logistic" are drawn as lines.
# Returns the ggplot object (it is not printed here).
REGRESSION_LINE_PLOT <- function(x) {
  observed <- subset(x, regression %in% c("none"))
  model_curves <- subset(x, regression %in% c("gompertz", "logistic"))

  ggplot(
    data = x,
    aes(x = time, y = y, colour = regression, linetype = regression)
  ) +
    geom_point(size = 2.5, data = observed) +
    # The curve rows already hold the model predictions, so they are simply
    # connected. stat_smooth() would fit an additional smoother on top of the
    # predictions and therefore not show the fitted growth model itself.
    geom_line(data = model_curves) +
    theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.spacing = unit(0, "cm"),
      axis.line = element_line(colour = "black"),
      # axis.title.x = element_text(size=14, colour = "black"),
      axis.title.x = element_blank(),
      # axis.title.y = element_text(size=14, colour = "black"),
      axis.title.y = element_blank(),
      # axis.text.y = element_text(size=14, colour = "black"),
      # axis.text.x = element_text(size=14, colour = "black"),
      strip.background = element_blank(),
      strip.text = element_text(size = 12, colour = "black", face = "bold"),
      legend.text = element_text(size = 12, colour = "black"),
      legend.title = element_blank(),
      text = element_text(size = 12, family = "Arial")
    ) +
    # plot.margin=unit(c(0.1,0.1,0.1,0.1),"cm")) +
    #scale_colour_manual(values = cbbPalette) + ### here I tell R to use my custom colour palette
    #scale_x_continuous(limits = c(-1,14)) + # set time range from -1 to 70 since we started sampling on day -1
    #scale_y_continuous(limits = c(-1,350), breaks = seq(0, 360, 90)) + # For comparison purposes, i want all my panels to have the same y axis scale
    ylab("") +
    xlab("")
}

# One row per sample:treatment combination; the remaining columns of each
# group are packed into the `data` list-column by nest()
comp_reg_nested <- nest(group_by(comp_reg, sample, treatment))

# Build one plot per nested data frame and keep it in the `plots` list-column
comp_reg_nested <- mutate(
  comp_reg_nested,
  plots = map(data, REGRESSION_LINE_PLOT)
)

# Plot belonging to the first sample:treatment combination
fo_ad_line <- comp_reg_nested$plots[[1]]

However, I do not think the regression lines are properly represented in ggplot2. Is there a better way to do this?


Solution

  • I created a data example more or less similar to your data structure from the builtin data of the package and simplified the code a little bit, omitting the default plot functions. I very much enjoyed your data frame construction method with map_df, thank you. Then I added a simple ggplot, that can of course be extended and adapted to your needs.

    library(growthrates)
    library(dplyr)
    library(purrr)
    library(tidyr)
    library(ggplot2)
    
    ## Build a small example data set from the package's built-in `bactgrowth`
    ## data and give it the column names the rest of the script expects
    data(bactgrowth)
    Alldata <- bactgrowth[bactgrowth$replicate == 1 & (bactgrowth$conc < 1), ]
    colnames(Alldata) <- c(
      "sample", "replicate", "treatment", "total_time_days", "y_data"
    )
    ## rescale the response by a factor of 1000
    ## (presumably to match the start values and bounds used below - confirm)
    Alldata[["y_data"]] <- 1000 * Alldata[["y_data"]]
    ## treatment is used as a grouping label, so store it as text
    Alldata[["treatment"]] <- as.character(Alldata[["treatment"]])
    
    #### Logistic model: one fit per sample:treatment group
    ## start values (p) and lower/upper bounds handed to all_growthmodels()
    lower <- c(y0 = 0,   mumax = 0,   K = 20)
    upper <- c(y0 = 100, mumax = 5,   K = 400)
    p     <- c(y0 = 1,   mumax = 0.5, K = 200)
    
    many_logistics <- all_growthmodels(
      y_data ~ grow_logistic(total_time_days, parms) | sample + treatment,
      data = Alldata, p = p, lower = lower, upper = upper
    )
    
    ## per-group results table and predictions for the given data
    many_logistics_results <- results(many_logistics)
    curve_logistics <- predict(many_logistics)
    
    #### Gompertz model: one fit per sample:treatment group
    ## reuses the start values and bounds (p, lower, upper) defined above
    many_gompertz <- all_growthmodels(
      y_data ~ grow_gompertz(total_time_days, parms) | sample + treatment,
      data = Alldata, p = p, lower = lower, upper = upper
    )
    
    ## per-group results table and predictions for the given data
    many_gompertz_results <- results(many_gompertz)
    curve_gompertz <- predict(many_gompertz)
    
    ## Reshape the predictions into long data frames
    ## Each list of per-group predictions is stacked into one tibble; the list
    ## names end up in `src`, which is split at ":" into sample and treatment.
    curve_logistics2 <- map_df(curve_logistics, as_tibble, .id = "src")
    curve_logistics2 <- separate(
      curve_logistics2,
      col = src, into = c("sample", "treatment"), sep = ":"
    )
    curve_logistics2 <- mutate(curve_logistics2, regression = "logistic")
    
    curve_gompertz2 <- map_df(curve_gompertz, as_tibble, .id = "src")
    curve_gompertz2 <- separate(
      curve_gompertz2,
      col = src, into = c("sample", "treatment"), sep = ":"
    )
    curve_gompertz2 <- mutate(curve_gompertz2, regression = "gompertz")
    
    ## observed data with the same column names as the predictions (time, y)
    alldata2 <- rename(Alldata, time = "total_time_days", y = "y_data")
    
    ## both sets of fitted curves in one data frame, labelled by `regression`
    comp_reg <- bind_rows(curve_logistics2, curve_gompertz2)
    
    ## Draw the figure: observations as points, fitted curves as coloured
    ## lines, one panel per treatment (rows) and sample (columns)
    ggplot(data = comp_reg, mapping = aes(x = time, y = y)) +
      geom_point(data = alldata2) +
      geom_line(mapping = aes(colour = regression)) +
      facet_grid(rows = vars(treatment), cols = vars(sample))