Search code examples
rfor-loopggplot2glmnet

How to get distinct ggplot2 plot from list of glmnet model in R?


I'm trying to make plots of glmnet (lasso) model in R from list of model objects.

My data looks like (original data is large)

head(SLE28sy_w20_dat3)
# A tibble: 6 × 31
  EE86222ln1 EE86223ln1 EE86224ln1 EE86225ln1 blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20
       <dbl>      <dbl>      <dbl>      <dbl>            <dbl>             <dbl>            <dbl>     <dbl>
1    0.00809    0.00357    0.00146    0.00228                0                 1                0         1
2    0.00437    0.00212    0.00156    0.00197                0                 1                0         1
3    0.00437    0.00303    0.00172    0.00237                1                 0                0         1
4    0.00833    0.00303    0.00126    0.00217                0                 1                0         1
5    0.00833    0.00316    0.00165    0.00217                1                 1                0         1
6    0.00833    0.00236    0.00134    0.00189                1                 0                0         1

I created a list of lasso models by below code,

  j <- SLE28sy_w20_dat3[, c(1:4)]
  l <- data.matrix(SLE28sy_w20_dat3[, -c(1:4)])
  
  for (i in 1:ncol(j)) {
    las_glmnet <- glmnet(l, data.matrix(j[, i]))
    assign(paste0("glmnet_lasso_", names(j)[i]), las_glmnet, envir = .GlobalEnv)
  }

Now, I'm trying to make plots of models from every samples, how can I do this...

What I did so far,

  las_sam<-list(glmnet_lasso_EE86222ln1, glmnet_lasso_EE86223ln1,
       glmnet_lasso_EE86224ln1, glmnet_lasso_EE86225ln1)
  
  for (i in seq_along(las_sam)){
    
    betas = as.matrix(las_sam[[i]]$beta)
    lambdas = las_sam[[i]]$lambda
    names(lambdas) = colnames(betas)

 plot_las<- as.data.frame(betas) %>% 
      tibble::rownames_to_column("variable") %>% 
      pivot_longer(-variable) %>% 
      mutate(lambda=lambdas[name]) %>% 
      ggplot(aes(x=lambda,y=value,col=variable)) + 
      geom_line() + 
      geom_label_repel(data=~subset(.x,lambda==min(lambda)), size = 2.5,
                       aes(label=variable),nudge_x=-0.8) +
      theme(legend.position="none")   +
      scale_x_log10()
 assign(paste0("Plot_lasso_", names(las_sam)[i]), plot_las, envir = .GlobalEnv)
    
  }  

By this I'm only getting the plot of last sample's model which is glmnet_lasso_EE86225ln1. That is, the loop is functional, but it is unable to store the image objects with distinct names. Perhaps the last line of the code is where I'm going wrong. My end goal is to make a separate image in PNG or TIFF format for every sample's model.


Solution

  • What about this "tidy-style" solution?

    • it reshapes your initial data frame so you can group by EE-variant
    • it then nests the data, one subset per EE-variant
    • it runs the models per nest/group and stores the result per row
    • finally it plots the data with base plot functions and multiplot layout (with {ggplot} or {lattice}, faceting by group would streamline the process even better

    • your example data:
    SLE28sy_w20_dat3 <- 
    structure(list(EE86222ln1 = c(0.00809, 0.00437, 0.00437, 0.00833, 
    0.00833, 0.00833), EE86223ln1 = c(0.00357, 0.00212, 0.00303, 
    0.00303, 0.00316, 0.00236), EE86224ln1 = c(0.00146, 0.00156, 
    0.00172, 0.00126, 0.00165, 0.00134), EE86225ln1 = c(0.00228, 
    0.00197, 0.00237, 0.00217, 0.00217, 0.00189), blood_vessel_w20 = c(0L, 
    0L, 1L, 0L, 1L, 1L), adrenal_gland_w20 = c(1L, 1L, 0L, 1L, 1L, 
    0L), bone_element_w20 = c(0L, 0L, 0L, 0L, 0L, 0L), brain_w20 = c(1L, 
    1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA, 
    6L))
    
    • reshape data
        library(glmnet)
        library(tidyr)
        library(dplyr)
    
        data_long <- 
          SLE28sy_w20_dat3 |>
          pivot_longer(cols = starts_with('EE'), 
                       names_to = 'EE_variant',
                       values_to = 'Y',
                       )
        
        ## > data_long
        ## # A tibble: 24 x 6
        ##    blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20 EE_variant
        ##               <int>             <int>            <int>     <int> <chr>     
        ##  1                0                 1                0         1 EE86222ln1
        ##  2                0                 1                0         1 EE86223ln1
        ##  3                0                 1                0         1 EE86224ln1
        ## ...
    
    • nest data per EE-variant:
        data_nested <- 
          data_long |>
          group_by(EE_variant) |>
          nest(data = -EE_variant)
        
        + # A tibble: 4 x 2
        # Groups:   EE_variant [4]
          EE_variant data            
          <chr>      <list>          
        1 EE86222ln1 <tibble [6 x 5]>
        2 EE86223ln1 <tibble [6 x 5]>
        3 EE86224ln1 <tibble [6 x 5]>
        4 EE86225ln1 <tibble [6 x 5]>
    
    • compute models per nest:
        the_models <- 
          data_nested |>
          rowwise() |>
          ## save the model in list-column
          summarise(the_model = list(glmnet(x = data |> select(-Y) |> data.matrix(),
                                            y = data |> select(Y) |> data.matrix()
                                            )
                                     )
                    )
        ## > the_models
        ## # A tibble: 4 x 2
        ## # Groups:   EE_variant [4]
        ##   EE_variant the_model
        ##   <chr>      <list>   
        ## 1 EE86222ln1 <elnet>  
        ## 2 EE86223ln1 <elnet>  
        ## 3 EE86224ln1 <elnet>  
        ## 4 EE86225ln1 <elnet> 
    
    • set graphic parameters to 2x2 layout and plot:
    par(mfrow = c(2, 2))
    
    the_models |> mutate(plot(the_model[[1]], sub = EE_variant))
    

    four models plottet in one go


    a ggplot variant:

    
        the_plot_data <- 
          the_models |>
          rowwise() |>
          transmute(plot_data = cbind(lambda = the_model$lambda,
                                      t(as.matrix(the_model$beta))
                                      ) |>
                      as.data.frame() |>
                      pivot_longer(-lambda, names_to = 'variable') |>
                      list()
                    ) |>
          reframe(plot_data)
    
    
        ## + # A tibble: 1,000 x 4
        ##    EE_variant   lambda variable              value
        ##    <chr>         <dbl> <chr>                 <dbl>
        ##  1 EE86222ln1 0.000438 blood_vessel_w20  0        
        ##  2 EE86222ln1 0.000438 adrenal_gland_w20 0        
        ##  3 EE86222ln1 0.000438 bone_element_w20  0        
        ##  4 EE86222ln1 0.000438 brain_w20         0        
        ##  5 EE86222ln1 0.000399 blood_vessel_w20  0        
        ##  6 EE86222ln1 0.000399 adrenal_gland_w20 0.0000826
    
    the_plot_data |>
      ggplot(aes(lambda, value)) +
      geom_line(aes(col = variable)) +
      facet_wrap(~ EE_variant)
    

    facetted glmnet results

    • to save one ggplot per variant, you can split the data into a list of subsets (one per variant) and Map like so:
    
        the_plot_data <- 
          the_plot_data |> split(the_plot_data$EE_variant)
        
        names(the_plot_data) |>
          Map(f = \(variant){ 
            the_plot_data[[variant]] |>
              ggplot(aes(lambda, value)) +
              geom_line(aes(col = variable))
            ggsave(filename = sprintf('plot-of-%s.png', variant))
          }
          )