Search code examples
r for-loop ggplot2 scatter-plot

How do I save a plot in every round of a for loop in R?


I need to create a scatter-plot to see the correlation between the reads in the uORF (upstream ORF) and the reads in the mORF (main ORF) of 90K transcripts in all 44 tissues.

I have all these values in two dataframes: uORF_df and mORF_df. Both have the transcript IDs as rownames and the tissue names as colnames. Each cell contains the counts for that transcript in that tissue, in either the uORF or the mORF.

I would like to save each plot that is generated and to title each plot with the name of the corresponding transcript.

The first thing I do is transpose both dataframes, so that the tissue names become the rownames and the transcript IDs become the colnames.

# Save one uORF-vs-mORF scatter plot per transcript.
# Requires ggplot2 to be attached (library(ggplot2)) and the two wide tables
# uORF_df / mORF_df (transcripts in rows, tissues in columns) to exist.

# Transpose the uORF and mORF dataframes into matrices so that the tissues
# become the rows and the transcript IDs become the columns.
uORF_df_T <- t(uORF_df)
mORF_df_T <- t(mORF_df)

# Tissue labels for the points; assumed to be in the same order as the
# columns of uORF_df / mORF_df.
samples <- c(
  "OneLeaf1", "OneLeaf2", "OneRoot1", "OneRoot2", "ThreeLeaf1", "ThreeLeaf2",
  "Ear1", "Ear2", "Ear5mm1", "Ear5mm2", "Embryo15_1", "Embryo15_2",
  "Embryo20_1", "Embryo20_2", "Endosperm15_1", "Endosperm15_2",
  "Endosperm20_1", "Endosperm20_2", "Inflorescence1", "Inflorescence2",
  "Kernel1", "Kernel2", "Leaf1", "Leaf2", "Root1", "Root2", "Root6DAS1",
  "Root6DAS2", "SAM_V1_1", "SAM_V1_2", "SAM_V3_1", "SAM_V3_2", "Seedling1",
  "Seedling2", "Seedling6DAS1", "Seedling6DAS2", "Stem1", "Stem2", "Tassel1",
  "Tassel2", "Tassel1cm1", "Tassel1cm2", "TopLeaf1", "TopLeaf2"
)

# Transcript IDs (column names after transposing), used as plot titles.
uORF_names <- colnames(uORF_df_T)

# Column i of both matrices is paired below, so both tables must list the same
# transcripts in the same order, and every tissue row needs a label.
stopifnot(
  identical(colnames(uORF_df_T), colnames(mORF_df_T)),
  nrow(uORF_df_T) == nrow(mORF_df_T),
  length(samples) == nrow(uORF_df_T)
)

# Directory the JPEG files are written to.
out_dir <- "/home/stef/Downloads"

# Plotting the reads in uORF and mORF for each transcript.
# seq_len() keeps the loop from running when there are zero transcripts.
for (i in seq_len(nrow(mORF_df))) {
  # One row per tissue: reads in the uORF and in the mORF of transcript i.
  # The tissue label is kept both as a column (usable in aes(), e.g. for
  # point labels) and as the row names.
  df <- data.frame(
    tissue = samples,
    uORF = uORF_df_T[, i],
    mORF = mORF_df_T[, i],
    row.names = samples
  )

  # Basic scatter plot with a linear fit, titled with the transcript ID.
  plot_i <- ggplot(df, aes(x = uORF, y = mORF)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ggtitle(uORF_names[i])

  # A ggplot object created inside a loop is never drawn automatically, so it
  # is written explicitly; ggsave() opens and closes the device itself and
  # picks the JPEG device from the file extension.
  ggsave(file.path(out_dir, paste0("plot_", i, ".jpeg")), plot = plot_i)
}

Here are the first 10 rows of both of the dataframes. My dataframes have more than 99,000 rows.

  > dput(mORF_TPM_10)
  structure(list(TPM_mORF_1Leaf1 = c(0, 0, 15.9577310906664, 15.9775954862149,2.78678991038632, 2.78678991038632, 2.78678991038632, 2.78678991038632,14.0225801608805, 14.0225801608805), TPM_mORF_1Leaf2 = c(0, 0,10.9721879068452, 10.9858462320404, 5.23135534859068, 5.23135534859068, 5.23135534859068, 5.23135534859068, 8.81519572796751, 8.81519572796751), TPM_mORF_1Root1 = c(0, 0, 15.2545295583641, 15.2735185993082, 0, 0, 0, 0, 10.0842351029196, 10.0842351029196), TPM_mORF_1Root2 = c(0.343544763388441, 0.365321371340932, 14.9610683447621, 14.9796920812908, 0, 0, 0, 0, 13.8771566804707, 13.8771566804707), TPM_mORF_3Leaf1 = c(0, 0, 13.7407553130073, 13.7578599876708, 0, 0, 0, 0, 7.11530150881452, 7.11530150881452), TPM_mORF_3Leaf2 = c(0, 0, 15.0898218275324, 15.1086058381061, 0.662658150794125, 0.662658150794125, 0.662658150794125, 0.662658150794125, 13.4925500827389, 13.4925500827389), TPM_mORF_Ear1 = c(0.0760966486956441, 0.0809202613998489, 8.8167202339968, 8.82769540441256, 0.409637837791766, 0.409637837791766, 0.409637837791766, 0.409637837791766, 3.63109781853219, 3.63109781853219), TPM_mORF_Ear2 = c(0.0989407598144533, 0.105212414534932, 11.4182721224643, 11.4324857392142, 0, 0, 0, 0, 5.34284426702046, 5.34284426702046), TPM_mORF_Ear5mm_1 = c(0.104551293297911, 0.111178588392205, 32.2090774974947, 32.2491717848359, 0, 0, 0, 0, 10.1135671840607, 10.1135671840607), TPM_mORF_Ear5mm_2 = c(0.685769475329068, 0.72923901584173, 24.7489457541728, 24.7797535704643, 0, 0, 0, 0, 9.47665487265555, 9.47665487265555), TPM_mORF_Embryo15_1 = c(0.438970125615888, 0.466795554343452, 27.3602274978775, 27.3942858723561, 0, 0, 0, 0, 2.68865205489207, 2.68865205489207), TPM_mORF_Embryo15_2 = c(0.183828153543562, 0.19548064852228, 24.0092443355057, 24.0391313616495, 0, 0, 0, 0, 5.60173788500298, 5.60173788500298), TPM_mORF_Embryo20_1 = c(0.0492631332412111,0.0523858236543152, 25.8635736817635, 25.8957690016993, 0, 0, 0, 0, 5.09702999415875, 5.09702999415875), TPM_mORF_Embryo20_2 = 
c(0, 0, 15.4492008332927, 15.4684322036246, 0, 0, 0, 0, 3.37328874561412, 3.37328874561412), TPM_mORF_Endosperm15_1 = c(0.284960191689523, 0.303023242091701, 16.5820101656358, 16.6026516720661, 0, 0, 0, 0, 4.93713195264232, 4.93713195264232), TPM_mORF_Endosperm15_2 = c(0.414532526807716, 0.440808905556183, 15.7123804255646, 15.7319394053474, 0, 0, 0, 0, 4.13826302736584, 4.13826302736584), TPM_mORF_Endosperm20_1 = c(0.755067264729411, 0.802929452001876, 14.309979727866, 14.3277929806393, 0, 0, 0, 0, 5.49684769183169, 5.49684769183169), TPM_mORF_Endosperm20_2 = c(0.415271301202561, 0.441594509366221, 16.0334218643644, 16.0533804807931, 0, 0, 0, 0, 4.37755081503468, 4.37755081503468), TPM_mORF_Inflorescence1 = c(0.962658543958137, 1.02367952270084, 30.8346128127823, 30.8729961482339, 0, 0, 0, 0, 14.148327878666, 14.148327878666), TPM_mORF_Inflorescence2 = c(0.253064810686685, 0.172996764026799, 26.5095614564813, 26.5425609105765, 0.0729792711316374, 0.0729792711316374, 0.0729792711316374, 0.0729792711316374, 12.489660767776, 12.489660767776), TPM_mORF_Kernel1 = c(0, 0, 22.0153898488796, 22.042794898484, 0.386564992374038, 0.386564992374038, 0.386564992374038, 0.386564992374038, 11.3239110681831, 11.3239110681831), TPM_mORF_Kernel2 = c(0.288839940994747, 0.307148920861081, 21.4012979695502, 21.4279385894293, 0.534483728988032, 0.534483728988032, 0.534483728988032, 0.534483728988032, 13.8337256590664, 13.8337256590664), TPM_mORF_Leaf1 = c(0, 0, 5.60652347398228, 5.61350254884616, 0, 0, 0, 0, 0, 0), TPM_mORF_Leaf2 = c(0, 0, 3.61049607609657, 3.61499046955229, 1.37526811341663, 1.37526811341663, 1.37526811341663, 1.37526811341663, 1.93118494003443, 1.93118494003443), TPM_mORF_Root1 = c(0.075966119209846, 0.0807814579138581, 12.9573391538621, 12.9734686216885, 0.766753464638798, 0.766753464638798, 0.766753464638798, 0.766753464638798, 9.69024480583571, 9.69024480583571), TPM_mORF_Root2 = c(0, 0, 11.9844233034174, 11.9993416726748, 0.485803306586024, 0.485803306586024, 
0.485803306586024, 0.485803306586024, 13.2171759049679, 13.2171759049679), TPM_mORF_Root6DAS_1 = c(0, 0, 3.68099341662474, 3.68557556610601, 0, 0, 0, 0, 8.54861027025474, 8.54861027025474), TPM_mORF_Root6DAS_2 = c(0.0859279434500381, 0.0913747420512427, 20.9712701215337, 20.9973754370377, 0.607111227010591, 0.607111227010591, 0.607111227010591, 0.607111227010591, 8.76878078126128, 8.76878078126128), TPM_mORF_SAM_V1_1 = c(0.111113029385815, 0.118156259663823, 21.670386860971, 21.6973624462751, 0.822436205751297, 0.822436205751297, 0.822436205751297, 0.822436205751297, 10.5908197277331, 10.5908197277331), TPM_mORF_SAM_V1_2 = c(0.337618989220899, 0.239346649553322, 23.8243155365058, 23.8539723608251, 0, 0, 0, 0, 11.2186115309571, 11.2186115309571), TPM_mORF_SAM_V3_1 = c(0.19244911952294, 0.204648080104722, 24.483348273863, 24.5138254708844, 0, 0, 0, 0, 11.5243102210465, 11.5243102210465), TPM_mORF_SAM_V3_2 = c(0.885797519484353, 0.941946433287732, 21.1824657296363, 21.2088339442375, 0.621959939713711, 0.621959939713711, 0.621959939713711, 0.621959939713711, 8.67912706842305, 8.67912706842305), TPM_mORF_Seedling1 = c(0.0577068647184253, 0.0613647861978446, 6.86443842289814, 6.87298336699304, 1.28140367859192, 1.28140367859192, 1.28140367859192, 1.28140367859192, 2.8626471638543, 2.8626471638543), TPM_mORF_Seedling2 = c(1.0274385206843, 1.09256577117576, 4.55726253082638, 4.56293547173612, 0, 0, 0, 0, 3.34931488046525, 3.34931488046525), TPM_mORF_Seedling6DAS1 = c(0.170047092744869, 0.180826034142905, 10.3881296800559, 10.4010609618153, 0, 0, 0, 0, 6.62787339274703, 6.62787339274703), TPM_mORF_Seedling6DAS2 = c(0.200225443771896, 0.212917329825197, 14.4519953855864, 14.469985421336, 0, 0, 0, 0, 8.51359170240591, 8.51359170240591), TPM_mORF_Stem1 = c(0.07452871786933, 0.0792529426085881, 12.3335051788672, 12.3488580898782, 1.80538860963132, 1.80538860963132, 1.80538860963132, 1.80538860963132, 6.12666229313586, 6.12666229313586), TPM_mORF_Stem2 = c(0, 0, 
10.2054007660267, 10.2181045844076, 0.532192947104563, 0.532192947104563, 0.532192947104563, 0.532192947104563, 4.4305298183051, 4.4305298183051), TPM_mORF_Tassel1 = c(0.424885586034637, 0.395340946240699, 14.6485160508083, 14.6667507180915, 0.804098175333964, 0.804098175333964, 0.804098175333964, 0.804098175333964, 3.76378072843866, 3.76378072843866), TPM_mORF_Tassel2 = c(0.149978447560262, 0.159485278115995, 14.673033890032, 14.691299077447, 1.09749529429666, 1.09749529429666, 1.09749529429666, 1.09749529429666, 5.553380764842, 5.553380764842), TPM_mORF_Tassel1cm1 = c(0.291650426819245, 0.310137557699591, 32.7720159395946, 32.8128109801833, 0, 0, 0, 0, 13.0210338773437, 13.0210338773437), TPM_mORF_Tassel1cm2 = c(0.150791469374745, 0.160349835739483, 25.112060164431, 25.1433199903619, 0, 0, 0, 0, 12.2890105358246, 12.2890105358246), TPM_mORF_TopLeaf1 = c(0.185266040760669, 0.197009680502875, 8.35765398911196, 8.3680577077706, 0.934978514834704, 0.934978514834704, 0.934978514834704, 0.934978514834704, 7.25388065238328, 7.25388065238328), TPM_mORF_TopLeaf2 = c(0, 0, 8.10490776009202, 8.11499685688881, 1.40080302886771, 1.40080302886771, 1.40080302886771, 1.40080302886771, 6.07069762787997, 6.07069762787997)), row.names = c("Zm00001eb000210_T001_1", "Zm00001eb000210_T002_9", "Zm00001eb000340_T001_1", "Zm00001eb000340_T002_1", "Zm00001eb000470_T001_1", "Zm00001eb000470_T001_2", "Zm00001eb000470_T001_3", "Zm00001eb000470_T001_4", "Zm00001eb000590_T001_1", "Zm00001eb000590_T001_2"), class = "data.frame")
  > dput(uORF_TPM_10)
  structure(list(TPM_uORF_1Leaf1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_1Leaf2 = c(0, 0, 0, 0, 83.4955169709039, 0, 147.722837717753, 125.243275456356, 0, 0), TPM_uORF_1Root1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_1Root2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_3Leaf1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_3Leaf2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Ear1 = c(0, 0, 0, 0, 0, 0, 0, 41.8068094051703, 0, 0), TPM_uORF_Ear2 = c(0, 0, 0, 0, 28.7464807043411, 0, 50.8591581692189, 45.9943691269458, 0, 0), TPM_uORF_Ear5mm_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Ear5mm_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Embryo15_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Embryo15_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Embryo20_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Embryo20_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Endosperm15_1 = c(11.4347855154007, 11.4347855154007, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Endosperm15_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Endosperm20_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Endosperm20_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Inflorescence1 = c(0, 0, 0, 0, 11.2406460145824, 0, 19.8872967950304, 0, 0, 0), TPM_uORF_Inflorescence2 = c(33.3338244380922, 33.3338244380922, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Kernel1 = c(0, 0, 0, 0, 21.4246067462734, 0, 54.1501049631087, 3.06065810661049, 0, 0), TPM_uORF_Kernel2 = c(0, 0, 0, 0, 3.79467228769529, 0, 6.71365097053781, 3.79467228769529, 0, 0), TPM_uORF_Leaf1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Leaf2 = c(361.143298701066, 361.143298701066, 0, 0, 15.7018825522203, 0, 27.7802537462358, 0, 0, 0), TPM_uORF_Root1 = c(0, 0, 0, 0, 18.5389127807478, 0, 32.7996149197846, 0, 0, 0), TPM_uORF_Root2 = c(0, 0, 0, 0, 0, 0, 0, 144.251178847067, 0, 0), TPM_uORF_Root6DAS_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Root6DAS_2 = c(0, 0, 0, 0, 27.5534687981229, 0, 64.9979263955721, 36.7379583974973, 0, 0), TPM_uORF_SAM_V1_1 = c(0, 0, 0, 0, 2.40281615863606, 
0, 4.25113628066379, 4.80563231727211, 0, 0), TPM_uORF_SAM_V1_2 = c(0, 0, 0, 0, 0, 0, 0, 11.8552123025014, 0, 0), TPM_uORF_SAM_V3_1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_SAM_V3_2 = c(73.9301306725009, 73.9301306725009, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Seedling1 = c(0, 0, 0, 0, 0, 0, 0, 190.042200283117, 0, 0), TPM_uORF_Seedling2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Seedling6DAS1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Seedling6DAS2 = c(0, 0, 0, 0, 18.6624163966679, 0, 33.0181213171818, 24.8832218622239, 0, 0), TPM_uORF_Stem1 = c(0, 0, 0, 0, 0, 0, 14.1747578900174, 52.0768279002814, 0, 0), TPM_uORF_Stem2 = c(0, 0, 0, 0, 101.319692897418, 0, 179.257918203124, 55.2652870349554, 0, 0), TPM_uORF_Tassel1 = c(0, 0, 0, 0, 0, 0, 15.7804228950156, 8.91936946240015, 0, 0), TPM_uORF_Tassel2 = c(0, 0, 0, 0, 18.1722085926888, 0, 85.7355482321728, 24.2296114569184, 0, 0), TPM_uORF_Tassel1cm1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_Tassel1cm2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TPM_uORF_TopLeaf1 = c(0, 0, 0, 0, 121.429685770491, 25.7804563635811, 214.837136363176,     14.5715622924589, 0, 0), TPM_uORF_TopLeaf2 = c(0, 0, 0, 0,     23.4813161069057, 0, 49.852640350046, 4.69626322138114, 0,     0)), row.names = c("Zm00001eb000210_T001_1", "Zm00001eb000210_T002_9", "Zm00001eb000340_T001_1", "Zm00001eb000340_T002_1", "Zm00001eb000470_T001_1", "Zm00001eb000470_T001_2", "Zm00001eb000470_T001_3", "Zm00001eb000470_T001_4", "Zm00001eb000590_T001_1", "Zm00001eb000590_T001_2"), class = "data.frame")

Solution

  • Here is a general strategy that works well:

    1. Create a list of the ggplots.
    2. Iterate over that list and save each plot.
    library(tidyverse)

    # 1. Build a list of ggplots: one scatter plot per `cyl` group of `mtcars`.
    plots <-
      mtcars |>
      group_split(cyl) |>
      map(\(x) ggplot(x) +
            geom_point(aes(disp, hp)))

    # 2. Save every plot as "plot <index>.png" in the working directory.
    # `walk2()` is used instead of `map2()` because `ggsave()` is called only
    # for its side effect; `seq_along()` stays empty when `plots` is empty,
    # whereas `1:length(plots)` would yield c(1, 0).
    walk2(plots,
          paste("plot", seq_along(plots)),
          \(x, y) ggsave(paste0(y, ".png"), plot = x))
    

    Here is what that can look like with your data:

    library(tidyverse)

    # `mORF_TPM` / `uORF_TPM` are the wide tables with transcript IDs as row
    # names and one `TPM_mORF_*` / `TPM_uORF_*` column per tissue (presumably
    # `mORF_df` / `uORF_df` from the question -- confirm).

    # Rows are paired by position (`id`) below, so both tables must list the
    # same transcripts in the same order.
    stopifnot(identical(rownames(mORF_TPM), rownames(uORF_TPM)))

    # `pivot_longer()` here can be used to achieve the same as you do with `t()`.
    # The row names are first moved into a `transcript` column so they survive
    # the reshaping and can be used as plot titles. We also add an id that
    # equals the row number so we can join on it in the next step and number
    # the output files.
    morf_long <-
      mORF_TPM |>
      rownames_to_column("transcript") |>
      rename_with(\(x) str_remove(x, "TPM_mORF_")) |>
      mutate(id = row_number()) |>
      pivot_longer(-c(id, transcript), values_to = "mORF")

    uorf_long <-
      uORF_TPM |>
      rownames_to_column("transcript") |>
      rename_with(\(x) str_remove(x, "TPM_uORF_")) |>
      mutate(id = row_number()) |>
      pivot_longer(-c(id, transcript), values_to = "uORF")

    # Join `morf_long` and `uorf_long`: one row per transcript/tissue pair with
    # both values side by side (`name` holds the tissue).
    big_df <-
      full_join(morf_long,
                uorf_long,
                by = c("id", "transcript", "name"))

    # Now we split `big_df` by `id` and we get a list where each element is
    # a `data.frame` that we want to turn into one plot.
    big_list <-
      split(big_df, big_df$id)

    # Now we can apply the steps described above. As in the question, uORF is
    # on the x axis and mORF on the y axis, and each plot is titled with its
    # transcript ID.
    # NOTE: `plots` holds one ggplot object per transcript; for very large
    # inputs consider building and saving each plot inside a single `walk()`.
    plots <-
      big_list |>
      map(\(x) ggplot(x, aes(x = uORF, y = mORF)) +
            geom_point() +
            geom_smooth(method = "lm") +
            labs(title = x$transcript[1],
                 x = "uORF TPM",
                 y = "mORF TPM"))

    # `walk2()` because `ggsave()` is called only for its side effect;
    # `seq_along()` stays empty when there are no plots.
    walk2(plots,
          paste("plot", seq_along(plots)),
          \(x, y) ggsave(paste0(y, ".png"), plot = x))