Search code examples
r ggplot2 dplyr tibble facet-wrap

How to arrange the first three columns of a data frame in one facet column, with a fixed number of rows, when making a plot in R?


I need to create a density plot and a QQ plot for a dataset whose columns contain triplicate (replicate) measurements. The plot should have 3 rows, and the first three columns of the data frame (the replicates of one group) should appear together in the first column of the plot.

The sample data is

# Sample data as `dput()` output: a data.frame with 21 rows (row names 500:520),
# a character `ID` column, and 12 numeric columns named `1_3ng` ... `12_15ng`.
# NOTE(review): the expression is not assigned to a name here, while the calls
# further down pass `data` -- presumably it is meant to be stored as `data`.
structure(list(ID = c("P19882", "P38999", "P34227", "Q12335", 
"P40893", "P33416", "Q04947", "Q12074", "P38116", "P32598", "P38221", 
"P52910", "P0CX51;P0CX52", "Q02805", "P50861", "Q02821", "P53691", 
"P19414", "Q04013", "Q07799", "P47039"), `1_3ng` = c(964860000, 
46301000, 135090000, 126280000, 96191000, 91552000, 75231000, 
76684000, 6150500, 21904000, 55134000, 231070000, 259060000, 
7304500, 180320000, 22025000, 79510000, 677560000, 5994400, 18950000, 
3468200), `2_3ng` = c(1074500000, 42888000, 125590000, 1.27e+08, 
68911000, 93280000, 66457000, 69568000, 4848600, 20342000, 52573000, 
242230000, 286970000, 5770200, 190140000, 22305000, 87598000, 
673050000, 6459300, 21634000, 3115400), `3_3ng` = c(1021200000, 
56550000, 117190000, 124730000, 69026000, 80712000, 55917000, 
71989000, 5390500, 19299000, 51348000, 213050000, 254080000, 
6976000, 181280000, 20951000, 83504000, 664590000, 6155300, 21158000, 
2940600), `4_7-5ng` = c(846340000, 34713000, 95928000, 121440000, 
16256000, 63182000, 46570000, 59008000, 1465600, 24838000, 48464000, 
176140000, 175320000, 8738500, 120870000, 24496000, 75694000, 
524780000, 4373900, 16281000, 5801700), `5_7-5ng` = c(921740000, 
35240000, 109990000, 111250000, 39047000, 74125000, 51027000, 
65971000, 3027100, 36529000, 39712000, 179630000, 218240000, 
8092500, 152610000, 26032000, 62681000, 587100000, 4887000, 18454000, 
3811100), `6_7-5ng` = c(908710000, 38475000, 106400000, 113590000, 
36745000, 74175000, 48912000, 64201000, 3073300, 39069000, 40650000, 
206350000, 220280000, 8673100, 154260000, 30254000, 61422000, 
617900000, 5561000, 17102000, 5486600), `7_10ng` = c(964860000, 
46301000, 135090000, 126280000, 96191000, 91552000, 75231000, 
76684000, 6150500, 21904000, 55134000, 231070000, 259060000, 
7304500, 180320000, 22025000, 79510000, 677560000, 5994400, 18950000, 
3468200), `8_10ng` = c(1074500000, 42888000, 125590000, 1.27e+08, 
68911000, 93280000, 66457000, 69568000, 4848600, 20342000, 52573000, 
242230000, 286970000, 5770200, 190140000, 22305000, 87598000, 
673050000, 6459300, 21634000, 3115400), `9_10ng` = c(1021200000, 
56550000, 117190000, 124730000, 69026000, 80712000, 55917000, 
71989000, 5390500, 19299000, 51348000, 213050000, 254080000, 
6976000, 181280000, 20951000, 83504000, 664590000, 6155300, 21158000, 
2940600), `10_15ng` = c(846340000, 34713000, 95928000, 121440000, 
16256000, 63182000, 46570000, 59008000, 1465600, 24838000, 48464000, 
176140000, 175320000, 8738500, 120870000, 24496000, 75694000, 
524780000, 4373900, 16281000, 5801700), `11_15ng` = c(921740000, 
35240000, 109990000, 111250000, 39047000, 74125000, 51027000, 
65971000, 3027100, 36529000, 39712000, 179630000, 218240000, 
8092500, 152610000, 26032000, 62681000, 587100000, 4887000, 18454000, 
3811100), `12_15ng` = c(908710000, 38475000, 106400000, 113590000, 
36745000, 74175000, 48912000, 64201000, 3073300, 39069000, 40650000, 
206350000, 220280000, 8673100, 154260000, 30254000, 61422000, 
617900000, 5561000, 17102000, 5486600)), row.names = 500:520, class = "data.frame")

I have used the below function for generating the density plot,

#' Draw one density panel per sample column as an interactive plotly figure.
#'
#' The first column of `data` is dropped (it is treated as an identifier);
#' every remaining column becomes one facet in a three-row grid.
#'
#' @param data A data frame (or something `as.data.frame()` accepts) whose
#'   first column is an identifier and whose other columns are numeric.
#' @param dir Fill direction forwarded to `ggplot2::facet_wrap()`. `"h"` (the
#'   default, and the previous behaviour) fills the grid row by row; `"v"`
#'   fills it column by column, so consecutive input columns are stacked
#'   vertically (1 4 7 10 / 2 5 8 11 / 3 6 9 12 for twelve columns).
#' @return The object returned by `plotly::ggplotly()`.
Densityplot_data <- function(data, dir = "h") {
  # Bind the names used via non-standard evaluation below.
  variable <- value <- NULL

  # Warnings are silenced for the duration of this call only; the caller's
  # `warn` option is restored on exit (also when an error is raised).
  old_opts <- options(warn = -1)
  on.exit(options(old_opts), add = TRUE)

  new_data <- as.data.frame(data)[, -1, drop = FALSE]

  density_plot <- new_data %>%
    tibble::as_tibble() %>%
    tidyr::pivot_longer(
      cols = dplyr::everything(),
      names_to = "variable",
      values_to = "value"
    ) %>%
    # Numeric sort key obtained by stripping letters from the column name.
    # Names that still contain other characters (e.g. "4_7-5ng" -> "4_7-5")
    # yield NA; rows with NA keys keep their input column order.
    dplyr::mutate(order_hlp = as.numeric(gsub("[A-Za-z]", "", variable))) %>%
    dplyr::arrange(order_hlp) %>%
    # Freeze the facet order to the order of first appearance.
    dplyr::mutate(variable_fac = factor(variable, levels = unique(variable))) %>%
    ggplot2::ggplot(ggplot2::aes(x = value)) +
    ggplot2::geom_density(fill = "#69b3a2") +
    ggplot2::facet_wrap(~variable_fac, scales = "free", nrow = 3, dir = dir) +
    ggplot2::theme_gray() +
    ggplot2::theme(text = ggplot2::element_text(size = 20))

  plotly::ggplotly(density_plot)
}

This function returned the below plot,

# Render the density plot for the sample data.
# NOTE(review): assumes the sample data frame has been assigned to `data` -- confirm.
Densityplot_data(data)

enter image description here

Similarly, I used the function below to generate the QQ plot:

#' Draw one normal QQ panel per sample column as an interactive plotly figure.
#'
#' The first column of `data` is dropped (it is treated as an identifier);
#' every remaining column is compared against the normal distribution in its
#' own facet.
#'
#' @param data A data frame whose first column is an identifier and whose
#'   other columns are numeric.
#' @return The object returned by `plotly::ggplotly()`.
QQplot_data <- function(data) {
  # Random normal draws carried along as an extra column; it is excluded from
  # the reshape and not mapped to any aesthetic below.
  Observed <- stats::rnorm(nrow(data))
  new_dat <- cbind(Observed, data[, -1, drop = FALSE])

  plot_data <- new_dat %>%
    tidyr::pivot_longer(
      cols = -Observed,
      names_to = "variable",
      values_to = "value"
    ) %>%
    dplyr::mutate(
      # Numeric sort key from the column name with letters stripped; names
      # such as "4_7-5ng" cannot be parsed and give NA (warning silenced).
      order_hlp = suppressWarnings(
        as.numeric(gsub("[A-Za-z]", "", variable))
      ),
      # Facet order follows the order of first appearance, i.e. the input
      # column order.
      variable_fac = factor(variable, levels = unique(variable))
    ) %>%
    dplyr::arrange(order_hlp)

  # The data is passed to ggplot() explicitly, so the plot is built from the
  # reshaped table rather than being created empty and discarded.
  qq_plot <- ggplot2::ggplot(
    plot_data,
    ggplot2::aes(sample = value, color = variable)
  ) +
    ggplot2::stat_qq_line(col = "red", lwd = 0.5) +
    ggplot2::theme(text = ggplot2::element_text(size = 16)) +
    ggplot2::stat_qq() +
    ggplot2::facet_wrap(~variable_fac) +
    ggplot2::ylab("Observed values") +
    ggplot2::xlab("Expected under normality")

  plotly::ggplotly(qq_plot)
}
QQplot_data(data)

This produced the plot below: enter image description here

In both of the plots above, the panels are not arranged so that the first three columns of the data frame share one column of the plot. For a dataset with 12 columns, I need the panels in the following order:

1 4 7 10
2 5 8 11
3 6 9 12

How can I change the code above to get this result?


Solution

  • Well, the approach you used was working for the data in one of your previous questions. With different column names you have to adjust the approach. In the code below I first use the formula

    (as.numeric(factor(variable, names(data)[-1])) - 1) %% 3
    

    to split the columns into groups based on their position, so the approach does not rely on the column names. Then I add the row order using group_by and row_number. Afterwards I arrange by both helper columns and set the order of the factor levels accordingly.

    library(tidyverse)
    library(plotly)
    
    #' Normal QQ plots, one panel per sample column, with every run of three
    #' consecutive input columns stacked in one column of the facet grid.
    #'
    #' @param data A data frame whose first column is an identifier and whose
    #'   other columns are numeric. Panel placement is derived from column
    #'   position only, so any column names work.
    #' @return The object returned by `plotly::ggplotly()`.
    QQplot_data <- function(data) {
      # Random normal draws carried along as an extra column; it is excluded
      # from the reshape and not mapped to any aesthetic below.
      Observed <- stats::rnorm(nrow(data))
      new_dat <- cbind(Observed, data[, -1, drop = FALSE])

      dat_plot <- new_dat %>%
        tidyr::pivot_longer(
          cols = -Observed,
          names_to = "variable",
          values_to = "value"
        ) %>%
        dplyr::mutate(
          # 1-based position of the column in the input data.
          position = as.numeric(factor(variable, names(data)[-1])),
          # 0, 1, 2: index of the column within its group of three.
          order_col = (position - 1) %% 3
        ) %>%
        dplyr::group_by(order_col, position) %>%
        dplyr::mutate(order_row = dplyr::row_number()) %>%
        dplyr::ungroup() %>%
        # Sorting by (order_row, order_col) makes the first appearances read
        # 1, 4, 7, ..., 2, 5, 8, ..., 3, 6, 9, ... which becomes the level
        # order and hence the row-wise facet order.
        dplyr::arrange(order_row, order_col) %>%
        dplyr::mutate(variable = factor(variable, levels = unique(variable)))

      qq_plot <- dat_plot %>%
        ggplot2::ggplot(ggplot2::aes(sample = value, color = variable)) +
        ggplot2::stat_qq_line(
          col = "red",
          lwd = 0.5
        ) +
        ggplot2::theme(text = ggplot2::element_text(size = 16)) +
        ggplot2::stat_qq() +
        # Three rows, so each group of three columns fills one grid column.
        ggplot2::facet_wrap(~variable, nrow = 3) +
        ggplot2::ylab("Observed values") +
        ggplot2::xlab("Expected under normality")

      # Pass the plot explicitly instead of relying on ggplot2::last_plot().
      plotly::ggplotly(qq_plot)
    }
    QQplot_data(data)
    

    enter image description here