I need to create a density plot and a QQ plot for a dataset whose columns contain triplicate (replicate) measurements. Each plot should have 3 rows, and the first three columns of the data frame (the replicates of the first group) should appear in the first column of the plot.
The sample data is
# Sample data: 21 rows (row names 500-520) with an `ID` column followed by
# 12 numeric columns named `<index>_<amount>ng`; every run of three
# consecutive columns shares the same amount suffix (3ng, 7-5ng, 10ng, 15ng).
# NOTE: this expression is not assigned to a name here, while the calls
# further down use `data` -- bind it first, e.g. `data <- structure(...)`.
structure(list(ID = c("P19882", "P38999", "P34227", "Q12335",
"P40893", "P33416", "Q04947", "Q12074", "P38116", "P32598", "P38221",
"P52910", "P0CX51;P0CX52", "Q02805", "P50861", "Q02821", "P53691",
"P19414", "Q04013", "Q07799", "P47039"), `1_3ng` = c(964860000,
46301000, 135090000, 126280000, 96191000, 91552000, 75231000,
76684000, 6150500, 21904000, 55134000, 231070000, 259060000,
7304500, 180320000, 22025000, 79510000, 677560000, 5994400, 18950000,
3468200), `2_3ng` = c(1074500000, 42888000, 125590000, 1.27e+08,
68911000, 93280000, 66457000, 69568000, 4848600, 20342000, 52573000,
242230000, 286970000, 5770200, 190140000, 22305000, 87598000,
673050000, 6459300, 21634000, 3115400), `3_3ng` = c(1021200000,
56550000, 117190000, 124730000, 69026000, 80712000, 55917000,
71989000, 5390500, 19299000, 51348000, 213050000, 254080000,
6976000, 181280000, 20951000, 83504000, 664590000, 6155300, 21158000,
2940600), `4_7-5ng` = c(846340000, 34713000, 95928000, 121440000,
16256000, 63182000, 46570000, 59008000, 1465600, 24838000, 48464000,
176140000, 175320000, 8738500, 120870000, 24496000, 75694000,
524780000, 4373900, 16281000, 5801700), `5_7-5ng` = c(921740000,
35240000, 109990000, 111250000, 39047000, 74125000, 51027000,
65971000, 3027100, 36529000, 39712000, 179630000, 218240000,
8092500, 152610000, 26032000, 62681000, 587100000, 4887000, 18454000,
3811100), `6_7-5ng` = c(908710000, 38475000, 106400000, 113590000,
36745000, 74175000, 48912000, 64201000, 3073300, 39069000, 40650000,
206350000, 220280000, 8673100, 154260000, 30254000, 61422000,
617900000, 5561000, 17102000, 5486600), `7_10ng` = c(964860000,
46301000, 135090000, 126280000, 96191000, 91552000, 75231000,
76684000, 6150500, 21904000, 55134000, 231070000, 259060000,
7304500, 180320000, 22025000, 79510000, 677560000, 5994400, 18950000,
3468200), `8_10ng` = c(1074500000, 42888000, 125590000, 1.27e+08,
68911000, 93280000, 66457000, 69568000, 4848600, 20342000, 52573000,
242230000, 286970000, 5770200, 190140000, 22305000, 87598000,
673050000, 6459300, 21634000, 3115400), `9_10ng` = c(1021200000,
56550000, 117190000, 124730000, 69026000, 80712000, 55917000,
71989000, 5390500, 19299000, 51348000, 213050000, 254080000,
6976000, 181280000, 20951000, 83504000, 664590000, 6155300, 21158000,
2940600), `10_15ng` = c(846340000, 34713000, 95928000, 121440000,
16256000, 63182000, 46570000, 59008000, 1465600, 24838000, 48464000,
176140000, 175320000, 8738500, 120870000, 24496000, 75694000,
524780000, 4373900, 16281000, 5801700), `11_15ng` = c(921740000,
35240000, 109990000, 111250000, 39047000, 74125000, 51027000,
65971000, 3027100, 36529000, 39712000, 179630000, 218240000,
8092500, 152610000, 26032000, 62681000, 587100000, 4887000, 18454000,
3811100), `12_15ng` = c(908710000, 38475000, 106400000, 113590000,
36745000, 74175000, 48912000, 64201000, 3073300, 39069000, 40650000,
206350000, 220280000, 8673100, 154260000, 30254000, 61422000,
617900000, 5561000, 17102000, 5486600)), row.names = 500:520, class = "data.frame")
I have used the below function for generating the density plot,
#' Density plot of every measurement column, one panel per column.
#'
#' The first column of `data` is treated as an identifier and dropped; each
#' remaining column becomes one density panel. Panels are placed in 3 rows
#' and filled row by row, in the order given by the numeric part of the
#' column name (or, when that part is not a plain number, by column position).
#'
#' @param data A data frame whose first column is an ID and whose remaining
#'   columns are numeric.
#' @return The interactive plot created by `plotly::ggplotly()`.
Densityplot_data <- function(data) {
  # Bound locally so R CMD check does not report the column names used in
  # the non-standard-evaluation calls below as undefined globals.
  variable <- value <- order_hlp <- variable_fac <- NULL

  # drop = FALSE keeps a data frame (and the column name) even when only one
  # measurement column is left after removing the ID column.
  long_data <- as.data.frame(data)[, -1, drop = FALSE] |>
    tibble::as_tibble() |>
    tidyr::pivot_longer(
      cols = dplyr::everything(),
      names_to = "variable",
      values_to = "value"
    ) |>
    dplyr::mutate(
      # Names such as "1_3ng" reduce to "1_3", which is not a number and
      # yields NA. Only that expected coercion warning is silenced here,
      # instead of switching off warnings for the whole session.
      order_hlp = suppressWarnings(
        as.numeric(gsub("[A-Za-z]", "", variable))
      )
    ) |>
    dplyr::arrange(order_hlp) |>
    dplyr::mutate(variable_fac = factor(variable, levels = unique(variable)))

  density_plot <- ggplot2::ggplot(long_data, ggplot2::aes(x = value)) +
    ggplot2::geom_density(fill = "#69b3a2") +
    ggplot2::facet_wrap(~variable_fac, scales = "free", nrow = 3) +
    ggplot2::theme_gray() +
    ggplot2::theme(text = ggplot2::element_text(size = 20))

  plotly::ggplotly(density_plot)
}
This function returned the below plot,
Densityplot_data(data)
Similarly, I used the function below to generate the QQ plot:
#' Normal QQ plot of every measurement column, one facet per column.
#'
#' The first column of `data` is treated as an identifier and dropped. The
#' remaining columns are reshaped to long format and each is drawn in its own
#' facet with a red reference line. `variable` is a character column, so
#' ggplot2 orders the facets alphabetically by column name.
#'
#' @param data A data frame whose first column is an ID and whose remaining
#'   columns are numeric.
#' @return The interactive plot created by `plotly::ggplotly()`.
QQplot_data <- function(data) {
  # Bound locally so R CMD check does not report the column names used in
  # the non-standard-evaluation calls below as undefined globals.
  variable <- value <- NULL

  long_data <- tidyr::pivot_longer(
    data[, -1, drop = FALSE],
    cols = dplyr::everything(),
    names_to = "variable",
    values_to = "value"
  )

  # The plot is built from the reshaped data and kept in a variable, so the
  # object handed to ggplotly() is the ggplot and not the data frame.
  qq_plot <- ggplot2::ggplot(
    long_data,
    ggplot2::aes(sample = value, color = variable)
  ) +
    ggplot2::stat_qq_line(col = "red", lwd = 0.5) +
    ggplot2::theme(text = ggplot2::element_text(size = 16)) +
    ggplot2::stat_qq() +
    ggplot2::facet_wrap(~variable) +
    ggplot2::ylab("Observed values") +
    ggplot2::xlab("Expected under normality")

  plotly::ggplotly(qq_plot)
}
QQplot_data(data)
And I got the plot like below,
In neither of the plots above do the panels follow the order I need: the three replicate columns of each group should be stacked in a single column of the plot. For a dataset with 12 columns, I need the panels arranged in the following order:
1 4 7 10
2 5 8 11
3 6 9 12
How can I change the code above to get this result?
Well, the approach you used was working for the data in one of your previous questions. With different column names you have to adjust the approach. In the code below I first use the formula
(as.numeric(factor(variable, names(data)[-1])) - 1) %% 3
to split the columns into groups based on the column position, so the approach does not rely on the column names. Then I add the row order using `group_by` and `row_number`. Afterwards I `arrange` by both columns and set the order of the factor `levels`:
library(tidyverse)
library(plotly)
#' Normal QQ plots with replicate columns stacked in one plot column.
#'
#' The first column of `data` is treated as an identifier and dropped. The
#' remaining columns are taken in consecutive groups of `n_rep` replicates.
#' Facets are ordered so that, with `n_rep` rows filled row by row, each group
#' occupies one column of the plot (for 12 columns: 1 4 7 10 / 2 5 8 11 /
#' 3 6 9 12). The ordering is derived from column position only, so any
#' column names work.
#'
#' @param data A data frame whose first column is an ID and whose remaining
#'   columns are numeric.
#' @param n_rep Number of replicate columns per group; also the number of
#'   facet rows. Defaults to 3.
#' @return The interactive plot created by `plotly::ggplotly()`.
QQplot_data <- function(data, n_rep = 3) {
  stopifnot(is.numeric(n_rep), length(n_rep) == 1, n_rep >= 1)

  # Bound locally so R CMD check does not report the column names used in
  # the non-standard-evaluation calls below as undefined globals.
  variable <- value <- position <- order_col <- order_row <- NULL

  # Column positions come from the data itself, not a hard-coded name list.
  measure_cols <- names(data)[-1]

  dat_plot <- data[, -1, drop = FALSE] |>
    tidyr::pivot_longer(
      cols = dplyr::everything(),
      names_to = "variable",
      values_to = "value"
    ) |>
    dplyr::mutate(
      position = as.numeric(factor(variable, measure_cols)),
      # 0 for the first replicate of each group, 1 for the second, ...
      order_col = (position - 1) %% n_rep
    ) |>
    dplyr::group_by(order_col, position) |>
    dplyr::mutate(order_row = dplyr::row_number()) |>
    dplyr::ungroup() |>
    # Sorting by observation index, then replicate index, puts all first
    # replicates ahead of all second replicates, and so on; ties keep their
    # original (column position) order.
    dplyr::arrange(order_row, order_col) |>
    dplyr::mutate(variable = factor(variable, levels = unique(variable)))

  # Keep the plot in a variable and pass it on explicitly, instead of
  # relying on ggplotly() picking up ggplot2::last_plot().
  qq_plot <- ggplot2::ggplot(
    dat_plot,
    ggplot2::aes(sample = value, color = variable)
  ) +
    ggplot2::stat_qq_line(col = "red", lwd = 0.5) +
    ggplot2::theme(text = ggplot2::element_text(size = 16)) +
    ggplot2::stat_qq() +
    # nrow is fixed so the layout does not depend on the grid ggplot2 would
    # otherwise choose for the number of panels.
    ggplot2::facet_wrap(~variable, nrow = n_rep) +
    ggplot2::ylab("Observed values") +
    ggplot2::xlab("Expected under normality")

  plotly::ggplotly(qq_plot)
}
QQplot_data(data)