I want to plot a heatmap and cluster only the rows (i.e. the genes in tydf1). I also want to keep the heatmap's column labels in the same order as the columns of the data frame (i.e. tydf1). How can I do that?
Sample data
# Sample input: 4 genes (rows) x 36 numeric sample columns plus a `Gene` id
# column. Column names have three "_"-separated fields (X/Y/Z, T0-T3, R1-R3);
# presumably <condition>_<timepoint>_<replicate> -- confirm with the data owner.
# NOTE: the object carries the "data.table" class although only data.frame
# behaviour is relied on by the scripts below.
df1 <- structure(list(Gene = c("AA", "PQ", "XY", "UBQ"), X_T0_R1 = c(1.46559502, 0.220140568, 0.304127515, 1.098842127), X_T0_R2 = c(1.087642983, 0.237500819, 0.319844338, 1.256624804), X_T0_R3 = c(1.424945196, 0.21066267, 0.256496284, 1.467120048), X_T1_R1 = c(1.289943948, 0.207778662, 0.277942721, 1.238400358), X_T1_R2 = c(1.376535013, 0.488774258, 0.362562315, 0.671502431), X_T1_R3 = c(1.833390311, 0.182798731, 0.332856558, 1.448757569), X_T2_R1 = c(1.450753714, 0.247576125, 0.274415259, 1.035410946), X_T2_R2 = c(1.3094609, 0.390028842, 0.352460646, 0.946426593), X_T2_R3 = c(0.5953716, 1.007079177, 1.912258811, 0.827119776), X_T3_R1 = c(0.7906009, 0.730242116, 1.235644748, 0.832287694), X_T3_R2 = c(1.215333041, 1.012914813, 1.086362205, 1.00918082), X_T3_R3 = c(1.069312467, 0.780421013, 1.002313082, 1.031761442), Y_T0_R1 = c(0.053317766, 3.316414959, 3.617213894, 0.788193798), Y_T0_R2 = c(0.506623748, 3.599442788, 1.734075583, 1.179462912), Y_T0_R3 = c(0.713670106, 2.516735845, 1.236204882, 1.075393433), Y_T1_R1 = c(0.740998252, 1.444496448, 1.077023349, 0.869258744), Y_T1_R2 = c(0.648231834, 0.097957459, 0.791438659, 0.428805547), Y_T1_R3 = c(0.780499252, 0.187840968, 0.820430227, 0.51636582), Y_T2_R1 = c(0.35344654, 1.190274584, 0.401845911, 1.223534348), Y_T2_R2 = c(0.220223951, 1.367784148, 0.362815405, 1.102117612), Y_T2_R3 = c(0.432856978, 1.403057729, 0.10802472, 1.304233845), Y_T3_R1 = c(0.234963735, 1.232129062, 0.072433381, 1.203096462), Y_T3_R2 = c(0.353770497, 0.885122768, 0.011662112, 1.188149743), Y_T3_R3 = c(0.396091395, 1.333921747, 0.192594116, 1.838029829), Z_T0_R1 = c(0.398000559, 1.286528398, 0.129147097, 1.452769794), Z_T0_R2 = c(0.384759325, 1.122251177, 0.119475721, 1.385513609), Z_T0_R3 = c(1.582230097, 0.697419716, 2.406671502, 0.477415567), Z_T1_R1 = c(1.136843842, 0.804552001, 2.13213228, 0.989075996), Z_T1_R2 = c(1.275683837, 1.227821594, 0.31900326, 0.835941568), Z_T1_R3 = c(0.963349308, 0.968589683, 1.706670339, 0.807060135), 
Z_T2_R1 = c(3.765036263, 0.477443352, 1.712841882, 0.469173869), Z_T2_R2 = c(1.901023385, 0.832736132, 2.223429427, 0.593558769), Z_T2_R3 = c(1.407713024, 0.911920317, 2.011259223, 0.692553388), Z_T3_R1 = c(0.988333629, 1.095130142, 1.648598854, 0.629915612), Z_T3_R2 = c(0.618606729, 0.497458337, 0.549147265, 1.249492088), Z_T3_R3 = c(0.429823986, 0.471389536, 0.977124788, 1.136635484)), row.names = c(NA, -4L ), class = c("data.table", "data.frame"))
Scripts used
library(dplyr)
library(stringr)
library(tidyr)

# Reshape the wide sample table to long format: one row per (Gene, sample
# column), with the column name in `group` and its value in `Expression`.
# pivot_longer() replaces the superseded gather().
gdf1 <- pivot_longer(
  df1,
  cols = -Gene,
  names_to = "group",
  values_to = "Expression"
)

# Build the averaging key from the first two "_"-separated fields of the
# sample name (e.g. "X_T0_R1" -> "X_T0"). paste() is vectorised, so no
# row-wise apply() is needed.
group_parts <- str_split_fixed(gdf1$group, "_", 3)
gdf1$tgroup <- paste(group_parts[, 1], group_parts[, 2], sep = "_")

# Average the values within each (Gene, tgroup) cell and spread the groups
# back out to one column each, yielding one row per gene.
#  * .groups = "drop" returns an ungrouped tibble instead of one that is
#    still grouped by Gene.
#  * pivot_wider() replaces the superseded spread().
#  * The final select() pins the column order to the order in which the
#    groups first appear in df1, rather than relying on the names happening
#    to sort alphabetically.
tydf1 <- gdf1 %>%
  group_by(Gene, tgroup) %>%
  summarise(expression_mean = mean(Expression), .groups = "drop") %>%
  pivot_wider(names_from = tgroup, values_from = expression_mean) %>%
  select(Gene, all_of(unique(gdf1$tgroup)))
#1 heatmap script is being used
library(tidyverse)
library(gplots)
library(vegan)

# Move the first column of tydf1 (the gene identifier) into the row names so
# that only the numeric columns remain as data.
tydf1 <- column_to_rownames(as.data.frame(tydf1), var = colnames(tydf1)[1])

# Matrix form of the table, as required by the distance and plotting calls.
randup.m <- as.matrix(tydf1)

# 30-step colour ramp running red -> yellow -> dark green.
scaleRYG <- colorRampPalette(
  c("red", "yellow", "darkgreen"),
  space = "rgb"
)(30)

# Row clustering: Euclidean distances between rows, average linkage.
data.dist <- vegdist(randup.m, method = "euclidean")
row.clus <- hclust(data.dist, method = "average")

# Draw the heatmap with the precomputed row dendrogram. Only Rowv is given;
# Colv is not set here, so heatmap.2() applies its own default to the columns.
heatmap.2(
  randup.m,
  Rowv = as.dendrogram(row.clus),
  dendrogram = "row",
  col = scaleRYG,
  trace = "none",
  density.info = "none",
  margins = c(7, 10),
  lhei = c(2, 6),
  colsep = 1:3,
  sepcolor = "black",
  sepwidth = c(0.001, 0.0001),
  xlab = "Identifier",
  ylab = "Rows"
)
#2 heatmap script is being used
# Build the numeric matrix for heatmap() from tydf1.
# The original `tydf1[, -1]` assumed the first column is the gene identifier.
# Once the genes have been moved into the row names (as the first heatmap
# script does), that index instead drops the first expression column. Remove
# the identifier by name so both shapes of tydf1 are handled and no data
# column is lost.
heatmap_input <- as.data.frame(tydf1)
if ("Gene" %in% colnames(heatmap_input)) {
  # Keep the gene names as row labels before dropping the column.
  rownames(heatmap_input) <- heatmap_input[["Gene"]]
  heatmap_input[["Gene"]] <- NULL
}
df2 <- as.matrix(heatmap_input)
heatmap(df2)
Also, I want to add a color key.
The desired output is still not entirely clear to me, but here are some notes:
- You do not need vegdist() to calculate the distance matrix for your hclust() call: all(vegdist(randup.m, method = "euclidean") == dist(randup.m)) returns TRUE, so base dist() gives the same result;
- Colv = FALSE in your heatmap.2() call will prevent reordering of the columns (the default is TRUE);
- heatmap.2() already returns the heatmap with a color key.
So, summing it up: in your first script you are only missing the Colv = FALSE argument, and after a little adjustment it looks like this:
# Heatmap with rows ordered by the precomputed dendrogram `row.clus` and
# columns kept in the order they have in `randup.m`.
heatmap.2(
  randup.m,
  Rowv = as.dendrogram(row.clus),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  Colv = FALSE,
  dendrogram = "row",
  # scale = "row",
  col = scaleRYG,
  density.info = "none",
  trace = "none",
  # Rotate and nudge the column labels.
  srtCol = -45,
  adjCol = c(0.1, 0.5),
  xlab = "Identifier",
  ylab = "Rows"
)
However, I am still not sure: is this what you need?