I have data like this:
# Build a toy expression table (20 genes x 20 patients) and reshape it to
# long format: one row per (gene, patient) pair.
# NOTE(review): melt() is not base R; presumably reshape2::melt -- confirm the
# package is attached before this runs.
name_genes <- paste("GEN", LETTERS[1:20], sep = "_") # row labels
name_patients <- paste("PATIENT", 1:20, sep = "_") # column labels
value_expression <- data.frame(
  genes = name_genes,
  matrix(rnorm(400, mean = 2, sd = 1.8), nrow = 20, ncol = 20)
)
# Every column except the first ("genes") holds one patient's values.
names(value_expression)[-1] <- name_patients
df_heatmap <- melt(value_expression, id.vars = "genes")
# Rename the two columns added by melt().
names(df_heatmap)[2:3] <- c("patient", "expression_level")
I am trying to show only specific labels on the y axis.
I can plot the data like this:
# Heatmap: one tile per (patient, gene) pair, filled by expression level.
ggplot(df_heatmap, aes(x = patient, y = genes)) +
  geom_tile(aes(fill = expression_level), color = "white") +
  scale_fill_gradient(low = "white", high = "steelblue") +
  # Axis and legend titles set together in a single call.
  labs(
    x = "List of patients",
    y = "List of genes ",
    fill = "Expression level"
  ) +
  theme(
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14, face = "bold"),
    # Rotate the patient labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
I want to show the labels of only the following genes:
GEN_R
GEN_O
GEN_F
GEN_E
GEN_A
How can I do that?
You have to override the y-scale labels to do what you want, using `scale_y_discrete()`.
Just before the call to `ggplot()`, add:
# Indices (into name_genes) of the genes whose labels should stay visible:
# GEN_A, GEN_E, GEN_F, GEN_O, GEN_R.
nameskeep <- c(1, 5, 6, 15, 18)
# Start with an empty label for every gene. The vector is named by gene so
# that scale_y_discrete() matches each label to its break by name rather than
# by position; the labels therefore stay correct even if the gene levels are
# reordered. The length is taken from name_genes instead of a hard-coded 20.
labels <- setNames(rep("", length(name_genes)), name_genes)
# Fill in the real names only for the genes you wish to keep.
labels[nameskeep] <- name_genes[nameskeep]
Then plot your graph with `+ scale_y_discrete(labels = labels)` added:
# Same heatmap as before, but with the y-axis tick labels overridden so that
# only the selected genes are printed.
ggplot(df_heatmap, aes(x = patient, y = genes)) +
  geom_tile(aes(fill = expression_level), color = "white") +
  scale_fill_gradient(low = "white", high = "steelblue") +
  scale_y_discrete(labels = labels) + # added line: custom gene labels
  # Axis and legend titles set together in a single call.
  labs(
    x = "List of patients",
    y = "List of genes ",
    fill = "Expression level"
  ) +
  theme(
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14, face = "bold"),
    # Rotate the patient labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  )