I'm trying to plot Alluvial Plots using ggplot. So far it went well until I want to try to clean the plot up.
As you can see on the plot, from left to right, the first stratum/column is the ID column then it follows by a column of labels: disease risk. What I want to achieve is in the out plot, instead of having the patient IDs zigzagging, I want them to be ordered by disease risk column, so that all the high risk IDs are all together on top, followed by low risk then the not filled ones. In this way it is much easier to see if there's any relations.
I have looked around for the arrange() and order() functions, they seem to do the trick for my actual input data but once I pass that data frame in ggplot, the output figure is still scrambled.
I thought of set the IDs to factor, then use levels=.... But this is not very smart if the patient ID keeps growing.
Is there a smarter way? please enlighten me. I have attached a link towards the sample data.
https://drive.google.com/file/d/16Pd8V3MCgEHmZEButVi2UjDiwZWklK-T/view?usp=sharing
My code to plot the graph :
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("xxxx.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS, levels = c("low_risk", "high_risk", "Not filled"))
CLL3S.plusrec$`Enriched response phenotype` <- factor(CLL3S.plusrec$`Enriched response phenotype`, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
d <-ggplot(OR, aes(y = count,
axis1= Patient.ID,
axis2= risk_by_DS,
axis3 = `Cluster assigned consensus`,
axis4 = `Cluster assigned single drug`,
axis5 = `Enriched response phenotype`
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=`Cluster assigned consensus`)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
print(d)
Not sure if this is what you want, try formating the risk column in this way:
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("test data.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS,
levels = c("high_risk","low_risk","Not filled"),ordered = T)
CLL3S.plusrec$Enriched.response.phenotype <- factor(CLL3S.plusrec$Enriched.response.phenotype, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
Output:
Also in my read.csv()
the quotes got off and dots are in the variables. That is why your original quoted variables now have dots. Maybe an issue from reading.
Update:
#Update
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
OR <- OR[order(OR$risk_by_DS,OR$Patient.ID),]
OR$Patient.ID <- factor(OR$Patient.ID,levels = unique(OR$Patient.ID),ordered = T)
#Plot
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
Output: