I have two lists of genes and i want to represent the overlap between the lists in a venn diagram. Could anybody suggest what I am missing in the code below? I'm using the VennDiagram package and the result i get is two non intersecting circles. I have also used Venny https://bioinfogp.cnb.csic.es/tools/venny/index2.0.2.html to draw the venn and confirmed there is an overlap.
library(VennDiagram)
#select the data
dataset1 <- data.frame(as.character(v7$HGNC_Symbol)) #567 genes
dataset2 <- data.frame(as.character(test$toupper.v13.HGNC_Symbol.)) #476 genes
#rename the columns so I know the source
names(dataset1)[1] <- "Progenesis"
names(dataset2)[1] <- "BiomaRt"
#sort the dataframes alphabetically
dataset1 <- dataset1 %>% arrange(Progenesis)
dataset2 <- dataset2 %>% arrange(BiomaRt)
#create a list of numbers and join it to dataframe1
mylist <- c(1:592)
dataset1 <- data.frame(mylist, dataset1)
#create a list of numbers and join it to dataframe2
mylist <- c(1:494)
dataset2 <- data.frame(mylist, dataset2)
#join the two dataframes
v <- plyr::join(dataset1, dataset2, type='full')
#draw the Venn Diagram
Dataset1 <- as.vector(v$Progenesis)
Dataset2 <- as.vector(v$BiomaRt)
Dataset2 <- na.omit(Dataset2)
#first 20 rows of each dataset
dput(Dataset1)
c("AACS ", "AARS ", "ABCF1 ", "ACAD11 ", "ACIN1 ", "ACO1 ", "ACOX3 ",
"ACP1 ", "ACSL3 ", "ACY1A ", "ACYP1 ", "ADA ", "ADI1 ", "ADK ",
"ADSL ", "ADSS ", "AGPAT3 ", "AHSA2 ", "AK4 ", "AKAP1 ")
dput(Dataset2)
structure(c("AACS", "AARS", "AARS", "ABCF1", "ACAD11", "ACIN1",
"ACO1", "ACOX3", "ACSL3", "ACYP1", "ADA", "ADI1", "ADK", "ADSL",
"AGPAT3", "AHSA2", "AKAP1", "AKAP12", "AKR1A1", "AKR1A1"),
na.action = structure(495:592, class = "omit"))
venn.diagram(
x = list(Dataset1, Dataset2),
category.names = c("Set1" , "Set2"),
filename = 'venn_diagram.png',
output=TRUE
)
All suggestions welcome.
Thanks
I have a few remarks on your data:
dput(Dataset1)
c("AACS ", "AARS ", "ABCF1 ", "ACAD11 ", "ACIN1 ", "ACO1 ", "ACOX3 ",
"ACP1 ", "ACSL3 ", "ACY1A ", "ACYP1 ", "ADA ", "ADI1 ", "ADK ",
"ADSL ", "ADSS ", "AGPAT3 ", "AHSA2 ", "AK4 ", "AKAP1 ")
dput(Dataset2)
structure(c("AACS", "AARS", "AARS", "ABCF1", "ACAD11", "ACIN1",
"ACO1", "ACOX3", "ACSL3", "ACYP1", "ADA", "ADI1", "ADK", "ADSL",
"AGPAT3", "AHSA2", "AKAP1", "AKAP12", "AKR1A1", "AKR1A1"),
na.action = structure(495:592, class = "omit"))
Your Dataset1
contains strings ending with a whitespace. They are not equal to the ones in Dataset2
, for example "AACS "
is not the same as "AACS"
. Try transforming your dataset to
Dataset1 <- c("AACS","AARS","ABCF1","ACAD11","ACIN1","ACO1","ACOX3",
"ACP1","ACSL3","ACY1A","ACYP1","ADA","ADI1","ADK",
"ADSL","ADSS","AGPAT3","AHSA2","AK4","AKAP1")
In this case your output should look like this:
Your output filetyp is
.png
, therefor you should use the parameter imagetype='png'
.