Search code examples
r venn-diagram

My Venn diagrams don't intersect - what's missing/wrong with my code?


I have two lists of genes and I want to represent the overlap between the lists in a Venn diagram. Could anybody suggest what I am missing in the code below? I'm using the VennDiagram package and the result I get is two non-intersecting circles. I have also used Venny (https://bioinfogp.cnb.csic.es/tools/venny/index2.0.2.html) to draw the Venn diagram and confirmed that there is an overlap.

# Build the two gene-symbol vectors (Dataset1, Dataset2) that are later passed
# to venn.diagram().
library(VennDiagram)
# NOTE(review): `%>%` and arrange() are used below but only VennDiagram is
# attached in this snippet - presumably dplyr is loaded elsewhere; confirm.

# Select the data: one single-column data frame of gene symbols per source.
dataset1 <- data.frame(as.character(v7$HGNC_Symbol)) #567 genes
dataset2 <- data.frame(as.character(test$toupper.v13.HGNC_Symbol.)) #476 genes

# Rename the columns so the source of each symbol is known.
names(dataset1)[1] <- "Progenesis"
names(dataset2)[1] <- "BiomaRt"

# Sort the data frames alphabetically by gene symbol.
dataset1 <- dataset1 %>% arrange(Progenesis)
dataset2 <- dataset2 %>% arrange(BiomaRt)

# Create a running row index and attach it to dataset1.
# NOTE(review): the index has 592 entries although the comment above says
# 567 genes - confirm the actual row count.
mylist <- c(1:592)
dataset1 <- data.frame(mylist, dataset1)

# Create a running row index and attach it to dataset2.
# NOTE(review): the index has 494 entries although the comment above says
# 476 genes - confirm the actual row count.
mylist <- c(1:494)
dataset2 <- data.frame(mylist, dataset2)

# Full join of the two data frames. No `by` is given and `mylist` is the only
# column name the two frames share, so rows are paired by row index, not by
# gene symbol; indices missing from one frame get NA in that frame's column.
v <- plyr::join(dataset1, dataset2, type='full')

# Extract the two symbol columns as plain vectors for the Venn diagram.

Dataset1 <- as.vector(v$Progenesis)
Dataset2 <- as.vector(v$BiomaRt)
# Drop the NA padding that the full join added to the shorter column.
Dataset2 <- na.omit(Dataset2)

    #first 20 rows of each dataset 
    dput(Dataset1)
        c("AACS ", "AARS ", "ABCF1 ", "ACAD11 ", "ACIN1 ", "ACO1 ", "ACOX3 ", 
        "ACP1 ", "ACSL3 ", "ACY1A ", "ACYP1 ", "ADA ", "ADI1 ", "ADK ", 
        "ADSL ", "ADSS ", "AGPAT3 ", "AHSA2 ", "AK4 ", "AKAP1 ")
        dput(Dataset2)
        structure(c("AACS", "AARS", "AARS", "ABCF1", "ACAD11", "ACIN1", 
        "ACO1", "ACOX3", "ACSL3", "ACYP1", "ADA", "ADI1", "ADK", "ADSL", 
        "AGPAT3", "AHSA2", "AKAP1", "AKAP12", "AKR1A1", "AKR1A1"),
        na.action = structure(495:592, class = "omit"))

# Draw a two-set Venn diagram of the symbol vectors, labelled "Set1" and
# "Set2", and write it to the file 'venn_diagram.png'.
# NOTE(review): no `imagetype` is passed, so the image is written in
# venn.diagram()'s default format - confirm it matches the .png extension.
venn.diagram(
  x = list(Dataset1, Dataset2),
  category.names = c("Set1" , "Set2"),
  filename = 'venn_diagram.png',
  output=TRUE
  )

All suggestions welcome.

Thanks

The result of my code is this

What I want is here


Solution

  • I have a few remarks on your data:

    dput(Dataset1)
            c("AACS ", "AARS ", "ABCF1 ", "ACAD11 ", "ACIN1 ", "ACO1 ", "ACOX3 ", 
            "ACP1 ", "ACSL3 ", "ACY1A ", "ACYP1 ", "ADA ", "ADI1 ", "ADK ", 
            "ADSL ", "ADSS ", "AGPAT3 ", "AHSA2 ", "AK4 ", "AKAP1 ")
    dput(Dataset2)
            structure(c("AACS", "AARS", "AARS", "ABCF1", "ACAD11", "ACIN1", 
            "ACO1", "ACOX3", "ACSL3", "ACYP1", "ADA", "ADI1", "ADK", "ADSL", 
            "AGPAT3", "AHSA2", "AKAP1", "AKAP12", "AKR1A1", "AKR1A1"),
            na.action = structure(495:592, class = "omit"))
    

    The strings in your Dataset1 end with a trailing space, so they are not equal to the ones in Dataset2; for example, "AACS " is not the same as "AACS". As a result, the two sets share no elements and the circles do not overlap. Try cleaning up your dataset like this:

    # Strip leading/trailing whitespace from every symbol so that "AACS "
    # becomes "AACS" and matches the entries in Dataset2. This cleans the whole
    # vector, not only the 20 sample values shown above, and avoids retyping
    # the symbols by hand.
    Dataset1 <- trimws(Dataset1)
    

    In this case your output should look like this: (image: Venn diagram). Your output file type is .png, therefore you should also set the parameter imagetype='png' in the venn.diagram() call.