Search code examples
rigraph

Generate network graph to look at similarity between groups


I have some data, CM (below), which comprise 6 gene panels. Some of the genes in the panels overlap. I created a Venn diagram, but my supervisor wants the data presented in a different format. They want the overlap between panels represented by a network graph. They want the node sizes to reflect the size of the gene panels and the edges to represent the genes shared between the panels (whereby the thickness of the line corresponds to the number of genes shared).

I am at a total loss, I'm not even sure how the data need to be manipulated to fit with something like igraph.

To break it down, I can count the number of genes in each group:

# Number of rows (genes) listed under each panel.
tally(group_by(CM, Panel))

I can also look at the overlap:

# Gene-by-panel contingency table: a non-zero cell means the gene is in that panel.
table(CM[["Gene.Symbol"]], CM[["Panel"]])

Then I'm stuck... please go easy on me, I'm a clinician!

> dput(CM)
structure(list(Gene.Symbol = c("DES", "DSC2", "DSG2", "DSP", 
"FLNC", "JUP", "LMNA", "PKP2", "PLN", "TMEM43", "ANK2", "CDH2", 
"CAVIN4", "CTNNA3", "LDB3", "RBM20", "RYR2", "SCN5A", "TGFB3", 
"TTN", "AARS2", "ABCC9", "ACAD9", "ACADVL", "ACTA1", "ACTC1", 
"ACTN2", "AGK", "ALMS1", "ALPK3", "BAG3", "CACNA1C", "CDH2", 
"COA5", "COA6", "COX10", "COX15", "CPT2", "CSRP3", "DES", "DMD", 
"DNAJC19", "DOLK", "DSC2", "DSG2", "DSP", "EMD", "EPG5", "FHL1", 
"FHOD3", "FKTN", "FLNC", "GAA", "GUSB", "HADHA", "HADHB", "HRAS", 
"IDH2", "IDUA", "JPH2", "JUP", "KRAS", "LAMP2", "LMNA", "LZTR1", 
"MAP2K1", "MAP2K2", "MLYCD", "MRAS", "MRPL44", "MUT", "MYBPC3", 
"MYH6", "MYH7", "MYL2", "MYL3", "MYLK3", "MYPN", "NDUFA11", "NDUFA2", 
"NDUFAF1", "NDUFB11", "NDUFS2", "NDUFS8", "NDUFV2", "NEXN", "NKX2-5", 
"NONO", "NRAP", "NRAS", "PCCA", "PCCB", "PKP2", "PLD1", "PLN", 
"PPA2", "PPCS", "PPP1CB", "PPP1R13L", "PRKAG2", "PTPN11", "RAF1", 
"RBM20", "RIT1", "RYR2", "SCN5A", "SCO1", "SCO2", "SHOC2", "SLC22A5", 
"SLC25A20", "SLC25A4", "SOS1", "SOS2", "SPEG", "TAZ", "TMEM43", 
"TMEM70", "TNNC1", "TNNI3", "TNNI3K", "TNNT2", "TPM1", "TSFM", 
"TTN", "TTR", "VCL", "AGL", "ANK2", "ARSB", "ATP5D", "ATPAF2", 
"BRAF", "CBL", "COX14", "COX20", "COX6B1", "COX7B", "CRYAB", 
"EYA4", "FAH", "FASTKD2", "FKRP", "FLII", "FNIP1", "FOXRED1", 
"GATA6", "GLA", "GLB1", "GSN", "HCN4", "HFE", "HGSNAT", "IDS", 
"LDB3", "LRPPRC", "MIB1", "MMACHC", "MT-TI", "NAA15", "NAGLU", 
"NDUFA1", "NDUFA10", "NDUFA4", "NDUFAF2", "NDUFAF3", "NDUFAF4", 
"NDUFAF5", "NDUFB3", "NDUFB8", "NDUFS1", "NDUFS3", "NDUFS4", 
"NDUFS6", "NDUFS7", "NDUFV1", "NF1", "NUBPL", "PDLIM3", "PET100", 
"PNPLA2", "RASA2", "RHBDF1", "RNF220", "RPL3L", "SDHA", "SDHAF1", 
"SDHD", "SGCD", "SGSH", "SHMT2", "SLC30A5", "SPRED2", "SURF1", 
"TMEM126B", "TOR1AIP1", "UQCC2", "ANKRD1", "APOPT1", "B3GAT3", 
"BCS1L", "BTK", "COA7", "COX6A1", "CPS1", "CTF1", "CYC1", "DHCR7", 
"DTNA", "ETFA", "ETFB", "ETFDH", "GALNS", "GBE1", "GLRA1", "GNS", 
"ILK", "KIF20A", "LAMA4", "LYRM7", "MCM10", "NDUFA6", "NDUFA9", 
"NDUFAF6", "NDUFAF8", "NEBL", "SPRED1", "TAB2", "TACO1", "TCAP", 
"TGFB3", "TMPO", "TTC19", "UQCRB", "", "ACTC1", "ACTN2", "BAG3", 
"CDH2", "DES", "DMD", "DOLK", "DSC2", "DSG2", "DSP", "EMD", "FLNC", 
"JUP", "LAMP2", "LMNA", "MYBPC3", "MYH7", "NEXN", "NKX2-5", "PKP2", 
"PLN", "RBM20", "RYR2", "SCN5A", "TMEM43", "TNNC1", "TNNI3", 
"TNNI3K", "TNNT2", "TPM1", "TTN", "VCL", "ANK2", "ANKRD1", "CRYAB", 
"CSRP3", "FKRP", "FKTN", "FLII", "GATA6", "LDB3", "MYH6", "MYLK3", 
"MYPN", "NRAP", "PRDM16", "RHBDF1", "RPL3L", "SGCD", "SLC6A6", 
"SPEG", "TBX20", "TBX5", "TCAP", "ABCC9", "EYA4", "FHOD3", "GATAD1", 
"ABCC9", "ACTC1", "ACTN2", "BAG3", "CSRP3", "DES", "DMD", "DSP", 
"EPG5", "EYA4", "HAMP", "HFE", "HFE2", "IDH2", "LMNA", "MYBPC3", 
"MYH6", "MYH7", "NEXN", "PLN", "PPP1R13L", "RBM20", "SCN5A", 
"SGCD", "SLC40A1", "SPEG", "TAZ", "TCAP", "TFR2", "TNNC1", "TNNI3", 
"TNNT2", "TPM1", "TTN", "VCL", "ANKRD1", "CRYAB", "FKTN", "FLNC", 
"GATAD1", "LDB3", "MYPN", "PRDM16", "PSEN1", "PSEN2", "SCN1B", 
"ACTA1", "ALMS1", "CAVIN4", "CTF1", "DMPK", "DNAJC19", "DOLK", 
"DSC2", "DSG2", "EMD", "FHL1", "FHL2", "GLA", "ILK", "JUP", "LAMA4", 
"LAMP2", "MPO", "MYL2", "MYL3", "NEBL", "NKX2-5", "NPPA", "PDLIM3", 
"PKP2", "PRKAG2", "RAB3GAP2", "RAF1", "RYR2", "SDHA", "SGCB", 
"SYNE1", "SYNE2", "TBX20", "TMEM43", "TMPO", "TTR", "TXNRD2", 
"XK", "ACTC1", "ACTN2", "CACNA1C", "CSRP3", "FHL1", "FHOD3", 
"FLNC", "GLA", "JPH2", "LAMP2", "MYBPC3", "MYH7", "MYL2", "MYL3", 
"PLN", "PRKAG2", "TNNC1", "TNNI3", "TNNT2", "TPM1", "TRIM63", 
"TTR", "ALPK3", "ATAD3A", "GYG1", "MT-TI", "MYLK2", "MYPN", "ACADVL", 
"ACTA1", "AGL", "ANKRD1", "ATP5E", "BRAF", "CALR3", "CASQ2", 
"CAV3", "COA5", "CRYAB", "DES", "FOXRED1", "FXN", "GAA", "GLB1", 
"GUSB", "HRAS", "KCNQ1", "KLF10", "LDB3", "LMNA", "LZTR1", "MAP2K1", 
"MAP2K2", "MRPL3", "MT-TL1", "MYH6", "MYO6", "MYOM1", "MYOZ2", 
"NEXN", "NRAS", "PDLIM3", "PTPN11", "RAF1", "SCO2", "SHOC2", 
"SLC25A3", "SLC25A4", "SOS1", "TCAP", "TMEM70", "TSFM", "TTN", 
"VCL", "ACTC1", "MYBPC3", "MYH7", "TAZ", "TNNT2", "TPM1", "CASQ2", 
"DNAJC19", "DTNA", "LDB3", "LMNA", "MIB1", "MYPN", "PRDM16", 
"SDHA", "TNNI3"), Panel = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), levels = c("Arrhythmogenic cardiomyopathy", 
"Cardiomyopathies - including childhood onset", "Dilated cardiomyopathy - adult and teen", 
"Dilated Cardiomyopathy and conduction defects", "Hypertrophic cardiomyopathy - teen and adult", 
"Left Ventricular Noncompaction Cardiomyopathy"), class = "factor")), row.names = c(NA, 
-468L), class = "data.frame")

Solution

  • It sounds to me like you are describing a bipartite network in which the panels are one node-type and the genes the other.

    library(igraph)
    #> 
    #> Attaching package: 'igraph'
    #> The following objects are masked from 'package:stats':
    #> 
    #>     decompose, spectrum
    #> The following object is masked from 'package:base':
    #> 
    #>     union
    # Build a panel-overlap network: one node per panel, and an edge between
    # two panels weighted by the number of genes they share.
    #
    # Keep one row per real gene-panel pair. Blank or missing symbols are not
    # genes, and repeated rows would inflate panel sizes and shared-gene counts.
    gene <- as.character(CM$Gene.Symbol)
    panel <- as.character(CM$Panel)
    keep <- !is.na(gene) & nzchar(gene) & !is.na(panel)
    edges <- unique(data.frame(gene = gene[keep], panel = panel[keep]))
    g <- graph_from_data_frame(edges, directed = FALSE)
    # Flag the panel nodes explicitly instead of inferring the two node sets,
    # so the result does not depend on vertex order or graph connectivity.
    V(g)$type <- V(g)$name %in% edges$panel
    g2 <- bipartite_projection(g, which = "true")   # project onto the panels
    # Panel size = number of distinct genes in the panel. strength(g2) would
    # instead sum the shared-gene counts over a node's edges, counting genes
    # once per sharing panel and ignoring genes unique to a panel.
    n_genes <- as.vector(table(edges$panel)[V(g2)$name])
    plot(g2,
         edge.width = sqrt(E(g2)$weight),        # number of genes shared
         vertex.size = sqrt(n_genes),            # number of genes in the panel
         edge.label = E(g2)$weight,              # optional labelling
         vertex.color = as.factor(V(g2)$name))   # optional color
    

    Created on 2023-12-12 with reprex v2.0.2