I have some data, CM (below), which comprises 6 gene panels. Some of the genes in the panels overlap. I created a Venn diagram, but my supervisor wants the data presented in a different format. They want the overlap between panels represented by a network graph. They want the node sizes to reflect the size of the gene panels and the edges to represent the genes shared between the panels (whereby the thickness of the line corresponds to the number of genes shared).
I am at a total loss; I'm not even sure how the data need to be manipulated to work with something like igraph.
To break it down, I can count the number of genes in each group:
# Count the rows (genes) listed under each panel; the count lands in column `n`.
CM %>%
  group_by(Panel) %>%
  summarise(n = n())
I can also look at the overlap:
# Cross-tabulate gene symbols (rows) against panels (columns); a non-zero cell
# means that gene is listed in that panel.
table(CM[["Gene.Symbol"]], CM[["Panel"]])
Then I'm stuck... please go easy on me, I'm a clinician!
> dput(CM)
structure(list(Gene.Symbol = c("DES", "DSC2", "DSG2", "DSP",
"FLNC", "JUP", "LMNA", "PKP2", "PLN", "TMEM43", "ANK2", "CDH2",
"CAVIN4", "CTNNA3", "LDB3", "RBM20", "RYR2", "SCN5A", "TGFB3",
"TTN", "AARS2", "ABCC9", "ACAD9", "ACADVL", "ACTA1", "ACTC1",
"ACTN2", "AGK", "ALMS1", "ALPK3", "BAG3", "CACNA1C", "CDH2",
"COA5", "COA6", "COX10", "COX15", "CPT2", "CSRP3", "DES", "DMD",
"DNAJC19", "DOLK", "DSC2", "DSG2", "DSP", "EMD", "EPG5", "FHL1",
"FHOD3", "FKTN", "FLNC", "GAA", "GUSB", "HADHA", "HADHB", "HRAS",
"IDH2", "IDUA", "JPH2", "JUP", "KRAS", "LAMP2", "LMNA", "LZTR1",
"MAP2K1", "MAP2K2", "MLYCD", "MRAS", "MRPL44", "MUT", "MYBPC3",
"MYH6", "MYH7", "MYL2", "MYL3", "MYLK3", "MYPN", "NDUFA11", "NDUFA2",
"NDUFAF1", "NDUFB11", "NDUFS2", "NDUFS8", "NDUFV2", "NEXN", "NKX2-5",
"NONO", "NRAP", "NRAS", "PCCA", "PCCB", "PKP2", "PLD1", "PLN",
"PPA2", "PPCS", "PPP1CB", "PPP1R13L", "PRKAG2", "PTPN11", "RAF1",
"RBM20", "RIT1", "RYR2", "SCN5A", "SCO1", "SCO2", "SHOC2", "SLC22A5",
"SLC25A20", "SLC25A4", "SOS1", "SOS2", "SPEG", "TAZ", "TMEM43",
"TMEM70", "TNNC1", "TNNI3", "TNNI3K", "TNNT2", "TPM1", "TSFM",
"TTN", "TTR", "VCL", "AGL", "ANK2", "ARSB", "ATP5D", "ATPAF2",
"BRAF", "CBL", "COX14", "COX20", "COX6B1", "COX7B", "CRYAB",
"EYA4", "FAH", "FASTKD2", "FKRP", "FLII", "FNIP1", "FOXRED1",
"GATA6", "GLA", "GLB1", "GSN", "HCN4", "HFE", "HGSNAT", "IDS",
"LDB3", "LRPPRC", "MIB1", "MMACHC", "MT-TI", "NAA15", "NAGLU",
"NDUFA1", "NDUFA10", "NDUFA4", "NDUFAF2", "NDUFAF3", "NDUFAF4",
"NDUFAF5", "NDUFB3", "NDUFB8", "NDUFS1", "NDUFS3", "NDUFS4",
"NDUFS6", "NDUFS7", "NDUFV1", "NF1", "NUBPL", "PDLIM3", "PET100",
"PNPLA2", "RASA2", "RHBDF1", "RNF220", "RPL3L", "SDHA", "SDHAF1",
"SDHD", "SGCD", "SGSH", "SHMT2", "SLC30A5", "SPRED2", "SURF1",
"TMEM126B", "TOR1AIP1", "UQCC2", "ANKRD1", "APOPT1", "B3GAT3",
"BCS1L", "BTK", "COA7", "COX6A1", "CPS1", "CTF1", "CYC1", "DHCR7",
"DTNA", "ETFA", "ETFB", "ETFDH", "GALNS", "GBE1", "GLRA1", "GNS",
"ILK", "KIF20A", "LAMA4", "LYRM7", "MCM10", "NDUFA6", "NDUFA9",
"NDUFAF6", "NDUFAF8", "NEBL", "SPRED1", "TAB2", "TACO1", "TCAP",
"TGFB3", "TMPO", "TTC19", "UQCRB", "", "ACTC1", "ACTN2", "BAG3",
"CDH2", "DES", "DMD", "DOLK", "DSC2", "DSG2", "DSP", "EMD", "FLNC",
"JUP", "LAMP2", "LMNA", "MYBPC3", "MYH7", "NEXN", "NKX2-5", "PKP2",
"PLN", "RBM20", "RYR2", "SCN5A", "TMEM43", "TNNC1", "TNNI3",
"TNNI3K", "TNNT2", "TPM1", "TTN", "VCL", "ANK2", "ANKRD1", "CRYAB",
"CSRP3", "FKRP", "FKTN", "FLII", "GATA6", "LDB3", "MYH6", "MYLK3",
"MYPN", "NRAP", "PRDM16", "RHBDF1", "RPL3L", "SGCD", "SLC6A6",
"SPEG", "TBX20", "TBX5", "TCAP", "ABCC9", "EYA4", "FHOD3", "GATAD1",
"ABCC9", "ACTC1", "ACTN2", "BAG3", "CSRP3", "DES", "DMD", "DSP",
"EPG5", "EYA4", "HAMP", "HFE", "HFE2", "IDH2", "LMNA", "MYBPC3",
"MYH6", "MYH7", "NEXN", "PLN", "PPP1R13L", "RBM20", "SCN5A",
"SGCD", "SLC40A1", "SPEG", "TAZ", "TCAP", "TFR2", "TNNC1", "TNNI3",
"TNNT2", "TPM1", "TTN", "VCL", "ANKRD1", "CRYAB", "FKTN", "FLNC",
"GATAD1", "LDB3", "MYPN", "PRDM16", "PSEN1", "PSEN2", "SCN1B",
"ACTA1", "ALMS1", "CAVIN4", "CTF1", "DMPK", "DNAJC19", "DOLK",
"DSC2", "DSG2", "EMD", "FHL1", "FHL2", "GLA", "ILK", "JUP", "LAMA4",
"LAMP2", "MPO", "MYL2", "MYL3", "NEBL", "NKX2-5", "NPPA", "PDLIM3",
"PKP2", "PRKAG2", "RAB3GAP2", "RAF1", "RYR2", "SDHA", "SGCB",
"SYNE1", "SYNE2", "TBX20", "TMEM43", "TMPO", "TTR", "TXNRD2",
"XK", "ACTC1", "ACTN2", "CACNA1C", "CSRP3", "FHL1", "FHOD3",
"FLNC", "GLA", "JPH2", "LAMP2", "MYBPC3", "MYH7", "MYL2", "MYL3",
"PLN", "PRKAG2", "TNNC1", "TNNI3", "TNNT2", "TPM1", "TRIM63",
"TTR", "ALPK3", "ATAD3A", "GYG1", "MT-TI", "MYLK2", "MYPN", "ACADVL",
"ACTA1", "AGL", "ANKRD1", "ATP5E", "BRAF", "CALR3", "CASQ2",
"CAV3", "COA5", "CRYAB", "DES", "FOXRED1", "FXN", "GAA", "GLB1",
"GUSB", "HRAS", "KCNQ1", "KLF10", "LDB3", "LMNA", "LZTR1", "MAP2K1",
"MAP2K2", "MRPL3", "MT-TL1", "MYH6", "MYO6", "MYOM1", "MYOZ2",
"NEXN", "NRAS", "PDLIM3", "PTPN11", "RAF1", "SCO2", "SHOC2",
"SLC25A3", "SLC25A4", "SOS1", "TCAP", "TMEM70", "TSFM", "TTN",
"VCL", "ACTC1", "MYBPC3", "MYH7", "TAZ", "TNNT2", "TPM1", "CASQ2",
"DNAJC19", "DTNA", "LDB3", "LMNA", "MIB1", "MYPN", "PRDM16",
"SDHA", "TNNI3"), Panel = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), levels = c("Arrhythmogenic cardiomyopathy",
"Cardiomyopathies - including childhood onset", "Dilated cardiomyopathy - adult and teen",
"Dilated Cardiomyopathy and conduction defects", "Hypertrophic cardiomyopathy - teen and adult",
"Left Ventricular Noncompaction Cardiomyopathy"), class = "factor")), row.names = c(NA,
-468L), class = "data.frame")
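It sounds to me like you are describing a bipartite network in which the panels are one node-type and the genes the other. Projecting that network onto the panel nodes gives a panel-to-panel graph in which each edge weight is the number of genes the two panels share.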
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
# Goal: a panel-to-panel network in which node size reflects the number of
# genes in a panel and edge width reflects the number of genes two panels share.

# One edge per distinct (gene, panel) pair. Blank gene symbols are dropped and
# repeated rows collapsed so they cannot inflate panel sizes or overlaps.
has_symbol <- nzchar(as.character(CM$Gene.Symbol))
cm_edges <- unique(CM[has_symbol, c("Gene.Symbol", "Panel")])
g <- graph_from_data_frame(cm_edges)

# Mark node types explicitly (TRUE = panel, FALSE = gene) instead of inferring
# them with bipartite_mapping(), whose TRUE/FALSE labelling is arbitrary and
# may differ between disconnected components.
V(g)$type <- V(g)$name %in% as.character(cm_edges$Panel)

# Project onto the panel nodes; each edge's `weight` is the number of genes
# the two panels have in common.
g2 <- bipartite_projection(g, which = "true")

# Panel size = number of gene edges attached to the panel in the bipartite
# graph. (strength(g2) would instead sum the overlaps with the other panels,
# which is not the size of the panel.)
panel_size <- degree(g, v = V(g2)$name)

# Square-root scaling keeps the largest panel/overlap from dominating the plot.
plot(g2,
  edge.width = sqrt(E(g2)$weight),       # number of genes shared
  vertex.size = sqrt(panel_size),        # number of genes in the panel
  edge.label = E(g2)$weight,             # optional labelling
  vertex.color = as.factor(V(g2)$name))  # optional color
Created on 2023-12-12 with reprex v2.0.2