This is my data frame (a subset of my larger one, used as an example):
# Reproducible dump of the example data frame `eee`.
dput(eee)
# dput() output: 24 rows with the columns interactome, class, PPI and LCC.
# Each of the four interactomes contributes one "observed" row followed by
# five "rewired" rows.
structure(list(interactome = c("HINT-binary", "HINT-binary",
"HINT-binary", "HINT-binary", "HINT-binary", "HINT-binary", "HINT-comp",
"HINT-comp", "HINT-comp", "HINT-comp", "HINT-comp", "HINT-comp",
"InBioMap", "InBioMap", "InBioMap", "InBioMap", "InBioMap", "InBioMap",
"Menche-2015", "Menche-2015", "Menche-2015", "Menche-2015", "Menche-2015",
"Menche-2015"), class = c("observed", "rewired", "rewired", "rewired",
"rewired", "rewired", "observed", "rewired", "rewired", "rewired",
"rewired", "rewired", "observed", "rewired", "rewired", "rewired",
"rewired", "rewired", "observed", "rewired", "rewired", "rewired",
"rewired", "rewired"), PPI = c(844L, 609L, 591L, 593L, 590L,
608L, 1329L, 874L, 872L, 864L, 807L, 855L, 7077L, 5049L, 5051L,
5025L, 4975L, 5014L, 2445L, 1673L, 1652L, 1716L, 1712L, 1683L
), LCC = c(290L, 191L, 188L, 214L, 183L, 215L, 401L, 346L, 365L,
366L, 359L, 356L, 635L, 615L, 613L, 613L, 617L, 615L, 528L, 476L,
493L, 490L, 492L, 480L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L,
1002L, 1003L, 1004L, 1005L, 1006L, 1007L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 3004L, 3005L, 3006L, 3007L, 3008L, 3009L
), class = "data.frame")
I would like to run a KS test on my different groups.
The groups in my data frame are "HINT-binary", "HINT-comp", "InBioMap" and "Menche-2015".
I found one solution here, but I'm not sure how to adapt it to my data frame.
Any suggestions or help would be really appreciated.
UPDATE
This is the KS test result I'm trying to replicate.
The description of the figure reads as follows:
(D) Number of protein-protein interactions (PPIs) between LC genes observed in the high-confidence human interactome (Menche et al., 2015) (dotted line) and 1000 randomized interactome networks (density), revealing significant enrichment for PPIs between LC genes relative to random expectation ($p < 10^{-3}$). (E) Size of the largest connected component (LCC) between LC genes in the high-confidence human interactome (dotted line) and 1000 randomized interactome networks (density), revealing LC genes occupy a distinct region of the human interactome ($p < 10^{-3}$). (F) LC genes are prioritized by a disease gene prediction algorithm (Ghiassian et al., 2015) ($p < 10^{-15}$, Kolmogorov–Smirnov test).
Consider combn to pass pairwise combinations of those groups into the ks.test method:
# Run the two-sample KS test on PPI and on LCC for every pair of interactomes.
# The result is a list with one element per pair; each element holds the two
# htest objects under PPI_ks_results and LCC_ks_results.
ks_results <- lapply(
  combn(unique(eee$interactome), 2, simplify = FALSE),
  function(x) {
    # x is a length-2 vector: the two interactome labels being compared.
    list(
      PPI_ks_results = ks.test(
        eee$PPI[eee$interactome == x[1]], eee$PPI[eee$interactome == x[2]]
      ),
      LCC_ks_results = ks.test(
        eee$LCC[eee$interactome == x[1]], eee$LCC[eee$interactome == x[2]]
      )
    )
  }
)
# NAME LIST ELEMENTS
# Label each result with its pair of interactomes, e.g. "HINT-binary_HINT-comp".
# combn() enumerates the pairs in the same order used to build ks_results, so
# labels and results line up. Passing paste() as FUN (with collapse forwarded
# through ...) yields a plain character vector instead of a list of strings.
# The names are assigned back onto ks_results itself because the code that
# follows looks elements up by name (ks_results$`HINT-binary_HINT-comp`).
ks_results <- setNames(
  ks_results,
  combn(unique(eee$interactome), 2, FUN = paste, collapse = "_")
)
# Keep the earlier variable name available for any code that refers to it.
ks_results_names <- ks_results
Output
# Review the list structure: one element per pair of interactomes, each
# holding the PPI_ks_results and LCC_ks_results htest objects.
str(ks_results)
# List of 6
# $ HINT-binary_HINT-comp :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 0.833
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.026
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-binary_InBioMap :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-binary_Menche-2015:List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-comp_InBioMap :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-comp_Menche-2015 :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ InBioMap_Menche-2015 :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
Access Individual Elements
# KS statistic (D) of the PPI comparison between HINT-binary and HINT-comp
ks_results[["HINT-binary_HINT-comp"]][["PPI_ks_results"]][["statistic"]]
# D
# 0.8333333
# p-value of the same comparison
ks_results[["HINT-binary_HINT-comp"]][["PPI_ks_results"]][["p.value"]]
# [1] 0.02597403
Bind to Data Frame
# Collect the PPI test results into a data frame with one row per pair of
# interactomes. vapply() pins the type and length of every column up front,
# and it takes element names from ks_results only, so the row names are the
# plain pair labels (sapply() appended ".D" from the named test statistic).
data.frame(
  statistic = vapply(ks_results, \(x) x$PPI_ks_results$statistic, numeric(1)),
  p_value = vapply(ks_results, \(x) x$PPI_ks_results$p.value, numeric(1)),
  alternative = vapply(
    ks_results, \(x) x$PPI_ks_results$alternative, character(1)
  ),
  method = vapply(ks_results, \(x) x$PPI_ks_results$method, character(1))
)
#                         statistic     p_value alternative                             method
# HINT-binary_HINT-comp   0.8333333 0.025974026   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-binary_InBioMap    1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-binary_Menche-2015 1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-comp_InBioMap      1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-comp_Menche-2015   1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# InBioMap_Menche-2015    1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test