In a network of passes among basketball players I want to:
Following this question Extracting Open Triangles in R Igraph (Network Analysis) we can do the following:
library(igraph)
set.seed(1234)
G <- sample_gnm(10, 15)
G
IGRAPH 72f8e6a U--- 10 15 -- Erdos renyi (gnm) graph
+ attr: name (g/c), type (g/c), loops (g/l), m (g/n)
+ edges from 72f8e6a:
[1] 1-- 3 1-- 4 3-- 4 1-- 5 3-- 5 6-- 7 3-- 8 4-- 8 6-- 8 7-- 8 2-- 9 6-- 9 7-- 9 4--10 9--10
plot(G)
Find the open triangles:
openTriList <- unique(do.call(c, lapply(as_ids(V(G)), function(v) {
do.call(c, lapply(as_ids(neighbors(G, v)), function(v1) {
v2 <- as_ids(neighbors(G, v1))
v2 <- v2[shortest.paths(G, v, v2) == 2]
if(length(v2) != 0) {
lapply(v2, function(vv2) { c(v, v1, vv2)[order(c(v, v1, vv2))] })
} else { list() }
}))
})))
The results are correct:
do.call(rbind, openTriList)
[,1] [,2] [,3]
[1,] 1 3 8
[2,] 1 4 8
[3,] 1 4 10
[4,] 2 6 9
[5,] 2 7 9
[6,] 2 9 10
[7,] 3 4 10
[8,] 3 6 8
[9,] 3 7 8
[10,] 1 4 5
[11,] 3 4 5
[12,] 4 6 8
[13,] 4 7 8
[14,] 4 9 10
[15,] 3 5 8
[16,] 6 9 10
[17,] 7 9 10
[18,] 4 8 10
[19,] 6 8 9
[20,] 7 8 9
How do we find the players that are brokers?
And how can we efficiently count the number of times these player broker an open triangle?
[The real data holds millions of passes and several thousands of players. So performance is an important aspect. Using combn
results in extremely long computational times. Are there faster ways of doing this? Perhaps getting the adjacency graph to build a sparse matrix and converting it into a data.table
object for joining by neighbours? See this link. ]
If you want to speed up, below is an option using for
loops + combn
to define a user functions f
, which ouput a list including both the openTriList
and occurCnt
(thank @minem's feedback as well for performance improvement):
f <- function(G) {
dmat <- as_adj(G, sparse = FALSE)
resLst <- c()
for (broker in 1:nrow(dmat)) {
k <- which(dmat[broker, ] == 1)
if (length(k) > 1) {
inds <- t(combn(k, 2))
resLst[[broker]] <- subset(cbind(broker, inds), dmat[inds] == 0)
}
}
resLst <- do.call(rbind, resLst)
resCnt <- table(resLst[, "broker"])
list(openTriLst = resLst, occurCnt = resCnt)
}
and you will see that they can achieve the desired output
> set.seed(1234)
> G <- sample_gnm(10, 15)
> f(G)
$openTriLst
broker
[1,] 1 4 5
[2,] 3 1 8
[3,] 3 4 5
[4,] 3 5 8
[5,] 4 1 8
[6,] 4 1 10
[7,] 4 3 10
[8,] 4 8 10
[9,] 6 8 9
[10,] 7 8 9
[11,] 8 3 6
[12,] 8 3 7
[13,] 8 4 6
[14,] 8 4 7
[15,] 9 2 6
[16,] 9 2 7
[17,] 9 2 10
[18,] 9 6 10
[19,] 9 7 10
[20,] 10 4 9
$occurCnt
1 3 4 6 7 8 9 10
1 3 4 1 1 4 5 1
and the speed is remarkably improved than my previous answer. You can also compare it with the answer by @minem.
> set.seed(1234)
> G1 <- sample_gnm(1000, 4000)
> system.time(f(G1))
user system elapsed
0.07 0.00 0.08
> G2 <- sample_gnm(10000, 40000)
> system.time(f(G2))
user system elapsed
2.46 0.16 2.62
You can try the code below using combn
+ are_ajdacent
, e.g.,
G <- sample_gnm(10, 15) %>%
get.data.frame() %>%
graph_from_data_frame(directed = FALSE)
openTriList <- do.call(
rbind,
sapply(
names(V(G)),
function(v) {
nbs <- names(neighbors(G, v))
if (length(nbs) > 1) {
do.call(rbind, Filter(length, combn(nbs, 2, FUN = function(x) {
if (!are_adjacent(G, x[1], x[2])) {
sort(as.numeric(c(v, x)))
}
}, simplify = FALSE)))
}
}
)
)
occurCount <- na.omit(
sapply(names(V(G)), function(v) {
nbs <- names(neighbors(G, v))
ifelse(length(nbs) > 1,
sum(!combn(nbs,
2,
FUN = function(x) are_adjacent(G, x[1], x[2])
)),
NA
)
})
)
and you will get named vector
> openTriList
[,1] [,2] [,3]
[1,] 1 4 5
[2,] 1 3 8
[3,] 3 4 5
[4,] 3 5 8
[5,] 6 8 9
[6,] 1 4 8
[7,] 1 4 10
[8,] 3 4 10
[9,] 4 8 10
[10,] 7 8 9
[11,] 2 6 9
[12,] 6 9 10
[13,] 2 7 9
[14,] 7 9 10
[15,] 2 9 10
[16,] 3 6 8
[17,] 3 7 8
[18,] 4 6 8
[19,] 4 7 8
[20,] 4 9 10
> occurCount
1 3 6 4 7 9 5 8 10
1 3 1 4 1 5 0 4 1
attr(,"na.action")
2
6
attr(,"class")
[1] "omit"