I’m trying to understand how to calculate Newman’s modularity, assortativity coefficients, and comparing against values from various packages.
suppressMessages(library(dplyr))
suppressMessages(library(igraph))
suppressMessages(library(assortnet))
Here I simulate a network:
set.seed(12345)
netSize <- 20 # number of nodes
ids <- 1:netSize # names
# create an edgelist
df <- data.frame("from" = ids, "to" = ids)
eL <- df %>% # edited because I realized I needed to get rid of duplicate edges after using expand.grid
expand.grid() %>%
filter(from != to) %>%
mutate(key = paste0(pmin(as.character(from), as.character(to)), pmax(as.character(to), as.character(from)), sep = ""))%>%
filter(duplicated(key)) %>%
dplyr::select(-3) %>%
mutate(fromC = ifelse(from <= ceiling(netSize/2), "A", "B"),
toC = ifelse(to <= ceiling(netSize/2), "A", "B"))
# index for assigning weights based on membership
same <- which(eL[,3] == eL[,4])
diff <- which(eL[,3] != eL[,4])
eL[same, "weight"] <- rbeta(length(same), 10, 1) %>% round(2)
eL[diff, "weight"] <- rbeta(length(diff), 1, 10) %>% round(2)
# create a graph
g = graph_from_data_frame(eL[,c(as.character(1):as.character(2))], directed = FALSE) # not sure if numeric edge names poses a problem or not
E(g)$weight <- eL$weight
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = FALSE, attr = "weight") %>% as.matrix()
adjmat[lower.tri(adjmat)] <- 0
V(g)$name
#> [1] "1" "11" "2" "3" "4" "5" "6" "7" "8" "9" "10" "12" "13" "14" "15"
#> [16] "16" "17" "18" "19" "20"
community <- ifelse(as.numeric(V(g)$name) <= 10, 1, 2)
V(g)$membership <- community
plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
vertex.color = V(g)$membership)
Create a matrix of kronecker delta values to use
degrees <- degree(g)
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(degrees))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
Create the modularity matrix
m <- E(g) %>% length()
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- (adjmat[i,j] - degrees[i] * degrees[j] / (2 * m)) * kdmat[i,j]
}
}
Sure looks like I’m calculating Newman’s Q incorrectly, but I can’t see my mistake
(myQ <- 1/(2 * m) * sum(modmat))
#> [1] -0.2909091
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
Next I want to calculate Q max from Newman
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (degrees[i] * degrees[j] / (2 * m) * kdmat[i,j])
}
}
# not sure if this is correct or not
(qmax <- 1/(2 * m) * ((2 * m) - sum(kmat)))
#> [1] 0.5
This should be the assortativity coefficient, if I were calculating q correctly:
myQ/qmax
#> [1] -0.5818182
Here’s the assortativity coefficient from assortnet:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7823255
The assortativity coefficient from igraph is completely different (and looks to me like it’s not reading the weights). Am I doing something wrong here, or is this function not meant for weighted networks?
igraph::assortativity_nominal(g, types = V(g)$membership, directed = FALSE)
#> [1] -0.09090909
Created on 2021-09-17 by the reprex package (v2.0.0)
This took a while to sort out. There were a few mistakes:
I modified other minor aspects of the code in places, but here is the correct version. Hopefully this is helpful for others.
Picking up after creating the graph:
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = TRUE, attr = "weight") %>% as.matrix() # use sparse = TRUE
# assign community or vertex type
community <- ifelse(as.numeric(V(g)$name) <= netSize/2, 1, 2)
V(g)$membership <- community
# plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
# vertex.color = V(g)$membership)
### Modularity calculations
Create a matrix of kronecker delta values to use
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(community))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
Create the modularity matrix
# (m <- E(g) %>% length()) # m is the number of edges in binary networks
(m <- sum(E(g)$weight)) # m is the total sum of edge weights in weighted networks
#> [1] 93.55
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- adjmat[i,j] - strength(g)[i] * strength(g)[j] / (2*m)
}
}
## now finish off the modularity calculation:
(myQ <- 1 / (2*m) * sum(modmat * kdmat))
#> [1] 0.3850534
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
# now the values match
Next I want to calculate Q max from Newman
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (sum(adjmat[i,]) * sum(adjmat[j,]) / (2 * m) * kdmat[i,j])
}
}
(qmax <- (1/(2 * m)) * ((2 * m) - sum(kmat)))
#> [1] 0.4999652
This is the assortativity coefficient:
myQ/qmax # using my q value
#> [1] 0.7701604
igQ/qmax # using igraphs q value
#> [1] 0.7701604
Here’s the assortativity coefficient from assortnet:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7701604
NOW THEY MATCH!!!
Created on 2021-09-22 by the reprex package (v2.0.1)