Optimizing Functions for Network Graphs

In this question here (Summing Nodes in a Network), I learned how to find the square within the original network having the largest node sum.

Here is the data for this question:

library(igraph)

width <- 30
height <- 20
num_nodes <- width * height

# Create a grid
x <- rep(1:width, each = height)
y <- rep(1:height, times = width)

g <- make_empty_graph(n = num_nodes, directed = FALSE)

# Function to get node index
get_node_index <- function(i, j) (i - 1) * height + j

# Add edges
edges <- c()
for(i in 1:width) {
   for(j in 1:height) {
      current_node <- get_node_index(i, j)
    
      # Connect to right neighbor
      if(i < width) edges <- c(edges, current_node, get_node_index(i + 1, j))
    
      # Connect to bottom neighbor
      if(j < height) edges <- c(edges, current_node, get_node_index(i, j + 1))
   }
}

g <- add_edges(g, edges)

V(g)$x <- x
V(g)$y <- y

par(mfrow=c(1,2))

V(g)$name <- 1:num_nodes
plot(g, vertex.size = 7, vertex.label = V(g)$name, vertex.label.cex = 0.6, main = "Map with         Node Indices")

V(g)$value <- sample(1:100, num_nodes, replace = TRUE)
plot(g, vertex.size = 7, vertex.label = V(g)$value, vertex.label.cex = 0.6, main = "Map with     Population Values")

And here is the function:

sg <- subgraph_isomorphisms(make_ring(4), g)
lst <- unique(lapply(sg, \(x) sort(names(x))))
out <- do.call(
  rbind,
  lapply(
    lst,
    \(v) data.frame(
      node_id = toString(v),
      value = sum(V(induced_subgraph(g, v))$value)
    )
  )
)

This approach is currently using a brute force style approach in which every node is individually checked. Are there any ways in R to restructure this function so that it runs in parallel or a different type of search algorithm that can scan the network more efficiently?

I had two ideas about this:

Idea 1:

Rewriting the function to look at square grids and tesselate them over the network:

 efficient_sum_squares <- function(g, width, height) {
   results <- data.frame(node_id = character(), value = numeric())

   for (i in 1:(width - 1)) {
     for (j in 1:(height - 1)) {
       nodes <- c(
         get_node_index(i, j),
         get_node_index(i + 1, j),
         get_node_index(i, j + 1),
         get_node_index(i + 1, j + 1)
       )

       sum_value <- sum(V(g)$value[nodes])

       results <- rbind(results, data.frame(node_id = toString(nodes), value = sum_value))
     }
   }

   results
 }

 out_efficient <- efficient_sum_squares(g, width, height)

Idea 2:

I thought that comparisons could be carried out in a vectorized fashion:

 vectorized_sum_squares <- function(g, width, height) {
   x_mat <- matrix(V(g)$x, nrow = height, ncol = width, byrow = FALSE)
   y_mat <- matrix(V(g)$y, nrow = height, ncol = width, byrow = FALSE)
   value_mat <- matrix(V(g)$value, nrow = height, ncol = width, byrow = FALSE)

   sums <- value_mat[1:(height-1), 1:(width-1)] + 
           value_mat[2:height, 1:(width-1)] + 
           value_mat[1:(height-1), 2:width] + 
           value_mat[2:height, 2:width]

   node_ids <- apply(which(sums == sums, arr.ind = TRUE), 1, function(idx) {
     i <- idx[1]
     j <- idx[2]
     toString(c(
       get_node_index(j, i),
       get_node_index(j + 1, i),
       get_node_index(j, i + 1),
       get_node_index(j + 1, i + 1)
     ))
   })

   data.frame(node_id = node_ids, value = as.vector(sums))
 }

 out_vectorized <- vectorized_sum_squares(g, width, height)

Is there any better way to work on this problem?

Solution

If you search such squares (consisting of 4 adjacent nodes) in a grid, I don't you really need igraph at all. The 2nd idea is good enough if you work with a matrix only, and igraph operations can be avoided.

Here is an example similar to 2nd approach

set.seed(0)
width <- 15
height <- 10
gmat <- matrix(sample.int(10, width * height, replace = TRUE), height)

ul <- gmat[-height, -width]
ur <- gmat[-height, -1]
dl <- gmat[-1, -width]
dr <- gmat[-1, -1]

ssum <- ul + ur + dl + dr
idx <- apply(
  which(ssum == max(ssum), TRUE),
  1,
  \(x) {
    toString(crossprod(
      c(height, 1),
      x + cbind(
        c(-1, 0),
        c(-1, 1),
        c(0, 0),
        c(0, 1)
      )
    ))
  }
)

res <- data.frame(node_id = idx, value = max(ssum))

and you see that

> gmat
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
 [1,]    2    3    1    2    3    3    2    1    1     3     1     2     3
 [2,]    1    3    3    2    2    3    1    1    1     2     2     1     1
 [3,]    3    1    1    1    1    2    3    1    1     2     2     3     1
 [4,]    1    1    1    3    3    2    3    1    1     2     1     3     2
 [5,]    2    1    1    1    2    2    2    3    3     3     3     1     2
 [6,]    1    2    1    3    1    2    3    2    2     2     3     2     2
 [7,]    3    2    2    2    1    1    3    3    1     2     2     1     1
 [8,]    3    2    1    2    3    2    2    1    1     3     3     3     1
 [9,]    2    2    1    2    2    2    3    1    3     3     2     2     1
[10,]    2    3    2    2    2    2    3    2    3     3     1     3     2
      [,14] [,15]
 [1,]     1     2
 [2,]     3     2
 [3,]     2     1
 [4,]     3     2
 [5,]     3     3
 [6,]     2     3
 [7,]     3     3
 [8,]     3     3
 [9,]     1     3
[10,]     1     1

and

> res
          node_id value
1 89, 90, 99, 100    12
2  74, 75, 84, 85    12