Search code examples
r algorithm performance igraph

Optimizing Functions for Network Graphs


In this question here (Summing Nodes in a Network), I learned how to find the square within the original network having the largest node sum.

Here is the data for this question:

library(igraph)

# Grid dimensions: `width` columns by `height` rows, one node per cell.
width <- 30
height <- 20
num_nodes <- width * height

# Per-node coordinates in column-major order: the first `height` nodes make up
# column 1, the next `height` nodes column 2, and so on.
x <- rep(seq_len(width), each = height)
y <- rep_len(seq_len(height), num_nodes)

g <- make_empty_graph(n = num_nodes, directed = FALSE)

# Map grid coordinates to a node id: column `i`, row `j`, numbered column by
# column. Vectorised over `i` and `j`. Reads the global `height`.
get_node_index <- function(i, j) (i - 1) * height + j

# Build the edge list in one shot instead of growing a vector inside a double
# loop (which copies the whole vector on every append).
# The edge order is the same as the nested loops would produce: nodes are
# visited column by column, and for each node the edge to its right neighbour
# comes before the edge to its bottom neighbour.
edges <- local({
  node_col <- rep(seq_len(width), each = height)
  node_row <- rep(seq_len(height), times = width)
  node_id <- get_node_index(node_col, node_row)

  # NA marks a neighbour that would fall off the grid.
  right_id <- ifelse(
    node_col < width, get_node_index(node_col + 1, node_row), NA
  )
  below_id <- ifelse(
    node_row < height, get_node_index(node_col, node_row + 1), NA
  )

  # 2 x (2 * n) matrix with one (from, to) column per candidate edge.
  edge_pairs <- rbind(
    rep(node_id, each = 2),
    as.vector(rbind(right_id, below_id))
  )
  keep <- !is.na(edge_pairs[2, ])
  as.vector(edge_pairs[, keep, drop = FALSE])
})

g <- add_edges(g, edges)

# Store the grid coordinates on the vertices.
# NOTE(review): presumably igraph's plot() uses the `x`/`y` vertex attributes
# as the layout - confirm against the igraph plotting documentation.
V(g)$x <- x
V(g)$y <- y

# Two panels side by side. The previous par() settings are not restored here.
par(mfrow=c(1,2))

# Name every vertex by its node index; the later subgraph search reads these
# names back via names().
V(g)$name <- 1:num_nodes
plot(g, vertex.size = 7, vertex.label = V(g)$name, vertex.label.cex = 0.6, main = "Map with         Node Indices")

# Random integer "population" per node, drawn from 1..100 with replacement.
# No set.seed() call is visible in this snippet, so the values differ between
# runs unless a seed is set elsewhere.
V(g)$value <- sample(1:100, num_nodes, replace = TRUE)
plot(g, vertex.size = 7, vertex.label = V(g)$value, vertex.label.cex = 0.6, main = "Map with     Population Values")

And here is the function:

# Brute force: find every occurrence of a 4-cycle (a unit square) in `g` and
# sum the `value` attribute of its four nodes.
sg <- subgraph_isomorphisms(make_ring(4), g)

# The same square is presumably reported more than once (matches that differ
# only in vertex order), so compare the sorted vertex names and keep each set
# of four names once.
lst <- unique(lapply(sg, \(x) sort(names(x))))

# Look the values up by vertex name rather than building an induced subgraph
# and a one-row data.frame for every square. This also yields a well-formed
# zero-row data.frame (instead of NULL) when no square is found.
node_values <- setNames(V(g)$value, V(g)$name)
out <- data.frame(
  node_id = vapply(lst, toString, character(1)),
  value = vapply(lst, \(v) sum(node_values[v]), numeric(1))
)

This is currently a brute-force approach in which every node is checked individually. Is there a way in R to restructure it so that it runs in parallel, or is there a different type of search algorithm that can scan the network more efficiently?

I had two ideas about this:

  1. Idea 1:

    Rewriting the function to look at square grids and tessellate them over the network:

     # Sum the `value` vertex attribute over every 2 x 2 square of a grid.
     #
     # g      : graph whose vertices carry a `value` attribute and are numbered
     #          column by column, i.e. the node in column i, row j has id
     #          (i - 1) * height + j.
     # width  : number of grid columns.
     # height : number of grid rows.
     #
     # Returns a data.frame with one row per square, ordered by column and then
     # by row of the square's top-left node: `node_id` holds the four node ids
     # as a comma-separated string, `value` their summed values. A grid with
     # fewer than two rows or columns gives a zero-row data.frame.
     efficient_sum_squares <- function(g, width, height) {
       node_values <- V(g)$value
       stopifnot(length(node_values) == width * height)

       # Node ids are derived from the `height` argument, so the result does not
       # depend on a global `height` happening to match.
       node_index <- function(i, j) (i - 1) * height + j

       # Top-left corner of every square; seq_len() (unlike 1:(n - 1)) yields an
       # empty sequence when there is no room for a square.
       n_cols <- max(width - 1, 0)
       n_rows <- max(height - 1, 0)
       i <- rep(seq_len(n_cols), each = n_rows)
       j <- rep(seq_len(n_rows), times = n_cols)

       top_left <- node_index(i, j)
       top_right <- node_index(i + 1, j)
       bottom_left <- node_index(i, j + 1)
       bottom_right <- node_index(i + 1, j + 1)

       # Built in one shot instead of rbind()-ing a one-row data.frame per square.
       data.frame(
         node_id = paste(
           top_left, top_right, bottom_left, bottom_right,
           sep = ", "
         ),
         value = node_values[top_left] + node_values[top_right] +
           node_values[bottom_left] + node_values[bottom_right]
       )
     }
    
     # Apply the grid-based version to the graph and dimensions defined earlier.
     out_efficient <- efficient_sum_squares(g, width, height)
    
  2. Idea 2:

    I thought that comparisons could be carried out in a vectorized fashion:

     # Sum the `value` vertex attribute over every 2 x 2 square of a grid using
     # shifted sub-matrices instead of a loop.
     #
     # g      : graph whose vertices carry a `value` attribute and are numbered
     #          column by column, i.e. the node in column c, row r has id
     #          (c - 1) * height + r.
     # width  : number of grid columns.
     # height : number of grid rows.
     #
     # Returns a data.frame with one row per square (column-major order of the
     # square's top-left node): `node_id` holds the four node ids as a
     # comma-separated string, `value` their summed values. Squares touching an
     # NA value are kept, with an NA sum.
     vectorized_sum_squares <- function(g, width, height) {
       node_values <- V(g)$value
       stopifnot(length(node_values) == width * height)
       value_mat <- matrix(node_values, nrow = height, ncol = width)

       # Rows/columns that can hold the top-left corner of a square.
       top <- seq_len(max(height - 1, 0))
       left <- seq_len(max(width - 1, 0))

       # drop = FALSE keeps the pieces as matrices even for 2-row or 2-column
       # grids, where plain indexing would collapse them to vectors.
       sums <- value_mat[top, left, drop = FALSE] +
         value_mat[top + 1, left, drop = FALSE] +
         value_mat[top, left + 1, drop = FALSE] +
         value_mat[top + 1, left + 1, drop = FALSE]

       # Id of each square's top-left node, in the same column-major order as
       # as.vector(sums). Taken from the cell positions directly, so NA sums
       # cannot shift the ids out of step with the values.
       top_left <- as.vector((col(sums) - 1) * height + row(sums))
       node_ids <- paste(
         top_left,               # column c,     row r
         top_left + height,      # column c + 1, row r
         top_left + 1,           # column c,     row r + 1
         top_left + height + 1,  # column c + 1, row r + 1
         sep = ", "
       )

       data.frame(node_id = node_ids, value = as.vector(sums))
     }
    
     # Apply the matrix-based version to the graph and dimensions defined earlier.
     out_vectorized <- vectorized_sum_squares(g, width, height)
    

Is there any better way to work on this problem?


Solution

  • If you are searching for such squares (consisting of 4 adjacent nodes) in a grid, I don't think you really need igraph at all. The 2nd idea is good enough if you work with a matrix only, so the igraph operations can be avoided.

    Here is an example similar to the 2nd approach:

    # Find every 2 x 2 square with the largest sum in a height x width matrix.
    # Nodes are numbered column by column, so the cell in row r, column c is
    # node (c - 1) * height + r, which is also its linear index into `gmat`.
    set.seed(0)
    width <- 15
    height <- 10
    # Values are drawn from 1..3, the range of the matrix printed below.
    gmat <- matrix(sample.int(3, width * height, replace = TRUE), height)

    # The four corners of every square, as equally sized shifted sub-matrices.
    ul <- gmat[-height, -width, drop = FALSE]
    ur <- gmat[-height, -1, drop = FALSE]
    dl <- gmat[-1, -width, drop = FALSE]
    dr <- gmat[-1, -1, drop = FALSE]

    ssum <- ul + ur + dl + dr

    # (row, col) of the top-left cell of each maximal square.
    best <- which(ssum == max(ssum), arr.ind = TRUE)

    # Column-major id of the top-left node: row + height * (col - 1).
    # Weighting the row by `height` instead would only be right when row == col;
    # e.g. for height = 10 the square starting at row 7, column 14 consists of
    # nodes 137, 138, 147, 148.
    top_left <- unname(best[, 1] + height * (best[, 2] - 1))
    idx <- paste(
      top_left,               # upper left
      top_left + 1,           # lower left  (next row, same column)
      top_left + height,      # upper right (same row, next column)
      top_left + height + 1,  # lower right
      sep = ", "
    )

    res <- data.frame(node_id = idx, value = max(ssum))
    

    and you see that

    > gmat
          [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
     [1,]    2    3    1    2    3    3    2    1    1     3     1     2     3
     [2,]    1    3    3    2    2    3    1    1    1     2     2     1     1
     [3,]    3    1    1    1    1    2    3    1    1     2     2     3     1
     [4,]    1    1    1    3    3    2    3    1    1     2     1     3     2
     [5,]    2    1    1    1    2    2    2    3    3     3     3     1     2
     [6,]    1    2    1    3    1    2    3    2    2     2     3     2     2
     [7,]    3    2    2    2    1    1    3    3    1     2     2     1     1
     [8,]    3    2    1    2    3    2    2    1    1     3     3     3     1
     [9,]    2    2    1    2    2    2    3    1    3     3     2     2     1
    [10,]    2    3    2    2    2    2    3    2    3     3     1     3     2
          [,14] [,15]
     [1,]     1     2
     [2,]     3     2
     [3,]     2     1
     [4,]     3     2
     [5,]     3     3
     [6,]     2     3
     [7,]     3     3
     [8,]     3     3
     [9,]     1     3
    [10,]     1     1
    

    and

    > res
                 node_id value
    1    89, 90, 99, 100    12
    2 137, 138, 147, 148    12