Randomly Split a Graph into Mini Graphs

I have this graph network in R:

library(igraph)
n_rows <- 10
n_cols <- 5
g <- make_lattice(dimvector = c(n_cols, n_rows))

layout <- layout_on_grid(g, width = n_cols)

n_nodes <- vcount(g)
node_colors <- rep("white", n_nodes)

for (row in 0:(n_rows-1)) {
    start_index <- row * n_cols + 1
    node_colors[start_index:(start_index+2)] <- "orange"  
    node_colors[(start_index+3):(start_index+4)] <- "purple"    
}

node_labels <- 1:n_nodes

plot(g, 
     layout = layout, 
     vertex.color = node_colors,
     vertex.label = node_labels,
     vertex.label.color = "black",
     vertex.size = 15,
     edge.color = "gray",
     main = "Rectangular Undirected Network")

I am trying to write a function which randomly breaks this network into 5 connected subgraphs (i.e. mini graphs) such that each node appears exactly once.

I think in theory, this should not be too difficult to do. I would need to randomly identify a node, randomly decide how many neighbors to include, select those neighbors and remove them from the graph .... and restart this process on the remaining graph. Of course, some additional details would need to be specified, e.g. if the random number specified exceed the number of remaining nodes then use a max function, BFS would need to be used to select the nodes, etc.

Here was my first attempt at writing the code:

get_connected_subgraph <- function(graph, available_nodes, min_nodes = 5, max_nodes = 15) {
    if (length(available_nodes) == 0) return(NULL)
    
    start_node <- sample(available_nodes, 1)
    
    bfs_result <- bfs(graph, root = start_node, unreachable = FALSE, order = TRUE, rank = TRUE, father = TRUE)
    
    bfs_order <- intersect(bfs_result$order, available_nodes)
    
    n_subgraph_nodes <- min(sample(min_nodes:max_nodes, 1), length(bfs_order))
    
    subgraph_nodes <- bfs_order[1:n_subgraph_nodes]
    
    return(subgraph_nodes)
}

create_5_subgraphs <- function(graph) {
    available_nodes <- V(graph)
    subgraphs <- list()
    
    for (i in 1:5) {
        subgraph_nodes <- get_connected_subgraph(graph, available_nodes)
        if (is.null(subgraph_nodes)) break
        
        subgraphs[[i]] <- subgraph_nodes
        available_nodes <- setdiff(available_nodes, subgraph_nodes)
    }
    
    return(subgraphs)
}

set.seed(42) 
subgraphs <- create_5_subgraphs(g)

subgraph_colors <- c("red", "blue", "green", "yellow", "purple")

node_subgraph_colors <- rep("lightgray", vcount(g))
for (i in 1:length(subgraphs)) {
    node_subgraph_colors[subgraphs[[i]]] <- subgraph_colors[i]
}

edge_subgraph_colors <- rep("lightgray", ecount(g))
for (i in 1:length(subgraphs)) {
    subgraph_edges <- E(g)[.inc(subgraphs[[i]])]
    edge_subgraph_colors[subgraph_edges] <- subgraph_colors[i]
}

plot(g, 
     layout = layout,
     vertex.color = node_subgraph_colors,
     vertex.label = node_labels,
     vertex.label.color = "black",
     vertex.size = 15,
     edge.color = edge_subgraph_colors,
     edge.width = 2,
     main = "Network with 5 Separate Connected Subgraphs")

The above result looks almost correct, but the yellow nodes (e.g. 29) appears to be violating the connectivity.

Any pointers on how to fix this?

I wrote some optional code to compare the before/after:

node_info <- data.frame(
    Node_Index = 1:vcount(g),
    Original_Color = node_colors,
    New_Color = node_subgraph_colors
)

get_subgraph_number <- function(node) {
    subgraph_num <- which(sapply(subgraphs, function(x) node %in% x))
    if (length(subgraph_num) == 0) return(NA)
    return(subgraph_num)
}

node_info$Subgraph_Number <- sapply(node_info$Node_Index, get_subgraph_number)

head(node_info)

To complement jblood94's amazing answer, here is a quick plotting function that works with jblood94's answer:

library(igraph)
library(data.table)

f <- function(g, n) {
    m <- length(g)
    dt <- setDT(as_data_frame(g))
    dt <- rbindlist(list(dt, dt[,.(from = to, to = from)]))
    dt[,group := 0L]
    used <- logical(m)
    s <- sample(m, n)
    used[s] <- TRUE
    m <- m - n
    dt[from %in% s, group := .GRP, from]
    
    while (m) {
        dt2 <- unique(
            dt[group != 0L & !used[to], .(grow = to, onto = group)][sample(.N)],
            by = "grow"
        )
        dt[dt2, on = .(from = grow), group := onto]
        used[dt2[[1]]] <- TRUE
        m <- m - nrow(dt2)
    }
    
    unique(dt[,to := NULL])[,.(vertices = .(from)), group]
}


plot_multiple_subgraphs <- function(n_plots = 25, n_rows = 10, n_cols = 5, n_subgraphs = 5) {
    g <- make_lattice(dimvector = c(n_cols, n_rows))
    layout <- layout_on_grid(g, width = n_cols)
    n_nodes <- vcount(g)
    
    color_palette <- c("red", "blue", "green", "yellow", "purple")
    
    par(mfrow = c(5, 5), mar = c(0.5, 0.5, 2, 0.5))
    
    for (i in 1:n_plots) {
        subgraphs <- f(g, n_subgraphs)
        
        node_colors <- rep("white", n_nodes)
        
        for (j in 1:nrow(subgraphs)) {
            nodes <- unlist(subgraphs$vertices[j])
            node_colors[nodes] <- color_palette[j]
        }
        
        plot(g, 
             layout = layout, 
             vertex.color = node_colors,
             vertex.label = NA,  
             vertex.size = 15,   
             edge.color = "gray",
             edge.width = 0.5,  
             main = paste("Partition", i),  
             cex.main = 0.8)     
    }
}

plot_multiple_subgraphs()

Solution

Here's a function that randomly selects n vertices from the graph g as the initial subgraph member for each of n groups, then iteratively "grows" each group until all the vertices are in a subgraph.

library(data.table)

f <- function(g, n) {
  m <- length(g)
  dt <- setDT(as_data_frame(g))
  dt <- rbindlist(list(dt, dt[,.(from = to, to = from)]))
  dt[,group := 0L]
  used <- logical(m)
  s <- sample(m, n)
  used[s] <- TRUE
  m <- m - n
  dt[from %in% s, group := .GRP, from]
  
  while (m) {
    dt2 <- unique(
      dt[group != 0L & !used[to], .(grow = to, onto = group)][sample(.N)],
      by = "grow"
    )
    dt[dt2, on = .(from = grow), group := onto]
    used[dt2[[1]]] <- TRUE
    m <- m - nrow(dt2)
  }
  
  unique(dt[,to := NULL])[,.(vertices = .(from), .N), group]
}

Demonstrating on the OP's graph:

set.seed(907044864)
f(g, 5L)
#>    group              vertices     N
#>    <int>                <list> <int>
#> 1:     1       1,2,3,6,7,8,...     9
#> 2:     2  4, 5, 9,10,13,14,...    13
#> 3:     3 21,22,26,27,31,36,...     9
#> 4:     4 23,28,29,32,33,38,...    10
#> 5:     5 30,34,35,39,40,44,...     9

Note: during the iterations, if multiple groups try to "grow into" the same vertex, the winning group is selected randomly. This is done with [sample(.N)] after all the candidate growths are found with dt[group != 0L & !used[to], .(grow = to, onto = group)].

Performance check

Testing performance on partitioning a 100-by-100 grid into 10 groups:

system.time(dt <- f(make_lattice(c(100, 100)), 10))
#>    user  system elapsed 
#>    0.16    0.02    0.17
dt
#>     group                          vertices     N
#>     <int>                            <list> <int>
#>  1:     4                   1,2,3,4,5,6,...  2329
#>  2:     2             43,44,45,46,47,48,...  1093
#>  3:     1             87,88,89,90,91,92,...    99
#>  4:     3       695,696,697,795,796,797,...   380
#>  5:     5 1551,1552,1553,1554,1650,1651,...  1363
#>  6:     6 3171,3172,3173,3174,3175,3176,...  1048
#>  7:     7 5921,5922,5923,5924,5925,5926,...  2377
#>  8:     8 6169,6171,6269,6270,6271,6272,...   339
#>  9:     9 6475,6575,6576,6675,6676,6677,...   264
#> 10:    10 7980,7981,7982,7983,7984,7985,...   708