Search code examples
r, algorithm, cluster-analysis, igraph, network-analysis

Identify connected subnetworks, constrained by edge attributes


I am working with network data and want to identify connected subnetworks. I loaded my data using
graph <- graph_from_data_frame(filtered_df, directed = FALSE) and plotted my network:

plot(graph)

E(graph)$conflict_period
[1] "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "372_1"  "372_1"  "372_1" 
[15] "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0" 
[29] "522_0"  "522_0"  "522_0"  "715_0"  "715_0"  "715_0"  "715_0"  "5390_0"

So far, the information on the subgroups is stored in the edges. A node belongs to each subgroup it receives an edge from, so a node can also belong to multiple subgroups. For instance, Government of Mali belongs together with Civilians to subgroup "72_1" and together with FIAA to subgroup "372_1". I want to know if subgroups "72_1" and "372_1" are connected, which they are if at least two nodes belonging to subgroup "72_1" are connected via edges to at least two nodes belonging to subgroup "372_1". I tried ways outside network analysis to identify this relationship, but failed. Now I am here asking for help.

enter image description here

The desired output would be a table listing the connected subgroups based on the aforementioned criterion. In this case it should be:

conflict_period connected
72_1 372_1,522_0
372_1 72_1,522_0
522_0 372_1,72_1
715_0 NA
5390_0 NA

Here is the used data:

structure(list(side_a = c("Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", 
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "MPGK", "MPGK", "MPGK", 
"MPGK", "ARLA, FIAA, FPLA"), side_b = c("Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "FIAA", "FIAA", 
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "MPA"), country = c("Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali"), period_start = structure(c(765158400, 
765158400, 765158400, 765158400, 765158400, 765158400, 765158400, 
765158400, 765158400, 765158400, 765158400, 770256000, 770256000, 
770256000, 770256000, 770256000, 770256000, 770256000, 770256000, 
770256000, 770256000, 771552000, 771552000, 771552000, 771552000, 
771552000, 771552000, 771552000, 771552000, 771552000, 771552000, 
769910400, 769910400, 769910400, 769910400, 771206400), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), period_end = structure(c(817862400, 817862400, 817862400, 
817862400, 817862400, 817862400, 817862400, 817862400, 817862400, 
817862400, 817862400, 819158400, 819158400, 819158400, 819158400, 
819158400, 819158400, 819158400, 819158400, 819158400, 819158400, 
816739200, 816739200, 816739200, 816739200, 816739200, 816739200, 
816739200, 816739200, 816739200, 816739200, 814492800, 814492800, 
814492800, 814492800, 802828800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), conflict_period = c("72_1", "72_1", "72_1", "72_1", 
"72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "372_1", 
"372_1", "372_1", "372_1", "372_1", "372_1", "372_1", "372_1", 
"372_1", "372_1", "522_0", "522_0", "522_0", "522_0", "522_0", 
"522_0", "522_0", "522_0", "522_0", "522_0", "715_0", "715_0", 
"715_0", "715_0", "5390_0")), row.names = c(NA, -36L), class = c("tbl_df", 
"tbl", "data.frame"))

Solution

  • According to your latest update, here is one option to achieve your desired output

    library(igraph)
    library(dplyr)
    
    # Build an undirected graph from `df` (the data frame shown in the question).
    # Only the columns whose names start with "side" or contain "conflict" are
    # kept, and duplicate rows are collapsed, so each distinct
    # (side_a, side_b, conflict_period) combination becomes one edge that carries
    # `conflict_period` as an edge attribute.
    g <- df %>%
        select(matches("^side|conflict")) %>%
        distinct() %>%
        graph_from_data_frame(directed = FALSE)
    
    # Repeatedly drop every vertex of degree < 2 until all remaining vertices
    # have degree >= 2. Removing a vertex can lower its neighbours' degrees,
    # hence the loop rather than a single filter.
    gh <- g
    repeat {
        d <- degree(gh)
        # `all()` is TRUE for a zero-length vector, so a graph that has been
        # pruned away completely ends the loop cleanly; `min()` would emit a
        # warning on empty input.
        if (all(d >= 2)) break
        gh <- induced_subgraph(gh, V(gh)[d >= 2])
    }
    
    # For each connected component of the pruned graph, collect the distinct
    # conflict periods found on its edges. `clt` is an empty list when nothing
    # survived the pruning.
    clt <- lapply(decompose(gh), \(x) unique(E(x)$conflict_period))
    
    # One output row per conflict period, in order of first appearance in `g`.
    periods <- unique(E(g)$conflict_period)
    
    # For each period, gather the other periods that share a cluster with it.
    # Looking at every cluster containing the period (instead of relying on
    # `sapply()` simplification) keeps the result a plain list of length
    # `length(periods)` whether there are zero, one, or several clusters.
    connected <- lapply(periods, \(p) {
        linked <- setdiff(unlist(Filter(\(q) p %in% q, clt)), p)
        # Periods outside every cluster, or alone in their cluster, get NA.
        if (length(linked) > 0) linked else NA_character_
    })
    
    # Assemble the result; `connected` is stored as a list column so that each
    # row can hold several periods.
    res <- data.frame(conflict_period = periods)
    res$connected <- connected
    

    such that

    > res
      conflict_period    connected
    1            72_1 372_1, 522_0
    2           372_1  72_1, 522_0
    3           522_0  372_1, 72_1
    4           715_0           NA
    5          5390_0           NA