Search code examples
r cluster-analysis igraph

Creating a new graph based on cluster analysis, with edges labelled by the vertices that connect the created groups


library(network)
library(networkD3)
library(igraph)
library(visNetwork)

# Example edge list: every row is one link from `src` to `target`.
df <- read.table(header = TRUE,
                 text = "src   target
                 cllient1  cllient2
                 cllient1  cllient4
                 cllient1  cllient6
                 cllient2  cllient3
                 cllient4  cllient1
                 cllient4  cllient3
                 cllient5  cllient6
                 cllient6  cllient5")

# Turn the edge list into an igraph object; the vertices are taken from the
# client names found in the two columns.
df_graph <- graph_from_data_frame(df)

# Quick interactive preview of the raw edge list.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of `zoom = T`.
simpleNetwork(df, zoom = TRUE, fontSize = 9)

enter image description here

# Detect communities with the walktrap algorithm and keep the community id
# of every vertex.
wc <- cluster_walktrap(df_graph)
members <- membership(wc)

# Merge all vertices of a community into a single vertex. `toString` joins
# the client names of the merged vertices into one comma-separated string,
# which becomes the `name` attribute of the contracted vertex.
contracted_graph <- contract(df_graph, members, vertex.attr.comb = toString)
contracted_graph <- as.undirected(contracted_graph)

# Long-form table kept under its original name for code that still inspects
# it. Nothing below depends on its column names or positions, because those
# are generated by igraph and vary with the attributes present on the graph.
df_graph_cntrctd <- as_long_data_frame(contracted_graph)

# One row per contracted vertex (= community), read directly from the graph
# so that every group gets a node, including groups that never show up as
# the second endpoint of an edge.
group_ids <- seq_len(vcount(contracted_graph))
idLabel <- data.frame(
  id = group_ids,
  title = V(contracted_graph)$name,
  label = group_ids
)

# One row per distinct connection between groups, using numeric vertex ids
# so the values match `idLabel$id`.
edge_ends <- as_edgelist(contracted_graph, names = FALSE)
FromTo <- unique(data.frame(from = edge_ends[, 1], to = edge_ends[, 2]))

# Tables in the layout visNetwork expects: nodes (id, label, title) and
# edges (from, to).
nodes <- data.frame(id = idLabel$id,
                    label = idLabel$label,
                    title = idLabel$title)

edges <- data.frame(from = FromTo$from, to = FromTo$to)

network <- visNetwork(nodes, edges, width = "100%", height = 900) %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)

enter image description here So far we have a network :) It is also possible to get a list of the clients through which the groups created by walktrap are connected. The idea is to show those clients on the edges. The lines of code below show the list of all such connections, just in descending order:

# Name every vertex after its community, so that the edge list below is
# expressed in community ids. Note: this overwrites the client names stored
# on `df_graph`.
V(df_graph)$name <- members
x <- as_edgelist(df_graph, names = TRUE)

# Rename the vertices to their numeric ids so that the printed edges identify
# the clients by id. `seq_len()` stays well-defined for an empty graph,
# whereas `1:vcount()` would yield c(1, 0).
V(df_graph)$name <- seq_len(vcount(df_graph))

# Edges whose two endpoints belong to different communities.
E(df_graph)[x[, 1] != x[, 2]]

In the result we get

+ 1/8 edge from c92bcba (vertex names):
[1] 1->5

which means, as far as I understand, that the group with label "1" is connected to the group with label "2" through "client1" (vertex id 1) and "client6" (vertex id 5). My question is how to turn such a result into a table like this:

  from  to  label
    1   1   NA
    1   2   Client1,Client6
    2   2   NA

where "from" and "to" are the names of the groups created by the cluster analysis, and Client1 and Client6 are the clients who connect those two groups

enter image description here


Solution

  • This works but is very inelegant:

    # Sample edge list: one row per link from `src` to `target`.
    df <- read.table(
      text = "src   target
                 cllient1  cllient2
                 cllient1  cllient4
                 cllient1  cllient6
                 cllient2  cllient3
                 cllient4  cllient1
                 cllient4  cllient3
                 cllient5  cllient6
                 cllient6  cllient5",
      header = TRUE
    )

    df_graph <- graph_from_data_frame(df)

    # Untouched copy of the graph; it keeps the client names that are
    # overwritten on `df_graph` below.
    df_graph0 <- df_graph

    # Walktrap community detection on the graph.
    wc <- cluster_walktrap(df_graph)

    # Name every vertex after the community it was assigned to.
    V(df_graph)$name <- membership(wc)
    

    get edgelist based on membership, which translates into from and to in your request.

    # Assumes the vertex names of `df_graph` currently hold community ids, so
    # each row gives the communities of an edge's two endpoints.
    # `x` is computed the same way as `communities` and is not used by the
    # later steps shown here; it is kept in case other code refers to it.
    # `TRUE` is spelled out because `T` can be reassigned.
    x <- as_edgelist(df_graph, names = TRUE)
    communities <- ends(df_graph, E(df_graph))
    

    Reset names to vertex ids (not requested, but maybe useful):

    # Name the vertices 1..n so that `ends()` reports numeric vertex ids.
    # `seq_len()` stays well-defined for an empty graph, whereas `1:vcount()`
    # would yield c(1, 0).
    V(df_graph)$name <- seq_len(vcount(df_graph))
    ids <- ends(df_graph, E(df_graph))
    

    set vertex names corresponding to clients (label)

    # Copy the vertex names of the untouched graph `df_graph0` back onto
    # `df_graph`, then read the endpoints of every edge by name.
    V(df_graph)$name <- vertex_attr(df_graph0, "name")
    label <- ends(df_graph, es = E(df_graph))
    

    store in dataframe

    # One row per edge: the two values from `communities` (from/to), the two
    # values from `label` (label1/label2) and the two values from `ids`
    # (ids1/ids2).
    df_result <- data.frame(
      from   = communities[, 1],
      to     = communities[, 2],
      label1 = label[, 1],
      label2 = label[, 2],
      ids1   = ids[, 1],
      ids2   = ids[, 2]
    )
    

    Which results in this:

      from to   label1   label2 ids1 ids2
    1    1  1 cllient1 cllient2    1    2
    2    1  1 cllient1 cllient4    1    3
    3    1  2 cllient1 cllient6    1    5
    4    1  1 cllient2 cllient3    2    6
    5    1  1 cllient4 cllient1    3    1
    6    1  1 cllient4 cllient3    3    6
    7    2  2 cllient5 cllient6    4    5
    8    2  2 cllient6 cllient5    5    4
    

    Additionally you could paste label1 and label2 in order to get your comma separated label column.

    EDIT: In order to "contract" labels you could do something like this:

     library(tidyr) 
     library(dplyr) 
     # Join the two endpoint names of every edge into one "a,b" label.
     df_result$label <- paste(df_result$label1, df_result$label2, sep = ",")

     # Collect all labels that share the same (from, to) pair into a
     # list-column called `data`. The nested column is named explicitly
     # because passing unnamed columns to `nest()` is deprecated.
     df_nested <- df_result %>%
       select(from, to, label) %>%
       nest(data = label)
    

    To use these nested labels as edge labels or strings paste them together:

     # Collapse every nested group of labels into one space-separated string.
     # `vapply()` guarantees a character vector and also handles a table with
     # zero rows, where iterating over `1:nrow()` would visit rows 1 and 0.
     df_nested$data <- vapply(
       df_nested$data,
       function(labels) paste(unlist(labels), collapse = " "),
       character(1),
       USE.NAMES = FALSE
     )