Search code examples
r sankey-diagram

How to make a multi-level Sankey diagram using R?


I have accounting data that needs to be visualized as a multi-level Sankey diagram.

I made my dataset sample reproducible in case you need it.

# Sample data as a plain list of four equal-length (24-element) columns:
# network_name, type, thirdparty and amount.
# NOTE(review): the snippets that follow read these columns from an object
# called `san1`; presumably this structure is what was assigned to it
# (the assignment itself is not shown) -- confirm.
structure(list(
  network_name = c("YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO"),
  type = c("deposits", "deposits", "withdrawals", "withdrawals", "trf_outgoing", "trf_outgoing","trf_incoming", "trf_incoming", "trf_incoming", "trf_incoming","trf_outgoing", "trf_incoming", "trf_outgoing", "trf_outgoing","chk_issued", "chk_issued", "chk_issued", "chk_issued", "chk_received","chk_received", "chk_received", "chk_received", "chk_received","chk_received"),
  thirdparty = c("Christine", "Mike", "Patrick","Natalie", "Renee", "Jacob", "Renee", "Kathy", "John", "Ahmad", "Ahmad", "Tito", "Tito", "John", "Sally", "Tito", "John", "Ahmad", "Mohamad", "Tito", "John", "Sally", "Tito", "John"),
  amount = c(2038472, 683488, 38765, 123413, 94543234, 20948043, 34842843, 218864, 6468486, 384684, 5348687, 34684687, 6936937, 16841287, 1584587, 1901504.4, 2281805.28, 2738166.34, 295910.77, 4114374.62, 26680528.46, 5336105.38, 12954836.15, 1218913.08)))

What I did:

# Level 1: one link per row, from the network name to the transaction type.
source1 <- san1[["network_name"]]
target1 <- san1[["type"]]

# One node per distinct label, in order of first appearance.
nodes1 <- data.frame(name = unique(c(source1, target1)))

# match() returns 1-based positions; subtracting 1 gives zero-based node
# indices for the link table.
links1 <- data.frame(
  source1 = match(source1, nodes1[["name"]]) - 1,
  target1 = match(target1, nodes1[["name"]]) - 1,
  value1 = san1[["amount"]]
)

sankeyNetwork(
  Links = links1,
  Nodes = nodes1,
  Source = "source1",
  Target = "target1",
  Value = "value1",
  NodeID = "name",
  nodeWidth = 40,
  nodePadding = 20,
  fontSize = 14
)

and the result was: enter image description here

and I did the same thing for the second level of my data:

# Level 2: one link per row, from the transaction type to the third party.
source2 <- san1[["type"]]
target2 <- san1[["thirdparty"]]

# One node per distinct label, in order of first appearance.
nodes2 <- data.frame(name2 = unique(c(source2, target2)))

# match() returns 1-based positions; subtracting 1 gives zero-based node
# indices for the link table.
links2 <- data.frame(
  source2 = match(source2, nodes2[["name2"]]) - 1,
  target2 = match(target2, nodes2[["name2"]]) - 1,
  value2 = san1[["amount"]]
)

sankeyNetwork(
  Links = links2, Nodes = nodes2,
  Source = "source2", Target = "target2",
  Value = "value2", NodeID = "name2",
  nodeWidth = 20, nodePadding = 20, fontSize = 14
)

and the result was the same as the first one:

enter image description here

Now what I need to do is combine both diagrams into a single multi-level diagram. I searched a lot and read examples, but they didn't work for me because the nature of my data is different from what I saw.

I also need the amounts to be shown in the diagram.


Solution

  • Since you don't mention which non-base R package you're using to draw your Sankey diagrams, I am showing you an option using plotly.

    library(plotly)
    library(tidyverse)
    
    # Prepare node and link data for plotting.
    # Nodes: stack every column except `amount` into one label column, keep
    # each distinct label once, and give it a zero-based index. row_number()
    # is used instead of 1:n() so that an empty input yields an empty index
    # rather than the sequence c(1, 0).
    nodes <- df %>%
        pivot_longer(-amount, values_to = "name_node") %>%
        distinct(name_node) %>%
        mutate(idx = row_number() - 1)
    # Links: stack the two levels (network_name -> type, type -> thirdparty),
    # total the amounts per (source, target) pair, then replace the labels by
    # their node index.
    links <- bind_rows(
        df %>% select(source = network_name, target = type, amount),
        df %>% select(source = type, target = thirdparty, amount)) %>%
        group_by(source, target) %>%
        summarise(value = sum(amount), .groups = "drop") %>%
        mutate(across(c(source, target), ~ nodes$idx[match(.x, nodes$name_node)]))
    
    # Plot: node labels come from `nodes`; the source/target/value columns of
    # `links` are passed as a named list.
    plot_ly(
        type = "sankey",
        orientation = "h",
        node = list(label = nodes$name_node, pad = 15, thickness = 15),
        link = as.list(links))
    

    This produces

    enter image description here

    You can see the totals on hover; e.g. in the screenshot above, the value linking "trf_outgoing" to "Renee" is 94.5 million.


    Sample data

    # Sample data used by the node/link preparation code, which reads `df`;
    # define it before running that code.
    # Built first as a plain list of four equal-length (24-element) columns.
    df <- structure(list(
        network_name = c("YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO"),
        type = c("deposits", "deposits", "withdrawals", "withdrawals", "trf_outgoing", "trf_outgoing","trf_incoming", "trf_incoming", "trf_incoming", "trf_incoming","trf_outgoing", "trf_incoming", "trf_outgoing", "trf_outgoing","chk_issued", "chk_issued", "chk_issued", "chk_issued", "chk_received","chk_received", "chk_received", "chk_received", "chk_received","chk_received"),
        thirdparty = c("Christine", "Mike", "Patrick","Natalie", "Renee", "Jacob", "Renee", "Kathy", "John", "Ahmad", "Ahmad", "Tito", "Tito", "John", "Sally", "Tito", "John", "Ahmad", "Mohamad", "Tito", "John", "Sally", "Tito", "John"),
        amount = c(2038472, 683488, 38765, 123413, 94543234, 20948043, 34842843, 218864, 6468486, 384684, 5348687, 34684687, 6936937, 16841287, 1584587, 1901504.4, 2281805.28, 2738166.34, 295910.77, 4114374.62, 26680528.46, 5336105.38, 12954836.15, 1218913.08)))
    # Turn the list of columns into a single table so the dplyr/tidyr verbs
    # can operate on it.
    df <- bind_cols(df)   # or: as.data.frame(df)