I have accounting data that needs to be visualized as a multi-level Sankey diagram.
Here is a reproducible sample of my dataset in case you need it:
# Sample data (dput-style output): a list of four parallel vectors with 24
# entries each -- network_name, type, thirdparty (character) and amount (numeric).
# NOTE(review): the result is not assigned here, while the snippets that follow
# read from `san1` -- presumably this object is meant to be stored under that name.
structure(list(
network_name = c("YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO"),
type = c("deposits", "deposits", "withdrawals", "withdrawals", "trf_outgoing", "trf_outgoing","trf_incoming", "trf_incoming", "trf_incoming", "trf_incoming","trf_outgoing", "trf_incoming", "trf_outgoing", "trf_outgoing","chk_issued", "chk_issued", "chk_issued", "chk_issued", "chk_received","chk_received", "chk_received", "chk_received", "chk_received","chk_received"),
thirdparty = c("Christine", "Mike", "Patrick","Natalie", "Renee", "Jacob", "Renee", "Kathy", "John", "Ahmad", "Ahmad", "Tito", "Tito", "John", "Sally", "Tito", "John", "Ahmad", "Mohamad", "Tito", "John", "Sally", "Tito", "John"),
amount = c(2038472, 683488, 38765, 123413, 94543234, 20948043, 34842843, 218864, 6468486, 384684, 5348687, 34684687, 6936937, 16841287, 1584587, 1901504.4, 2281805.28, 2738166.34, 295910.77, 4114374.62, 26680528.46, 5336105.38, 12954836.15, 1218913.08)))
What I did:
# Level 1 of the flow: network_name -> type.
source1 <- san1[["network_name"]]
target1 <- san1[["type"]]

# One row per distinct label; sources are listed before targets.
nodes1 <- data.frame(name = unique(c(source1, target1)))

# Translate labels into zero-based row positions within nodes1 (the "- 1").
# NOTE(review): presumably sankeyNetwork() comes from networkD3, which expects
# zero-based node indices -- confirm.
to_node_id1 <- function(labels) match(labels, nodes1$name) - 1

# One link per input row, weighted by that row's amount.
links1 <- data.frame(
  source1 = to_node_id1(source1),
  target1 = to_node_id1(target1),
  value1 = san1[["amount"]]
)

sankeyNetwork(
  Links = links1,
  Nodes = nodes1,
  Source = "source1",
  Target = "target1",
  Value = "value1",
  NodeID = "name",
  nodeWidth = 40,
  nodePadding = 20,
  fontSize = 14
)
And the result was:
Then I did the same thing for the second level of my data:
# Level 2 of the flow: type -> thirdparty.
source2 <- san1[["type"]]
target2 <- san1[["thirdparty"]]

# One row per distinct label; sources are listed before targets.
nodes2 <- data.frame(name2 = unique(c(source2, target2)))

# Translate labels into zero-based row positions within nodes2 (the "- 1").
# NOTE(review): presumably sankeyNetwork() comes from networkD3, which expects
# zero-based node indices -- confirm.
to_node_id2 <- function(labels) match(labels, nodes2$name2) - 1

# One link per input row, weighted by that row's amount.
links2 <- data.frame(
  source2 = to_node_id2(source2),
  target2 = to_node_id2(target2),
  value2 = san1[["amount"]]
)

sankeyNetwork(
  Links = links2,
  Nodes = nodes2,
  Source = "source2",
  Target = "target2",
  Value = "value2",
  NodeID = "name2",
  nodeWidth = 20,
  nodePadding = 20,
  fontSize = 14
)
And the result looked the same as the first one:
Now what I need is to combine both diagrams into a single multi-level diagram. I searched a lot and read examples, but they didn't work for me because the structure of my data is different from what I saw.
I also need the amounts to be shown in the diagram.
Since you don't mention which non-base R package you're using to draw your Sankey diagrams, I am showing you an option using plotly.
library(plotly)
library(tidyverse)
# Prepare node and link data for plotting

# Nodes: every distinct label found in the non-amount columns of `df`
# (network_name, type, thirdparty), each paired with a zero-based index `idx`
# that the links refer to.
nodes <- df %>%
  pivot_longer(-amount, values_to = "name_node") %>%
  distinct(name_node) %>%
  # row_number() rather than 1:n(): 1:n() evaluates to c(1, 0) when there are
  # no rows, which makes mutate() fail on empty input.
  mutate(idx = row_number() - 1)

# Links: stack both levels (network_name -> type, type -> thirdparty) into one
# source/target table, then total the amounts per source/target pair.
links <- bind_rows(
  df %>% select(source = network_name, target = type, amount),
  df %>% select(source = type, target = thirdparty, amount)
) %>%
  group_by(source, target) %>%
  summarise(value = sum(amount), .groups = "drop") %>%
  # Replace each label by the index of its node.
  mutate(across(c(source, target), ~ nodes$idx[match(.x, nodes$name_node)]))
# Plot
library(plotly)

# Build the node and link specifications first so the plot_ly() call stays short.
node_spec <- list(label = nodes$name_node, pad = 15, thickness = 15)
link_spec <- as.list(links) # named list holding the columns of `links`

# Horizontal Sankey trace; printed at top level so the widget is rendered.
plot_ly(
  type = "sankey",
  orientation = "h",
  node = node_spec,
  link = link_spec
)
This produces
You can see the totals on hover; e.g. in the screenshot above, the value linking "trf_outgoing" to "Renee" is 94.5 million.
# Sample data read by the node/link preparation code: four parallel vectors of
# 24 entries each (network_name, type, thirdparty, amount), first built as a
# plain list. This definition has to be evaluated before any code that reads `df`.
df <- structure(list(
network_name = c("YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO","YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO", "YAGHO"),
type = c("deposits", "deposits", "withdrawals", "withdrawals", "trf_outgoing", "trf_outgoing","trf_incoming", "trf_incoming", "trf_incoming", "trf_incoming","trf_outgoing", "trf_incoming", "trf_outgoing", "trf_outgoing","chk_issued", "chk_issued", "chk_issued", "chk_issued", "chk_received","chk_received", "chk_received", "chk_received", "chk_received","chk_received"),
thirdparty = c("Christine", "Mike", "Patrick","Natalie", "Renee", "Jacob", "Renee", "Kathy", "John", "Ahmad", "Ahmad", "Tito", "Tito", "John", "Sally", "Tito", "John", "Ahmad", "Mohamad", "Tito", "John", "Sally", "Tito", "John"),
amount = c(2038472, 683488, 38765, 123413, 94543234, 20948043, 34842843, 218864, 6468486, 384684, 5348687, 34684687, 6936937, 16841287, 1584587, 1901504.4, 2281805.28, 2738166.34, 295910.77, 4114374.62, 26680528.46, 5336105.38, 12954836.15, 1218913.08)))
# Combine the list elements column-wise into a single table, one column per element.
df <- bind_cols(df) # or: as.data.frame(df)