Search code examples
r plotly r-plotly sankey-diagram

Why are fixed positions for nodes in a plotly sankey graph being overridden or ignored?


I am seeking to make a large set of sankey graphs for flows between 5 nodes at Time1 and 5 nodes at Time2. I would like the nodes to be drawn in the same order every time, no matter the size of the nodes or flows. However, some of these graphs are drawn with nodes out of order. I attempted to dynamically calculate intended node.y positions, but they appear to be getting overridden or ignored.

The following code will produce four figures:

  • fig1 has data that results in out of order nodes.
  • fig2 has data that results in well-ordered nodes.
  • fig3 uses the fig1 data and includes an attempt to force the node.y positions but fails and looks just like fig1.
  • fig4 uses the fig2 data and manual node.y positions that successfully swap the order of Node 8 and Node 6.
library(plotly)
library(tidyverse)

# Display labels for the ten nodes, "Node 0" through "Node 9".
my_labels <- paste0("Node ", 0:9)

# Original data, which includes some flows much larger than others.
# Every source node (0-4) links to every target node (5-9): 25 links in total.
source_ids <- rep(c(0, 1, 2, 3, 4), each = 5)
target_ids <- rep(c(5, 6, 7, 8, 9), times = 5)

# Flow sizes, one row per source node, one column per target node (5..9).
values1 <- c(
  60, 23,  1,   0, 9, # from Node 0
  15, 33, 13,   4, 3, # from Node 1
   0,  9,  8,   2, 1, # from Node 2
   0,  4, 12, 127, 9, # from Node 3
   4,  4,  1,  11, 1  # from Node 4
)

my_test_data1 <- data.frame(
  source_ids = source_ids,
  target_ids = target_ids,
  values1 = values1
)

# fig1: Sankey diagram of the original (uneven) flows. No node coordinates are
# supplied, so plotly chooses the node placement itself.
fig1 <-
  plot_ly(
    type = "sankey",
    arrangement = "snap",
    node = list(label = my_labels, pad = 10), # pad of 10 pixels
    link = list(
      source = my_test_data1[["source_ids"]],
      target = my_test_data1[["target_ids"]],
      value = my_test_data1[["values1"]]
    )
  ) %>%
  layout(title = list(text = "fig1"))

# Observed result: the nodes are NOT drawn in the intended order. Node 3, the
# largest node, ends up below Node 4, and the right-hand nodes are out of order
# as well.
fig1

# Second data set: the same 25 links, but every flow has the identical size 10.
values2 <- rep_len(10, 25)
my_test_data2 <- data.frame(
  source_ids = source_ids,
  target_ids = target_ids,
  values2 = values2
)

# fig2: same diagram definition as fig1, fed with the uniform flows.
fig2 <-
  plot_ly(
    type = "sankey",
    arrangement = "snap",
    node = list(label = my_labels, pad = 10),
    link = list(
      source = my_test_data2[["source_ids"]],
      target = my_test_data2[["target_ids"]],
      value = my_test_data2[["values2"]]
    )
  ) %>%
  layout(title = list(text = "fig2"))

# Observed result: the nodes are drawn in the intended order.
fig2

# Second attempt with the original data. First derive node.y values dynamically;
# they are intended to force the nodes into the intended order.
#
# For each side of the diagram the total flow per node is computed, and each
# node's position is taken as 1 minus the midpoint of its share of the
# cumulative total: 1 - (cumsum(n) - n / 2) / sum(n).

label_pos_dfs <-
  list(

    # Label positions of source node labels
    my_test_data1 %>%
      group_by(source_ids) %>%
      summarize(n = sum(values1)) %>%
      rename(node.name = source_ids) %>%
      mutate(label.pos = 1 - (cumsum(n) - n / 2) / sum(n)),

    # Label positions of target node labels
    my_test_data1 %>%
      group_by(target_ids) %>%
      summarize(n = sum(values1)) %>%
      rename(node.name = target_ids) %>%
      mutate(label.pos = 1 - (cumsum(n) - n / 2) / sum(n))
  )

# Stack the positions into one numeric vector: source nodes first, then target
# nodes, i.e. the order in which label_pos_dfs lists them.
my_node_label_y_positions <-
  lapply(label_pos_dfs, "[", "label.pos") %>%
  bind_rows() %>%
  pull(label.pos)

# fig3: fig1 data plus explicit node coordinates.
# NOTE: the x values of exactly 0 are kept on purpose -- this figure reproduces
# the failing attempt (see the linked plotly issue in the Solution section).
fig3 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(
    label = my_labels,

    x = c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1),
    y = my_node_label_y_positions,

    pad = 10),
  link = list(
    source = my_test_data1$source_ids,
    target = my_test_data1$target_ids,
    # Use the full column name: `$values` only resolved to `values1` through
    # partial matching, which would silently return NULL on a tibble.
    value = my_test_data1$values1))

fig3 <- fig3 %>%
  layout(
    title = list(
      text = "fig3"
    )
  )

# Nodes do not appear in intended order. Just like in fig1, Node 3, the largest
# node, appears below Node 4, and the right hand nodes are also out of order.
fig3

# fig4: uniform flows (fig2 data) with hand-picked node coordinates. The slot
# named in each y comment is the ordering the author intended, not a verified
# statement about plotly's coordinate system.
fig4 <-
  plot_ly(
    type = "sankey",
    arrangement = "snap",
    node = list(
      label = my_labels,
      x = c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1),
      y = c(
        1,   # Node 0: intended top position
        0.7, # Node 1: intended second position
        0.5, # Node 2: intended third/middle position
        0.3, # Node 3: intended fourth position
        0.1, # Node 4: intended bottom/fifth position
        1,   # Node 5: intended top position
        0.3, # Node 6: intended fourth position
        0.5, # Node 7: intended middle position
        0.7, # Node 8: intended second position
        0.2  # Node 9: intended bottom position
      ),
      pad = 10
    ),
    link = list(
      source = my_test_data2[["source_ids"]],
      target = my_test_data2[["target_ids"]],
      value = my_test_data2[["values2"]]
    )
  ) %>%
  layout(title = list(text = "fig4"))

# Observed result: nodes appear in the intended swapped order, with Node 8 and
# Node 6 switched. All of nodes 5 through 9 do seem to drag lower for an unknown
# reason.
fig4

Solution

  • I found a solution after looking into open issues on GitHub. Apparently, node.x and node.y cannot be exactly 0 (in the code below I avoid both 0 and 1): https://github.com/plotly/plotly.py/issues/3002

    I'm not sure why, once that problem was solved, the dynamically created y positions resulted in the reverse of the intended order. I suppose they are measured from the top rather than from the bottom?

    
    # fig5: original (uneven) flows with explicit node coordinates that avoid
    # the exact values 0 and 1.
    fig5 <- plot_ly(
      type = "sankey",
      arrangement = "snap",
      node = list(
        label = my_labels,

        # Nodes cannot be positioned at 0 or 1!
        x = c(1e-09, 1e-09, 1e-09, 1e-09, 1e-09, 0.99, 0.99, 0.99, 0.99, 0.99),

        # My dynamic label positioning, while it seemed to give the right output
        # to me, actually landed them in reversed order, so the positions are
        # flipped here (1 - y is the same as the earlier y * -1 + 1).
        y = 1 - my_node_label_y_positions,

        pad = 5),
      link = list(
        source = my_test_data1$source_ids,
        target = my_test_data1$target_ids,
        # Use the full column name: `$values` only resolved to `values1` through
        # partial matching, which would silently return NULL on a tibble.
        value = my_test_data1$values1))

    fig5 <- fig5 %>%
      layout(
        title = list(
          text = "fig5"
        )
      )

    # Nodes DO appear in the intended order, at last!!!
    fig5