I have a similar issue to a question previously posted:
Plotly: How to set node positions in a Sankey Diagram?
In my case, I need all node labels that end in the same character to align in the same vertical column of my Sankey diagram. There are three vertical columns in total, and I want the (A) nodes in the first, the (B) nodes in the second, and the (C) nodes in the third. An answer to that earlier question provided a custom function that assigns nodes ending in the same character to the same x-position, which I have modified to fit my dataset, as below:
# Extract list of nodes and list of Source / Target links from my_df DataFrame.
# De-duplicate while keeping first-seen order: every entry of the label list
# becomes its own Sankey node, and the links below only ever point at the first
# occurrence of a name, so repeated names would add nodes no link references.
all_nodes = list(dict.fromkeys(my_df.Source.values.tolist() + my_df.Target.values.tolist()))
values = my_df.Value.values.tolist()
# Build the label -> index lookup once instead of scanning the list per row.
node_index = {name: position for position, name in enumerate(all_nodes)}
source_indices = [node_index[source] for source in my_df.Source]
target_indices = [node_index[target] for target in my_df.Target]
# Debug output only: node labels followed by the link values.
label_names = all_nodes + values
print(label_names)
# Function to assign identical x-positions to label names that have a common ending ((A),(B),(C))
def nodify(node_names):
    """Compute x/y positions for Sankey nodes grouped by their name ending.

    The second-to-last character of each name (e.g. the "A" in "1(A)") is
    used as the column key; all names sharing a key get the same x value.

    Args:
        node_names: sequence of label strings, each at least two characters
            long.

    Returns:
        A tuple ``(x_values, y_values)`` of two lists parallel to
        ``node_names``.
    """
    # unique name endings, in sorted order so columns are assigned A, B, C, ...
    ends = sorted({name[-2] for name in node_names})

    # intervals
    steps = 0.5

    # x-values for each unique name ending for input as node position.
    # NOTE(review): the first column sits at 0.5 and each further column adds
    # 0.5, so a third column lands at 1.5. Plotly describes node.x as a
    # normalized position -- confirm values above 1 are really intended.
    nodes_x = {}
    x_val = 0.5
    for ending in ends:
        nodes_x[str(ending)] = x_val
        x_val += steps

    # x and y values in list form; y grows by a tiny increment per node so
    # that every node receives a distinct value in input order.
    x_values = [nodes_x[name[-2]] for name in node_names]
    y_values = []
    y_val = 0
    for _ in node_names:
        y_values.append(y_val)
        y_val += .001

    return x_values, y_values
# Positions for every label: nodified[0] holds x values, nodified[1] holds y values
nodified = nodify(node_names=all_nodes)

# Node appearance and placement
node_spec = dict(
    pad=8,
    thickness=10,
    line=dict(color="black", width=0.5),
    label=all_nodes,
    color="blue",
    x=nodified[0],
    y=nodified[1],
)

# Links between nodes, referenced by index into the label list
link_spec = dict(
    source=source_indices,
    target=target_indices,
    value=my_df.Value,
)

# Plot the Sankey Diagram from my_df with node destination control
sankey_trace = go.Sankey(arrangement='snap', node=node_spec, link=link_spec)
fig = go.Figure(data=[sankey_trace])

fig.update_layout(
    title_text="My Title",
    font_size=10,
    autosize=True,
    height=2000,
    width=2000,
)
fig.show()
The destination assignment was not working for me at all until I found an open GitHub issue (#3002) indicating that Plotly does not handle x and y coordinates set to 0 well, so I changed `xVal` to start at 0.5 rather than 0. That snapped most nodes into place, with the exception of four (B) nodes that still end up in the (C) column.
Is there anything I'm missing about the Plotly coordinate system, or about node positioning in general, that would explain why Plotly keeps overriding my position assignment for a handful of the nodes?
Sample DataFrame:
0 1(A) 11(B) 6
1 1(A) 12(B) 2
2 1(A) 13(B) 20
3 1(A) 14(B) 1
4 1(A) 15(B) 1
5 1(A) 2(B) 17
6 1(A) 16(B) 5
7 1(A) 17(B) 9
8 1(A) 18(B) 6
9 1(A) 19(B) 5
10 1(A) 20(B) 255
11 1(A) 21(B) 1
12 1(A) 22(B) 9
13 1(A) 3(B) 200
14 1(A) 23(B) 1
15 1(A) 4(B) 1035
16 1(A) 24(B) 14
17 1(A) 25(B) 20
18 1(A) 26(B) 2
19 1(A) 27(B) 222
20 1(A) 28(B) 8
21 1(A) 29(B) 44
22 1(A) 5(B) 3
23 1(A) 6(B) 1529
24 1(A) 30(B) 1
25 1(A) 31(B) 2
26 1(A) 7(B) 6
27 1(A) 32(B) 1
28 1(A) 8(B) 10
29 1(A) 33(B) 11
30 1(A) 34(B) 35
31 1(A) 35(B) 1
32 1(A) 36(B) 41
33 1(A) 37(B) 6
34 1(A) 38(B) 4
35 1(A) 39(B) 2
36 1(A) 40(B) 68
37 1(A) 41(B) 46
38 1(A) 42(B) 24
39 1(A) 9(B) 21
40 1(A) 10(B) 13
41 1(A) 43(B) 6
42 2(B) 44(C) 12
43 3(B) 45(C) 19
44 4(B) 46(C) 1
45 5(B) 47(C) 6
46 6(B) 46(C) 2
47 6(B) 48(C) 1
48 6(B) 49(C) 1
49 7(B) 50(C) 84
50 8(B) 51(C) 2
51 9(B) 46(C) 4
52 10(B) 52(C) 2
53 10(B) 52(C) 2
54 10(B) 53(C) 8
55 10(B) 53(C) 8
56 10(B) 53(C) 12
57 10(B) 53(C) 20
58 10(B) 53(C) 10
59 10(B) 53(C) 4
Any help is appreciated!
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools
# Number of random links to draw before filtering
S = 40

# Labels "1A".."5A", "1B".."5B", "1C".."5C": number first, column letter last
labels = [f"{number + 1}{letter}" for letter in "ABC" for number in range(5)]

# Draw the three random columns in a fixed order: source, target, value
random_sources = np.random.choice(labels, S)
random_targets = np.random.choice(labels, S)
random_values = np.random.randint(1, 10, S)
df = pd.DataFrame(
    {"source": random_sources, "target": random_targets, "value": random_values}
)

# make sure paths are valid: keep only links whose source column letter
# sorts strictly before the target column letter
source_rank = df["source"].str[-1].map(ord)
target_rank = df["target"].str[-1].map(ord)
df = df[source_rank < target_rank]

# collapse repeated source/target pairs into one link with the summed value
df = df.groupby(["source", "target"], as_index=False).sum()
def factorize(s):
    """Map the values of *s* to evenly spaced positions strictly inside (0, 1).

    Values are encoded as sorted integer codes ``0..k`` and then scaled as
    ``(code + 0.01) / (k + 0.1)``, so the smallest value maps slightly above
    0 and the largest slightly below 1.

    Args:
        s: one-dimensional sequence accepted by ``pd.factorize``.

    Returns:
        A float array with one position per element of *s*; an empty float
        array when *s* is empty.
    """
    codes = pd.factorize(s, sort=True)[0]
    # Nothing to scale: avoid taking the maximum of an empty array.
    if codes.size == 0:
        return codes.astype(float)
    return (codes + 0.01) / (codes.max() + 0.1)
# unique nodes: every distinct label from either column, numbered by sorted position
unique_labels = np.unique(df[["source", "target"]], axis=None)
nodes = pd.Series(index=unique_labels, data=range(len(unique_labels)))

# work out positioning of nodes: the last character picks the column (x),
# everything before it picks the row (y)
nodes = nodes.to_frame("id")
nodes = nodes.assign(
    x=factorize(nodes.index.str[-1]),
    y=factorize(nodes.index.str[:-1]),
)

# now simple job of building sankey: links reference nodes by their numeric id
node_spec = dict(label=nodes.index, x=nodes["x"], y=nodes["y"])
link_spec = dict(
    source=nodes.loc[df["source"], "id"],
    target=nodes.loc[df["target"], "id"],
    value=df["value"],
)
fig = go.Figure(go.Sankey(arrangement="snap", node=node_spec, link=link_spec))
fig
source | target | value |
---|---|---|
1A | 3C | 7 |
1B | 1C | 5 |
1B | 3C | 6 |
2A | 4B | 12 |
2B | 2C | 8 |
3A | 3C | 1 |
3B | 1C | 8 |
3B | 3C | 10 |
4A | 1B | 5 |
4B | 2C | 9 |
4B | 3C | 8 |
4B | 4C | 3 |
5A | 1B | 1 |
5A | 2C | 9 |
5A | 5B | 4 |
# work out positioning of nodes when the column marker is the last three
# characters of the label: the marker picks x, the remaining prefix picks y
nodes = nodes.to_frame("id")
column_marker = nodes.index.str[-3:]
row_prefix = nodes.index.str[:-3]
nodes = nodes.assign(x=factorize(column_marker), y=factorize(row_prefix))