I want to reproduce a treemap in Plotly from a repository of dash samples.
The sample code is as follows:
import pandas as pd
from dash import Dash, Input, Output, callback, dcc, html, State
import plotly.express as px
import dash_bootstrap_components as dbc
# Read the tab-separated orders table from the Plotly datasets repository.
df = pd.read_table(
    "https://raw.githubusercontent.com/plotly/datasets/master/global_super_store_orders.tsv"
)

# Derive the two working columns:
#   profit_derived - "Profit" text with "," swapped for "." and parsed as float
#                    (presumably the source uses a decimal comma).
#   ship_date      - "Ship Date" parsed into datetimes for the date-range filter.
df = df.assign(
    profit_derived=df["Profit"].str.replace(",", ".").astype("float"),
    ship_date=pd.to_datetime(df["Ship Date"]),
)

# Hierarchical charts (sunburst, treemap, etc.) work only with positive aggregate
# values, so keep only rows whose profit is not negative, and only the columns
# the dashboard actually uses.
df = df.loc[
    df["profit_derived"] >= 0,
    ["profit_derived", "Segment", "Region", "ship_date"],
]
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _build_layout(orders):
    """Assemble the page: heading, filter modal, chart tabs and chart container.

    ``orders`` must provide a "Region" column (dropdown options) and a
    "ship_date" column (bounds and initial range of the date picker).
    """
    region_options = [
        {"label": name, "value": name} for name in sorted(orders["Region"].unique())
    ]
    earliest = min(orders["ship_date"])
    latest = max(orders["ship_date"])

    heading = html.H4(
        "Distribution of profit as per business segment and region",
        style={"textAlign": "center"},
        className="mb-3",
    )

    # Filters shown inside the modal body.
    region_filter = dcc.Dropdown(
        id="dynamic_callback_dropdown_region",
        options=region_options,
        multi=True,
    )
    date_filter = dcc.DatePickerRange(
        id="my-date-picker-range",
        min_date_allowed=earliest,
        max_date_allowed=latest,
        end_date=latest,
        start_date=earliest,
        clearable=True,
    )

    # Modal (initially closed) plus the button that opens it.
    filters_modal = dbc.Modal(
        [
            dbc.ModalHeader(dbc.ModalTitle("Filters")),
            dbc.ModalBody(
                [
                    html.Label("Regions"),
                    region_filter,
                    html.Br(),
                    html.Label("Ship Date"),
                    date_filter,
                ]
            ),
        ],
        id="modal",
        is_open=False,
    )
    modal_section = html.Div(
        [dbc.Button("Open modal", id="open", n_clicks=0), filters_modal],
        className="mb-5",
    )

    # Tabs select the chart type; the chart itself is rendered into "tabs-content".
    chart_tabs = dcc.Tabs(
        id="tab",
        value="treemap",
        children=[
            dcc.Tab(label="Treemap", value="treemap"),
            dcc.Tab(label="Sunburst", value="sunburst"),
        ],
    )

    return dbc.Container(
        [heading, modal_section, chart_tabs, html.Div(id="tabs-content")],
        fluid=True,
    )


app.layout = _build_layout(df)
@callback(
    Output("tabs-content", "children"),
    Input("dynamic_callback_dropdown_region", "value"),
    Input("tab", "value"),
    Input("my-date-picker-range", "start_date"),
    Input("my-date-picker-range", "end_date"),
)
def main_callback_logic(region, tab, start_date, end_date):
    """Render the chart for the active tab from the filtered orders.

    Args:
        region: Values chosen in the region dropdown; ``None`` or empty means
            no region filter.
        tab: Value of the active tab; "treemap" draws a treemap, anything else
            a sunburst.
        start_date: Start of the date picker range, or ``None`` when cleared.
        end_date: End of the date picker range, or ``None`` when cleared.

    Returns:
        A ``dcc.Graph`` wrapping the figure, used as the children of
        "tabs-content".
    """
    # (is the filter active?, query expression) - applied one after another.
    filters = (
        (bool(region), "Region == @region"),
        (start_date is not None, "ship_date > @start_date"),
        (end_date is not None, "ship_date < @end_date"),
    )
    selected = df.copy()
    for active, expression in filters:
        if active:
            selected = selected.query(expression)

    # One row per (Segment, Region) pair with summed values.
    totals = selected.groupby(by=["Segment", "Region"]).sum().reset_index()

    make_chart = px.treemap if tab == "treemap" else px.sunburst
    fig = make_chart(
        totals, path=[px.Constant("all"), "Segment", "Region"], values="profit_derived"
    )
    fig.update_traces(root_color="lightgrey")
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    return dcc.Graph(figure=fig)
@callback(
    Output("modal", "is_open"),
    Input("open", "n_clicks"),
    State("modal", "is_open"),
)
def toggle_modal(n1, is_open):
    """Return the modal's next open state.

    The state is flipped when the "open" button's click count ``n1`` is
    truthy; otherwise the current ``is_open`` value is returned unchanged.
    """
    return (not is_open) if n1 else is_open
# Start the app's server only when this file is executed as a script.
if __name__ == "__main__":
    app.run_server()
However, when I run the code, it does not display the example correctly.
Before accessing the dashboard on my localhost, the following output is printed to the console:
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
When I access the dashboard on my localhost, additional output is printed to the console.
How do I reproduce the dash example correctly?
The code works in pandas 1.x but not in pandas 2.x. This is because the `numeric_only` parameter of `DataFrameGroupBy.sum` previously had `True` as its default value, so columns that were not of a numeric type were silently discarded.
In fact, this already generates a FutureWarning in pandas 1.5, but in pandas 2 the default value is now `False`, and the call raises an exception if any of the columns is not of a numeric type, such as the `ship_date` column in this case (`datetime64` dtype).
The error is in this line of `main_callback_logic`:
dff = dff.groupby(by=["Segment", "Region"]).sum().reset_index()
Either specify `numeric_only=True`:
dff = dff.groupby(by=["Segment", "Region"]).sum(numeric_only=True).reset_index()
Or select only columns with numeric types:
dff = dff.groupby(by=["Segment", "Region"])["profit_derived"].sum().reset_index()
As for the other warning, if you want it to disappear, change line 11:
df["ship_date"] = pd.to_datetime(df["Ship Date"])
to
df["ship_date"] = pd.to_datetime(df["Ship Date"], format='mixed')
The full code:
import pandas as pd
from dash import Dash, Input, Output, callback, dcc, html, State
import plotly.express as px
import dash_bootstrap_components as dbc
# Read the tab-separated orders table from the Plotly datasets repository.
df = pd.read_table(
    "https://raw.githubusercontent.com/plotly/datasets/master/global_super_store_orders.tsv"
)

# Derive the two working columns:
#   profit_derived - "Profit" text with "," swapped for "." and parsed as float
#                    (presumably the source uses a decimal comma).
#   ship_date      - "Ship Date" parsed into datetimes; format="mixed" makes
#                    pandas infer the format of each element individually.
df = df.assign(
    profit_derived=df["Profit"].str.replace(",", ".").astype("float"),
    ship_date=pd.to_datetime(df["Ship Date"], format="mixed"),
)

# Hierarchical charts (sunburst, treemap, etc.) work only with positive aggregate
# values, so keep only rows whose profit is not negative, and only the columns
# the dashboard actually uses.
df = df.loc[
    df["profit_derived"] >= 0,
    ["profit_derived", "Segment", "Region", "ship_date"],
]
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _build_layout(orders):
    """Assemble the page: heading, filter modal, chart tabs and chart container.

    ``orders`` must provide a "Region" column (dropdown options) and a
    "ship_date" column (bounds and initial range of the date picker).
    """
    region_options = [
        {"label": name, "value": name} for name in sorted(orders["Region"].unique())
    ]
    earliest = min(orders["ship_date"])
    latest = max(orders["ship_date"])

    heading = html.H4(
        "Distribution of profit as per business segment and region",
        style={"textAlign": "center"},
        className="mb-3",
    )

    # Filters shown inside the modal body.
    region_filter = dcc.Dropdown(
        id="dynamic_callback_dropdown_region",
        options=region_options,
        multi=True,
    )
    date_filter = dcc.DatePickerRange(
        id="my-date-picker-range",
        min_date_allowed=earliest,
        max_date_allowed=latest,
        end_date=latest,
        start_date=earliest,
        clearable=True,
    )

    # Modal (initially closed) plus the button that opens it.
    filters_modal = dbc.Modal(
        [
            dbc.ModalHeader(dbc.ModalTitle("Filters")),
            dbc.ModalBody(
                [
                    html.Label("Regions"),
                    region_filter,
                    html.Br(),
                    html.Label("Ship Date"),
                    date_filter,
                ]
            ),
        ],
        id="modal",
        is_open=False,
    )
    modal_section = html.Div(
        [dbc.Button("Open modal", id="open", n_clicks=0), filters_modal],
        className="mb-5",
    )

    # Tabs select the chart type; the chart itself is rendered into "tabs-content".
    chart_tabs = dcc.Tabs(
        id="tab",
        value="treemap",
        children=[
            dcc.Tab(label="Treemap", value="treemap"),
            dcc.Tab(label="Sunburst", value="sunburst"),
        ],
    )

    return dbc.Container(
        [heading, modal_section, chart_tabs, html.Div(id="tabs-content")],
        fluid=True,
    )


app.layout = _build_layout(df)
@callback(
    Output("tabs-content", "children"),
    Input("dynamic_callback_dropdown_region", "value"),
    Input("tab", "value"),
    Input("my-date-picker-range", "start_date"),
    Input("my-date-picker-range", "end_date"),
)
def main_callback_logic(region, tab, start_date, end_date):
    """Render the chart for the active tab from the filtered orders.

    Args:
        region: Values chosen in the region dropdown; ``None`` or empty means
            no region filter.
        tab: Value of the active tab; "treemap" draws a treemap, anything else
            a sunburst.
        start_date: Start of the date picker range, or ``None`` when cleared.
        end_date: End of the date picker range, or ``None`` when cleared.

    Returns:
        A ``dcc.Graph`` wrapping the figure, used as the children of
        "tabs-content".
    """
    # No defensive copy needed: query() and groupby() return new objects and
    # nothing below mutates the module-level frame.
    dff = df
    if region:
        dff = dff.query("Region == @region")

    # The picker's initial range is the min/max ship date, so the bounds must
    # be inclusive; strict comparisons would silently drop every order shipped
    # on the first and last day even when the user has not narrowed the range.
    # NOTE(review): assumes ship_date carries no time-of-day component, so
    # "<= end_date" covers the whole end day - confirm against the data.
    if start_date is not None:
        dff = dff.query("ship_date >= @start_date")
    if end_date is not None:
        dff = dff.query("ship_date <= @end_date")

    # Aggregate only the value column: since pandas 2.0, sum() no longer drops
    # non-numeric columns by default and would raise on the datetime ship_date.
    totals = (
        dff.groupby(by=["Segment", "Region"])["profit_derived"].sum().reset_index()
    )

    # Both chart types take the same arguments; only the factory differs.
    make_chart = px.treemap if tab == "treemap" else px.sunburst
    fig = make_chart(
        totals, path=[px.Constant("all"), "Segment", "Region"], values="profit_derived"
    )
    fig.update_traces(root_color="lightgrey")
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    return dcc.Graph(figure=fig)
@callback(
    Output("modal", "is_open"),
    Input("open", "n_clicks"),
    State("modal", "is_open"),
)
def toggle_modal(n1, is_open):
    """Return the modal's next open state.

    The state is flipped when the "open" button's click count ``n1`` is
    truthy; otherwise the current ``is_open`` value is returned unchanged.
    """
    return (not is_open) if n1 else is_open
# Start the app's server only when this file is executed as a script.
# app.run() is the current entry point; the older app.run_server() alias is
# deprecated and no longer available in Dash 3.
if __name__ == "__main__":
    app.run()