Search code examples
python plotly plotly-dash

Reproducing a treemap in Plotly from a repository of dash samples


I want to reproduce a treemap in Plotly from a repository of dash samples.

enter image description here

The sample code is as follows:

import pandas as pd
from dash import Dash, Input, Output, callback, dcc, html, State
import plotly.express as px
import dash_bootstrap_components as dbc

df = pd.read_table(  # download the orders dataset (tab-separated values)
    "https://raw.githubusercontent.com/plotly/datasets/master/global_super_store_orders.tsv"
)

df["profit_derived"] = df["Profit"].str.replace(",", ".").astype("float")  # swap "," for "." so the text casts to float
df["ship_date"] = pd.to_datetime(df["Ship Date"])  # no explicit format is given, so pandas has to infer it

# Hierarchical charts (sunburst, treemap, etc.) work only with positive aggregate values
# Dropping rows with negative profit keeps every aggregated value non-negative
df = df.query(expr="profit_derived >= 0")

df = df[["profit_derived", "Segment", "Region", "ship_date"]]  # keep only the columns the dashboard filters on or plots

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = dbc.Container(
    [
        html.H4(
            "Distribution of profit as per business segment and region",
            style={"textAlign": "center"},
            className="mb-3",
        ),
        # ------------------------------------------------- #
        # Modal: an "Open modal" button plus the pop-up holding the region and ship-date filters
        html.Div(
            [
                dbc.Button("Open modal", id="open", n_clicks=0),
                dbc.Modal(
                    [
                        dbc.ModalHeader(dbc.ModalTitle("Filters")),
                        dbc.ModalBody(
                            [
                                # Region filter (multi-select) inside the modal body
                                html.Label("Regions"),
                                dcc.Dropdown(
                                    id="dynamic_callback_dropdown_region",
                                    options=[
                                        {"label": x, "value": x}
                                        for x in sorted(df["Region"].unique())
                                    ],
                                    multi=True,
                                ),
                                html.Br(),
                                html.Label("Ship Date"),
                                dcc.DatePickerRange(  # bounded by, and initially set to, the full ship_date range
                                    id="my-date-picker-range",
                                    min_date_allowed=min(df["ship_date"]),
                                    max_date_allowed=max(df["ship_date"]),
                                    end_date=max(df["ship_date"]),
                                    start_date=min(df["ship_date"]),
                                    clearable=True,
                                ),
                            ]
                        ),
                    ],
                    id="modal",
                    is_open=False,  # starts closed; the toggle_modal callback flips it
                ),
            ],
            className="mb-5",
        ),
        # ---------------------------------------- #
        # Tabs: pick the chart type; the chart itself is rendered into "tabs-content"
        dcc.Tabs(
            id="tab",
            value="treemap",
            children=[
                dcc.Tab(label="Treemap", value="treemap"),
                dcc.Tab(label="Sunburst", value="sunburst"),
            ],
        ),
        html.Div(id="tabs-content"),  # children are supplied by main_callback_logic
    ],
    fluid=True,
)


@callback(
    Output("tabs-content", "children"),
    Input("dynamic_callback_dropdown_region", "value"),
    Input("tab", "value"),
    Input("my-date-picker-range", "start_date"),
    Input("my-date-picker-range", "end_date"),
)
def main_callback_logic(region, tab, start_date, end_date):  # rebuild the chart whenever a filter or the active tab changes
    dff = df.copy()  # work on a copy of the module-level frame

    if region is not None and len(region) > 0:  # None or an empty selection leaves all regions in
        dff = dff.query("Region == @region")

    if start_date is not None:  # None presumably means the picker was cleared, i.e. no lower bound
        dff = dff.query("ship_date > @start_date")  # strict comparison: rows equal to start_date are excluded

    if end_date is not None:
        dff = dff.query("ship_date < @end_date")  # strict comparison: rows equal to end_date are excluded

    dff = dff.groupby(by=["Segment", "Region"]).sum().reset_index()  # sum() covers every remaining column: profit_derived and ship_date

    if tab == "treemap":
        fig = px.treemap(
            dff, path=[px.Constant("all"), "Segment", "Region"], values="profit_derived"
        )
    else:  # any tab value other than "treemap" draws a sunburst
        fig = px.sunburst(
            dff, path=[px.Constant("all"), "Segment", "Region"], values="profit_derived"
        )

    fig.update_traces(root_color="lightgrey")
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    return dcc.Graph(figure=fig)  # becomes the children of the "tabs-content" div


@callback(
    Output("modal", "is_open"),
    Input("open", "n_clicks"),
    State("modal", "is_open"),
)
def toggle_modal(n1, is_open):
    """Toggle the filter modal when the "open" button is clicked.

    Args:
        n1: Click count of the "open" button.
        is_open: Current ``is_open`` state of the modal.

    Returns:
        The negated ``is_open`` when ``n1`` is truthy, otherwise ``is_open``
        unchanged.
    """
    if n1:
        return not is_open
    # n1 is falsy (the button starts at n_clicks=0): keep the current state.
    return is_open


# Start the Dash server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    app.run_server()

However, when I run the code, it does not display the example correctly.

enter image description here

Before accessing the dashboard on my localhost, the following output is printed to the console:

Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.

When I access the dashboard on my localhost, additional output is printed to the console.

How do I reproduce the dash example correctly?


Solution

  • The code works in pandas 1.x but not in pandas 2.x. Previously, the numeric_only parameter of DataFrameGroupBy.sum effectively defaulted to True, so columns that were not of a numeric type were silently dropped from the result.

    In fact, this already generates a FutureWarning in pandas 1.5; in pandas 2 the default is False, so sum() now raises an exception when a column cannot be summed, as is the case here for the ship_date column (datetime64 dtype).

    The error is in line 102:

    dff = dff.groupby(by=["Segment", "Region"]).sum().reset_index()
    

    Either specify numeric_only=True:

    dff = dff.groupby(by=["Segment", "Region"]).sum(numeric_only=True).reset_index()
    

    Or select only columns with numeric types:

    dff = dff.groupby(by=["Segment", "Region"])["profit_derived"].sum().reset_index()
    

    As for the other warning, if you want it to disappear, replace line 11:

    df["ship_date"] = pd.to_datetime(df["Ship Date"])
    

    by

    df["ship_date"] = pd.to_datetime(df["Ship Date"], format='mixed')
    

    The full code:

    import pandas as pd
    from dash import Dash, Input, Output, callback, dcc, html, State
    import plotly.express as px
    import dash_bootstrap_components as dbc
    
    # Download the orders dataset (tab-separated values).
    df = pd.read_table(
        "https://raw.githubusercontent.com/plotly/datasets/master/global_super_store_orders.tsv"
    )

    # "Profit" is handled as text here; swap "," for "." so it can be cast to float.
    df["profit_derived"] = df["Profit"].str.replace(",", ".").astype("float")
    # format="mixed" tells pandas to infer the format of each element individually,
    # which silences the "Could not infer format" warning.
    # NOTE(review): per-element inference can misread ambiguous day/month values;
    # confirm the parsed dates, or pass an explicit format if the layout is known.
    df["ship_date"] = pd.to_datetime(df["Ship Date"], format="mixed")

    # Hierarchical charts (sunburst, treemap, etc.) work only with positive aggregate values.
    # Dropping rows with negative profit keeps every aggregated value non-negative.
    df = df.query(expr="profit_derived >= 0")

    # Keep only the columns the dashboard filters on or plots.
    df = df[["profit_derived", "Segment", "Region", "ship_date"]]

    # Compute the date bounds once with the vectorised Series methods (which skip
    # NaT) rather than calling the builtin min()/max() on the column twice each.
    ship_date_min = df["ship_date"].min()
    ship_date_max = df["ship_date"].max()

    app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

    app.layout = dbc.Container(
        [
            html.H4(
                "Distribution of profit as per business segment and region",
                style={"textAlign": "center"},
                className="mb-3",
            ),
            # ------------------------------------------------- #
            # Modal: an "Open modal" button plus the pop-up holding the filters
            html.Div(
                [
                    dbc.Button("Open modal", id="open", n_clicks=0),
                    dbc.Modal(
                        [
                            dbc.ModalHeader(dbc.ModalTitle("Filters")),
                            dbc.ModalBody(
                                [
                                    # Region filter (multi-select)
                                    html.Label("Regions"),
                                    dcc.Dropdown(
                                        id="dynamic_callback_dropdown_region",
                                        options=[
                                            {"label": x, "value": x}
                                            for x in sorted(df["Region"].unique())
                                        ],
                                        multi=True,
                                    ),
                                    html.Br(),
                                    # Ship-date filter: bounded by, and initially
                                    # set to, the full range found in the data
                                    html.Label("Ship Date"),
                                    dcc.DatePickerRange(
                                        id="my-date-picker-range",
                                        min_date_allowed=ship_date_min,
                                        max_date_allowed=ship_date_max,
                                        end_date=ship_date_max,
                                        start_date=ship_date_min,
                                        clearable=True,
                                    ),
                                ]
                            ),
                        ],
                        id="modal",
                        is_open=False,
                    ),
                ],
                className="mb-5",
            ),
            # ---------------------------------------- #
            # Tabs: pick the chart type; the chart is rendered into "tabs-content"
            dcc.Tabs(
                id="tab",
                value="treemap",
                children=[
                    dcc.Tab(label="Treemap", value="treemap"),
                    dcc.Tab(label="Sunburst", value="sunburst"),
                ],
            ),
            html.Div(id="tabs-content"),
        ],
        fluid=True,
    )
    
    
    @callback(
        Output("tabs-content", "children"),
        Input("dynamic_callback_dropdown_region", "value"),
        Input("tab", "value"),
        Input("my-date-picker-range", "start_date"),
        Input("my-date-picker-range", "end_date"),
    )
    def main_callback_logic(region, tab, start_date, end_date):
        """Build the chart shown under the tabs from the current filter values.

        Args:
            region: Regions selected in the dropdown; ``None`` or an empty
                selection leaves all regions in.
            tab: Value of the active tab; ``"treemap"`` draws a treemap, any
                other value draws a sunburst.
            start_date: Earliest ship date to keep (inclusive), or ``None``
                for no lower bound.
            end_date: Latest ship date to keep (inclusive), or ``None`` for
                no upper bound.

        Returns:
            A ``dcc.Graph`` wrapping the figure.
        """
        # Every query() below returns a new frame and the aggregation does not
        # modify its input, so the module-level df can be used without a copy.
        dff = df

        if region:
            dff = dff.query("Region == @region")

        # The bounds are inclusive: the picker defaults to the first and last
        # ship dates in the data, and strict comparisons would drop the orders
        # shipped on exactly those days.
        if start_date is not None:
            dff = dff.query("ship_date >= @start_date")

        if end_date is not None:
            dff = dff.query("ship_date <= @end_date")

        # numeric_only=True leaves ship_date (datetime64) out of the sum;
        # without it the aggregation raises under pandas 2.
        dff = dff.groupby(by=["Segment", "Region"]).sum(numeric_only=True).reset_index()

        # Both chart types take the same arguments, so only the factory differs.
        make_figure = px.treemap if tab == "treemap" else px.sunburst
        fig = make_figure(
            dff, path=[px.Constant("all"), "Segment", "Region"], values="profit_derived"
        )

        fig.update_traces(root_color="lightgrey")
        fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

        return dcc.Graph(figure=fig)
    
    
    @callback(
        Output("modal", "is_open"),
        Input("open", "n_clicks"),
        State("modal", "is_open"),
    )
    def toggle_modal(n1, is_open):
        """Flip the modal's open state once the "open" button has been clicked.

        A falsy click count (the button starts at ``n_clicks=0``) returns the
        current state unchanged.
        """
        return (not is_open) if n1 else is_open
    
    
    # Start the Dash server only when this file is executed directly.
    if __name__ == "__main__":
        # app.run() replaces app.run_server(), which is obsolete in Dash 3;
        # app.run() is also available in recent Dash 2 releases.
        app.run()