python aggregate plotly-python choropleth

How to combine a mapping with aggregates with (and without) animation by years and still colorize land with missing values?

I'm trying to add an aggregate function to my choropleth. For that choropleth I had already managed, thanks to @RobRaymond, to get an animation by year while still displaying the countries with missing values, labelled with their names.

On the Plotly site [https://plotly.com/python/aggregations/] I saw that we could obtain a mapping with aggregates.

I tried to add it to my code but I can't get my expected result. What did I miss?

My code:

import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import pandas as pd
import numpy as np

# need to know countries that make up natural earth...
# The "iso_a3" column of this frame is used further down to find the countries
# that are absent from each animation frame.
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

# Sample data for the question. NOTE: the lists below have different lengths
# (74 country names, 74 ISO3 codes, 16 products, 4 years, 65 quantities).
# NOTE(review): np.random.random_integers is presumably deprecated in favour of
# np.random.randint - confirm against the installed NumPy version.
data = {"Country helped": 
        ['Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
       'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
       'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
       "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
       'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
       'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
       "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
       'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
       'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
       'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
       'Palestine', 'Philippines', 'République arabe syrienne',
       'République centrafricaine', 'République démocratique du Congo',
       'République démocratique populaire lao', 'République dominicaine',
       'République populaire démocratique de Corée',
       'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
       'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
       'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
       'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe'],
        "Code zone (ISO3)": 
        ['AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
       'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
       'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
       'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
       'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
       'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
       'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
       'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
       'ZMB', 'ZWE'],
       "Product":
        ['Autres non-céréales', 'Blé et Farin', 'Céréales',
       'Fruits secs, total', 'Huiles végétales',
       'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
       'Sucre, total', 'Céréales Secondaires', 'Lait, total',
       'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
       'Viande Total', 'Graisses Com'],
        "Year": 
        ['2013', '2014', '2015', '2016'],
        "Quantity of donated products": np.random.random_integers(90, 40000, 65)
        }

# Build the frame from the dict above.
# NOTE: the `columns` list asks for "Année", but the matching key in `data` is
# "Year"; together with the unequal list lengths noted above, this call does
# not produce a usable frame as posted.
df = pd.DataFrame(data, columns = ["Country helped", "Code zone (ISO3)",
                                   "Product", "Année", "Quantity of donated products"])



# Names of the aggregation functions offered in the dropdown menu.
aggs = ["count","sum","avg","median","mode","rms","stddev","min","max","first","last"]

# One dropdown button per aggregation. Each button issues a "restyle" call that
# sets the `func` of the first aggregation of the first transform to its name.
agg_func = [
    dict(
        args=['transforms[0].aggregations[0].func', name],
        label=name,
        method='restyle'
    )
    for name in aggs
]



# Using plotly.express for the animation
# NOTE: this rebinds `data` (previously the raw dict) to a one-element list
# holding a single trace description. `dict(df, ...)` merges the frame's
# columns with the keyword entries below.
# NOTE(review): color, color_continuous_scale, animation_frame, hover_name and
# hover_data look like plotly.express keyword names; presumably they are not
# valid attributes of a 'choropleth' trace dict - confirm.
data = [dict(df,
             type = 'choropleth',
             locations = df["Code zone (ISO3)"],
             z = df["Quantity of donated products"],
             color="z",
             color_continuous_scale="earth_r",
             animation_frame=df["Year"],
             hover_name="Country helped",
             hover_data=["Product"],
             # aggregate `z` per ISO3 code; the dropdown buttons rewrite `func`
             transforms = [dict(
                 type = 'aggregate',
                 groups = df["Code zone (ISO3)"],
                 aggregations = [dict(
                     target = 'z', func = 'sum', enabled = True)
                                ]
             )]
            )
       ]

# Figure layout: title, geo (map) styling, a source annotation, the aggregation
# dropdown and a second, constant-gray color axis.
layout = dict(
    autosize=True,
    title = "<b>Evolution of food aid in the world from 2013 to 2016</b><br>use dropdown to change aggregation",
    # NOTE(review): yaxis looks like a leftover from a cartesian example;
    # presumably it has no effect on a geo plot - confirm.
    yaxis = dict(title = 'Score', range = [0,22]),
    geo=dict(
        landcolor="lightgray",
        showland=True,
        showcountries=True,
        countrycolor="gray",
        countrywidth=0.5,
        showframe=False,
        showcoastlines=False,
        projection_type="natural earth",
        showocean=True, oceancolor="lightBlue",
        showlakes=True, lakecolor="lightblue",
        showrivers=True, rivercolor="lightblue"
    ),
    annotations=[
        # source credit, positioned in paper coordinates
        dict(
            x=0.55,
            y=0.15,
            xref="paper",
            yref="paper",
            # the backslash continues the string literal, so the leading spaces
            # of the next line become part of the annotation text
            text='Source: <a href="https://www.fao.org/faostat/fr/#data">\
            FAO</a>',
            showarrow=False
        )
    ],
    # dropdown built from `agg_func`; active = 1 selects the second button,
    # which is "sum" in the `aggs` order and matches the transform's initial func
    updatemenus = [dict(
        x = 0.95,
        y = 1.10,
        xref = 'paper',
        yref = 'paper',
        yanchor = 'top',
        active = 1,
        showactive = False,
        buttons = agg_func
    )],
    # single-color axis without a color bar; the missing-countries trace below
    # is attached to it via coloraxis="coloraxis2"
    coloraxis2={"colorscale": [[0, "lightgray"], [1, "lightgray"]], "showscale": False}
)


# NOTE: `fig` is never assigned in the code above (only `data` and `layout`
# are built), so everything from here on has no figure to work on as posted.
# Set the frame and transition durations on the first button of the first
# updatemenu (NOTE(review): presumably milliseconds - confirm).
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 1000

# update each of the animation frames with missing countries
for fr in fig.frames:
    # choropleth trace of every natural-earth country whose iso_a3 code is not
    # among the locations of this frame's first trace
    tr_missing = (
        px.choropleth(
            world.loc[~world["iso_a3"].isin(fr.data[0]["locations"]), "iso_a3"]
            .to_frame()
            .assign(color=1),
            color="color",
            locations="iso_a3"            
        )
        # bind the trace to the constant-gray coloraxis2 defined in `layout`
        .update_traces(hovertemplate="%{location} : Datas manquantes",
                       coloraxis="coloraxis2"
                      )
        .data[0]
    )
    # each frame now carries two traces: the original one plus the missing one
    fr.update(data=[fr.data[0], tr_missing])

# re-construct the figure...
# the initial data comes from the first frame so it also contains both traces
fig_dict = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)

pio.show(fig_dict, validate=True)

I also couldn't find out how to append just the additional "missing" trace to the traces in data when I remove animation_frame=df["Year"], i.e. when I don't want the animation.

Edit 2:

provide data sample as text
translate columns in english
trying to make my code easier to test
add a question

Edit 1:

The plot i get

Here is a sample of df_world_aide_alim

A second sample: df_world_aide_alim.head(25)

The dropdown to change aggregation i tried to combine

Solution

The code supplied to create the dataframe fails: when a dict is passed to pd.DataFrame(), all the lists need to be the same length. I have synthesized a dataframe from this dict by making all lists the same length (and, as a side effect of this, made sure country name and code stay consistent).
have used pandas named aggregations to create all of the analytics you want. Note there are a few that don't map 1:1 between deprecated plotly names and approach needed in pandas
within build_fig() now have data frame dfa (with list of aggregations and no animation)

Code zone (ISO3)	count	sum	avg	median	mode	rms	stddev	min	max	first	last	Country helped	Product
AFG	3	46647	15549	12358	5086	32115.5	12371.1	5086	29203	12358	29203	Afghanistan	['Huiles végétales', 'Céréales', 'Sucre, total']
AGO	5	75067	15013.4	14357	2187	38317.9	9236.46	2187	26697	14357	26697	Angola	['Riz, total', 'Fruits secs, total', 'Céréales Secondaires', 'Poiss&produi']

clearly from this structure a trace can be created from each of the columns. By default make first one visible (count)
add missing countries trace
create updatemenus to control trace visibility based on the selection, with the missing trace always being visible

# All defaults: group by ISO3 code only, one trace per aggregate, no animation.
build_fig(df)

animation is similar. Without using dash as well I don't see an option of having multiple analytics per frame selected from drop down. Hence example of animation and one analytic. Clearly for animation you need another dimension in data, year has been used for this.

# Two grouping keys plus a single aggregate name selects the animated variant:
# the second key ("Year") becomes the animation frame.
build_fig(df, aggs="rms", group=["Code zone (ISO3)", "Year"])

A more sophisticated aggregate function that produces an embedded list:

# Custom aggregation for "Product": per group, keep the (up to) three products
# with the largest donated quantity. The lambda receives the group's "Product"
# values and uses their index to look the matching rows up in `df`. The result
# is converted to a plain list, like the default "Product" aggregator, so every
# group yields an embedded list rather than a pandas Series.
build_fig(
    df,
    agg_cols={
        "Country helped": ("Country helped", "first"),
        "Product": (
            "Product",
            lambda s: df.loc[s.index]
            .sort_values("Quantity of donated products", ascending=False)
            .loc[:, "Product"]
            .head(3)
            .tolist(),
        ),
    },
)

full code

import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import pandas as pd
import numpy as np
import math


# Raw sample data. The lists are deliberately left as posted in the question and
# therefore have different lengths (74 names, 74 codes, 16 products, 4 years,
# 500 random quantities).
# fmt: off
data = {"Country helped": 
        ['Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
       'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
       'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
       "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
       'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
       'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
       "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
       'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
       'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
       'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
       'Palestine', 'Philippines', 'République arabe syrienne',
       'République centrafricaine', 'République démocratique du Congo',
       'République démocratique populaire lao', 'République dominicaine',
       'République populaire démocratique de Corée',
       'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
       'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
       'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
       'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe'],
        "Code zone (ISO3)": 
        ['AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
       'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
       'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
       'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
       'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
       'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
       'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
       'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
       'ZMB', 'ZWE'],
       "Product":
        ['Autres non-céréales', 'Blé et Farin', 'Céréales',
       'Fruits secs, total', 'Huiles végétales',
       'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
       'Sucre, total', 'Céréales Secondaires', 'Lait, total',
       'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
       'Viande Total', 'Graisses Com'],
        "Year": 
        ['2013', '2014', '2015', '2016'],
        "Quantity of donated products": np.random.randint(90, 40000, 500)
        }
# fmt: on

# NOTE: building the frame straight from the dict, i.e.
# pd.DataFrame(data, columns=[..., "Année", ...]), fails because the lists are
# ragged; hence the resampling below.

# Length of the longest column, computed once: every column is brought to this
# number of rows.
n_rows = max(len(values) for values in data.values())

# Columns that are already long enough are used as-is; shorter ones are
# resampled with replacement via np.random.choice.
sampled = pd.DataFrame(
    {
        col: values if len(values) == n_rows else np.random.choice(values, n_rows)
        for col, values in data.items()
    }
)

# Names and codes were sampled independently, so their pairing is lost. Drop
# the sampled names and re-attach the right one by joining on the ISO3 code
# against the original (aligned) name/code lists.
country_lookup = pd.DataFrame(
    {col: data[col] for col in ("Country helped", "Code zone (ISO3)")}
)
df = sampled.drop(columns=["Country helped"]).merge(
    country_lookup,
    on="Code zone (ISO3)",
    how="inner",
)

# Aggregate names, using the naming of plotly's aggregate transform.
aggs = [
    "count",
    "sum",
    "avg",
    "median",
    "mode",
    "rms",
    "stddev",
    "min",
    "max",
    "first",
    "last",
]


def build_fig(df,
    aggs=aggs,
    analytical_col="Quantity of donated products",
    group=["Code zone (ISO3)"],
    agg_cols={
        "Country helped": ("Country helped", "first"),
        "Product": ("Product", lambda s: s.unique().tolist()),
    },
):
    """Build a choropleth of aggregated values, with missing countries shown.

    Two modes are supported, selected by the arguments:

    * one grouping column and a list of aggregate names: one trace per
      aggregate plus a dropdown that switches which trace is visible;
    * two grouping columns and a single aggregate name: an animation over the
      second grouping column showing that one aggregate.

    In both modes a "missing" trace marks every natural-earth country that has
    no row in the aggregated data.

    Args:
        df: source data frame containing ``analytical_col``, the ``group``
            columns and the columns referenced by ``agg_cols``.
        aggs: list of aggregate names (dropdown mode) or a single name
            (animation mode). Names follow plotly's aggregate transform.
        analytical_col: column the aggregates are computed on.
        group: ``[location_col]`` or ``[location_col, animation_col]``.
        agg_cols: extra pandas named aggregations (``name: (column, func)``)
            kept next to the aggregates and shown as hover data.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: if ``group`` / ``aggs`` do not form one of the two
            supported combinations.

    The mutable defaults are only read, never modified, inside this function.
    """
    # make sure argument permutations make sense; raised explicitly (instead of
    # an assert) so the check is still performed when running with -O
    dropdown_mode = len(group) == 1 and isinstance(aggs, list)
    animation_mode = len(group) == 2 and isinstance(aggs, str)
    if not (dropdown_mode or animation_mode):
        raise ValueError(
            "expected either one group column with a list of aggregates, "
            "or two group columns with a single aggregate name"
        )

    # normalised view of the requested aggregates, always a list
    agg_names = aggs if isinstance(aggs, list) else [aggs]

    # need to know countries that make up natural earth...
    # NOTE(review): gpd.datasets was presumably deprecated in GeoPandas 0.14
    # and removed in 1.0 - confirm against the installed version.
    world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

    # pandas aggregate functions don't map 1:1 to deprecated plotly functions
    override = {
        "avg": "mean",
        "mode": lambda s: s.mode().iloc[0],
        # root mean square: square root of the MEAN (not the sum) of squares
        "rms": lambda s: math.sqrt((s ** 2).mean()),
        "stddev": "std",
    }

    # create dataframe with all wanted aggregates as columns, plus keep country name and list of products
    named_aggs = {
        name: (analytical_col, override.get(name, name)) for name in agg_names
    }
    dfa = df.groupby(group, as_index=False).agg(**{**named_aggs, **agg_cols})

    # generate trace of missing countries to be included where necessary
    def missing(values):
        return (
            px.choropleth(
                world.loc[~world["iso_a3"].isin(values), "iso_a3"],
                locations="iso_a3",
            )
            .update_traces(
                name="missing",
                colorscale=[[0, "red"], [1, "red"]],
                showlegend=False,
                hovertemplate="missing: %{location}",
            )
            .data
        )

    hover_cols = list(agg_cols)

    # create a trace for each required aggregate
    if len(group) == 1:
        fig = go.Figure(
            [
                # only the first aggregate is visible initially
                t.update(name=name, visible=(name == agg_names[0]))
                for name in agg_names
                for t in px.choropleth(
                    dfa, locations=group[0], color=name, hover_data=hover_cols
                ).data
            ]
        )

        # add the missing layer
        fig.add_traces(missing(fig.data[0]["locations"]))

        # add the drop down menu...
        fig.update_layout(
            updatemenus=[
                {
                    "buttons": [
                        {
                            "label": name,
                            "method": "restyle",
                            "args": [
                                {
                                    # show the selected aggregate and always
                                    # keep the missing layer visible
                                    "visible": [
                                        t.name == name or t.name == "missing"
                                        for t in fig.data
                                    ]
                                }
                            ],
                        }
                        for name in agg_names
                    ]
                }
            ]
        )

    # generate an animation, with only one aggregation
    else:
        fig = px.choropleth(
            dfa,
            locations=group[0],
            color=aggs,
            animation_frame=group[1],
            hover_data=hover_cols,
        )

        # update each of the animation frames with missing countries
        for fr in fig.frames:
            fr.update(data=[fr.data[0], missing(fr.data[0]["locations"])[0]])

        # re-construct the figure...
        # the initial data is taken from the first frame so it also carries the
        # missing trace
        fig = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)

    return fig