Search code examples
pythonaggregateplotly-pythonchoropleth

How to combine a mapping with aggregates with (and without) animation by years and still colorize land with missing values?


  1. I'm trying to add an aggregate function to my choropleth. For that choropleth I had already managed, thanks to @RobRaymond, to get an animation by year while still displaying the countries with missing values, along with their names.

On the Plotly site [https://plotly.com/python/aggregations/] I saw that it is possible to build a map with aggregates.

I tried to add it to my code but I can't get my expected result. What did I miss?

My code:

import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import pandas as pd
import numpy as np

# need to know countries that make up natural earth...
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

data = {"Country helped": 
        ['Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
       'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
       'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
       "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
       'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
       'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
       "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
       'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
       'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
       'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
       'Palestine', 'Philippines', 'République arabe syrienne',
       'République centrafricaine', 'République démocratique du Congo',
       'République démocratique populaire lao', 'République dominicaine',
       'République populaire démocratique de Corée',
       'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
       'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
       'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
       'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe'],
        "Code zone (ISO3)": 
        ['AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
       'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
       'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
       'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
       'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
       'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
       'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
       'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
       'ZMB', 'ZWE'],
       "Product":
        ['Autres non-céréales', 'Blé et Farin', 'Céréales',
       'Fruits secs, total', 'Huiles végétales',
       'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
       'Sucre, total', 'Céréales Secondaires', 'Lait, total',
       'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
       'Viande Total', 'Graisses Com'],
        "Year": 
        ['2013', '2014', '2015', '2016'],
        "Quantity of donated products": np.random.random_integers(90, 40000, 65)
        }

# Build the working data frame from the `data` dict above.
# NOTE(review): the lists in `data` have different lengths (74, 74, 16, 4 and 65
# entries), which pd.DataFrame does not accept for a dict input, and "Année" is
# not a key of `data` (the key is "Year").
df = pd.DataFrame(data, columns = ["Country helped", "Code zone (ISO3)",
                                   "Product", "Année", "Quantity of donated products"])



# Names of the aggregate functions to offer in the dropdown menu.
aggs = ["count","sum","avg","median","mode","rms","stddev","min","max","first","last"]

# `agg` starts as an empty list but is rebound to a dict on every iteration below.
agg = []
# One dropdown-button definition per aggregate function; used later as the
# `buttons` of the layout's updatemenus entry.
agg_func = []
for i in range(0, len(aggs)):
    # Each button uses the 'restyle' method to set the function of the first
    # aggregation of the first transform to the selected aggregate name.
    agg = dict(
        args=['transforms[0].aggregations[0].func', aggs[i]],
        label=aggs[i],
        method='restyle'
    )
    agg_func.append(agg)



# Use of plotly.express for the animation
# NOTE(review): this rebinds `data` (previously the raw dict) to a one-element
# list holding a trace-like dict. Passing `df` positionally to dict() presumably
# copies its columns in as keys as well -- confirm this is intended.
# NOTE(review): `color`, `color_continuous_scale`, `animation_frame`,
# `hover_name` and `hover_data` look like plotly.express keyword names rather
# than choropleth trace attributes -- verify against the plotly reference.
data = [dict(df,
             type = 'choropleth',
             locations = df["Code zone (ISO3)"],
             z = df["Quantity of donated products"],
             color="z",
             color_continuous_scale="earth_r",
             animation_frame=df["Year"],
             hover_name="Country helped",
             hover_data=["Product"],
             # aggregate transform: group rows by ISO3 code and sum `z` per group
             transforms = [dict(
                 type = 'aggregate',
                 groups = df["Code zone (ISO3)"],
                 aggregations = [dict(
                     target = 'z', func = 'sum', enabled = True)
                                ]
             )]
            )
       ]

# Figure layout: title, map (geo) styling, a source annotation, the aggregation
# dropdown and a second, flat color axis for countries without data.
layout = dict(
    autosize=True,
    title = "<b>Evolution of food aid in the world from 2013 to 2016</b><br>use dropdown to change aggregation",
    yaxis = dict(title = 'Score', range = [0,22]),
    # base map styling: light-gray land, gray country borders, light-blue water
    geo=dict(
        landcolor="lightgray",
        showland=True,
        showcountries=True,
        countrycolor="gray",
        countrywidth=0.5,
        showframe=False,
        showcoastlines=False,
        projection_type="natural earth",
        showocean=True, oceancolor="lightBlue",
        showlakes=True, lakecolor="lightblue",
        showrivers=True, rivercolor="lightblue"
    ),
    # data-source credit, positioned in paper coordinates
    annotations=[
        dict(
            x=0.55,
            y=0.15,
            xref="paper",
            yref="paper",
            text='Source: <a href="https://www.fao.org/faostat/fr/#data">\
            FAO</a>',
            showarrow=False
        )
    ],
    # dropdown whose buttons are the per-aggregate entries collected in agg_func
    updatemenus = [dict(
        x = 0.95,
        y = 1.10,
        xref = 'paper',
        yref = 'paper',
        yanchor = 'top',
        # presumably the index of the initially selected button (aggs[1] is "sum") -- confirm
        active = 1,
        showactive = False,
        buttons = agg_func
    )],
    # single-color (light gray) scale without a color bar; the missing-countries
    # trace created further down refers to it via coloraxis="coloraxis2"
    coloraxis2={"colorscale": [[0, "lightgray"], [1, "lightgray"]], "showscale": False}
)


# NOTE(review): `fig` is never assigned in the code shown here -- the `data` and
# `layout` objects built above are not turned into a figure before this point.
# Set the frame and transition durations carried in the arguments of the first
# button of the first updatemenu.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 1000

# update each of the animation frames with missing countries
for fr in fig.frames:
    # Countries of the Natural Earth table whose ISO3 code is not among the
    # locations of this frame's first trace, drawn with the flat gray
    # "coloraxis2" color axis.
    tr_missing = (
        px.choropleth(
            world.loc[~world["iso_a3"].isin(fr.data[0]["locations"]), "iso_a3"]
            .to_frame()
            .assign(color=1),
            color="color",
            locations="iso_a3"            
        )
        .update_traces(hovertemplate="%{location} : Datas manquantes",
                       coloraxis="coloraxis2"
                      )
        .data[0]
    )
    # keep the frame's original trace and add the missing-countries trace after it
    fr.update(data=[fr.data[0], tr_missing])

# re-construct the figure...
# (the initial data is taken from the first frame, so it already includes the
# missing-countries trace)
fig_dict = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)

pio.show(fig_dict, validate=True)
  2. I also couldn't find how to append just the additional "missing" trace to the trace in data when I remove animation_frame=df["Year"] (i.e. without the animation).

Edit 2:

  • provide data sample as text
  • translate columns in english
  • trying to make my code easier to test
  • add a question

Edit 1:

The plot i get

Here is a sample of df_world_aide_alim

A second sample: df_world_aide_alim.head(25)

The dropdown to change aggregation i tried to combine


Solution

    • the code supplied to create a dataframe fails: when a dict is passed to pd.DataFrame(), all the lists need to be the same length. I have synthesized a dataframe from this dict by making all lists the same length (and, as a side effect of this, made sure country name and code are consistent)
    • I have used pandas named aggregations to create all of the analytics you want. Note there are a few that don't map 1:1 between the deprecated plotly names and the approach needed in pandas
    • within build_fig() we now have the data frame dfa (shown here with a list of aggregations and no animation)
    Code zone (ISO3) count sum avg median mode rms stddev min max first last Country helped Product
    AFG 3 46647 15549 12358 5086 32115.5 12371.1 5086 29203 12358 29203 Afghanistan ['Huiles végétales', 'Céréales', 'Sucre, total']
    AGO 5 75067 15013.4 14357 2187 38317.9 9236.46 2187 26697 14357 26697 Angola ['Riz, total', 'Fruits secs, total', 'Céréales Secondaires', 'Poiss&produi']
    • clearly, from this structure a trace can be created from each of the columns. By default the first one (count) is made visible
    • add the missing-countries trace
    • create updatemenus to control trace visibility based on the selection, with the missing trace always being visible
    build_fig(df)
    

    enter image description here

    • animation is similar. Without also using Dash, I don't see an option for having multiple analytics per frame selected from a drop-down; hence this example uses animation with a single analytic. Clearly, for animation you need another dimension in the data; year has been used for this.
    build_fig(df, aggs="rms", group=["Code zone (ISO3)", "Year"])
    

    enter image description here

    A more sophisticated aggregate function for the embedded list (here, the products of the three largest quantities in each group):

    build_fig(
        df,
        agg_cols={
            "Country helped": ("Country helped", "first"),
            "Product": (
                "Product",
                lambda s: df.loc[s.index]
                .sort_values("Quantity of donated products", ascending=False)
                .loc[:, "Product"]
                .head(3),
            ),
        },
    )
    

    full code

    import plotly.io as pio
    import plotly.graph_objects as go
    import plotly.express as px
    import geopandas as gpd
    import pandas as pd
    import numpy as np
    import math
    
    
    # fmt: off
    data = {"Country helped": 
            ['Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
           'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
           'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
           "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
           'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
           'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
           "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
           'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
           'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
           'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
           'Palestine', 'Philippines', 'République arabe syrienne',
           'République centrafricaine', 'République démocratique du Congo',
           'République démocratique populaire lao', 'République dominicaine',
           'République populaire démocratique de Corée',
           'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
           'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
           'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
           'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe'],
            "Code zone (ISO3)": 
            ['AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
           'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
           'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
           'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
           'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
           'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
           'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
           'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
           'ZMB', 'ZWE'],
           "Product":
            ['Autres non-céréales', 'Blé et Farin', 'Céréales',
           'Fruits secs, total', 'Huiles végétales',
           'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
           'Sucre, total', 'Céréales Secondaires', 'Lait, total',
           'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
           'Viande Total', 'Graisses Com'],
            "Year": 
            ['2013', '2014', '2015', '2016'],
            "Quantity of donated products": np.random.randint(90, 40000, 500)
            }
    # fmt: on
    
    # this fails !!!
    # df = pd.DataFrame(data, columns = ["Country helped", "Code zone (ISO3)",
    #                                    "Product", "Année", "Quantity of donated products"])
    
    # provided data is ragged (each column has a different length): every column
    # shorter than the longest one is resampled, with replacement, up to that length
    n_rows = max(len(values) for values in data.values())
    df = (
        pd.DataFrame(
            {
                col: values if len(values) == n_rows else np.random.choice(values, n_rows)
                for col, values in data.items()
            }
        )
        # names and codes were drawn independently above, so they no longer match:
        # drop the names and re-attach them from the original name/code lists
        .drop(columns=["Country helped"])
        .merge(
            pd.DataFrame(
                {col: data[col] for col in ["Country helped", "Code zone (ISO3)"]}
            ),
            on="Code zone (ISO3)",
            how="inner",
        )
    )
    
    # Names of the aggregates to compute. They follow the naming of the
    # (deprecated) plotly aggregate functions; build_fig() uses this list as the
    # default for its `aggs` parameter and maps the names pandas does not know.
    aggs = [
        "count",
        "sum",
        "avg",
        "median",
        "mode",
        "rms",
        "stddev",
        "min",
        "max",
        "first",
        "last",
    ]
    
    
    def build_fig(df,
        aggs=aggs,
        analytical_col="Quantity of donated products",
        group=["Code zone (ISO3)"],
        agg_cols={
            "Country helped": ("Country helped", "first"),
            "Product": ("Product", lambda s: s.unique().tolist()),
        },
    ):
        """Build a choropleth of aggregated values, with uncovered countries in red.

        Two argument combinations are accepted:

        * one ``group`` column and a list of ``aggs``: one trace per aggregate
          (only the first is visible initially) plus a drop-down menu that
          switches which aggregate is shown;
        * two ``group`` columns and a single ``aggs`` name: an animation over
          ``group[1]`` showing that one aggregate.

        In both cases the countries of the Natural Earth table that have no
        row in the aggregated data are drawn by an extra trace named "missing".

        Args:
            df: data frame containing the ``group`` columns, ``analytical_col``
                and the source columns referenced by ``agg_cols``.
            aggs: list of aggregate names (drop-down mode) or one aggregate
                name as a string (animation mode). "avg", "mode", "rms" and
                "stddev" are translated for pandas; other names are passed to
                pandas unchanged.
            analytical_col: column the aggregates are computed on.
            group: column(s) to group by; ``group[0]`` holds the locations
                and, when present, ``group[1]`` is the animation frame.
            agg_cols: extra pandas named aggregations ``{output: (column, func)}``
                kept next to the aggregates and shown as hover data.

        Returns:
            The assembled ``plotly.graph_objects.Figure``.

        Raises:
            AssertionError: if ``group`` and ``aggs`` are not one of the two
                combinations described above.
        """
        # NOTE: the list/dict defaults are shared between calls; they are only
        # read here, never mutated.

        # make sure argument permutations make sense
        assert (len(group) == 1 and isinstance(aggs, list)) or (
            len(group) == 2 and isinstance(aggs, str)
        ), "pass one group column with a list of aggs, or two group columns with a single agg name"

        # work with a list of aggregate names in both modes
        agg_names = aggs if isinstance(aggs, list) else [aggs]

        # need to know countries that make up natural earth...
        # NOTE(review): gpd.datasets is reported as deprecated/removed in recent
        # geopandas releases -- confirm against the installed version.
        world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

        # pandas aggregate functions don't map 1:1 to deprecated plotly functions
        override = {
            "avg": "mean",
            "mode": lambda s: s.mode().iloc[0],
            # root mean square: square root of the MEAN (not the sum) of the squares
            "rms": lambda s: math.sqrt((s ** 2).mean()),
            "stddev": "std",
        }

        # create dataframe with all wanted aggregates as columns, plus keep
        # country name and list of products
        dfa = df.groupby(group, as_index=False).agg(
            **{
                **{
                    name: (analytical_col, override.get(name, name))
                    for name in agg_names
                },
                **agg_cols,
            }
        )

        # generate trace(s) of the countries that are NOT in `values`
        def missing(values):
            return (
                px.choropleth(
                    world.loc[~world["iso_a3"].isin(values), "iso_a3"],
                    locations="iso_a3",
                )
                .update_traces(
                    name="missing",
                    colorscale=[[0, "red"], [1, "red"]],
                    showlegend=False,
                    hovertemplate="missing: %{location}",
                )
                .data
            )

        # create a trace for each required aggregate
        if len(group) == 1:
            fig = go.Figure(
                [
                    # only the first aggregate starts out visible
                    trace.update(name=name, visible=(name == agg_names[0]))
                    for name in agg_names
                    for trace in px.choropleth(
                        dfa, locations=group[0], color=name, hover_data=list(agg_cols)
                    ).data
                ]
            )

            # add the missing layer (every aggregate trace has the same locations)
            fig.add_traces(missing(fig.data[0]["locations"]))

            # add the drop down menu: each button shows its own aggregate trace
            # and always keeps the "missing" trace visible
            fig.update_layout(
                updatemenus=[
                    {
                        "buttons": [
                            {
                                "label": name,
                                "method": "restyle",
                                "args": [
                                    {
                                        "visible": [
                                            t.name in (name, "missing")
                                            for t in fig.data
                                        ]
                                    }
                                ],
                            }
                            for name in agg_names
                        ]
                    }
                ]
            )

        # generate an animation, with only one aggregation
        else:
            fig = px.choropleth(
                dfa,
                locations=group[0],
                color=aggs,
                animation_frame=group[1],
                hover_data=list(agg_cols),
            )

            # update each of the animation frames with missing countries
            for fr in fig.frames:
                fr.update(data=[fr.data[0], missing(fr.data[0]["locations"])[0]])

            # re-construct the figure so the initial data (taken from the first
            # frame) also contains the missing-countries trace
            fig = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)

        return fig