On the Plotly site [https://plotly.com/python/aggregations/] I saw that a map can be built with aggregations.
I tried to add this to my code, but I can't get the result I expect. What did I miss?
My code:
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import pandas as pd
import numpy as np
# Load the Natural Earth low-resolution countries layer; its "iso_a3" column is
# used further down to find the countries that are absent from each frame.
# NOTE(review): `gpd.datasets` is believed to be deprecated/removed in recent
# GeoPandas releases — confirm against the installed version.
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
# Reference values used to synthesize a sample dataset.  The lists have
# different lengths on purpose (74 countries/codes, 16 products, 4 years,
# 65 quantities); they are aligned to a common length when `df` is built.
data = {
    "Country helped": [
        'Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
        'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
        'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
        "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
        'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
        'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
        "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
        'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
        'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
        'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
        'Palestine', 'Philippines', 'République arabe syrienne',
        'République centrafricaine', 'République démocratique du Congo',
        'République démocratique populaire lao', 'République dominicaine',
        'République populaire démocratique de Corée',
        'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
        'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
        'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
        'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe',
    ],
    "Code zone (ISO3)": [
        'AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
        'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
        'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
        'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
        'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
        'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
        'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
        'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
        'ZMB', 'ZWE',
    ],
    "Product": [
        'Autres non-céréales', 'Blé et Farin', 'Céréales',
        'Fruits secs, total', 'Huiles végétales',
        'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
        'Sucre, total', 'Céréales Secondaires', 'Lait, total',
        'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
        'Viande Total', 'Graisses Com',
    ],
    "Year": ['2013', '2014', '2015', '2016'],
    # `random_integers` is deprecated; `randint` excludes its upper bound, so
    # 40001 keeps 40000 reachable exactly as before.
    "Quantity of donated products": np.random.randint(90, 40001, 65),
}

# pd.DataFrame requires every column to have the same length.  Use one row per
# country: the country/code lists already have that length and stay paired,
# every shorter (or differently sized) list is resampled with replacement.
n_rows = len(data["Code zone (ISO3)"])
df = pd.DataFrame(
    {
        column: values if len(values) == n_rows else np.random.choice(values, n_rows)
        for column, values in data.items()
    }
)
# Aggregation functions offered in the dropdown menu.
aggs = ["count", "sum", "avg", "median", "mode", "rms", "stddev", "min", "max", "first", "last"]

# One "restyle" button per aggregation: each button rewrites the `func` of the
# first aggregation of the trace's first transform.
agg_func = [
    dict(
        args=['transforms[0].aggregations[0].func', name],
        label=name,
        method='restyle',
    )
    for name in aggs
]
# Use plotly.express for the animation
# Builds a one-element list holding a single choropleth trace description.
# `data` is rebound here: from this point on it no longer refers to the raw
# dictionary of lists defined earlier.
# NOTE(review): `dict(df, ...)` presumably copies every DataFrame column into
# the trace dict alongside the keyword arguments below — confirm this is wanted.
data = [dict(df,
type = 'choropleth',
locations = df["Code zone (ISO3)"],
z = df["Quantity of donated products"],
# NOTE(review): `color`, `color_continuous_scale`, `animation_frame`,
# `hover_name` and `hover_data` look like plotly.express keyword arguments
# rather than choropleth trace attributes — confirm they are valid here.
color="z",
color_continuous_scale="earth_r",
animation_frame=df["Year"],
hover_name="Country helped",
hover_data=["Product"],
# Aggregate transform: group rows by ISO3 code and apply `func` to `z`;
# the dropdown buttons target `transforms[0].aggregations[0].func`.
transforms = [dict(
type = 'aggregate',
groups = df["Code zone (ISO3)"],
aggregations = [dict(
target = 'z', func = 'sum', enabled = True)
]
)]
)
]
# Figure layout: title, map styling, data-source annotation and the dropdown
# menu whose buttons come from `agg_func`.
layout = {
    "autosize": True,
    "title": "<b>Evolution of food aid in the world from 2013 to 2016</b><br>use dropdown to change aggregation",
    "yaxis": {"title": 'Score', "range": [0, 22]},
    # Map appearance: land, country borders, projection and water features.
    "geo": {
        "landcolor": "lightgray",
        "showland": True,
        "showcountries": True,
        "countrycolor": "gray",
        "countrywidth": 0.5,
        "showframe": False,
        "showcoastlines": False,
        "projection_type": "natural earth",
        "showocean": True,
        "oceancolor": "lightBlue",
        "showlakes": True,
        "lakecolor": "lightblue",
        "showrivers": True,
        "rivercolor": "lightblue",
    },
    # Data-source credit, positioned in paper coordinates.
    "annotations": [
        {
            "x": 0.55,
            "y": 0.15,
            "xref": "paper",
            "yref": "paper",
            "text": 'Source: <a href="https://www.fao.org/faostat/fr/#data">FAO</a>',
            "showarrow": False,
        }
    ],
    # Dropdown listing one button per aggregation function.
    "updatemenus": [
        {
            "x": 0.95,
            "y": 1.10,
            "xref": 'paper',
            "yref": 'paper',
            "yanchor": 'top',
            "active": 1,
            "showactive": False,
            "buttons": agg_func,
        }
    ],
    # Second colour axis: a flat light-gray scale with the colour bar hidden.
    "coloraxis2": dict(
        colorscale=[[0, "lightgray"], [1, "lightgray"]],
        showscale=False,
    ),
}
# Slow down the animation: 2000 ms per frame, 1000 ms transition.
# NOTE(review): `fig` is not assigned anywhere above in this snippet, so these
# lines cannot run as shown. The `args[1]['frame']` access also does not match
# the buttons built for the dropdown (their `args[1]` is the aggregation name);
# it looks like it targets an animation play button — confirm.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 1000
# update each of the animation frames with missing countries
for fr in fig.frames:
# Build a one-trace choropleth of every Natural Earth country whose ISO3 code
# is not among this frame's locations, bound to the flat "coloraxis2" scale.
tr_missing = (
px.choropleth(
world.loc[~world["iso_a3"].isin(fr.data[0]["locations"]), "iso_a3"]
.to_frame()
.assign(color=1),
color="color",
locations="iso_a3"
)
.update_traces(hovertemplate="%{location} : Datas manquantes",
coloraxis="coloraxis2"
)
.data[0]
)
# Each frame now carries its original trace plus the "missing countries" trace.
fr.update(data=[fr.data[0], tr_missing])
# re-construct the figure...
# The first frame's traces become the initial data so the trace count matches
# the two-trace frames.
fig_dict = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)
pio.show(fig_dict, validate=True)
animation_frame=df["Year"]
Edit 2:
Edit 1:
Here is a sample of df_world_aide_alim
pd.DataFrame()
All the lists need to be the same length. I have synthesized a dataframe from this dict, making all lists the same length (as a side effect, this also ensures that country name and code are consistent).
build_fig()
We now have a data frame dfa (with the list of aggregations and no animation):

Code zone (ISO3) | count | sum | avg | median | mode | rms | stddev | min | max | first | last | Country helped | Product |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AFG | 3 | 46647 | 15549 | 12358 | 5086 | 32115.5 | 12371.1 | 5086 | 29203 | 12358 | 29203 | Afghanistan | ['Huiles végétales', 'Céréales', 'Sucre, total'] |
AGO | 5 | 75067 | 15013.4 | 14357 | 2187 | 38317.9 | 9236.46 | 2187 | 26697 | 14357 | 26697 | Angola | ['Riz, total', 'Fruits secs, total', 'Céréales Secondaires', 'Poiss&produi'] |
# Example calls. NOTE(review): `build_fig` and `df` are defined further down in
# this file, so these calls only work once those definitions have run.

# Defaults: a single group column and the full list of aggregations.
build_fig(df)
# One aggregation ("rms") with two group columns; build_fig uses the second
# group column ("Year") as the animation frame.
build_fig(df, aggs="rms", group=["Code zone (ISO3)", "Year"])
# Custom extra columns: first country name per group, plus the products of the
# (up to) three rows with the largest donated quantity in each group.
# NOTE(review): the lambda returns a Series (`.head(3)`), not a scalar or list
# — confirm groupby.agg accepts it.
build_fig(
df,
agg_cols={
"Country helped": ("Country helped", "first"),
"Product": (
"Product",
lambda s: df.loc[s.index]
.sort_values("Quantity of donated products", ascending=False)
.loc[:, "Product"]
.head(3),
),
},
)
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import pandas as pd
import numpy as np
import math
# Reference values for the synthetic sample.  The lists have different lengths
# (74 countries/codes, 16 products, 4 years, 500 quantities).
# fmt: off
data = {"Country helped":
        ['Afghanistan', 'Algérie', 'Angola', 'Bangladesh', 'Bénin',
         'Bhoutan', 'Bolivie (État plurinational de)', 'Burkina Faso',
         'Burundi', 'Cambodge', 'Cameroun', 'Colombie', 'Comores', 'Congo',
         "Côte d'Ivoire", 'Cuba', 'Djibouti', 'Égypte', 'El Salvador',
         'Équateur', 'Éthiopie', 'Gambie', 'Géorgie', 'Ghana', 'Guatemala',
         'Guinée', 'Guinée-Bissau', 'Haïti', 'Honduras',
         "Iran (République islamique d')", 'Iraq', 'Jordanie', 'Kenya',
         'Kirghizistan', 'Lesotho', 'Liban', 'Libéria', 'Libye',
         'Madagascar', 'Malawi', 'Mali', 'Mauritanie', 'Mozambique',
         'Myanmar', 'Népal', 'Nicaragua', 'Niger', 'Ouganda', 'Pakistan',
         'Palestine', 'Philippines', 'République arabe syrienne',
         'République centrafricaine', 'République démocratique du Congo',
         'République démocratique populaire lao', 'République dominicaine',
         'République populaire démocratique de Corée',
         'République-Unie de Tanzanie', 'Rwanda', 'Sao Tomé-et-Principe',
         'Sénégal', 'Sierra Leone', 'Somalie', 'Soudan', 'Soudan du Sud',
         'Sri Lanka', 'Tadjikistan', 'Tchad', 'Timor-Leste', 'Togo',
         'Vanuatu', 'Yémen', 'Zambie', 'Zimbabwe'],
        "Code zone (ISO3)":
        ['AFG', 'DZA', 'AGO', 'BGD', 'BEN', 'BTN', 'BOL', 'BFA', 'BDI',
         'KHM', 'CMR', 'COL', 'COM', 'COG', 'CIV', 'CUB', 'DJI', 'EGY',
         'SLV', 'ECU', 'ETH', 'GMB', 'GEO', 'GHA', 'GTM', 'GIN', 'GNB',
         'HTI', 'HND', 'IRN', 'IRQ', 'JOR', 'KEN', 'KGZ', 'LSO', 'LBN',
         'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MOZ', 'MMR', 'NPL',
         'NIC', 'NER', 'UGA', 'PAK', 'PSE', 'PHL', 'SYR', 'CAF', 'COD',
         'LAO', 'DOM', 'PRK', 'TZA', 'RWA', 'STP', 'SEN', 'SLE', 'SOM',
         'SDN', 'SSD', 'LKA', 'TJK', 'TCD', 'TLS', 'TGO', 'VUT', 'YEM',
         'ZMB', 'ZWE'],
        "Product":
        ['Autres non-céréales', 'Blé et Farin', 'Céréales',
         'Fruits secs, total', 'Huiles végétales',
         'Légumineuses Sèches,Tot.', 'Non-céréales', 'Riz, total',
         'Sucre, total', 'Céréales Secondaires', 'Lait, total',
         'Mélanges et préparations', 'Poiss&produi', 'BulgurFarEnt',
         'Viande Total', 'Graisses Com'],
        "Year":
        ['2013', '2014', '2015', '2016'],
        "Quantity of donated products": np.random.randint(90, 40000, 500)
        }
# fmt: on

# pd.DataFrame(data) cannot be used directly: the provided lists are ragged.
# Every column is brought to the length of the longest one by sampling with
# replacement; columns that already have that length are used as-is.
n_rows = max(len(values) for values in data.values())

# Authoritative code -> country-name pairs, taken from the two aligned lists.
country_lookup = pd.DataFrame(
    {key: data[key] for key in ("Country helped", "Code zone (ISO3)")}
)

# Country names are not sampled independently (that would pair random names
# with random codes); they are joined back on the ISO3 code instead.
df = pd.DataFrame(
    {
        column: values if len(values) == n_rows else np.random.choice(values, n_rows)
        for column, values in data.items()
        if column != "Country helped"
    }
).merge(country_lookup, on="Code zone (ISO3)", how="inner")
# Aggregation names offered by default; kept as a list because build_fig
# distinguishes a list of aggregations from a single aggregation string.
aggs = "count sum avg median mode rms stddev min max first last".split()
def build_fig(
    df,
    aggs=aggs,
    analytical_col="Quantity of donated products",
    group=["Code zone (ISO3)"],
    agg_cols={
        "Country helped": ("Country helped", "first"),
        "Product": ("Product", lambda s: s.unique().tolist()),
    },
):
    """Build a choropleth of aggregated values, with a layer for missing countries.

    Two modes are supported:

    * ``group`` has one column and ``aggs`` is a list: one trace per
      aggregation, with a dropdown menu that switches the visible trace.
    * ``group`` has two columns and ``aggs`` is a single aggregation name: an
      animation whose frames are the values of the second group column.

    Args:
        df: Source data frame containing ``analytical_col``, the ``group``
            columns and the columns referenced by ``agg_cols``.
        aggs: List of aggregation names (dropdown mode) or one aggregation
            name (animation mode).
        analytical_col: Column the aggregations are computed on.
        group: Group-by columns; the first one is used as map locations.
        agg_cols: Extra named aggregations (pandas named-aggregation tuples)
            that are added to the result and shown as hover data.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: If ``group``/``aggs`` do not match one of the two modes.
    """
    # The mutable defaults in the signature are only read, never modified.
    dropdown_mode = len(group) == 1 and isinstance(aggs, list) and len(aggs) > 0
    animation_mode = len(group) == 2 and isinstance(aggs, str)
    if not (dropdown_mode or animation_mode):
        raise ValueError(
            "expected one group column with a non-empty list of aggregations, "
            "or two group columns with a single aggregation name"
        )
    agg_names = aggs if isinstance(aggs, list) else [aggs]

    # need to know countries that make up natural earth...
    # NOTE(review): `gpd.datasets` is believed to be deprecated/removed in
    # recent GeoPandas releases — confirm against the installed version.
    world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

    # pandas aggregate functions don't map 1:1 to deprecated plotly functions
    override = {
        "avg": "mean",
        "mode": lambda s: s.mode().iloc[0],
        # root-mean-square: the squares are averaged, not summed
        "rms": lambda s: math.sqrt((s ** 2).mean()),
        "stddev": "std",
    }

    # One column per requested aggregation, plus the caller's extra columns
    # (entries of agg_cols win if a name is used twice).
    named_aggs = {
        **{name: (analytical_col, override.get(name, name)) for name in agg_names},
        **agg_cols,
    }
    dfa = df.groupby(group, as_index=False).agg(**named_aggs)
    hover_cols = list(agg_cols)

    def missing(values):
        """Return traces covering the Natural Earth countries not in ``values``."""
        return (
            px.choropleth(
                world.loc[~world["iso_a3"].isin(values), "iso_a3"],
                locations="iso_a3",
            )
            .update_traces(
                name="missing",
                colorscale=[[0, "red"], [1, "red"]],
                showlegend=False,
                hovertemplate="missing: %{location}",
            )
            .data
        )

    if len(group) == 1:
        # One trace per aggregation; only the first aggregation starts visible.
        fig = go.Figure(
            [
                trace.update(name=name, visible=(name == aggs[0]))
                for name in aggs
                for trace in px.choropleth(
                    dfa, locations=group[0], color=name, hover_data=hover_cols
                ).data
            ]
        )
        # add the missing layer
        fig.add_traces(missing(fig.data[0]["locations"]))
        # Dropdown: each button shows its own trace and always keeps the
        # "missing" layer visible.
        fig.update_layout(
            updatemenus=[
                {
                    "buttons": [
                        {
                            "label": name,
                            "method": "restyle",
                            "args": [
                                {
                                    "visible": [
                                        t.name == name or t.name == "missing"
                                        for t in fig.data
                                    ]
                                }
                            ],
                        }
                        for name in aggs
                    ]
                }
            ]
        )
    else:
        # Animation with a single aggregation; frames follow the second group column.
        fig = px.choropleth(
            dfa,
            locations=group[0],
            color=aggs,
            animation_frame=group[1],
            hover_data=hover_cols,
        )
        # update each of the animation frames with missing countries
        for fr in fig.frames:
            fr.update(data=[fr.data[0], missing(fr.data[0]["locations"])[0]])
        # Rebuild the figure so the initial data matches the two-trace frames.
        fig = go.Figure(data=fig.frames[0].data, layout=fig.layout, frames=fig.frames)
    return fig