Search code examples
pythonpandasaltair

Altair choropleth map doesn't display


I'm trying to create a choropleth from some Kaggle data: https://www.kaggle.com/datasets/jessemostipak/hotel-booking-demand but something isn't working.

import pandas as pd
import altair as alt
from vega_datasets import data

# Build a world choropleth of total hotel guests per country with Altair.

# Load the hotel bookings dataset
df = pd.read_csv('hotel_bookings.csv')

# Sum the columns adults, children, and babies to get the total number of guests
df['total_guests'] = df['adults'] + df['children'] + df['babies']

# Group by country and sum the total number of guests
guests_by_country = df.groupby('country')['total_guests'].sum().reset_index()

# Load the world map data
world_map_data = alt.topo_feature(data.world_110m.url, 'countries')

# Create the Altair chart
chart = alt.Chart(world_map_data).mark_geoshape().encode(
    color=alt.Color('total_guests:Q', scale=alt.Scale(scheme='plasma')),
    # NOTE(review): `country` is not listed in the lookup `fields` below, so
    # it is presumably not available on the map features for this tooltip.
    tooltip=['country:N', 'total_guests:Q']
).transform_lookup(
    # Join each map feature to guests_by_country by matching the feature's
    # `id` against the `country` column; only `total_guests` is pulled in.
    # NOTE(review): the join only finds rows if both sides use the same
    # identifiers -- confirm that the map ids and the `country` values
    # actually share a coding scheme.
    lookup='id',
    from_=alt.LookupData(data=guests_by_country, key='country', fields=['total_guests'])
).project(
    type='naturalEarth1'
).properties(
    title='Country Wise Distribution of Guests'
).configure_view(
    stroke='black'
).configure_title(
    fontSize=20
)

# Show the chart
chart


This is the output: enter image description here


Solution

  • I think the reason is that the id in the topojson and the country in the user data do not match. I could not find ideal topojson data, so I used a world map that includes country names in its topojson, countries-110m.json. I then downloaded ISO3 data and merged it into the final dataframe, so that the topojson can be associated with the data through country names.

    import pandas as pd
    import altair as alt
    from vega_datasets import data
    
    # Build a world choropleth of total hotel guests per country.
    # Inputs: ./data/hotel_bookings.csv (bookings) and ./data/iso3.csv
    # (assumed to provide `iso3` and `name` columns -- TODO confirm schema).

    # Load the hotel bookings dataset
    df = pd.read_csv('./data/hotel_bookings.csv')

    # Total guests per booking. A row-wise sum skips missing values, whereas
    # `adults + children + babies` turns the whole row into NaN when any one
    # count is missing, which silently drops that booking from the totals.
    df['total_guests'] = df[['adults', 'children', 'babies']].sum(axis=1)

    # Group by country and sum the total number of guests
    guests_by_country = df.groupby('country')['total_guests'].sum().reset_index()

    # Read the ISO3 code table used to translate country codes into names
    iso3 = pd.read_csv('./data/iso3.csv')

    # Attach the iso3 table's columns to each country code; the duplicate
    # `iso3` key column is dropped. This is an inner join, so codes without
    # a match in the iso3 table are left out of `source`.
    source = pd.merge(
        guests_by_country, iso3, left_on='country', right_on='iso3'
    ).drop(columns='iso3')

    # Load the world map data
    url = 'https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json'
    world_map_data = alt.topo_feature(url, 'countries')

    # Create the Altair chart
    chart = alt.Chart(world_map_data).mark_geoshape().encode(
        color=alt.Color('total_guests:Q', scale=alt.Scale(scheme='plasma')),
        tooltip=['country:N', 'total_guests:Q']
    ).transform_lookup(
        # Match each map feature's `properties.name` against `name` in
        # `source`, copying `country` and `total_guests` onto the feature.
        lookup='properties.name',
        from_=alt.LookupData(data=source, key='name', fields=['country', 'total_guests'])
    ).project(
        type='naturalEarth1'
    ).properties(
        title='Country Wise Distribution of Guests',
        width=600,
        height=400,
    ).configure_view(
        stroke='black'
    ).configure_title(
        fontSize=20
    )

    # Show the chart
    chart
    

    enter image description here