I'm trying to create a choropleth from some Kaggle data (https://www.kaggle.com/datasets/jessemostipak/hotel-booking-demand), but something isn't working.
import pandas as pd
import altair as alt
from vega_datasets import data
# Read the raw booking records
df = pd.read_csv('hotel_bookings.csv')

# Guests per booking: adults plus children plus babies
df['total_guests'] = df['adults'].add(df['children']).add(df['babies'])

# Collapse to one row per country code with its guest total
guest_totals = df.groupby('country')['total_guests'].sum()
guests_by_country = guest_totals.reset_index()

# World country outlines shipped with vega_datasets
world_map_data = alt.topo_feature(data.world_110m.url, 'countries')

# Pieces of the chart, named individually so each step can be inspected
guest_lookup = alt.LookupData(
    data=guests_by_country,
    key='country',
    fields=['total_guests'],
)
guest_colour = alt.Color('total_guests:Q', scale=alt.Scale(scheme='plasma'))

# Build the map: draw shapes, encode, join on the feature `id`, then style
world_shapes = alt.Chart(world_map_data).mark_geoshape()
encoded = world_shapes.encode(
    color=guest_colour,
    tooltip=['country:N', 'total_guests:Q'],
)
joined = encoded.transform_lookup(lookup='id', from_=guest_lookup)
projected = joined.project(type='naturalEarth1')
titled = projected.properties(title='Country Wise Distribution of Guests')
chart = titled.configure_view(stroke='black').configure_title(fontSize=20)

# Show the chart
chart
This is the output: (screenshot of the rendered chart; image not available here)
I think the reason is that the `id` in the TopoJSON and the `country` column in the user data do not match. I could not find an ideal TopoJSON file, so I used a world map whose TopoJSON includes country names: countries-110m.json. I then downloaded an ISO3 table, which associates ISO3 codes with country names, and merged it into the final dataframe so that the lookup can be done on the country name.
import pandas as pd
import altair as alt
from vega_datasets import data
# Load the hotel bookings dataset
df = pd.read_csv('./data/hotel_bookings.csv')

# Total guests per booking. A row-wise sum skips missing values, so a booking
# with a blank count in one column still contributes its other guests instead
# of becoming NaN and being dropped from the country total.
df['total_guests'] = df[['adults', 'children', 'babies']].sum(axis=1)

# Group by country and sum the total number of guests
guests_by_country = df.groupby('country')['total_guests'].sum().reset_index()

# Read the ISO3 table; the code below relies on its `iso3` and `name` columns
iso3 = pd.read_csv('./data/iso3.csv')

# Attach a country name to each ISO3 code from the bookings data.
# NOTE(review): this is an inner merge, so any `country` value that is not
# present in the ISO3 table is silently left out of the map.
source = pd.merge(
    guests_by_country,
    iso3,
    left_on='country',
    right_on='iso3',
).drop(columns='iso3')

# Load the world map data (this TopoJSON carries a name for each country)
url = 'https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json'
world_map_data = alt.topo_feature(url, 'countries')

# Create the Altair chart. The lookup joins on the country *name*, so a shape
# is only coloured when its `properties.name` equals a `name` in `source`.
# NOTE(review): spelling differences between the two name lists would leave
# those countries without data; verify against the rendered map.
chart = alt.Chart(world_map_data).mark_geoshape().encode(
    color=alt.Color('total_guests:Q', scale=alt.Scale(scheme='plasma')),
    tooltip=['country:N', 'total_guests:Q'],
).transform_lookup(
    lookup='properties.name',
    from_=alt.LookupData(
        data=source,
        key='name',
        fields=['country', 'total_guests'],
    ),
).project(
    type='naturalEarth1',
).properties(
    title='Country Wise Distribution of Guests',
    width=600,
    height=400,
).configure_view(
    stroke='black',
).configure_title(
    fontSize=20,
)

# Show the chart
chart