Search code examples
python csv geopandas shapefile

Merge a csv and shapefile with geopandas


I'm trying to merge a CSV with a shapefile using geopandas. I have a unique ID for both and would like to attach data from the CSV into the shapefile when there is a matching unique ID. Thanks


Solution

    • here is a complete working example
    • load downloaded shape file of US states
    • load CSV (use io.StringIO to make this code standalone)
    • pandas merge() on key column iso_3166_2
    • show that it worked by plotting the merged data: geometry comes from the shape file, colour comes from the CSV column named size
    import io
    import urllib
    import urllib.parse
    from pathlib import Path
    from zipfile import ZipFile

    import geopandas as gpd
    import pandas as pd
    import requests
    
    # fmt: off
    # download boundaries
    url = "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_1_states_provinces.zip"
    # local archive path: <cwd>/<last component of the URL path>
    f = Path.cwd().joinpath(urllib.parse.urlparse(url).path.split("/")[-1])
    # fmt: on

    if not f.exists():
        # Stream into a temporary name and rename only after the transfer
        # finished, so an interrupted download is never mistaken for a
        # complete archive on the next run.
        partial = f.with_name(f.name + ".part")
        with requests.get(
            url, stream=True, headers={"User-Agent": "XY"}, timeout=60
        ) as r:
            # Fail here on 4xx/5xx instead of saving an error page as the .zip.
            r.raise_for_status()
            with open(partial, "wb") as fd:
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
        partial.replace(f)

    # Extract independently of the download, so a run that already has the
    # archive but no extracted directory still ends up with the files.
    extract_dir = f.parent.joinpath(f.stem)
    if not extract_dir.exists():
        with ZipFile(f) as zfile:
            zfile.extractall(extract_dir)
    
    # read the extracted boundaries: <archive dir>/<archive stem>/<archive stem>.shp
    shp_path = f.parent / f.stem / f"{f.stem}.shp"
    gdf2 = gpd.read_file(str(shp_path))
    
    # inline CSV standing in for a file on disk; parsed through io.StringIO
    # so the example needs no extra input file. Columns: size, iso_3166_2.
    csv_text = """size,iso_3166_2
    118,US-MN
    829,US-MT
    235,US-ND
    998,US-ID
    807,US-WA
    956,US-AZ
    2094,US-CA
    652,US-CO
    541,US-NV
    1,US-WV
    3,US-MD
    2,US-NJ
    8,US-NY
    16,US-PA
    1,US-ME
    66,US-MI"""
    df = pd.read_csv(io.StringIO(csv_text))
    
    # Attach the CSV columns to the shapes by joining on the shared key
    # column. Calling merge() on the GeoDataFrame (rather than on the plain
    # DataFrame) keeps the result a GeoDataFrame, so the geometry column and
    # CRS carry over without re-wrapping. The default inner join keeps only
    # rows whose iso_3166_2 value appears in both inputs.
    gdf = gdf2.merge(df, on="iso_3166_2")

    # prove it worked: geometry from the shape file, fill colour driven by
    # the CSV's "size" column
    gdf.plot(column="size", cmap="OrRd")
    

    enter image description here