Search code examples
pythondataframegeopandas

Sum dataframe values in for loop inside a for loop


I have a large polygon file, small polygon file and points file. What I do here is loop through large polygons to find which small polygons intersect. Then calculate the area of each small polygon within the large one. And then I loop through the small polygons to find points statistics in each of them.

I have found number_of_somethin value in each small polygon. And the question would be how to can I sum all number_of_somethin small polygons values within the large polygon and store the results in original large_polygon file as a new column, let's say large_polygon['smth_sum']?

With df_res_2.loc[idx, 'smth'] = number_of_somethin I get number_of_somethin values in each small polygon inside the large ones. Now I need to sum them in large_polygon['smth_sum']

Note: FID is the id for large polygons and ID is the id for small polygons

import geopandas as gpd

small_polygon = gpd.read_file(r'R:\...\small.shp')
large_polygon = gpd.read_file(r'R:\...\large.shp')
points = gpd.read_file(r'R:\...\points.shp')

SmallJoin =gpd.sjoin(small_polygon, large_polygon)[['FID', 'ID', 'someValue','geometry']]

for i in large_polygon.index:
    df_i = SmallJoin[SmallJoin['FID'] == i]

    # i do something here, f.e. calculate small polgyon area
    df_res = gpd.overlay(large_polygon, df_i, how='intersection')
    df_res['area'] = round((df_res.apply(lambda row: row.geometry.area, axis=1)), 4)

    # now i know area for each small polygon within large polygon
    df_res_2 = df_res[df_res['FID_1'] == i]

    # now point statistics in small polygons
    PointsJoin =gpd.sjoin(points, df_res)[['ID','someAttribute', 'someAttribute2','geometry']]

    for idx, val in df_res_2['ID'].items():
        df_idx = PointsJoin[PointsJoin['ID'] == val]
        number_of_somethin = df_idx ['someAttribute'] + 121 + df_idx['someAttribute2']
        df_res_2.loc[idx, 'smth'] = number_of_somethin

I had a few ideas how to do this, but none of them are not wokring, so I assume that there is some other way.

large_polygon.loc[i, 'smth_sum'] = df_res_2['smth']
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth'].sum()

large_polygon['smth_sum'] = large_polygon[large_polygon['FID'] == df_res_2['FID_1'].sum()]

Solution

    • you describe three GeoDataFrame

      1. large - have used country boundaries for this
      2. small - have used UTM zone boundaries for this
      3. point - have used randomly generated points that mostly overlap 2
    • you define that you want two outputs for each large geometry (country here)

      • area - sum of intersection area of each small geometry
      • value - sum of value of points that is within a small geometry that spatially joins to a large geometry
    • all of the above can be achieved with spatial joins and pandas merge() and groupby()

    • to make this clearer - also included a way to visualise all of this

    import geopandas as gpd
    import shapely.geometry
    import requests
    import numpy as np
    import plotly.express as px
    
    # get some sample data....
    # fmt: off
    gdf_utm = gpd.GeoDataFrame.from_features(requests.get("https://opendata.arcgis.com/datasets/b294795270aa4fb3bd25286bf09edc51_0.geojson").json()).set_crs("EPSG:4326")
    gdf_countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
    
    large_polygon = gdf_countries.loc[lambda d: d["iso_a3"].isin(["BEL", "LUX", "NLD", "DEU", "AUT"])]
    # large_polygon.boundary.plot()
    
    small_polygon = gpd.sjoin(gdf_utm, large_polygon).loc[:, gdf_utm.columns].groupby(["FID", "ZONE"]).first().reset_index()
    # fmt: on
    
    # some points within geometry of small_polygon
    b = small_polygon.total_bounds
    POINTS = 10
    points = gpd.GeoDataFrame(
        geometry=[
            shapely.geometry.Point(x, y)
            for x, y in zip(
                np.random.uniform(*b[[0, 2]], POINTS),
                np.random.uniform(*b[[1, 3]], POINTS),
            )
        ],
        data={"value": np.arange(POINTS)},
        crs="EPSG:4326",
    )
    
    # spatial small to large with geometry from large
    SmallJoin = gpd.sjoin(small_polygon, large_polygon).merge(
        large_polygon["geometry"],
        left_on="index_right",
        right_index=True,
        suffixes=("", "_large"),
    )
    SmallJoin["area"] = SmallJoin.intersection(gpd.GeoSeries(SmallJoin["geometry_large"])).area
    
    # get sums of area of overlap and sum of values from points
    Final = (
        SmallJoin.rename(columns={"index_right": "index_large"})
        .sjoin(points)
        .groupby("index_large")
        .agg({"area": "sum", "value": "sum", "geometry_large": "first"})
    )
    

    output

    index_large area value
    114 24.6382 25
    121 90.3565 45
    128 0.603031 20
    129 7.65999 20
    130 10.5284 20

    visualise it

    px.choropleth_mapbox(
        Final,
        geojson=gpd.GeoSeries(Final["geometry_large"]),
        locations=Final.index,
        color="value",
        hover_data=["area"],
    ).add_traces(
        px.scatter_mapbox(
            points,
            lat=points.geometry.y,
            lon=points.geometry.x,
            color="value",
        )
        .update_traces(marker_coloraxis="coloraxis2", marker_size=10)
        .data
    ).update_layout(
        mapbox={
            "style": "carto-positron",
            "center": {"lon": sum(b[[0, 2]]) / 2, "lat": sum(b[[1, 3]]) / 2},
            "zoom": 3,
            "layers": [{"source": small_polygon.__geo_interface__, "type": "line"}],
        },
        coloraxis2={
            "colorbar": {"x": -0.1, "title": "scatter"},
            "colorscale": [[0, "blue"], [1, "blue"]],
        },
        coloraxis={"colorscale": [[0, "white"], [1, "green"]]},
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
    )
    
    
    

    enter image description here