I have a large polygon
file, small polygon
file and points
file. What I do here is loop through large polygons to find which small polygons intersect. Then calculate the area of each small polygon within the large one. And then I loop through the small polygons to find points statistics in each of them.
I have found number_of_somethin
value in each small polygon. And the question would be how to can I sum all number_of_somethin
small polygons values within the large polygon and store the results in original large_polygon
file as a new column, let's say large_polygon['smth_sum']
?
With df_res_2.loc[idx, 'smth'] = number_of_somethin
I get number_of_somethin values in each small polygon inside the large ones. Now I need to sum them in large_polygon['smth_sum']
Note: FID is the id for large polygons and ID is the id for small polygons
import geopandas as gpd
small_polygon = gpd.read_file(r'R:\...\small.shp')
large_polygon = gpd.read_file(r'R:\...\large.shp')
points = gpd.read_file(r'R:\...\points.shp')
SmallJoin =gpd.sjoin(small_polygon, large_polygon)[['FID', 'ID', 'someValue','geometry']]
for i in large_polygon.index:
df_i = SmallJoin[SmallJoin['FID'] == i]
# i do something here, f.e. calculate small polgyon area
df_res = gpd.overlay(large_polygon, df_i, how='intersection')
df_res['area'] = round((df_res.apply(lambda row: row.geometry.area, axis=1)), 4)
# now i know area for each small polygon within large polygon
df_res_2 = df_res[df_res['FID_1'] == i]
# now point statistics in small polygons
PointsJoin =gpd.sjoin(points, df_res)[['ID','someAttribute', 'someAttribute2','geometry']]
for idx, val in df_res_2['ID'].items():
df_idx = PointsJoin[PointsJoin['ID'] == val]
number_of_somethin = df_idx ['someAttribute'] + 121 + df_idx['someAttribute2']
df_res_2.loc[idx, 'smth'] = number_of_somethin
I had a few ideas how to do this, but none of them are not wokring, so I assume that there is some other way.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth']
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth'].sum()
large_polygon['smth_sum'] = large_polygon[large_polygon['FID'] == df_res_2['FID_1'].sum()]
you describe three GeoDataFrame
you define that you want two outputs for each large geometry (country here)
all of the above can be achieved with spatial joins and pandas merge()
and groupby()
to make this clearer - also included a way to visualise all of this
import geopandas as gpd
import shapely.geometry
import requests
import numpy as np
import plotly.express as px
# get some sample data....
# fmt: off
gdf_utm = gpd.GeoDataFrame.from_features(requests.get("https://opendata.arcgis.com/datasets/b294795270aa4fb3bd25286bf09edc51_0.geojson").json()).set_crs("EPSG:4326")
gdf_countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
large_polygon = gdf_countries.loc[lambda d: d["iso_a3"].isin(["BEL", "LUX", "NLD", "DEU", "AUT"])]
# large_polygon.boundary.plot()
small_polygon = gpd.sjoin(gdf_utm, large_polygon).loc[:, gdf_utm.columns].groupby(["FID", "ZONE"]).first().reset_index()
# fmt: on
# some points within geometry of small_polygon
b = small_polygon.total_bounds
POINTS = 10
points = gpd.GeoDataFrame(
geometry=[
shapely.geometry.Point(x, y)
for x, y in zip(
np.random.uniform(*b[[0, 2]], POINTS),
np.random.uniform(*b[[1, 3]], POINTS),
)
],
data={"value": np.arange(POINTS)},
crs="EPSG:4326",
)
# spatial small to large with geometry from large
SmallJoin = gpd.sjoin(small_polygon, large_polygon).merge(
large_polygon["geometry"],
left_on="index_right",
right_index=True,
suffixes=("", "_large"),
)
SmallJoin["area"] = SmallJoin.intersection(gpd.GeoSeries(SmallJoin["geometry_large"])).area
# get sums of area of overlap and sum of values from points
Final = (
SmallJoin.rename(columns={"index_right": "index_large"})
.sjoin(points)
.groupby("index_large")
.agg({"area": "sum", "value": "sum", "geometry_large": "first"})
)
index_large | area | value |
---|---|---|
114 | 24.6382 | 25 |
121 | 90.3565 | 45 |
128 | 0.603031 | 20 |
129 | 7.65999 | 20 |
130 | 10.5284 | 20 |
px.choropleth_mapbox(
Final,
geojson=gpd.GeoSeries(Final["geometry_large"]),
locations=Final.index,
color="value",
hover_data=["area"],
).add_traces(
px.scatter_mapbox(
points,
lat=points.geometry.y,
lon=points.geometry.x,
color="value",
)
.update_traces(marker_coloraxis="coloraxis2", marker_size=10)
.data
).update_layout(
mapbox={
"style": "carto-positron",
"center": {"lon": sum(b[[0, 2]]) / 2, "lat": sum(b[[1, 3]]) / 2},
"zoom": 3,
"layers": [{"source": small_polygon.__geo_interface__, "type": "line"}],
},
coloraxis2={
"colorbar": {"x": -0.1, "title": "scatter"},
"colorscale": [[0, "blue"], [1, "blue"]],
},
coloraxis={"colorscale": [[0, "white"], [1, "green"]]},
margin={"l": 0, "r": 0, "t": 0, "b": 0},
)