This is the first time I have worked with zip files in Python :-/
The task at hand is the following (the main requirement is not writing anything to disk):
given this URL: http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip
collect the shapefiles that have "Africa" in the file name.
This is the structure of the code I have so far, but I keep getting an attribute error:
AttributeError: 'ZipFile' object has no attribute 'seek'
import io
import zipfile
import pandas as pd
import geopandas as gpd
# util funcs
# Filename predicates used below to pick archive members: the first is a
# substring test for "Africa", the second a suffix test for 'shp'.
is_africa = lambda string: "Africa" in string
is_shape = lambda string: string.endswith('shp')
# get_zip() defined in module
# NOTE(review): neither get_zip nor URL is defined in this snippet; the return
# value is assumed to expose the downloaded bytes as `.content` -- TODO confirm.
filebytes = io.BytesIO(get_zip(url=URL).content)
# get the zipfile object
# NOTE(review): myzipfile is not referenced again in the visible code.
myzipfile = zipfile.ZipFile(filebytes)
# instantiate empty list where to store the shapefiles of interest.
shapefiles = []
# below code adapted from: https://stackoverflow.com/questions/4917284/
# NOTE(review): the indentation of the with/for/if bodies below was lost in
# this listing, so the block structure has to be inferred.
# NOTE(review): `zip_file` is never assigned in this snippet. zipfile.ZipFile
# expects a path or a seekable file-like object (such as `filebytes`); handing
# it an already-open ZipFile would be consistent with the reported
# "'ZipFile' object has no attribute 'seek'" error -- TODO confirm which
# object was actually passed.
with zipfile.ZipFile(zip_file, 'r') as zf:
for file_name in zf.namelist():
if is_africa(file_name) and is_shape(file_name):
# zf.read returns the member's raw bytes, so `shapefiles` collects bytes
# objects rather than paths or file handles.
data = zf.read(file_name)
shapefiles.append(data)
# below code adapted from https://stackoverflow.com/questions/48874113/
# Each collected item is passed to gpd.read_file, the results are concatenated
# with a fresh index, and the first item is read a second time only to take
# its .crs. The combined frame is then written to "output.json" as GeoJSON.
# NOTE(review): only the .shp member's bytes are collected; the matching
# .dbf/.shx members are never read -- confirm gpd.read_file can work from a
# lone .shp payload.
# NOTE(review): shapefiles[0] raises IndexError when no member matched.
gdf_africa = gpd.GeoDataFrame(pd.concat([gpd.read_file(i) for i in shapefiles],
ignore_index=True),
crs=gpd.read_file(shapefiles[0]).crs)
gdf_africa.to_file("output.json", driver="GeoJSON")
This code requests the zip file from the URL, reads it into an in-memory stream and extracts the names of the shapefiles for Africa.
from zipfile import ZipFile
import requests
# util funcs
def is_africa(string):
    """Return True when ``"Africa"`` occurs anywhere in *string*."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* ends with ``'shp'``."""
    return string.endswith('shp')


def africa_shapefile_names(zf):
    """List the members of *zf* that are Africa shapefiles.

    Args:
        zf: An open ``zipfile.ZipFile``.

    Returns:
        The member names accepted by both ``is_africa`` and ``is_shape``,
        in the order ``zf.namelist()`` reports them.
    """
    return [name for name in zf.namelist() if is_africa(name) and is_shape(name)]


def main():
    """Download the archive and print the name of every Africa shapefile in it.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip',
        timeout=60,
    )
    # Stop on an HTTP error status here instead of handing an error page to
    # ZipFile, which would only report that the payload is not a zip file.
    response.raise_for_status()
    # The archive is opened straight from the response bytes held in memory.
    with ZipFile(io.BytesIO(response.content)) as zf:
        for file_name in africa_shapefile_names(zf):
            print(file_name)


if __name__ == "__main__":
    main()
# Output
ALL_HFIC/ALL_HFIC/East Africa/EA_200907_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_200910_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201001_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201004_CS.shp
I have never worked with shapefiles or geopandas before, and I have spent the last four hours trying to understand how to use them. I was able to output a JSON file, but I'm unsure whether the data in it meets your needs.
# util funcs
def is_africa(string):
    """Return True when ``"Africa"`` occurs anywhere in *string*."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* ends with ``'shp'``."""
    return string.endswith('shp')


def sidecar_name(shp_name, ext):
    """Return *shp_name* with its trailing ``shp`` replaced by *ext*.

    Only meaningful for names accepted by ``is_shape``.
    """
    return shp_name[:-len("shp")] + ext


def shape_record_to_feature(field_names, shape_record):
    """Build one GeoJSON ``Feature`` dict from a shape/record pair.

    Args:
        field_names: Attribute names, in the same order as the record values.
        shape_record: Object exposing ``.record`` (the attribute values) and
            ``.shape.__geo_interface__`` (the geometry mapping).

    Returns:
        A dict with the keys ``type``, ``geometry`` and ``properties``.
    """
    atr = dict(zip(field_names, shape_record.record))
    geom = shape_record.shape.__geo_interface__
    return dict(type="Feature", geometry=geom, properties=atr)


def read_features(zf, shp_name):
    """Read every feature of the shapefile *shp_name* stored inside *zf*.

    The archive members are handed to pyshp as in-memory file objects, so
    nothing has to be extracted to disk first.

    Args:
        zf: An open ``zipfile.ZipFile``.
        shp_name: Name of the ``.shp`` member.

    Returns:
        A list of GeoJSON ``Feature`` dicts, one per shape record.

    Raises:
        KeyError: If the archive has no matching ``dbf`` member.
    """
    # pyshp is imported here so the pure helpers stay usable without it.
    import shapefile

    # Assumes the sidecar members share the .shp member's stem and use
    # lower-case extensions -- TODO confirm against the archive.
    parts = {
        "shp": io.BytesIO(zf.read(shp_name)),
        "dbf": io.BytesIO(zf.read(sidecar_name(shp_name, "dbf"))),
    }
    try:
        parts["shx"] = io.BytesIO(zf.read(sidecar_name(shp_name, "shx")))
    except KeyError:
        # The .shx index is optional for pyshp; read without it when absent.
        pass

    reader = shapefile.Reader(**parts)
    # The first entry of reader.fields is pyshp's DeletionFlag pseudo-field,
    # which has no counterpart in the record values.
    field_names = [field[0] for field in reader.fields[1:]]
    return [shape_record_to_feature(field_names, sr) for sr in reader.shapeRecords()]


def main():
    """Download the archive and write its Africa shapefiles as one GeoJSON file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    from json import dumps

    response = requests.get(
        'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip',
        timeout=60,
    )
    # Stop on an HTTP error status instead of handing an error page to ZipFile.
    response.raise_for_status()

    # instantiate empty list where to store the features of interest.
    africa_data = []
    with ZipFile(io.BytesIO(response.content)) as zf:
        for file_name in zf.namelist():
            if is_africa(file_name) and is_shape(file_name):
                africa_data.extend(read_features(zf, file_name))

    # The context manager closes the output file even if serialisation fails.
    with open("african_geo_data.json", "w", encoding="utf-8") as geojson:
        geojson.write(dumps({"type": "FeatureCollection", "features": africa_data}, indent=2) + "\n")


if __name__ == "__main__":
    main()
Sample output from the JSON file:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[
40.213226318000125,
-10.277393340999765
],
[
40.21355056800013,
-10.279667853999932
],
[
40.21699915800019,
-10.27847569599988
]
},
"properties": {
"CS": 4.0,
"HA0": 0.0
}
}
]
}