I'm having trouble summing 15 rasters with these dimensions: columns = 53241, rows = 45598, cell size = 30 meters, data type = float32. I used rasterio to perform this operation, but I run into a memory problem. Any suggestions?
Thanks in advance
This is the code:
import os
import fnmatch

import numpy as np
import rasterio

input = r'data folder path'
output = r'output path'

dir_list = os.listdir(input)
merge_dir = []
for filename in fnmatch.filter(dir_list, '*.tif'):
    merge_dir.append(os.path.join(input, filename))

# Read every raster fully into memory
map2array = []
for raster in merge_dir:
    with rasterio.open(raster) as src:
        map2array.append(src.read().astype(np.float32))
        profile = src.profile
        profile.update(compress='lzw')

# Sum the stack, ignoring NaNs
mosaic = np.nansum(map2array, 0)

with rasterio.open(output, 'w', **profile) as dst:
    # Write to disk
    dst.write(mosaic)
It fails with:

Unable to allocate 100.04 GiB for an array with shape (1, 45598, 53241) and data type float32
There are several ways (there are some threads here on Stack Overflow about it) to improve performance and/or memory consumption when reading, writing, and processing large rasters. Here is an example that uses a library called dask-rasterio to read, process, and write in chunks (window blocks). I like it because it's easy to use, but you can also implement the same thing in pure dask:
import os
import fnmatch

import dask.array as da
import rasterio
from dask_rasterio import read_raster, write_raster

input_tif = 'data folder path'

merge_dir = []
for filename in fnmatch.filter(os.listdir(input_tif), '*.tif'):
    merge_dir.append(os.path.join(input_tif, filename))

# read_raster returns a lazy dask array backed by windowed reads;
# block_size scales the raster's native block size used per chunk
map2array = []
for raster in merge_dir:
    map2array.append(read_raster(raster, band=1, block_size=10))

ds_stack = da.stack(map2array)

# Copy the georeferencing profile from the first raster
with rasterio.open(merge_dir[0]) as src:
    profile = src.profile
    profile.update(compress='lzw')

# The sum is computed chunk by chunk as write_raster streams it to disk
write_raster("output.tif", da.nansum(ds_stack, 0), **profile)
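If you'd rather avoid the extra dependency, the same block-wise idea works with plain rasterio: iterate over the internal block windows, sum just that window across all rasters, and write it out before moving on. This is a minimal sketch, assuming all inputs are single-band and share the same grid and internal block layout (input_tif and the output name are placeholders):

import os
import fnmatch

import numpy as np
import rasterio

input_tif = 'data folder path'
rasters = [os.path.join(input_tif, f)
           for f in fnmatch.filter(os.listdir(input_tif), '*.tif')]

# Open every source once; they are assumed to share grid and block layout
srcs = [rasterio.open(r) for r in rasters]
profile = srcs[0].profile
profile.update(compress='lzw', dtype='float32')

with rasterio.open('output.tif', 'w', **profile) as dst:
    # block_windows yields ((row, col), Window) for each internal tile
    for _, window in srcs[0].block_windows(1):
        # Only one window per raster is held in memory at any time
        block = np.nansum(
            [s.read(1, window=window).astype(np.float32) for s in srcs],
            axis=0)
        dst.write(block, 1, window=window)

for s in srcs:
    s.close()

Peak memory here is roughly (number of rasters) × (one block), regardless of the full raster dimensions.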