My goal is to extract certain files from a Zip archive and stream them straight into another Zip archive, without an intermediate extraction to disk.
So far I have:
from zipfile import ZipFile, ZIP_DEFLATED
def stream_conents(src_zip, dst_zip, file_subset_list):
    """Attempt to copy the members named in file_subset_list from src_zip to dst_zip.

    NOTE: this version does not work. It raises
    ``TypeError: argument should be string, bytes or integer, not ZipExtFile``
    because the object returned by ``src_zip_archive.open()`` is handed to
    ``dst_zip_archive.write()``.
    """
    with ZipFile(src_zip, "r", compression=ZIP_DEFLATED) as src_zip_archive:
        with ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as dst_zip_archive:
            for zitem in src_zip_archive.namelist():
                # Only members whose names appear in file_subset_list are handled.
                if zitem in file_subset_list:
                    # Opens the member for reading; this is the ZipExtFile
                    # object named in the error message.
                    zitem_object = src_zip_archive.open(zitem)
                    # This is the failing call: the first argument here is the
                    # ZipExtFile, which write() rejects with the TypeError.
                    dst_zip_archive.write(zitem_object, zitem, )
But it just throws `TypeError: argument should be string, bytes or integer, not ZipExtFile`.
You can read each selected member fully into memory and use `writestr` to write it into the destination archive.
def stream_conents(src_zip, dst_zip, file_subset_list):
    """Copy selected members from one Zip archive into a new Zip archive.

    Each selected member is read completely into memory and then written to
    the destination with ``writestr``, so nothing is extracted to disk.

    Args:
        src_zip: Path or file-like object of the archive to read from.
        dst_zip: Path or file-like object of the archive to create. It is
            opened in ``"w"`` mode, so existing content is replaced.
        file_subset_list: Iterable of member names to copy. Names that do
            not occur in the source archive are ignored.
    """
    # Build the lookup once so each membership test is O(1) instead of
    # rescanning the list for every member of the source archive.
    wanted = frozenset(file_subset_list)

    # The compression argument only matters when writing, so it is given to
    # the destination archive only.
    with ZipFile(src_zip, "r") as src_zip_archive:
        with ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as dst_zip_archive:
            for zitem in src_zip_archive.namelist():
                if zitem in wanted:
                    # warning, may blow up memory: the whole member is held
                    # in memory before it is written out.
                    dst_zip_archive.writestr(zitem, src_zip_archive.read(zitem))
Starting with Python 3.6, `ZipFile.open` can also open an archive member in write mode (`mode="w"`). That lets you copy each member in chunks instead of loading it whole, which reduces overall memory usage.
def stream_conents(src_zip, dst_zip, file_subset_list):
    """Stream selected members from one Zip archive into a new Zip archive.

    On Python 3.6+ each selected member is copied in chunks through a
    writable archive entry, so a member is never held in memory as a whole.
    On older interpreters the member is read fully and written with
    ``writestr``.

    Args:
        src_zip: Path or file-like object of the archive to read from.
        dst_zip: Path or file-like object of the archive to create. It is
            opened in ``"w"`` mode, so existing content is replaced.
        file_subset_list: Iterable of member names to copy. Names that do
            not occur in the source archive are ignored.
    """
    # Imported locally so the function does not depend on module-level
    # imports of shutil and sys being present.
    import shutil
    import sys

    # Build the lookup once so each membership test is O(1).
    wanted = frozenset(file_subset_list)
    # The interpreter version cannot change mid-loop; evaluate it once.
    can_stream = sys.version_info >= (3, 6)
    # Largest member size that fits in a non-ZIP64 entry header.
    zip64_limit = (1 << 31) - 1

    # The compression argument only matters when writing, so it is given to
    # the destination archive only.
    with ZipFile(src_zip, "r") as src_zip_archive:
        with ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as dst_zip_archive:
            for zinfo in src_zip_archive.infolist():
                zitem = zinfo.filename
                if zitem not in wanted:
                    continue
                if can_stream:
                    # A writable entry does not know its final size up front.
                    # The source header does, so request ZIP64 when the member
                    # is too large for a classic entry; otherwise closing the
                    # entry would fail for members over 2 GiB.
                    needs_zip64 = zinfo.file_size > zip64_limit
                    with src_zip_archive.open(zinfo) as from_item:
                        with dst_zip_archive.open(
                            zitem, "w", force_zip64=needs_zip64
                        ) as to_item:
                            shutil.copyfileobj(from_item, to_item)
                else:
                    # warning, may blow up memory: the whole member is held
                    # in memory before it is written out.
                    dst_zip_archive.writestr(zitem, src_zip_archive.read(zinfo))