Search code examples
python python-3.x archive zip

Extract file from ZIP straight to another ZIP


My goal is to extract certain files from a ZIP archive and stream them straight into another ZIP archive, without an intermediate extraction to disk.

So far I have:

from zipfile import ZipFile, ZIP_DEFLATED


def stream_conents(src_zip, dst_zip, file_subset_list):
    """Attempt to copy the members named in file_subset_list from src_zip to dst_zip.

    This is the failing version from the question. It raises
    ``TypeError: argument should be string, bytes or integer, not ZipExtFile``
    at the ``write()`` call below.
    """
    with ZipFile(src_zip, "r", compression=ZIP_DEFLATED) as src_zip_archive:
        with ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as dst_zip_archive:
            # Walk every member name of the source archive and keep only the
            # ones the caller asked for.
            for zitem in src_zip_archive.namelist():
                if zitem in file_subset_list:
                    # open() hands back a ZipExtFile (a file-like object), not a path.
                    zitem_object = src_zip_archive.open(zitem)
                    # This is the failing call: the ZipExtFile is passed where
                    # write() wants a file name, which triggers the TypeError.
                    dst_zip_archive.write(zitem_object, zitem, )

But it just throws TypeError: argument should be string, bytes or integer, not ZipExtFile


Solution

  • You can read each file entirely into memory and use writestr to write it to the destination archive.

    def stream_conents(src_zip, dst_zip, file_subset_list):
        """Copy the members listed in file_subset_list from src_zip into dst_zip.

        Every selected member is read completely into memory and then written
        to the destination archive under the same name. Names in
        file_subset_list that are not present in the source are ignored.
        """
        with ZipFile(src_zip, "r", compression=ZIP_DEFLATED) as source, \
                ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as target:
            for member_name in source.namelist():
                # Skip anything the caller did not ask for.
                if member_name not in file_subset_list:
                    continue
                # warning, may blow up memory
                payload = source.read(member_name)
                target.writestr(member_name, payload)
    

    Starting with Python 3.6, ZipFile.open can also open an archive member in write mode (mode "w"). That lets you copy the file in chunks and reduce overall memory usage.

    def stream_conents(src_zip, dst_zip, file_subset_list):
        """Copy the members listed in file_subset_list from src_zip into dst_zip.

        On Python 3.6+ each member is streamed in chunks from the source
        archive straight into the destination archive, so a whole member is
        never held in memory. Older interpreters fall back to reading each
        member fully before writing it.

        Args:
            src_zip: Path or file-like object of the archive to read from.
            dst_zip: Path or file-like object of the archive to create.
            file_subset_list: Iterable of member names to copy. Names that do
                not exist in the source archive are ignored.
        """
        # Imported locally so the snippet is self-contained: it needs both
        # modules, and neither is imported alongside ZipFile/ZIP_DEFLATED.
        import shutil
        import sys

        # Build the lookup once instead of scanning the list for every member.
        wanted = frozenset(file_subset_list)
        # The interpreter version cannot change mid-loop; decide once.
        can_stream = sys.version_info >= (3, 6)
        # Sizes above this need ZIP64 headers (2 GiB - 1).
        zip64_limit = (1 << 31) - 1

        with ZipFile(src_zip, "r") as src_zip_archive, \
                ZipFile(dst_zip, "w", compression=ZIP_DEFLATED) as dst_zip_archive:
            for zitem in src_zip_archive.namelist():
                if zitem not in wanted:
                    continue

                if not can_stream:
                    # warning, may blow up memory
                    dst_zip_archive.writestr(zitem, src_zip_archive.read(zitem))
                    continue

                # open(name, "w") does not know the final size up front, so a
                # large member must request ZIP64 headers explicitly or the
                # write fails. The 5% margin leaves room for data that grows
                # slightly when compressed.
                member_size = src_zip_archive.getinfo(zitem).file_size
                needs_zip64 = member_size * 1.05 > zip64_limit

                with src_zip_archive.open(zitem) as from_item, \
                        dst_zip_archive.open(
                            zitem, "w", force_zip64=needs_zip64
                        ) as to_item:
                    shutil.copyfileobj(from_item, to_item)