Search code examples
python-zipfile

How do I use Python 3.12 zipfile to extract all the data in a zip file that contains nested zip files?


I'm using this code:

# extractall() unpacks only the members of sourceZip itself; any zip files
# stored inside it are written to extractionPath as ordinary files.
with ZipFile(sourceZip, mode="r") as extraction:
    extraction.extractall(extractionPath)
    # No explicit close() is needed: leaving the with-block closes the archive.

This only extracts the first layer: the zip files contained in sourceZip are written to extractionPath as-is, without being unpacked themselves.

I have read "https://docs.python.org/3/library/zipfile.html#" and I need the code to also extract the second layer of zip files (the archives nested inside sourceZip).


Solution

  • Recursively extract the archive's contents until all nested levels have been extracted: extract each zip file, check whether any of the extracted files is itself a zip archive, and extract that archive in turn.

    import os
    import zipfile
    
    def extract_zipfile_recursive(zip_file_path, extract_to):
        """
        Recursively extract the contents of a zip file and any nested zip files.

        Every member is extracted below extract_to. A member whose name ends in
        ".zip" (case-insensitive) is kept where it was extracted and is also
        unpacked into a directory next to it, named after the archive without
        its extension (for example "sub/inner.zip" -> "sub/inner/").

        zip_file_path: Path to the zip file.
        extract_to: Directory where extracted files should be saved.

        Raises zipfile.BadZipFile if zip_file_path, or a nested member named
        "*.zip", is not a valid zip archive.

        NOTE: nesting depth is not limited; do not run this on untrusted
        archives without adding a depth/size limit.
        """
        nested_archives = []
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            for item in zip_ref.infolist():
                # extract() returns the sanitized path that was actually
                # written, which may differ from a naive join with the raw
                # member name (e.g. absolute names or ".." components).
                extracted_path = zip_ref.extract(item, extract_to)

                # Directory entries are only created, never opened as archives.
                # Files inside them appear in infolist() under their full path,
                # so nested zips in subdirectories are still found below.
                if item.is_dir():
                    continue

                if item.filename.lower().endswith(".zip"):
                    nested_archives.append(extracted_path)

        # Recurse only after the parent archive has been closed, so deeply
        # nested archives do not keep a growing stack of files open.
        for nested_zip_path in nested_archives:
            nested_extract_to = os.path.splitext(nested_zip_path)[0]
            extract_zipfile_recursive(nested_zip_path, nested_extract_to)
    
    # Example usage. Guarded so that importing this module does not start an
    # extraction; the code below runs only when the file is executed directly.
    if __name__ == "__main__":
        # Placeholder paths: replace with the archive to unpack and the
        # directory that should receive its contents.
        zip_file_path = "path/to/your/nested.zip"
        extract_to_directory = "path/to/extracted/files"

        # Create the directory if it doesn't exist
        os.makedirs(extract_to_directory, exist_ok=True)

        # Extract contents of the nested zip file recursively
        extract_zipfile_recursive(zip_file_path, extract_to_directory)