I'm writing a script that extracts ZIP/TAR files using only Python 3 on Windows. It normally works fine, but for some TAR/ZIP files created on Linux that have a colon (":") in member filenames, only an empty file named after the part before the ":" is created.
Is there some way to handle these files with ":" in their names using Python on Windows (e.g. replacing ":" with "_" once the files have been extracted successfully)?
Here's my testing script code:
import zipfile
import tarfile
# Extract a ZIP archive into a folder named after it (".zip" suffix dropped).
# The context manager closes the archive handle even if extraction fails.
uri = "/path/to/file.zip"
with zipfile.ZipFile(uri, 'r') as file:
    file.extractall(uri[:-4])

# Same for a TAR archive (".tar" suffix dropped).
uri = "/path/to/file.tar"
with tarfile.open(uri, 'r') as file:
    file.extractall(uri[:-4])
Instead of extracting all files using extractall(),
you can iterate over each item, replace the invalid character in its filename, and then extract it to the desired location:
Implementation for ZIP files:
import zipfile
import os
def sanitize_filename(filename: str) -> str:
    """Return *filename* with every ":" replaced by "_".

    Only the colon is rewritten; path separators and all other characters
    are left untouched, so archive member paths keep their directory layout.
    """
    return filename.replace(":", "_")
uri = "/path/file.zip"
output_dir = uri[:-4]  # drop the ".zip" suffix to get the destination folder

# NOTE: member names are used as-is apart from the ":" replacement; absolute
# paths or ".." components are not filtered, so only use this on trusted
# archives.
with zipfile.ZipFile(uri, 'r') as zip_file:
    for member in zip_file.namelist():
        target = os.path.join(output_dir, sanitize_filename(member))

        if member.endswith("/"):
            # Directory entry: create the folder and move on. Opening it
            # for writing like a regular file would raise an error.
            os.makedirs(target, exist_ok=True)
            continue

        os.makedirs(os.path.dirname(target), exist_ok=True)
        with zip_file.open(member) as source:
            with open(target, "wb") as target_file:
                # Copy in 1 MiB chunks so large members are never held in
                # memory all at once.
                for chunk in iter(lambda: source.read(1024 * 1024), b""):
                    target_file.write(chunk)
Implementation for TAR files:
import tarfile
import os
def sanitize_filename(filename: str) -> str:
    """Return *filename* with every ":" replaced by "_".

    Only the colon is rewritten; path separators and all other characters
    are left untouched, so archive member paths keep their directory layout.
    """
    return filename.replace(":", "_")
uri = "/path/file.tar"
output_dir = uri[:-4]  # drop the ".tar" suffix to get the destination folder

# NOTE: member names are used as-is apart from the ":" replacement; absolute
# paths or ".." components are not filtered, so only use this on trusted
# archives.
with tarfile.open(uri, 'r') as tar_file:
    for member in tar_file.getmembers():
        target = os.path.join(output_dir, sanitize_filename(member.name))

        if member.isdir():
            # Create directories explicitly so empty ones are not lost.
            os.makedirs(target, exist_ok=True)
            continue

        os.makedirs(os.path.dirname(target), exist_ok=True)
        source = tar_file.extractfile(member)
        if source is None:
            # extractfile() returns None for members that are neither
            # regular files nor links; there is no data to write.
            continue

        with source:
            with open(target, "wb") as target_file:
                # Copy in 1 MiB chunks so large members are never held in
                # memory all at once.
                for chunk in iter(lambda: source.read(1024 * 1024), b""):
                    target_file.write(chunk)