A zip file contains a tar.gz file. How do I retrieve the members of the tar.gz file without extracting it to disk first?
abc.zip
|- def.txt
|- ghi.zip
|- jkl.tar.gz
def scan_zip_file(zfile):
    """Return the file names found in a zip archive, looking inside nested zips.

    Args:
        zfile: Path to a zip archive, or a binary file-like object holding one.

    Returns:
        A list of names. Plain members contribute their own name; a member
        ending in '.zip' contributes the names of its entries instead.
        Members ending in '.tar.gz' are currently skipped (contents not
        listed yet).
    """
    l_files = []
    with zipfile.ZipFile(zfile, 'r') as zf:
        for zname in zf.namelist():
            if zname.endswith('.zip'):
                # Read the nested archive into memory so nothing is
                # extracted to disk.
                with zipfile.ZipFile(io.BytesIO(zf.read(zname))) as zf2:
                    l_files.extend(zf2.namelist())
            elif zname.endswith('.tar.gz'):
                # Placeholder: members of the nested tarball are not
                # retrieved yet.
                pass
            else:
                l_files.append(zname)
    # Hand the collected names back to the caller; without this the
    # function always returned None.
    return l_files
You can use the tarfile module in much the same way you used the zipfile module. To complete your code and get the names of the files inside the tar.gz file:
def scan_zip_file(zfile):
    """Return the file names in a zip archive, expanding nested archives.

    Nested archives are read into memory, so nothing is extracted to disk.

    Args:
        zfile: Path to a zip archive, or a binary file-like object holding one.

    Returns:
        A list of names. Plain members contribute their own name; a member
        ending in '.zip' or '.tar.gz' contributes the names of the entries
        it contains instead (one level deep).
    """
    l_files = []
    with zipfile.ZipFile(zfile, 'r') as zf:
        for zname in zf.namelist():
            if zname.endswith('.zip'):
                with zipfile.ZipFile(io.BytesIO(zf.read(zname))) as zf2:
                    l_files.extend(zf2.namelist())
            elif zname.endswith('.tar.gz'):
                # fileobj lets tarfile read the tarball from the in-memory
                # buffer instead of from a path on disk.
                with tarfile.open(fileobj=io.BytesIO(zf.read(zname))) as tf:
                    l_files.extend(tf.getnames())
            else:
                l_files.append(zname)
    # Hand the collected names back to the caller; without this the
    # function always returned None.
    return l_files
The fileobj argument of tarfile.open tells it to read the archive from a file-like object, which is what io.BytesIO provides.