I have two binary I/O streams (both inherit from BufferedIOBase), which represent two tar archives compressed with the gzip algorithm.
Is there an efficient way to create a third one that is the combination of the other two?
I tried opening both streams as tarfile.TarFile objects
through the fileobj
argument and adding each member to a third one:
def merge_environment(a: Optional[BinaryIO], b: Optional[BinaryIO]) -> Optional[BinaryIO]:
    """Merge the gzip-compressed tar archives <a> and <b> into a new one.

    Members of <a> are copied first, followed by the members of <b> whose
    name does not already appear in <a>; when both archives contain an entry
    with the same name, the one from <a> prevails.  Members with an empty
    name are skipped.

    Args:
        a: Seekable binary stream holding a ``tar.gz`` archive, or None.
        b: Seekable binary stream holding a ``tar.gz`` archive, or None.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that contains the merged
        ``tar.gz`` archive, or None when both <a> and <b> are None.  A single
        None input is treated as an empty archive.

    Raises:
        tarfile.TarError: if a stream is not a readable gzip-compressed tar
            archive.
    """
    if a is None and b is None:
        return None

    destio = io.BytesIO()
    # Names already written by an earlier archive; a set keeps lookups O(1).
    taken = set()
    with tarfile.open(fileobj=destio, mode="w:gz") as dest:
        for source in (a, b):
            if source is None:
                continue
            with tarfile.open(fileobj=source, mode="r:gz") as archive:
                members = [
                    m for m in archive.getmembers()
                    if m.name != "" and m.name not in taken
                ]
                for member in members:
                    # Only regular files carry data.  Directories, links,
                    # FIFOs and devices are fully described by their TarInfo,
                    # and extractfile() would raise KeyError for a link whose
                    # target is missing from the archive.
                    payload = archive.extractfile(member) if member.isreg() else None
                    dest.addfile(member, payload)
                # Register names only once the archive is done, so duplicate
                # names inside one archive are all preserved.
                taken.update(m.name for m in members)

    # Rewind only after the writer is closed, otherwise the gzip trailer and
    # the tar end-of-archive blocks have not been flushed into the buffer yet.
    destio.seek(0)
    return destio
But this doesn't work if either of the tar.gz archives contains a directory, since TarFile.extractfile()
does not work on directories.
Is there any solution?
The solution was quite simple: the fileobj
argument of TarFile.addfile()
is not mandatory, so for a directory the tarinfo
alone is enough:
# Build the merged archive in memory.  The class is io.BytesIO (capital "B");
# io.bytesIO does not exist and raises AttributeError.
destio = io.BytesIO()
with tarfile.open(fileobj=a, mode="r:gz") as t1, \
        tarfile.open(fileobj=b, mode="r:gz") as t2, \
        tarfile.open(fileobj=destio, mode="w:gz") as dest:
    # A set gives O(1) membership tests when filtering the second archive.
    t1_names = set(t1.getnames())
    t1_members = t1.getmembers()
    # Entries of <a> prevail: skip anything in <b> whose name <a> already has.
    t2_members = [m for m in t2.getmembers() if m.name not in t1_names]
    for archive, members in ((t1, t1_members), (t2, t2_members)):
        for member in members:
            # Only regular files carry data; directories (and links, FIFOs,
            # devices) are fully described by their TarInfo, so addfile() is
            # called without a file object for them.
            payload = archive.extractfile(member) if member.isreg() else None
            dest.addfile(member, payload)
# Rewind only after the writer is closed, so the gzip trailer has been flushed
# and the buffer can be read from the start.
destio.seek(0)