I'm trying to create a tar stream in memory, add files to it, and then save it to S3. But there is some issue, and the files inside the tar have zero size. Can anyone please advise? Code snippet below:
def tar_and_upload(bucket, keys, dest_bucket):
    """Bundle S3 objects into an in-memory ``.tar.gz`` and upload it to S3.

    Each key is downloaded from ``bucket`` and stored in the archive under
    its base name (the part after the last ``/``), so keys that share a base
    name produce members with the same name. The finished archive is written
    to ``dest_bucket`` under a random ``<uuid4>.tar.gz`` key.

    Args:
        bucket: Name of the bucket the source objects are read from.
        keys: Iterable of object keys to include in the archive.
        dest_bucket: Name of the bucket the archive is uploaded to.

    Returns:
        None. A failed upload is logged (with traceback) instead of raised.
    """
    s3 = boto3.client('s3')
    archive_buffer = io.BytesIO()

    # The context manager finalizes the gzip/tar trailers even if a download
    # fails part-way through the loop.
    with tarfile.open(mode="w:gz", fileobj=archive_buffer) as archive:
        for key in keys:
            payload = s3.get_object(Bucket=bucket, Key=key)["Body"].read()
            member = tarfile.TarInfo(key.split("/")[-1])
            # TarInfo.size defaults to 0 and addfile() copies exactly
            # ``size`` bytes from a file-like object, so both the size and
            # the BytesIO wrapper are required to get non-empty members.
            member.size = len(payload)
            archive.addfile(member, io.BytesIO(payload))

    obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
    try:
        s3.put_object(
            Body=archive_buffer.getvalue(),
            Bucket=dest_bucket,
            Key=obj_name,
        )
    except Exception:
        # Best-effort upload: report the failure and return normally.
        logging.error("Can't save tar to S3", exc_info=True)
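Okay, apparently when adding byte streams to a tar, we need to explicitly specify the size: `TarInfo.size` defaults to 0, and `addfile()` expects a file-like object from which it copies exactly `size` bytes. Sample code: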
import tarfile
import uuid
import io
import os
def tar_and_upload(src_dir="images", dest_dir="temp"):
    """Pack the regular files in ``src_dir`` into a ``.tar.gz`` in ``dest_dir``.

    The archive is assembled in memory and then written to
    ``dest_dir/<uuid4>.tar.gz``. Each added file name and the final archive
    name are printed.

    Args:
        src_dir: Directory whose files are archived. Defaults to ``"images"``.
        dest_dir: Existing directory that receives the archive. Defaults to
            ``"temp"``.

    Returns:
        None. An ``OSError`` while writing the archive is printed, not raised.
    """
    archive_buffer = io.BytesIO()

    # The context manager closes the archive (writing the tar/gzip trailers)
    # even if reading one of the input files fails.
    with tarfile.open(mode="w:gz", fileobj=archive_buffer) as archive:
        # Sorted so the member order does not depend on the file system.
        for filename in sorted(os.listdir(src_dir)):
            file_path = os.path.join(src_dir, filename)
            if not os.path.isfile(file_path):
                # Sub-directories cannot be opened with open(); skip them.
                continue
            print(filename)
            with open(file_path, "rb") as f:
                _bytes = f.read()
            tar_info = tarfile.TarInfo(filename)
            # addfile() copies exactly ``size`` bytes; the default of 0 would
            # produce empty members.
            tar_info.size = len(_bytes)
            archive.addfile(tar_info, io.BytesIO(_bytes))

    obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
    object_path = os.path.join(dest_dir, obj_name)
    try:
        with open(object_path, "wb") as f:
            f.write(archive_buffer.getvalue())
        print(obj_name)
    except OSError as e:
        print(str(e))
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    tar_and_upload()