I am having following code in AWS lambda to zip S3 files
import zipfile
import boto3
from io import BytesIO
def lambda_handler(event, context):
in_bucket = 'in_bucket'
in_key = 'test/run_id=123/'
out_bucket = 'out_bucket'
out_key = 'test/zip_output/model.zip'
createZipFileStream(in_bucket, in_key, out_bucket, out_key)
def create_zip(in_bucket, in_key, out_bucket, out_key):
s3 = boto3.resource('s3')
bucket = s3.Bucket(in_bucket)
files_collection = bucket.objects.filter(Prefix=in_key).all()
archive = BytesIO()
with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zip_archive:
for f in files_collection:
with zip_archive.open(f.key, 'w') as file1:
file1.write(f.get()['Body'].read())
archive.seek(0)
s3.Object(out_bucket, out_key).upload_fileobj(archive)
archive.close()
Above code though work successfully the zip contains entire S3 path. I would like to have only the file objects in the zip and not the path location. For example, if the input path s3://bucket/key/file.csv the zip has folder key and then file.csv and I am looking for a solution to have only the file object.
Thank you
It appears that this line:
with zip_archive.open(f.key, 'w') as file1:
is telling the Zip what to name the file.
Therefore, you could change it like this:
with zip_archive.open(f.key.split('/')[-1], 'w') as file1:
That will give it only the characters found after the last slash.