I want to download the most recently created file in an S3 folder.
Example S3 PATH:
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2021_03_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2023_04_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2022_05_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2021_05_str3.csv
I need to download only the most recently created file. From the list above, for example, the file str_str2_2023_04_str3.csv
should be downloaded.
I created a function, get_file_folders(),
that gets all the files in folder_4:
str_str2_2021_03_str3.csv
str_str2_2023_04_str3.csv
str_str2_2022_05_str3.csv
str_str2_2021_05_str3.csv
# Open a boto3 session with the named local profile in the eu-west-3 region,
# then derive the low-level S3 client used by the functions below.
session_root = boto3.Session(
    profile_name='my_profile',
    region_name='eu-west-3',
)
s3_client = session_root.client('s3')
def get_file_folders(s3_client, bucket_name, prefix=""):
    """List all keys under ``prefix`` in a bucket, split into files and folders.

    Keys ending in "/" are treated as folder placeholders; every other key is
    treated as a file. Every page of the listing is fetched by following
    ``NextContinuationToken`` until the response no longer carries one.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)`` (e.g. a boto3
            S3 client).
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are returned.

    Returns:
        A ``(file_names, folders)`` tuple of lists holding the full keys, in
        the order the service returned them.
    """
    file_names = []
    folders = []

    request_kwargs = {
        "Bucket": bucket_name,
        "Prefix": prefix,
    }
    while True:
        response = s3_client.list_objects_v2(**request_kwargs)

        # "Contents" may be missing from the response (e.g. when no key
        # matches the prefix), so fall back to an empty page.
        for result in response.get("Contents") or []:
            key = result["Key"]
            if key.endswith("/"):
                folders.append(key)
            else:
                file_names.append(key)

        next_token = response.get("NextContinuationToken")
        if not next_token:
            break
        # The token must be sent with the next request; otherwise the same
        # first page is returned again and the loop never terminates.
        request_kwargs["ContinuationToken"] = next_token

    # Print the bare file name (last path segment) of each file. Taking the
    # last segment works at any nesting depth, unlike a fixed segment index.
    for file in file_names:
        print(file.rsplit("/", 1)[-1])

    return file_names, folders
I would like to modify the download method below so that it downloads only the most recently created file:
def download_files(s3_client, bucket_name, local_path, file_names, folders):
    """Mirror the given S3 folders and files beneath ``local_path``.

    Each entry of ``folders`` is created as a local directory, then each key
    in ``file_names`` is downloaded from ``bucket_name`` to the same relative
    path under ``local_path``.
    """
    root = Path(local_path)

    # Recreate the folder keys first so that empty folders exist locally too.
    for folder_key in folders:
        (root / folder_key).mkdir(parents=True, exist_ok=True)

    for object_key in file_names:
        destination = root / object_key
        # The parent directory may not correspond to any listed folder key.
        destination.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket_name, object_key, str(destination))
Do you have any idea how I could modify my download method to download only the most recently created file, or do you have another solution?
Thank you.
If you want to determine which object has the latest LastModified
date, as opposed to extracting the date from the Key (filename) of the object, then you could use:
import boto3
BUCKET = 'bucket-name-here'

session = boto3.Session(profile_name='dev')
s3_resource = session.resource('s3')

# Scan every object in the bucket and remember the one with the newest
# LastModified timestamp. Both trackers start as None so that an empty
# bucket can be detected afterwards instead of raising NameError.
latest = None
latest_key = None
for obj in s3_resource.Bucket(BUCKET).objects.all():
    # Keys ending in '/' are folder placeholders, not downloadable files.
    if obj.key.endswith('/'):
        continue
    if latest is None or obj.last_modified > latest:
        latest = obj.last_modified
        latest_key = obj.key

if latest_key is None:
    print('No file objects found in bucket', BUCKET)
else:
    print(latest_key)
    # Assume you just want the filename portion as the local file name.
    target_filename = latest_key.split('/')[-1]
    s3_resource.Object(BUCKET, latest_key).download_file(target_filename)