Search code examples
python-3.xamazon-web-servicesamazon-s3boto3

download the last created file in S3 folder


I want to download the most recently created file in an S3 folder.

Example S3 PATH:

my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2021_03_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2023_04_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2022_05_str3.csv
my_Bucket/folder_1/folder_2/folder_3/folder_4/str_str2_2021_05_str3.csv

I need to download only the most recently created file. From the list above, for example, the file str_str2_2023_04_str3.csv should be downloaded.

I created a method get_file_folders() that gets all the files in folder_4:

str_str2_2021_03_str3.csv
str_str2_2023_04_str3.csv
str_str2_2022_05_str3.csv
str_str2_2021_05_str3.csv
# Session bound to a named local AWS profile and an explicit region.
session_root = boto3.Session(
    profile_name='my_profile',
    region_name='eu-west-3',
)
# S3 client created from that session; passed to the helpers below.
s3_client = session_root.client('s3')

def get_file_folders(s3_client, bucket_name, prefix=""):
    """List every object key under ``prefix``, split into files and folders.

    Calls ``list_objects_v2`` repeatedly, forwarding each
    ``NextContinuationToken`` as ``ContinuationToken``, until a response
    carries no further token.

    Args:
        s3_client: Client object exposing ``list_objects_v2(**kwargs)``.
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are requested.

    Returns:
        A ``(file_names, folders)`` tuple of lists of full object keys.
        Keys ending in ``"/"`` go to ``folders``; all others to ``file_names``.
    """
    file_names = []
    folders = []

    request_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
    next_token = None

    while True:
        if next_token:
            # Ask for the page that follows the previous response.
            request_kwargs["ContinuationToken"] = next_token

        response = s3_client.list_objects_v2(**request_kwargs)

        # A response may carry no "Contents" entry (e.g. nothing matches the
        # prefix), so fall back to an empty list instead of iterating None.
        for result in response.get("Contents") or []:
            key = result["Key"]
            if key.endswith("/"):
                folders.append(key)
            else:
                file_names.append(key)

        next_token = response.get("NextContinuationToken")
        if not next_token:
            break

    return file_names, folders

I would like to modify the download method below so that it downloads only the most recently created file:

def download_files(s3_client, bucket_name, local_path, file_names, folders):
    """Mirror the given S3 keys beneath ``local_path``.

    Args:
        s3_client: Client object exposing ``download_file(bucket, key, filename)``.
        bucket_name: Bucket the keys belong to.
        local_path: Local directory used as the root of the mirrored tree.
        file_names: Object keys to download; each is saved at ``local_path/key``.
        folders: Folder keys; each is created as a directory under ``local_path``.
    """
    root = Path(local_path)

    # Create the folder keys first so that empty folders exist locally too.
    for folder_key in folders:
        (root / folder_key).mkdir(parents=True, exist_ok=True)

    for key in file_names:
        destination = root / key
        # The parent directory may not be among the listed folder keys.
        destination.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket_name, key, str(destination))

Do you have any idea how I could modify my download method to download only the most recently created file, or do you have another solution?

Thank you


Solution

  • If you want to pick the object with the latest LastModified timestamp, rather than extracting the date from the object's Key (filename), you could use:

    import boto3

    BUCKET = 'bucket-name-here'

    session = boto3.Session(profile_name='dev')
    s3_resource = session.resource('s3')

    # Skip keys ending in '/', which are treated as folders rather than files.
    files = (
        obj for obj in s3_resource.Bucket(BUCKET).objects.all()
        if not obj.key.endswith('/')
    )

    # default=None keeps a bucket without any files from raising; on equal
    # timestamps max() keeps the first object encountered.
    newest = max(files, key=lambda obj: obj.last_modified, default=None)

    if newest is None:
        print(f'No files found in bucket {BUCKET}')
    else:
        latest_key = newest.key
        print(latest_key)

        target_filename = latest_key.split('/')[-1]  # Assume you just want the filename portion
        s3_resource.Object(BUCKET, latest_key).download_file(target_filename)