Search code examples
python azure azure-blob-storage blob

Download and read blobs from Azure using Python


I would like to download files from Azure Blob Storage and read them. The container name is "cont". The container holds several folders, and we need to download from the folder named "special". Inside this folder there are 6 different folders: aaa, bbb, ccc, ddd, mmm, ppp. We need to download only from mmm and ppp, and only the files that fall within a specific time period. The files in these folders are in 'zip' format, for example: ppp 2023-01-09 11:00:00.zip. This is my code. With this code only 1 file is downloaded, and it comes from another folder, ddd.

import os
from datetime import datetime
from dateutil import tz
from azure.storage.blob import BlobServiceClient

# Read the download window from the user as two "yyyy-mm-dd hh:mm:ss" strings.
start_time_str = input("Enter start time (format: yyyy-mm-dd hh:mm:ss): ")
end_time_str = input("Enter end time (format: yyyy-mm-dd hh:mm:ss): ")

# Parse the strings and stamp them with the machine's local time zone, so they
# can be compared with the time-zone-aware last_modified values in the loop.
start_time = datetime.fromisoformat(start_time_str).replace(tzinfo=tz.tzlocal())
end_time = datetime.fromisoformat(end_time_str).replace(tzinfo=tz.tzlocal())

print("Start time:", start_time)
print("End time:", end_time)

# Create a BlobServiceClient object
# NOTE(review): the value below is a redacted placeholder; as written the line
# is not valid Python and must be replaced with a quoted connection string.
connection_string = <"connection string">
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# NOTE(review): the description names the container "cont" but the code uses
# "sp" - confirm which one is intended.
container_name = "sp"
container_client = blob_service_client.get_container_client(container_name)

# Local directory that receives the downloaded files.
local_path = "C:/Users/aaa/Downloads/"
# list_blobs() is called without any filter, so the loop visits every blob in
# the container, whatever folder it lives in.
for blob in container_client.list_blobs():
    blob_client = container_client.get_blob_client(blob)
    blob_props = blob_client.get_blob_properties()
    # Express the blob's last-modified timestamp in the local time zone.
    last_modified = blob_props.last_modified.astimezone(tz.tzlocal())
    # The time window is the only selection criterion; the folder part of
    # blob.name is never checked, so blobs from any folder can be downloaded.
    if last_modified >= start_time and last_modified <= end_time:
        # Only the last path segment is used as the local file name, so the
        # folder structure is flattened into local_path.
        download_path = os.path.join(local_path, blob.name.split("/")[-1])
        with open(download_path, "wb") as download_file:
            download_stream = blob_client.download_blob()
            # readall() returns the whole blob content, written in one call.
            download_file.write(download_stream.readall())
        print(f"Downloaded blob: {blob.name}")


Solution

  • This happens because the code does not restrict the download to any specific folder, so every blob in the container is considered. After reproducing this on my end, I got it to work using the code below.

    import os
    from datetime import datetime
    from dateutil import tz
    from azure.storage.blob import BlobServiceClient
    
    # Ask for the download window; both prompts are answered before either
    # value is parsed.
    start_time_str = input("Enter start time (format: yyyy-mm-dd hh:mm:ss): ")
    end_time_str = input("Enter end time (format: yyyy-mm-dd hh:mm:ss): ")

    # The typed values carry no UTC offset, so each one is stamped with the
    # machine's local time zone to make it comparable with aware timestamps.
    start_time, end_time = (
        datetime.fromisoformat(raw).replace(tzinfo=tz.tzlocal())
        for raw in (start_time_str, end_time_str)
    )

    # Echo the parsed window back to the user.
    for label, moment in (("Start time:", start_time), ("End time:", end_time)):
        print(label, moment)

    # Connection settings (placeholders to be filled in by the reader).
    connection_string = "<CONNECTION_STRING>"
    container_name = "<CONTAINER_NAME>"

    # Build the service client, then the client for the target container.
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    
    # YOUR PATH
    local_path = "C:\\Users\\<PATH>\\Downloads"
    # Folders to download from, matched against the second segment of the
    # blob name (e.g. "special/mmm/file.zip" -> "mmm"). Built once, outside
    # the loop.
    wanted_folders = {"mmm", "ppp"}
    # Characters Windows does not allow in file names. Blob names such as
    # "ppp 2023-01-09 11:00:00.zip" contain ':' and cannot be created as-is
    # under a C:\ path, so they are mapped to '_' on Windows only.
    windows_unsafe = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})

    # NOTE: if the parent folder name is known, passing
    # name_starts_with="<parent>/" to list_blobs() filters on the server
    # instead of listing the whole container.
    for blob in container_client.list_blobs():
        # list_blobs() already yields each blob's properties, so no extra
        # get_blob_properties() round trip is needed to read last_modified.
        last_modified = blob.last_modified.astimezone(tz.tzlocal())
        print(f"{blob.name},{last_modified}")

        # RETRIEVING ONLY THE FOLDER NAME
        # A blob inside "<parent>/<folder>/" has at least three segments;
        # checking the length first avoids an IndexError on blobs stored at
        # the container root.
        name_parts = blob.name.split("/")
        if len(name_parts) < 3 or name_parts[1] not in wanted_folders:
            continue
        if not start_time <= last_modified <= end_time:
            continue

        file_name = name_parts[-1]
        if os.name == "nt":
            file_name = file_name.translate(windows_unsafe)
        download_path = os.path.join(local_path, file_name)
        print(download_path)

        blob_client = container_client.get_blob_client(blob)
        with open(download_path, "wb") as download_file:
            # readinto() streams the blob into the file rather than holding
            # the whole zip in memory the way readall() does.
            blob_client.download_blob().readinto(download_file)
        print(f"Downloaded blob: {blob.name}")
    

    Results:

    enter image description here