Search code examples
pythonpython-3.xazure-data-lakeazure-data-lake-gen2

How do you download a file from Azure Data Lake when you know the type of the file but not the name?


I can run the following to download the file "some/path/known_name.json"

def download_file():
    """Download ``some/path/known_name.json`` and save it as ``my_file.json``.

    The remote file is fetched through a ``FileSystemClient`` built from a
    connection string, read fully into memory, and written in binary mode to
    ``my_file.json``. Any exception raised along the way is caught and
    printed instead of being propagated to the caller.
    """
    try:
        # NOTE: the arguments are elided (``...``) in this snippet; supply the
        # real connection string and file system name before running.
        file_system_client = FileSystemClient.from_connection_string(...)

        full_file_location = "some/path/known_name.json"
        target_file_client = file_system_client.get_file_client(full_file_location)

        download = target_file_client.download_file()
        downloaded_bytes = download.readall()
        # The context manager closes the local file even if the write fails,
        # which the manual open()/close() pair did not guarantee.
        with open('my_file.json', 'wb') as local_file:
            local_file.write(downloaded_bytes)

    except Exception as e:
        print(e)

My question is: how do I download from some other path when the name of the file is unknown but the file type is known, e.g. "different/path/xxx.json"?


Solution

  • You can list the blobs in the container and then filter the json files by the blob.name.

    Here are the blobs in my test container:

    enter image description here

    Here is my python code:

    import os, uuid
    from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
    
    try:
        # Read the storage account connection string from the environment.
        connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
        if not connect_str:
            # Fail with a clear message instead of an obscure SDK error.
            raise ValueError('AZURE_STORAGE_CONNECTION_STRING is not set')

        # Create the BlobServiceClient object which will be used to create a container client
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)

        # Name of the existing container to download from
        container_name = "test"

        # Get a client for that container (this does not create it)
        container_client = blob_service_client.get_container_client(container_name)

        # List the blobs in the container and download every JSON file
        local_path = "./data"
        blob_list = container_client.list_blobs()
        for blob in blob_list:
            # Match on the file extension only; a plain substring test would
            # also pick up names such as "x.json.bak" or "a.jsonl".
            if blob.name.endswith('.json'):
                local_file_name = blob.name
                blob_client = blob_service_client.get_blob_client(container=container_name, blob=local_file_name)
                download_file_path = os.path.join(local_path, local_file_name)
                # Blob names may contain "/" (virtual directories) and the
                # target folder may not exist yet, so create it before opening.
                os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
                print("\nDownloading blob to \n\t" + local_path)

                with open(download_file_path, "wb") as download_file:
                    download_file.write(blob_client.download_blob().readall())
                print("\t" + blob.name)

    except Exception as ex:
        print('Exception:')
        print(ex)
    
    

    When I run the code, it downloads data.json and data2.json.
    enter image description here