Tags: python, multiprocessing, upload, azure-blob-storage

Uploading files to Azure Blob Storage asynchronously with Python gets stuck


I have a directory of images that I want to upload to Azure Blob Storage. I use the following Python code:

    import os
    from multiprocessing import Pool
    from azure.storage.blob import BlobClient

    def upload_file_to_blob(file_path, dest):
        file_name = os.path.basename(file_path)
        blob_client = BlobClient.from_connection_string(connection_string, container_name, os.path.join(dest, file_name))
        with open(file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)

    with Pool(processes=10) as pool:
        for file_ind, file_path in enumerate(file_list):
            pool.apply_async(upload_file_to_blob, args=(file_path, file_dest))
        pool.close()
        pool.join()
    print(f"done")

It seems that all of the files were indeed uploaded, but the program hangs at the end and the final print never appears in the terminal. What am I doing wrong?


Solution

  • Using the code below, I am able to upload multiple files from a folder to Azure Blob Storage. The main difference from your snippet is that the pool work is wrapped in an if __name__ == "__main__": guard, which multiprocessing needs on platforms that spawn worker processes (such as Windows), and the result of each task is waited on explicitly. Starting with a single file:

    
    import os
    from multiprocessing import Pool
    from azure.storage.blob import BlobClient
    
    # Replace these placeholders with your Azure Blob Storage connection information
    connection_string = ""
    container_name = " "
    local_file_path = " "  # Replace with your local file path
    file_dest = "your_destination_folder"
    
    def upload_file_to_blob(file_path, dest):
        file_name = os.path.basename(file_path)
        blob_client = BlobClient.from_connection_string(connection_string, container_name, os.path.join(dest, file_name))
        with open(file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)
    
    if __name__ == "__main__":
        with Pool(processes=1) as pool:  # Using 1 process for a single file
            result = pool.apply_async(upload_file_to_blob, args=(local_file_path, file_dest))
            result.get()  # Wait for the task to complete
    
        print("done")
    
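    Note that result.get() is what makes the parent process wait for the upload to finish and, just as importantly, re-raises any exception that occurred in the worker, so failures do not pass silently.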

    Output: (screenshots of the uploaded blob in the container omitted)

    Code for uploading multiple files in parallel:

    import os
    from multiprocessing import Pool
    from azure.storage.blob import BlobClient
    
    # Replace these placeholders with your Azure Blob Storage connection information
    connection_string = ""
    container_name = " "
    local_file_paths = ["C:/Users/Newfolder13.zip", "C:/Users/DotNet6.zip"]  # Put the file paths in a list
    file_dest = "your_destination_folder"
    
    def upload_file_to_blob(file_path, dest):
        file_name = os.path.basename(file_path)
        blob_client = BlobClient.from_connection_string(connection_string, container_name, os.path.join(dest, file_name))
        with open(file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)
    
    if __name__ == "__main__":
        with Pool(processes=10) as pool:  # Use 10 processes for parallel uploads
            for file_path in local_file_paths:
                pool.apply_async(upload_file_to_blob, args=(file_path, file_dest))
            pool.close()
            pool.join()  # Wait for all processes to complete
    
        print("done")
    
    
    • With processes=10, the files in the list are uploaded in parallel (screenshots of the uploaded blobs omitted).
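    • If the pool seems to hang or some uploads fail silently, keep in mind that apply_async drops worker exceptions unless you retrieve the AsyncResult objects. Below is a minimal sketch of the same multi-file upload that collects the results and calls get() on each one, so any upload error is re-raised in the parent process; the connection string, container name and file paths are placeholders:

      import os
      from multiprocessing import Pool
      from azure.storage.blob import BlobClient

      connection_string = ""                          # placeholder, as above
      container_name = " "
      local_file_paths = ["file1.zip", "file2.zip"]   # hypothetical file paths
      file_dest = "your_destination_folder"

      def upload_file_to_blob(file_path, dest):
          file_name = os.path.basename(file_path)
          # Blob names use forward slashes as virtual folder separators,
          # so build the name with "/" rather than os.path.join.
          blob_client = BlobClient.from_connection_string(
              connection_string, container_name, f"{dest}/{file_name}")
          with open(file_path, "rb") as data:
              blob_client.upload_blob(data, overwrite=True)
          return file_name

      if __name__ == "__main__":
          with Pool(processes=10) as pool:
              # Keep the AsyncResult handles so worker exceptions surface here.
              results = [pool.apply_async(upload_file_to_blob, args=(path, file_dest))
                         for path in local_file_paths]
              pool.close()
              pool.join()
              for result in results:
                  print("uploaded", result.get())  # get() re-raises worker errors

          print("done")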

    • For more details, refer to this SO thread. A lighter-weight alternative using threads is sketched below.
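    • Since blob uploads are I/O-bound, a thread pool sharing a single ContainerClient is often enough and sidesteps the if __name__ == "__main__": requirement of multiprocessing. This is only a sketch of that alternative, not the approach used above; the connection string, container name and file list are placeholders:

      import os
      from concurrent.futures import ThreadPoolExecutor, as_completed
      from azure.storage.blob import ContainerClient

      connection_string = ""                       # placeholder
      container_name = " "
      file_list = ["image1.jpg", "image2.jpg"]     # hypothetical file paths
      file_dest = "your_destination_folder"

      def upload(container_client, file_path):
          # Build the blob name with "/" so the files land under file_dest.
          blob_name = f"{file_dest}/{os.path.basename(file_path)}"
          with open(file_path, "rb") as data:
              container_client.upload_blob(name=blob_name, data=data, overwrite=True)
          return blob_name

      container_client = ContainerClient.from_connection_string(connection_string, container_name)
      with ThreadPoolExecutor(max_workers=10) as executor:
          futures = [executor.submit(upload, container_client, path) for path in file_list]
          for future in as_completed(futures):
              print("uploaded", future.result())  # result() re-raises upload errors
      print("done")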