Search code examples
python azure onedrive azure-identity msal

How to upload large files to OneDrive using Python


import requests
from msal import PublicClientApplication
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# Configuration from environment variables
CLIENT_ID = os.getenv('CLIENT_ID')
# TENANT_ID is not used since we're using the 'consumers' endpoint
CLIENT_SECRET = os.getenv('CLIENT_SECRET')  # This might not be necessary for public clients (it is not referenced again in this snippet)
USER_ID = os.getenv('USER_ID')  # interpolated into the Graph URL built by upload_file below

authority_url = 'https://login.microsoftonline.com/consumers'
scopes = ['Files.ReadWrite.All']  # Scope 'https://graph.microsoft.com/.default' might not be needed

# Public (secret-less) MSAL client bound to the authority above
app = PublicClientApplication(CLIENT_ID, authority=authority_url)

# The following is a simplistic approach to illustrate the flow
# You need a URL to redirect the user to for login, including the redirect URI
redirect_uri = 'http://localhost:8000/callback'  # Make sure this matches the redirect URI configured in Azure
url = app.get_authorization_request_url(scopes, redirect_uri=redirect_uri)
print("Please go to this URL and sign-in:", url)

# After sign-in, you will receive a callback to the redirect URI with a code
# This part typically happens on your web server which handles the redirect;
# here the code is typed in by hand and exchanged for a token.
code = input("Enter the code you received: ")
result = app.acquire_token_by_authorization_code(code, scopes=scopes, redirect_uri=redirect_uri)

# On success keep the token; on failure only the error fields are printed.
# NOTE: in the failure branch access_token is never assigned and the script
# does not stop, so any later use of access_token would raise NameError.
if 'access_token' in result:
    access_token = result['access_token']
else:
    print(result.get('error'))
    print(result.get('error_description'))

def upload_file(access_token, file_path, destination_path):
    """Send a local file to OneDrive in one PUT request.

    The file at ``file_path`` is opened in binary mode and passed to
    ``requests`` as the request body. The target is
    ``destination_path`` under the drive root of the module-level
    ``USER_ID``.

    Args:
        access_token: Bearer token placed in the Authorization header.
        file_path: Path of the local file to upload.
        destination_path: Path of the item relative to the drive root.

    Returns:
        The decoded JSON body of the response.
    """
    request_headers = dict([
        ('Authorization', 'Bearer ' + access_token),
        ('Content-Type', 'application/octet-stream'),
    ])
    with open(file_path, 'rb') as source:
        endpoint = f'https://graph.microsoft.com/v1.0/users/{USER_ID}/drive/root:/{destination_path}:/content'
        reply = requests.put(endpoint, headers=request_headers, data=source)
    # Decode only after the file handle has been released.
    return reply.json()

I have a personal Microsoft account, and this code uses the Microsoft Graph API to upload my images and zip files, which are at least 2 GB, to OneDrive. However, whatever I try, I never get an access token to upload with. I do not know what I am missing.


Solution

  • To upload large files to OneDrive using the Microsoft Graph API in Python, use the createUploadSession API call.

    Initially, I registered a multi-tenant application by selecting the Supported account type shown below:

    enter image description here

    Make sure to enable the public client flows option and add the redirect URI under the Mobile and desktop applications platform, as below:

    enter image description here

    In my case, I ran the modified Python code below to upload a large file to OneDrive using the Microsoft Graph API:

    import requests
    from msal import PublicClientApplication
    from dotenv import load_dotenv
    import os
    import time
    
    # Load environment variables
    load_dotenv()

    # Configuration from environment variables
    CLIENT_ID = os.getenv('CLIENT_ID')
    CLIENT_SECRET = os.getenv('CLIENT_SECRET')  # read but not referenced again in this script
    USER_ID = os.getenv('USER_ID')  # interpolated into the Graph URL by create_upload_session

    authority_url = 'https://login.microsoftonline.com/consumers'
    scopes = ['Files.ReadWrite.All']

    # Public (secret-less) MSAL client bound to the authority above
    app = PublicClientApplication(CLIENT_ID, authority=authority_url)

    # Simplistic approach to illustrate the flow: print the sign-in URL and
    # let the user copy the authorization code back by hand.
    redirect_uri = 'http://localhost:8000/callback'
    url = app.get_authorization_request_url(scopes, redirect_uri=redirect_uri)
    print("Please go to this URL and sign-in:", url)

    # After sign-in, receive a callback with a code
    code = input("Enter the code you received: ")
    result = app.acquire_token_by_authorization_code(code, scopes=scopes, redirect_uri=redirect_uri)

    if 'access_token' in result:
        access_token = result['access_token']
        print("Access token acquired.")
    else:
        print(result.get('error'))
        print(result.get('error_description'))
        # Stop with a non-zero status: nothing below can work without a token.
        # SystemExit is raised directly because the bare `exit` helper is only
        # injected by the `site` module for interactive sessions.
        raise SystemExit(1)
    
    def create_upload_session(access_token, file_name):
        """Open a Microsoft Graph upload session for ``file_name``.

        POSTs to the ``createUploadSession`` endpoint for ``file_name`` under
        the drive root of the module-level ``USER_ID``.

        Args:
            access_token: Bearer token placed in the Authorization header.
            file_name: Path of the destination item relative to the drive root.

        Returns:
            The decoded JSON response body; the calling script reads its
            ``uploadUrl`` entry.

        Raises:
            requests.exceptions.HTTPError: If the response carries an error status.
            requests.exceptions.Timeout: If no response arrives within 60 seconds.
        """
        url = f'https://graph.microsoft.com/v1.0/users/{USER_ID}/drive/root:/{file_name}:/createUploadSession'
        headers = {
            'Authorization': 'Bearer ' + access_token,
            'Content-Type': 'application/json'
        }
        # Bound the wait the same way the chunk uploads do; without a timeout
        # a stalled connection would block the script indefinitely.
        response = requests.post(url, headers=headers, timeout=60)
        response.raise_for_status()
        return response.json()
    
    def upload_file_in_chunks(upload_url, file_path, chunk_size=327680, max_retries=5):
        """Upload a file to an upload-session URL as sequential byte ranges.

        Each chunk is sent with a PUT request carrying a ``Content-Range``
        header, and progress is printed after every accepted chunk. A chunk
        that fails with a connection error is re-sent with exponential backoff.

        Args:
            upload_url: The ``uploadUrl`` obtained from ``create_upload_session``.
            file_path: Path of the local file to send.
            chunk_size: Bytes per request. The default is 320 KiB; Microsoft
                Graph documents that fragment sizes must be a multiple of that
                value (see the createUploadSession reference).
            max_retries: Total attempts allowed per chunk before the last
                error is re-raised.

        Raises:
            ValueError: If ``chunk_size`` or ``max_retries`` is below 1, since
                either would end the call without uploading anything.
            requests.exceptions.ConnectionError: If a chunk still fails after
                ``max_retries`` attempts.
            requests.exceptions.HTTPError: If the server answers a chunk with
                an error status (not retried).
        """
        if chunk_size < 1 or max_retries < 1:
            # Previously these values skipped every chunk silently, so the
            # caller went on to report a successful upload.
            raise ValueError('chunk_size and max_retries must both be at least 1')

        with open(file_path, 'rb') as file:
            file_size = os.path.getsize(file_path)
            for i in range(0, file_size, chunk_size):
                chunk_data = file.read(chunk_size)
                headers = {
                    'Content-Length': str(len(chunk_data)),
                    'Content-Range': f'bytes {i}-{i + len(chunk_data) - 1}/{file_size}'
                }
                retries = 0
                while retries < max_retries:
                    try:
                        response = requests.put(upload_url, headers=headers, data=chunk_data, timeout=60)
                        response.raise_for_status()
                    except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError) as e:
                        retries += 1
                        if retries == max_retries:
                            # Out of attempts: fail now rather than sleeping
                            # through one more backoff period first.
                            raise
                        print(f"Retrying ({retries}/{max_retries}) due to: {e}")
                        time.sleep(2 ** retries)  # Exponential backoff
                    else:
                        uploaded = min(i + len(chunk_data), file_size)
                        print(f'Uploaded {uploaded} out of {file_size} bytes ({uploaded * 100 / file_size:.2f}%)')
                        break
    
    # Local file to send and the name it is stored under in OneDrive
    # (both are placeholder values to adjust).
    file_path = 'C:\\Users\\xxxxx\\Downloads\\demofolder.zip'
    file_name = 'DemoFolder.zip'

    # Create upload session; the 'uploadUrl' entry of the response is the
    # address the chunks are PUT to.
    upload_session = create_upload_session(access_token, file_name)
    upload_url = upload_session['uploadUrl']
    print("Upload session created.")

    # Upload the file in chunks, using the function's default chunk size
    # and retry count.
    upload_file_in_chunks(upload_url, file_path)
    print("File uploaded successfully.")
    

    When you run the above code, you will get an authorization URL in the console that needs to be opened in a browser:

    enter image description here

    Opening that URL in a browser asks you to sign in with a personal Microsoft account and, after successful authentication, shows the code value in the address bar:

    enter image description here

    Now copy this code value and paste it into the console; the script then acquires a token and uploads the file in chunks, as below:

    enter image description here

    When I checked again after some time, I got the response below saying "File uploaded successfully":

    enter image description here

    To confirm that, I checked the OneDrive portal, where the zip file had been uploaded successfully, as below:

    enter image description here