Search code examples
pythonazure-functionsazure-linux

How to convert xlsb to xlsx using Azure Function?


I've developed and successfully tested locally a function that converts xlsb files to xlsx. Once I tried to deploy and run on Azure Portal, I got the following failure Result: Failure Exception: OSError: [Errno 30] Read-only file system: 'TEST.xlsx' So I tried to research and read that due to Azure Function Python being linux based, files could only be saved to temp directory. I tried to modify my function to include temp directory but I get new error Result: Failure Exception: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/TEST.xlsb'. Any suggestions on how I can achieve this result: blob triggered azure function that converts xlsb file (in blob container) to xlsx and saves in blob container? Below are my first attempt and subsequent changes with the temp directory discovery:

import os
import logging
import pandas as pd
#from io import BytesIO
import azure.functions as func
from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient
 
app = func.FunctionApp()
 
@app.blob_trigger(arg_name="myblob", path="{containerName}/{name}.xlsb",
                               connection="BlobStorageConnectionString")
def blob_trigger(myblob: func.InputStream):
    logging.info(f"Python blob trigger function processed blob"
                f"Name: {myblob.name}"
                f"Blob Size: {myblob.length} bytes")
   
    accountName = "name"
    accountKey = "key"
    connectionString = f"DefaultEndpointsProtocol=https;AccountName={accountName};AccountKey={accountKey};EndpointSuffix=core.windows.net"
   
    containerName = "{containerName}"
    inputBlobname = myblob.name.replace({containerName}, "")
    outputBlobname = inputBlobname.replace(".xlsb", ".xlsx")
 
    blob_service_client = BlobServiceClient.from_connection_string(connectionString)
    container_client = blob_service_client.get_container_client(containerName)
    blob_client = container_client.get_blob_client(inputBlobname)
    blob = BlobClient.from_connection_string(conn_str=connectionString, container_name=containerName, blob_name=outputBlobname)
 
    df = pd.read_excel(blob_client.download_blob().readall(), engine="pyxlsb")
 
    df.to_excel(outputBlobname, index=False)
 
    with open(outputBlobname, "rb") as data:
        blob.upload_blob(data, overwrite=True)
import os
import logging
import pandas as pd
#from io import BytesIO
import azure.functions as func
from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient

app = func.FunctionApp()

@app.blob_trigger(arg_name="myblob", path="{containerName}/{name}.xlsb",
                               connection="BlobStorageConnectionString") 
def blob_trigger(myblob: func.InputStream):
    logging.info(f"Python blob trigger function processed blob"
                f"Name: {myblob.name}"
                f"Blob Size: {myblob.length} bytes")
    
    accountName = "name"
    accountKey = "key"
    connectionString = f"DefaultEndpointsProtocol=https;AccountName={accountName};AccountKey={accountKey};EndpointSuffix=core.windows.net"
    
    containerName = "{containerName}"
    inputBlobname = myblob.name.replace({containerName}, "")
    localBlobname = "/tmp/" + inputBlobname
    outputBlobname = inputBlobname.replace(".xlsb", ".xlsx")

    blob_service_client = BlobServiceClient.from_connection_string(connectionString)
    container_client = blob_service_client.get_container_client(containerName)
    blob_client = container_client.get_blob_client(inputBlobname)
    blob = BlobClient.from_connection_string(conn_str=connectionString, container_name=containerName, blob_name=outputBlobname)

    df = pd.read_excel(blob_client.download_blob().readall(), engine="pyxlsb")

    df.to_excel("/tmp/" + outputBlobname, index=False)
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

    with open(file = os.path.join(ROOT_DIR, localBlobname), mode="rb") as data:
        blob.upload_blob(data, overwrite=True)

Solution

  • I tried the following code to convert a .xlsb blob to a .xlsx blob in an Azure storage container using the Azure Function app.

    Code :

    import logging
    import pandas as pd
    import azure.functions as func
    from azure.storage.blob import BlobServiceClient
    from io import BytesIO
    
    app = func.FunctionApp()
    @app.blob_trigger(arg_name="myblob", path="<container_name>/<file_name>.xlsb",
                       connection="kamblobstr_STORAGE")
    def blob_trigger(myblob: func.InputStream):
        logging.info(f"Python blob trigger function processed blob"
                     f"Blob Size: {myblob.length} bytes")
    
        accountName = "<storage_name>"
        accountKey = "<strorage_key>"
        connectionString = f"DefaultEndpointsProtocol=https;AccountName={accountName};AccountKey={accountKey};EndpointSuffix=core.windows.net"
    
        containerName = "<container_name>"
        outputBlobname = "<file_name>.xlsx"  
    
        blob_service_client = BlobServiceClient.from_connection_string(connectionString)
        container_client = blob_service_client.get_container_client(containerName)
    
        input_data = myblob.read()
        df = pd.read_excel(BytesIO(input_data), engine="pyxlsb")
    
        output_data = BytesIO()
        df.to_excel(output_data, index=False)
        output_data.seek(0)
    
        blob_client = container_client.get_blob_client(outputBlobname)
        blob_client.upload_blob(output_data.getvalue(), overwrite=True)
    

    local.settings.json :

    {
      "IsEncrypted": false,
      "Values": {
        "AzureWebJobsStorage": "<storage_connec_string>",
        "FUNCTIONS_WORKER_RUNTIME": "python",
        "AzureWebJobsFeatureFlags": "EnableWorkerIndexing",
        "kamblobstr_STORAGE": "<storage_connec_string>"
      }
    }
    

    Output :

    The blob trigger function code is running, and I uploaded the kamb.xlsb file to the Azure blob storage container as shown below:

    enter image description here

    I received the message output: "blob kamb.xlsb converted to kamb.xlsx" as shown below:

     *  Executing task: .venv\Scripts\activate && func host start 
    
    Found Python version 3.10.11 (py).
    
    Azure Functions Core Tools
    Core Tools Version:       4.0.5030 Commit hash: N/A  (64-bit)
    Function Runtime Version: 4.15.2.20177
    
    [2024-03-17T04:00:10.684Z] Host lock lease acquired by instance ID '000000xxxxxxxxxxxx'.
    [2024-03-17T04:00:22.921Z] Worker process started and initialized.
    
    Functions:
    
            blob_trigger: blobTrigger
    
    For detailed output, run func with --verbose flag.
    [2024-03-17T04:00:43.865Z] Executing 'Functions.blob_trigger' (Reason='New blob detected(LogsAndContainerScan): kamcontainer/kamb.xlsb', Id=4c9d45e5xxxxxxxxxxxxxxxx)
    [2024-03-17T04:00:43.870Z] Trigger Details: MessageId: a1416e18xxxxxxxxxxxxxx, DequeueCount: 1, InsertedOn: 2024-03-17T04:00:43.000+00:00, BlobCreated: 2024-03-17T04:00:39.000+00:00, BlobLastModified: 2024-03-17T04:00:39.000+00:00
    [2024-03-17T04:00:44.005Z] Python blob trigger function processed blobBlob Size: None bytes
    [2024-03-17T04:00:47.081Z] Request URL: 'https://kamblobstr.blob.core.windows.net/kamcontainer/kamb.xlsx'
    Request method: 'PUT'
    Request headers:
        'Content-Length': '4976'
        'x-ms-blob-type': 'REDACTED'
        'x-ms-version': 'REDACTED'
        'Content-Type': 'application/octet-stream'
        'Accept': 'application/xml'
        'User-Agent': 'azsdk-python-storage-blob/12.19.1 Python/3.10.11 (Windows-10-10.0.22631-SP0)'
        'x-ms-date': 'REDACTED'
        'x-ms-client-request-id': 'ef51c49exxxxxxxxxxxxxxx'
        'Authorization': 'REDACTED'
    A body is sent with the request
    [2024-03-17T04:00:49.113Z] Response status: 201
    Response headers:
        'Content-Length': '0'
        'Content-MD5': 'REDACTED'
        'Last-Modified': 'Sun, 17 Mar 2024 04:00:49 GMT'
        'ETag': '"0x8DC4636D53FDEE9"'
        'Server': 'Windows-Azure-Blob/1.0 Microsoft-HTTPAPI/2.0'
        'x-ms-request-id': '94236a36xxxxxxxxxxxxxxxxx'
        'x-ms-client-request-id': 'ef51c49exxxxxxxxxxxxxxxxxx'
        'x-ms-version': 'REDACTED'
        'x-ms-content-crc64': 'REDACTED'
        'x-ms-request-server-encrypted': 'REDACTED'
        'Date': 'Sun, 17 Mar 2024 04:00:49 GMT'
    [2024-03-17T04:00:49.167Z] Executed 'Functions.blob_trigger' (Succeeded, Id=4c9d45e5xxxxxxxxxxxxxxx, Duration=6129ms)
    

    enter image description here enter image description here

    Afterward, I successfully deployed my project to the Azure function app, as shown below:

    enter image description here

    I uploaded the kamb.xlsb file to the Azure blob storage container, and it ran successfully in the Function app on the Azure Portal, as shown below:

    enter image description here

    The blob kamb.xlsb was converted to kamb.xlsx in the storage container, as shown below.

    enter image description here

    kamb.xlsx data :

    enter image description here

    Function app Monitor Logs :

    enter image description here