Search code examples
Tags: python, azure-functions, azure-sql-database, azure-blob-storage

How to bulk insert CSV from Azure Blob to Azure Sql using Azure Function v2 (python)


I have the following code below that inserts one line into an azure sql DB. How would I modify this code to bulk insert from Azure Blob (csvs) to Azure Sql using Azure Function v2 (python)?

import azure.functions as func
import logging
from azure.functions.decorators.core import DataType
import uuid

app = func.FunctionApp()

@app.function_name(name="HttpTrigger1")
@app.route(route="hello", auth_level=func.AuthLevel.ANONYMOUS)
@app.generic_output_binding(arg_name="toDoItems", type="sql", CommandText="dbo.ToDo", ConnectionStringSetting="SqlConnectionString",
    data_type=DataType.STRING)
def test_function(req: func.HttpRequest, toDoItems: func.Out[func.SqlRow]) -> func.HttpResponse:
    """Insert one row into dbo.ToDo through the SQL output binding.

    The binding is declared with DataType.STRING, so the UUID primary key
    is converted to text before being placed in the SqlRow.
    """
    item_id = str(uuid.uuid4())  # Convert UUID to string for the STRING-typed binding
    toDoItems.set(func.SqlRow({"Id": item_id, "title": "name", "completed": False, "url": "www"}))
    # Plain literal: the original f-string had no placeholders.
    return func.HttpResponse("Hello John")

Solution

  • To bulk insert data from an Azure Blob CSV file into an Azure SQL database, I have used an Azure Blob trigger v2 function and pyodbc.

    You can use the code given below-

    import azure.functions as func
    import logging
    import os
    import pyodbc
    import csv
    
    app = func.FunctionApp()

    @app.blob_trigger(arg_name="myblob", path="demo-container",
                      connection="afrinstore01_STORAGE")
    def blob_trigger(myblob: func.InputStream):
        """Bulk-insert the rows of a CSV blob into dbo.ToDo using pyodbc.

        Triggered for each new blob in demo-container; the first CSV line is
        treated as a header and skipped.
        """
        # Lazy %-style args, and explicit newlines: the original implicit string
        # concatenation produced one run-together line ("...blobName: ...").
        logging.info("Python blob trigger function processed blob\n"
                     "Name: %s\n"
                     "Blob Size: %s bytes", myblob.name, myblob.length)

        conn_str = os.environ['SqlConnectionString']
        conn = pyodbc.connect(conn_str)
        try:
            cursor = conn.cursor()
            reader = csv.reader(myblob.read().decode('utf-8').splitlines())
            next(reader, None)  # skip header row; tolerate a completely empty blob

            # Insert the rows into the database (pyodbc.executemany rejects an
            # empty parameter sequence, so guard against a header-only file).
            rows = [(row[0], row[1], row[2], row[3]) for row in reader]
            if rows:
                cursor.executemany(
                    "INSERT INTO dbo.ToDo (Id, title, completed, url) VALUES (?, ?, ?, ?)",
                    rows)
            conn.commit()
        finally:
            # Close even if decoding/parsing/inserting raises, so connections
            # are not leaked across function invocations.
            conn.close()
    

    I have added the connection string as shown below-

    local.settings.json-

    {
      "IsEncrypted": false,
      "Values": {
        "AzureWebJobsStorage": "UseDevelopmentStorage=true",
        "FUNCTIONS_WORKER_RUNTIME": "python",
        "AzureWebJobsFeatureFlags": "EnableWorkerIndexing",
        "afrinstore01_STORAGE": "DefaultEndpointsProtocol=https;AccountName=afrinstore01;AccountKey=cRWkg***********;EndpointSuffix=core.windows.net",
        "SqlConnectionString": "Driver={ODBC Driver 18 for SQL Server};Server=tcp:{serverName}.database.windows.net,1433;Database={databaseName};Uid={userName};Pwd={password};Encrypt=yes;TrustServerCertificate=no;Connection Timeout=120;"
      }
    }
    

    requirements.txt-

    azure-functions
    pyodbc
    

    I have the following data in the csv file.

    enter image description here

    Using the given code, I am able to insert the rows into the Azure SQL database.

    enter image description here

    enter image description here

    UPDATE-

    Inserting data from Azure Blob Storage to Azure SQL Database using BULK INSERT command.

    1. Create a database scoped credential. If master key is not created then create it too.
    CREATE  MASTER  KEY ENCRYPTION BY  PASSWORD='<EnterStrongPasswordHere>';
     
    -- Create a database scoped credential.  
    CREATE  DATABASE SCOPED CREDENTIAL UploadSQLData WITH  IDENTITY = 'SHARED ACCESS SIGNATURE', SECRET = 'sv=2024-04-23&ss=b&srt=sco&sp=rwdlac&se=2025-05-24T02:25:19Z&st=2024-04-23T18:25:19Z&spr=https&sig=KS51************D';
    
    2. Create an external data source by referring to an Azure Storage account and use the same credential which is created in Step 1.
    CREATE EXTERNAL DATA SOURCE Upload_Blob_To_SQLDB
    WITH (
        TYPE = BLOB_STORAGE,
        LOCATION = 'https://{storageName}.blob.core.windows.net',
        CREDENTIAL = UploadSQLData
    );
    
    3. I have created the below function to insert the data.
    import logging
    import os
    import pyodbc
    import azure.functions as func
    
    app = func.FunctionApp()

    @app.blob_trigger(arg_name="myblob", path="demo-container", connection="afrinstore01_STORAGE")
    def blob_trigger(myblob: func.InputStream) -> None:
        """Load the triggering blob into dbo.ToDo with a server-side BULK INSERT.

        Relies on the Upload_Blob_To_SQLDB external data source created in the
        database beforehand; SQL Server reads the blob directly, so the file
        contents never pass through the function.
        """
        # NOTE(review): BULK INSERT's FROM clause cannot take a ? parameter, so
        # the blob name must be interpolated. Escape single quotes so a blob
        # name cannot break out of the SQL string literal.
        blob_path = myblob.name.replace("'", "''")
        bulk_insert_query = (
            f"BULK INSERT dbo.ToDo FROM '{blob_path}' "
            f"WITH (FORMAT = 'CSV', DATA_SOURCE = 'Upload_Blob_To_SQLDB')"
        )
        # NOTE(review): unlike the pyodbc version, no header row is skipped here;
        # add FIRSTROW = 2 to the WITH clause if the CSV has a header.
        conn = pyodbc.connect(os.environ["SqlConnectionString"])
        try:
            conn.cursor().execute(bulk_insert_query)
            conn.commit()
        finally:
            conn.close()  # release the connection even if the insert fails

        logging.info(f"Processed blob\n"
                     f"Name: {myblob.name}\n"
                     f"Blob Size: {myblob.length} bytes")

    By using the above approach, I am able to get the expected response.