Search code examples
pythonexceldataframeazure-blob-storage

Using Python, how to update an excel sheet on Azure Blob Storage while maintaining existing data in the sheet?


I'm trying to update an existing Excel worksheet on an Azure Blob Storage. The problem is that the existing data in the sheet gets wiped out. I want to maintain the existing data within the spreadsheet. Here is my code:

from azure.storage.blob import BlobServiceClient
import pandas as pd
import logging
import openpyxl as xl
import io
def update_excel():
    try:
        #Azure Blob Storage Configuration
        account_name = 'account_name'
        account_key = 'account_key'
        connect_str = 'DefaultEndpointsProtocol=https;AccountName=' + account_name + ';AccountKey=' + account_key + ';EndpointSuffix=core.windows.net'

        #create a client to interact with blob storage
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)

        # container info
        container = 'container_name'
        directory = 'directory_name'
 
        #use the client to connect to the container
        container_client = blob_service_client.get_container_client(container)

        #excel info
        excel_name = 'Excel_file.xlsx'
        excel_name_new = 'Excel_file_new.xlsx'
        sheet_name = 'Sheet1'
        blob_name = directory + '/' + excel_name
        new_blob_name = directory + '/' + excel_name_new
        
        #download blob
        blob_client = blob_service_client.get_blob_client(container=container, blob=blob_name)
        downloaded_blob = blob_client.download_blob()

        #load excel
        wb1 = xl.load_workbook(filename=io.BytesIO(downloaded_blob.read()),keep_vba=False)
        ws = wb1[sheet_name]
        max_row = ws.max_row

        #create dataframe
        data = {'Name': ['Tom', 'nick', 'krish', 'jack'],
                'Age': ['20', '30', '40', '50']
             }
        df = pd.DataFrame(data)
        
        #upload excel
        output = io.BytesIO()
        df.to_excel(output, sheet_name = sheet_name, startrow = max_row, startcol=1, engine='openpyxl', header=False, index=False) 
        output.seek(0)   
        container_client.upload_blob(name=new_blob_name, data=output, overwrite=True)
    except Exception as e:
        logging.error(f"Error: {e}")

Is there a way I can modify the code so it maintains the existing data in the worksheet while updating?


Solution

  • Using Python, how to update an excel sheet on Azure Blob Storage while maintaining existing data in the sheet?

    In my Azure Storage Account, I had file with name sample_data.xlsx with below record.

    Record:

    enter image description here

    You can use the below code that will update the sheet with records and maintaining existing data in the sheet using python.

    Code:

    from azure.storage.blob import BlobServiceClient
    import pandas as pd
    import logging
    import openpyxl as xl
    import io
    
    def update_excel():
        try:
            # Azure Bl
            connect_str = "xxx"
    
            blob_service_client = BlobServiceClient.from_connection_string(connect_str)
            container = 'xxx'
            directory = 'xxx'
            container_client = blob_service_client.get_container_client(container)
            excel_name = 'sample_data.xlsx'
            blob_name = directory + '/' + excel_name
            blob_client = blob_service_client.get_blob_client(container=container, blob=blob_name)
            downloaded_blob = blob_client.download_blob()
    
            # load excel
            wb = xl.load_workbook(filename=io.BytesIO(downloaded_blob.read()), keep_vba=False)
            sheet_name = 'Sheet1'
            
            # List sheet names for verification
            print(f"Available sheets: {wb.sheetnames}")
            
            if sheet_name not in wb.sheetnames:
                logging.error(f"Sheet {sheet_name} not found.")
                return
            
            ws = wb[sheet_name]
            max_row_before = ws.max_row
            print(f"Max row before appending: {max_row_before}")
            
            # create new data
            data = {'Name': ['Dhoni', 'Sachin', 'Virat', 'Yuvaraj'],
                    'Age': [7, 10, 18, 12],
                    'City': ['Chennai', 'Mumbai', 'Delhi', 'Punjab']
                   }
            df = pd.DataFrame(data)
    
            # Add new rows to the worksheet without removing existing data
            for index, row in df.iterrows():
                ws.append(row.tolist())  # Appends row to the existing sheet
            
            # Check the row count after appending
            max_row_after = ws.max_row
            print(f"Max row after appending: {max_row_after}")
    
            # save the workbook to a BytesIO object
            output = io.BytesIO()
            wb.save(output)
            output.seek(0)
            container_client.upload_blob(name=blob_name, data=output, overwrite=True)
            logging.info("Excel file updated successfully.")
            
        except Exception as e:
            logging.error(f"Error: {e}")
    
    update_excel()
    

    Output:

    Available sheets: ['Sheet1']
    Max row before appending: 11
    Max row after appending: 15
    

    enter image description here

    Records:

    The above code executed and updated the data in the Azure blob storage.

    enter image description here