Search code examples
azureazure-openaifine-tuning

Deploying and undeploying a fine-tuned model on demand to avoid hourly costs


I have a fine-tuned model on Azure OpenAI that I expect to use for perhaps a few minutes every working day. The rest of the week nobody will use it. We are 3 potential users. Our backend runs on Azure Functions.

Since you pay $1.7 per hour regardless of actual usage, it can get unnecessarily expensive. What is the best way you can think of to deploy and undeploy it on demand, to avoid costs during periods of inactivity?

I was thinking of detecting 15 min of inactivity and then sending an undeploy call, but since we use Azure Functions, it is not easy to check inactivity. Also, we don't want to undeploy when one of the 3 people is using it.

Perhaps best to use OpenAI instead of Azure OpenAI, since there is no hourly rate?


Solution

  • For a fine-tuned model, the documentation itself states that:

    it continues to incur an hourly cost regardless of whether you're actively using it

    A workaround in this case would be to use a function app that creates the deployment and deletes it based on usage conditions.

    First, you need to add diagnostic setting.

    Go to your OpenAI resource, under Monitoring select Diagnostic settings.

    enter image description here

    Next, add below configuration.

    enter image description here

    Select the log categories you need and archive them to a storage account.

    In storage account the logs will be in path like below.

    insights-logs-requestresponse/resourceId=/SUBSCRIPTIONS/0B361xxxxxB-AE97-yyyy/RESOURCEGROUPS/<resourcegrp>/PROVIDERS/MICROSOFT.COGNITIVESERVICES/ACCOUNTS/OPENAI-SOAUTOMATION/y=2024/m=11/d=18/h=10/m=00/PT1H.json
    

    In PT1H.json the content will be like below.

    { "callerIpAddress": "27.63.242.***", "category": "RequestResponse", "correlationId": "d7d6xyz6d", "durationMs": 835, "event": "ShoeboxCallResult", "location": "eastus", "operationName": "ChatCompletions_Create", "properties": "{\"apiName\":\"Azure OpenAI API version 2024-02-15-preview\",\"requestTime\":638675238320405867,\"requestLength\":242,\"responseTime\":638675238328757797,\"responseLength\":1419,\"objectId\":\"34d06566-4703-4953-9bc3-295e54b1b7a2\",\"streamType\":\"Non-Streaming\",\"modelDeploymentName\":\"gpt35-soautomation\",\"modelName\":\"gpt-35-turbo\",\"modelVersion\":\"0301\"}", "resourceId": "/SUBSCRIPTIONS/0B36xyz75DB0/RESOURCEGROUPS/x_y_z/PROVIDERS/MICROSOFT.COGNITIVESERVICES/ACCOUNTS/OPENAI-SOAUTOMATION", "resultSignature": "200", "time": "2024-11-18T10:54:59.0810000Z", "Tenant": "eastus"}
    { "callerIpAddress": "27.63.242.***", "category": "RequestResponse", "correlationId": "1813d0xxxyyyzzz6e", "durationMs": 91, "event": "ShoeboxCallResult", "location": "eastus", "operationName": "ChatCompletions_Create", "properties": "{\"apiName\":\"Azure OpenAI API version 2024-10-01-preview\",\"requestTime\":638675232368776560,\"requestLength\":313,\"responseTime\":638675232369687659,\"responseLength\":0,\"objectId\":\"\",\"streamType\":\"Streaming\",\"modelDeploymentName\":\"gpt-4o\",\"modelName\":\"gpt-4o\",\"modelVersion\":\"2024-05-13\"}", "resourceId": "/SUBSCRIPTIONS/0B361z_y_z/RESOURCEGROUPS/x_y_z/PROVIDERS/MICROSOFT.COGNITIVESERVICES/ACCOUNTS/OPENAI-SOAUTOMATION", "resultSignature": "200", "time": "2024-11-18T10:43:16.8470000Z", "Tenant": "eastus"}
    

    From these logs, you need to find the latest PT1H.json and the maximum `time` value in it.

    Below is the sample code you can try, alter it according to your needs.

    import os
    import json
    from datetime import datetime, timedelta
    import pytz
    
    # Root folder of the archived diagnostic logs: the path up to and including
    # the account name (OPENAI-SOAUTOMATION). The y=/m=/d=/h=/m= folders that
    # get_latest_path() walks are expected directly below it.
    base_path = "insights-logs-requestresponse/resourceId=/SUBSCRIPTIONS/0B361xxxxxB-AE97-yyyy/RESOURCEGROUPS/<resourcegrp>/PROVIDERS/MICROSOFT.COGNITIVESERVICES/ACCOUNTS/OPENAI-SOAUTOMATION"
    
    def get_latest_path(base_path):
        """Return the path of the most recent ``PT1H.json`` below *base_path*.

        The log tree is nested as ``y=YYYY/m=MM/d=DD/h=HH/m=MM``. At every level
        the folder with the numerically largest value is selected.

        Args:
            base_path: Directory that contains the ``y=...`` folders.

        Returns:
            The path ``<base_path>/y=../m=../d=../h=../m=../PT1H.json`` built from
            the folder names exactly as they exist on disk.

        Raises:
            ValueError: If a level contains no matching ``<prefix><digits>`` folder.
            OSError: If a directory cannot be listed.
        """
        current = base_path
        # Prefixes in nesting order: year, month, day, hour, minute.
        for prefix in ("y=", "m=", "d=", "h=", "m="):
            width = len(prefix)
            candidates = [
                name
                for name in os.listdir(current)
                if name.startswith(prefix) and name[width:].isdigit()
            ]
            if not candidates:
                raise ValueError(f"No '{prefix}<number>' folder found in {current!r}")
            # Compare numerically but keep the on-disk name: folders are
            # zero-padded (e.g. 'm=00', 'h=09'), so rebuilding the name from the
            # integer would point at a path that does not exist ('m=0', 'h=9').
            newest = max(candidates, key=lambda name: int(name[width:]))
            current = os.path.join(current, newest)

        return os.path.join(current, "PT1H.json")
    
    def _parse_log_time(time_str):
        """Parse a log ``time`` value such as ``2024-11-18T10:54:59.0810000Z``.

        Returns a naive ``datetime`` (the value is UTC). The fraction is cut to
        six digits because ``%f`` rejects the seven-digit fractions in the logs.
        """
        stamp = time_str[:-1] if time_str.endswith("Z") else time_str
        head, _, fraction = stamp.partition(".")
        if fraction:
            return datetime.strptime(f"{head}.{fraction[:6]}", "%Y-%m-%dT%H:%M:%S.%f")
        return datetime.strptime(head, "%Y-%m-%dT%H:%M:%S")

    def find_latest_time_field(json_path):
        """Return the newest ``time`` value found in a ``PT1H.json`` log file.

        The file is accepted either as a single JSON document (an array of
        records or one record) or as one JSON object per line, which is the
        layout of the sample log content.

        Args:
            json_path: Path to the log file.

        Returns:
            The latest timestamp as a naive ``datetime`` in UTC, or ``None`` when
            no record carries a ``time`` field (a message is printed in that case).

        Raises:
            json.JSONDecodeError: If the content is neither valid JSON nor valid
                line-delimited JSON.
            ValueError: If a ``time`` value does not match the expected format.
        """
        with open(json_path, 'r', encoding="utf-8") as file:
            content = file.read()

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Not a single document: treat every non-blank line as one record.
            data = [json.loads(line) for line in content.splitlines() if line.strip()]

        if isinstance(data, dict):
            data = [data]

        times = []
        for record in data:
            if not isinstance(record, dict):
                continue
            time_str = record.get("time")  # Default time will be in UTC
            if time_str:
                times.append(_parse_log_time(time_str))

        if not times:
            print("No valid `time` fields found.")
            return None

        # taking latest time
        return max(times)
    
    def check_time_difference(latest_time):
        """Report whether the last logged request is more than 15 minutes old.

        Prints the current time, the latest request time and their difference,
        followed by the decision.

        Args:
            latest_time: Time of the most recent request. A naive value is
                interpreted as UTC; a timezone-aware value is compared as-is.

        Returns:
            ``True`` when more than 15 minutes have passed since *latest_time*
            (the deployment can be deleted), otherwise ``False``.
        """
        # Local import: the file-level import only brings in datetime/timedelta.
        from datetime import timezone

        # Current time in UTC. datetime.utcnow() is deprecated, so take an aware
        # "now" and drop tzinfo only when the input is naive, because naive and
        # aware datetimes cannot be subtracted from each other.
        current_time = datetime.now(timezone.utc)
        if latest_time.tzinfo is None:
            current_time = current_time.replace(tzinfo=None)

        time_difference = current_time - latest_time
        print(f"Current Time (UTC): {current_time}")
        print(f"Latest Time (UTC): {latest_time}")
        print(f"Time Difference: {time_difference}")

        should_delete = time_difference > timedelta(minutes=15)
        if should_delete:
            print("If greater than 15 mintes delete deployment")
        else:
            print("If time difference is within 15 minutes don't delete")
        return should_delete
    
    if __name__ == "__main__":
        # Step 1: locate the newest hourly log file below the configured root.
        log_path = get_latest_path(base_path)
        print("Latest JSON Path:", log_path)

        # Step 2: read the most recent request time; nothing to check if absent.
        last_request = find_latest_time_field(log_path)
        if last_request is not None:
            check_time_difference(last_request)