Search code examples

rate_limit_exceeded error even though requests are within the RPM limit - Azure OpenAI

I have been repeatedly (over several days) getting a rate_limit_exceeded error when calling a GPT model. My understanding is that this error means I am hitting the requests-per-minute (RPM) limit. However, my model deployment allows around 100 RPM, while my current usage is under 10 RPM. What is the issue?


# Minimal reproduction: create an Azure AI Agent Service agent with the code
# interpreter tool, ask it for a bar chart, and report the outcome of the run.
#
# NOTE(review): the module paths, call arguments and `.id` attribute accesses
# below were missing from the posted snippet and have been reconstructed so
# the script is syntactically valid. Confirm them against the installed
# `azure-ai-projects` version before relying on this.
import os
from pathlib import Path
from typing import Any

from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential

# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery"
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: <HostName>;12345678-abcd-1234-9fc6-62780b3d3e05;my-resource-group;my-project-name
# Customer needs to login to Azure subscription via Azure CLI and set the environment variables
#
# NOTE(review): the original connection-string argument is not recoverable;
# it is read from the environment here so that no secret lives in the source.
project = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=os.environ["PROJECT_CONNECTION_STRING"],
)

print("testing AI agent service ")

project_client = project

# The client is a context manager; leaving the block closes its HTTP session.
with project_client:
    # Create an instance of the CodeInterpreterTool
    code_interpreter = CodeInterpreterTool()

    # The CodeInterpreterTool needs to be included in creation of the agent
    # NOTE(review): the model deployment name was not visible in the snippet;
    # set MODEL_DEPLOYMENT_NAME to the deployment whose quota is being tested.
    agent = project_client.agents.create_agent(
        model=os.environ["MODEL_DEPLOYMENT_NAME"],
        name="my-agent",
        instructions="You are helpful agent",
        tools=code_interpreter.definitions,
        tool_resources=code_interpreter.resources,
    )
    print(f"Created agent, agent ID: {agent.id}")

    # Create a thread
    thread = project_client.agents.create_thread()
    print(f"Created thread, thread ID: {thread.id}")

    # Create a message
    message = project_client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content="Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million",
    )
    print(f"Created message, message ID: {message.id}")

    # Run the agent
    # NOTE(review): newer SDK releases may name this keyword `agent_id`
    # instead of `assistant_id` - confirm against the installed version.
    run = project_client.agents.create_and_process_run(
        thread_id=thread.id,
        assistant_id=agent.id,
    )
    print(f"Run finished with status: {run.status}")

    if run.status == "failed":
        # Check if you got "Rate limit is exceeded.", then you want to get more quota
        print(f"Run failed: {run.last_error}")

    # Get messages from the thread
    messages = project_client.agents.list_messages(thread_id=thread.id)
    print(f"Messages: {messages}")

    # Get the last message from the sender
    last_msg = messages.get_last_text_message_by_sender("assistant")
    if last_msg:
        print(f"Last Message: {last_msg.text.value}")

    # Generate an image file for the bar chart
    for image_content in messages.image_contents:
        image_file_id = image_content.image_file.file_id
        print(f"Image File ID: {image_file_id}")
        file_name = f"{image_file_id}_image_file.png"
        project_client.agents.save_file(file_id=image_file_id, file_name=file_name)
        print(f"Saved image file to: {Path.cwd() / file_name}")

    # Print the file path(s) from the messages
    for file_path_annotation in messages.file_path_annotations:
        print("File Paths:")
        print(f"Type: {file_path_annotation.type}")
        print(f"Text: {file_path_annotation.text}")
        print(f"File ID: {file_path_annotation.file_path.file_id}")
        print(f"Start Index: {file_path_annotation.start_index}")
        print(f"End Index: {file_path_annotation.end_index}")
        # Keep only the final path component of the annotation text as the
        # local file name.
        project_client.agents.save_file(
            file_id=file_path_annotation.file_path.file_id,
            file_name=Path(file_path_annotation.text).name,
        )

    # Delete the agent once done
    project_client.agents.delete_agent(agent.id)
    print("Deleted agent")



testing AI agent service 
Created agent, agent ID: asst_ZhagK90WpAVEzfYNTD1Thebb
Created thread, thread ID: thread_VXxeHYDgTd1Cl2Jn4pTlc0k2
Created message, message ID: msg_gIvOE2mClicg0FfkfJ8eUC7A
Run finished with status: RunStatus.FAILED
Run failed: {'code': 'rate_limit_exceeded', 'message': 'Rate limit is exceeded. Try again in 86400 seconds.'}
Messages: {'object': 'list', 'data': [{'id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'object': 'thread.message', 'created_at': 1737400903, 'assistant_id': None, 'thread_id': 'thread_VXxeHYDgTd1Cl2Jn4pTlc0k2', 'run_id': None, 'role': 'user', 'content': [{'type': 'text', 'text': {'value': 'Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million', 'annotations': []}}], 'attachments': [], 'metadata': {}}], 'first_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'last_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'has_more': False}
Deleted agent


enter image description here

Current usage

enter image description here


  • I was wrong. I ended up increasing the quota limit by editing the deployment, which made the code work.

    enter image description here