I am testing OCR with GPT-4 Vision (model version: gpt-4 vision-preview), using this API reference:
https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#example-request-2
Can you please help me use the OCR enhancement through the API?
You can use the following URI path for OCR enhancements: {base_url}/extensions/chat/completions
I'm using the Python sample below, and it works properly: https://learn.microsoft.com/en-us/azure/ai-services/openai/gpt-v-quickstart?tabs=image&pivots=rest-api
# Packages required:
import requests
import json
# Connection settings -- replace each placeholder with your own value.
api_base = '<your_azure_openai_endpoint>'
deployment_name = '<your_deployment_name>'
API_KEY = '<your_azure_openai_key>'

# Strip any trailing slash before joining so the URL is well-formed whether
# or not the configured endpoint ends with "/".
base_url = f"{api_base.rstrip('/')}/openai/deployments/{deployment_name}"

# The key travels in the "api-key" header; the request body is sent as JSON.
headers = {
    "Content-Type": "application/json",
    "api-key": API_KEY,
}
# Full request URL: the extensions chat-completions route plus the API version.
endpoint = f"{base_url}/extensions/chat/completions?api-version=2023-12-01-preview"

# Both enhancements (OCR and grounding) are switched on in the payload.
enhancements = {name: {"enabled": True} for name in ("ocr", "grounding")}

# Single data source entry of type AzureComputerVision with its endpoint/key placeholders.
vision_source = {
    "type": "AzureComputerVision",
    "parameters": {
        "endpoint": "<your_computer_vision_endpoint>",
        "key": "<your_computer_vision_key>",
    },
}

# Conversation: a system prompt, then one user turn made of a text part
# followed by an image part.
system_message = {"role": "system", "content": "You are a helpful assistant."}
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this picture:"},
        {"type": "image_url", "image_url": {"url": "<URL or base 64 encoded image>"}},
    ],
}

# Request body; key order matches the original payload so the serialized
# JSON is unchanged.
data = {
    "model": "gpt-4-vision-preview",
    "enhancements": enhancements,
    "dataSources": [vision_source],
    "messages": [system_message, user_message],
    "max_tokens": 2000,
}
# Make the API call. An explicit timeout is passed because requests otherwise
# waits indefinitely on an unresponsive server; 120 seconds is a generous
# upper bound chosen for this sample -- adjust as needed.
response = requests.post(endpoint, headers=headers, data=json.dumps(data), timeout=120)

# Show the HTTP status and the raw response body, so error responses are
# visible as well as successful ones.
print(f"Status Code: {response.status_code}")
print(response.text)