Search code examples
python, azure, azure-cognitive-services

Read text from a gif image using Azure Cognitive services


The Azure API works for .jpg images, but when I tried it with a GIF image it failed with: Operation returned an invalid status code 'Bad Request'

# Reproduction of the failing call: submit a remote image URL to the Read operation.
# NOTE(review): `computervision_client` is not defined in this snippet; it is
# presumably an already-authenticated Computer Vision SDK client - confirm.
print("===== Read File - remote =====")
# Get an image with text
# (this URL points at a .gif file, the case reported as failing with 'Bad Request')
read_image_url = "https://ci6.googleusercontent.com/proxy/5NB2CkeM22wqFhiQSmRlJVVinEp3o2nEbZQcy6_8CCKlKst_WW25N0PcsPaYiWAASXO52hufvUAEimUd3IreGowknEXy322x5oYG3lzkBGyctLI0M3eH_w-qHH9qPqtobjpGYooM7AvyNX2CCZtcnEgu8duKlee2GGaswg=s0-d-e1-ft#https://image.e.us.partycity.com/lib/fe301570756406747c1c72/m/10/93d08fa0-c760-4d8b-8e35-ddd5308ec311.gif"

# Call API with URL and raw response (allows you to get the operation location)
read_response = computervision_client.read(read_image_url,  raw=True)

Solution

  • You cannot send a GIF directly to the Azure Read API, because GIF is not one of its supported input formats. The documentation states:

    Request body
    
    Input passed within the POST body. Supported input methods: raw image binary or image URL. 
    
    Input requirements:
    
    Supported image formats: JPEG, PNG, BMP, PDF and TIFF.
    Please do note MPO (Multi Picture Objects) embedded JPEG files are not supported.
    For multi-page PDF and TIFF documents:
    For the free tier, only the first 2 pages are processed.
    For the paid tier, up to 2,000 pages are processed.
    Image file size must be less than 50 MB (4 MB for the free tier).
    The image/document page dimensions must be at least 50 x 50 pixels and at most 10000 x 10000 pixels.
    

    To handle the GIF, convert it to PNG and then send the PNG as raw binary data for recognition, as shown below:

    import glob
    import time
    
    import requests
    from PIL import Image
    
    # Download a GIF, re-encode it as PNG (the Read API does not accept GIF),
    # submit the PNG bytes to the Read API and print the recognized text.
    endpoint = 'https://NAME.cognitiveservices.azure.com/'
    subscription_key = 'SUBSCRIPTION_KEY'

    read_url = endpoint + "vision/v3.2/read/analyze"

    # Upper bounds so that a stalled download or service cannot hang the script.
    request_timeout = 30  # seconds allowed for each HTTP call
    poll_interval = 1     # seconds to wait between status checks
    max_polls = 60        # give up on an image after this many status checks

    uri = 'https://ci6.googleusercontent.com/proxy/5NB2CkeM22wqFhiQSmRlJVVinEp3o2nEbZQcy6_8CCKlKst_WW25N0PcsPaYiWAASXO52hufvUAEimUd3IreGowknEXy322x5oYG3lzkBGyctLI0M3eH_w-qHH9qPqtobjpGYooM7AvyNX2CCZtcnEgu8duKlee2GGaswg=s0-d-e1-ft#https://image.e.us.partycity.com/lib/fe301570756406747c1c72/m/10/93d08fa0-c760-4d8b-8e35-ddd5308ec311.gif'
    gif = '/tmp/pr0n.gif'

    # Fetch first and check the status, so an HTTP error body is never written
    # to disk as if it were the image.
    download = requests.get(uri, timeout=request_timeout)
    download.raise_for_status()
    with open(gif, 'wb') as f:
        f.write(download.content)

    # Re-encode as PNG. PNG is lossless, so no JPEG-style `quality` setting is
    # passed. NOTE(review): for an animated GIF this presumably converts only
    # the first frame - confirm if text on later frames matters.
    with Image.open(gif) as img:
        img.save(gif + ".png", 'png', optimize=True)


    for filename in sorted(glob.glob("/tmp/pr0n.gif*.png")):

        # Read the image into a byte array (the file is closed right after).
        with open(filename, "rb") as image_file:
            image_data = image_file.read()

        # Submit the raw bytes. The Read endpoint takes no `visualFeatures`
        # parameter (that belongs to the Analyze Image API), so none is sent.
        headers = {'Ocp-Apim-Subscription-Key': subscription_key, 'Content-Type': 'application/octet-stream'}
        response = requests.post(read_url, headers=headers, data=image_data, timeout=request_timeout)
        response.raise_for_status()

        # The recognized text isn't immediately available, so poll the
        # operation URL returned by the service. Status checks are body-less
        # GETs, so only the subscription key header is needed.
        operation_url = response.headers["Operation-Location"]
        poll_headers = {'Ocp-Apim-Subscription-Key': subscription_key}

        analysis = {}
        response_final = None
        for _ in range(max_polls):
            response_final = requests.get(operation_url, headers=poll_headers, timeout=request_timeout)
            # A non-OK reply (e.g. throttling) is treated as "not ready yet"
            # and retried; the attempt limit keeps this from looping forever.
            if response_final.ok:
                analysis = response_final.json()
                if "analyzeResult" in analysis or analysis.get("status") == "failed":
                    break
            time.sleep(poll_interval)

        if "analyzeResult" in analysis:
            # Print the first page of results: recognized text with bounding boxes.
            print(analysis["analyzeResult"]["readResults"][0])
        else:
            # Report the failure instead of skipping the image silently.
            print("No text result for {}: status={!r}, last HTTP code={}".format(
                filename,
                analysis.get("status"),
                response_final.status_code if response_final is not None else None,
            ))