python · google-cloud-vision · batch-request

Google Cloud Vision API batch Image Annotator


I have a directory with almost 200 images from which I want to get the image properties using the google-cloud-vision API.

I have a problem making a batch request for those images. The first snippet below is a tutorial from the official site, but it uses gs:// as the source for its images.

I want to use my local directory instead, but can't figure out how to do it. The second snippet is what I have written; I tried to load the images with an image loader but failed.

Please help!

from google.cloud import vision_v1

def sample_async_batch_annotate_images(
    input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg",
    output_uri="gs://your-bucket/prefix/",
):
    """Perform async batch image annotation."""
    client = vision_v1.ImageAnnotatorClient()

    source = {"image_uri": input_image_uri}
    image = {"source": source}
    features = [
        {"type_": vision_v1.Feature.Type.LABEL_DETECTION},
        {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
    ]

    # Each requests element corresponds to a single image.  To annotate more
    # images, create a request element for each image and add it to
    # the array of requests
    requests = [{"image": image, "features": features}]
    gcs_destination = {"uri": output_uri}

    # The max number of responses to output in each JSON file
    batch_size = 2
    output_config = {"gcs_destination": gcs_destination,
                     "batch_size": batch_size}

    operation = client.async_batch_annotate_images(requests=requests, output_config=output_config)

    print("Waiting for operation to complete...")
    response = operation.result(90)

    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(gcs_output_uri))

I have tried simply changing the path to a local directory, but got an error saying it must be a gs:// URI. I then started loading the images into imgs with an image loader, but got this error: TypeError: Cannot set google.cloud.vision.v1.ImageSource.image_uri to [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=960x817 at 0x1237E0CD0

import io
import os

from google.cloud import vision_v1

from os import listdir
from PIL import Image as PImage

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/Users/example/Documents/Project/ServiceAccountTolken.json"

def loadImages(path):
    # return array of images

    imagesList = listdir(path)
    loadedImages = []
    for image in imagesList:
        img = PImage.open(path + image)
        loadedImages.append(img)

    return loadedImages

path = "/Users/Documents/Project/images/"

# your images in an array
imgs = loadImages(path)

# it's a bit messy here since I tried some options

def sample_async_batch_annotate_images(
    input_image_uri = imgs, #os.path.abspath("/Users/Documents/Project/images"),
    output_uri = os.path.abspath("/Users/Documents/Project/images/output"),
):
    """Perform async batch image annotation."""
    client = vision_v1.ImageAnnotatorClient()

    source = {"image_uri": input_image_uri}
    image = {"source": source}
    features = [
        {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
    ]

    # Each requests element corresponds to a single image.  To annotate more
    # images, create a request element for each image and add it to
    # the array of requests
    requests = [{"image": image, "features": features}]
    gcs_destination = {"uri": output_uri}

    # The max number of responses to output in each JSON file
    batch_size = 6
    output_config = {"gcs_destination": gcs_destination,
                     "batch_size": batch_size}

    operation = client.async_batch_annotate_images(requests=requests, output_config=output_config)

    print("Waiting for operation to complete...")
    response = operation.result(90)

    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(gcs_output_uri))

Solution

  • async_batch_annotate_images() does not support reading local files; you need to use batch_annotate_images() instead. To read local images, read each image's content as bytes and pass it in the request. The code below includes saving the response.json to a GCS bucket, but if you don't need GCS, uncomment the part that saves the file locally instead.

    See code below:

    import io
    import os
    
    from google.cloud import vision_v1
    
    from os import listdir
    import proto
    from google.cloud import storage
    #os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/Users/example/Documents/Project/ServiceAccountTolken.json"
    
    def loadImages(path):
        # return an array of image bytes

        imagesList = listdir(path)
        loadedImages = []

        for image_name in imagesList:
            # os.path.join avoids problems when path has no trailing slash
            with io.open(os.path.join(path, image_name), 'rb') as image_file:
                content = image_file.read()
            loadedImages.append(content)

        return loadedImages
    
    path = "/home/user/your_local_path"
    
    # your images in an array
    contents = loadImages(path)
    
    def batch_annotate(
        contents = contents,
    ):
        """Perform async batch image annotation."""
        client = vision_v1.ImageAnnotatorClient()
    
        bucket_name = "your-bucket-name"
        destination_blob_name = "response.json"
    
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)
    
        requests = []
    
        for content in contents:
            image = {"content": content}
            features = [
                {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
            ]
            requests.append({"image": image, "features": features})
    
        response = client.batch_annotate_images(requests=requests,)
        to_text = proto.Message.to_json(response) # convert object to text
    
        # uncomment if you want to save the response to your local directory instead
        """with open('response.json', 'w') as f:
            f.write(to_text)"""
    
        file_obj = io.StringIO(to_text)
        file_obj.seek(0)
        blob.upload_from_file(file_obj)
    
    batch_annotate()
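
    Since there are almost 200 images, one practical caveat: the synchronous batch_annotate_images() call limits how many images you can send per request (16 at the time of writing, per the Vision API quota docs; please verify against the current limits). Below is a minimal sketch of chunking the requests, reusing the contents list and the request shape from the code above; the chunk size is an assumption you may need to adjust:

    # hedged sketch: split the images into smaller batches so each request
    # stays under the per-request image limit of the synchronous endpoint
    CHUNK_SIZE = 16  # assumption; adjust to the documented per-request limit

    def batch_annotate_chunked(contents, chunk_size=CHUNK_SIZE):
        client = vision_v1.ImageAnnotatorClient()
        all_responses = []

        for start in range(0, len(contents), chunk_size):
            chunk = contents[start:start + chunk_size]
            requests = [
                {
                    "image": {"content": content},
                    "features": [{"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES}],
                }
                for content in chunk
            ]
            response = client.batch_annotate_images(requests=requests)
            all_responses.extend(response.responses)

        return all_responses

    Each element of the returned list is an AnnotateImageResponse, so the image-properties result for the i-th image is available as all_responses[i].image_properties_annotation.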
    
    

    See saved result in GCS:

    (screenshot: the response.json object saved in the GCS bucket)

    Sample snippet of the response.json:

    (screenshot: a sample snippet of the response.json contents)
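
    If you want to consume the saved JSON programmatically instead of viewing it in the console, a small sketch like the one below should work. It assumes the placeholder names used above (your-bucket-name, response.json) and the default camelCase field names that proto.Message.to_json produces:

    import json

    from google.cloud import storage

    # hedged sketch: download the response.json written above and print the
    # dominant colors reported by the IMAGE_PROPERTIES feature
    storage_client = storage.Client()
    blob = storage_client.bucket("your-bucket-name").blob("response.json")
    data = json.loads(blob.download_as_text())

    for i, resp in enumerate(data.get("responses", [])):
        colors = (
            resp.get("imagePropertiesAnnotation", {})
                .get("dominantColors", {})
                .get("colors", [])
        )
        for color in colors[:3]:  # top three dominant colors per image
            rgb = color.get("color", {})
            print(i, rgb.get("red"), rgb.get("green"), rgb.get("blue"), color.get("score"))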