python · google-cloud-vision · batch-request

Google Cloud Vision API batch Image Annotator


I have a directory with almost 200 images from which I want to get the image properties using the google-cloud-vision API.

I have a problem making a batch request for those images. The first snippet below is a tutorial from the official site, but it uses gs:// as the source for its images.

I want to use my local directory instead, but can't figure out how to do it. The second snippet is what I have written; I tried to load the images with an image loader but failed.

Please help!

from google.cloud import vision_v1

def sample_async_batch_annotate_images(
    input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg",
    output_uri="gs://your-bucket/prefix/",
):
    """Perform async batch image annotation."""
    client = vision_v1.ImageAnnotatorClient()

    source = {"image_uri": input_image_uri}
    image = {"source": source}
    features = [
        {"type_": vision_v1.Feature.Type.LABEL_DETECTION},
        {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
    ]

    # Each requests element corresponds to a single image.  To annotate more
    # images, create a request element for each image and add it to
    # the array of requests
    requests = [{"image": image, "features": features}]
    gcs_destination = {"uri": output_uri}

    # The max number of responses to output in each JSON file
    batch_size = 2
    output_config = {"gcs_destination": gcs_destination,
                     "batch_size": batch_size}

    operation = client.async_batch_annotate_images(requests=requests, output_config=output_config)

    print("Waiting for operation to complete...")
    response = operation.result(90)

    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(gcs_output_uri))

I have tried simply changing the path to a local directory, but got an error saying it must be a gs:// URI. I then started loading the images into imgs with an image loader, but got this error: TypeError: Cannot set google.cloud.vision.v1.ImageSource.image_uri to [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=960x817 at 0x1237E0CD0

import io
import os

from google.cloud import vision_v1

from os import listdir
from PIL import Image as PImage

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/Users/example/Documents/Project/ServiceAccountTolken.json"

def loadImages(path):
    # return array of images

    imagesList = listdir(path)
    loadedImages = []
    for image in imagesList:
        img = PImage.open(path + image)
        loadedImages.append(img)

    return loadedImages

path = "/Users/Documents/Project/images/"

# your images in an array
imgs = loadImages(path)

# it's a bit messy here since I tried some options

def sample_async_batch_annotate_images(
    input_image_uri = imgs, #os.path.abspath("/Users/Documents/Project/images"),
    output_uri = os.path.abspath("/Users/Documents/Project/images/output"),
):
    """Perform async batch image annotation."""
    client = vision_v1.ImageAnnotatorClient()

    source = {"image_uri": input_image_uri}
    image = {"source": source}
    features = [
        {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
    ]

    # Each requests element corresponds to a single image.  To annotate more
    # images, create a request element for each image and add it to
    # the array of requests
    requests = [{"image": image, "features": features}]
    gcs_destination = {"uri": output_uri}

    # The max number of responses to output in each JSON file
    batch_size = 6
    output_config = {"gcs_destination": gcs_destination,
                     "batch_size": batch_size}

    operation = client.async_batch_annotate_images(requests=requests, output_config=output_config)

    print("Waiting for operation to complete...")
    response = operation.result(90)

    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(gcs_output_uri))

Solution

  • async_batch_annotate_images() does not support reading local files; you need to use batch_annotate_images() instead. To read local images, read each image's content as bytes and pass it in the request. The code below includes saving the response.json to a GCS bucket, but if you don't need GCS, uncomment the part that saves the file locally instead.

    See code below:

    import io
    import os
    
    from google.cloud import vision_v1
    
    from os import listdir
    import proto
    from google.cloud import storage
    #os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/Users/example/Documents/Project/ServiceAccountTolken.json"
    
    def loadImages(path):
        # return an array of image bytes

        imagesList = listdir(path)
        loadedImages = []

        for image_name in imagesList:
            # os.path.join avoids problems when path has no trailing slash
            with io.open(os.path.join(path, image_name), 'rb') as image_file:
                content = image_file.read()
            loadedImages.append(content)

        return loadedImages
    
    path = "/home/user/your_local_path"
    
    # your images in an array
    contents = loadImages(path)
    
    def batch_annotate(
        contents = contents,
    ):
        """Perform async batch image annotation."""
        client = vision_v1.ImageAnnotatorClient()
    
        bucket_name = "your-bucket-name"
        destination_blob_name = "response.json"
    
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)
    
        requests = []
    
        for content in contents:
            image = {"content": content}
            features = [
                {"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES},
            ]
            requests.append({"image": image, "features": features})
    
        response = client.batch_annotate_images(requests=requests,)
        to_text = proto.Message.to_json(response) # convert object to text
    
        # uncomment if you want to save the response to your local directory instead
        """with open('response.json', 'w') as f:
            f.write(to_text)"""
    
        file_obj = io.StringIO(to_text)
        file_obj.seek(0)
        blob.upload_from_file(file_obj)
    
    batch_annotate()
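
    Since there are almost 200 images, one practical caveat: the synchronous batch_annotate_images() call limits how many images you can send per request (16 at the time of writing, per the Vision API quota docs; please verify against the current limits). Below is a minimal sketch of chunking the requests, reusing the contents list and the request shape from the code above; the chunk size is an assumption you may need to adjust:

    # hedged sketch: split the images into smaller batches so each request
    # stays under the per-request image limit of the synchronous endpoint
    CHUNK_SIZE = 16  # assumption; adjust to the documented per-request limit

    def batch_annotate_chunked(contents, chunk_size=CHUNK_SIZE):
        client = vision_v1.ImageAnnotatorClient()
        all_responses = []

        for start in range(0, len(contents), chunk_size):
            chunk = contents[start:start + chunk_size]
            requests = [
                {
                    "image": {"content": content},
                    "features": [{"type_": vision_v1.Feature.Type.IMAGE_PROPERTIES}],
                }
                for content in chunk
            ]
            response = client.batch_annotate_images(requests=requests)
            all_responses.extend(response.responses)

        return all_responses

    Each element of the returned list is an AnnotateImageResponse, so the image-properties result for the i-th image is available as all_responses[i].image_properties_annotation.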
    
    

    See saved result in GCS:

    (screenshot: the response.json object saved in the GCS bucket)

    Sample snippet of the response.json:

    (screenshot: a sample snippet of the response.json contents)
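
    If you want to consume the saved JSON programmatically instead of viewing it in the console, a small sketch like the one below should work. It assumes the placeholder names used above (your-bucket-name, response.json) and the default camelCase field names that proto.Message.to_json produces:

    import json

    from google.cloud import storage

    # hedged sketch: download the response.json written above and print the
    # dominant colors reported by the IMAGE_PROPERTIES feature
    storage_client = storage.Client()
    blob = storage_client.bucket("your-bucket-name").blob("response.json")
    data = json.loads(blob.download_as_text())

    for i, resp in enumerate(data.get("responses", [])):
        colors = (
            resp.get("imagePropertiesAnnotation", {})
                .get("dominantColors", {})
                .get("colors", [])
        )
        for color in colors[:3]:  # top three dominant colors per image
            rgb = color.get("color", {})
            print(i, rgb.get("red"), rgb.get("green"), rgb.get("blue"), color.get("score"))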