Search code examples
tensorflow, tf.data.dataset

Access values in a dict inside tf.data.Dataset.map using a tf.string key


I am creating a tf.data.Dataset, starting with list_files to get the paths to all my images. The annotations are stored on disk as JSON files. The structure of each JSON file is

{ 
   "img1.png": {
                data ...
               },
   "img2.png": ...
}

Hence each key is an image name, and its value is the annotation data for that image.

I can easily extract the image names from the paths provided by list_files. However, that is tf.string, which cannot be used directly(?) to access the values in the annotation.

Is there an easy way to convert the tf.string to a Python string so I can read the ground-truth data from the JSON file?

Alternatively, is there a way to convert the annotation to a proper tf type?

from typing import Mapping
from numpy import ndarray
import tensorflow as tf
import cv2 as cv
from pathlib import Path
from typing import Any, Mapping, NamedTuple
import json

class Point:
    """A mutable 2-D point exposing ``x`` and ``y`` as plain attributes."""

    x: float
    y: float

    def __init__(self, x: float, y: float):
        # Coordinates are stored exactly as given; no validation or conversion.
        self.x, self.y = x, y

class BoundingBox(NamedTuple):
    """Immutable 4-tuple with float fields ``top``, ``left``, ``bottom``, ``right``.

    NOTE(review): the coordinate convention (pixels vs. normalized values,
    which corner is the origin) is not established in the visible code --
    confirm against the annotation format before relying on it.
    """

    top: float
    left: float
    bottom: float
    right: float

class Annotation:
    """Bundle of an image, its bounding box and a visibility flag."""

    image: tf.Tensor
    bounding_box: tf.Tensor
    is_visible: bool

    def __init__(self, image, bounding_box, is_visible):
        # Plain attribute storage: each argument is kept as passed in.
        self.image, self.bounding_box, self.is_visible = (
            image,
            bounding_box,
            is_visible,
        )

# Label lookup table: the two class names map to integer ids, while "bbox"
# maps to the BoundingBox type itself (the class, not an instance).
# NOTE(review): nothing in the visible code reads LABELS -- confirm it is used.
LABELS = {
    "NO_CLUB": 0,
    "CLUB": 1,
    "bbox": BoundingBox,
}


def is_in_split(image_path: tf.string, is_training: bool) -> bool:
    """Tell whether ``image_path`` belongs to the requested split.

    The path is hashed into one of 10 buckets with
    ``tf.strings.to_hash_bucket_fast``. Buckets below 8 form the training
    split and the remaining buckets form the complementary split, so the
    assignment depends only on the path string.

    Args:
        image_path: Path string (or string tensor) to hash.
        is_training: Select the training buckets when true, otherwise the
            remaining buckets.

    Returns:
        The result of comparing the bucket index against 8.
    """
    bucket = tf.strings.to_hash_bucket_fast(image_path, 10)
    return bucket < 8 if is_training else bucket >= 8


def create_image_and_annotation(image_path: tf.string, annotation: Mapping[str, Any]):
    """Decode the image at ``image_path`` and pair it with a placeholder label.

    The file extension is compared case-insensitively: ``jpg``/``jpeg`` files
    go through the JPEG decoder and everything else through the PNG decoder.
    Both decoders are asked for 3 channels.

    Args:
        image_path: Scalar string tensor holding the path of the image file.
        annotation: Parsed annotation mapping keyed by image name. It is not
            consulted yet (see the TODO below).

    Returns:
        ``(image, (bounding_box, True))`` where ``bounding_box`` is currently
        the fixed placeholder ``BoundingBox(0, 0, 10, 10)``.
    """
    bits = tf.io.read_file(image_path)
    # The image name is the last "/"-separated component of the path.
    image_name = tf.strings.split(image_path, "/")[-1]
    # Lower-case the extension once instead of enumerating case variants.
    suffix = tf.strings.lower(tf.strings.split(image_name, ".")[-1])

    is_jpeg = tf.math.reduce_any(tf.math.equal(suffix, ["jpg", "jpeg"]))
    if is_jpeg:
        image = tf.io.decode_jpeg(bits, channels=3)
    else:
        image = tf.io.decode_png(bits, channels=3)

    # TODO: look up the real annotation for this image. `image_name` is a
    # tf.string tensor, so it cannot index the Python `annotation` dict
    # directly; the lookup has to run in Python (e.g. via tf.py_function).
    bounding_box = BoundingBox(0, 0, 10, 10)
    return image, (bounding_box, True)


def createDataset(dir: Path, annotation: Mapping[str, Any], is_training: bool) -> tf.data.Dataset:
    """Build the image/annotation dataset for one data directory.

    Image files are globbed from ``dir / "images"`` for the png, jpg and jpeg
    extensions (lower- and upper-case), restricted to the requested split,
    shuffled with a buffer of 1000 and mapped through
    ``create_image_and_annotation``.

    Args:
        dir: Directory containing an ``images`` sub-directory.
        annotation: Parsed annotation mapping passed on to
            ``create_image_and_annotation``.
        is_training: Keep only the paths that ``is_in_split`` assigns to the
            training split when true, otherwise only the remaining paths.

    Returns:
        The resulting ``tf.data.Dataset``; nothing is read or decoded until
        the caller iterates it.
    """
    extensions = ("png", "PNG", "jpg", "JPG", "jpeg", "JPEG")
    patterns = [str(dir / "images" / f"*.{ext}") for ext in extensions]

    return (
        tf.data.Dataset.list_files(patterns)
        # Filter on the path before decoding so files outside the split are
        # never read; previously `is_training` was ignored altogether.
        .filter(lambda path: is_in_split(path, is_training))
        .shuffle(1000)
        .map(lambda path: create_image_and_annotation(path, annotation))
    )

def getDataset(data_root_path: Path, is_training: bool) -> tf.data.Dataset:
    """Build one dataset covering every sub-directory of ``data_root_path``.

    Each sub-directory must contain an ``annotations.json`` file; it is
    parsed and handed to ``createDataset`` together with the directory. The
    per-directory datasets are concatenated in sorted directory order.

    Args:
        data_root_path: Root directory whose immediate sub-directories each
            hold one dataset.
        is_training: Forwarded to ``createDataset`` to select the split.

    Returns:
        The concatenation of all per-directory datasets.

    Raises:
        ValueError: If ``data_root_path`` has no sub-directories.
        FileNotFoundError: If a sub-directory lacks ``annotations.json``.
    """
    combined = None
    # Sort so the concatenation order does not depend on iterdir() ordering.
    for sub_dir in sorted(p for p in data_root_path.iterdir() if p.is_dir()):
        # The file only needs to stay open while the JSON is parsed.
        with open(sub_dir / "annotations.json", encoding="utf-8") as json_file:
            annotation = json.load(json_file)
        dataset = createDataset(sub_dir, annotation, is_training=is_training)
        combined = dataset if combined is None else combined.concatenate(dataset)

    if combined is None:
        raise ValueError(f"No dataset directories found under {data_root_path}")
    return combined



# Build the training split from the dataset root when the module runs.
training_data = getDataset(
    data_root_path=Path("/home/erik/Datasets/ClubHeadDetection"),
    is_training=True,
)

Solution

  • The easiest solution was to read the file using

    # Read the annotation file's raw contents as a string tensor; parsing
    # the JSON is deferred to the Python function that receives it.
    annotation = tf.io.read_file(str(json_path))
    

    Then the bounding box is created by calling

    # Call the Python function `create_bbox` with the raw annotation text and
    # the image name as inputs.
    # NOTE(review): Tout declares five float32 outputs -- presumably four box
    # coordinates plus one extra value; confirm against create_bbox.
    bbox = tf.py_function(
            create_bbox,
            inp=[annotation, image_name],
            Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
        )
    

    Inside create_bbox we can now call regular Python functions, such as:

    annotation_py = annotation.numpy()
    annotation_json = json.loads(annotation_py)
    key_py = image_name.numpy().decode("utf-8")