import keras_core as keras # so that i can use keras_cv
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    """Plot the first element of ``inputs`` as a bounding-box gallery.

    Args:
        inputs: object exposing ``take``; its first element must be a mapping
            with ``"images"`` and ``"bounding_boxes"`` entries.
        value_range: forwarded to ``plot_bounding_box_gallery``.
        rows: forwarded to ``plot_bounding_box_gallery``.
        cols: forwarded to ``plot_bounding_box_gallery``.
        bounding_box_format: forwarded to ``plot_bounding_box_gallery``.

    Note:
        ``class_mapping`` is read from module scope; it is not defined in
        the visible part of this file.
    """
    first_element = next(iter(inputs.take(1)))
    gallery_options = {
        "value_range": value_range,
        "rows": rows,
        "cols": cols,
        "y_true": first_element["bounding_boxes"],
        "scale": 5,
        "font_scale": 0.7,
        "bounding_box_format": bounding_box_format,
        "class_mapping": class_mapping,
    }
    visualization.plot_bounding_box_gallery(
        first_element["images"], **gallery_options
    )
def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    """Repack one raw TFDS example into an images / bounding_boxes dict.

    Args:
        inputs: mapping with an ``"image"`` entry and an ``"objects"`` entry
            that holds ``"bbox"`` and ``"label"``.
        bounding_box_format: target format the ``rel_yxyx`` source boxes are
            converted to.

    Returns:
        ``{"images": float32 image,
        "bounding_boxes": {"classes": float32, "boxes": float32}}``.
    """
    image = inputs["image"]
    objects = inputs["objects"]

    converted_boxes = keras_cv.bounding_box.convert_format(
        objects["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )

    # Labels are shifted up by one -- presumably so that 0 stays free for
    # padded / "no object" slots; confirm against the training code.
    shifted_labels = tf.cast(objects["label"] + 1, dtype=tf.float32)

    return {
        "images": tf.cast(image, tf.float32),
        "bounding_boxes": {
            "classes": shifted_labels,
            "boxes": tf.cast(converted_boxes, dtype=tf.float32),
        },
    }
def load_pascal_voc(split, dataset, bounding_box_format):
    """Load a TFDS split and map every example through the unpackaging step.

    Args:
        split: split name passed to ``tfds.load``.
        dataset: TFDS dataset name passed to ``tfds.load``.
        bounding_box_format: forwarded to ``unpackage_raw_tfds_inputs``.

    Returns:
        The mapped dataset (files are shuffled at load time).
    """

    def _unpackage(example):
        return unpackage_raw_tfds_inputs(
            example, bounding_box_format=bounding_box_format
        )

    raw_ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
    return raw_ds.map(_unpackage, num_parallel_calls=tf.data.AUTOTUNE)
# Load the three VOC 2007 splits in "xywh" format and report their sizes.
# NOTE(review): `train_ds` is built from the "test" split and `test_ds` from
# the "train" split -- confirm this swap is intentional.
train_ds = load_pascal_voc(
    split="test",
    dataset="voc/2007",
    bounding_box_format="xywh",
)
print(tf.data.experimental.cardinality(train_ds))

val_ds = load_pascal_voc(
    split="validation",
    dataset="voc/2007",
    bounding_box_format="xywh",
)
print(tf.data.experimental.cardinality(val_ds))

test_ds = load_pascal_voc(
    split="train",
    dataset="voc/2007",
    bounding_box_format="xywh",
)
print(tf.data.experimental.cardinality(test_ds))

# Batches of one ragged element, so the datasets can be handed to
# visualize_dataset.
train_ds = train_ds.ragged_batch(1, drop_remainder=True)
val_ds = val_ds.ragged_batch(1, drop_remainder=True)
test_ds = test_ds.ragged_batch(1, drop_remainder=True)
Padding the classes and bounding boxes so that a DETR model can be trained from scratch in TensorFlow:
def pad(dataset, max_boxes=42):
    """Pad every example's classes and boxes to a fixed number of slots.

    The padding is done lazily inside ``Dataset.map`` with ``tf.pad``. Nothing
    is pulled into Python lists or NumPy arrays, so images of different sizes
    are fine and the whole split is never held in memory at once.

    Args:
        dataset: dataset of batch-size-1 elements shaped like
            ``{"images": ..., "bounding_boxes": {"classes": ..., "boxes": ...}}``
            (for example the output of ``ragged_batch(1)``).
        max_boxes: number of class / box slots per example. Examples with
            fewer objects are zero-padded at the end; examples with more keep
            only their first ``max_boxes`` objects.

    Returns:
        A dataset yielding ``(image, (classes, boxes))`` where ``classes`` is
        int32 with shape ``(max_boxes,)`` and ``boxes`` has shape
        ``(max_boxes, 4)``.
    """

    def _dense(value):
        # Indexing a ragged batch can still return a RaggedTensor (images do).
        if isinstance(value, tf.RaggedTensor):
            return value.to_tensor()
        return value

    def _pad_example(x):
        # Index 0 strips the batch dimension of size one.
        image = _dense(x["images"][0])
        classes = _dense(x["bounding_boxes"]["classes"][0])
        boxes = _dense(x["bounding_boxes"]["boxes"][0])

        # int32 matches what tf.keras.utils.pad_sequences returns by default.
        classes = tf.cast(classes, tf.int32)[:max_boxes]
        boxes = boxes[:max_boxes]

        missing = max_boxes - tf.shape(classes)[0]
        classes = tf.pad(classes, [[0, missing]])
        boxes = tf.pad(boxes, [[0, missing], [0, 0]])

        # Restore the static shapes lost to the dynamic padding amount so the
        # result can be batched downstream.
        classes = tf.ensure_shape(classes, [max_boxes])
        boxes = tf.ensure_shape(boxes, [max_boxes, 4])
        return image, (classes, boxes)

    return dataset.map(_pad_example, num_parallel_calls=tf.data.AUTOTUNE)
Is there a more efficient way to do the above, without converting the data from the `tf.data.Dataset` format to arrays and then back to the `tf.data.Dataset` format?
I tried applying `.map()` to the datasets, but got an error saying that `tf.keras.utils.pad_sequences` can only be used in eager mode.
Use the `tf.pad` function:
# Dataset used for the padding demo below (loaded from the "validation" split).
train_ds = load_pascal_voc(
    split="validation",
    dataset="voc/2007",
    bounding_box_format="xywh",
)

# Number of class / box slots every example is padded to.
N = 42

# Target image size handed to the Resizing layer in `preprocess`.
max_height = max_width = 500
def preprocess(x):
    """Resize the image and pad classes / boxes to ``N`` slots.

    Args:
        x: unbatched example shaped like
            ``{"images": ..., "bounding_boxes": {"classes": ..., "boxes": ...}}``.

    Returns:
        ``(image, (classes, boxes))`` where the image has been resized to
        ``max_height`` x ``max_width``, ``classes`` has shape ``(N,)`` and
        ``boxes`` has shape ``(N, 4)``. Examples with fewer than ``N`` objects
        are zero-padded at the end; examples with more keep their first ``N``.
    """
    resize = keras_cv.layers.Resizing(
        max_height,
        max_width,
        bounding_box_format="xywh",
        pad_to_aspect_ratio=True,
    )
    # NOTE(review): only the image goes through the layer, so the boxes below
    # are still expressed in the original image's coordinates -- confirm
    # whether they should be resized together with the image.
    image = resize(x["images"])

    classes = x["bounding_boxes"]["classes"][:N]
    boxes = x["bounding_boxes"]["boxes"][:N]

    # Pad at the end only. This also handles an example with zero objects,
    # where a slice ending at `-len(...)` collapses to an empty tensor.
    missing = N - tf.shape(classes)[0]
    classes = tf.ensure_shape(tf.pad(classes, [[0, missing]]), [N])
    boxes = tf.ensure_shape(tf.pad(boxes, [[0, missing], [0, 0]]), [N, 4])

    # Parenthesised so the whole (image, (classes, boxes)) structure is
    # returned as a single value.
    return (image, (classes, boxes))
# Apply the resize-and-pad step to every example.
TRAIN_DS = train_ds.map(preprocess)

# Smoke test: print the image, classes and boxes shapes of two examples.
for sample in TRAIN_DS.take(2):
    print(sample[0].numpy().shape)
    targets = sample[1]
    print(targets[0].numpy().shape)
    print(targets[1].numpy().shape)