So I'm trying to set up my TensorFlow dataset object for model input. X is a series of images (.png files) and Y is a series of lists saved in JSON files.
Everything seems to work well until I go to print some of the dataset elements at the bottom of the code below. I'd like to make sure it worked correctly, but I get an error saying:
Error while parsing JSON: : Root element must be a message. [[{{node DecodeJSONExample}}]] [Op:IteratorGetNext]
Some description of the data:
The JSON files each contain a 1D list of varying length whose values represent points in 3D space. They look something like this:
[.28, -.39, .48, 1, 55, 88]
Images are in raw format and are (1080, 1920, 3). I hope that helps a little. Let me know if more information is needed.
Any thoughts on what I'm doing wrong?
def build_dataset():
    """Collect image/label file paths and return them as a shuffled dataset.

    Walks ``directory`` recursively, appending every path ending in
    ``.png`` to the outer list ``x`` and every path ending in ``.json`` to
    the outer list ``y``, then builds a ``tf.data.Dataset`` of (x, y) path
    pairs shuffled with a buffer the size of ``x``.
    """
    # NOTE(review): pairs are formed by position, so this presumably relies
    # on each image and its label being discovered in matching order — confirm.
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            full_path = os.path.join(folder, name)
            if full_path.endswith('.png'):
                x.append(full_path)
            if full_path.endswith('.json'):
                y.append(full_path)

    paired_paths = tf.data.Dataset.from_tensor_slices((x, y))
    return paired_paths.shuffle(buffer_size=len(x))
def read_data(x_img, y_model):
    """Read one image file and one label file and return both decoded.

    The image bytes are decoded with ``tf.io.decode_png``; the label file
    contents are passed through ``tf.io.decode_json_example``.
    """
    png_bytes = tf.io.read_file(x_img)
    json_text = tf.io.read_file(y_model)
    return tf.io.decode_png(png_bytes), tf.io.decode_json_example(json_text)
def prepare_data(img, models):
    """Divide the image by 255 and pass the label through unchanged."""
    scaled = img / 255
    return scaled, models
# Build the path dataset, decode each pair, rescale the images, and batch.
train_ds = (
    build_dataset()
    .map(read_data)
    .map(prepare_data)
    .batch(64)
)

# Print every batch to inspect what the pipeline produces.
for x, y in train_ds:
    print(x, y)
Since you are working with lists instead of proper JSONs, I would recommend fixing the JSON files. For example:
{ "data" : [.28, -.39, .48, 1, 55, 88] }
Or, if you cannot change the files, just read each file and parse it into a tensor without any JSON utilities such as tf.io.decode_json_example:
import tensorflow as tf
def read_data(x_img, y_model):
    """Load one (image, label) pair from disk.

    Args:
        x_img: Path to a PNG image file.
        y_model: Path to a text file holding a flat bracketed list of
            numbers, e.g. ``[.28, -.39, .48, 1, 55, 88]``.

    Returns:
        A tuple ``(img, values)``: the decoded PNG tensor and a 1-D tensor
        of the numbers parsed from the label file.
    """
    img_data = tf.io.read_file(x_img)
    img = tf.io.decode_png(img_data)
    model_data = tf.io.read_file(y_model)
    # Swap brackets and commas for spaces instead of deleting them, so a
    # compact list such as "[1,2,3]" still yields separate tokens rather
    # than collapsing into "123".
    cleaned = tf.strings.regex_replace(
        tf.strings.strip(model_data), r'[\[\],]', ' ')
    # With no separator given, split breaks on runs of whitespace and drops
    # empty tokens, so the extra spaces introduced above are harmless.
    return img, tf.strings.to_number(tf.strings.split(cleaned))
def prepare_data(img, models):
    """Return the image divided by 255 together with the untouched label."""
    normalized = img / 255
    return normalized, models
# Pair two sample images with two sample label files, then decode each
# pair, rescale the images, and batch the result.
train_ds = (
    tf.data.Dataset.from_tensor_slices((
        ['/content/result_image.png', '/content/result_image1.png'],
        ['/content/test.json', '/content/test2.json'],
    ))
    .map(read_data)
    .map(prepare_data)
    .batch(64)
)

# Show the image batch shape and the parsed label values.
for x, y in train_ds:
    print(x.shape, y)
(2, 100, 100, 3) tf.Tensor(
[[ 0.28 -0.39 0.48 1. 55. 88. ]
[ 0.28 -0.39 0.48 1. 55. 88. ]], shape=(2, 6), dtype=float32)