Search code examples
python tensorflow tensorflow2.0 tensorflow-datasets

Find the max value in tensorflow.python.data.ops.dataset_ops.BatchDataset


Suppose I have the following code:

import tensorflow as tf
import numpy as np
 
# Twelve feature rows: row i (counting from 1) is [i, i, i].
simple_features = np.array([[value] * 3 for value in range(1, 13)])
 
# Twelve label rows: row i (counting from 1) is [-i, -i].
simple_labels = np.array([[-value] * 2 for value in range(1, 13)])
 
def print_dataset(ds):
    """Print every batch of ``ds`` as a feature array and a label array.

    Args:
        ds: Iterable yielding ``(inputs, targets)`` pairs; both items must
            expose a ``.numpy()`` method.
    """
    for features, labels in ds:
        print("---Batch---")
        # Same order as the captions: features first, then labels.
        for caption, tensor in (("Feature:", features), ("Label:", labels)):
            print(caption, tensor.numpy())
        print("")
 
    
# Build the windowed time-series dataset from the arrays above and print it.
ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=simple_features,
    targets=simple_labels,
    sequence_length=4,
    batch_size=32,
)
print_dataset(ds)

I want to extract the max value from each simple_feature and its corresponding simple_label. After extracting the max value I would like to add that value to the simple_feature and its corresponding simple_label. For instance, the first simple_feature gives me [1,1,1] and its corresponding label gives me [-1,-1]. The max value would be 1. After that I add 1 to [1,1,1] and [-1,-1] and I would get [2,2,2] and [0,0]. The final dataset should be kept as tensorflow.python.data.ops.dataset_ops.BatchDataset.


Solution

  • You can solve your problem using tf.data.Dataset.from_tensor_slices and tf.data.Dataset.map:

    import tensorflow as tf
    import numpy as np
     
    # Twelve feature rows: row i (counting from 1) is [i, i, i].
    simple_features = np.array([[value] * 3 for value in range(1, 13)])
     
    # Twelve label rows: row i (counting from 1) is [-i, -i].
    simple_labels = np.array([[-value] * 2 for value in range(1, 13)])
     
    def print_dataset(ds):
        """Print every batch of ``ds``, each array starting on its own line.

        Args:
            ds: Iterable yielding ``(inputs, targets)`` pairs; both items must
                expose a ``.numpy()`` method.
        """
        for features, labels in ds:
            print("---Batch---")
            # The trailing "\n" in each caption pushes the array onto a new line.
            for caption, tensor in (("Feature: \n", features), ("Label: \n", labels)):
                print(caption, tensor.numpy())
            print("")
    
    def map_max_values(x, y):
        """Add each feature row's maximum to that row and to its label row.

        Args:
            x: Rank-2 feature batch; the maximum is taken along axis 1.
            y: Rank-2 label batch with the same leading dimension and dtype
                as ``x``.

        Returns:
            Tuple ``(x + row_max, y + row_max)``; both keep their input shape.
        """
        # keepdims=True leaves a trailing axis of size 1, so broadcasting
        # spreads each row's maximum across the columns of both x and y.
        # This avoids reshaping to x.shape[0], which is None inside
        # Dataset.map whenever the batch size is not statically known
        # (e.g. batching without drop_remainder=True).
        row_max = tf.reduce_max(x, axis=1, keepdims=True)
        return x + row_max, y + row_max
    
    batch_size = 4
    # Slice the arrays row by row, group them into full batches, then shift
    # every batch by its per-row feature maximum.
    ds = (
        tf.data.Dataset.from_tensor_slices((simple_features, simple_labels))
        .batch(batch_size, drop_remainder=True)
        .map(map_max_values)
    )
    print_dataset(ds)
    
    ---Batch---
    Feature: 
     [[2 2 2]
     [4 4 4]
     [6 6 6]
     [8 8 8]]
    Label: 
     [[0 0]
     [0 0]
     [0 0]
     [0 0]]
    
    ---Batch---
    Feature: 
     [[10 10 10]
     [12 12 12]
     [14 14 14]
     [16 16 16]]
    Label: 
     [[0 0]
     [0 0]
     [0 0]
     [0 0]]
    
    ---Batch---
    Feature: 
     [[18 18 18]
     [20 20 20]
     [22 22 22]
     [24 24 24]]
    Label: 
     [[0 0]
     [0 0]
     [0 0]
     [0 0]]
    

    Or if you really want to use tf.keras.preprocessing.timeseries_dataset_from_array, then try this:

    def map_max_values(x, y):
        """Add per-timestep feature maxima to windowed features and labels.

        Args:
            x: Rank-3 feature batch; the maximum is taken along axis 2.
            y: Rank-2 label batch with the same leading dimension and dtype
                as ``x``.

        Returns:
            Tuple ``(x + step_max, y + first_step_max)``; both keep their
            input shape.
        """
        # One maximum per (window, timestep); the kept size-1 axis lets it
        # broadcast across the last axis of x without repeat/reshape.
        step_max = tf.reduce_max(x, axis=2, keepdims=True)
        # Labels are shifted by the maximum of each window's first timestep;
        # slicing index 0 on axis 1 leaves shape (batch, 1), which broadcasts
        # across the label columns.
        first_step_max = step_max[:, 0, :]
        return x + step_max, y + first_step_max
    
    # Build the windowed dataset, shift each batch by its feature maxima,
    # and print the result.
    ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=simple_features,
        targets=simple_labels,
        sequence_length=4,
        batch_size=32,
    ).map(map_max_values)
    print_dataset(ds)