python numpy datetime tensorflow2.0 tensorflow-datasets

Creates a dataset of sliding windows over a timeseries from a pandas datetime index

Consider the following code:

import pandas as pd
import numpy as np
import tensorflow as tf


def random_dates(start, end, n=10):
    """Return ``n`` random timestamps drawn from ``[start, end)``.

    ``start`` and ``end`` are pandas Timestamps. Their nanosecond epoch
    values are truncated to whole seconds, ``n`` integers are sampled
    between those bounds (upper bound exclusive), and the integers are
    converted back into a DatetimeIndex with one-second resolution.
    """
    nanos_per_second = 10**9
    lower_bound = start.value // nanos_per_second
    upper_bound = end.value // nanos_per_second

    epoch_seconds = np.random.randint(lower_bound, upper_bound, n)
    return pd.to_datetime(epoch_seconds, unit='s')


# Ten random timestamps between 2015-01-01 (inclusive) and 2018-01-01 (exclusive).
start, end = pd.to_datetime('2015-01-01'), pd.to_datetime('2018-01-01')
dates = random_dates(start=start, end=end)

This code creates random dates with the following output:

print(dates)
DatetimeIndex(['2015-06-25 22:00:34', '2015-05-05 19:20:11',
               '2016-04-11 21:52:28', '2015-10-23 21:07:46',
               '2017-04-06 04:01:23', '2015-07-17 06:13:32',
               '2017-06-18 12:33:27', '2015-11-04 06:48:28',
               '2017-08-20 17:10:17', '2016-04-14 07:46:59'],
              dtype='datetime64[ns]', freq=None)

I would like to create a dataset of sliding windows using the datetime index as input with the following command:

# Window the raw datetime64 values; targets=None asks for inputs only (no labels).
tensorflow_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=dates.values,
    targets=None,
    sequence_length=1,
    sequence_stride=2,
    batch_size=1,
)

When I do this, I get the following error:

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported numpy type: NPY_DATETIME).

Any ideas on how to solve this?

Solution

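TensorFlow has no datetime dtype, so it cannot convert a datetime64 array into a tensor. You can work around this by converting each NumPy datetime value to a string first: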

import pandas as pd
import numpy as np
import tensorflow as tf

def random_dates(start, end, n=10):
    """Return ``n`` random timestamps drawn from ``[start, end)``.

    ``start`` and ``end`` are pandas Timestamps. Their nanosecond epoch
    values are truncated to whole seconds, ``n`` integers are sampled
    between those bounds (upper bound exclusive), and the integers are
    converted back into a DatetimeIndex with one-second resolution.
    """
    nanos_per_second = 10**9
    lower_bound = start.value // nanos_per_second
    upper_bound = end.value // nanos_per_second

    epoch_seconds = np.random.randint(lower_bound, upper_bound, n)
    return pd.to_datetime(epoch_seconds, unit='s')

# Draw the same kind of random sample as in the question.
start = pd.to_datetime('2015-01-01')
end = pd.to_datetime('2018-01-01')
dates = random_dates(start, end)

# TensorFlow cannot ingest datetime64 values, so pass ISO 8601 strings instead.
date_strings = np.datetime_as_string(dates.values)
tensorflow_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=date_strings,
    targets=None,
    sequence_length=1,
    sequence_stride=2,
    batch_size=1,
)

# Each element is a batch of one window holding a single timestamp string.
for batch in tensorflow_dataset:
    print(batch)

tf.Tensor([[b'2016-11-16T02:46:49.000000000']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-07-27T04:07:14.000000000']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-09-10T14:57:51.000000000']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2017-11-01T20:48:49.000000000']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2017-08-25T11:34:42.000000000']], shape=(1, 1), dtype=string)

Afterwards, you can convert the strings to anything you want. You could also use the unit parameter of np.datetime_as_string to get a different output.

np.datetime_as_string(dates.values, unit='D'):

tf.Tensor([[b'2016-04-22']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-04-03']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-02-14']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2017-02-09']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-02-19']], shape=(1, 1), dtype=string)

np.datetime_as_string(dates.values, unit='h'):

tf.Tensor([[b'2017-01-19T15']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2015-11-02T15']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2016-12-11T06']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2017-07-24T04']], shape=(1, 1), dtype=string)
tf.Tensor([[b'2016-06-22T04']], shape=(1, 1), dtype=string)