I have this piece of tf1 code, which was taken from nice book "Deep Learning" by S. Nikolenko.
It's a simple linear regression that learns k
and b
to 2 and 1 respectively.
%tensorflow_version 1.x
import numpy as np,tensorflow as tf
import pandas as pd
n_samples, batch_size, num_steps = 1000, 100, 20000 #set learning constants
X_data = np.random.uniform(1, 10, (n_samples, 1)) #generate array x from 1 to 10 of shape (1000,1)
print(X_data.shape)
y_data = 2 * X_data + 1 + np.random.normal(0, 2, (n_samples, 1)) #generate right answer and add noise to it (to make it scatter)
X = tf.placeholder(tf.float32, shape=(batch_size, 1)) #defining placeholders to put into session.run
y = tf.placeholder(tf.float32, shape=(batch_size, 1))
with tf.variable_scope('linear-regression'):
k = tf.Variable(tf.random_normal((1, 1)), name='slope') #defining 2 variables with shape (1,1)
b = tf.Variable(tf.zeros((1,)), name='bias') # and (1,)
print(k.shape,b.shape)
y_pred = tf.matmul(X, k) + b # all predicted y in batch, represents linear formula k*x + b
loss = tf.reduce_sum((y - y_pred) ** 2) # mean square
optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(loss)
display_step = 100
with tf.Session() as sess:
sess.run(tf.initialize_variables([k,b]))
for i in range(num_steps):
indices = np.random.choice(n_samples, batch_size) # taking random indices
X_batch, y_batch = X_data[indices], y_data[indices] # taking x and y from generated examples
_, loss_val, k_val, b_val = sess.run([optimizer, loss, k, b ],
feed_dict = { X : X_batch, y : y_batch })
if (i+1) % display_step == 0:
print('Epoch %d: %.8f, k=%.4f, b=%.4f' %
(i+1, loss_val, k_val, b_val))
I'm striving to port it on TensorFlow 2
And for long time I can't wrap my head what should I use instead of sess.run()
and feed_dict
, which doing magic behind the scenes, official documentation go into to details with writing model class and so on, but I'm want to keep this as flat as possible.
Also it's suggested to calculate derivatives with tf.GradientTape
, but I'm struggling with applying it right to my example
%tensorflow_version 2.x
import numpy as np,tensorflow as tf
import pandas as pd
n_samples, batch_size, num_steps = 1000, 100, 200
X_data = np.random.uniform(1, 10, (n_samples, 1))
y_data = 2 * X_data + 1 + np.random.normal(0, 2, (n_samples, 1))
X = tf.Variable(tf.zeros((batch_size, 1)), dtype=tf.float32, shape=(batch_size, 1))
y = tf.Variable(tf.zeros((batch_size, 1)), dtype=tf.float32, shape=(batch_size, 1))
k = tf.Variable(tf.random.normal((1, 1)), name='slope')
b = tf.Variable(tf.zeros((1,)), name='bias')
loss = lambda: tf.reduce_sum((y - (tf.matmul(X, k) + b)) ** 2)
optimizer = tf.keras.optimizers.SGD(0.01).minimize(loss, [k, b, X, y])
display_step = 100
for i in range(num_steps):
indices = np.random.choice(n_samples, batch_size)
X_batch, y_batch = X_data[indices], y_data[indices]
I need SGD optimizer minimize that given loss function and learn k and b values, how can I achieve it from this point?
After plenty of manuals I got how to do that was hiding under the hood of sess.run
in tf1, but without an optimizer:
k
and b
X_batch, y_batch = X_data[indices], y_data[indices]
X.assign(tf.convert_to_tensor(X_batch))
y.assign(tf.convert_to_tensor(y_batch))
with tf.GradientTape(persistent=True) as tape:
loss_val = loss()
dy_dk = tape.gradient(loss_val, k)
dy_db = tape.gradient(loss_val, b)
k.assign_sub(dy_dk * learn_rate)
b.assign_sub(dy_db * learn_rate)
if (i+1) % display_step == 0:
print('Epoch %d: %.8f, k=%.4f, b=%.4f' %
(i+1, loss_val, k.numpy(), b.numpy()))