I have implemented and trained a neural network in Pytorch, however, I am interested in the derivative of the neural network parameters with respect to the input. I have extensively searched for any procedure to that would allow evaluating the derivative of weights with respect to a given input, but I did not find anything. I know that I can compute the gradients of a function in the following way.
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)
But How would I do that with a trained neural network instead of a function Q?
Thanks in advance.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
from scipy.stats import norm
from numpy import linalg as la
import numpy.random as npr
from tabulate import tabulate
from matplotlib import pyplot as plt
import random
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
#from torchvision import datasets, transforms
from torch.autograd import Variable
# In[2]:
import numpy as np
from scipy.stats import norm
from numpy import linalg as la
import numpy.random as npr
from tabulate import tabulate
from matplotlib import pyplot as plt
import random
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
#from torchvision import datasets, transforms
from torch.autograd import Variable
from torch import optim
# In[3]:
nSimul = 32768
T1 = 1.0
T2 = 2.0
K = 110.0
spot = 100.0
vol = 0.2
vol0 = 0.5 # vol is increased over the 1st period so we have more points in the wings
# simulate all Gaussian returns (N1, N2) first
# returns: matrix of shape [nSimul, TimeSteps=2]
returns = np.random.normal(size=[nSimul,2])
# generate paths, step by step, and not path by path as customary
# this is to avoid slow Python loops, using NumPy's optimized vector functions instead
# generate the vector of all scenarios for S1, of shape [nSimul]
S1 = spot * np.exp(-0.5*vol0*vol0*T1 + vol0*np.sqrt(T1)*returns[:,0])
# generate the vector of all scenarios for S2, of shape [nSimul]
S2 = S1 * np.exp(-0.5*vol*vol*(T2-T1) + vol*np.sqrt(T2-T1)*returns[:,1])
# training set, X and Y are both vectors of shape [nSimul]
X = S1
Y = np.maximum(0, S2 - K)
xAxis = np.linspace(20, 200, 100)
xAxis=xAxis.reshape(-1,1)
# In[4]:
#Normalization of the simulated data:
meanX = np.mean(X)
stdX = np.std(X)
meanY = np.mean(Y)
stdY = np.std(Y)
normX = (X - meanX) / stdX
normY = (Y - meanY) / stdY
normX=normX.reshape(-1,1)
normY=normY.reshape(-1,1)
# In[5]:
class NeuralNetwork(nn.Module):
def __init__(self,inputsize,outputsize):
super(NeuralNetwork, self).__init__()
#self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(inputsize,3),
nn.ELU(),
nn.Linear(3, 5),
nn.ELU(),
nn.Linear(5,3),
nn.ELU(),
nn.Linear(3,outputsize),
)
w = torch.empty(0,1)
nn.init.normal_(w)
def forward(self, x):
#x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
# In[6]:
inputDim = 1 # takes variable 'x'
outputDim = 1 # takes variable 'y'
learningRate = 0.05
epochs = 10000
#weight=torch.empty(3)
model = NeuralNetwork(inputDim, outputDim)
##### For GPU #######
if torch.cuda.is_available():
model.cuda()
# In[7]:
#criterion = torch.nn.MSELoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)
# In[ ]:
def ridge_loss(outputs,labels):
torch.mean((outputs-labels)**2)
# In[ ]:
# In[9]:
#Adam optmization
criterion = torch.nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.05)
# In[10]:
for epoch in range(epochs):
# Converting inputs and labels to Variable
if torch.cuda.is_available():
inputs = Variable(torch.from_numpy(normX).cuda().float())
labels = Variable(torch.from_numpy(normY).cuda().float())
else:
inputs = Variable(torch.from_numpy(normX).float())
labels = Variable(torch.from_numpy(normY).float())
# Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
optimizer.zero_grad()
# get output from the model, given the inputs
outputs = model(inputs)
# get loss for the predicted output
loss = criterion(outputs, labels)
print(loss)
# get gradients w.r.t to parameters
loss.backward()
# update parameters
optimizer.step()
print('epoch {}, loss {}'.format(epoch, loss.item()))
# In[11]:
def predict(xs):
# first, normalize
nxs = (xs - meanX) / stdX
# forward feed through ANN
# we don't need gradients in the testing phase
with torch.no_grad():
if torch.cuda.is_available():
nys = model(Variable(torch.from_numpy(nxs.rehape(-1,1)).cuda().float())).cpu().data.numpy()
else:
nys = model(Variable(torch.from_numpy(nxs.reshape(-1,1))).float()).data.numpy()
# de-normalize output
ys = meanY + stdY * nys
# we get a matrix of shape [size of xs][1], which we reshape as vector [size of xs]
return np.reshape(ys, [-1])
# In[13]:
def BlackScholes(S0,r,sigma,T,K):
d1 = 1 / (sigma * np.sqrt(T)) * (np.log(S0/K) + (r+sigma**2/2)*T)
d2 = d1 - sigma * np.sqrt(T)
return norm.cdf(d1) * S0 - norm.cdf(d2) * K * np.exp(-r*T)
def BlackScholesCallDelta(S0,r,sigma,T,K):
d1 = 1 / (sigma * np.sqrt(T)) * (np.log(S0/K) + (r+sigma**2/2)*T)
return norm.cdf(d1)
BlackScholes_vec=np.vectorize(BlackScholes)
BlackScholesCallDelta_vec=np.vectorize(BlackScholesCallDelta)
# In[14]:
BS_price=BS_prices=BlackScholes_vec(S0=xAxis,r=0,sigma=0.2,T=1.0,K=110.0)
predicted=predict(xAxis)
S1=1
#line_learn = plt.plot(Sval,y,label="Deep Neural Net")
line_learn = plt.plot(xAxis,predicted,label="Neural Regression")
line_BS = plt.plot(xAxis,BS_price, label="Black-Scholes")
plt.xlabel("Spot Price")
plt.ylabel("Option Price")
#plt.title(r'Time: %1.1f' % time, loc='left', fontsize=11)
plt.title(r'Strike: %1.2f' % K, loc='right', fontsize=11)
plt.title(r'Initial price: %1.2f' % S1, loc='center', fontsize=11)
plt.legend()
plt.show()
#plt.savefig("deephedge.png", dpi=150)
plt.savefig("deephedge.pdf")
# In[15]:
Prices_rg_mc_diff=[]
for i in range(len(xAxis)-1):
delta=(predicted[i+1]-predicted[i])/(xAxis[i+1]-xAxis[i])
Prices_rg_mc_diff.append(delta)
# In[16]:
BS_delta=BlackScholesCallDelta(S0=xAxis,r=0,sigma=0.2,T=1.0,K=110.0)
predicted=predict(xAxis)
S1=1
#line_learn = plt.plot(Sval,y,label="Deep Neural Net")
line_learn = plt.plot(xAxis[1:],Prices_rg_mc_diff,label="Neural Regression")
line_BS = plt.plot(xAxis[1:],BS_delta[1:], label="Black-Scholes")
plt.xlabel("Spot Price")
plt.ylabel("Option Price")
#plt.title(r'Time: %1.1f' % time, loc='left', fontsize=11)
plt.title(r'Strike: %1.2f' % K, loc='right', fontsize=11)
plt.title(r'Initial price: %1.2f' % S1, loc='center', fontsize=11)
plt.legend()
plt.show()
#plt.savefig("deephedge.png", dpi=150)
plt.savefig("deephedge.pdf")
# In[17]:
model.backward(retain_graph=True)
# In[ ]:
print(NeuralNetwork.weight.grad)
# In[ ]:
def predict(xs):
# first, normalize
nxs = (xs - meanX) / stdX
# forward feed through ANN
# we don't need gradients in the testing phase
with torch.no_grad():
if torch.cuda.is_available():
nys = model(Variable(torch.from_numpy(nxs.rehape(-1,1)).cuda().float())).cpu().data.numpy()
else:
nys = model(Variable(torch.from_numpy(nxs.reshape(-1,1))).float()).data.numpy()
# de-normalize output
ys = meanY + stdY * nys
# we get a matrix of shape [size of xs][1], which we reshape as vector [size of xs]
return np.reshape(ys, [-1])
# In[21]:
c3=torch.from_numpy((predicted.reshape(-1,1)), requires_grad=True)
c4=torch.from_numpy(xAxis, requires_grad=True)
#c5=torch.Tensor(c3)
#c6=torch.Tensor(c4)
loss = criterion(c3,c4) # calculating loss
loss.backward()
# In[28]:
torch.tensor(predicted.reshape(-1,1), requires_grad=True)
torch.tensor(xAxis, requires_grad=True)
criterion(torch.tensor(predicted.reshape(-1,1), requires_grad=True),torch.tensor(xAxis, requires_grad=True))
loss.backward()
You need to explicitly use requires_grad = True
when create a tensor. And to calculate gradient you first need to apply some operation on the tensor.
Here is an example:
import torch
x = torch.rand(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
out.backward()
print(x.grad)
Output:
tensor([[3.3720, 3.4302],
[3.4030, 3.3605]])
In this way you are using torch.autograd
to calculate the gradient for tensor x
. See autograd for more.
And for neural network you can simply use the network and backward it afterward.
A neural network Example:
import torch
import torch.nn as nn
import torch.nn.functional as f
x = torch.rand(2, 2)
# define a neural network
network = nn.Sequential(
nn.Linear(2,100),
nn.Linear(100,2)
)
pred = network(x)
loss = f.mae_loss(pred, x) # calculating loss
loss.backward()
# Update weights with gradients
network[0].weight = 0.1 * network[0].weight.grad
network[1].weight = 0.1 * network[1].weight.grad
Note: I didn't put any activation function in network for the sack of simplicity.
Example of backward()
using torch.nn.MSELoss()
:
import torch
from torch.nn import MSELoss
criterion = MSELoss()
a = torch.tensor([1.,2.], requires_grad=True)
b = a**2
loss = criterion(b, a)
loss.backward()
print(a.grad)
Output:
tensor([0., 6.])