For below model I received error 'Expected stride to be a single value integer or list'. I used suggested answer from https://discuss.pytorch.org/t/expected-stride-to-be-a-single-integer-value-or-a-list/17612/2 and added
img.unsqueeze_(0)
I now receive error :
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.FloatTensor) should be the same
For below code I three sample images and attempt to learn a representation of them using an auto-encoder :
%reset -f
import torch.utils.data as data_utils
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics
import datetime
from sklearn.preprocessing import MultiLabelBinarizer
import seaborn as sns
sns.set_style("darkgrid")
from ast import literal_eval
import numpy as np
from sklearn.preprocessing import scale
import seaborn as sns
sns.set_style("darkgrid")
import torch
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
from os import listdir
import cv2
import torch.nn.functional as F
import numpy as np
from numpy.polynomial.polynomial import polyfit
import matplotlib.pyplot as plt
number_channels = 3
%matplotlib inline
x = np.arange(10)
m = 1
b = 2
y = x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('1-increasing.jpg')
x = np.arange(10)
m = 0.01
b = 2
y = x * x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('2-increasing.jpg')
x = np.arange(10)
m = 0
b = 2
y = (m*x)+b
plt.plot(x, y)
plt.axis('off')
plt.savefig('constant.jpg')
batch_size_value = 2
train_image = []
train_image.append(cv2.imread('1-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('2-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('decreasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('constant.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
data_loader = data_utils.DataLoader(train_image, batch_size=batch_size_value, shuffle=False,drop_last=True)
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
import os
if not os.path.exists('./dc_img'):
os.mkdir('./dc_img')
def to_img(x):
x = 0.5 * (x + 1)
x = x.clamp(0, 1)
x = x.view(x.size(0), 1, 28, 28)
return x
num_epochs = 100
# batch_size = 128
batch_size = 2
learning_rate = 1e-3
dataloader = data_loader
class autoencoder(nn.Module):
def __init__(self):
super(autoencoder, self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(3, 16, 3, stride=3, padding=1), # b, 16, 10, 10
nn.ReLU(True),
nn.MaxPool2d(2, stride=2), # b, 16, 5, 5
nn.Conv2d(16, 8, 3, stride=2, padding=1), # b, 8, 3, 3
nn.ReLU(True),
nn.MaxPool3d(3, stride=1) # b, 8, 2, 2
)
self.decoder = nn.Sequential(
nn.ConvTranspose3d(8, 16, 3, stride=2), # b, 16, 5, 5
nn.ReLU(True),
nn.ConvTranspose3d(16, 8, 5, stride=3, padding=1), # b, 8, 15, 15
nn.ReLU(True),
nn.ConvTranspose3d(8, 1, 2, stride=2, padding=1), # b, 1, 28, 28
nn.Tanh()
)
def forward(self, x):
x = self.encoder(x)
x = self.decoder(x)
return x
model = autoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
weight_decay=1e-5)
for epoch in range(num_epochs):
for data in dataloader:
img, _ = data
img.unsqueeze_(0)
# img.unsqueeze_(0)
# print(img)
# img.unsqueeze_(0)
img = Variable(img).cuda()
# ===================forward=====================
output = model(img)
loss = criterion(output, img)
# ===================backward====================
optimizer.zero_grad()
loss.backward()
optimizer.step()
# ===================log=================to_img=======
print('epoch [{}/{}], loss:{:.4f}'
.format(epoch+1, num_epochs, loss.data[0]))
if epoch % 10 == 0:
pic = to_img(output.cpu().data)
save_image(pic, './dc_img/image_{}.png'.format(epoch))
torch.save(model.state_dict(), './conv_autoencoder.pth')
But as stated earlier this results in error :
299 def forward(self, input): 300 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301 self.padding, self.dilation, self.groups) 302 303
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.FloatTensor) should be the same
The issue appears to be related to img.unsqueeze_(0)
?
How to train the auto-encoder on these images ?
This is because your image tensor resides in GPU (that happens here img = Variable(img).cuda()
), while your model is still in RAM. Please remember that you need to explicitly call cuda()
to send a tensor (or an instance of nn.Module
) to GPU.
Just change this line:
model = autoencoder()
To this:
model = autoencoder().cuda()