I'm new to PyTorch and this is my first project. I need to split the dataset and feed the training dataset to the model. The training dataset must be split into features and labels (which I failed to do). Here is what I have tried so far; however, I don't know how to feed the dataset obtained from random_split()
to the model.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import matplotlib.pyplot as plt
import seaborn as sns
from dataset import DataSet
class NeuralNetwork(nn.Module):
    """Two-layer fully connected network (10 -> 4 -> 1) with rrelu activations.

    Expects input tensors of shape (batch, 10); produces (batch, 1).
    """

    input_dim = 10
    hidden_dim = 4
    output_dim = 1

    def __init__(self, dataset):
        """Build the layers and keep references to the data loaders.

        Args:
            dataset: a mapping with "train_dataset" and "test_dataset" keys
                (here, DataLoader objects produced by the DataSet wrapper).
        """
        super().__init__()
        # Register the layers directly in a ModuleList so their parameters
        # are tracked by the module (a plain Python list would hide them
        # from model.parameters()).
        self.layers = nn.ModuleList([
            nn.Linear(self.input_dim, self.hidden_dim),
            nn.Linear(self.hidden_dim, self.output_dim),
        ])
        self.train_dataset = dataset["train_dataset"]
        self.test_dataset = dataset["test_dataset"]

    def forward(self, x):
        """Apply each linear layer followed by rrelu; return the result.

        Args:
            x: input tensor of shape (batch, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        for layer in self.layers:
            x = nn.functional.rrelu(layer(x))
        # Bug fix: the original forward had no return statement, so
        # model(x) silently evaluated to None.
        return x
# Build the data wrapper and hand its loaders to the model.
dataset = DataSet()
model = NeuralNetwork(dataset)
# NOTE(review): this passes the DataLoader object itself into forward(),
# which is exactly what raises
#   TypeError: linear(): argument 'input' ... must be Tensor, not DataLoader
# The loader must be iterated to get tensor batches instead, e.g.:
#     for features, labels in dataset["train_dataset"]:
#         outputs = model(features)
# (assumes each batch yields a (features, labels) pair — confirm against DataSet)
model(dataset["train_dataset"])
And this is dataset.py:
import pandas as pd
import torch
from torch.utils.data import DataLoader
class DataSet:
    """Loads the CSV, splits it into feature/label tensors, and exposes
    shuffled train/test DataLoaders as `train_dataset` / `test_dataset`.
    """

    # Fraction of rows used for training; the remainder is the test split.
    divide_rate = 0.8
    file = './pima-indians-diabetes.csv'

    def __init__(self):
        frame = pd.read_csv(self.file)
        # Bug fix: random_split over a raw DataFrame is broken — integer
        # indexing of a DataFrame selects a COLUMN, not a row. Convert to
        # tensors first, splitting features (all but last column) from the
        # label (last column), and wrap them in a TensorDataset so each
        # item is a (features, label) pair of tensors.
        features = torch.tensor(frame.iloc[:, :-1].values, dtype=torch.float32)
        labels = torch.tensor(frame.iloc[:, -1].values, dtype=torch.float32).unsqueeze(1)
        full_dataset = torch.utils.data.TensorDataset(features, labels)
        train_size = int(self.divide_rate * len(full_dataset))
        test_size = len(full_dataset) - train_size
        self.train_dataset, self.test_dataset = torch.utils.data.random_split(
            full_dataset, [train_size, test_size])
        # Wrap the Subsets in DataLoaders (default batch_size=1, as before).
        self.train_dataset = DataLoader(self.train_dataset, shuffle=True)
        self.test_dataset = DataLoader(self.test_dataset, shuffle=True)

    def __getitem__(self, key):
        # Allows dict-style access: dataset["train_dataset"].
        return getattr(self, key)
The error is
TypeError: linear(): argument 'input' (position 1) must be Tensor, not DataLoader
I assume the problem lies with your class DataSet; please replace it with the following function:
def load_data(test_split, batch_size):
    """Load the sonar dataset and return (train_loader, test_loader).

    Args:
        test_split: fraction of the data held out for testing (e.g. 0.2).
        batch_size: number of samples per batch in both loaders.

    Returns:
        A (train_loader, test_loader) pair of shuffled DataLoaders over
        disjoint subsets of the data.
    """
    sonar_dataset = SonarDataset('./sonar.all-data')
    # Derive split sizes from the requested test fraction.
    dataset_size = len(sonar_dataset)
    test_size = int(test_split * dataset_size)
    train_size = dataset_size - test_size
    train_dataset, test_dataset = random_split(sonar_dataset,
                                               [train_size, test_size])
    # Bug fix: the original wrapped `train_dataset.dataset` /
    # `test_dataset.dataset`, which is the FULL underlying dataset in both
    # cases — the split was discarded and test data leaked into training.
    # Wrap the Subset objects themselves instead.
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True)
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True)
    return train_loader, test_loader