I want to iterate over a DataLoader built on a custom Dataset so that each batch yields the values together with their matching labels. The PandasDataset class below needs to be modified to do this, but since I copied it from an online example, I do not have a great grasp of how it works.
import torch
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class PandasDataset(Dataset):
    """Map-style dataset that serves the rows of a pandas DataFrame.

    Every column except the last is treated as a feature and the last
    column is treated as the label, so each sample is a
    ``(features, label)`` pair.
    """

    def __init__(self, dataframe):
        """Keep a reference to the DataFrame whose rows are the samples."""
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows, i.e. the number of samples."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the row at position ``index`` as ``(features, label)``.

        The row is converted to a NumPy array because the DataLoader's
        default collate function can batch NumPy arrays and scalars but
        not pandas Series objects.

        NOTE(review): assumes all columns are numeric; a row mixing
        numbers and strings becomes an object array that cannot be
        collated into a tensor.
        """
        row = self.dataframe.iloc[index].to_numpy()
        return row[:-1], row[-1]


d = {'values': [1, 2], 'values2': [3, 4], 'labels': [5, 6]}
df = pd.DataFrame(data=d)
dataset = PandasDataset(df)
# DataLoader expects the Dataset itself, not a tensor built from it; the
# conversion to tensors happens per batch during collation.
loader = DataLoader(dataset, batch_size=1, shuffle=False)
for batch_index, (values, label) in enumerate(loader):
    print(values)
    print(label)
You can change `__getitem__` so that it returns the features and the label separately, something like this:
def __getitem__(self, index):
    """Return the row at ``index`` split into ``(features, label)``.

    The label is the last element of the row; the features are all the
    elements before it.
    """
    row = self.dataframe.iloc[index].to_numpy()
    features = row[:-1]
    label = row[-1]
    return features, label
Then you don't need to wrap your dataset with `torch.tensor`; the DataLoader's default collate function converts the NumPy arrays into tensors for you:
# Hand the Dataset object to the DataLoader directly.
loader = DataLoader(
    dataset,
    shuffle=False,
    batch_size=1,
)
and it'll return:
# Pull the first batch out of the loader; the line below shows the value
# it evaluates to for the example DataFrame with batch_size=1.
next(iter(loader))
# >>> [tensor([[1, 3]]), tensor([5])]