Search code examples
pythoncomputer-visionpytorchtorchvision

How to combine datasets in PyTorch to return image and numpy file simultaneously


I am trying to build a dataloader that returns both images and poses. The images are saved as .jpg files and the poses as .npy files. The images and poses live in different root folders, but the two roots have the same sub-folder structure and the same file names. Each sub-folder corresponds to one class. I want to apply image transformations and then return the images, for which I am using torchvision's datasets.ImageFolder. For the poses, I am using torchvision's datasets.DatasetFolder. How do I combine these two datasets so that, for each sample, I get the image and the pose with the same name at the same time?

class ReIDFolder_images(datasets.ImageFolder):
    """Image-folder dataset that also returns a same-class ("positive") image.

    Each item is ``(sample, target, pos)`` where ``sample`` is the image at
    the requested index, ``target`` is its class index and ``pos`` is another
    image drawn at random from the same class.
    """

    def __init__(self, root, transform=None):
        """Index the images under ``root``.

        Args:
            root: Root directory handed to ``datasets.ImageFolder``.
            transform: Optional callable applied to both the sample and the
                positive image. ``None`` leaves the loaded images untouched.
        """
        super().__init__(root, transform)
        # Keep the labels as an array so class membership can be tested with
        # element-wise comparisons in the sampling helpers below.
        self.targets = np.asarray([s[1] for s in self.samples])
        self.img_num = len(self.samples)
        print(self.img_num)

    def _get_cam_id(self, path):
        """Return the zero-based camera id parsed from the file name.

        The id is the single character right after the first ``'c'`` in the
        base name of ``path``, e.g. ``0002_c3s1_000451.jpg`` gives ``2``.

        Raises:
            IndexError: If the base name contains no ``'c'``.
            ValueError: If the character after the first ``'c'`` is not a digit.
        """
        filename = os.path.basename(path)
        camera_id = filename.split('c')[1][0]
        return int(camera_id) - 1

    def _get_pos_sample(self, target, index, path):
        """Return the path of a random same-class sample other than ``index``.

        Args:
            target: Class index of the anchor sample.
            index: Position of the anchor in ``self.samples``; excluded from
                the candidates.
            path: Path of the anchor, returned when the class has no other
                member.
        """
        pos_index = np.argwhere(self.targets == target).flatten()
        pos_index = np.setdiff1d(pos_index, index)
        if len(pos_index) == 0:  # in the query set, only one sample
            return path
        rand = random.randint(0, len(pos_index) - 1)
        return self.samples[pos_index[rand]][0]

    def _get_neg_sample(self, target):
        """Return a random ``self.samples`` entry whose class differs from ``target``.

        Unlike ``_get_pos_sample`` this returns the whole ``(path, target)``
        entry, not just the path.

        Raises:
            ValueError: If every sample belongs to class ``target``.
        """
        neg_index = np.argwhere(self.targets != target).flatten()
        if len(neg_index) == 0:
            # random.randint would raise an opaque "empty range" ValueError
            # here; fail with the same exception type but a usable message.
            raise ValueError(
                "no negative sample available: every sample has target "
                f"{target!r}"
            )
        rand = random.randint(0, len(neg_index) - 1)
        return self.samples[neg_index[rand]]

    def __getitem__(self, index):
        """Return ``(sample, target, pos)`` for the sample at ``index``."""
        path, target = self.samples[index]
        sample = self.loader(path)

        pos_path = self._get_pos_sample(target, index, path)
        pos = self.loader(pos_path)

        # The same transform object is applied to both images.
        if self.transform is not None:
            sample = self.transform(sample)
            pos = self.transform(pos)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target, pos

class ReIDFolder_poses(datasets.DatasetFolder):
    """Folder dataset of ``.npy`` pose files that also returns a positive pose.

    Each item is ``(sample, target, pos)`` where ``sample`` is the pose loaded
    from the file at the requested index, ``target`` is its class index and
    ``pos`` is another pose drawn at random from the same class.
    """

    def __init__(self, root):
        """Index the ``.npy`` files under ``root``.

        Args:
            root: Root directory handed to ``datasets.DatasetFolder``.
        """
        super().__init__(root, loader=self.npy_loader, extensions='.npy')

        # Keep the labels as an array so class membership can be tested with
        # element-wise comparisons in the sampling helpers below.
        self.targets = np.asarray([s[1] for s in self.samples])
        self.img_num = len(self.samples)
        print(self.img_num)

    def npy_loader(self, path):
        """Load the ``.npy`` file at ``path`` and wrap it in a ``torch.Tensor``."""
        sample = torch.Tensor(np.load(path))
        return sample

    def _get_cam_id(self, path):
        """Return the zero-based camera id parsed from the file name.

        The id is the single character right after the first ``'c'`` in the
        base name of ``path``, e.g. ``0002_c3s1_000451.npy`` gives ``2``.

        Raises:
            IndexError: If the base name contains no ``'c'``.
            ValueError: If the character after the first ``'c'`` is not a digit.
        """
        filename = os.path.basename(path)
        camera_id = filename.split('c')[1][0]
        return int(camera_id) - 1

    def _get_pos_sample(self, target, index, path):
        """Return the path of a random same-class sample other than ``index``.

        Args:
            target: Class index of the anchor sample.
            index: Position of the anchor in ``self.samples``; excluded from
                the candidates.
            path: Path of the anchor, returned when the class has no other
                member.
        """
        pos_index = np.argwhere(self.targets == target).flatten()
        pos_index = np.setdiff1d(pos_index, index)
        if len(pos_index) == 0:  # in the query set, only one sample
            return path
        rand = random.randint(0, len(pos_index) - 1)
        return self.samples[pos_index[rand]][0]

    def _get_neg_sample(self, target):
        """Return a random ``self.samples`` entry whose class differs from ``target``.

        Unlike ``_get_pos_sample`` this returns the whole ``(path, target)``
        entry, not just the path.

        Raises:
            ValueError: If every sample belongs to class ``target``.
        """
        neg_index = np.argwhere(self.targets != target).flatten()
        if len(neg_index) == 0:
            # random.randint would raise an opaque "empty range" ValueError
            # here; fail with the same exception type but a usable message.
            raise ValueError(
                "no negative sample available: every sample has target "
                f"{target!r}"
            )
        rand = random.randint(0, len(neg_index) - 1)
        return self.samples[neg_index[rand]]

    def __getitem__(self, index):
        """Return ``(sample, target, pos)`` for the sample at ``index``."""
        path, target = self.samples[index]
        sample = self.loader(path)

        pos_path = self._get_pos_sample(target, index, path)
        pos = self.loader(pos_path)

        return sample, target, pos

Solution

  • I was able to solve this problem! It turns out I didn't have to inherit datasets.DatasetFolder. Since the labels were the same, I just created one class which inherits datasets.ImageFolder, and fed a modified path to the function npy_loader.