machine-learning pytorch artificial-intelligence conv-neural-network siamese-network

I have two folders, with one image in each. I have to compare the two images and find their dissimilarity.

I have two folders, with one image in each. I have to compare the two images and find their dissimilarity, but the code below picks the images at random from the folders.

class InferenceSiameseNetworkDataset(Dataset):
    """Dataset that serves randomly drawn image pairs for a Siamese network.

    Each item is a pair of grayscale images sampled from
    ``imageFolderDataset.imgs`` plus a one-element float32 tensor that is
    ``1.0`` when the two images have different class indices and ``0.0``
    when they share one. Roughly half of the pairs are same-class pairs.

    The pair is chosen at random on every access: the ``index`` given to
    ``__getitem__`` does not select which images are returned.
    """

    def __init__(self, imageFolderDataset, transform=None, should_invert=True):
        """Store the image source and the preprocessing options.

        Args:
            imageFolderDataset: Object exposing ``imgs``, a sequence of
                ``(path, class_index)`` entries (such as an ``ImageFolder``).
            transform: Optional callable applied to both images of a pair.
            should_invert: When true, both images are inverted with
                ``PIL.ImageOps.invert`` before ``transform`` is applied.
        """
        self.imageFolderDataset = imageFolderDataset
        self.transform = transform
        self.should_invert = should_invert

    def __getitem__(self, index):
        """Return ``(img0, img1, label)`` for a randomly drawn pair.

        Args:
            index: Ignored; present only to satisfy the ``Dataset`` protocol.

        Returns:
            The two (optionally inverted and transformed) images and a
            one-element float32 tensor holding ``1.0`` for a different-class
            pair or ``0.0`` for a same-class pair.

        Raises:
            ValueError: If a different-class pair is requested but every
                entry in ``imgs`` has the same class index.
        """
        samples = self.imageFolderDataset.imgs
        img0_tuple = random.choice(samples)
        # we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0, 1)

        # Without this guard the redraw loop below would spin forever when
        # no image of another class exists.
        if not should_get_same_class and not any(
            sample[1] != img0_tuple[1] for sample in samples
        ):
            raise ValueError(
                "Cannot draw a different-class pair: all images belong to "
                "the same class."
            )

        # Redraw until the second image has the requested class relation
        # to the first one. A same-class match always exists (img0 itself).
        while True:
            img1_tuple = random.choice(samples)
            if (img0_tuple[1] == img1_tuple[1]) == bool(should_get_same_class):
                break

        img0 = self._load_grayscale(img0_tuple[0])
        img1 = self._load_grayscale(img1_tuple[0])

        if self.should_invert:
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)

        label = np.array([int(img1_tuple[1] != img0_tuple[1])], dtype=np.float32)
        return img0, img1, torch.from_numpy(label)

    def __len__(self):
        """Return the number of entries in ``imageFolderDataset.imgs``."""
        return len(self.imageFolderDataset.imgs)

    @staticmethod
    def _load_grayscale(path):
        """Open ``path`` and return it as an "L"-mode image, closing the file."""
        with Image.open(path) as image:
            return image.convert("L")

I took this code from GitHub. When I try to compare the dissimilarity of the two images, it chooses the images at random. There are two input folders: one image should come from the first folder and the other image from the second folder. When I test it, it sometimes compares an image with itself, i.e. it does not pick the other image from the other folder.

# Root directory handed to ImageFolder for the test images.
testing_dir1 = '/content/drive/My Drive/Signature Dissimilarity/Forged_Signature_Verification/processed_dataset/training1/'
folder_dataset_test = dset.ImageFolder(root=testing_dir1)
siamese_dataset = InferenceSiameseNetworkDataset(
    imageFolderDataset=folder_dataset_test,
    transform=transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
    ]),
    should_invert=False,
)

test_dataloader = DataLoader(siamese_dataset, num_workers=6, batch_size=1, shuffle=False)
dataiter = iter(test_dataloader)
# The reference image is the first image of the first batch; every later
# batch only contributes its second image for the comparison.
x0, _, _ = next(dataiter)

# Distances below this value are reported as the same signature.
same_signature_threshold = 0.5

for i in range(2):
    _, x1, _ = next(dataiter)
    concatenated = torch.cat((x0, x1), 0)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        output1, output2 = net(x0.cuda(), x1.cuda())
        euclidean_distance = F.pairwise_distance(output1, output2)

    distance = euclidean_distance.item()
    dis = 'Dissimilarity: {:.2f}'.format(distance)
    imshow(torchvision.utils.make_grid(concatenated), dis)
    print(dis)
    # Compare the raw distance rather than the rounded text shown above.
    if distance < same_signature_threshold:
        print("It's Same Signature")
    else:
        print("It's Forged Signature")

Solution

Just by setting `should_get_same_class = 0` in the `__getitem__` method of your custom dataset class, `InferenceSiameseNetworkDataset`, you can ensure that the two images belong to different classes/folders.

Secondly, you should not concatenate samples from two different batches, since they may not satisfy your condition. Instead, use `x0, x1, label2 = next(dataiter)` inside the loop, followed by the concatenation.