Tags: python, opencv, computer-vision, classification, object-recognition

Find closest match of an image to 10,000+ others with similar features


I am trying to find the closest match of an image against a large list of other images (10,000+). The background color is all white, the camera angle is the same, and the shapes of the image content are close to each other (see image below). I tried using OpenCV with ORB and BFMatcher's knnMatch to find the closest match, but I am not even close to finding the match I want.

To my understanding, the images need to be grayscale, but in my case I think color would be a very important descriptor?

I am new to both OpenCV and image matching, so can you help me figure out whether I need to use another approach?

import cv2
import os
orb = cv2.ORB_create(nfeatures=1000) # Detect up to 1000 keypoints per image
bf = cv2.BFMatcher()

# Image to match
findImg = 'captainA.png'
imgCur = cv2.imread(f'{"Images"}/{findImg}', 0)
kp1,des1 = orb.detectAndCompute(imgCur,None)

# Loop through all superhero images and find the closest match
images = ["img1.png","img2.png","img3.png","img4.png","img5.png","img6.png","img7.png","img8.png","img9.png","img10.png","img11.png","img12.png"]

matchList = []
names = []
for img in images:
    imgCur = cv2.imread(f'Superheroes/{img}', 0) # Load each candidate image as grayscale
    kp2,des2 = orb.detectAndCompute(imgCur,None)
 
    matches = bf.knnMatch(des1,des2,k=2)
    goodMatches = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance: # Lowe's ratio test with 0.75 as the threshold for a good match
            goodMatches.append([m])
    matchList.append(len(goodMatches))
    names.append(img)

matchIdx = matchList.index(max(matchList))
    
# Name of matched image
print(names[matchIdx])
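
For context, here is a rough sketch of how I imagine color could be brought in as an extra cue, comparing normalized HSV color histograms with cv2.compareHist (the colorHist helper is only an untested idea; it reuses findImg, images and the folder names from above):

import cv2

def colorHist(path, bins=(8, 8, 8)):
    # Hypothetical helper: normalized 3D HSV color histogram of an image
    img = cv2.imread(path)                      # Load in color (BGR)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # HSV separates color from brightness
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, [0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist)                   # Normalize so image size does not matter
    return hist

queryHist = colorHist(f'Images/{findImg}')
scores = []
for img in images:
    hist = colorHist(f'Superheroes/{img}')
    # Higher correlation means a more similar color distribution
    scores.append(cv2.compareHist(queryHist, hist, cv2.HISTCMP_CORREL))

print(images[scores.index(max(scores))]) # Name of the best color match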

What I want to find:

[example image]


Solution

  • Here is a small piece of code that should do the job.

    from tensorflow.keras.preprocessing import image
    from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
    from tensorflow.keras.models import Model
    import numpy as np
    from PIL import Image
    
    base_model = VGG16(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
    
    def extract(img):
        img = img.resize((224, 224)) # Resize to the 224x224 input VGG16 expects
        img = img.convert('RGB') # Make sure there are exactly 3 color channels
        x = image.img_to_array(img) # Convert the PIL image to a numpy array
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        feature = model.predict(x)[0] # Extract Features
        return feature / np.linalg.norm(feature) # L2-normalize so Euclidean distances reflect cosine similarity
    
    # Iterate through images and extract Features
    images = ["img1.png","img2.png","img3.png","img4.png","img5.png"...+2000 more]
    all_features = np.zeros(shape=(len(images),4096))
    
    for i in range(len(images)):
        feature = extract(img=Image.open(images[i]))
        all_features[i] = np.array(feature)
    
    # Match image
    query = extract(img=Image.open("image_to_match.png")) # Extract its features
    dists = np.linalg.norm(all_features - query, axis=1) # Calculate the similarity (distance) between images
    ids = np.argsort(dists)[:5] # Extract 5 images that have lowest distance
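
  • With 10,000+ images, the brute-force distance computation above usually still works, but if you need to run many queries it can help to build a nearest-neighbour index once and reuse it. Below is a minimal sketch using scikit-learn's NearestNeighbors (an extra dependency; it assumes the all_features, images and query variables from the code above):

    from sklearn.neighbors import NearestNeighbors

    # Build the index once over all extracted feature vectors
    nn = NearestNeighbors(n_neighbors=5, metric='euclidean')
    nn.fit(all_features)

    # Query it: distances and indices of the 5 closest images
    nn_dists, nn_ids = nn.kneighbors(query.reshape(1, -1))
    print([images[i] for i in nn_ids[0]]) # Filenames of the best matches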