Search code examples
pythonopencvimage-processingcomputer-visionobject-detection

Keypoint detection not working when keypoints are a certain colour


I'm using keypoint detection to find text within a game.

The background in the below images is dynamic, it's always a vaguely moving star-lit sky that you can barely see.

The detection works well when the text is white:

[screenshot: successful keypoint detection on white text]

However, when the text is purple (unpredictable when this happens) the detection fails entirely: [screenshot: failed keypoint detection on purple text]

Both the object I'm looking to detect and the image I'm running detection on are identical: the screenshots of the text (i.e. the above) are taken directly from within the game, and detection is then run on the exact same location the original screenshots were taken from.

The below code I've written using the official documentation I found here and here as a guide but it's very light on explaining itself.

Question: Is this an inherent limitation or is there something I can do to adjust to detect keypoints within the purple image?

import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob

def get_haystack_image():
    """Capture a 1920x1080 screenshot of the desktop via the Win32 GDI API.

    Returns:
        A contiguous (1080, 1920, 3) uint8 numpy array in BGR order
        (the alpha channel of the 32-bit bitmap is dropped).
    """
    w, h = 1920, 1080
    # hwnd=None captures the entire desktop rather than a specific window
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    # Blit the current screen contents into the in-memory bitmap
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)  # rows are BGRA as delivered by GetBitmapBits
    # Release all GDI handles to avoid leaking resources on repeated calls
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[...,:3]  # drop the alpha channel
    # OpenCV needs contiguous memory; the slice above produces a strided view
    img = np.ascontiguousarray(img)
    return img

def loadImages(directory):
    """Load every image file in *directory* (an iterable of file paths).

    Returns a list of (image, path) tuples; IMREAD_UNCHANGED preserves
    any alpha channel present in the source files.
    """
    return [(cv.imread(path, cv.IMREAD_UNCHANGED), path) for path in directory]

def preProcessNeedle(image_list):
    """Compute ORB keypoints and descriptors for each needle image.

    Args:
        image_list: list of (image, path) tuples as produced by loadImages().

    Returns:
        A list of (keypoints, descriptors, image) tuples, one per needle.
    """
    # Create the detector once instead of re-instantiating it on every
    # iteration — it carries no per-image state between detect calls.
    orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
    needle_kp1_desc = []
    for img, _path in image_list:
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc

def match_keypoints(keypoints_needle, descriptors_needle, haystack_img, min_match_count):
    """ORB-detect the haystack image and FLANN-match it against the needle.

    Fixes two arity bugs in the original: the definition took 3 parameters
    while the call site in shipDetection passes 4 (needle keypoints first),
    and the success path returned 3 values while the error path returned 4
    and the caller unpacks 4 — either way a runtime error.

    Args:
        keypoints_needle: needle keypoints (unused here; accepted so the
            signature matches the existing call site).
        descriptors_needle: ORB descriptors of the needle image.
        haystack_img: image (numpy array) to search within.
        min_match_count: minimum number of ratio-test survivors required
            before match points are reported.

    Returns:
        (keypoints_haystack, good_matches, match_points, matched) where
        matched is True iff len(good_matches) > min_match_count. On a
        matcher error returns (None, None, [], False).
    """
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(haystack_img, None)

    # LSH index — the FLANN algorithm suited to ORB's binary descriptors
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)

    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        # FLANN raises when either descriptor set is empty or None
        return None, None, [], False

    # Lowe's ratio test: keep a match only when clearly better than runner-up
    good = [pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance]

    points = []
    matched = len(good) > min_match_count
    if matched:
        points = [keypoints_haystack[m.trainIdx].pt for m in good]
    return keypoints_haystack, good, points, matched

def shipDetection(needle_kp1_desc):
    """Scan a fixed screen region for each pre-processed needle image.

    Args:
        needle_kp1_desc: list of (keypoints, descriptors, image) tuples
            as produced by preProcessNeedle().

    Returns:
        True as soon as one needle yields enough keypoint matches in the
        cropped screen region, False if none do.
    """
    res = False

    # Object Detection
    for i, img in enumerate(needle_kp1_desc):
        kp1 = img[0]                  # needle keypoints
        descriptors_needle = img[1]   # needle ORB descriptors
        needle_img = img[2]           # needle image (used for drawMatches)

        # get an updated image of the screen & crop it  
        keypoint_haystack = get_haystack_image()
        # Fixed region of interest: rows 40:110, columns 850:1000
        keypoint_haystack = keypoint_haystack[40:110, 850:1000]

        # NOTE(review): match_keypoints as defined above takes 3 parameters and
        # returns 3 values on its success path — this 4-argument call and 4-way
        # unpack will raise at runtime unless that signature is fixed; confirm.
        kp2, matches, match_points, ship_avoided = match_keypoints(kp1, descriptors_needle, keypoint_haystack, min_match_count=40)     
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        # x=1940 places the window off the 1920-wide game screen (second monitor)
        cv.moveWindow("Keypoint Search",1940,30)
        cv.waitKey(1)       

        if match_points:
            # removed code as irrelevant to detection but left comments in
            
            # find the center point of all the matched features
            # account for the width of the needle image that appears on the left
            # drawn the found center point on the output image
            # display the processed image
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(1)       

            res = True
            break      
    return res

# Load all needle templates, precompute their ORB features, then run detection.
ships_to_avoid = loadImages(glob.glob(r"C:\Users\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid) 
if shipDetection(needle_kp1_desc):
    # do something with the output
    pass  # placeholder — a comment-only if-body is a SyntaxError without it

Solution

  • Isolating the red channel, converting to grayscale and applying binary thresholding has normalised the results, they're all now a consistent "white" which my detection is successfully identifying.

    apply_thresholding performs this pre-processing on a folder: it writes the processed images from image_dir into output_dir, then deletes the unprocessed originals from image_dir.

     def apply_thresholding():
         """Red-isolate, grayscale and binary-threshold every .png in image_dir.

         Processed copies are written to output_dir with the same filename;
         the source files that were successfully converted are then deleted.

         Fixes in this version: `os` is imported locally (the visible file
         never imports it); the cleanup originally ran *inside* the os.walk
         loop and glob-deleted every file in image_dir — including files that
         were never converted — once per walked directory; unreadable images
         are now skipped instead of crashing on a None from cv.imread.
         """
         import os
         # get directory path where the images are stored
         image_dir = r"C:\Users\pre"
         # get directory path where you want to save the images
         output_dir = r"C:\Users\post"
         processed = []  # source paths that were successfully converted
         # os.walk yields (dirpath, dirnames, filenames); dirpath is ignored,
         # so in practice only files directly inside image_dir are handled
         for _, _, image_names in os.walk(image_dir):
             for image_name in image_names:
                 # check for extension .png
                 if '.png' in image_name:
                     # read path / write path share the same filename
                     filepath = os.path.join(image_dir, image_name)
                     dstpath = os.path.join(output_dir, image_name)
                     print(filepath, dstpath)
                     image = cv.imread(filepath)
                     if image is None:
                         # unreadable file: skip it and leave it on disk
                         continue
                     r = image.copy()
                     # set blue and green channels to 0
                     r[:, :, 0] = 0
                     r[:, :, 1] = 0
                     # convert to grayscale now we've dropped b and g channels
                     gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
                     # Apply binary thresholding
                     (T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
                     # write the image in a different path with the same name
                     cv.imwrite(dstpath, thresh)
                     processed.append(filepath)
         # Delete only the originals we actually converted, exactly once
         for f in processed:
             os.remove(f)
    

    I then applied the same channel isolation, grayscale conversion and binary thresholding to my detection area.

    def get_haystack_image():
        """Capture the screen and return a binary mask of red-dominant pixels.

        Same GDI capture as the original version, but the frame is then
        pre-processed the same way as the needle images: isolate the red
        channel, convert to grayscale, and binary-threshold so both white
        and purple text normalise to solid white.

        Returns:
            A (1080, 1920) uint8 array with values 0 or 255.
        """
        w, h = 1920, 1080
        # hwnd=None captures the entire desktop rather than a specific window
        hwnd = None
        wDC = win32gui.GetWindowDC(hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (h, w, 4)  # rows are BGRA as delivered by GetBitmapBits
        # Release all GDI handles to avoid leaking resources on repeated calls
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())
        img = img[...,:3]  # drop the alpha channel
        img = np.ascontiguousarray(img)
    
        r = img.copy()
        # set blue and green channels to 0
        r[:, :, 0] = 0
        r[:, :, 1] = 0
        # convert to grayscale now we've dropped b and g channels
        gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
        # Apply binary thresholding
        (T, img) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
        return img