OpenCV OMR Sheet - Detect Marked Answers in Python - Getting Proper Binarization

While solving an OMR problem I am not completely able to detect all the marked answers correctly. Here is my input sheet.
Here is my code to binarize the image:

import cv2

# Load the sheet, smooth it with a 5x5 Gaussian, then binarize at 60 and
# invert, so pixels at or below the threshold end up as 255.
image = cv2.imread('input.png')
img = cv2.GaussianBlur(image, ksize=(5, 5), sigmaX=0)
res, img = cv2.threshold(
    img,
    thresh=60,
    maxval=255,
    type=cv2.THRESH_BINARY,
)
img = 255 - img

This is the result I am getting.
I then changed the Gaussian blur and threshold parameters as follows.

 # Second attempt: a larger 7x7 blur kernel and a higher threshold of 90.
 img = cv2.GaussianBlur(image,(7,7),0)  
 res, img = cv2.threshold(img, 90, 255, cv2.THRESH_BINARY)

The result I am getting after this change.
Here is my complete code to detect the answers.

def solve(img, n_row=50):
    """Flag which answer cells look marked in each row of an OMR sheet.

    The image is resized so that it divides evenly into ``n_row`` rows and
    four columns, blurred, thresholded at 60 and inverted so that dark
    pixels become 255.  For every row, the inverted pixel values of each of
    the four cells are summed, and a cell is flagged when its sum exceeds
    five times the median of that row's sums.

    Args:
        img: Image array whose ``shape`` unpacks to
            ``(height, width, channels)``.
        n_row: Number of answer rows on the sheet.

    Side effects:
        Appends one boolean array of length 4 per row to ``result``.  That
        name is not defined in this function and must already exist as a
        list in the enclosing scope.
    """
    height, width, channels = img.shape
    n_col = 4
    x_shift = width // n_col
    y_shift = height // n_row
    # Resize to an exact multiple of the cell size so every cell is equal.
    img = cv2.resize(img, (n_col * x_shift, n_row * y_shift))
    img = cv2.GaussianBlur(img, (5, 5), 0)
    _, img = cv2.threshold(img, 60, 255, cv2.THRESH_BINARY)
    img = 255 - img
    for row in range(n_row):
        # Drop 5 px at the top and bottom of the row band.
        band = img[row * y_shift + 5:(row + 1) * y_shift - 5]
        # An explicit array makes the element-wise comparison below obvious
        # instead of relying on NumPy coercing a plain list.
        area_sum = np.array([
            np.sum(band[1:, col * x_shift:(col + 1) * x_shift])
            for col in range(n_col)
        ])
        result.append(area_sum > np.median(area_sum) * 5)

If anyone can help me solve this problem, I would be very thankful.

Answer Suggestions:
Check the white pixel count inside each bounding rectangle, and keep only those bounding rectangles/contours whose area is above a minimum value.

# Binarize the sheet with Otsu's method and outline every large contour.
inputImg = cv2.imread('input.jpg')
img = cv2.cvtColor(inputImg, cv2.COLOR_BGR2GRAY)
# NOTE(review): `mask` is allocated but never used below.
mask = np.zeros(img.shape[:2], dtype=img.dtype)
# With THRESH_OTSU the threshold is computed from the image; the 120 passed
# here is ignored.
ret, otsu_threshold = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# findContours returns (contours, hierarchy) in OpenCV 4 but
# (image, contours, hierarchy) in OpenCV 3; the last two items work for both.
contours, hierarchy = cv2.findContours(otsu_threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# Draw on a BGR copy: on the single-channel image only the first colour
# component (0) would be used, so the "green" box would come out black.
annotated = cv2.cvtColor(otsu_threshold, cv2.COLOR_GRAY2BGR)
for c in contours:
    # Keep only contours whose area is above the minimum.
    if cv2.contourArea(c) > 1500:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow('Otsu', annotated)
# imshow only renders once the GUI event loop runs; wait for a key press.
cv2.waitKey(0)
cv2.destroyAllWindows()


  • Here is one way to do that in Python/OpenCV.

    • Read the input
    • Threshold on color
    • Apply morphology to fill out the circles
    • Get external contours
    • For each contour, get the centroids and draw filled circles of radius=20 on the input at the centroid locations
    • Save results


    import cv2
    import numpy as np
    # read image
    img = cv2.imread('omr_sheet.jpg')
    h, w = img.shape[:2]
    # trim 15 from bottom to remove partial answer
    img = img[0:h-15, 0:w]
    # threshold on color
    # NOTE(review): `lower` and `upper` (the per-channel bounds passed to
    # inRange) are not defined in this snippet; set them before this line.
    thresh = cv2.inRange(img, lower, upper)
    # apply morphology close, then open, with the same 15x15 elliptical kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
    # get contours
    result = img.copy()
    centers = []
    contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # findContours returns 2 or 3 values depending on the OpenCV version
    contours = contours[0] if len(contours) == 2 else contours[1]
    print("count:", len(contours))
    i = 1
    for cntr in contours:
        M = cv2.moments(cntr)
        # a zero-area contour has m00 == 0, so its centroid is undefined
        if M["m00"] == 0:
            continue
        cx = int(M["m10"] / M["m00"])
        cy = int(M["m01"] / M["m00"])
        centers.append((cx,cy))
        # draw a filled circle of radius 20 at the centroid
        cv2.circle(result, (cx, cy), 20, (0, 255, 0), -1)
        pt = (cx,cy)
        print("circle #:",i, "center:",pt)
        i = i + 1
    # `centers` now holds the (cx, cy) of every circle drawn above
    # show results
    cv2.imshow("thresh", thresh)
    cv2.imshow("morph", morph)
    cv2.imshow("result", result)
    # imshow only renders once the GUI event loop runs; wait for a key press
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    Threshold Image:

    Morphology Image:

    count: 48
    circle #: 1 center: (77, 3151)
    circle #: 2 center: (78, 3087)
    circle #: 3 center: (77, 3021)
    circle #: 4 center: (76, 2959)
    circle #: 5 center: (79, 2892)
    circle #: 6 center: (77, 2830)
    circle #: 7 center: (77, 2762)
    circle #: 8 center: (78, 2695)
    circle #: 9 center: (77, 2631)
    circle #: 10 center: (76, 2566)
    circle #: 11 center: (78, 2500)
    circle #: 12 center: (77, 2435)
    circle #: 13 center: (77, 2373)
    circle #: 14 center: (77, 2301)
    circle #: 15 center: (79, 2238)
    circle #: 16 center: (79, 2178)
    circle #: 17 center: (77, 2108)
    circle #: 18 center: (78, 2045)
    circle #: 19 center: (80, 1980)
    circle #: 20 center: (78, 1913)
    circle #: 21 center: (78, 1848)
    circle #: 22 center: (80, 1786)
    circle #: 23 center: (77, 1722)
    circle #: 24 center: (77, 1657)
    circle #: 25 center: (79, 1593)
    circle #: 26 center: (79, 1524)
    circle #: 27 center: (80, 1461)
    circle #: 28 center: (77, 1395)
    circle #: 29 center: (79, 1332)
    circle #: 30 center: (76, 1265)
    circle #: 31 center: (80, 1203)
    circle #: 32 center: (73, 1136)
    circle #: 33 center: (77, 1072)
    circle #: 34 center: (80, 1007)
    circle #: 35 center: (77, 944)
    circle #: 36 center: (78, 878)
    circle #: 37 center: (75, 815)
    circle #: 38 center: (77, 747)
    circle #: 39 center: (77, 684)
    circle #: 40 center: (79, 618)
    circle #: 41 center: (77, 554)
    circle #: 42 center: (78, 488)
    circle #: 43 center: (80, 423)
    circle #: 44 center: (78, 359)
    circle #: 45 center: (77, 293)
    circle #: 46 center: (78, 232)
    circle #: 47 center: (77, 165)
    circle #: 48 center: (78, 102)


    This alternative solution is probably less sensitive to the color of the ink: threshold on white using cv2.inRange(), then invert.

    import cv2
    import numpy as np
    # read image
    img = cv2.imread('omr_sheet.jpg')
    h, w = img.shape[:2]
    # trim 15 from bottom and 5 from right to remove partial answer and extraneous red
    img = img[0:h-15, 0:w-5]
    # threshold on white color, then invert so everything non-white is 255
    # NOTE(review): `lower` and `upper` (the per-channel bounds passed to
    # inRange) are not defined in this snippet; set them before this line.
    thresh = cv2.inRange(img, lower, upper)
    thresh = 255 - thresh
    # apply morphology open (15x15 ellipse), then close (7x7 ellipse)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))
    morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
    # get contours
    result = img.copy()
    contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # findContours returns 2 or 3 values depending on the OpenCV version
    contours = contours[0] if len(contours) == 2 else contours[1]
    print("count:", len(contours))
    i = 1
    for cntr in contours:
        M = cv2.moments(cntr)
        # a zero-area contour has m00 == 0, so its centroid is undefined
        if M["m00"] == 0:
            continue
        cx = int(M["m10"] / M["m00"])
        cy = int(M["m01"] / M["m00"])
        # draw a filled circle of radius 20 at the centroid
        cv2.circle(result, (cx, cy), 20, (0, 255, 0), -1)
        pt = (cx,cy)
        print("circle #:",i, "center:",pt)
        i = i + 1
    # show results
    cv2.imshow("thresh", thresh)
    cv2.imshow("morph", morph)
    cv2.imshow("result", result)
    # imshow only renders once the GUI event loop runs; wait for a key press
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    Threshold Image:

    Morphology Image:

    count: 48
    circle #: 1 center: (78, 3150)
    circle #: 2 center: (77, 3087)
    circle #: 3 center: (78, 3020)
    circle #: 4 center: (77, 2959)
    circle #: 5 center: (78, 2892)
    circle #: 6 center: (77, 2826)
    circle #: 7 center: (77, 2760)
    circle #: 8 center: (78, 2694)
    circle #: 9 center: (79, 2630)
    circle #: 10 center: (78, 2567)
    circle #: 11 center: (78, 2501)
    circle #: 12 center: (77, 2435)
    circle #: 13 center: (75, 2373)
    circle #: 14 center: (78, 2310)
    circle #: 15 center: (78, 2240)
    circle #: 16 center: (77, 2176)
    circle #: 17 center: (78, 2106)
    circle #: 18 center: (77, 2042)
    circle #: 19 center: (79, 1979)
    circle #: 20 center: (78, 1912)
    circle #: 21 center: (78, 1848)
    circle #: 22 center: (79, 1785)
    circle #: 23 center: (78, 1722)
    circle #: 24 center: (78, 1656)
    circle #: 25 center: (78, 1589)
    circle #: 26 center: (78, 1525)
    circle #: 27 center: (79, 1458)
    circle #: 28 center: (78, 1396)
    circle #: 29 center: (78, 1328)
    circle #: 30 center: (77, 1264)
    circle #: 31 center: (78, 1201)
    circle #: 32 center: (76, 1135)
    circle #: 33 center: (78, 1071)
    circle #: 34 center: (78, 1006)
    circle #: 35 center: (77, 944)
    circle #: 36 center: (77, 878)
    circle #: 37 center: (77, 814)
    circle #: 38 center: (77, 746)
    circle #: 39 center: (78, 683)
    circle #: 40 center: (77, 618)
    circle #: 41 center: (77, 553)
    circle #: 42 center: (78, 488)
    circle #: 43 center: (78, 423)
    circle #: 44 center: (77, 359)
    circle #: 45 center: (76, 293)
    circle #: 46 center: (79, 232)
    circle #: 47 center: (75, 165)
    circle #: 48 center: (79, 101)