Tags: python, opencv, image-processing, computer-vision, omr

Using Python OpenCV to crop an image based on reference marks


I have an image I would like to crop based on reference marks, which are black squares at the layout margins.

While my code can detect the reference marks, there seems to be a persistent error I can't get around: I cannot get coordinates of the reference marks accurate enough to crop the image so that the marks sit exactly in the corners of the cropped image, without "gaps" at the edges. The original image is shown below:

Original image

The code I use to auto-extract the region of interest is given below:

import os
import cv2
import imutils
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt




img_path = "template_page_1-min.png"
img = cv2.imread(img_path)
template = img.copy()
sharpen_kernel = np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]])
sharpened = cv2.filter2D(template.copy(), -1, sharpen_kernel)

gray = cv2.cvtColor(sharpened, cv2.COLOR_BGR2GRAY)

# Syntax: dst = cv2.bilateralFilter(src, d, sigmaColor, sigmaSpace), where d is
# the diameter of the pixel neighbourhood. Increasing sigmaColor and sigmaSpace
# blurs out more background information, but be careful that the useful parts
# of the image do not get blurred as well.
bfilter = cv2.bilateralFilter(gray, 11, 65, 65)  # noise reduction

hsv = cv2.cvtColor(np.stack((bfilter.copy(),) * 3, axis=-1),
                   cv2.COLOR_BGR2HSV)

# set the HSV bounds for light gray pixels (low saturation, high value)
lower_gray = np.array([0, 0, 100])
upper_gray = np.array([255, 5, 255])

mask_grey = cv2.inRange(hsv, lower_gray, upper_gray)

# Build mask of non-black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))

# Erode the mask - pixels adjacent to a black pixel should not be masked
nzmask = cv2.erode(nzmask, np.ones((3, 3)))
mask_grey = mask_grey & nzmask

template[np.where(mask_grey)] = 255

template = cv2.cvtColor(template.copy(), cv2.COLOR_BGR2RGB)

gray_processed = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

# Canny keeps edges whose intensity gradient is above the upper threshold,
# plus connected edges whose gradient is above the lower threshold
edged = cv2.Canny(gray_processed.copy(), 40, 250)

adapt_thresh = cv2.adaptiveThreshold(
    edged.copy(),
    255, # maximum value assigned to pixel values exceeding the threshold
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV,  # thresholding type
    11, # block size (11x11 neighbourhood)
    2) # constant

# Apply some dilation and erosion to join the gaps -
# change the number of iterations to detect more or fewer areas
# adapt_thresh = cv2.dilate(adapt_thresh, None, iterations = 9)
# adapt_thresh = cv2.erode(adapt_thresh, None, iterations = 10)

adapt_thresh = cv2.dilate(adapt_thresh, None, iterations=5)
adapt_thresh = cv2.erode(adapt_thresh, None, iterations=5)
contours, hierarchy = cv2.findContours(
    adapt_thresh, 
    cv2.RETR_EXTERNAL,
    cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:20]

working_image = template.copy()
idx = 0
font = cv2.FONT_HERSHEY_COMPLEX

min_x, min_y, max_x, max_y = 0, 0, 0, 0
coord_matrix = None

# loop over our contours
for contour in contours:
    # approximate the contour
    area = cv2.contourArea(contour)
    x, y, w, h = cv2.boundingRect(contour)
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.09 * perimeter, True)
    
    # approx points: p1 top-left, p2 bottom-left, p3 bottom-right, p4 top-right

    # keep only contours with four vertices and an area matching the reference marks
    if len(approx) == 4 and 4500 < area < 4900:
        
        coord = np.matrix([[x, y, x+w, y+h]])
        if coord_matrix is not None:
            coord_matrix = np.vstack((coord_matrix, coord))               
        else:
            coord_matrix = coord.copy()  

        _ = cv2.rectangle(working_image,(x,y),(x+w,y+h),(0,255,0),2)  
        cv2.putText(working_image,str(area) , (x, y), font, 1, (0,0,255))
        #cv2.putText(cleaned_bg_img, str(idx) + ":" +str(x) + ","+str(y), (x, y), font, 1, (0,0,255))
        
        idx+=1

 
start_x = np.min(coord_matrix[:,0])
start_y = np.min(coord_matrix[:,1])
end_x = np.max(coord_matrix[:,2])
end_y = np.max(coord_matrix[:,3])

roi_interest = img.copy()[start_y:end_y, start_x:end_x]

aligned_with_border = cv2.copyMakeBorder(
        roi_interest.copy(), 
        top = 1, 
        bottom = 1, 
        left = 1, 
        right = 1, 
        borderType = cv2.BORDER_CONSTANT,
        value=(0,0,0)
    )


plt.figure(figsize = (11.69*2,8.27*2))
plt.axis('off')
plt.imshow(aligned_with_border);

However, the resulting image has gaps between the reference marks and the image margins, as indicated by the red arrows at the top corners of the image below.

Cropped image

My question is: given the code I have shared, how do I ensure that there are no gaps between the reference marks and the image margins, i.e. that the reference marks are aligned exactly with the image edges?


Solution

  • Your approach is quite complicated. This is a simple problem and the best thing to do would be to find a simple solution to it.

    Take the column-wise sum of the binarised image to find the first and last columns that contain information. Then sample a column just inside each of those positions and find the first and last rows that contain information in the same way. See the following plot, where the red lines mark the first and last columns and the green lines the first and last rows:

    Plotted

    I added some margins to make sure that there is some leeway in case the image is a bit skewed. Here is the code and the resulting image (one way to reproduce the plot above is sketched after the code):

    import cv2
    import numpy as np

    im = cv2.imread("Form.jpg", cv2.IMREAD_GRAYSCALE)
    # Otsu threshold, inverted so that ink and marks become 1 and the background 0
    imOTSU = cv2.threshold(im, 0, 1, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]
    colSum = np.sum(imOTSU, axis=0)  # number of foreground pixels per column
    xfirstNonZero = np.argmax(colSum != 0)  # first column containing information
    xlastNonZero = len(colSum) - np.argmax(colSum[::-1] != 0) - 1  # last column containing information
    # sample a column just inside each boundary (the 30 px margin gives some
    # leeway in case the image is a bit skewed) and repeat the search row-wise
    FirstColumn = imOTSU[:, xfirstNonZero + 30]
    LastColumn = imOTSU[:, xlastNonZero - 30]
    yfirstNonZero = np.argmax(FirstColumn != 0)  # first row containing information
    ylastNonZero = len(LastColumn) - np.argmax(LastColumn[::-1] != 0) - 1  # last row containing information
    cv2.imwrite("Cropped.jpg", im[yfirstNonZero:ylastNonZero, xfirstNonZero:xlastNonZero])  # save the crop
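
    One way to reproduce the plot above (a minimal sketch, reusing the variables computed in the snippet) is to overlay the detected boundary columns and rows with matplotlib:

    import matplotlib.pyplot as plt

    plt.imshow(im, cmap="gray")
    # red vertical lines: first and last columns containing information
    plt.axvline(xfirstNonZero, color="red")
    plt.axvline(xlastNonZero, color="red")
    # green horizontal lines: first and last rows containing information
    plt.axhline(yfirstNonZero, color="green")
    plt.axhline(ylastNonZero, color="green")
    plt.axis("off")
    plt.show()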
    

    Resulting image:

    cropped

    Note that a very small space remains on either side of the cropped image because the page is slightly tilted. This can be solved by either deskewing the image or applying a four-point transform.
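
    If you go the four-point-transform route, the sketch below shows the idea (illustrative only: it assumes you already have the outer corners of the four reference marks, for example derived from the bounding boxes in coord_matrix in the question, ordered top-left, top-right, bottom-right, bottom-left):

    import cv2
    import numpy as np

    def four_point_transform(image, pts):
        # pts: 4x2 float32 array of the mark corners, ordered
        # top-left, top-right, bottom-right, bottom-left
        (tl, tr, br, bl) = pts
        maxWidth = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
        maxHeight = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))
        # destination rectangle: the marks are mapped exactly onto the image corners
        dst = np.array([[0, 0],
                        [maxWidth - 1, 0],
                        [maxWidth - 1, maxHeight - 1],
                        [0, maxHeight - 1]], dtype="float32")
        M = cv2.getPerspectiveTransform(pts, dst)
        return cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # hypothetical usage with made-up corner coordinates:
    # corners = np.array([[35, 40], [1190, 42], [1192, 1650], [33, 1648]], dtype="float32")
    # warped = four_point_transform(im, corners)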