One problem with optical character recognition (OCR) is that it often cannot recognize numbers properly when the numbers are inside square boxes. One failure example with Tesseract is discussed here: "Tesseract - How can I recognize numbers in box?". I was testing with PaddleOCR here: https://www.paddlepaddle.org.cn/hub/scene/ocr (you can quickly try that API too). For this input image:
Then again, when I try an image like this:
it returns all the numbers successfully. Most of the time, number recognition (both printed and handwritten) fails when the numbers are inside square boxes. To recognize numbers inside square boxes, we need to convert these "numbers in boxes" images into plain "numbers" images by removing all the square boxes. I have some images like the ones below:
As you can see, the square boxes around the numbers are not fully visible; only some parts of the boxes are visible. I want to convert these images into images that contain only the numbers, by removing the square boxes (or the parts of square boxes) present in them. After that, hopefully number/digit recognition will work. I tried this code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the sample and work on a single-channel copy.
img = cv2.imread('/content/21.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 11x11 structuring element whose only non-zero entries form a vertical
# line through the middle column (column index 5).
vertical_line_kernel = np.zeros((11, 11), dtype=np.uint8)
vertical_line_kernel[:, 5] = 1

# Grayscale opening with the vertical-line kernel, repeated 800 times,
# then subtract the opened image from the grayscale image in place.
opened = cv2.morphologyEx(
    gray,
    cv2.MORPH_OPEN,
    vertical_line_kernel,
    iterations=800,
)
np.subtract(gray, opened, out=gray)

plt.imshow(gray)
cv2.imwrite('21_output.jpg', gray)
output :
also tried this code :
import cv2
import numpy as np
import matplotlib.pyplot as plt
#https://stackoverflow.com/questions/57961119/how-to-remove-all-the-detected-lines-from-the-original-image-using-python
image = cv2.imread('/content/17.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Inverted Otsu binarisation: dark ink becomes 255, background becomes 0.
_otsu_level, thresh = cv2.threshold(
    gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)

# Remove vertical: opening with a 1-wide, 10-tall rectangle keeps only
# strokes that are at least that tall.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 10))
detected_lines = cv2.morphologyEx(
    thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2
)

# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the contour list in either case.
found = cv2.findContours(
    detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
contours = found[0] if len(found) == 2 else found[1]
for contour in contours:
    cv2.drawContours(image, [contour], -1, (255, 255, 255), 2)

# `image` is rebound here, so the white contours painted above are not part
# of what gets displayed; the plot shows the binary mask minus the lines.
image = thresh - detected_lines
plt.imshow(image)
output :
Unfortunately, it is not able to remove the unwanted lines completely. When it removes the unwanted lines, it removes parts of the original digits/numbers as well. How can I remove those complete or incomplete square boxes around each number in the image? Thanks in advance.
The code below does a decent job for me, but it is hyperparameter-sensitive:
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
def square_number_box_denoiser(image_path="/content/9.png", is_resize=False, resize_width=768,
                               *, line_kernel_length=25, repair_kernel_length=6,
                               output_path="result.jpg"):
    '''
    Erase box edges around digits, save the cleaned image and plot every stage.

    The image is rotated 90 degrees clockwise so that the horizontal-line
    opening acts on edges that are vertical in the input; only those edges
    are targeted. Detected lines are painted white, the digit strokes are
    repaired with a small closing, and the result is rotated back.

    ref : https://pretagteam.com/question/removing-horizontal-lines-in-image-opencv-python-matplotlib

    Args :
        image_path (str) : path of the image containing numbers/digits inside square box
        is_resize (bool) : whether to resize the input image or not? default : False
        resize_width (int) : target width used when resizing (passed to imutils.resize). default : 768
        line_kernel_length (int) : width in pixels of the 1-pixel-high kernel used to detect
            lines; a stroke must be at least this long to be treated as a box edge. default : 25
        repair_kernel_length (int) : height in pixels of the 1-pixel-wide kernel used to close
            the gaps left in the digits after the lines are painted out. default : 6
        output_path (str) : file the cleaned image is written to. default : "result.jpg"

    Returns :
        the cleaned image in the input orientation (same array that is written to output_path)

    Raises :
        FileNotFoundError : if cv2.imread cannot load image_path
    '''
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            f"could not read image (missing file or unsupported format): {image_path}"
        )

    if is_resize:
        print("resizing...")
        img = imutils.resize(img, width=resize_width)

    image = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverted Otsu binarisation: ink becomes 255, background becomes 0.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Remove horizontal lines (of the rotated image): the opening keeps only
    # strokes at least `line_kernel_length` pixels long.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_kernel_length, 1))
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        # Paint each detected line white, i.e. the background colour.
        cv2.drawContours(image, [c], -1, (255, 255, 255), 2)

    # Repair image: close (on the inverted image) the cuts that painting out
    # the lines left in the digit strokes.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, repair_kernel_length))
    result = 255 - cv2.morphologyEx(255 - image, cv2.MORPH_CLOSE, repair_kernel, iterations=2)
    upright_result = cv2.rotate(result, cv2.ROTATE_90_COUNTERCLOCKWISE)

    # OpenCV stores colour images as BGR while matplotlib expects RGB, so
    # colour panels are converted before display; masks are shown as-is.
    panels = [
        cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
        thresh,
        detected_lines,
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
        cv2.cvtColor(result, cv2.COLOR_BGR2RGB),
        cv2.cvtColor(upright_result, cv2.COLOR_BGR2RGB),
    ]
    rows = 3
    columns = 3
    fig = plt.figure(figsize=(20, 20))
    for position, panel in enumerate(panels, start=1):
        fig.add_subplot(rows, columns, position)
        plt.imshow(panel)

    cv2.imwrite(output_path, upright_result)
    plt.show()
    return upright_result
without resizing :
with 768 resizing :