I would like to detect the labels in images like this one for the purpose of extracting the text using tesseract. I have tried various combinations of thresholding and using edge detection. However I can only detect about half of the labels at a time at max. These are a few of the images I've been trying to read the labels from:
All of the labels have the same aspect ratio (the width is 3.5 times larger than the height) so I'm trying to find contours that have a minAreaRect with that same aspect ratio. The hard part is handing the labels on the lighter background. This is the code I have so far:
from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
args = vars(ap.parse_args())
#function to crop an image to a minAreaRect
def crop_minAreaRect(img, rect):
# rotate img
angle = rect[2]
rows,cols = img.shape[0], img.shape[1]
M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)
img_rot = cv2.warpAffine(img,M,(cols,rows))
# rotate bounding box
rect0 = (rect[0], rect[1], 0.0)
box = cv2.boxPoints(rect)
pts = np.int0(cv2.transform(np.array([box]), M))[0]
pts[pts < 0] = 0
# crop
img_crop = img_rot[pts[1][1]:pts[0][1],
pts[1][0]:pts[2][0]]
return img_crop
# load image and apply threshold
image = cv2.imread(args["image"])
bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#bw = cv2.threshold(bw, 210, 255, cv2.THRESH_BINARY)[1]
bw = cv2.adaptiveThreshold(bw, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 27, 20)
#do edge detection
v = np.median(bw)
sigma = 0.5
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))
bw = cv2.Canny(bw, lower, upper)
kernel = np.ones((5,5), np.uint8)
bw = cv2.dilate(bw,kernel,iterations=1)
#find contours
image2, contours, hierarchy = cv2.findContours(bw,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
bw = cv2.drawContours(bw,contours,0,(0,0,255),2)
cv2.imwrite("edge.png", bw)
#test which contours have the correct aspect ratio
largestarea = 0.0
passes = []
for contour in contours:
(x,y),(w,h),a = cv2.minAreaRect(contour)
if h > 20 and w > 20:
if h > w:
maxdim = h
mindim = w
else:
maxdim = w
mindim = h
ratio = maxdim/mindim
print("ratio: {}".format(ratio))
if (ratio > 3.4 and ratio < 3.6):
passes.append(contour)
if not passes:
print "no passes"
exit()
passboxes = []
i = 1
#crop out each label and attemp to extract text
for ps in passes:
rect = cv2.minAreaRect(ps)
bw = crop_minAreaRect(image, rect)
cv2.imwrite("{}.png".format(i), bw)
i += 1
h, w = bw.shape[:2]
print str(h) + "x" + str(w)
if w and h:
bw = cv2.cvtColor(bw, cv2.COLOR_BGR2GRAY)
bw = cv2.threshold(bw, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imwrite("output.png", bw)
im = Image.open("output.png")
w, h = im.size
print "W:{} H:{}".format(w,h)
if h > w:
print ("rotating")
im.rotate(90)
im.save("output.png")
print pytesseract.image_to_string(Image.open("output.png"))
im.rotate(180)
im.save("output.png")
print pytesseract.image_to_string(Image.open("output.png"))
box = cv2.boxPoints(cv2.minAreaRect(ps))
passboxes.append(np.int0(box))
im.close()
cnts = cv2.drawContours(image,passboxes,0,(0,0,255),2)
cnts = cv2.drawContours(cnts,contours,-1,(255,255,0),2)
cnts = cv2.drawContours(cnts, passes, -1, (0,255,0), 3)
cv2.imwrite("output2.png", image)
I believe the problem I have could be the parameters for the thresholding. Or I could be over complicating this.
Only the white labels with "A-08337" and such? The following detects all of them on both images:
import numpy as np
import cv2
img = cv2.imread('labels.jpg')
#downscale the image because Canny tends to work better on smaller images
w, h, c = img.shape
resize_coeff = 0.25
img = cv2.resize(img, (int(resize_coeff*h), int(resize_coeff*w)))
#find edges, then contours
canny = cv2.Canny(img, 100, 200)
_, contours, _ = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
#draw the contours, do morphological close operation
#to close possible small gaps, then find contours again on the result
w, h, c = img.shape
blank = np.zeros((w, h)).astype(np.uint8)
cv2.drawContours(blank, contours, -1, 1, 1)
blank = cv2.morphologyEx(blank, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
_, contours, _ = cv2.findContours(blank, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
#keep only contours of more or less correct area and perimeter
contours = [c for c in contours if 800 < cv2.contourArea(c) < 1600]
contours = [c for c in contours if cv2.arcLength(c, True) < 200]
cv2.drawContours(img, contours, -1, (0, 0, 255), 1)
cv2.imwrite("contours.png", img)
Probably with some additional convexity check you can get rid of the "Verbatim" contours and such (for example, only keep contours with near zero difference between their area and their convex hull's area).