Search code examples
python opencv text detection roi

Extract image from ROI (OpenCV)


Given the following code (python)...

# Digit-localisation script: thresholds a photo, finds the outer contours,
# builds a 28x28 patch around each contour's bounding rectangle and writes
# a single patch to disk.

# Import the modules
import cv2
# NOTE(review): `sklearn.externals.joblib` is not shipped by recent
# scikit-learn releases -- confirm the installed version still provides it.
from sklearn.externals import joblib
from skimage.feature import hog
import numpy as np
from scipy import ndimage
import PIL
from PIL import Image

# Load the classifier. It is only referenced by the commented-out
# prediction lines inside the loop below.
clf = joblib.load("digits_cls.pkl")

# Read the input image
im = cv2.imread("C:\\Users\\Wkgrp\\Desktop\\test.jpg")

# Convert to grayscale and apply Gaussian filtering (5x5 kernel)
im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)

# Threshold the image: inverse binary threshold at 90, maximum value 255
ret, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)

# Find contours in a copy of the thresholded image.
# NOTE(review): the three-value unpacking assumes an OpenCV build whose
# findContours returns (image, contours, hierarchy) -- confirm the version.
image, ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Get the rectangle that contains each contour. Below, rect[0]/rect[1] are
# used as one corner and rect[2]/rect[3] are added to them to get the
# opposite corner.
rects = [cv2.boundingRect(ctr) for ctr in ctrs]


# For each rectangular region, calculate HOG features and predict
# the digit using Linear SVM (the prediction itself is commented out).
for rect in rects:
    # Draw the rectangle on the colour image (green, thickness 3)
    cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) 
    # Make a square region around the digit: side = 1.6 * rect[3], centred
    # on the rectangle's centre. pt1 is the starting row, pt2 the starting
    # column of the slice taken from the thresholded image.
    # NOTE(review): pt1/pt2 can become negative for rectangles close to the
    # image border -- verify the slice below behaves as intended then.
    leng = int(rect[3] * 1.6)
    pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
    pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
    roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
    # Resize the region to 28x28
    roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
    # NOTE(review): (3, 3) is passed in the kernel position of cv2.dilate --
    # confirm it is interpreted as the intended structuring element.
    roi = cv2.dilate(roi, (3, 3))

    # Calculate the HOG features - Number Recognition (Not to print...)
    #roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)
    #nbr = clf.predict(np.array([roi_hog_fd], 'float64'))
    #cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)


#cv2.imshow("Resulting Image with Rectangular ROIs", im)
#cv2.waitKey()
#cv2.imwrite("C:\\Users\\Wkgrp\\Desktop\\crop\\img_with_ROI.jpg",im)
#cv2.imwrite("C:\\Users\\Wkgrp\\Desktop\\crop\\img_threshold.jpg",im_th)
# This statement is outside the for loop, so it runs exactly once, after the
# loop has finished, with whatever `roi` was assigned in the final iteration.
# The file name is always ".jpg" inside the crop folder, so only one file is
# ever produced. If `rects` is empty, `roi` was never assigned.
cv2.imwrite("C:\\Users\\Wkgrp\\Desktop\\crop\\.jpg",roi)

print("NO ERRORS")

and the image used as input...

Test Image

I can extract a ROI and save it. The problem is that the code saves only the first digit (maybe because of the "for rects" loop at line 32). What do I have to modify to save all the recognized characters (the ones with a bounding box around them)?

Also, suppose there are 10 images like the example. I have to save all of them in one folder, each with a different filename generated automatically. How can I do that?

Thank you


Solution

  • Here is code that answers the request. The only caveat is that it does not save the characters in any particular order; they are saved in the order in which they are detected.

    # Import the modules
    import cv2
    from sklearn.externals import joblib
    from skimage.feature import hog
    import numpy as np
    from scipy import ndimage
    import PIL
    from PIL import Image
    
    # Crop every character found in the input image and save each crop as a
    # separately numbered JPEG ("1.jpg", "2.jpg", ...) in the output folder.
    # Crops are numbered in the order cv2.findContours returns the contours.

    # Load the classifier. It is not used by the cropping below; it is kept
    # from the original script for the digit-recognition step. joblib.load
    # unpickles the file, so only point it at a model file you trust.
    clf = joblib.load("digits_cls.pkl")

    # Read the input image. cv2.imread returns None instead of raising when
    # the file is missing or unreadable, so check explicitly.
    input_path = "C:\\Users\\Bob\\Desktop\\causale.jpg"
    im = cv2.imread(input_path)
    if im is None:
        raise IOError("Could not read input image: " + input_path)

    # Convert to grayscale and apply Gaussian filtering
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)

    # Threshold the image (inverse binary threshold at 90)
    _, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)

    # Find the outer contours. findContours returns (contours, hierarchy) in
    # OpenCV 2.x/4.x but (image, contours, hierarchy) in 3.x; the contour list
    # is the second-to-last element in every case.
    ctrs = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Save the bounding-box crop of each contour under its own file name.
    output_prefix = 'C:\\Users\\Bob\\Desktop\\crop\\'
    for idx, ctr in enumerate(ctrs, start=1):
        x, y, w, h = cv2.boundingRect(ctr)
        roi = im[y:y+h, x:x+w]
        out_path = output_prefix + str(idx) + '.jpg'
        # cv2.imwrite signals failure (e.g. a missing output folder) by
        # returning False rather than raising, so surface it explicitly.
        if not cv2.imwrite(out_path, roi):
            raise IOError("Could not write crop: " + out_path)

    print("NO ERRORS")