I'm using the algorithm below to segment sentences into words and words into characters. As you can see in the output below, the letters 'S' and 'T' in the word 'STAND' are bounded together, and I can't understand what I've done wrong. I'd be glad if you could help me.
2. I've already trained a model on the EMNIST letters dataset. My model can predict only one letter at a time. To proceed further, I need to extract each character box into an array of character images, so that I ultimately have an array containing all of the character images. After that, I plan to use my model to predict each character individually.
Additionally, I'll need to resize each character to 28x28 pixels, as the model is trained to predict letters from images of that size. I'm having trouble doing this and hope you can help me.
import cv2
# Preprocessing
def preProcessing(myImage):
    """Locate coarse text regions in ``myImage`` and run word segmentation.

    The image is converted to grayscale, binarized with an inverted Otsu
    threshold, and dilated with an 18x18 rectangular kernel so that nearby
    strokes fuse into larger regions. A bounding rectangle is drawn for every
    external contour on a copy of the input, and that copy is then handed to
    ``seg_word``.

    Args:
        myImage: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``
            (presumably a BGR image as returned by ``cv2.imread``).

    Returns:
        The image returned by ``seg_word``; the input itself is not modified.
    """
    gray = cv2.cvtColor(myImage, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the first return value is the threshold that was chosen.
    ret, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
    )
    print(f'The threshold valua applied to the image is: {ret} ')

    region_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
    dilated = cv2.dilate(binary, region_kernel, iterations=1)
    region_contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )

    annotated = myImage.copy()
    for contour in region_contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 255, 255), 0)

    # cv2.rectangle draws in place, so ``annotated`` already carries every
    # box. Passing it directly (instead of the rectangle call's return value)
    # also works when no contour was found and the loop body never ran.
    return seg_word(annotated)
# Word segmentation
def seg_word(wordImage):
    """Draw word-level boxes on a copy of ``wordImage`` and segment characters.

    The image is converted to grayscale, binarized with an inverted Otsu
    threshold, and dilated once with an 8x10 rectangular kernel so that the
    strokes of neighbouring letters fuse into word-sized regions. Each
    external contour's bounding rectangle is drawn in (0, 255, 0) on a copy of
    the input, which is then passed to ``character_seg``.

    Args:
        wordImage: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        The image returned by ``character_seg``; the input itself is not
        modified.
    """
    gray = cv2.cvtColor(wordImage, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
    )

    word_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 10))
    dilated = cv2.dilate(binary, word_kernel, iterations=1)
    word_contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )

    annotated = wordImage.copy()
    for contour in word_contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 0)

    # The rectangles were drawn in place on ``annotated``; handing that array
    # on directly avoids depending on a loop-local name that is unbound when
    # no contour is found.
    return character_seg(annotated)
# Character segmentation
def character_seg(img, dilate_iterations=1):
    """Draw a bounding box around each character-sized blob found in ``img``.

    The image is converted to grayscale, binarized with an inverted Otsu
    threshold, eroded once with a 3x5 rectangular kernel to drop small
    artifacts, and dilated again with the same kernel. A rectangle in
    (255, 0, 0) with thickness 2 is drawn on ``img`` for every external
    contour.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
            It is drawn on in place.
        dilate_iterations: Number of dilation passes applied after the
            erosion. Defaults to 1; the previous hard-coded value of 3
            thickened the strokes so much that closely spaced letters
            (e.g. 'S' and 'T') merged into a single contour.

    Returns:
        The same ``img`` object, with the character boxes drawn on it.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )[1]

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 5))
    eroded = cv2.erode(binary, kernel, iterations=1)
    # Keep the dilation modest so neighbouring glyphs stay separate contours.
    dilated = cv2.dilate(eroded, kernel, iterations=dilate_iterations)

    contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    return img
# Load the test image. The path must be a single-line raw string; a line break
# inside the literal is a syntax error.
image_path = r"C:\Users\student\Desktop\FinalProject\Flask\uploads\1_lWmB8FGf1uWT6r1TichK-Q-ezgif.com-webp-to-png-converter.png"
myImage = cv2.imread(image_path)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of inside cv2.imshow.
if myImage is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Display the original image, then the segmented result, each until a key
# is pressed.
cv2.imshow('Text Image', myImage)
cv2.waitKey(0)
processed_img = preProcessing(myImage)
cv2.imshow('Text Image', processed_img)
cv2.waitKey(0)
As you can see in the output below, the letters 'S' and 'T' in the word 'STAND' are bounded together, and I can't understand what I've done wrong. I'd be glad if you could help me.
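The problem can be fixed: three dilation passes thicken the strokes so much that closely spaced letters merge into a single contour.
Change `iterations=3` in the `cv2.dilate` call of `character_seg`
(line 73 of the original script):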
dilated = cv2.dilate(eroded, kernel, iterations=3)
To:
dilated = cv2.dilate(eroded, kernel, iterations=1)  # Change iterations from 3 to 1
Screenshot: