I am trying to locate the X&Y of all horizontal lines in a PDF document. I was using the code here: code to detect horizontal lines
This code marks the horizontal lines verywell but I am not able to extract their coordinates in the document.
This is my code:
def DetectLine(pageNum):
# Convert to grayscale and adaptive threshold to obtain a binary image
img = cv2.imread(outpath + 'page_' + str(pageNum) + '.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Then we create a kernel and perform morphological transformations to isolate horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
# detect lines find contours and draw the result
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(img, [c], -1, (36,255,12), 3)
cv2.imshow('image_' + str(pageNum), img)
This function gets the pagenumber and reads a pre-prepared JPG of the specific page.
How can I return the Xs & Ys?
If only need the points: you can extract it with:
Point 1: c[0][0]
or cnts[num]c[0][0]
Point 2: c[1][0]
or cnts[num]c[1][0]
where num is the index of the contour
Middle point The solution will be:
(cnts[0][1][0][0]+cnts[0][0][0][0])//2,cnts[0][0][0][1]
Since each line or countour for get has two points, you can calculate the middle point with the average formula. e.g: x1=10 and x2=90, the middle point then is (10+90)/2
Here is the complete code:
import cv2
image = cv2.imread('2.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
x,y=(c[1][0][0]+c[0][0][0])//2,c[0][0][1]
print(f'The middle point is: x: {x}, y: {y}')
cv2.drawContours(image, [c], -1, (36,255,12), 3)
cv2.circle(image, (x,y), radius=5, color=(0, 0, 255), thickness=-1)
cv2.imshow('thresh', thresh)
cv2.imshow('detected_lines', detected_lines)
cv2.imshow('image', image)
cv2.waitKey()