Search code examples
pythonopencvhough-transform

Converting graphs from a scanned document into data


I'm currently trying to write something that can extract data from some uncommon graphs in a book. I scanned the pages of the book, and by using opencv I would like to detect some features from the graphs in order to convert them into usable data. In the left graph I'm looking for the height of the "triangles" and in the right graph the distance from the center to the points where the dotted lines intersect with the gray area. In both cases I would like to convert these values into numeric data for further usage.

enter image description here

The first thing I thought of was detecting the lines of the charts, in the hopes I could somehow measure their length or position. For this I'm using the Hough Line Transform. The following snippet of code shows how far I've gotten already.

import numpy as np
import cv2

# Reading the image
img = cv2.imread('test2.jpg')
# cv2.imread reports failure by returning None instead of raising
if img is None:
    raise FileNotFoundError("could not read image 'test2.jpg'")
# Convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Apply edge detection
edges = cv2.Canny(gray, 50, 150, apertureSize=3)

# Line detection (probabilistic Hough transform on the edge map)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 100, minLineLength=50, maxLineGap=20)

# HoughLinesP returns None (not an empty array) when no segment is found,
# so guard the loop to avoid "TypeError: 'NoneType' object is not iterable"
if lines is not None:
    for line in lines:
        # Each entry wraps one segment as [[x1, y1, x2, y2]]; plain ints keep
        # cv2.line happy regardless of the NumPy scalar type returned
        x1, y1, x2, y2 = map(int, line[0])
        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

cv2.imwrite('linesDetected.jpg', img)

The only problem is that this detection algorithm is not accurate at all, at least not for me, and in order to extract data from the charts the detection of the lines needs to be reasonably accurate. Is there any way I could do this? Or is my strategy of detecting lines just wrong in the first place? Should I maybe start by detecting something else, like circles, object sizes, contours or colors?


Solution

  • Using color segmentation is an easy way to convert this graph to data. This method does require some manual annotation. After the graph is segmented, count the pixels for each color. Check out the 'watershed' demo in the demo files that are included in the OpenCV library:

    segmenting

    import numpy as np
    import cv2 as cv
    from common import Sketcher
    
    class App:
        """Interactive marker-based watershed segmentation of a single image.

        The image is loaded, resized to 654x654, and shown through a
        ``Sketcher`` (imported from ``common``; presumably the helper shipped
        with the OpenCV Python samples -- confirm) that paints into both a
        visible copy of the image and an int32 marker array. Number keys 1-9
        choose the active marker label; each time the sketch is marked dirty
        the watershed is recomputed and the colored overlay is written to disk.
        """

        def __init__(self, fn, overlay_path='/home/stephen/Desktop/overlay.png'):
            """Load the image at *fn* and set up the marker state.

            Args:
                fn: Path of the image to segment.
                overlay_path: Where ``watershed`` saves the colored label
                    image. Defaults to the location the original script used.

            Raises:
                FileNotFoundError: If OpenCV cannot read *fn*.
            """
            self.img = cv.imread(fn)
            # cv.imread returns None on failure; fail here with a clear message
            # instead of an opaque error from cv.resize below.
            if self.img is None:
                raise FileNotFoundError('could not read image: %s' % fn)
            self.img = cv.resize(self.img, (654,654))
            h, w = self.img.shape[:2]
            # One int32 label per pixel; 0 means "not marked yet".
            self.markers = np.zeros((h, w), np.int32)
            self.markers_vis = self.img.copy()
            self.cur_marker = 1
            # 2*2*3 = 12 colors built from the channel values 0, 123 and 246.
            self.colors = np.int32( list(np.ndindex(2, 2, 3)) ) * 123
            self.auto_update = True
            self.overlay_path = overlay_path
            self.sketch = Sketcher('img', [self.markers_vis, self.markers], self.get_colors)

        def get_colors(self):
            """Return ``(color, label)`` for the active marker.

            ``color`` is the active marker's entry in ``self.colors`` as a list
            of plain ints and ``label`` is ``self.cur_marker``. Passed to the
            ``Sketcher`` as its color callback.
            """
            return list(map(int, self.colors[self.cur_marker])), self.cur_marker

        def watershed(self):
            """Run the watershed on a copy of the markers and show/save the result."""
            m = self.markers.copy()
            cv.watershed(self.img, m)
            cv.imshow('img', self.img)
            # Negative labels (cv.watershed marks region boundaries with -1)
            # are clamped to color index 0.
            overlay = self.colors[np.maximum(m, 0)]
            vis = cv.addWeighted(self.img, 0.5, overlay, 0.5, 0.0, dtype=cv.CV_8UC3)
            # Convert once and reuse for both the preview and the saved file.
            overlay_u8 = np.array(overlay, np.uint8)
            cv.imshow('overlay', overlay_u8)
            # cv.imwrite signals failure through its return value.
            if not cv.imwrite(self.overlay_path, overlay_u8):
                print('warning: could not write overlay to', self.overlay_path)
            cv.imshow('watershed', vis)

        def run(self):
            """Process key presses until Esc is hit or both windows report -1."""
            while cv.getWindowProperty('img', 0) != -1 or cv.getWindowProperty('watershed', 0) != -1:
                ch = cv.waitKey(50)
                # Keys '1'..'9' select the marker label to paint with.
                if ord('1') <= ch <= ord('9'):
                    self.cur_marker = ch - ord('0')
                    print('marker: ', self.cur_marker)
                if self.sketch.dirty and self.auto_update:
                    self.watershed()
                    self.sketch.dirty = False
                # 27 is the Esc key.
                if ch == 27:
                    break
            cv.destroyAllWindows()
    
    
    # Image to annotate; resolved through OpenCV's sample-file lookup before use.
    fn = '/home/stephen/Desktop/test.png'
    app = App(cv.samples.findFile(fn))
    # Blocks in the UI loop until the user quits.
    app.run()
    

    The output will be an image like this:

    segmentation output

    You can count the pixels for each color using this code:

    # Extract the values from the image
    vals = []
    img = cv.imread('/home/stephen/Desktop/overlay.png')
    # cv.imread returns None instead of raising when the file cannot be read
    if img is None:
        raise FileNotFoundError('/home/stephen/Desktop/overlay.png')
    # Get the colors in the image (one row per distinct pixel value, sorted)
    flat = img.reshape(-1, img.shape[-1])
    colors = np.unique(flat, axis=0)
    # Iterate through the colors (ignore the first and last colors)
    for color in colors[1:-1]:
        # Widen before the +/-1: on uint8 channels 0-1 and 255+1 can wrap
        # around, which would turn the range empty. Clamp back to 0..255.
        center = color.astype(np.int32)
        lower = np.clip(center - 1, 0, 255).astype(np.uint8)
        upper = np.clip(center + 1, 0, 255).astype(np.uint8)
        mask = cv.inRange(img, lower, upper)
        # The mask holds 0 or 255 per pixel, so count the non-zero entries;
        # summing the uint8 rows would overflow and would not be a pixel count.
        vals.append(cv.countNonZero(mask))
        cv.imshow('mask', mask)
        # Wait for a key press before moving on to the next color
        cv.waitKey(0)
    cv.destroyAllWindows()
    

    And print out the output data using this code:

    # Labels for the segmented regions, in the same order as the values in `vals`.
    names = ['alcohol', 'esters', 'biter', 'hoppy', 'acid', 'zoetheid', 'mout']
    # Pair each label with its value; zip stops at the shorter of the two lists.
    pairs = [(name, val) for name, val in zip(names, vals)]
    print(pairs)
    

    The output is:

    [('alcohol', 22118), ('esters', 26000), ('biter', 16245), ('hoppy', 21170), ('acid', 19156), ('zoetheid', 11090), ('mout', 7167)]