Search code examples
python, image, opencv, image-processing, scikit-image

Straighten largest line in image


I have a large number of images of food items on a tray. The tray is the largest thing in the picture, and it always contains the longest line, e.g. this input image (I had to Photoshop out the items on the tray).

I want a script that can align and straighten the tray, like so: expected output

Is this possible using python?


Solution

  • Here's an approach

    • Convert image to grayscale
    • Otsu's threshold to get a binary image
    • Find contours and filter using approximate contour
    • Perspective transform for top-down view
    • Rotate image to get correct orientation

    After converting to grayscale, we apply Otsu's threshold to obtain a binary image.

    Now we find contours on this image and filter using cv2.arcLength() and cv2.approxPolyDP(). The idea is that if the contour has approximately 4 corners, then it must be our desired object. Additional filtering steps could be to use cv2.contourArea() to ensure that only the largest contour is used. Here's the detected contour

    From here we perform a perspective transform to get a top-down view

    Finally we rotate the image depending on the desired orientation. Here's the result

    import cv2
    import numpy as np
    import imutils
    
    def perspective_transform(image, corners):
        """Warp the quadrilateral described by ``corners`` to a top-down view.

        Args:
            image: Source image handed to ``cv2.warpPerspective``.
            corners: Exactly four 2-D points, either in the ``(4, 1, 2)``
                layout returned by ``cv2.approxPolyDP`` or as a plain
                ``(4, 2)`` sequence.  The points are taken, in the order
                given, as top-right, top-left, bottom-left, bottom-right;
                they are NOT re-sorted here, so the caller is responsible
                for any final rotation of the result.

        Returns:
            The warped image of size ``width`` x ``height``, where each
            dimension is the longer of the two opposing side lengths of the
            quadrilateral (truncated to an integer).

        Raises:
            ValueError: If ``corners`` does not hold exactly four 2-D points.
        """
        # Flatten the (N, 1, 2) contour layout (or an (N, 2) array) into rows
        # of (x, y).  float64 keeps the side-length arithmetic exact for
        # integer pixel coordinates.
        points = np.asarray(corners, dtype="float64").reshape(-1, 2)
        if len(points) != 4:
            raise ValueError(
                "perspective_transform expects exactly 4 corners, got %d"
                % len(points)
            )

        # Incoming order (by assumption): top-right, top-left, bottom-left,
        # bottom-right.
        top_r, top_l, bottom_l, bottom_r = points

        def side_length(p, q):
            # Euclidean distance between two (x, y) points.
            return np.sqrt(np.sum((p - q) ** 2))

        # Output width: the longer of the bottom and top edges.
        width = max(int(side_length(bottom_r, bottom_l)),
                    int(side_length(top_r, top_l)))

        # Output height: the longer of the right and left edges.
        height = max(int(side_length(top_r, bottom_r)),
                     int(side_length(top_l, bottom_l)))

        # Source corners in clockwise order starting at top-left, matched
        # one-to-one with the corners of the destination rectangle below.
        source = np.array([top_l, top_r, bottom_r, bottom_l], dtype="float32")
        destination = np.array(
            [
                [0, 0],                    # top-left
                [width - 1, 0],            # top-right
                [width - 1, height - 1],   # bottom-right
                [0, height - 1],           # bottom-left
            ],
            dtype="float32",
        )

        # Find the perspective transform matrix and apply it.
        matrix = cv2.getPerspectiveTransform(source, destination)
        return cv2.warpPerspective(image, matrix, (width, height))
    
    # Load the photo.  cv2.imread signals an unreadable/missing file by
    # returning None rather than raising, so fail early with a clear message.
    image = cv2.imread('1.jpg')
    if image is None:
        raise SystemExit("Could not read image '1.jpg'")
    original = image.copy()

    # Grayscale + Otsu's threshold gives a binary image of the scene.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # pick the contour list in either case.
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Collect every contour that approximates to a quadrilateral, together
    # with its area, so the largest one can be chosen instead of whichever
    # happens to come last in the contour list.
    quads = []
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.015 * peri, True)
        if len(approx) == 4:
            quads.append((cv2.contourArea(c), c, approx))

    if not quads:
        raise SystemExit("No four-cornered contour found in '1.jpg'")

    # Use the quadrilateral with the largest area.
    _, tray_contour, tray_corners = max(quads, key=lambda quad: quad[0])
    cv2.drawContours(image, [tray_contour], 0, (36, 255, 12), 3)
    transformed = perspective_transform(original, tray_corners)

    # Rotate the top-down view into the desired orientation.
    rotated = imutils.rotate_bound(transformed, angle=-90)

    cv2.imshow('thresh', thresh)
    cv2.imshow('image', image)
    cv2.imshow('transformed', transformed)
    cv2.imshow('rotated', rotated)
    cv2.waitKey()