python · opencv · tensorflow · object-detection-api

Extracting bounding box as .jpg


I want to extract the detected object along with its bounding box and save it as an image on my disk.

I have taken the code from EdjeElectronics' tutorial and successfully trained and tested the model. I get the bounding boxes drawn on my images.

import os
import cv2
import numpy as np
import tensorflow as tf
import sys
from glob import glob
import glob
import csv
from PIL import Image
import json

sys.path.append("..")

# Import utilities
from utils import label_map_util
from utils import visualization_utils as vis_util

MODEL_NAME = 'inference_graph'

CWD_PATH = os.getcwd()

PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')

PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')

PATH_TO_IMAGE = list(glob.glob("C:\\new_multi_cat\\models\\research\\object_detection\\img_test\\*jpeg"))

NUM_CLASSES = 3

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()

with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')


detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

num_detections = detection_graph.get_tensor_by_name('num_detections:0')


for paths in range(len(PATH_TO_IMAGE)):
    image = cv2.imread(PATH_TO_IMAGE[paths])
    image_expanded = np.expand_dims(image, axis=0)

    (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})


    vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=4,
    min_score_thresh=0.80)


    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
    vis_util.draw_bounding_boxes_on_image(
    white_bg_img ,
    np.squeeze(boxes),
    color='red',
    thickness=4)
    cv2.imwrite("bounding_boxes.jpg", white_bg_img)

    boxes = np.squeeze(boxes)
    for i in range(len(boxes)):
        box[0]=box[0]*height
        box[1]=box[1]*width
        box[2]=box[2]*height
        box[3]=box[3]*width
    roi = image[box[0]:box[2],box[1]:box[3]].copy()
    cv2.imwrite("box_{}.jpg".format(str(i)), roi)

This is the error I am getting:

Traceback (most recent call last):
  File "objd_1.py", line 75, in <module>
    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
AttributeError: 'str' object has no attribute 'shape'

I have searched a lot but am not able to identify what is wrong in my code. Why am I not able to extract the detected region as an image?


Solution

  • You are trying to take the shape of a file name (a string) instead of the image. Replace

    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
    

    with

    white_bg_img = 255*np.ones(image.shape, np.uint8)
    

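    A quick way to see the difference: PATH_TO_IMAGE[paths] is just a file path (a Python string), while the array returned by cv2.imread has a shape. A minimal sketch (the path here is only an example):

    import cv2
    import numpy as np

    path = "img_test/example.jpeg"   # example path -- a plain string, so it has no .shape
    image = cv2.imread(path)         # numpy array of shape (height, width, 3)
                                     # (cv2.imread returns None if the file is missing)

    print(type(path))    # <class 'str'>
    print(image.shape)   # e.g. (480, 640, 3)

    # building the white background from the array's shape works
    white_bg_img = 255 * np.ones(image.shape, np.uint8)
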
    Edit: corrected code

    import os
    import cv2
    import numpy as np
    import tensorflow as tf
    import sys
    from glob import glob
    import glob
    import csv
    from PIL import Image
    import json
    
    sys.path.append("..")
    
    # Import utilities
    from utils import label_map_util
    from utils import visualization_utils as vis_util
    
    MODEL_NAME = 'inference_graph'
    
    CWD_PATH = os.getcwd()
    
    PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
    
    PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
    
    PATH_TO_IMAGE = list(glob.glob("C:\\new_multi_cat\\models\\research\\object_detection\\img_test\\*jpeg"))
    
    NUM_CLASSES = 3
    
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    
    detection_graph = tf.Graph()
    
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    
        sess = tf.Session(graph=detection_graph)
    
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    
    
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    
    
    for paths in range(len(PATH_TO_IMAGE)):
        image = cv2.imread(PATH_TO_IMAGE[paths])
        image_expanded = np.expand_dims(image, axis=0)
    
        (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})
    
    
        vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=4,
        min_score_thresh=0.80)
    
    
        white_bg_img = 255*np.ones(image.shape, np.uint8)
        vis_util.draw_bounding_boxes_on_image_array(
        white_bg_img ,
        np.squeeze(boxes),
        color='red',
        thickness=4)
        cv2.imwrite("bounding_boxes.jpg", white_bg_img)
    
        # detection_boxes are normalized [ymin, xmin, ymax, xmax]; scale to pixels before cropping
        height, width = image.shape[:2]
        boxes = np.squeeze(boxes)
        scores = np.squeeze(scores)
        for i in range(len(boxes)):
            # skip low-confidence detections (same threshold used for visualization)
            if scores[i] < 0.80:
                continue
            ymin = int(boxes[i][0] * height)
            xmin = int(boxes[i][1] * width)
            ymax = int(boxes[i][2] * height)
            xmax = int(boxes[i][3] * width)
            roi = image[ymin:ymax, xmin:xmax].copy()
            cv2.imwrite("box_{}_{}.jpg".format(paths, i), roi)