python · opencv · tensorflow · object-detection-api

Extracting bounding box as .jpg


I want to extract the detected object along with its bounding box and save it as an image on my disk.

I have taken the code from EdjeElectronics' tutorial and successfully trained and tested the model. I get the bounding boxes drawn on my images.

import os
import cv2
import numpy as np
import tensorflow as tf
import sys
from glob import glob
import glob
import csv
from PIL import Image
import json

sys.path.append("..")

# Import utilities
from utils import label_map_util
from utils import visualization_utils as vis_util

MODEL_NAME = 'inference_graph'

CWD_PATH = os.getcwd()

PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')

PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')

PATH_TO_IMAGE = list(glob.glob("C:\\new_multi_cat\\models\\research\\object_detection\\img_test\\*jpeg"))

NUM_CLASSES = 3

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()

with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')


detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

num_detections = detection_graph.get_tensor_by_name('num_detections:0')


for paths in range(len(PATH_TO_IMAGE)):
    image = cv2.imread(PATH_TO_IMAGE[paths])
    image_expanded = np.expand_dims(image, axis=0)

    (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})


    vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=4,
    min_score_thresh=0.80)


    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
    vis_util.draw_bounding_boxes_on_image(
    white_bg_img ,
    np.squeeze(boxes),
    color='red',
    thickness=4)
    cv2.imwrite("bounding_boxes.jpg", white_bg_img)

    boxes = np.squeeze(boxes)
    for i in range(len(boxes)):
        box[0]=box[0]*height
        box[1]=box[1]*width
        box[2]=box[2]*height
        box[3]=box[3]*width
    roi = image[box[0]:box[2],box[1]:box[3]].copy()
    cv2.imwrite("box_{}.jpg".format(str(i)), roi)

This is the error I am getting:

Traceback (most recent call last):
  File "objd_1.py", line 75, in <module>
    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
AttributeError: 'str' object has no attribute 'shape'

I have searched a lot but am not able to identify what is wrong in my code. Why am I not able to extract the detected region as an image?


Solution

  • You are trying to take the shape of a file name (a string) instead of the image. Replace

    white_bg_img = 255*np.ones(PATH_TO_IMAGE[paths].shape, np.uint8)
    

    with

    white_bg_img = 255*np.ones(image.shape, np.uint8)
    

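    A quick way to see the difference: PATH_TO_IMAGE[paths] is just a file path (a Python string), while the array returned by cv2.imread has a shape. A minimal sketch (the path here is only an example):

    import cv2
    import numpy as np

    path = "img_test/example.jpeg"   # example path -- a plain string, so it has no .shape
    image = cv2.imread(path)         # numpy array of shape (height, width, 3)
                                     # (cv2.imread returns None if the file is missing)

    print(type(path))    # <class 'str'>
    print(image.shape)   # e.g. (480, 640, 3)

    # building the white background from the array's shape works
    white_bg_img = 255 * np.ones(image.shape, np.uint8)
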
    Edit: corrected code

    import os
    import cv2
    import numpy as np
    import tensorflow as tf
    import sys
    from glob import glob
    import glob
    import csv
    from PIL import Image
    import json
    
    sys.path.append("..")
    
    # Import utilities
    from utils import label_map_util
    from utils import visualization_utils as vis_util
    
    MODEL_NAME = 'inference_graph'
    
    CWD_PATH = os.getcwd()
    
    PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
    
    PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
    
    PATH_TO_IMAGE = list(glob.glob("C:\\new_multi_cat\\models\\research\\object_detection\\img_test\\*jpeg"))
    
    NUM_CLASSES = 3
    
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    
    detection_graph = tf.Graph()
    
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    
        sess = tf.Session(graph=detection_graph)
    
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    
    
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    
    
    for paths in range(len(PATH_TO_IMAGE)):
        image = cv2.imread(PATH_TO_IMAGE[paths])
        image_expanded = np.expand_dims(image, axis=0)
    
        (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})
    
    
        vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=4,
        min_score_thresh=0.80)
    
    
        white_bg_img = 255*np.ones(image.shape, np.uint8)
        vis_util.draw_bounding_boxes_on_image_array(
        white_bg_img ,
        np.squeeze(boxes),
        color='red',
        thickness=4)
        cv2.imwrite("bounding_boxes.jpg", white_bg_img)
    
        # detection_boxes are normalized [ymin, xmin, ymax, xmax]; scale to pixels before cropping
        height, width = image.shape[:2]
        boxes = np.squeeze(boxes)
        scores = np.squeeze(scores)
        for i in range(len(boxes)):
            # skip low-confidence detections (same threshold used for visualization)
            if scores[i] < 0.80:
                continue
            ymin = int(boxes[i][0] * height)
            xmin = int(boxes[i][1] * width)
            ymax = int(boxes[i][2] * height)
            xmax = int(boxes[i][3] * width)
            roi = image[ymin:ymax, xmin:xmax].copy()
            cv2.imwrite("box_{}_{}.jpg".format(paths, i), roi)