Search code examples
tensorflowimage-processingcomputer-visionobject-detectionfaster-rcnn

Faster RCNN Bounding Box Coordinate


I trained a model using Faster R-CNN; the model is used to follow the strips.

here is the output of my model

The python code I use to get this output is as follows:

import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Locations of the exported model, the label map and the image to analyse.
IMAGE = "test6.JPG"
MODEL_NAME = 'D:/object_detection/inference_graph'
# Derived from MODEL_NAME so the model directory is spelled out only once.
PATH_TO_CKPT = MODEL_NAME + "/frozen_inference_graph.pb"
PATH_TO_LABELS = "D:/object_detection/training/labelmap.pbtxt"
PATH_TO_IMAGE = "D:/object_detection/images/" + IMAGE
NUM_CLASSES = 2

# Build the category index that the drawing helper uses to label each box.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Import the serialized graph into its own tf.Graph and open a session on it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.compat.v1.Session(graph=detection_graph)


# Handles to the graph's input placeholder and its detection outputs.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

num_detections = detection_graph.get_tensor_by_name('num_detections:0')

image = cv2.imread(PATH_TO_IMAGE)
# cv2.imread signals an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError("Could not read image: " + PATH_TO_IMAGE)
# The model is fed the RGB copy; the BGR original is kept for drawing/display.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Add a leading axis so a single image is passed as a batch of one.
image_expanded = np.expand_dims(image_rgb, axis=0)

(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

# Draw every detection scoring at least 0.60 directly onto `image`.
vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.60)

cv2.imshow('Object detector', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

My aim is to get the coordinates of the boxes in the photo.

For this I tried:

# Attempt 1: capture whatever the drawing helper returns and print it.
# NOTE(review): presumably this is the annotated image array rather than the
# box coordinates -- confirm against the visualization_utils documentation.
visulaize = vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=1,
    min_score_thresh=0.90)
print(visulaize)

And I tried:

# Attempt 2: re-run inference. The chained assignment binds the full result
# of sess.run to `perception` and also unpacks it into the four outputs, so
# the print below dumps all four outputs at once, unfiltered by score.
perception = (boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})
print(perception)

Then I tried:

# Attempt 3: walk the first axis of `boxes`, skipping entries that are all zero.
# NOTE(review): the answer below indexes boxes[0], which suggests axis 0 is a
# batch axis -- if so, this loop runs once and prints every box of the image,
# not one box per iteration.
n=boxes.shape[0]

for i in range(n):
    if not np.any(boxes[i]):
        continue
    print(boxes[i])
# Drops all length-1 axes and prints the remaining array of boxes.
print(np.squeeze(boxes))

Lastly, I tried the following

# Attempt 4: assorted one-liners.
# Unpacking iterates over the first axis of `boxes`, so this only succeeds if
# that axis has exactly four entries; it does not split one box into fields.
x,y,h,w=boxes
print(x,y,h,w)

# `detection_boxes` is the graph tensor handle fetched with
# get_tensor_by_name, not the computed values, so this prints the handle.
print(detection_boxes)

print(boxes)

# Same problem as above: this unpacks the tensor handle, not box values.
x,y,w,h=detection_boxes
print(x,y,w,h)

# NOTE: `squeenze` is a misspelling of `squeeze` (used in the earlier
# attempts); NumPy has no such attribute, so this line fails with AttributeError.
print(np.squeenze(boxes))

print(boxes.shape)

But none of them gave satisfactory results.

I need your help to get the coordinates of the boxes.


Solution

  • You need to apply non-maximum suppression (NMS) and denormalize the boxes.

    def apply_non_max_suppression(boxes, scores, iou_thresh=.45, top_k=200):
        """Apply greedy non-maximum suppression.

        Repeatedly keeps the highest-scoring remaining box and discards every
        other remaining box whose intersection over union (IoU) with it
        exceeds `iou_thresh`.

        # Arguments
            boxes: Array-like of shape (num_boxes, 4) whose columns are read
                as y_min, x_min, y_max, x_max.
            scores: Array-like with one score per row of `boxes`.
            iou_thresh: float, boxes overlapping a kept box with an IoU above
                this value are removed.
            top_k: int, only the `top_k` highest-scoring boxes are considered.
        # Returns
            selected_indices: Integer numpy array of length `len(scores)`. Its
                first `num_selected_boxes` entries are the indices of the kept
                boxes in descending score order; the rest is zero padding.
            num_selected_boxes: int, number of kept boxes.
            For empty input the result is `(empty integer array, 0)`, so the
            return value can always be unpacked the same way.
        """
        # Check for "nothing to do" before touching `scores`, and return the
        # same (indices, count) pair as the normal path.
        if boxes is None or len(boxes) == 0:
            return np.zeros(0, dtype=int), 0

        boxes = np.asarray(boxes)
        scores = np.asarray(scores)

        y_min = boxes[:, 0]
        x_min = boxes[:, 1]
        y_max = boxes[:, 2]
        x_max = boxes[:, 3]
        areas = (x_max - x_min) * (y_max - y_min)

        # Integer buffer from the start so the stored indices are never floats.
        selected_indices = np.zeros(len(scores), dtype=int)
        num_selected_boxes = 0

        # Ascending sort: the best remaining candidate is always the last one.
        remaining = np.argsort(scores)[-top_k:]

        while len(remaining) > 0:
            best = remaining[-1]
            selected_indices[num_selected_boxes] = best
            num_selected_boxes += 1
            remaining = remaining[:-1]

            # Overlap rectangle between the kept box and every candidate left.
            inner_x_min = np.maximum(x_min[remaining], x_min[best])
            inner_y_min = np.maximum(y_min[remaining], y_min[best])
            inner_x_max = np.minimum(x_max[remaining], x_max[best])
            inner_y_max = np.minimum(y_max[remaining], y_max[best])

            # Negative extents mean "no overlap"; clamp them to zero.
            inner_widths = np.maximum(inner_x_max - inner_x_min, 0.0)
            inner_heights = np.maximum(inner_y_max - inner_y_min, 0.0)

            intersections = inner_widths * inner_heights
            unions = areas[remaining] + areas[best] - intersections
            intersec_over_union = intersections / unions

            # Keep only candidates that do not overlap the kept box too much.
            remaining = remaining[intersec_over_union <= iou_thresh]

        return selected_indices, num_selected_boxes
    
    def denormalize_box(box, image_shape):
        """Convert a normalized corner box to integer pixel coordinates.

        # Arguments
            box: Sequence whose first four entries are read as
                y_min, x_min, y_max, x_max; any further entries are ignored.
            image_shape: Pair of integers (height, width).
        # Returns
            List [y_min, x_min, y_max, x_max] in pixels: y values are scaled
            by the height, x values by the width, each truncated with int().
        """
        top, left, bottom, right = box[:4]
        rows, cols = image_shape

        return [
            int(top * rows),
            int(left * cols),
            int(bottom * rows),
            int(right * cols),
        ]
    
    # Run the detector on the prepared image batch.
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    conf_threshold = 0.5
    nms_threshold = 0.45
    image_shape = image.shape[:2]  # (height, width)

    # Confidence filter for the first (only) image in the batch. One boolean
    # mask is computed once and applied to all three outputs so they stay
    # aligned row for row.
    keep = scores[0] > conf_threshold
    filtered_scores = scores[0][keep]
    filtered_boxes = boxes[0][keep]
    filtered_classes = classes[0][keep]

    if filtered_scores.size:
        # Drop overlapping duplicates; only the first `count` indices are valid.
        indices, count = apply_non_max_suppression(
            filtered_boxes, filtered_scores, nms_threshold, 200)
        selected_indices = indices[:count]
    else:
        # Nothing passed the threshold: select nothing, so the final_* results
        # below are still defined (as empty) instead of missing.
        selected_indices = np.zeros(0, dtype=int)

    # Final detections after both filters.
    final_boxes = filtered_boxes[selected_indices]
    final_scores = filtered_scores[selected_indices]
    final_classes = filtered_classes[selected_indices]

    # Pixel coordinates, one [y_min, x_min, y_max, x_max] list per detection.
    final_boxes = [denormalize_box(box, image_shape) for box in final_boxes]