python, tensorflow, object-detection

Crop boxes in TensorFlow Object Detection and display them as JPG images


I'm using the TensorFlow Object Detection API to detect specific fields on passports, such as the full name. I've already trained the model and everything is working fine: it correctly identifies each field and draws a bounding box around it. However, now I just want to crop out the detected boxes.

Code:

import os
import cv2
import numpy as np
import tensorflow as tf
import sys

sys.path.append("..")

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

MODEL_NAME = 'inference_graph'

CWD_PATH = os.getcwd()

PATH_TO_CKPT = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_frozen_inference_graph.pb'

PATH_TO_LABELS = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_labelmap.pbtxt'

PATH_TO_IMAGE = 'C:/Users/UI UX/Desktop/(47).jpg'

NUM_CLASSES = 11

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

num_detections = detection_graph.get_tensor_by_name('num_detections:0')

image = cv2.imread(PATH_TO_IMAGE)

image_np = cv2.resize(image, (0, 0), fx=2.0, fy=2.0)

image_expanded = np.expand_dims(image_np, axis=0)

(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=2,
    min_score_thresh=0.60)

width, height = image_np.shape[:2]
for i, box in enumerate(np.squeeze(boxes)):
    if np.squeeze(scores)[i] > 0.80:
        (ymin, xmin, ymax, xmax) = (box[0]*height, box[1]*width, box[2]*height, box[3]*width)
        cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin)
        cv2.imshow('cropped_image', image_np)
        cv2.waitKey(0)

cv2.imshow('Object detector', image_np)

cv2.waitKey(0)

cv2.destroyAllWindows()

but I get this error:

Traceback (most recent call last):
  File "C:/Users/UI UX/PycharmProjects/pythonProject1/vedio_object_detection.py", line 71, in <module>
    cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\image_ops_impl.py", line 875, in crop_to_bounding_box
    array_ops.stack([-1, target_height, target_width, -1]))
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 855, in slice
    return gen_array_ops.slice(input, begin, size, name=name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 9222, in _slice
    "Slice", input=input, begin=begin, size=size, name=name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 632, in _apply_op_helper
    param_name=input_name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 61, in _SatisfiesTypeConstraint
    ", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'begin' has DataType float32 not in list of allowed values: int32, int64

Any kind of help?


Solution

  • I found the solution to this by adding a piece of code right after this line:

    (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})
    

    The original error occurs because tf.image.crop_to_bounding_box expects integer offsets and sizes, while the computed box coordinates are floats; cropping with plain NumPy slicing and int() coordinates avoids that. I added this:

    (frame_height, frame_width) = image.shape[:2]
    
    for i in range(len(np.squeeze(scores))):
        # print(np.squeeze(boxes)[i])
        # each box holds normalized [ymin, xmin, ymax, xmax]; scale to pixel coordinates
        ymin = int(np.squeeze(boxes)[i][0] * frame_height)
        xmin = int(np.squeeze(boxes)[i][1] * frame_width)
        ymax = int(np.squeeze(boxes)[i][2] * frame_height)
        xmax = int(np.squeeze(boxes)[i][3] * frame_width)
        # slice rows (y) first, then columns (x), from min to max
        cropped_img = image[ymin:ymax, xmin:xmax]
        cv2.imwrite(f'/your/path/img_{i}.png', cropped_img)
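
    A small caveat: the detection graph returns a fixed-size batch of boxes, so the loop above writes a crop for every box, including low-confidence ones. Below is a minimal variation of the same idea, reusing image, boxes and scores from the code above; the 0.80 threshold and the crop_{i}.jpg filenames are just placeholders:

    scores_arr = np.squeeze(scores)
    boxes_arr = np.squeeze(boxes)
    (frame_height, frame_width) = image.shape[:2]
    
    for i, box in enumerate(boxes_arr):
        if scores_arr[i] < 0.80:
            continue  # skip low-confidence detections
        # each box holds normalized [ymin, xmin, ymax, xmax]
        ymin = int(box[0] * frame_height)
        xmin = int(box[1] * frame_width)
        ymax = int(box[2] * frame_height)
        xmax = int(box[3] * frame_width)
        cropped_img = image[ymin:ymax, xmin:xmax]
        cv2.imwrite(f'crop_{i}.jpg', cropped_img)  # save the crop

    Saving with a .jpg extension is enough for cv2.imwrite to encode the crops as JPEG images, which is what the question asks for.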