I'm using the tensorflow objection detection to detect specific data on passports like full name and other things. I've already trained the data and everything is working fine. It perfectly identifies data surrounding it with a bounding box. However, now I just want to crop the detected boxes.
Code:
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
sys.path.append("..")
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
MODEL_NAME = 'inference_graph'
CWD_PATH = os.getcwd()
PATH_TO_CKPT = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_frozen_inference_graph.pb'
PATH_TO_LABELS = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_labelmap.pbtxt'
PATH_TO_IMAGE = 'C:/Users/UI UX/Desktop/(47).jpg'
NUM_CLASSES = 11
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
image = cv2.imread(PATH_TO_IMAGE)
image_np = cv2.resize(image, (0, 0), fx=2.0, fy=2.0)
image_expanded = np.expand_dims(image_np, axis=0)
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=2,
min_score_thresh=0.60)
width, height = image_np.shape[:2]
for i, box in enumerate(np.squeeze(boxes)):
if(np.squeeze(scores)[i] > 0.80):
(ymin, xmin, ymax, xmax) = (box[0]*height, box[1]*width, box[2]*height, box[3]*width)
cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin)
cv2.imshow('cropped_image', image_np)
cv2.waitKey(0)
cv2.imshow('Object detector', image_np)
cv2.waitKey(0)
cv2.destroyAllWindows()
Traceback (most recent call last): File "C:/Users/UI UX/PycharmProjects/pythonProject1/vedio_object_detection.py", line 71, in cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin) File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\image_ops_impl.py", line 875, in crop_to_bounding_box array_ops.stack([-1, target_height, target_width, -1])) File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 855, in slice return gen_array_ops.slice(input, begin, size, name=name) File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 9222, in _slice "Slice", input=input, begin=begin, size=size, name=name) File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 632, in _apply_op_helper param_name=input_name) File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 61, in _SatisfiesTypeConstraint ", ".join(dtypes.as_dtype(x).name for x in allowed_list))) TypeError: Value passed to parameter 'begin' has DataType float32 not in list of allowed values: int32, int64
Any Kind of help?
I found the solution of this by add this pice of code after end of this line:
(boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})
I add this:
(frame_height, frame_width) = image.shape[:2]
for i in range(len(np.squeeze(scores))):
#print(np.squeeze(boxes)[i])
ymin = int((np.squeeze(boxes)[i][0]*frame_height))
xmin = int((np.squeeze(boxes)[i][1]*frame_width))
ymax = int((np.squeeze(boxes)[i][2]*frame_height))
xmax = int((np.squeeze(boxes)[i][3]*frame_width))
cropped_img = image[ymax:ymin,xmax:xmin]
cv2.imwrite(f'/your/path/img_{i}.png', cropped_img)