The official demo shows that we can use the `show_result(img, result, out_file='result.jpg')`
API to draw the results on a picture.
# Build a detector from a config and a checkpoint, run inference on one image,
# and ask the model to write the visualised results to a new image file.
model = init_detector('configs/', 'checkpoints/any-checkpoints.pth', device='cpu')
results = inference_detector(model, 'some_pic.png')
model.show_result('some_pic.png', results, 'some_pic_results.png')
In the debugger I found that `results` is a tuple containing nested lists (`list[][]`).
How should we get the coordinates/shapes from it?
Is there a more detailed description of that format, or a direct API that can transform `results`
into an easier-to-use JSON (such as the COCO dataset format)?
Okay, I combined several methods and got a usable one. 🤣
If you have a better way, please let me know.
# this method combined:
# mmdetection.mmdet.models.detectors.base.BaseDetector.show_result
# open-mmlab\Lib\site-packages\mmdet\core\visualization\ imshow_det_bboxes, draw_bboxes, draw_labels, draw_masks
def convert_polygon(result, score_thr=0.3):
    """Convert a raw MMDetection 2.x inference result into plain geometry.

    The logic mirrors what ``BaseDetector.show_result`` and
    ``imshow_det_bboxes`` do before drawing, but returns the intermediate
    data instead of rendering it.

    Args:
        result: Detection output for a single image. Supported forms:
            a list with one ``(n, 4)`` or ``(n, 5)`` array per class
            (bbox-only models), a ``(bbox_result, segm_result)`` tuple, or a
            tuple of two dicts that each carry an ``'ensemble'`` entry.
        score_thr: Detections whose score (last bbox column) is not above
            this value are dropped. Values ``<= 0`` disable filtering.
            NOTE(review): the default was not visible in the original paste;
            0.3 is assumed here - confirm against the intended default.

    Returns:
        dict with the keys ``'labels'`` (class index per kept detection),
        ``'bboxes'`` (kept bbox rows), ``'polygons'`` (one matplotlib
        ``Polygon`` per bbox, built from its integer-truncated corners),
        ``'masks'`` (per detection, a list of ``Polygon`` contours, or
        ``None`` without segmentation), and ``'areas'`` / ``'positions'``
        (largest-component area and centroid for masks that have no
        matching bbox, otherwise ``None``).

    Raises:
        AssertionError: If the stacked arrays have unexpected shapes, or if
            ``score_thr > 0`` while the bboxes carry no score column.
    """
    import itertools

    import numpy as np

    # Multi-stage detectors may return ({'ensemble': ...}, {'ensemble': ...}).
    # Only unpack when `result` really is a pair: a bbox-only result is a
    # list with one array per class and must not be unpacked into two names.
    if isinstance(result, tuple) and len(result) == 2:
        ms_bbox_result, ms_segm_result = result
        if isinstance(ms_bbox_result, dict):
            result = (ms_bbox_result['ensemble'],
                      ms_segm_result['ensemble'])

    if isinstance(result, tuple):
        bbox_result, segm_result = result
        if isinstance(segm_result, tuple):
            segm_result = segm_result[0]  # ms rcnn
    else:
        bbox_result, segm_result = result, None

    # Flatten the per-class lists; the class index becomes the label.
    bboxes = np.vstack(bbox_result)
    labels = np.concatenate([
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ])

    # Stack the per-class masks in the same order as the labels. `segms`
    # stays None when there is nothing to stack so the shape checks below
    # never see a raw nested list.
    segms = None
    if segm_result is not None and len(labels) > 0:  # non empty
        import torch

        # Flatten one nesting level (what mmcv.concat_list does) using the
        # standard library, so mmcv is not needed here.
        segms = list(itertools.chain.from_iterable(segm_result))
        if isinstance(segms[0], torch.Tensor):
            segms = torch.stack(segms, dim=0).detach().cpu().numpy()
        else:
            segms = np.stack(segms, axis=0)

    assert bboxes.ndim == 2, \
        f' bboxes ndim should be 2, but its ndim is {bboxes.ndim}.'
    assert labels.ndim == 1, \
        f' labels ndim should be 1, but its ndim is {labels.ndim}.'
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5, \
        f' bboxes.shape[1] should be 4 or 5, but its {bboxes.shape[1]}.'
    assert bboxes.shape[0] <= labels.shape[0], \
        'labels.shape[0] should not be less than bboxes.shape[0].'
    assert segms is None or segms.shape[0] == labels.shape[0], \
        'segms.shape[0] and labels.shape[0] should have the same length.'

    if score_thr > 0:
        # Filtering needs the fifth (score) column.
        assert bboxes.shape[1] == 5
        keep = bboxes[:, -1] > score_thr
        bboxes = bboxes[keep, :]
        labels = labels[keep]
        if segms is not None:
            segms = segms[keep, ...]

    num_bboxes = bboxes.shape[0]
    ret_label = labels[:num_bboxes]
    ret_polygon = [_bbox_to_polygon(bbox) for bbox in bboxes]

    ret_mask = None
    ret_area = None
    ret_position = None
    if segms is not None:
        ret_mask = [_mask_to_polygons(mask) for mask in segms]

        # Masks beyond the last bbox get an area and a centroid. Labels and
        # bboxes are built from the same list above, so with the shape
        # checks in place this branch is normally not taken and both
        # values stay None.
        if num_bboxes < segms.shape[0]:
            areas = []
            positions = []
            for mask in segms[num_bboxes:]:
                area, centroid = _largest_component(mask)
                areas.append(area)
                positions.append(centroid)
            ret_area = np.stack(areas, axis=0)
            ret_position = positions

    return {'labels': ret_label,
            'bboxes': bboxes,
            'polygons': ret_polygon,
            'areas': ret_area,
            'positions': ret_position,
            'masks': ret_mask}


def _bbox_to_polygon(bbox):
    """Return a matplotlib Polygon over the integer-truncated bbox corners."""
    import numpy as np
    from matplotlib.patches import Polygon

    bbox_int = bbox.astype(np.int32)
    corners = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]],
               [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]]
    return Polygon(np.array(corners).reshape((4, 2)))


def _mask_to_polygons(mask):
    """Return one matplotlib Polygon per contour found in a bitmap mask."""
    from matplotlib.patches import Polygon
    from mmdet.core.mask.structures import bitmap_to_polygon

    # The second return value of bitmap_to_polygon is not needed here.
    contours, _ = bitmap_to_polygon(mask)
    return [Polygon(c) for c in contours]


def _largest_component(mask):
    """Return (area, centroid) of the largest connected component of a mask."""
    import cv2
    import numpy as np

    _, _, stats, centroids = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8)
    # Row 0 is skipped when searching, hence the +1 to index back into stats.
    largest_id = np.argmax(stats[1:, -1]) + 1
    return stats[largest_id, -1], centroids[largest_id]
The key part of that code:
# Excerpt: turn every instance bitmap in `segms` into a list of polygons.
from mmdet.core.mask.structures import bitmap_to_polygon

ret_mask = []
for mask in segms:
    # The second return value of bitmap_to_polygon is not needed here.
    contours, _ = bitmap_to_polygon(mask)
    # Keep one Polygon per contour, grouped per instance.
    ret_mask.append([Polygon(c) for c in contours])
test code:
# Run inference on a test picture and pass the raw results to convert_polygon.
model = init_detector(config_file, checkpoint_file, device='cpu')
results = inference_detector(model, test_pic_file)
poly = convert_polygon(results)
After converting `poly` into JSON, the format looks like this:
"labels": [1, 1, 2, ...],
"bboxes": [
[499.54632568359375, 0.0, 599.1744384765625, 332.5544128417969, 0.9999723434448242],
"polygons": [
[ [499.0, 0.0], [499.0, 332.0], [599.0, 332.0], [599.0, 0.0], [499.0, 0.0] ],
"areas": null,
"positions": null,
"masks": [
[510.0, 0.0],
[509.0, 1.0],
[508.0, 1.0],
Some fields are easy to guess:
- `labels` are the class ids of every instance.
- `bboxes` are the left-top-x, left-top-y, right-bottom-x, right-bottom-y coordinates
  of the rectangular bounding boxes. The last number of each entry in `bboxes`
  is the confidence value of that instance.
- `polygons` contains the same coordinate values as above.
- `areas` and `positions` are unclear to me, because they are always `null` when testing.
- `masks` contains the coordinate arrays of the instances. There is only one array per instance if that instance has no holes.

2023-07-31 update:
Recently I worked on MMDetection again and found that its APIs have changed a lot. The most important change is that in MMDetection 3 the return type of `inference_detector`
has become `DetDataSample`.
Any new updates will be pushed to this GitHub repo.