import yaml import numpy as np import numpy.random as npr from fast_rcnn.config import cfg from fast_rcnn.bbox_transform import bbox_transform from utils.cython_bbox import bbox_overlaps import pdb
# Debug switch: when True, proposal_target_layer enters its statistics
# printing branch (foreground/background label counts) on every call.
DEBUG = False
# Running totals used only by the DEBUG printout. They live at module level
# so that averages can be accumulated across successive calls.
_DEBUG_STATS = {'count': 0, 'fg_num': 0, 'bg_num': 0}


def _print_debug_stats(labels):
    """Print fg/bg label counts for this call and running averages."""
    num_fg = int((labels > 0).sum())
    num_bg = int((labels == 0).sum())

    _DEBUG_STATS['count'] += 1
    _DEBUG_STATS['fg_num'] += num_fg
    _DEBUG_STATS['bg_num'] += num_bg

    count = float(_DEBUG_STATS['count'])
    total_fg = float(_DEBUG_STATS['fg_num'])
    total_bg = float(_DEBUG_STATS['bg_num'])

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('num fg: {}'.format(num_fg))
    print('num bg: {}'.format(num_bg))
    print('num fg avg: {}'.format(total_fg / count))
    print('num bg avg: {}'.format(total_bg / count))
    # The ratio is undefined until at least one background RoI was sampled;
    # report infinity instead of raising ZeroDivisionError.
    ratio = total_fg / total_bg if total_bg > 0 else float('inf')
    print('ratio: {:.3f}'.format(ratio))


def proposal_target_layer(rpn_rois, gt_boxes, _num_classes):
    """Assign object detection proposals to ground-truth targets.

    Matches the incoming proposals against the ground-truth boxes and
    produces proposal classification labels and bounding-box regression
    targets.

    :param rpn_rois: proposal blob of shape [N, 5]; each row is
        [index of the image within the batch, 4 box coordinates]. Only one
        image is fed at a time, so the index must be 0 (asserted below).
    :param gt_boxes: ground-truth boxes of shape [M, 5]; the first four
        columns are the box coordinates and the last column is the class.
    :param _num_classes: total number of classes including background
        (e.g. 21 for Pascal VOC).
    :returns: tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)`` reshaped to (R, 5), (R, 1), (R, 4 * K),
        (R, 4 * K) and (R, 4 * K) respectively, where K is ``_num_classes``
        and R is the number of sampled RoIs. ``bbox_outside_weights`` is 1.0
        wherever ``bbox_inside_weights`` is positive and 0.0 elsewhere.
    """
    # Add the ground-truth boxes to the candidate set, prefixed with a
    # batch-index column of zeros so that they share the [N, 5] layout.
    all_rois = rpn_rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))

    # Sanity check: every RoI must belong to image 0.
    assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported'

    num_images = 1
    # Floor division keeps the per-image budget an integer on Python 3 as
    # well; the value is later used as a sample size.
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample RoIs with classification labels and box regression targets.
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image, rois_per_image, _num_classes)

    if DEBUG:
        _print_debug_stats(labels)

    rois = rois.reshape(-1, 5)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)

    # Outside weights mark the entries that carry a regression target.
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into the per-class 4*K layout.

    The input stores one row per RoI as (class, t0, t1, t2, t3). The network
    consumes a one-hot-like layout instead: a row of 4 * K values in which
    only the four columns belonging to the RoI's class are non-zero. This
    function builds that layout together with the matching loss weights.

    :param bbox_target_data: N x 5 array whose first column is the class
        and whose remaining columns are the regression targets (as produced
        by ``_compute_targets``).
    :param num_classes: number of classes K.
    :returns: tuple ``(bbox_targets, bbox_inside_weights)``, both float32
        arrays of shape N x 4K. Rows whose class is 0 stay all-zero in both
        arrays.
    """
    class_ids = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True)
    num_rois = class_ids.size

    bbox_targets = np.zeros((num_rois, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)

    # Only rows with a positive class receive targets and weights.
    positive_rows = np.where(class_ids > 0)[0]
    for row in positive_rows:
        first_col = 4 * class_ids[row]
        class_cols = slice(first_col, first_col + 4)
        bbox_targets[row, class_cols] = bbox_target_data[row, 1:]
        bbox_inside_weights[row, class_cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    :param ex_rois: N x 4 array with the RoIs that were kept by sampling.
    :param gt_rois: N x 4 array with the ground-truth box matched to each
        RoI; row i corresponds to row i of ``ex_rois``.
    :param labels: 1-D array with the label of each RoI in ``ex_rois``.
    :returns: 2-D float32 array whose first column is the label and whose
        remaining columns are the output of ``bbox_transform`` for that RoI.
    """
    num_ex, ex_width = ex_rois.shape[0], ex_rois.shape[1]
    num_gt, gt_width = gt_rois.shape[0], gt_rois.shape[1]

    # RoIs and their matched ground-truth boxes must line up one-to-one,
    # and both must be plain 4-coordinate boxes.
    assert num_ex == num_gt
    assert ex_width == 4
    assert gt_width == 4

    deltas = bbox_transform(ex_rois, gt_rois)
    label_column = labels[:, np.newaxis]
    stacked = np.concatenate((label_column, deltas), axis=1)
    return stacked.astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs with foreground and background examples.

    :param all_rois: all candidate RoIs (proposals plus ground-truth boxes);
        columns 1..4 hold the box coordinates.
    :param gt_boxes: ground-truth boxes; columns 0..3 hold the coordinates
        and column 4 the class.
    :param fg_rois_per_image: maximum number of foreground RoIs to sample
        per image (typically 32).
    :param rois_per_image: total number of RoIs to sample per image
        (typically 128).
    :param num_classes: total number of classes including background
        (typically 21).
    :returns: tuple ``(labels, rois, bbox_targets, bbox_inside_weights)``
        for the sampled RoIs; foreground RoIs come first, followed by the
        background RoIs whose labels are set to 0.
    """
    # Pairwise overlaps, indexed as [roi, gt_box].
    # np.float64 is the dtype the removed np.float alias referred to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    # For each RoI: the best-matching ground-truth box, its overlap and class.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: overlap with some ground-truth box of at least FG_THRESH.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against there being fewer candidates than requested.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Background: overlap within [BG_THRESH_LO, BG_THRESH_HI).
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Fill the rest of the budget with background RoIs. The int() cast keeps
    # the sample size valid even if the caller passed a float budget.
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # Selected indices: foreground first, then background.
    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # Everything after the foreground samples is background: clamp to 0.
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights