1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
|
import yaml import numpy as np import numpy.random as npr from fast_rcnn.config import cfg from fast_rcnn.bbox_transform import bbox_transform from utils.cython_bbox import bbox_overlaps import pdb
DEBUG = False
def proposal_target_layer(rpn_rois, gt_boxes, _num_classes): """ Assign object detection proposals to ground-truth targets. Produces proposal classification labels and bounding-box regression targets. 将之前产生的目标检测的proposals和ground-truth目标进行匹配对齐,从而产生proposals的分类labels和bbox的回归目标。 :param rpn_rois:blob, shape为[N, 5],每一行的组成为[proposals的输入图片的索引(1),proposals坐标(4)]。 (由于每次值feed一张图片,这里的图片索引一般为0) :param gt_boxes: ground truth boxes,shape为[M, 5],每一行的前四个元素表示gt box的坐标,最后一个元素表示类别。 :param _num_classes: 类别的总数目,包括背景,这里一本为21,(Pascal VOC的类别数目为21)。 :returns: """
all_rois = rpn_rois
zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported'
num_images = 1 rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( all_rois, gt_boxes, fg_rois_per_image, rois_per_image, _num_classes)
if DEBUG: print 'num fg: {}'.format((labels > 0).sum()) print 'num bg: {}'.format((labels == 0).sum()) _count += 1 _fg_num += (labels > 0).sum() _bg_num += (labels == 0).sum() print 'num fg avg: {}'.format(_fg_num / _count) print 'num bg avg: {}'.format(_bg_num / _count) print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num))
rois = rois.reshape(-1, 5) labels = labels.reshape(-1, 1) bbox_targets = bbox_targets.reshape(-1, _num_classes * 4) bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes): """ Bounding-box regression targets (bbox_target_data) are stored in a compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used by the network (i.e. only one class has non-zero targets).
这个函数目的一个是将bbox targets扩展转换成类似one-hot的形式,另一个目的是返回bbox inside weights。 :param bbox_target_data: _compute_targets函数生成的labels和bbox回归目标的关联矩阵,形如N x (class, tx, ty, tw, th)。 :param num_classes: 类别数目。
Returns: bbox_target (ndarray): N x 4K blob of regression targets N × 4K的矩阵,表示bbox回归的目标。 bbox_inside_weights (ndarray): N x 4K blob of loss weights N × 4K的矩阵,用以产生loss值。 """
clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) inds = np.where(clss > 0)[0] for ind in inds: cls = clss[ind] start = 4 * cls end = start + 4 bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels): """ Compute bounding-box regression targets for an image. 计算bbox的回归目标 :param ex_rois: 经过一系列计算保留下来的rois。 :param gt_rois: 和ex_rois拥有最大IOU的gt box,该参数中的gt box和前面额ex rois一一对应。 :param labels: 前面ex rois的labels :return: bbox的labels和回归目标共同组成的二维数组。 """
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois)
if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): """ Generate a random sample of RoIs comprising foreground and background examples. 生成包含前景和背景示例的RoI的随机样本。 :param all_rois: 所有的rois,包括产生的proposals和gt boxes :param gt_boxes: ground truth boxes :param fg_rois_per_image: 每张图片的前景rois数目,该值一般为32 :param rois_per_image: 平均每张图片上的rois总数目,该值一般为128 :param num_classes: 类别总数目,该值一般为21,包括背景 :returns: """ overlaps = bbox_overlaps( np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1) max_overlaps = overlaps.max(axis=1) labels = gt_boxes[gt_assignment, 4]
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
keep_inds = np.append(fg_inds, bg_inds) labels = labels[keep_inds] labels[fg_rois_per_this_image:] = 0 rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
|