Reading the Faster R-CNN Source Code, Part 1: Faster R-CNN/lib/networks/network.py


1. Introduction

   This demo comes from the official Faster R-CNN release; I have only added comments on top of the official code, partly to help my own study and partly to share it here for discussion.
   The function and class in this file build a base class that encapsulates the methods a network needs, so that concrete networks can later be assembled by calling these pre-packaged methods directly.
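
   For example, here is a minimal sketch (my own illustration, modeled on how the subclasses in this repo such as VGGnet_train define setup(); ToyNet and its layer names are hypothetical) of the chained-call style that this base class enables. Every method decorated with @layer returns self, so a whole topology can be written as one expression:

# A minimal sketch, not from the repo: a subclass only describes its topology
# in setup(); each @layer method stores its output and returns self.
class ToyNet(Network):
    def setup(self):
        (self.feed('data')
             .conv(3, 3, 64, 1, 1, name='conv1_1')
             .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
             .fc(4096, name='fc6')
             .softmax(name='prob'))

# The constructor receives the named input tensors, e.g. (assumed usage):
# net = ToyNet({'data': data_placeholder})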

2. Code with Comments
# -*- coding:utf-8 -*-
import numpy as np
import tensorflow as tf
import roi_pooling_layer.roi_pooling_op as roi_pool_op
import roi_pooling_layer.roi_pooling_op_grad
from rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py
from rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py
from rpn_msr.proposal_target_layer_tf import proposal_target_layer as proposal_target_layer_py

DEFAULT_PADDING = 'SAME'


# Decorator used to wrap the layer-building methods below.
def layer(op):
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.inputs) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.inputs) == 1:
            layer_input = self.inputs[0]
        else:
            layer_input = list(self.inputs)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT: store the output under its name in self.layers.
        self.layers[name] = layer_output
        # This output is now the input for the next layer, so feed it back
        # into the network.
        self.feed(layer_output)
        # Return self for chained calls: whether it is conv or any other
        # method decorated with @layer, the call ultimately returns self.
        return self

    return layer_decorated


# Network is the base class.
class Network(object):
    def __init__(self, inputs, trainable=True):
        # self.inputs holds the outputs of the previous layer(s).
        self.inputs = []
        # self.layers maps layer names to the layers in use.
        self.layers = dict(inputs)
        # Whether the network is trainable.
        self.trainable = trainable
        self.setup()

    # This method must be implemented by subclasses.
    def setup(self):
        raise NotImplementedError('Must be subclassed.')

    def load(self, data_path, session, saver, ignore_missing=False):
        '''
        Load a model.
        :param data_path: path to the model file
        :param session: tf session
        :param saver: tf Saver instance
        :param ignore_missing: whether to ignore missing variables
        :return: None
        '''
        # If it is a ckpt file, restore it directly.
        if data_path.endswith('.ckpt'):
            saver.restore(session, data_path)
        # Otherwise treat it as a numpy archive.
        else:
            # Load the data with numpy.
            data_dict = np.load(data_path).item()
            # Assign each stored value to the matching tf Variable.
            for key in data_dict:
                with tf.variable_scope(key, reuse=True):
                    for subkey in data_dict[key]:
                        try:
                            var = tf.get_variable(subkey)
                            session.run(var.assign(data_dict[key][subkey]))
                            print "assign pretrain model " + subkey + " to " + key
                        except ValueError:
                            print "ignore " + key
                            if not ignore_missing:
                                raise
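    # For reference (my reading of the loop above, not documented in the
    # original): the .npy file is expected to hold a dict of dicts such as
    # {'conv1_1': {'weights': w, 'biases': b}, ...}, where the outer keys
    # match the variable scopes created by conv() and fc() below.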

    def feed(self, *args):
        '''
        :param args: variable-length argument list
        :return: self
        '''
        # Calling feed without arguments is an error.
        # Each argument usually takes one of two forms: a basestring, i.e.
        # the key of a layer to look up, or a tf Tensor that can be appended
        # to self.inputs directly for later use.
        assert len(args) != 0

        # Clear the previous layer's outputs; they have already been consumed.
        self.inputs = []

        # For each argument passed in:
        for layer in args:
            # If it is a basestring, look up the corresponding layer in self.layers.
            if isinstance(layer, basestring):
                try:
                    layer = self.layers[layer]
                    print layer
                except KeyError:
                    print self.layers.keys()
                    raise KeyError('Unknown layer name fed: %s' % layer)
            # Append the layer to self.inputs; it is the current output of
            # the network and the input to the next layer.
            self.inputs.append(layer)
        return self

    def get_output(self, layer):
        '''
        Look up a network layer by its key.
        :param layer: a string, the key of the layer
        :return: the corresponding layer
        '''
        try:
            layer = self.layers[layer]
        except KeyError:
            print self.layers.keys()
            raise KeyError('Unknown layer name fed: %s' % layer)
        return layer

    def get_unique_name(self, prefix):
        '''
        Generate a unique name with the given prefix.
        :param prefix: a string, the desired prefix
        :return: a name with that prefix that is not yet in use
        '''
        id = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, id)

    def make_var(self, name, shape, initializer=None, trainable=True):
        '''
        Create a tf variable with the given parameters.
        :param name: name of the variable
        :param shape: shape of the variable
        :param initializer: initializer for the variable
        :param trainable: whether the variable is trainable
        :return: the requested tf variable
        '''
        return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)

    def validate_padding(self, padding):
        '''
        Check that the padding mode is legal ('SAME' or 'VALID').
        :param padding: the given padding mode
        :return: None
        '''
        assert padding in ('SAME', 'VALID')

    # The methods below, marked with @layer, are wrapped by the layer
    # decorator defined above.

    @layer
    def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True):
        '''
        Convolution.
        :param input: the tensor to convolve
        :param k_h: kernel height
        :param k_w: kernel width
        :param c_o: number of kernels (output channels)
        :param s_h: stride height
        :param s_w: stride width
        :param name: name of the op
        :param relu: whether to apply a relu activation
        :param padding: padding mode
        :param group: number of groups
        :param trainable: whether the variables are trainable
        :return: the convolved tensor
        '''
        self.validate_padding(padding)
        c_i = input.get_shape()[-1]
        assert c_i % group == 0
        assert c_o % group == 0
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:

            init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
            init_biases = tf.constant_initializer(0.0)
            kernel = self.make_var('weights', [k_h, k_w, c_i / group, c_o], init_weights, trainable)
            biases = self.make_var('biases', [c_o], init_biases, trainable)

            if group == 1:
                conv = convolve(input, kernel)
            else:
                # Grouped convolution: split input and kernels along the
                # channel axis, convolve each group, then concatenate.
                input_groups = tf.split(3, group, input)
                kernel_groups = tf.split(3, group, kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                conv = tf.concat(3, output_groups)
            if relu:
                bias = tf.nn.bias_add(conv, biases)
                return tf.nn.relu(bias, name=scope.name)
            return tf.nn.bias_add(conv, biases, name=scope.name)

    @layer
    def relu(self, input, name):
        '''
        relu activation.
        :param input: the tensor to activate
        :param name: name
        :return: the activated tensor
        '''
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        '''
        Max pooling.
        :param input: the tensor to pool
        :param k_h: pooling kernel height
        :param k_w: pooling kernel width
        :param s_h: stride height
        :param s_w: stride width
        :param name: name
        :param padding: padding mode
        :return: the pooled tensor
        '''
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        '''
        Average pooling.
        :param input: the tensor to pool
        :param k_h: pooling kernel height
        :param k_w: pooling kernel width
        :param s_h: stride height
        :param s_w: stride width
        :param name: name
        :param padding: padding mode
        :return: the pooled tensor
        '''
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
        '''
        roi pooling layer.
        :param input: the inputs to pool, containing the feature map and the rois
        :param pooled_height: height of the pooled output
        :param pooled_width: width of the pooled output
        :param spatial_scale: spatial scale, usually the reciprocal of the total feature stride
        :param name: name
        :return: the pooled tensor
        '''
        # only use the first input
        if isinstance(input[0], tuple):
            input[0] = input[0][0]

        if isinstance(input[1], tuple):
            input[1] = input[1][0]

        print input
        return roi_pool_op.roi_pool(input[0], input[1],
                                    pooled_height,
                                    pooled_width,
                                    spatial_scale,
                                    name=name)[0]

    @layer
    def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name):
        '''
        Turn RPN predictions into object proposals.
        :param input: input tensors
        :param _feat_stride: feature stride, usually a list of integers
        :param anchor_scales: anchor scales, usually a list of integers
        :param cfg_key: a string selecting the relevant configuration
        :param name: name
        :return: batch indices and coordinates of the top N proposals after sorting
        '''
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        return tf.reshape(
            tf.py_func(proposal_layer_py, [input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales],
                       [tf.float32]), [-1, 5], name=name)

    @layer
    def anchor_target_layer(self, input, _feat_stride, anchor_scales, name):
        '''
        Assign anchors to ground-truth targets. Produces anchor classification
        labels and bounding-box regression targets.
        :param input: input tensors
        :param _feat_stride: feature stride, usually a list of integers
        :param anchor_scales: anchor scales, usually a list of integers
        :param name: name
        :return: rpn classification labels and bbox regression targets, plus
                 the inside and outside weights of the rpn bboxes
        '''
        if isinstance(input[0], tuple):
            input[0] = input[0][0]

        with tf.variable_scope(name) as scope:
            rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = tf.py_func(
                anchor_target_layer_py, [input[0], input[1], input[2], input[3], _feat_stride, anchor_scales],
                [tf.float32, tf.float32, tf.float32, tf.float32])

            rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name='rpn_labels')
            rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name='rpn_bbox_targets')
            rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights, name='rpn_bbox_inside_weights')
            rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights, name='rpn_bbox_outside_weights')

            return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

    @layer
    def proposal_target_layer(self, input, classes, name):
        """
        Assign object detection proposals to ground-truth targets. Produces
        proposal classification labels and bounding-box regression targets.
        :param input: rpn_rois and gt_boxes
        :param classes: number of classes
        :param name: name
        :return: rois, roi labels, bbox targets, bbox inside weights, bbox outside weights
        """
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        with tf.variable_scope(name) as scope:
            rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = tf.py_func(
                proposal_target_layer_py,
                [input[0], input[1], classes],
                [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            rois = tf.reshape(rois, [-1, 5], name='rois')
            labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels')
            bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets')
            bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights')
            bbox_outside_weights = tf.convert_to_tensor(bbox_outside_weights, name='bbox_outside_weights')

            return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

    @layer
    def reshape_layer(self, input, d, name):
        '''
        Rearrange the shape of the tensor.
        :param input: input tensor
        :param d: desired size of the channel (last) dimension
        :param name: name
        :return: the reshaped tensor
        '''
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob_reshape':
            # step 1: tmp1 = tf.transpose(input, [0, 3, 1, 2])  # reorder [N, H, W, C] to [N, C, H, W]
            # step 2: dim = tf.cast(tf.cast(input_shape[1], tf.float32) / tf.cast(d, tf.float32) * tf.cast(input_shape[3], tf.float32), tf.int32)
            #         # without the tf.cast calls: dim = input_shape[1] / d * input_shape[3]
            #         # this computes the third dimension needed for the reshape
            # step 3: tmp2 = tf.reshape(tmp1, [input_shape[0], int(d), dim, input_shape[2]])  # reshape
            # step 4: tmp3 = tf.transpose(tmp2, [0, 2, 3, 1], name=name)  # restore the original channel order
            # step 5: return tmp3
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [input_shape[0],
                                                                               int(d), tf.cast(
                    tf.cast(input_shape[1], tf.float32) / tf.cast(d, tf.float32) * tf.cast(input_shape[3], tf.float32),
                    tf.int32), input_shape[2]]), [0, 2, 3, 1], name=name)
        else:
            # step 1: tmp1 = tf.transpose(input, [0, 3, 1, 2])  # reorder [N, H, W, C] to [N, C, H, W]
            # step 2: dim = tf.cast(tf.cast(input_shape[1], tf.float32) * (tf.cast(input_shape[3], tf.float32) / tf.cast(d, tf.float32)), tf.int32)
            #         # without the tf.cast calls: dim = input_shape[1] * (input_shape[3] / d)
            #         # this computes the third dimension needed for the reshape
            # step 3: tmp2 = tf.reshape(tmp1, [input_shape[0], int(d), dim, input_shape[2]])  # reshape
            # step 4: tmp3 = tf.transpose(tmp2, [0, 2, 3, 1], name=name)  # restore the original channel order
            # step 5: return tmp3
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [input_shape[0],
                                                                               int(d), tf.cast(
                    tf.cast(input_shape[1], tf.float32) * (
                        tf.cast(input_shape[3], tf.float32) / tf.cast(d, tf.float32)), tf.int32),
                                                                               input_shape[2]]), [0, 2, 3, 1],
                                name=name)

    @layer
    def feature_extrapolating(self, input, scales_base, num_scale_base, num_per_octave, name):
        '''
        :param input:
        :param scales_base:
        :param num_scale_base:
        :param num_per_octave:
        :param name:
        :return:
        '''
        # Note: feature_extrapolating_op is never imported in this file, so
        # this layer cannot run as-is; it is kept here unchanged from the source.
        return feature_extrapolating_op.feature_extrapolating(input,
                                                              scales_base,
                                                              num_scale_base,
                                                              num_per_octave,
                                                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        '''
        local response normalization.
        :param input: input tensor
        :param radius: depth_radius
        :param alpha: alpha
        :param beta: beta
        :param name: name
        :param bias: bias term
        :return: the normalized tensor
        '''
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        '''
        Concatenate several tensors along the given axis.
        :param inputs: the sequence of input tensors
        :param axis: the axis to concatenate along
        :param name: name
        :return: the concatenated tensor
        '''
        return tf.concat(concat_dim=axis, values=inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True, trainable=True):
        '''
        Fully connected layer.
        :param input: input tensor
        :param num_out: output dimension
        :param name: name
        :param relu: whether to apply a relu activation
        :param trainable: whether the variables are trainable
        :return: the fully connected layer
        '''
        with tf.variable_scope(name) as scope:
            # only use the first input
            if isinstance(input, tuple):
                input = input[0]

            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # Flatten a 4-D input into [batch, dim] before the matmul.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [-1, dim])
            else:
                feed_in, dim = (input, int(input_shape[-1]))

            # The bbox regression head uses a smaller initial weight scale.
            if name == 'bbox_pred':
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.001)
                init_biases = tf.constant_initializer(0.0)
            else:
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
                init_biases = tf.constant_initializer(0.0)

            weights = self.make_var('weights', [dim, num_out], init_weights, trainable)
            biases = self.make_var('biases', [num_out], init_biases, trainable)

            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    def softmax(self, input, name):
        '''
        softmax layer.
        :param input: input tensor
        :param name: name
        :return: the softmax output
        '''
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob':
            # Apply softmax over the last axis while keeping the spatial layout.
            return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])),
                              [-1, input_shape[1], input_shape[2], input_shape[3]], name=name)
        else:
            return tf.nn.softmax(input, name=name)

    @layer
    def dropout(self, input, keep_prob, name):
        '''
        dropout layer.
        :param input: input tensor
        :param keep_prob: keep probability
        :param name: name
        :return: the dropout output
        '''
        return tf.nn.dropout(input, keep_prob, name=name)
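
The two branches of reshape_layer are the hardest part of this file to read. The following NumPy sketch (my own illustration, assuming the usual 9 anchors; none of these variable names come from the repo) mirrors the transpose-reshape-transpose sequence for the RPN classification scores and makes the shapes explicit:

import numpy as np

N, H, W, A = 1, 14, 14, 9                    # batch, height, width, anchors
rpn_cls_score = np.zeros((N, H, W, 2 * A))   # [N, H, W, 18]

# reshape_layer(..., d=2): [N, H, W, 18] -> [N, 18, H, W] -> [N, 2, 9H, W] -> [N, 9H, W, 2]
x = rpn_cls_score.transpose(0, 3, 1, 2)      # [N, 18, H, W]
x = x.reshape(N, 2, A * H, W)                # dim = H * 18 / 2 = 9H
reshaped = x.transpose(0, 2, 3, 1)           # [N, 9H, W, 2]
print reshaped.shape                         # (1, 126, 14, 2)

# Now every position of the last axis holds one anchor's (bg, fg) pair, so a
# plain softmax over the last axis scores each anchor.
# reshape_layer(..., d=18) then applies the inverse: [N, 9H, W, 2] -> [N, H, W, 18].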
