SSD算法,其英文全名是Single Shot MultiBox Detector。
SSD的网络结构流程如下图所示:
SSD总共11个block。相比于原始的VGG16,SSD修改了第5个block的第4层(即pool5,由2×2、stride为2的池化改为3×3、stride为1的池化),去掉了原来的第6、7、8层(即VGG16的全连接层fc6、fc7、fc8),并分别增加了图中红框、黑框、黄框、蓝框所示的部分。
其TensorFlow实现代码如下:
# Builds the SSD-300 feature extractor on top of a VGG-16 backbone and records
# the output of every block in `end_points`.
#
# This excerpt relies on names defined outside of it: `inputs`, `scope`,
# `reuse`, `is_training`, `dropout_keep_prob`, the `end_points` dict, and the
# `tf`, `slim` and `custom_layers` modules.
with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
    # Original VGG-16 blocks.
    # Block 1.
    net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
    end_points['block1'] = net
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    # Block 2.
    net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
    end_points['block2'] = net
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    # Block 3.
    net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
    end_points['block3'] = net
    net = slim.max_pool2d(net, [2, 2], scope='pool3')
    # Block 4.
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
    end_points['block4'] = net
    net = slim.max_pool2d(net, [2, 2], scope='pool4')
    # Block 5.
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
    end_points['block5'] = net
    # NOTE: unlike pool1-pool4, pool5 uses a 3x3 window with stride 1.
    net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

    # Additional SSD blocks.
    # Block 6: 3x3 dilated (atrous) convolution.
    # NOTE: rate=6 is the dilation rate of the convolution.
    net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
    end_points['block6'] = net
    # tf.layers.dropout expects the fraction of units to DROP, so the keep
    # probability is converted here (identical when dropout_keep_prob == 0.5).
    net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob,
                            training=is_training)
    # Block 7: 1x1 convolution.
    # NOTE: kernel size is 1x1 here.
    net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
    end_points['block7'] = net
    net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob,
                            training=is_training)

    # Block 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last
    # two blocks).
    end_point = 'block8'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
        # NOTE: this explicit pad effectively supplies the padding for the
        # 'VALID' convolution that follows.
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3',
                          padding='VALID')
    end_points[end_point] = net

    end_point = 'block9'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        # NOTE: this explicit pad effectively supplies the padding for the
        # 'VALID' convolution that follows.
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3',
                          padding='VALID')
    end_points[end_point] = net

    # Blocks 10 and 11 use neither the explicit pad nor a stride argument.
    end_point = 'block10'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net

    end_point = 'block11'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net
设计理念
参考博客: