From 9449a098c5a8b6582b07e4c8a5c10a1a9e1f17be Mon Sep 17 00:00:00 2001 From: LeoWang Date: Tue, 8 Jan 2019 14:56:34 +0800 Subject: [PATCH 1/2] Support for training model from scratch --- train_ssd.py | 2 +- utility/scaffolds.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/train_ssd.py b/train_ssd.py index a6c09a8..5111144 100644 --- a/train_ssd.py +++ b/train_ssd.py @@ -105,7 +105,7 @@ # checkpoint related configuration tf.app.flags.DEFINE_string( 'checkpoint_path', './model', - 'The path to a checkpoint from which to fine-tune.') + 'The path to a checkpoint from which to fine-tune. If it is None, training model from scratch.') tf.app.flags.DEFINE_string( 'checkpoint_model_scope', 'vgg_16', 'Model scope in the checkpoint. None if the same as the trained model.') diff --git a/utility/scaffolds.py b/utility/scaffolds.py index 820dabb..13ac082 100644 --- a/utility/scaffolds.py +++ b/utility/scaffolds.py @@ -25,6 +25,9 @@ def get_init_fn_for_scaffold(model_dir, checkpoint_path, model_scope, checkpoint if tf.train.latest_checkpoint(model_dir): tf.logging.info('Ignoring --checkpoint_path because a checkpoint already exists in %s.' % model_dir) return None + if checkpoint_path is None: + tf.logging.info('Training detector from scratch.') + return None exclusion_scopes = [] if checkpoint_exclude_scopes: exclusion_scopes = [scope.strip() for scope in checkpoint_exclude_scopes.split(',')] From 1f4fc8a36beeb936cea462b1b151666ad851f75a Mon Sep 17 00:00:00 2001 From: Jamie Date: Tue, 21 May 2019 15:28:03 +0800 Subject: [PATCH 2/2] Update SSD512 model --- eval_ssd.py | 30 ++++-- net/ssd_net_512.py | 259 +++++++++++++++++++++++++++++++++++++++++++++ train_ssd.py | 30 ++++-- 3 files changed, 302 insertions(+), 17 deletions(-) create mode 100644 net/ssd_net_512.py diff --git a/eval_ssd.py b/eval_ssd.py index 1a064b8..1d30949 100644 --- a/eval_ssd.py +++ b/eval_ssd.py @@ -23,7 +23,7 @@ import numpy as np -from net import ssd_net +from net import ssd_net, ssd_net_512 from dataset import dataset_common from preprocessing import ssd_preprocessing @@ -103,6 +103,9 @@ FLAGS = tf.app.flags.FLAGS #CUDA_VISIBLE_DEVICES +if FLAGS.train_image_size == 512: + ssd_net = ssd_net_512 + def get_checkpoint(): if tf.train.latest_checkpoint(FLAGS.model_dir): tf.logging.info('Ignoring --checkpoint_path because a checkpoint already exists in %s' % FLAGS.model_dir) @@ -124,20 +127,29 @@ def get_checkpoint(): def input_pipeline(dataset_pattern='train-*', is_training=True, batch_size=FLAGS.batch_size): def input_fn(): out_shape = [FLAGS.train_image_size] * 2 - anchor_creator = anchor_manipulator.AnchorCreator(out_shape, - layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], - anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)], - extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)], - anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)], - #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)], - layer_steps = [8, 16, 32, 64, 100, 300]) + ssd300_anchor_params = {'layers_shapes': [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], 'anchor_scales': [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)], + 'extra_anchor_scales': [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)], + 'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., 
.5, 0.3333), (1., 2., .5), (1., 2., .5)], + 'layer_steps': [8, 16, 32, 64, 100, 300]} + ssd512_anchor_params = {'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)], + 'anchor_scales': [(0.07,), (0.15,), (0.3,), (0.45,), (0.6,), (0.75,), (0.9,)], + 'extra_anchor_scales': [(0.1025,), (0.2121,), (0.3674,), (0.5196,), (0.6708,), (0.8216,), (0.9721,)], + 'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)], + 'layer_steps': [8, 16, 32, 64, 128, 256, 512]} + if FLAGS.train_image_size == 512: + net_params = ssd512_anchor_params + print('using ssd512 model') + else: + net_params = ssd300_anchor_params + print('using ssd300 model') + anchor_creator = anchor_manipulator.AnchorCreator(out_shape, **net_params) all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors() num_anchors_per_layer = [] for ind in range(len(all_anchors)): num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) - anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6, + anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * len(net_params['layer_steps']), positive_threshold = FLAGS.match_threshold, ignore_threshold = FLAGS.neg_threshold, prior_scaling=[0.1, 0.1, 0.2, 0.2]) diff --git a/net/ssd_net_512.py b/net/ssd_net_512.py new file mode 100644 index 0000000..117b85f --- /dev/null +++ b/net/ssd_net_512.py @@ -0,0 +1,259 @@ +# Copyright 2018 Changan Wang + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +_BATCH_NORM_DECAY = 0.9 +_BATCH_NORM_EPSILON = 1e-5 +_USE_FUSED_BN = True + +# vgg_16/conv2/conv2_1/biases +# vgg_16/conv4/conv4_3/biases +# vgg_16/conv1/conv1_1/biases +# vgg_16/fc6/weights +# vgg_16/conv3/conv3_2/biases +# vgg_16/conv5/conv5_3/biases +# vgg_16/conv3/conv3_1/weights +# vgg_16/conv4/conv4_2/weights +# vgg_16/conv1/conv1_1/weights +# vgg_16/conv5/conv5_3/weights +# vgg_16/conv4/conv4_1/weights +# vgg_16/conv3/conv3_3/weights +# vgg_16/conv5/conv5_2/biases +# vgg_16/conv3/conv3_2/weights +# vgg_16/conv4/conv4_2/biases +# vgg_16/conv5/conv5_2/weights +# vgg_16/conv3/conv3_1/biases +# vgg_16/conv2/conv2_2/weights +# vgg_16/fc7/weights +# vgg_16/conv5/conv5_1/biases +# vgg_16/conv1/conv1_2/biases +# vgg_16/conv2/conv2_2/biases +# vgg_16/conv4/conv4_1/biases +# vgg_16/fc7/biases +# vgg_16/fc6/biases +# vgg_16/conv4/conv4_3/weights +# vgg_16/conv2/conv2_1/weights +# vgg_16/conv5/conv5_1/weights +# vgg_16/conv3/conv3_3/biases +# vgg_16/conv1/conv1_2/weights + +class ReLuLayer(tf.layers.Layer): + def __init__(self, name, **kwargs): + super(ReLuLayer, self).__init__(name=name, trainable=trainable, **kwargs) + self._name = name + def build(self, input_shape): + self._relu = lambda x : tf.nn.relu(x, name=self._name) + self.built = True + + def call(self, inputs): + return self._relu(inputs) + + def compute_output_shape(self, input_shape): + return tf.TensorShape(input_shape) + +def forward_module(m, inputs, training=False): + if isinstance(m, tf.layers.BatchNormalization) or isinstance(m, tf.layers.Dropout): + return m.apply(inputs, training=training) + return m.apply(inputs) + +class VGG16Backbone(object): + def __init__(self, data_format='channels_first'): + super(VGG16Backbone, self).__init__() + self._data_format = data_format + self._bn_axis = -1 if data_format == 'channels_last' else 1 + #initializer = tf.glorot_uniform_initializer glorot_normal_initializer + self._conv_initializer = tf.glorot_uniform_initializer + self._conv_bn_initializer = tf.glorot_uniform_initializer#lambda : tf.truncated_normal_initializer(mean=0.0, stddev=0.005) + # VGG layers + self._conv1_block = self.conv_block(2, 64, 3, (1, 1), 'conv1') + self._pool1 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool1') + self._conv2_block = self.conv_block(2, 128, 3, (1, 1), 'conv2') + self._pool2 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool2') + self._conv3_block = self.conv_block(3, 256, 3, (1, 1), 'conv3') + self._pool3 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool3') + self._conv4_block = self.conv_block(3, 512, 3, (1, 1), 'conv4') + self._pool4 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool4') + self._conv5_block = self.conv_block(3, 512, 3, (1, 1), 'conv5') + self._pool5 = tf.layers.MaxPooling2D(3, 1, padding='same', data_format=self._data_format, name='pool5') + self._conv6 = tf.layers.Conv2D(filters=1024, kernel_size=3, strides=1, padding='same', dilation_rate=6, + data_format=self._data_format, activation=tf.nn.relu, use_bias=True, + kernel_initializer=self._conv_initializer(), + bias_initializer=tf.zeros_initializer(), + name='fc6', _scope='fc6', _reuse=None) + self._conv7 = tf.layers.Conv2D(filters=1024, kernel_size=1, 
strides=1, padding='same', + data_format=self._data_format, activation=tf.nn.relu, use_bias=True, + kernel_initializer=self._conv_initializer(), + bias_initializer=tf.zeros_initializer(), + name='fc7', _scope='fc7', _reuse=None) + # SSD layers + with tf.variable_scope('additional_layers') as scope: + self._conv8_block = self.ssd_conv_block(256, 2, 'conv8') + self._conv9_block = self.ssd_conv_block(128, 2, 'conv9') + self._conv10_block = self.ssd_conv_block(128, 2, 'conv10') + self._conv11_block = self.ssd_conv_block(128, 2, 'conv11') + self._conv12_block = self.ssd_conv_block(128, 2, 'conv12', kernel_size=4) + + def l2_normalize(self, x, name): + with tf.name_scope(name, "l2_normalize", [x]) as name: + axis = -1 if self._data_format == 'channels_last' else 1 + square_sum = tf.reduce_sum(tf.square(x), axis, keep_dims=True) + x_inv_norm = tf.rsqrt(tf.maximum(square_sum, 1e-10)) + return tf.multiply(x, x_inv_norm, name=name) + + def forward(self, inputs, training=False): + # inputs should in BGR + feature_layers = [] + # forward vgg layers + for conv in self._conv1_block: + inputs = forward_module(conv, inputs, training=training) + inputs = self._pool1.apply(inputs) + for conv in self._conv2_block: + inputs = forward_module(conv, inputs, training=training) + inputs = self._pool2.apply(inputs) + for conv in self._conv3_block: + inputs = forward_module(conv, inputs, training=training) + inputs = self._pool3.apply(inputs) + for conv in self._conv4_block: + inputs = forward_module(conv, inputs, training=training) + # conv4_3 + with tf.variable_scope('conv4_3_scale') as scope: + weight_scale = tf.Variable([20.] * 512, trainable=training, name='weights') + if self._data_format == 'channels_last': + weight_scale = tf.reshape(weight_scale, [1, 1, 1, -1], name='reshape') + else: + weight_scale = tf.reshape(weight_scale, [1, -1, 1, 1], name='reshape') + + feature_layers.append(tf.multiply(weight_scale, self.l2_normalize(inputs, name='norm'), name='rescale') + ) + inputs = self._pool4.apply(inputs) + for conv in self._conv5_block: + inputs = forward_module(conv, inputs, training=training) + inputs = self._pool5.apply(inputs) + # forward fc layers + inputs = self._conv6.apply(inputs) + inputs = self._conv7.apply(inputs) + # fc7 + feature_layers.append(inputs) + # forward ssd layers + for layer in self._conv8_block: + inputs = forward_module(layer, inputs, training=training) + # conv8 + feature_layers.append(inputs) + for layer in self._conv9_block: + inputs = forward_module(layer, inputs, training=training) + # conv9 + feature_layers.append(inputs) + for layer in self._conv10_block: + inputs = forward_module(layer, inputs, training=training) + # conv10 + feature_layers.append(inputs) + for layer in self._conv11_block: + inputs = forward_module(layer, inputs, training=training) + # conv11 + feature_layers.append(inputs) + for layer in self._conv12_block: + inputs = forward_module(layer, inputs, training=training) + # conv12 + feature_layers.append(inputs) + return feature_layers + + def conv_block(self, num_blocks, filters, kernel_size, strides, name, reuse=None): + with tf.variable_scope(name): + conv_blocks = [] + for ind in range(1, num_blocks + 1): + conv_blocks.append( + tf.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same', + data_format=self._data_format, activation=tf.nn.relu, use_bias=True, + kernel_initializer=self._conv_initializer(), + bias_initializer=tf.zeros_initializer(), + name='{}_{}'.format(name, ind), _scope='{}_{}'.format(name, ind), _reuse=None) + 
) + return conv_blocks + + def ssd_conv_block(self, filters, strides, name, padding='same', reuse=None, kernel_size=3): + with tf.variable_scope(name): + conv_blocks = [] + conv_blocks.append( + tf.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding=padding, + data_format=self._data_format, activation=tf.nn.relu, use_bias=True, + kernel_initializer=self._conv_initializer(), + bias_initializer=tf.zeros_initializer(), + name='{}_1'.format(name), _scope='{}_1'.format(name), _reuse=None) + ) + conv_blocks.append( + tf.layers.Conv2D(filters=filters * 2, kernel_size=kernel_size, strides=strides, padding=padding, + data_format=self._data_format, activation=tf.nn.relu, use_bias=True, + kernel_initializer=self._conv_initializer(), + bias_initializer=tf.zeros_initializer(), + name='{}_2'.format(name), _scope='{}_2'.format(name), _reuse=None) + ) + return conv_blocks + + def ssd_conv_bn_block(self, filters, strides, name, reuse=None): + with tf.variable_scope(name): + conv_bn_blocks = [] + conv_bn_blocks.append( + tf.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', + data_format=self._data_format, activation=None, use_bias=False, + kernel_initializer=self._conv_bn_initializer(), + bias_initializer=None, + name='{}_1'.format(name), _scope='{}_1'.format(name), _reuse=None) + ) + conv_bn_blocks.append( + tf.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN, + name='{}_bn1'.format(name), _scope='{}_bn1'.format(name), _reuse=None) + ) + conv_bn_blocks.append( + ReLuLayer('{}_relu1'.format(name), _scope='{}_relu1'.format(name), _reuse=None) + ) + conv_bn_blocks.append( + tf.layers.Conv2D(filters=filters * 2, kernel_size=3, strides=strides, padding='same', + data_format=self._data_format, activation=None, use_bias=False, + kernel_initializer=self._conv_bn_initializer(), + bias_initializer=None, + name='{}_2'.format(name), _scope='{}_2'.format(name), _reuse=None) + ) + conv_bn_blocks.append( + tf.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN, + name='{}_bn2'.format(name), _scope='{}_bn2'.format(name), _reuse=None) + ) + conv_bn_blocks.append( + ReLuLayer('{}_relu2'.format(name), _scope='{}_relu2'.format(name), _reuse=None) + ) + return conv_bn_blocks + +def multibox_head(feature_layers, num_classes, num_anchors_depth_per_layer, data_format='channels_first'): + with tf.variable_scope('multibox_head'): + cls_preds = [] + loc_preds = [] + for ind, feat in enumerate(feature_layers): + loc_preds.append(tf.layers.conv2d(feat, num_anchors_depth_per_layer[ind] * 4, (3, 3), use_bias=True, + name='loc_{}'.format(ind), strides=(1, 1), + padding='same', data_format=data_format, activation=None, + kernel_initializer=tf.glorot_uniform_initializer(), + bias_initializer=tf.zeros_initializer())) + cls_preds.append(tf.layers.conv2d(feat, num_anchors_depth_per_layer[ind] * num_classes, (3, 3), use_bias=True, + name='cls_{}'.format(ind), strides=(1, 1), + padding='same', data_format=data_format, activation=None, + kernel_initializer=tf.glorot_uniform_initializer(), + bias_initializer=tf.zeros_initializer())) + + return loc_preds, cls_preds + + diff --git a/train_ssd.py b/train_ssd.py index a6c09a8..9df9643 100644 --- a/train_ssd.py +++ b/train_ssd.py @@ -21,7 +21,7 @@ import tensorflow as tf -from net import ssd_net +from net import ssd_net, ssd_net_512 from dataset import dataset_common from preprocessing import ssd_preprocessing @@ -124,6 +124,10 @@ FLAGS = 
tf.app.flags.FLAGS #CUDA_VISIBLE_DEVICES + +if FLAGS.train_image_size == 512: + ssd_net = ssd_net_512 + def validate_batch_size_for_multi_gpu(batch_size): """For multi-gpu, batch-size must be a multiple of the number of available GPUs. @@ -166,19 +170,29 @@ def get_init_fn(): def input_pipeline(dataset_pattern='train-*', is_training=True, batch_size=FLAGS.batch_size): def input_fn(): out_shape = [FLAGS.train_image_size] * 2 - anchor_creator = anchor_manipulator.AnchorCreator(out_shape, - layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], - anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)], - extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)], - anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)], - layer_steps = [8, 16, 32, 64, 100, 300]) + ssd300_anchor_params = {'layers_shapes': [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], 'anchor_scales': [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)], + 'extra_anchor_scales': [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)], + 'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)], + 'layer_steps': [8, 16, 32, 64, 100, 300]} + ssd512_anchor_params = {'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)], + 'anchor_scales': [(0.07,), (0.15,), (0.3,), (0.45,), (0.6,), (0.75,), (0.9,)], + 'extra_anchor_scales': [(0.1025,), (0.2121,), (0.3674,), (0.5196,), (0.6708,), (0.8216,), (0.9721,)], + 'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)], + 'layer_steps': [8, 16, 32, 64, 128, 256, 512]} + if FLAGS.train_image_size == 512: + net_params = ssd512_anchor_params + print('using ssd512 model') + else: + net_params = ssd300_anchor_params + print('using ssd300 model') + anchor_creator = anchor_manipulator.AnchorCreator(out_shape, **net_params) all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors() num_anchors_per_layer = [] for ind in range(len(all_anchors)): num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) - anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6, + anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * len(net_params['layer_steps']), positive_threshold = FLAGS.match_threshold, ignore_threshold = FLAGS.neg_threshold, prior_scaling=[0.1, 0.1, 0.2, 0.2])
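
Note on the first patch: the new branch in utility/scaffolds.py makes the warm-start decision three-way. A checkpoint already present in model_dir always wins (training resumes from it); otherwise a None --checkpoint_path means nothing is restored and the detector starts from randomly initialized weights; only when a real path is given does the fine-tune restore run. A minimal sketch of that decision, using an illustrative helper name that is not part of the repository:

import tensorflow as tf  # TF 1.x, as used by the repository

def choose_warm_start_checkpoint(model_dir, checkpoint_path):
    # Returning None means "build no init_fn": either training resumes from
    # model_dir, or (new in this patch) the model is trained from scratch.
    if tf.train.latest_checkpoint(model_dir):
        return None  # resume from model_dir; --checkpoint_path is ignored
    if checkpoint_path is None:
        return None  # train from scratch with random initialization
    return checkpoint_path  # fine-tune from the given checkpoint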
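
The second patch selects between two anchor-parameter dicts keyed on FLAGS.train_image_size, and correspondingly replaces the hard-coded allowed_borders = [1.0] * 6 with a length derived from the chosen config, because SSD512 predicts from seven feature maps instead of six. A standalone consistency check, written as an illustrative sketch rather than repository code:

SSD512_PARAMS = {
    'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
    'layer_steps':   [8, 16, 32, 64, 128, 256, 512],
}

def check_ssd512(params, image_size=512):
    num_layers = len(params['layers_shapes'])
    # allowed_borders must carry one entry per feature map, which is why the
    # patch sizes it from the config instead of assuming six layers.
    allowed_borders = [1.0] * num_layers
    for (h, w), step in zip(params['layers_shapes'], params['layer_steps']):
        # For a 512 input every layer step divides the image exactly:
        # 512/8 = 64, 512/16 = 32, ..., 512/512 = 1.
        assert (h, w) == (image_size // step, image_size // step)
    return allowed_borders

print(len(check_ssd512(SSD512_PARAMS)))  # 7 (the SSD300 config would give 6)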
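
Compared with the SSD300 backbone, net/ssd_net_512.py keeps the same VGG16 trunk but appends a fifth extra block (conv12, kernel size 4), so a 512x512 input yields seven detection feature maps: conv4_3 (64x64), fc7 (32x32), and one per extra block. Each extra block is a 1x1 convolution followed by a stride-2 'same'-padded convolution, so it halves the spatial size; a small sketch of that geometry (the helper name is illustrative):

import math

def extra_block_shapes(fc7_size=32, num_extra_blocks=5, stride=2):
    # Trace the spatial size through conv8 .. conv12 for a 512 input.
    # With 'same' padding and stride 2, each block yields ceil(size / 2);
    # the 4x4 kernel of conv12 still maps its 2x2 input to 1x1.
    sizes, size = [], fc7_size
    for _ in range(num_extra_blocks):
        size = math.ceil(size / stride)
        sizes.append(size)
    return sizes

print(extra_block_shapes())  # [16, 8, 4, 2, 1]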
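
The per-layer settings also determine the number of default boxes. Assuming the common SSD convention that each location gets one box per anchor ratio at the base scale plus one box per extra scale (how anchor_manipulator.AnchorCreator counts boxes is an assumption here, not taken from the patch), the two configurations reproduce the familiar prior counts of 8732 for SSD300 and 24564 for SSD512:

def total_anchors(layers_shapes, anchor_ratios, extra_scales_per_layer=1):
    # Hypothetical helper: boxes per location = ratios at base scale + extras.
    total = 0
    for (h, w), ratios in zip(layers_shapes, anchor_ratios):
        total += h * w * (len(ratios) + extra_scales_per_layer)
    return total

ssd300 = total_anchors(
    [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
    [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
     (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)])
ssd512 = total_anchors(
    [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
    [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
     (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)])
print(ssd300, ssd512)  # 8732 24564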