caffe2/python/models/resnet.py - platform/external/pytorch - Git at Google

 ## @package resnet
 # Module caffe2.python.models.resnet


 from caffe2.python import brew
 import logging

 '''
 Utility for creating ResNe(X)t
 "Deep Residual Learning for Image Recognition" by He, Zhang et. al. 2015
 "Aggregated Residual Transformations for Deep Neural Networks" by Xie et. al. 2016
 '''


 class ResNetBuilder():
     '''
     Helper class for constructing residual blocks.

     The builder tracks a running ``prev_blob``: every ``add_*`` call feeds
     it to a brew op and replaces it with that op's result.  ``comp_count``
     is the number of residual components finished so far; ``comp_idx`` is a
     per-component counter (reset when a block starts, bumped by every
     ``add_conv`` and once after the sum).  In this class both counters are
     used only to build blob names.
     '''

     def __init__(
         self,
         model,
         prev_blob,
         no_bias,
         is_test,
         bn_epsilon=1e-5,
         bn_momentum=0.9,
     ):
         '''
         model: passed as first argument to every brew.* call
         prev_blob: blob the first added op reads from
         no_bias: truthy to request bias-free convolutions (stored as 0/1)
         is_test: forwarded to brew.spatial_bn
         bn_epsilon, bn_momentum: forwarded to brew.spatial_bn
         '''
         self.model = model
         self.comp_count = 0
         self.comp_idx = 0
         self.prev_blob = prev_blob
         self.is_test = is_test
         self.bn_epsilon = bn_epsilon
         self.bn_momentum = bn_momentum
         self.no_bias = 1 if no_bias else 0

     def add_conv(
         self,
         in_filters,
         out_filters,
         kernel,
         stride=1,
         group=1,
         pad=0,
     ):
         '''
         Append an MSRAFill-initialised convolution reading ``prev_blob``.

         ``comp_idx`` is bumped first, so the output blob is named
         comp_<comp_count>_conv_<comp_idx>.  Returns the new ``prev_blob``.
         '''
         self.comp_idx += 1
         self.prev_blob = brew.conv(
             self.model,
             self.prev_blob,
             'comp_%d_conv_%d' % (self.comp_count, self.comp_idx),
             in_filters,
             out_filters,
             weight_init=("MSRAFill", {}),
             kernel=kernel,
             stride=stride,
             group=group,
             pad=pad,
             no_bias=self.no_bias,
         )
         return self.prev_blob

     def add_relu(self):
         '''Append a relu whose output blob is its input blob.'''
         self.prev_blob = brew.relu(
             self.model,
             self.prev_blob,
             self.prev_blob,  # in-place
         )
         return self.prev_blob

     def add_spatial_bn(self, num_filters):
         '''
         Append a spatial batch norm over ``num_filters`` channels.

         ``comp_idx`` is not bumped here, so the blob name carries the index
         of the most recently added convolution.
         '''
         self.prev_blob = brew.spatial_bn(
             self.model,
             self.prev_blob,
             'comp_%d_spatbn_%d' % (self.comp_count, self.comp_idx),
             num_filters,
             epsilon=self.bn_epsilon,
             momentum=self.bn_momentum,
             is_test=self.is_test,
         )
         return self.prev_blob

     def _project_shortcut(
         self,
         shortcut_blob,
         in_filters,
         out_filters,
         stride,
         spatial_batch_norm,
         **bn_kwargs
     ):
         '''Project the shortcut with a 1x1 conv (plus optional spatial BN).'''
         shortcut_blob = brew.conv(
             self.model,
             shortcut_blob,
             'shortcut_projection_%d' % self.comp_count,
             in_filters,
             out_filters,
             weight_init=("MSRAFill", {}),
             kernel=1,
             stride=stride,
             no_bias=self.no_bias,
         )
         if spatial_batch_norm:
             # is_test goes last so the keyword order matches what each
             # block passed before this helper was extracted.
             bn_kwargs['is_test'] = self.is_test
             shortcut_blob = brew.spatial_bn(
                 self.model,
                 shortcut_blob,
                 'shortcut_projection_%d_spatbn' % self.comp_count,
                 out_filters,
                 **bn_kwargs
             )
         return shortcut_blob

     def _merge_with_shortcut(self, shortcut_blob, residual_blob):
         '''Sum shortcut and residual, apply relu, and close the component.'''
         self.prev_blob = brew.sum(
             self.model, [shortcut_blob, residual_blob],
             'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
         )
         self.comp_idx += 1
         self.add_relu()

         # Keep track of number of high level components of this ResNetBuilder
         self.comp_count += 1

     def add_bottleneck(
         self,
         input_filters,   # num of feature maps from preceding layer
         base_filters,    # num of filters internally in the component
         output_filters,  # num of feature maps to output
         stride=1,
         group=1,
         spatial_batch_norm=True,
     ):
         '''
         Add a "bottleneck" component as described in He et al. Figure 3
         (right): 1x1 conv, 3x3 conv, 1x1 conv, summed with the shortcut.

         stride and group apply to the 3x3 convolution; stride is also used
         by the shortcut projection.  When spatial_batch_norm is truthy each
         convolution (and the projection) is followed by a spatial BN.

         Returns output_filters, so the caller can feed it back in as the
         next component's input_filters.
         '''
         self.comp_idx = 0
         shortcut_blob = self.prev_blob

         # 1x1
         self.add_conv(
             input_filters,
             base_filters,
             kernel=1,
             stride=1,
         )
         if spatial_batch_norm:
             self.add_spatial_bn(base_filters)
         self.add_relu()

         # 3x3 (note the pad, required for keeping dimensions)
         self.add_conv(
             base_filters,
             base_filters,
             kernel=3,
             stride=stride,
             group=group,
             pad=1,
         )
         if spatial_batch_norm:
             self.add_spatial_bn(base_filters)
         self.add_relu()

         # 1x1
         last_conv = self.add_conv(base_filters, output_filters, kernel=1)
         if spatial_batch_norm:
             last_conv = self.add_spatial_bn(output_filters)

         # Summation with input signal (shortcut)
         # When the number of feature maps mismatch between the input
         # and output (this usually happens when the residual stage
         # changes), we need to do a projection for the short cut
         if output_filters != input_filters:
             shortcut_blob = self._project_shortcut(
                 shortcut_blob,
                 input_filters,
                 output_filters,
                 stride,
                 spatial_batch_norm,
                 epsilon=self.bn_epsilon,
                 momentum=self.bn_momentum,
             )

         self._merge_with_shortcut(shortcut_blob, last_conv)
         return output_filters

     def add_simple_block(
         self,
         input_filters,
         num_filters,
         down_sampling=False,
         spatial_batch_norm=True
     ):
         '''
         Add a residual block made of two 3x3 convolutions.

         input_filters: num of feature maps from preceding layer
         num_filters: num of feature maps produced by both convolutions
         down_sampling: when truthy, the first convolution and the shortcut
             projection use stride 2 instead of 1
         spatial_batch_norm: when truthy, each convolution (and the shortcut
             projection) is followed by a spatial BN

         Returns num_filters, mirroring add_bottleneck's return value.
         '''
         self.comp_idx = 0
         shortcut_blob = self.prev_blob

         # Plain truthiness: an `is False` identity test would treat falsy
         # non-bool values such as 0 or None as a request to down-sample.
         stride = 2 if down_sampling else 1

         # 3x3
         self.add_conv(
             input_filters,
             num_filters,
             kernel=3,
             stride=stride,
             pad=1
         )
         if spatial_batch_norm:
             self.add_spatial_bn(num_filters)
         self.add_relu()

         last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
         if spatial_batch_norm:
             last_conv = self.add_spatial_bn(num_filters)

         # Increase of dimensions, need a projection for the shortcut
         if num_filters != input_filters:
             # NOTE(review): this BN uses a fixed epsilon of 1e-3 and passes
             # no momentum, unlike the main path, which uses bn_epsilon and
             # bn_momentum.  Left as-is because changing it would alter
             # numerics -- confirm whether that is intended.
             shortcut_blob = self._project_shortcut(
                 shortcut_blob,
                 input_filters,
                 num_filters,
                 stride,
                 spatial_batch_norm,
                 epsilon=1e-3,
             )

         self._merge_with_shortcut(shortcut_blob, last_conv)
         return num_filters


 def create_resnet_32x32(
     model, data, num_input_channels, num_groups, num_labels, is_test=False
 ):
     '''
     Create residual net for smaller images (sec 4.2 of He et al. (2015)).

     The stem is a 3x3 convolution to 16 channels, a spatial BN and a relu.
     Three stages of widths 16, 32 and 64 follow, each holding
     2 * num_groups simple blocks; the first block of the second and third
     stage down-samples.  An average pool, a fully connected layer and a
     softmax finish the net.

     num_groups = 'n' in the paper
     Returns the softmax blob.
     '''
     stage_widths = (16, 32, 64)

     # Stem: conv1 + spatial BN + relu
     brew.conv(
         model, data, 'conv1', num_input_channels, 16, kernel=3, stride=1
     )
     brew.spatial_bn(
         model, 'conv1', 'conv1_spatbn', 16, epsilon=1e-3, is_test=is_test
     )
     brew.relu(model, 'conv1_spatbn', 'relu1')

     builder = ResNetBuilder(model, 'relu1', no_bias=0, is_test=is_test)

     blocks_per_stage = 2 * num_groups
     incoming_width = 16
     for stage, width in enumerate(stage_widths):
         for block in range(blocks_per_stage):
             opens_stage = block == 0
             builder.add_simple_block(
                 incoming_width if opens_stage else width,
                 width,
                 # Only the first block of stages after the first one
                 # down-samples.
                 down_sampling=opens_stage and stage > 0,
             )
         incoming_width = width

     # Final layers
     brew.average_pool(
         model, builder.prev_blob, 'final_avg', kernel=8, stride=1
     )
     brew.fc(model, 'final_avg', 'last_out', 64, num_labels)
     return brew.softmax(model, 'last_out', 'softmax')


 # Supported `num_layers` values for create_resnext, each mapped to the number
 # of bottleneck components in the four residual stages (first to last).
 RESNEXT_BLOCK_CONFIG = {
     18: (2, 2, 2, 2),
     34: (3, 4, 6, 3),
     50: (3, 4, 6, 3),
     101: (3, 4, 23, 3),
     152: (3, 8, 36, 3),
     200: (3, 24, 36, 3),
 }

 # Stride of the first component of each residual stage; create_resnext uses
 # stride 1 for every later component of a stage.
 RESNEXT_STRIDES = [1, 2, 2, 2]

 # NOTE: both calls below run at import time: basicConfig() touches the root
 # logger and the "resnext_builder" logger is set to DEBUG.
 # create_resnext has its own `log` parameter, which shadows this
 # module-level name inside that function.
 logging.basicConfig()
 log = logging.getLogger("resnext_builder")
 log.setLevel(logging.DEBUG)


 # The conv1 and final_avg kernel/stride args provide a basic mechanism for
 # adapting resnet50 for different sizes of input images.
 def create_resnext(
     model,
     data,
     num_input_channels,
     num_labels,
     num_layers,
     num_groups,
     num_width_per_group,
     label=None,
     is_test=False,
     no_loss=False,
     no_bias=1,
     conv1_kernel=7,
     conv1_stride=2,
     final_avg_kernel=7,
     log=None,
     bn_epsilon=1e-5,
     bn_momentum=0.9,
 ):
     '''
     Build a ResNe(X)t: conv1 + spatial BN + relu + max pool, four stages of
     bottleneck components, an average pool and a fully connected layer.

     num_layers: key into RESNEXT_BLOCK_CONFIG selecting the per-stage
         component counts
     num_groups, num_width_per_group: their product is the inner width of
         the first stage's bottlenecks (doubled after every stage);
         num_groups is also the `group` of each bottleneck's 3x3 convolution
     label: when given (and no_loss is falsy) a SoftmaxWithLoss is added
     no_loss: when truthy, the FC output is returned without a softmax
     no_bias: forwarded to conv1 and to the residual-block builder
     conv1_kernel, conv1_stride: kernel and stride of conv1
     final_avg_kernel: kernel passed to the final average pool
     log: logger used to report an invalid num_layers; when None the
         "resnext_builder" logger is used
     bn_epsilon, bn_momentum: used for every spatial BN, i.e. conv1's and
         those inside the residual blocks

     Returns the FC blob if no_loss is truthy, else (softmax, loss) if label
     is given, else the softmax blob.

     Raises KeyError if num_layers is not a key of RESNEXT_BLOCK_CONFIG.
     '''
     if num_layers not in RESNEXT_BLOCK_CONFIG:
         # The `log` parameter shadows the module-level logger, so with the
         # default of None there would be nothing to call .error() on.
         if log is None:
             log = logging.getLogger("resnext_builder")
         log.error("{}-layer is invalid for resnext config".format(num_layers))
         # Same exception the config lookup below would raise.
         raise KeyError(num_layers)

     num_blocks = RESNEXT_BLOCK_CONFIG[num_layers]
     strides = RESNEXT_STRIDES
     num_filters = [64, 256, 512, 1024, 2048]

     if num_layers in [18, 34]:
         num_filters = [64, 64, 128, 256, 512]

     # the number of features before the last FC layer
     num_features = num_filters[-1]

     # conv1 + maxpool
     # NOTE(review): pad stays 3 (half of the default 7x7 kernel, rounded
     # down) even when conv1_kernel is overridden -- confirm that is intended.
     conv_blob = brew.conv(
         model,
         data,
         'conv1',
         num_input_channels,
         num_filters[0],
         weight_init=("MSRAFill", {}),
         kernel=conv1_kernel,
         stride=conv1_stride,
         pad=3,
         no_bias=no_bias
     )

     bn_blob = brew.spatial_bn(
         model,
         conv_blob,
         'conv1_spatbn_relu',
         num_filters[0],
         epsilon=bn_epsilon,
         momentum=bn_momentum,
         is_test=is_test
     )
     relu_blob = brew.relu(model, bn_blob, bn_blob)  # in-place
     max_pool = brew.max_pool(model, relu_blob, 'pool1', kernel=3, stride=2, pad=1)

     # Residual blocks...
     # Forward the caller's BN settings so the blocks agree with conv1's BN.
     builder = ResNetBuilder(model, max_pool, no_bias=no_bias,
                             is_test=is_test, bn_epsilon=bn_epsilon,
                             bn_momentum=bn_momentum)

     inner_dim = num_groups * num_width_per_group

     # 4 different kinds of residual blocks
     for residual_idx in range(4):
         residual_num = num_blocks[residual_idx]
         residual_stride = strides[residual_idx]
         dim_in = num_filters[residual_idx]

         for blk_idx in range(residual_num):
             # add_bottleneck returns its output width, which becomes the
             # input width of the next component in this stage.
             dim_in = builder.add_bottleneck(
                 dim_in,
                 inner_dim,
                 num_filters[residual_idx + 1],  # dim out
                 stride=residual_stride if blk_idx == 0 else 1,
                 group=num_groups,
             )

         inner_dim *= 2

     # Final layers
     # NOTE(review): global_pooling=True is passed together with
     # kernel=final_avg_kernel; presumably the kernel is then ignored --
     # TODO confirm against the AveragePool operator.
     final_avg = brew.average_pool(
         model,
         builder.prev_blob,
         'final_avg',
         kernel=final_avg_kernel,
         stride=1,
         global_pooling=True,
     )

     last_out = brew.fc(
         model, final_avg, 'last_out_L{}'.format(num_labels), num_features, num_labels
     )

     if no_loss:
         return last_out

     # If we create model for training, use softmax-with-loss
     if label is not None:
         (softmax, loss) = model.SoftmaxWithLoss(
             [last_out, label],
             ["softmax", "loss"],
         )
         return (softmax, loss)

     # For inference, we just return softmax
     return brew.softmax(model, last_out, "softmax")


 # The conv1 and final_avg kernel/stride args provide a basic mechanism for
 # adapting resnet50 for different sizes of input images.
 def create_resnet50(
     model,
     data,
     num_input_channels,
     num_labels,
     label=None,
     is_test=False,
     no_loss=False,
     no_bias=0,
     conv1_kernel=7,
     conv1_stride=2,
     final_avg_kernel=7,
 ):
     '''
     Build ResNet-50 by delegating to create_resnext with 50 layers, a
     single group and a width of 64 per group.

     Every argument is passed through unchanged and the return value is
     whatever create_resnext returns.  Note that no_bias defaults to 0 here
     while create_resnext itself defaults to 1.  log, bn_epsilon and
     bn_momentum are not forwarded, so create_resnext's defaults apply.
     '''
     # resnet50 is a special case for ResNeXt50-1x64d
     options = dict(num_layers=50, num_groups=1, num_width_per_group=64)
     options.update(
         label=label,
         is_test=is_test,
         no_loss=no_loss,
         no_bias=no_bias,
         conv1_kernel=conv1_kernel,
         conv1_stride=conv1_stride,
         final_avg_kernel=final_avg_kernel,
     )
     return create_resnext(
         model, data, num_input_channels, num_labels, **options
     )
	## @package resnet
	# Module caffe2.python.models.resnet





	from caffe2.python import brew
	import logging

	'''
	Utility for creating ResNe(X)t
	"Deep Residual Learning for Image Recognition" by He, Zhang et. al. 2015
	"Aggregated Residual Transformations for Deep Neural Networks" by Xie et. al. 2016
	'''


	class ResNetBuilder():
	    '''
	    Helper class for constructing residual blocks.

	    The builder tracks a running ``prev_blob``: every ``add_*`` call feeds
	    it to a brew op and replaces it with that op's result.  ``comp_count``
	    is the number of residual components finished so far; ``comp_idx`` is a
	    per-component counter (reset when a block starts, bumped by every
	    ``add_conv`` and once after the sum).  In this class both counters are
	    used only to build blob names.
	    '''

	    def __init__(
	        self,
	        model,
	        prev_blob,
	        no_bias,
	        is_test,
	        bn_epsilon=1e-5,
	        bn_momentum=0.9,
	    ):
	        '''
	        model: passed as first argument to every brew.* call
	        prev_blob: blob the first added op reads from
	        no_bias: truthy to request bias-free convolutions (stored as 0/1)
	        is_test: forwarded to brew.spatial_bn
	        bn_epsilon, bn_momentum: forwarded to brew.spatial_bn
	        '''
	        self.model = model
	        self.comp_count = 0
	        self.comp_idx = 0
	        self.prev_blob = prev_blob
	        self.is_test = is_test
	        self.bn_epsilon = bn_epsilon
	        self.bn_momentum = bn_momentum
	        self.no_bias = 1 if no_bias else 0

	    def add_conv(
	        self,
	        in_filters,
	        out_filters,
	        kernel,
	        stride=1,
	        group=1,
	        pad=0,
	    ):
	        '''
	        Append an MSRAFill-initialised convolution reading ``prev_blob``.

	        ``comp_idx`` is bumped first, so the output blob is named
	        comp_<comp_count>_conv_<comp_idx>.  Returns the new ``prev_blob``.
	        '''
	        self.comp_idx += 1
	        self.prev_blob = brew.conv(
	            self.model,
	            self.prev_blob,
	            'comp_%d_conv_%d' % (self.comp_count, self.comp_idx),
	            in_filters,
	            out_filters,
	            weight_init=("MSRAFill", {}),
	            kernel=kernel,
	            stride=stride,
	            group=group,
	            pad=pad,
	            no_bias=self.no_bias,
	        )
	        return self.prev_blob

	    def add_relu(self):
	        '''Append a relu whose output blob is its input blob.'''
	        self.prev_blob = brew.relu(
	            self.model,
	            self.prev_blob,
	            self.prev_blob,  # in-place
	        )
	        return self.prev_blob

	    def add_spatial_bn(self, num_filters):
	        '''
	        Append a spatial batch norm over ``num_filters`` channels.

	        ``comp_idx`` is not bumped here, so the blob name carries the index
	        of the most recently added convolution.
	        '''
	        self.prev_blob = brew.spatial_bn(
	            self.model,
	            self.prev_blob,
	            'comp_%d_spatbn_%d' % (self.comp_count, self.comp_idx),
	            num_filters,
	            epsilon=self.bn_epsilon,
	            momentum=self.bn_momentum,
	            is_test=self.is_test,
	        )
	        return self.prev_blob

	    def _project_shortcut(
	        self,
	        shortcut_blob,
	        in_filters,
	        out_filters,
	        stride,
	        spatial_batch_norm,
	        **bn_kwargs
	    ):
	        '''Project the shortcut with a 1x1 conv (plus optional spatial BN).'''
	        shortcut_blob = brew.conv(
	            self.model,
	            shortcut_blob,
	            'shortcut_projection_%d' % self.comp_count,
	            in_filters,
	            out_filters,
	            weight_init=("MSRAFill", {}),
	            kernel=1,
	            stride=stride,
	            no_bias=self.no_bias,
	        )
	        if spatial_batch_norm:
	            # is_test goes last so the keyword order matches what each
	            # block passed before this helper was extracted.
	            bn_kwargs['is_test'] = self.is_test
	            shortcut_blob = brew.spatial_bn(
	                self.model,
	                shortcut_blob,
	                'shortcut_projection_%d_spatbn' % self.comp_count,
	                out_filters,
	                **bn_kwargs
	            )
	        return shortcut_blob

	    def _merge_with_shortcut(self, shortcut_blob, residual_blob):
	        '''Sum shortcut and residual, apply relu, and close the component.'''
	        self.prev_blob = brew.sum(
	            self.model, [shortcut_blob, residual_blob],
	            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
	        )
	        self.comp_idx += 1
	        self.add_relu()

	        # Keep track of number of high level components of this ResNetBuilder
	        self.comp_count += 1

	    def add_bottleneck(
	        self,
	        input_filters,   # num of feature maps from preceding layer
	        base_filters,    # num of filters internally in the component
	        output_filters,  # num of feature maps to output
	        stride=1,
	        group=1,
	        spatial_batch_norm=True,
	    ):
	        '''
	        Add a "bottleneck" component as described in He et al. Figure 3
	        (right): 1x1 conv, 3x3 conv, 1x1 conv, summed with the shortcut.

	        stride and group apply to the 3x3 convolution; stride is also used
	        by the shortcut projection.  When spatial_batch_norm is truthy each
	        convolution (and the projection) is followed by a spatial BN.

	        Returns output_filters, so the caller can feed it back in as the
	        next component's input_filters.
	        '''
	        self.comp_idx = 0
	        shortcut_blob = self.prev_blob

	        # 1x1
	        self.add_conv(
	            input_filters,
	            base_filters,
	            kernel=1,
	            stride=1,
	        )
	        if spatial_batch_norm:
	            self.add_spatial_bn(base_filters)
	        self.add_relu()

	        # 3x3 (note the pad, required for keeping dimensions)
	        self.add_conv(
	            base_filters,
	            base_filters,
	            kernel=3,
	            stride=stride,
	            group=group,
	            pad=1,
	        )
	        if spatial_batch_norm:
	            self.add_spatial_bn(base_filters)
	        self.add_relu()

	        # 1x1
	        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
	        if spatial_batch_norm:
	            last_conv = self.add_spatial_bn(output_filters)

	        # Summation with input signal (shortcut)
	        # When the number of feature maps mismatch between the input
	        # and output (this usually happens when the residual stage
	        # changes), we need to do a projection for the short cut
	        if output_filters != input_filters:
	            shortcut_blob = self._project_shortcut(
	                shortcut_blob,
	                input_filters,
	                output_filters,
	                stride,
	                spatial_batch_norm,
	                epsilon=self.bn_epsilon,
	                momentum=self.bn_momentum,
	            )

	        self._merge_with_shortcut(shortcut_blob, last_conv)
	        return output_filters

	    def add_simple_block(
	        self,
	        input_filters,
	        num_filters,
	        down_sampling=False,
	        spatial_batch_norm=True
	    ):
	        '''
	        Add a residual block made of two 3x3 convolutions.

	        input_filters: num of feature maps from preceding layer
	        num_filters: num of feature maps produced by both convolutions
	        down_sampling: when truthy, the first convolution and the shortcut
	            projection use stride 2 instead of 1
	        spatial_batch_norm: when truthy, each convolution (and the shortcut
	            projection) is followed by a spatial BN

	        Returns num_filters, mirroring add_bottleneck's return value.
	        '''
	        self.comp_idx = 0
	        shortcut_blob = self.prev_blob

	        # Plain truthiness: an `is False` identity test would treat falsy
	        # non-bool values such as 0 or None as a request to down-sample.
	        stride = 2 if down_sampling else 1

	        # 3x3
	        self.add_conv(
	            input_filters,
	            num_filters,
	            kernel=3,
	            stride=stride,
	            pad=1
	        )
	        if spatial_batch_norm:
	            self.add_spatial_bn(num_filters)
	        self.add_relu()

	        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
	        if spatial_batch_norm:
	            last_conv = self.add_spatial_bn(num_filters)

	        # Increase of dimensions, need a projection for the shortcut
	        if num_filters != input_filters:
	            # NOTE(review): this BN uses a fixed epsilon of 1e-3 and passes
	            # no momentum, unlike the main path, which uses bn_epsilon and
	            # bn_momentum.  Left as-is because changing it would alter
	            # numerics -- confirm whether that is intended.
	            shortcut_blob = self._project_shortcut(
	                shortcut_blob,
	                input_filters,
	                num_filters,
	                stride,
	                spatial_batch_norm,
	                epsilon=1e-3,
	            )

	        self._merge_with_shortcut(shortcut_blob, last_conv)
	        return num_filters


	def create_resnet_32x32(
	    model, data, num_input_channels, num_groups, num_labels, is_test=False
	):
	    '''
	    Create residual net for smaller images (sec 4.2 of He et al. (2015)).

	    The stem is a 3x3 convolution to 16 channels, a spatial BN and a relu.
	    Three stages of widths 16, 32 and 64 follow, each holding
	    2 * num_groups simple blocks; the first block of the second and third
	    stage down-samples.  An average pool, a fully connected layer and a
	    softmax finish the net.

	    num_groups = 'n' in the paper
	    Returns the softmax blob.
	    '''
	    stage_widths = (16, 32, 64)

	    # Stem: conv1 + spatial BN + relu
	    brew.conv(
	        model, data, 'conv1', num_input_channels, 16, kernel=3, stride=1
	    )
	    brew.spatial_bn(
	        model, 'conv1', 'conv1_spatbn', 16, epsilon=1e-3, is_test=is_test
	    )
	    brew.relu(model, 'conv1_spatbn', 'relu1')

	    builder = ResNetBuilder(model, 'relu1', no_bias=0, is_test=is_test)

	    blocks_per_stage = 2 * num_groups
	    incoming_width = 16
	    for stage, width in enumerate(stage_widths):
	        for block in range(blocks_per_stage):
	            opens_stage = block == 0
	            builder.add_simple_block(
	                incoming_width if opens_stage else width,
	                width,
	                # Only the first block of stages after the first one
	                # down-samples.
	                down_sampling=opens_stage and stage > 0,
	            )
	        incoming_width = width

	    # Final layers
	    brew.average_pool(
	        model, builder.prev_blob, 'final_avg', kernel=8, stride=1
	    )
	    brew.fc(model, 'final_avg', 'last_out', 64, num_labels)
	    return brew.softmax(model, 'last_out', 'softmax')


	# Supported `num_layers` values for create_resnext, each mapped to the number
	# of bottleneck components in the four residual stages (first to last).
	RESNEXT_BLOCK_CONFIG = {
	18: (2, 2, 2, 2),
	34: (3, 4, 6, 3),
	50: (3, 4, 6, 3),
	101: (3, 4, 23, 3),
	152: (3, 8, 36, 3),
	200: (3, 24, 36, 3),
	}

	# Stride of the first component of each residual stage; create_resnext uses
	# stride 1 for every later component of a stage.
	RESNEXT_STRIDES = [1, 2, 2, 2]

	# NOTE: both calls below run at import time: basicConfig() touches the root
	# logger and the "resnext_builder" logger is set to DEBUG.
	# create_resnext has its own `log` parameter, which shadows this
	# module-level name inside that function.
	logging.basicConfig()
	log = logging.getLogger("resnext_builder")
	log.setLevel(logging.DEBUG)


	# The conv1 and final_avg kernel/stride args provide a basic mechanism for
	# adapting resnet50 for different sizes of input images.
	def create_resnext(
	    model,
	    data,
	    num_input_channels,
	    num_labels,
	    num_layers,
	    num_groups,
	    num_width_per_group,
	    label=None,
	    is_test=False,
	    no_loss=False,
	    no_bias=1,
	    conv1_kernel=7,
	    conv1_stride=2,
	    final_avg_kernel=7,
	    log=None,
	    bn_epsilon=1e-5,
	    bn_momentum=0.9,
	):
	    '''
	    Build a ResNe(X)t: conv1 + spatial BN + relu + max pool, four stages of
	    bottleneck components, an average pool and a fully connected layer.

	    num_layers: key into RESNEXT_BLOCK_CONFIG selecting the per-stage
	        component counts
	    num_groups, num_width_per_group: their product is the inner width of
	        the first stage's bottlenecks (doubled after every stage);
	        num_groups is also the `group` of each bottleneck's 3x3 convolution
	    label: when given (and no_loss is falsy) a SoftmaxWithLoss is added
	    no_loss: when truthy, the FC output is returned without a softmax
	    no_bias: forwarded to conv1 and to the residual-block builder
	    conv1_kernel, conv1_stride: kernel and stride of conv1
	    final_avg_kernel: kernel passed to the final average pool
	    log: logger used to report an invalid num_layers; when None the
	        "resnext_builder" logger is used
	    bn_epsilon, bn_momentum: used for every spatial BN, i.e. conv1's and
	        those inside the residual blocks

	    Returns the FC blob if no_loss is truthy, else (softmax, loss) if label
	    is given, else the softmax blob.

	    Raises KeyError if num_layers is not a key of RESNEXT_BLOCK_CONFIG.
	    '''
	    if num_layers not in RESNEXT_BLOCK_CONFIG:
	        # The `log` parameter shadows the module-level logger, so with the
	        # default of None there would be nothing to call .error() on.
	        if log is None:
	            log = logging.getLogger("resnext_builder")
	        log.error("{}-layer is invalid for resnext config".format(num_layers))
	        # Same exception the config lookup below would raise.
	        raise KeyError(num_layers)

	    num_blocks = RESNEXT_BLOCK_CONFIG[num_layers]
	    strides = RESNEXT_STRIDES
	    num_filters = [64, 256, 512, 1024, 2048]

	    if num_layers in [18, 34]:
	        num_filters = [64, 64, 128, 256, 512]

	    # the number of features before the last FC layer
	    num_features = num_filters[-1]

	    # conv1 + maxpool
	    # NOTE(review): pad stays 3 (half of the default 7x7 kernel, rounded
	    # down) even when conv1_kernel is overridden -- confirm that is intended.
	    conv_blob = brew.conv(
	        model,
	        data,
	        'conv1',
	        num_input_channels,
	        num_filters[0],
	        weight_init=("MSRAFill", {}),
	        kernel=conv1_kernel,
	        stride=conv1_stride,
	        pad=3,
	        no_bias=no_bias
	    )

	    bn_blob = brew.spatial_bn(
	        model,
	        conv_blob,
	        'conv1_spatbn_relu',
	        num_filters[0],
	        epsilon=bn_epsilon,
	        momentum=bn_momentum,
	        is_test=is_test
	    )
	    relu_blob = brew.relu(model, bn_blob, bn_blob)  # in-place
	    max_pool = brew.max_pool(model, relu_blob, 'pool1', kernel=3, stride=2, pad=1)

	    # Residual blocks...
	    # Forward the caller's BN settings so the blocks agree with conv1's BN.
	    builder = ResNetBuilder(model, max_pool, no_bias=no_bias,
	                            is_test=is_test, bn_epsilon=bn_epsilon,
	                            bn_momentum=bn_momentum)

	    inner_dim = num_groups * num_width_per_group

	    # 4 different kinds of residual blocks
	    for residual_idx in range(4):
	        residual_num = num_blocks[residual_idx]
	        residual_stride = strides[residual_idx]
	        dim_in = num_filters[residual_idx]

	        for blk_idx in range(residual_num):
	            # add_bottleneck returns its output width, which becomes the
	            # input width of the next component in this stage.
	            dim_in = builder.add_bottleneck(
	                dim_in,
	                inner_dim,
	                num_filters[residual_idx + 1],  # dim out
	                stride=residual_stride if blk_idx == 0 else 1,
	                group=num_groups,
	            )

	        inner_dim *= 2

	    # Final layers
	    # NOTE(review): global_pooling=True is passed together with
	    # kernel=final_avg_kernel; presumably the kernel is then ignored --
	    # TODO confirm against the AveragePool operator.
	    final_avg = brew.average_pool(
	        model,
	        builder.prev_blob,
	        'final_avg',
	        kernel=final_avg_kernel,
	        stride=1,
	        global_pooling=True,
	    )

	    last_out = brew.fc(
	        model, final_avg, 'last_out_L{}'.format(num_labels), num_features, num_labels
	    )

	    if no_loss:
	        return last_out

	    # If we create model for training, use softmax-with-loss
	    if label is not None:
	        (softmax, loss) = model.SoftmaxWithLoss(
	            [last_out, label],
	            ["softmax", "loss"],
	        )
	        return (softmax, loss)

	    # For inference, we just return softmax
	    return brew.softmax(model, last_out, "softmax")


	# The conv1 and final_avg kernel/stride args provide a basic mechanism for
	# adapting resnet50 for different sizes of input images.
	def create_resnet50(
	    model,
	    data,
	    num_input_channels,
	    num_labels,
	    label=None,
	    is_test=False,
	    no_loss=False,
	    no_bias=0,
	    conv1_kernel=7,
	    conv1_stride=2,
	    final_avg_kernel=7,
	):
	    '''
	    Build ResNet-50 by delegating to create_resnext with 50 layers, a
	    single group and a width of 64 per group.

	    Every argument is passed through unchanged and the return value is
	    whatever create_resnext returns.  Note that no_bias defaults to 0 here
	    while create_resnext itself defaults to 1.  log, bn_epsilon and
	    bn_momentum are not forwarded, so create_resnext's defaults apply.
	    '''
	    # resnet50 is a special case for ResNeXt50-1x64d
	    options = dict(num_layers=50, num_groups=1, num_width_per_group=64)
	    options.update(
	        label=label,
	        is_test=is_test,
	        no_loss=no_loss,
	        no_bias=no_bias,
	        conv1_kernel=conv1_kernel,
	        conv1_stride=conv1_stride,
	        final_avg_kernel=final_avg_kernel,
	    )
	    return create_resnext(
	        model, data, num_input_channels, num_labels, **options
	    )