## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap


import math

import numpy as np
from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin


def get_fc_predictor_version(fc_version):
    assert fc_version in ["fp32"], (
        "Only fp32 is supported for the fully connected layer "
        "in the predictor net; the provided FC precision is {}".format(fc_version)
    )
    return fc_version
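

# Usage sketch (illustrative only; assumes the standard caffe2 arg_scope
# helper from caffe2.python.helpers.arg_scope). add_ops() below reads the
# predictor precision from the current scope, keyed by this function's name:
#
#   from caffe2.python.helpers.arg_scope import arg_scope
#   with arg_scope([get_fc_predictor_version], fc_version="fp32"):
#       layer.add_ops(net)  # picks up {"fc_version": "fp32"} from the scope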


class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
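    """
    A fully connected layer that maintains ``num_bootstrap`` independent
    (weight, bias) pairs and, at train time, feeds each pair a bootstrap
    resample (a with-replacement sample) of the incoming batch. At predict
    and eval time, every bootstrapped FC sees the same unsampled features.

    Construction sketch (illustrative; ``model`` and ``input_record`` come
    from the surrounding layer-model framework):

        fc_boot = FCWithBootstrap(
            model, input_record, output_dims=1, num_bootstrap=10
        )
    """
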
    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects an input tensor with at least one known dimension"
        assert axis >= 1, "axis {} should be >= 1.".format(axis)
        self.axis = axis
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])

        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        self.clip_args = None

        # attributes for bootstrapping below
        self.num_bootstrap = num_bootstrap

        # input dim shape
        self.input_dims = input_dims

        # bootstrapped fully connected layers to be used at eval time
        self.bootstrapped_FCs = []

        # scalar containing the batch_size blob so that we don't need to recompute it
        self.batch_size = None

        # this is intended to be the last FC, so output_dim should be 1; set to None
        self.output_dim_vec = None

        # lower bound when creating random indices
        self.lower_bound = None

        # upper bound when creating random indices
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip_max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        """
        bootstrapped FCs:
            Ex: [
                    bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
                    ...,
                    ...,
                    bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
                ]

        output_schema:
            Note: indices will always be on even indices.
            Ex: Struct(
                    indices_0_blob,
                    preds_0_blob,
                    ...
                    ...
                    indices_b_blob,
                    preds_b_blob
                )
        """
        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )
            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

        if axis == 1:
            output_shape = (output_dims,)
        else:
            output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
            output_shape = tuple(output_shape + [output_dims])

    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Args:
            net: the caffe2 net to insert the operator into

            copied_cur_layer: blob of the current features, used here only to
            read the batch size (make sure this blob has a stop_gradient on it)

            iteration: the bootstrap iteration to generate indices for. Used to
            correctly populate the output_schema

        Returns:
            A blob containing the generated indices, of shape: (batch_size,)
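
        A rough numpy equivalent of the UniformIntFill below (illustrative
        only; both fill bounds are inclusive, which matches np.random.randint's
        exclusive high of batch_size):

            indices = np.random.randint(0, batch_size, size=(batch_size,))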
| """ |
| with core.NameScope("bootstrap_iteration_{}".format(iteration)): |
| if iteration == 0: |
| # capture batch_size once for efficiency |
| input_shape = net.Shape(copied_cur_layer, "input_shape") |
| batch_size_index = net.Const(np.array([0]), "batch_size_index") |
| batch_size = net.Gather([input_shape, batch_size_index], "batch_size") |
| self.batch_size = batch_size |
| |
| lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32) |
| offset = net.Const(np.array([1]), "offset", dtype=np.int32) |
| int_batch_size = net.Cast( |
| [self.batch_size], "int_batch_size", to=core.DataType.INT32 |
| ) |
| upper_bound = net.Sub([int_batch_size, offset], "upper_bound") |
| |
| self.lower_bound = lower_bound |
| self.upper_bound = upper_bound |
| |
| indices = net.UniformIntFill( |
| [self.batch_size, self.lower_bound, self.upper_bound], |
| self.output_schema[iteration * 2].field_blobs()[0], |
| input_as_shape=1, |
| ) |
| |
| return indices |

    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features. Only used by the train_net.

        Args:
            net: the caffe2 net to insert the bootstrapping operators into

            copied_cur_layer: the blob representing the current features.
            Note, this layer should have a stop_gradient on it.

            indices: the blob of bootstrapped indices produced by
            _generate_bootstrapped_indices

            iteration: the bootstrap iteration these features belong to; used
            only to name the output blob

        Returns:
            bootstrapped_features: blob of the bootstrapped version of cur_layer
            with the same dimensions
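
        A rough numpy equivalent of the Gather below (illustrative only):

            bootstrapped_features = features[indices]  # rows drawn with replacement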
| """ |
| |
| # draw features based upon the bootstrapped indices |
| bootstrapped_features = net.Gather( |
| [copied_cur_layer, indices], |
| net.NextScopedBlob("bootstrapped_features_{}".format(iteration)), |
| ) |
| |
| bootstrapped_features = schema.Scalar( |
| (np.float32, self.input_dims), bootstrapped_features |
| ) |
| |
| return bootstrapped_features |

    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Args:
            net: the caffe2 net to insert the operator into

            features: Scalar containing the blob of the bootstrapped features
            or the actual cur_layer features

            params: weight and bias for the FC

            outputs: the output blobs

            version: only fp32 is supported for now
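
        Sketch of the fp32 branch's math (hedged; standard caffe2 FC semantics,
        with W of shape (output_dims, input_dims) and b of shape (output_dims,)):

            pred = features.dot(W.T) + b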
| """ |
| |
| if version == "fp32": |
| pred_blob = net.FC( |
| features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs |
| ) |
| return pred_blob |
| else: |
| raise Exception("unsupported FC type version {}".format(version)) |

    def _add_ops(self, net, features, iteration, params, version):
        """
        Args:
            net: the caffe2 net to insert the operators into

            features: feature blobs to predict on. Can be the actual cur_layer
            or the bootstrapped_feature blobs.

            iteration: the bootstrap iteration, used to pick the matching
            preds blob in the output_schema

            params: the weight and bias, passed by either the add_ops or
            the add_train_ops function

            version: currently only fp32 is supported
        """

        if self.clip_args is not None:
            clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
            for p, cp in zip(params, clipped_params):
                net.Clip([p], [cp], **self.clip_args)
            params = clipped_params

        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            self._insert_fc_ops(
                net=net,
                features=features,
                params=params,
                outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
                version=version,
            )

    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.

        For the bootstrapping approach, the goal is to pass the cur_layer
        feature inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs. The preds are returned in the same output_schema,
        with dummy indices (because they are not needed).
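
        Note: self.bootstrapped_FCs is laid out as [w_0, b_0, w_1, b_1, ...]
        (see the layout sketch in __init__), so each iteration i below slices
        out its (w, b) pair with self.bootstrapped_FCs[i * 2 : (i * 2) + 2].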
| """ |
| |
| version_info = get_current_scope().get( |
| get_fc_predictor_version.__name__, {"fc_version": "fp32"} |
| ) |
| predictor_fc_fp_version = version_info["fc_version"] |
| |
| for i in range(self.num_bootstrap): |
| # these are dummy indices, not to be used anywhere |
| indices = self._generate_bootstrapped_indices( |
| net=net, |
| copied_cur_layer=self.input_record.field_blobs()[0], |
| iteration=i, |
| ) |
| |
| params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2] |
| |
| self._add_ops( |
| net=net, |
| features=self.input_record, |
| params=params, |
| iteration=i, |
| version=predictor_fc_fp_version, |
| ) |

    def add_train_ops(self, net):
        # use the train_param_blobs to be consistent with the SamplingTrain unittest

        # obtain features
        for i in range(self.num_bootstrap):
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )
            bootstrapped_features = self._bootstrap_ops(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                indices=indices,
                iteration=i,
            )
            self._add_ops(
                net,
                features=bootstrapped_features,
                iteration=i,
                params=self.train_param_blobs[i * 2 : (i * 2) + 2],
                version="fp32",
            )

    def get_fp16_compatible_parameters(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            # weights live at the even indices of [w_0, b_0, w_1, b_1, ...]
            return [
                blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
            ]
        else:
            raise Exception(
                "Currently only supports functionality for output_dim_vec == 1"
            )

    @property
    def param_blobs(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return self.bootstrapped_FCs
        else:
            raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")