# @package homotopy_weight
# Module caffe2.fb.python.layers.homotopy_weight


from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np
import logging

logger = logging.getLogger(__name__)

'''
Homotopy weighting combines two input blobs x and y as:

    alpha * x + beta * y

where alpha is a decreasing scalar parameter in the range [min, max] (default
[0, 1]) and alpha + beta = max + min, so beta increases over the same range
[min, max].

Homotopy methods first solve an "easy" problem (one whose solution is well
known) and then gradually transform it into the target problem.
'''
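# With the defaults min_weight=0 and max_weight=1, the mapping built in
# update_weight below reduces to alpha = lr and beta = 1 - lr, where lr
# decays from 1 toward 0 under the 'inv' learning-rate policy: the blend
# starts as pure x (the "easy" term) and drifts toward y (the target term),
# while alpha + beta stays fixed at max_weight + min_weight.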


class HomotopyWeight(ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
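        '''Blend two input blobs with complementary, schedule-driven weights.

        Args (beyond the usual ModelLayer arguments):
            input_record: a schema with exactly two field blobs, x and y.
            min_weight, max_weight: range the weights move over; alpha decays
                from max_weight to min_weight while beta rises symmetrically.
            half_life, quad_life: control how quickly the internal 'inv'
                learning-rate schedule (and hence alpha) decays; see
                solve_inv_lr_params.
            atomic_iter: optional externally managed iteration-counter blob;
                if omitted, the layer creates its own mutex-protected counter.
        '''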
        super(HomotopyWeight,
              self).__init__(model, name, input_record, **kwargs)
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        assert len(data) == 2
        self.x = data[0]
        self.y = data[1]
        # TODO: currently model building does not have access to the iter
        # counter or learning rate; they are added at optimization time
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )
        # map lr to [min, max]: alpha = scale * lr + offset
        assert max_weight > min_weight
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
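        '''Derive (gamma, power) for Caffe2's 'inv' learning-rate policy,
        lr = base_lr * (1 + gamma * iter) ** (-power).

        The parameter names indicate the intent: half_life and quad_life are
        the iteration counts around which the schedule should have decayed to
        roughly one half and one quarter of base_lr, respectively.
        '''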
        # ensure that gamma and power are solvable
        assert half_life > 0
        # convex and monotonically decreasing
        assert quad_life > 2 * half_life
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g' %
            (gamma, power)
        )
        return gamma, power

    def create_atomic_iter(self):
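        '''Create a mutex-protected int64 iteration counter for this layer.

        The counter starts at 0, is excluded from optimization (NoOptim), and
        is advanced by the AtomicIter op that update_weight adds to the net.
        '''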
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
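        '''Add ops that produce the current (alpha, beta) pair.

        A LearningRate op with the 'inv' policy maps the iteration counter to
        lr in (0, 1]; alpha = scale * lr + min_weight then decays from
        max_weight toward min_weight, while beta = scale * (1 - lr) +
        min_weight rises symmetrically, so alpha + beta stays constant at
        max_weight + min_weight.
        '''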
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        alpha, beta = self.update_weight(net)
        # alpha x + beta y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())
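

if __name__ == '__main__':
    # Illustration only (not part of the layer): evaluates the same schedule
    # that update_weight builds with Caffe2 ops, but offline in NumPy, using
    # the module's default parameters. It shows alpha decaying from
    # max_weight toward min_weight while beta rises, with alpha + beta fixed
    # at max_weight + min_weight.
    min_weight, max_weight = 0.0, 1.0
    half_life, quad_life = 1e6, 3e6
    t = quad_life / half_life
    x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
    gamma = (x - 1.0) / half_life
    power = np.log(2.0) / np.log(x)
    scale = max_weight - min_weight
    for it in [0, half_life, quad_life, 10 * quad_life]:
        lr = (1.0 + gamma * it) ** (-power)  # Caffe2 'inv' policy, base_lr=1
        alpha = scale * lr + min_weight
        beta = scale * (1.0 - lr) + min_weight
        print('iter=%.0f: alpha=%.3f beta=%.3f sum=%.3f' %
              (it, alpha, beta, alpha + beta))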