## @package optimizer_test_util
# Module caffe2.python.optimizer_test_util


import unittest
import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)

from caffe2.python.model_helper import ModelHelper


class OptimizerTestBase(object):
    """
    This is an abstract base class: it intentionally does not inherit from
    unittest.TestCase and is not named 'Test*', so test runners skip it.
    Concrete subclasses should do both, and must provide build_optimizer()
    and check_optimizer() (plus a _skip_gpu flag for the GPU test).
    """

    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT \
            else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else \
            PseudoFP16Initializer
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]
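        # The labels are exact (noiseless) dot products with perfect_model,
        # so a single fc layer trained on this data should recover
        # perfect_model as its weight vector.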

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
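        # In fp16 mode the fc output is half precision; convert it back to
        # fp32 so that the loss below is computed in full precision.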
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)

    def testDense(self):
        model, perfect_model, data, label = self._createDense()
        optim = self.build_optimizer(model)
        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
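        # 2000 single-example training steps on the noiseless regression
        # problem should drive the fc weights close to perfect_model.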
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            workspace.FeedBlob('data', data[idx])
            workspace.FeedBlob('label', label[idx])
            workspace.RunNet(model.net.Proto().name)

        np.testing.assert_allclose(
            perfect_model[np.newaxis, :],
            workspace.FetchBlob('fc_w'),
            atol=1e-2
        )
        self.check_optimizer(optim)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        device_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                fc_fp32_for_host = model.HalfToFloat('fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        if self._skip_gpu:
            return

        # Run the net once to check that it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)

    def testSparse(self):
        # To test duplicated indices we assign two indices to each weight,
        # so in a given example each weight may be picked up once or twice;
        # e.g. weight j is referenced by input columns 2*j and 2*j + 1.
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        data = np.random.randint(
            2,
            size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
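        # Because 'w' is only read through Gather, its gradient is sparse:
        # a core.GradientSlice holding an (indices, values) pair instead of
        # a dense blob.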
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optim = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # transform into indices of binary features
                indices = np.repeat(np.arange(perfect_model.size),
                                    DUPLICATION)[data[idx] == 1]
                if indices.size == 0:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob('label',
                                   np.array(label[idx]).astype(np.float32))
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model,
                workspace.FetchBlob('w'),
                atol=1e-2
            )
        self.check_optimizer(optim)


class LRModificationTestBase(object):
    """
    This is an abstract base class: it intentionally does not inherit from
    unittest.TestCase and is not named 'Test*', so test runners skip it.
    Concrete subclasses should do both.
    """

    def _gradient_ratio_reference(self, model, params, max_gradient_norm):
        sum_squared_norms = 0.0
        for param in params:
            grad = (
                model.param_to_grad[param]
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ) else model.param_to_grad[param].values
            )
            val = workspace.FetchBlob(grad)
            sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
        global_norm = np.sqrt(sum_squared_norms)
        clip_norm = max_gradient_norm
        # clip_norm / max(clip_norm, global_norm) equals
        # min(1, clip_norm / global_norm): gradients are only scaled down
        # when the global norm exceeds the limit.
        norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
        return norm_ratio

    def test_global_norm_based_gradient_clipping(self):
        max_gradient_norm = 1.0
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

        params = []
        for param in model.GetParams(top_scope=True):
            if param in model.param_to_grad:
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ):
                    params.append(param)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        self.assertIsNotNone(opt._lr_multiplier)

        # Run net once
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)

        reference = self._gradient_ratio_reference(
            model,
            params,
            max_gradient_norm,
        )
        norm_ratio = workspace.FetchBlob(
            'norm_clipped_grad_update/norm_ratio')
        np.testing.assert_almost_equal(norm_ratio, reference)
        self.assertTrue(
            reference < 1.0,
            "Bad test: the gradient norm never exceeded max_gradient_norm, "
            "so clipping was not actually exercised."
        )

    def test_lr_injection(self):
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(
            model, max_gradient_norm=1, allow_lr_injection=True
        )

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        # Test that the LR injection is initialized properly
        self.assertIsNotNone(opt._lr_multiplier)
        self.assertEqual(optimizer.get_lr_injection(), 1)

        # Test that we're able to modify the value of the lr_injection
        optimizer.set_lr_injection(0)
        self.assertEqual(optimizer.get_lr_injection(), 0)

        # Test that setting the lr_injection properly propagates to the
        # lr_multiplier. Here both the lr_injection and the norm_ratio
        # affect the lr_multiplier, and a zero injection forces the
        # multiplier to zero.
        workspace.RunNet(model.net.Proto().name)
        self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)