blob: 823ded42982b8d43aa2b15b0df5976cc26c09b8c [file] [log] [blame]
#pragma once
#include "caffe2/quantization/server/dnnlowp.h"
#include <cmath>
#include <vector>
namespace dnnlowp {
/**
* We use the 3-region approach described in "Efficient VLSI Implementation of
* Neural Networks with Hyperbolic Tangent Activation Function", IEEE
* Transactions on Very Large Scale Integration Systems, Zamanlooy and
* Mirhassani.
* The pass region (x < x_pq) is approximated as x.
* The saturation region (x >= x_sq) is approximated as 1.
* The processing region (x_pq <= x < x_sq) is divided into sub-ranges and the
* average value of tanh(x) is used per sub-range.
*/
template <typename T>
class Tanh {
 public:
  // max_abs_err: upper bound on the allowed absolute approximation error;
  // consumed by the out-of-line constructor when building the
  // processing-region lookup table.
  Tanh(double max_abs_err = DEFAULT_MAX_ABS_ERR);

  // Computes the quantized tanh approximation of quantized input x.
  // Defined out-of-line together with the constructor.
  T Compute(T x) const;

  // Quantization parameters chosen for tanh's input domain.
  TensorQuantizationParams GetInputQuantizationParams() const {
    return in_qparams_;
  }

  // Quantization parameters chosen for tanh's output range.
  TensorQuantizationParams GetOutputQuantizationParams() const {
    return out_qparams_;
  }

  // Quantized index (relative to the zero point) where the pass region
  // (tanh(x) ~= x) ends and the processing region begins.
  int GetPassRegionEnd() const {
    return x_pq_index_;
  }

  // Same pass/processing boundary as GetPassRegionEnd(), but converted back
  // to real (dequantized) units.
  // NOTE(review): the static_cast assumes the quantized code fits in
  // uint8_t even when T is a wider type -- confirm against how T is
  // instantiated by callers.
  float GetPassRegionEndDequantized() const {
    return fbgemm::Dequantize<T>(
        static_cast<uint8_t>(x_pq_index_ + in_qparams_.zero_point),
        in_qparams_);
  }

  // Dequantized value of the largest representable input code
  // ((1 << num_in_bits_) - 1); everything at or beyond it lies in the
  // saturation region where tanh is approximated as 1.
  float GetSaturationRegionBegin() const {
    return fbgemm::Dequantize<T>(
        static_cast<T>((1 << num_in_bits_) - 1), in_qparams_);
  }

  // Default error budget and bit widths used when none are supplied.
  static constexpr double DEFAULT_MAX_ABS_ERR = 0.02;
  static constexpr int DEFAULT_NUM_IN_BITS = 8;
  static constexpr int DEFAULT_NUM_OUT_BITS = 8;

 private:
  const double max_abs_err_;
  const int num_in_bits_ = DEFAULT_NUM_IN_BITS;
  const int num_out_bits_ = DEFAULT_NUM_OUT_BITS;

  // First quantized index of the processing region (end of the pass region).
  int x_pq_index_;
  // Per-sub-range average tanh values covering the processing region.
  std::vector<T> processing_region_lut_;
  TensorQuantizationParams in_qparams_, out_qparams_;
}; // class Tanh
} // namespace dnnlowp