// blob: c289d6938dd8361b5cd75684b0d17049e27840b1 [file] [log] [blame]
#pragma once
#include "caffe2/core/operator.h"
#include "caffe2/quantization/server/dnnlowp.h"
#include "caffe2/utils/eigen_utils.h"
namespace dnnlowp {
/**
* Let consumers of op know that qparams is the quantization parameter used
* for the output_index'th output of op.
*
* @param op operator whose output is being described
* @param output_index index of the output of op that qparams applies to
* @param qparams quantization parameter to advertise to consumers
*
* NOTE(review): how the parameter reaches consumers is not visible in this
* header; check the definition before relying on a particular mechanism.
*/
void PropagateOutputTensorQuantizationParams(
caffe2::OperatorBase* op,
int output_index,
const TensorQuantizationParams& qparams);
/**
* Return the quantization parameter for the input_index'th input of op.
*
* If input_index'th input is already quantized, return quantization parameter
* used for the input tensor (should've been set by
* PropagateOutputTensorQuantizationParams when the producer was invoked).
* If the input tensor is not quantized, return the quantization parameter
* chosen by qfactory based on the distribution of the input tensor.
*
* @param op operator whose input is inspected
* @param input_index index of the input of op
* @param qfactory factory used to choose the parameter when the input is not
*        already quantized
* @param is_weight defaults to false. NOTE(review): presumably tells qfactory
*        to treat the input as a weight tensor -- confirm against the
*        definition.
* @return quantization parameter for the input tensor
*/
TensorQuantizationParams GetInputTensorQuantizationParamsOf(
caffe2::OperatorBase* op,
int input_index,
const QuantizationFactory* qfactory,
bool is_weight = false);
/**
* NOTE(review): undocumented in the original header. Judging by the name and
* by the HasStaticQuantization / GetStaticQuantizationParamsOf declarations
* in this header, this presumably records qparams as the static quantization
* parameter of the output_index'th output of op -- confirm against the
* definition.
*
* @param op operator to update (taken as a non-const pointer)
* @param output_index index of the output of op
* @param qparams quantization parameter to set
*/
void SetStaticQuantizationParams(
caffe2::OperatorBase* op,
int output_index,
const TensorQuantizationParams& qparams);
/**
* Tell whether an output of op uses static quantization.
*
* @param op operator to inspect
* @param output_index index of the output of op; defaults to 0
* @return true if op's outputs should use static quantization (i.e. op has
* Y_scale and optionally Y_zero_offset argument).
*
* NOTE(review): how output_index affects which arguments are looked up is
* not visible in this header.
*/
bool HasStaticQuantization(
const caffe2::OperatorBase* op,
int output_index = 0);
/**
* Get output_index'th quantization parameter.
* Should be used only when static quantization is in effect for that output,
* presumably meaning HasStaticQuantization(op, output_index) returns true.
* (Earlier wording named UseStaticQuantization, which this header does not
* declare -- TODO confirm the intended predicate.)
*
* @param op operator to read the parameter from
* @param output_index index of the output of op
* @return static quantization parameter of that output
*/
TensorQuantizationParams GetStaticQuantizationParamsOf(
const caffe2::OperatorBase* op,
int output_index);
/**
* Quantize input_index'th input of op if it's not already quantized.
* A vector temp should be passed to store quantized results.
*
* @tparam T element type of the quantized values
* @param op operator whose input is quantized
* @param input_index index of the input of op
* @param qparams quantization parameter (presumably the one used to quantize
*        the input -- confirm against the definition)
* @param temp caller-provided vector to store quantized results
* @return array of quantized values
*
* NOTE(review): the returned pointer presumably refers to temp's storage when
* quantization was performed, in which case temp must outlive any use of the
* result -- confirm against the definition.
*/
template <typename T>
const T* QuantizeInputIfNeeded(
caffe2::OperatorBase* op,
int input_index,
const TensorQuantizationParams& qparams,
std::vector<T>& temp);
/**
* NOTE(review): undocumented in the original header. The signature mirrors
* QuantizeInputIfNeeded except that qparams is a vector, so this is
* presumably the variant that uses a separate quantization parameter per row
* of the input -- confirm against the definition, including how qparams is
* indexed.
*
* @tparam T element type of the quantized values
* @param op operator whose input is quantized
* @param input_index index of the input of op
* @param qparams vector of quantization parameters
* @param temp caller-provided vector (presumably scratch storage for the
*        quantized results, as in QuantizeInputIfNeeded)
* @return pointer to the quantized values
*/
template <typename T>
const T* RowWiseQuantizeInputIfNeeded(
caffe2::OperatorBase* op,
int input_index,
const std::vector<TensorQuantizationParams>& qparams,
std::vector<T>& temp);
/**
* Statistics about quantization error; this is the type taken by
* MeasureQuantizationError and ReportQuantizationError.
* Every field is initialized to zero.
*/
struct QuantizationErrorStats {
  float sum_sq = 0.0f;
  float sum_err_sq = 0.0f;
  float max_abs_err = 0.0f;
  // Actual and reference values that resulted in max_abs_err.
  float max_err_actual = 0.0f;
  float max_err_ref = 0.0f;
  int measure_cnt = 0;
};
/**
* NOTE(review): undocumented in the original header. Presumably compares the
* len values at actual against the len values at ref and accumulates the
* result into *stat -- confirm against the definition.
*
* @param actual pointer to the actual values
* @param ref pointer to the reference values
* @param len element count (presumably of both actual and ref)
* @param stat statistics object passed by non-const pointer, so it can be
*        updated by the callee
*/
void MeasureQuantizationError(
const float* actual,
const float* ref,
size_t len,
QuantizationErrorStats* stat);
/**
* NOTE(review): undocumented in the original header. Presumably reports the
* statistics in stat for operator op; where the report goes (log, stdout,
* ...) is not visible here -- confirm against the definition.
*
* @param op operator the statistics belong to (read-only)
* @param stat statistics to report (read-only)
*/
void ReportQuantizationError(
const caffe2::OperatorBase* op,
const QuantizationErrorStats& stat);
/**
* Get QuantizationFactory based on the arguments of op.
*
* @param op operator whose arguments are consulted (read-only)
* @return the factory, owned by the caller through std::unique_ptr
*
* NOTE(review): which arguments of op are read is not visible in this header.
*/
std::unique_ptr<QuantizationFactory> GetQuantizationFactoryOf(
const caffe2::OperatorBase* op);
/**
* NOTE(review): undocumented in the original header. Judging by the name,
* this presumably adjusts the output quantization parameters of op based on
* followed_by (possibly the kind of operator that consumes the output) --
* the accepted values and the adjustment itself are not visible here;
* confirm against the definition.
*
* @param op operator to adjust (taken as a non-const pointer)
* @param followed_by string passed through from the caller; see
*        ParseDNNLowPOperatorArguments, which has an output parameter of the
*        same name
*/
void AdjustOutputTensorQuantizationParamsWithFollowedBy(
caffe2::OperatorBase* op,
const std::string& followed_by);
/**
* NOTE(review): undocumented in the original header. Presumably parses the
* DNNLOWP-related arguments of op and writes the results through the pointer
* parameters -- confirm against the definition.
*
* All three pointer parameters default to nullptr, so callers may omit them;
* presumably a null pointer means the corresponding value is not reported
* (TODO confirm that the definition checks for null).
*
* @param op operator whose arguments are parsed
* @param dequantize_output optional output flag; defaults to nullptr
* @param measure_quantization_error optional output flag; defaults to nullptr
* @param followed_by optional output string; defaults to nullptr
*/
void ParseDNNLowPOperatorArguments(
caffe2::OperatorBase* op,
bool* dequantize_output = nullptr,
bool* measure_quantization_error = nullptr,
std::string* followed_by = nullptr);
/**
* NOTE(review): undocumented in the original header. Judging by the name,
* this presumably returns a copy of net_def whose operators carry scale /
* zero-offset arguments derived from the histograms stored in
* histogram_file_name -- the file format and the exact arguments added are
* not visible here; confirm against the definition.
*
* @param net_def network definition, taken by value
* @param histogram_file_name name of the histogram file
* @return a NetDef, returned by value
*/
caffe2::NetDef AddScaleZeroOffsetArgumentsWithHistogram(
caffe2::NetDef net_def,
const std::string& histogram_file_name);
} // namespace dnnlowp