// caffe2/quantization/server/pybind.cc - platform/external/pytorch - Git at Google

 #include <fbgemm/FbgemmFP16.h>
 #include <fbgemm/Utils.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include "activation_distribution_observer.h"
 #include "caffe2/opt/fakefp16_transform.h"
 #include "caffe2/opt/custom/freeze_quantization_params.h"
 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
 #include "caffe2/quantization/server/fbgemm_pack_blob.h"
 #include "caffe2/quantization/server/int8_gen_quant_params.h"
 #include "quantization_error_minimization.h"

 // Forward declaration of the workspace accessor used by the bindings below.
 // Every caller in this file checks the returned pointer with CAFFE_ENFORCE
 // before using it.
 namespace caffe2 {
 namespace python {
 // defined in caffe2/python/pybind_state.cc
 Workspace* GetCurrentWorkspace();
 } // namespace python
 } // namespace caffe2

 PYBIND11_MODULE(dnnlowp_pybind11, m) {
   using namespace std;
   using namespace caffe2;

   // ClearNetObservers(): thin wrapper that forwards to
   // ClearGlobalNetObservers() and returns nothing.
   m.def("ClearNetObservers", [] {
     ClearGlobalNetObservers();
   });

   // ObserveMinMaxOfOutput(min_max_file_name, dump_freq=-1, delimiter=" ")
   //
   // Registers a global net-observer creator. Each time the creator is invoked
   // with a net it builds an OutputMinMaxNetObserver from the arguments given
   // here, which is why the creator captures them by value.
   m.def(
       "ObserveMinMaxOfOutput",
       [](const string& min_max_file_name, int dump_freq, string delimiter) {
         auto create_observer =
             [dump_freq, min_max_file_name, delimiter](NetBase* net) {
               return make_unique<OutputMinMaxNetObserver>(
                   net, min_max_file_name, dump_freq, delimiter);
             };
         AddGlobalNetObserverCreator(create_observer);
       },
       pybind11::arg("min_max_file_name"),
       pybind11::arg("dump_freq") = -1,
       pybind11::arg("delimiter") = " ");

   // ObserveHistogramOfOutput(out_file_name, dump_freq=-1, mul_nets=False,
   //                          op_filter="", delimiter=" ")
   //
   // Registers a global net-observer creator that builds a
   // HistogramNetObserver for each net it is invoked with. All arguments are
   // captured by value because the creator runs after this call has returned.
   m.def(
       "ObserveHistogramOfOutput",
       [](const string& out_file_name,
          int dump_freq,
          bool mul_nets,
          string op_filter,
          string delimiter) {
         auto create_observer = [out_file_name,
                                 dump_freq,
                                 mul_nets,
                                 op_filter,
                                 delimiter](NetBase* net) {
           // NOTE(review): 2048 is presumably the histogram bin count --
           // confirm against the HistogramNetObserver constructor.
           return make_unique<HistogramNetObserver>(
               net,
               out_file_name,
               2048,
               dump_freq,
               mul_nets,
               op_filter,
               delimiter);
         };
         AddGlobalNetObserverCreator(create_observer);
       },
       pybind11::arg("out_file_name"),
       pybind11::arg("dump_freq") = -1,
       pybind11::arg("mul_nets") = false,
       pybind11::arg("op_filter") = "",
       pybind11::arg("delimiter") = " ");

   // DumpHistogramFile(ob)
   //
   // Calls DumpHistogramFile() on the given observer.
   //   ob: observer handle, e.g. the value returned by AddHistogramObserver.
   // Raises (CAFFE_ENFORCE) when `ob` is None or is not a HistogramNetObserver,
   // instead of dereferencing a null pointer.
   m.def(
       "DumpHistogramFile",
       [](Observable<NetBase>::Observer* ob) {
         // pybind11 hands Python None to pointer parameters as nullptr.
         CAFFE_ENFORCE(ob != nullptr, "DumpHistogramFile: observer is None");
         HistogramNetObserver* hist_ob =
             dynamic_cast_if_rtti<HistogramNetObserver*>(ob);
         // A checked downcast yields nullptr for any other observer type.
         CAFFE_ENFORCE(
             hist_ob != nullptr,
             "DumpHistogramFile: observer is not a HistogramNetObserver");
         hist_ob->DumpHistogramFile();
       },
       pybind11::arg("ob"));

   // AddHistogramObserver(net_name, out_file_name, dump_freq=-1,
   //                      mul_nets=False, delimiter=" ")
   //
   // Attaches a HistogramNetObserver to the net called `net_name` in the
   // current workspace and returns the attached observer as a Python object.
   // Raises (CAFFE_ENFORCE) if there is no current workspace, the net does not
   // exist, or AttachObserver returns null.
   m.def(
       "AddHistogramObserver",
       [](const string& net_name,
          const string& out_file_name,
          int dump_freq,
          bool mul_nets,
          string delimiter) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         NetBase* net = gWorkspace->GetNet(net_name);
         CAFFE_ENFORCE(net, "Can't find net ", net_name);

         const Observable<NetBase>::Observer* observer = nullptr;
         {
           // Release the GIL only around the C++ work. pybind11::cast below
           // creates a Python object and therefore needs the GIL re-acquired.
           pybind11::gil_scoped_release g;
           // NOTE(review): 2048 is presumably the histogram bin count and ""
           // the op filter -- confirm against HistogramNetObserver.
           observer = net->AttachObserver(make_unique<HistogramNetObserver>(
               net, out_file_name, 2048, dump_freq, mul_nets, "", delimiter));
         }

         CAFFE_ENFORCE(observer != nullptr);
         return pybind11::cast(observer);
       },
       pybind11::arg("net_name"),
       pybind11::arg("out_file_name"),
       pybind11::arg("dump_freq") = -1,
       pybind11::arg("mul_nets") = false,
       pybind11::arg("delimiter") = " ");

   // DumpOutputColumnMaxHistogramFile(ob)
   //
   // Calls DumpOutputColumnMaxHistogramFile() on the given observer.
   //   ob: observer handle, e.g. the value returned by
   //       AddOutputColumnMaxHistogramObserver.
   // Raises (CAFFE_ENFORCE) when `ob` is None or is not an
   // OutputColumnMaxHistogramNetObserver, instead of dereferencing null.
   m.def(
       "DumpOutputColumnMaxHistogramFile",
       [](Observable<NetBase>::Observer* ob) {
         // pybind11 hands Python None to pointer parameters as nullptr.
         CAFFE_ENFORCE(
             ob != nullptr, "DumpOutputColumnMaxHistogramFile: observer is None");
         OutputColumnMaxHistogramNetObserver* hist_ob =
             dynamic_cast_if_rtti<OutputColumnMaxHistogramNetObserver*>(ob);
         // A checked downcast yields nullptr for any other observer type.
         CAFFE_ENFORCE(
             hist_ob != nullptr,
             "DumpOutputColumnMaxHistogramFile: observer is not an "
             "OutputColumnMaxHistogramNetObserver");
         hist_ob->DumpOutputColumnMaxHistogramFile();
       },
       pybind11::arg("ob"));

   // AddOutputColumnMaxHistogramObserver(net_name, out_file_name,
   //     observe_column_max_for_blobs, dump_freq=-1, bin_nums=16,
   //     mul_nets=False, delimiter=" ")
   //
   // Attaches an OutputColumnMaxHistogramNetObserver to the net called
   // `net_name` in the current workspace and returns the attached observer as
   // a Python object. Raises (CAFFE_ENFORCE) if there is no current workspace,
   // the net does not exist, or AttachObserver returns null.
   m.def(
       "AddOutputColumnMaxHistogramObserver",
       [](const string& net_name,
          const string& out_file_name,
          const std::vector<std::string>& observe_column_max_for_blobs,
          int dump_freq,
          int bin_nums,
          bool mul_nets,
          string delimiter) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         NetBase* net = gWorkspace->GetNet(net_name);
         CAFFE_ENFORCE(net, "Can't find net ", net_name);

         const Observable<NetBase>::Observer* observer = nullptr;
         {
           // Release the GIL only around the C++ work. pybind11::cast below
           // creates a Python object and therefore needs the GIL re-acquired.
           pybind11::gil_scoped_release g;
           // The constructor receives bin_nums before dump_freq, which is
           // the reverse of the Python argument order.
           observer = net->AttachObserver(
               make_unique<OutputColumnMaxHistogramNetObserver>(
                   net,
                   out_file_name,
                   observe_column_max_for_blobs,
                   bin_nums,
                   dump_freq,
                   mul_nets,
                   delimiter));
         }

         CAFFE_ENFORCE(observer != nullptr);
         return pybind11::cast(observer);
       },
       pybind11::arg("net_name"),
       pybind11::arg("out_file_name"),
       pybind11::arg("observe_column_max_for_blobs"),
       pybind11::arg("dump_freq") = -1,
       pybind11::arg("bin_nums") = 16,
       pybind11::arg("mul_nets") = false,
       pybind11::arg("delimiter") = " ");

   // ChooseQuantizationParams(blob_name) -> (scale, zero_point)
   //
   // Asks the default QuantizationFactory for quantization parameters of the
   // float CPU tensor stored in blob `blob_name`, treating it as a weight.
   // If the blob is missing, is not a CPU tensor, or is not a float tensor, a
   // warning is logged and (1.0, 0) is returned.
   // Raises (CAFFE_ENFORCE) only when there is no current workspace.
   m.def(
       "ChooseQuantizationParams",
       [](const std::string& blob_name) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         pybind11::gil_scoped_release g;

         using ScaleAndZeroPoint = std::tuple<float, int>;

         const auto* blob = gWorkspace->GetBlob(blob_name);
         if (blob == nullptr) {
           LOG(WARNING) << "Can't find blob " << blob_name;
           return ScaleAndZeroPoint(1.0, 0);
         }
         if (!BlobIsTensorType(*blob, CPU)) {
           LOG(WARNING) << "Blob " << blob_name << " is not a tensor";
           return ScaleAndZeroPoint(1.0, 0);
         }

         const auto& tensor = blob->Get<Tensor>();
         if (!tensor.IsType<float>()) {
           LOG(WARNING) << "Blob " << blob_name << " is not a float tensor";
           return ScaleAndZeroPoint(1.0, 0);
         }

         dnnlowp::QuantizationFactory* qfactory =
             dnnlowp::QuantizationFactory::GetDefaultInstance();
         const dnnlowp::TensorQuantizationParams qparams =
             qfactory->ChooseQuantizationParams(
                 tensor.data<float>(), tensor.size(), true /*weight*/);
         return ScaleAndZeroPoint(qparams.scale, qparams.zero_point);
       },
       pybind11::arg("blob_name"));

   // RegisterQuantizationParams(min_max_file_name, is_weight=False,
   //                            qparams_output_file_name="")
   //
   // Registers a global net-observer creator that builds a
   // RegisterQuantizationParamsNetObserver for each net it is invoked with.
   // The arguments are captured by value for that later invocation.
   m.def(
       "RegisterQuantizationParams",
       [](const string& min_max_file_name,
          bool is_weight,
          const string& qparams_output_file_name) {
         auto create_observer =
             [min_max_file_name, is_weight, qparams_output_file_name](
                 NetBase* net) {
               return make_unique<RegisterQuantizationParamsNetObserver>(
                   net, min_max_file_name, is_weight, qparams_output_file_name);
             };
         AddGlobalNetObserverCreator(create_observer);
       },
       pybind11::arg("min_max_file_name"),
       pybind11::arg("is_weight") = false,
       pybind11::arg("qparams_output_file_name") = "");

   // RegisterQuantizationParamsWithHistogram(histogram_file_name,
   //     is_weight=False, qparams_output_file_name="")
   //
   // Registers a global net-observer creator that builds a
   // RegisterQuantizationParamsWithHistogramNetObserver for each net it is
   // invoked with. The arguments are captured by value for that later call.
   m.def(
       "RegisterQuantizationParamsWithHistogram",
       [](const string& histogram_file_name,
          bool is_weight,
          const string& qparams_output_file_name) {
         using ObserverT = RegisterQuantizationParamsWithHistogramNetObserver;
         auto create_observer =
             [histogram_file_name, is_weight, qparams_output_file_name](
                 NetBase* net) {
               return make_unique<ObserverT>(
                   net,
                   histogram_file_name,
                   is_weight,
                   qparams_output_file_name);
             };
         AddGlobalNetObserverCreator(create_observer);
       },
       pybind11::arg("histogram_file_name"),
       pybind11::arg("is_weight") = false,
       pybind11::arg("qparams_output_file_name") = "");

   // AddRegisterQuantizationParamsWithHistogramObserver(net_name,
   //     histogram_file_name, is_weight=False, qparams_output_file_name="")
   //
   // Attaches a RegisterQuantizationParamsWithHistogramNetObserver to the net
   // called `net_name` in the current workspace and returns the attached
   // observer as a Python object. Raises (CAFFE_ENFORCE) if there is no
   // current workspace, the net does not exist, or AttachObserver returns null.
   m.def(
       "AddRegisterQuantizationParamsWithHistogramObserver",
       [](const string& net_name,
          const string& histogram_file_name,
          // bool, matching the `false` default below and the is_weight
          // parameter of the RegisterQuantizationParams* bindings.
          bool is_weight,
          const string& qparams_output_file_name) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         NetBase* net = gWorkspace->GetNet(net_name);
         CAFFE_ENFORCE(net, "Can't find net ", net_name);

         const Observable<NetBase>::Observer* observer = nullptr;
         {
           // Release the GIL only around the C++ work. pybind11::cast below
           // creates a Python object and therefore needs the GIL re-acquired.
           pybind11::gil_scoped_release g;
           observer = net->AttachObserver(
               make_unique<RegisterQuantizationParamsWithHistogramNetObserver>(
                   net,
                   histogram_file_name,
                   is_weight,
                   qparams_output_file_name));
         }

         CAFFE_ENFORCE(observer != nullptr);
         return pybind11::cast(observer);
       },
       pybind11::arg("net_name"),
       pybind11::arg("histogram_file_name"),
       pybind11::arg("is_weight") = false,
       pybind11::arg("qparams_output_file_name") = "");

   // AddScaleZeroOffsetArgumentsWithHistogram(net_def_bytes,
   //                                          histogram_file_name) -> bytes
   //
   // Parses `net_def_bytes` as a NetDef, passes it together with
   // `histogram_file_name` to dnnlowp::AddScaleZeroOffsetArgumentsWithHistogram
   // and returns the serialized result.
   // Raises (CAFFE_ENFORCE) if parsing or serialization fails.
   m.def(
       "AddScaleZeroOffsetArgumentsWithHistogram",
       [](const pybind11::bytes& net_def_bytes,
          const string& histogram_file_name) {
         NetDef def;
         CAFFE_ENFORCE(
             ParseProtoFromLargeString(net_def_bytes.cast<string>(), &def));

         string protob;
         {
           // Release the GIL only around the C++ transformation. Building
           // the pybind11::bytes result below requires holding the GIL.
           pybind11::gil_scoped_release g;
           auto transformed_net =
               dnnlowp::AddScaleZeroOffsetArgumentsWithHistogram(
                   def, histogram_file_name);
           CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
         }
         return pybind11::bytes(protob);
       });

   pybind11::class_<dnnlowp::TensorQuantizationParams>(m, "QueryTensorQparam")
       .def(pybind11::init<float, std::int32_t, int>())
       .def_property_readonly(
           "scale",
           [](dnnlowp::TensorQuantizationParams& qparam) {
             return qparam.scale;
           })
       .def_property_readonly(
           "zero_point",
           [](dnnlowp::TensorQuantizationParams& qparam) {
             return qparam.zero_point;
           })
       .def_property_readonly(
           "precision",
           [](dnnlowp::TensorQuantizationParams& qparam) {
             return qparam.precision;
           })
       .def_property_readonly(
           "min",
           [](dnnlowp::TensorQuantizationParams& qparam) {
             return qparam.Min();
           })
       .def_property_readonly(
           "max", [](dnnlowp::TensorQuantizationParams& qparam) {
             return qparam.Max();
           });
   // get_fakefp16_mapping(use_fp16_acc, use_nnpi): returns whatever
   // caffe2::opt::getFakeFp16OpMapping produces for the two flags, by value.
   m.def(
       "get_fakefp16_mapping",
       [](bool use_fp16_acc, bool use_nnpi) {
         auto mapping =
             caffe2::opt::getFakeFp16OpMapping(use_fp16_acc, use_nnpi);
         return mapping;
       });
   // freeze_quantization_params(net_def_bytes) -> bytes
   //
   // Parses `net_def_bytes` as a NetDef, hands it together with the current
   // workspace to freezeQuantizationParams (which receives the NetDef by
   // pointer), and returns the NetDef re-serialized.
   // Raises (CAFFE_ENFORCE) if parsing fails, there is no current workspace,
   // or serialization fails.
   m.def(
       "freeze_quantization_params",
       [](const pybind11::bytes& net_def_bytes) {
         NetDef def;
         CAFFE_ENFORCE(
             ParseProtoFromLargeString(net_def_bytes.cast<string>(), &def));

         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         freezeQuantizationParams(&def, gWorkspace);

         string protob;
         CAFFE_ENFORCE(def.SerializeToString(&protob));
         return pybind11::bytes(protob);
       });
   // ChooseStaticQuantizationParams(min, max, bins, preserve_sparsity=True,
   //     precision=8, quant_scheme="min_max", p99_threshold=0.99,
   //     is_weight=False)
   //
   // Builds a dnnlowp::Histogram from (min, max, bins) and asks the default
   // QuantizationFactory to choose quantization parameters for it.
   //   quant_scheme: one of "L2_MIN_QUANTIZATION",
   //     "L2_MIN_QUANTIZATION_APPROX", "KL_MIN_QUANTIZATION",
   //     "P99_QUANTIZATION", "L1_MIN_QUANTIZATION". Any other value (including
   //     the default "min_max") selects MIN_MAX_QUANTIZATION and logs an INFO
   //     message.
   //   p99_threshold: stored on the default factory instance as the weight
   //     or activation P99 threshold, depending on is_weight.
   m.def(
       "ChooseStaticQuantizationParams",
       [](float min,
          float max,
          const std::vector<uint64_t>& bins,
          bool preserve_sparsity,
          int precision,
          const std::string& quant_scheme,
          float p99_threshold,
          bool is_weight) {
         using Factory = dnnlowp::QuantizationFactory;

         dnnlowp::Histogram hist(min, max, bins);

         Factory::QuantizationKind quant_kind = Factory::MIN_MAX_QUANTIZATION;
         if (quant_scheme == "L2_MIN_QUANTIZATION") {
           quant_kind = Factory::L2_MIN_QUANTIZATION;
         } else if (quant_scheme == "L2_MIN_QUANTIZATION_APPROX") {
           quant_kind = Factory::L2_MIN_QUANTIZATION_APPROX;
         } else if (quant_scheme == "KL_MIN_QUANTIZATION") {
           quant_kind = Factory::KL_MIN_QUANTIZATION;
         } else if (quant_scheme == "P99_QUANTIZATION") {
           quant_kind = Factory::P99_QUANTIZATION;
         } else if (quant_scheme == "L1_MIN_QUANTIZATION") {
           quant_kind = Factory::L1_MIN_QUANTIZATION;
         } else {
           LOG(INFO) << "Using DNNLOWP default MIN_MAX_QUANTIZATION";
         }

         // The threshold is written to the shared default instance, so it
         // stays set after this call returns.
         Factory* qfactory = Factory::GetDefaultInstance();
         if (!is_weight) {
           qfactory->SetActivationP99Threshold(p99_threshold);
         } else {
           qfactory->SetWeightP99Threshold(p99_threshold);
         }
         return qfactory->ChooseQuantizationParams(
             hist, quant_kind, precision, preserve_sparsity, is_weight);
       },
       pybind11::arg("min"),
       pybind11::arg("max"),
       pybind11::arg("bins"),
       pybind11::arg("preserve_sparsity") = true,
       pybind11::arg("precision") = 8,
       pybind11::arg("quant_scheme") = "min_max",
       pybind11::arg("p99_threshold") = 0.99,
       pybind11::arg("is_weight") = false);
   // ObserveFp16FCPackedWeights(blob_name, weights_out_file)
   //
   // Writes the fp16 matrix held by blob `blob_name` to `weights_out_file` as
   // text: one line per row index, each value converted to float, values
   // separated by a single space.
   //   - Raises (CAFFE_ENFORCE) if there is no current workspace, the blob is
   //     missing, or the blob holds an empty unique_ptr.
   //   - Logs a warning and returns if the output file cannot be opened.
   m.def(
       "ObserveFp16FCPackedWeights",
       [](const string& blob_name, const string& weights_out_file) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         const auto* blob = gWorkspace->GetBlob(blob_name);
         CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
         fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
             blob->template Get<unique_ptr<fbgemm::PackedGemmMatrixFP16>>()
                 .get();
         // Guard against a blob that holds an empty unique_ptr.
         CAFFE_ENFORCE(
             packedGemmMatrixPtr != nullptr,
             "Blob ",
             blob_name,
             " does not hold a packed fp16 matrix");
         const uint64_t nrow = packedGemmMatrixPtr->numRows();
         const uint64_t ncol = packedGemmMatrixPtr->numCols();
         fbgemm::float16* unpacked_mat_ptr = nullptr;
         vector<fbgemm::float16> unpacked_mat;

         if (!packedGemmMatrixPtr->packed()) {
           // Not packed: read the matrix storage directly.
           unpacked_mat_ptr = packedGemmMatrixPtr->pmat();
         } else {
           unpacked_mat.resize(nrow * ncol);
           packedGemmMatrixPtr->unpack(
               unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
           unpacked_mat_ptr = unpacked_mat.data();
         }

         ofstream fout(weights_out_file);
         if (!fout) {
           LOG(WARNING) << "Can't open output file to dump fp16 weights "
                        << weights_out_file;
           return;
         }
         // Index type matches nrow/ncol to avoid signed/unsigned comparison
         // and int overflow. Element (i, j) is read at offset i + nrow * j.
         for (uint64_t i = 0; i < nrow; ++i) {
           for (uint64_t j = 0; j < ncol; ++j) {
             if (j > 0) {
               fout << " ";
             }
             fout << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]);
           }
           fout << '\n';
         }
         // Flush once at the end instead of after every row.
         fout.close();
         LOG(INFO) << "Written unpacked blob " << blob_name << " to "
                   << weights_out_file;
       },
       pybind11::arg("blob_name"),
       pybind11::arg("weights_out_file"));
   // ObserveInt8FCPackedWeights(blob_name, weights_out_file)
   //
   // Writes the contents of the Int8FCDNNLowPPackedWeightBlob stored in
   // `blob_name` to `weights_out_file` as text:
   //   line 1: "scale zero_point" for every entry of qparams, space separated
   //   then:   one line per row of the unpacked int8 matrix.
   //   - Raises (CAFFE_ENFORCE) if there is no current workspace, the
   //     original tensor is not 2-D, or the packed matrix pointer is null.
   //   - Logs a warning and returns if the blob is missing or the output file
   //     cannot be opened.
   m.def(
       "ObserveInt8FCPackedWeights",
       [](const string& blob_name, const string& weights_out_file) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         const auto* blob = gWorkspace->GetBlob(blob_name);
         if (blob == nullptr) {
           LOG(WARNING) << "Can't find blob " << blob_name;
           return;
         }
         const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
             blob->template Get<Int8FCDNNLowPPackedWeightBlob>();
         auto& qparams = packedInt8Blob.qparams;
         auto& unpacked_tensor = packedInt8Blob.original_tensor;
         auto& packed_tensor = packedInt8Blob.W;

         auto shape = unpacked_tensor.sizes();
         CAFFE_ENFORCE(shape.size() == 2);
         // Guard against a blob whose packed matrix was never set.
         CAFFE_ENFORCE(
             packed_tensor != nullptr,
             "Blob ",
             blob_name,
             " does not hold a packed int8 matrix");
         const int64_t nrow = shape[0];
         const int64_t ncol = shape[1];
         vector<int8_t> unpacked_int8_data;
         unpacked_int8_data.resize(nrow * ncol);
         packed_tensor->unpack(unpacked_int8_data.data());

         ofstream fout(weights_out_file);
         if (!fout) {
           LOG(WARNING) << "Can't open output file to dump int8 weights "
                        << weights_out_file;
           return;
         }
         // size_t index avoids the signed/unsigned comparison with size().
         for (size_t i = 0; i < qparams.size(); ++i) {
           if (i > 0) {
             fout << " ";
           }
           fout << to_string(qparams[i].scale) << " "
                << to_string(qparams[i].zero_point);
         }
         fout << '\n';
         for (int64_t i = 0; i < nrow; ++i) {
           for (int64_t j = 0; j < ncol; ++j) {
             if (j > 0) {
               fout << " ";
             }
             // to_string promotes the int8 value to int, so it is written
             // as a number rather than as a character.
             fout << to_string(unpacked_int8_data[i * ncol + j]);
           }
           fout << '\n';
         }
         // Flush once at the end instead of after every row.
         fout.close();
         LOG(INFO) << "Written int8 qparams and weights for " << blob_name
                   << " to " << weights_out_file;
       },
       pybind11::arg("blob_name"),
       pybind11::arg("weights_out_file"));
   // CreateInt8QuantSchemeBlob(quant_scheme_blob_name, quantization_kind,
   //                           preserve_sparsity)
   //
   // Looks up blob `quant_scheme_blob_name` in the current workspace, creating
   // it if absent, and stores in it a freshly built Int8QuantSchemeBlob
   // constructed from (quantization_kind, preserve_sparsity).
   // Raises (CAFFE_ENFORCE) if there is no current workspace.
   m.def(
       "CreateInt8QuantSchemeBlob",
       [](std::string quant_scheme_blob_name,
          std::string quantization_kind,
          bool preserve_sparsity) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);

         auto* target_blob = gWorkspace->GetBlob(quant_scheme_blob_name);
         if (!target_blob) {
           target_blob = gWorkspace->CreateBlob(quant_scheme_blob_name);
         }

         // Replace whatever the blob's unique_ptr currently owns.
         auto* holder =
             target_blob->GetMutable<unique_ptr<Int8QuantSchemeBlob>>();
         *holder = make_unique<Int8QuantSchemeBlob>(
             quantization_kind, preserve_sparsity);
       },
       pybind11::arg("quant_scheme_blob_name"),
       pybind11::arg("quantization_kind"),
       pybind11::arg("preserve_sparsity"));
   // CreateInt8QuantParamsBlob(quant_param_blob_name, scale, zero_point)
   //
   // Looks up the named blob in the current workspace, creating it if absent,
   // and stores in it a freshly built Int8QuantParamsBlob constructed from
   // (scale, zero_point).
   // Raises (CAFFE_ENFORCE) if there is no current workspace.
   m.def(
       "CreateInt8QuantParamsBlob",
       [](std::string quant_params_blob_name, float scale, int zero_point) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);

         auto* target_blob = gWorkspace->GetBlob(quant_params_blob_name);
         if (!target_blob) {
           target_blob = gWorkspace->CreateBlob(quant_params_blob_name);
         }

         // Replace whatever the blob's unique_ptr currently owns.
         auto* holder =
             target_blob->GetMutable<unique_ptr<Int8QuantParamsBlob>>();
         *holder = make_unique<Int8QuantParamsBlob>(scale, zero_point);
       },
       // NOTE(review): the Python keyword is "quant_param_blob_name"
       // (singular), unlike ObserveInt8QuantParamsBlob's
       // "quant_params_blob_name". Kept as-is so keyword callers keep working.
       pybind11::arg("quant_param_blob_name"),
       pybind11::arg("scale"),
       pybind11::arg("zero_point"));
   // ObserveInt8QuantParamsBlob(quant_params_blob_name) -> (scale, zero_point)
   //
   // Reads the Int8QuantParamsBlob stored in the named blob of the current
   // workspace and returns its qparam.scale and qparam.zero_point.
   // Raises (CAFFE_ENFORCE) if there is no current workspace, the blob does
   // not exist, or the blob holds an empty unique_ptr.
   m.def(
       "ObserveInt8QuantParamsBlob",
       [](std::string quant_params_blob_name) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         auto* quant_params_blob = gWorkspace->GetBlob(quant_params_blob_name);
         CAFFE_ENFORCE(
             quant_params_blob, "Can't find blob ", quant_params_blob_name);
         auto* quant_params_blob_data =
             quant_params_blob->Get<unique_ptr<Int8QuantParamsBlob>>().get();
         // Guard against a blob that holds an empty unique_ptr.
         CAFFE_ENFORCE(
             quant_params_blob_data != nullptr,
             "Blob ",
             quant_params_blob_name,
             " does not hold quantization params");
         return std::tuple<float, int>(
             quant_params_blob_data->qparam.scale,
             quant_params_blob_data->qparam.zero_point);
       },
       pybind11::arg("quant_params_blob_name"));
   // ObserveInt8QuantSchemeBlob(quant_scheme_blob_name)
   //     -> (quantization_kind, preserve_sparsity)
   //
   // Reads the Int8QuantSchemeBlob stored in the named blob of the current
   // workspace and returns its quantization_kind_ and preserve_sparsity_.
   // Raises (CAFFE_ENFORCE) if there is no current workspace, the blob does
   // not exist, or the blob holds an empty unique_ptr.
   m.def(
       "ObserveInt8QuantSchemeBlob",
       [](std::string quant_scheme_blob_name) {
         Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
         CAFFE_ENFORCE(gWorkspace);
         auto* quant_scheme_blob = gWorkspace->GetBlob(quant_scheme_blob_name);
         CAFFE_ENFORCE(
             quant_scheme_blob, "Can't find blob ", quant_scheme_blob_name);
         auto* quant_scheme_blob_data =
             quant_scheme_blob->Get<unique_ptr<Int8QuantSchemeBlob>>().get();
         // Guard against a blob that holds an empty unique_ptr.
         CAFFE_ENFORCE(
             quant_scheme_blob_data != nullptr,
             "Blob ",
             quant_scheme_blob_name,
             " does not hold a quantization scheme");
         return std::tuple<std::string, bool>(
             quant_scheme_blob_data->quantization_kind_,
             quant_scheme_blob_data->preserve_sparsity_);
       },
       pybind11::arg("quant_scheme_blob_name"));
 }
	#include <fbgemm/FbgemmFP16.h>
	#include <fbgemm/Utils.h>
	#include <pybind11/pybind11.h>
	#include <pybind11/stl.h>
	#include "activation_distribution_observer.h"
	#include "caffe2/opt/fakefp16_transform.h"
	#include "caffe2/opt/custom/freeze_quantization_params.h"
	#include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
	#include "caffe2/quantization/server/fbgemm_pack_blob.h"
	#include "caffe2/quantization/server/int8_gen_quant_params.h"
	#include "quantization_error_minimization.h"

	// Forward declaration of the workspace accessor used by the bindings below.
	// Every caller in this file checks the returned pointer with CAFFE_ENFORCE
	// before using it.
	namespace caffe2 {
	namespace python {
	// defined in caffe2/python/pybind_state.cc
	Workspace* GetCurrentWorkspace();
	} // namespace python
	} // namespace caffe2

	PYBIND11_MODULE(dnnlowp_pybind11, m) {
	using namespace std;
	using namespace caffe2;

	// ClearNetObservers(): thin wrapper that forwards to
	// ClearGlobalNetObservers() and returns nothing.
	m.def("ClearNetObservers", [] {
	  ClearGlobalNetObservers();
	});

	// ObserveMinMaxOfOutput(min_max_file_name, dump_freq=-1, delimiter=" ")
	//
	// Registers a global net-observer creator. Each time the creator is invoked
	// with a net it builds an OutputMinMaxNetObserver from the arguments given
	// here, which is why the creator captures them by value.
	m.def(
	    "ObserveMinMaxOfOutput",
	    [](const string& min_max_file_name, int dump_freq, string delimiter) {
	      auto create_observer =
	          [dump_freq, min_max_file_name, delimiter](NetBase* net) {
	            return make_unique<OutputMinMaxNetObserver>(
	                net, min_max_file_name, dump_freq, delimiter);
	          };
	      AddGlobalNetObserverCreator(create_observer);
	    },
	    pybind11::arg("min_max_file_name"),
	    pybind11::arg("dump_freq") = -1,
	    pybind11::arg("delimiter") = " ");

	// ObserveHistogramOfOutput(out_file_name, dump_freq=-1, mul_nets=False,
	//                          op_filter="", delimiter=" ")
	//
	// Registers a global net-observer creator that builds a
	// HistogramNetObserver for each net it is invoked with. All arguments are
	// captured by value because the creator runs after this call has returned.
	m.def(
	    "ObserveHistogramOfOutput",
	    [](const string& out_file_name,
	       int dump_freq,
	       bool mul_nets,
	       string op_filter,
	       string delimiter) {
	      auto create_observer = [out_file_name,
	                              dump_freq,
	                              mul_nets,
	                              op_filter,
	                              delimiter](NetBase* net) {
	        // NOTE(review): 2048 is presumably the histogram bin count --
	        // confirm against the HistogramNetObserver constructor.
	        return make_unique<HistogramNetObserver>(
	            net,
	            out_file_name,
	            2048,
	            dump_freq,
	            mul_nets,
	            op_filter,
	            delimiter);
	      };
	      AddGlobalNetObserverCreator(create_observer);
	    },
	    pybind11::arg("out_file_name"),
	    pybind11::arg("dump_freq") = -1,
	    pybind11::arg("mul_nets") = false,
	    pybind11::arg("op_filter") = "",
	    pybind11::arg("delimiter") = " ");

	// DumpHistogramFile(ob)
	//
	// Calls DumpHistogramFile() on the given observer.
	//   ob: observer handle, e.g. the value returned by AddHistogramObserver.
	// Raises (CAFFE_ENFORCE) when `ob` is None or is not a HistogramNetObserver,
	// instead of dereferencing a null pointer.
	m.def(
	    "DumpHistogramFile",
	    [](Observable<NetBase>::Observer* ob) {
	      // pybind11 hands Python None to pointer parameters as nullptr.
	      CAFFE_ENFORCE(ob != nullptr, "DumpHistogramFile: observer is None");
	      HistogramNetObserver* hist_ob =
	          dynamic_cast_if_rtti<HistogramNetObserver*>(ob);
	      // A checked downcast yields nullptr for any other observer type.
	      CAFFE_ENFORCE(
	          hist_ob != nullptr,
	          "DumpHistogramFile: observer is not a HistogramNetObserver");
	      hist_ob->DumpHistogramFile();
	    },
	    pybind11::arg("ob"));

	// AddHistogramObserver(net_name, out_file_name, dump_freq=-1,
	//                      mul_nets=False, delimiter=" ")
	//
	// Attaches a HistogramNetObserver to the net called `net_name` in the
	// current workspace and returns the attached observer as a Python object.
	// Raises (CAFFE_ENFORCE) if there is no current workspace, the net does not
	// exist, or AttachObserver returns null.
	m.def(
	    "AddHistogramObserver",
	    [](const string& net_name,
	       const string& out_file_name,
	       int dump_freq,
	       bool mul_nets,
	       string delimiter) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      NetBase* net = gWorkspace->GetNet(net_name);
	      CAFFE_ENFORCE(net, "Can't find net ", net_name);

	      const Observable<NetBase>::Observer* observer = nullptr;
	      {
	        // Release the GIL only around the C++ work. pybind11::cast below
	        // creates a Python object and therefore needs the GIL re-acquired.
	        pybind11::gil_scoped_release g;
	        // NOTE(review): 2048 is presumably the histogram bin count and ""
	        // the op filter -- confirm against HistogramNetObserver.
	        observer = net->AttachObserver(make_unique<HistogramNetObserver>(
	            net, out_file_name, 2048, dump_freq, mul_nets, "", delimiter));
	      }

	      CAFFE_ENFORCE(observer != nullptr);
	      return pybind11::cast(observer);
	    },
	    pybind11::arg("net_name"),
	    pybind11::arg("out_file_name"),
	    pybind11::arg("dump_freq") = -1,
	    pybind11::arg("mul_nets") = false,
	    pybind11::arg("delimiter") = " ");

	// DumpOutputColumnMaxHistogramFile(ob)
	//
	// Calls DumpOutputColumnMaxHistogramFile() on the given observer.
	//   ob: observer handle, e.g. the value returned by
	//       AddOutputColumnMaxHistogramObserver.
	// Raises (CAFFE_ENFORCE) when `ob` is None or is not an
	// OutputColumnMaxHistogramNetObserver, instead of dereferencing null.
	m.def(
	    "DumpOutputColumnMaxHistogramFile",
	    [](Observable<NetBase>::Observer* ob) {
	      // pybind11 hands Python None to pointer parameters as nullptr.
	      CAFFE_ENFORCE(
	          ob != nullptr, "DumpOutputColumnMaxHistogramFile: observer is None");
	      OutputColumnMaxHistogramNetObserver* hist_ob =
	          dynamic_cast_if_rtti<OutputColumnMaxHistogramNetObserver*>(ob);
	      // A checked downcast yields nullptr for any other observer type.
	      CAFFE_ENFORCE(
	          hist_ob != nullptr,
	          "DumpOutputColumnMaxHistogramFile: observer is not an "
	          "OutputColumnMaxHistogramNetObserver");
	      hist_ob->DumpOutputColumnMaxHistogramFile();
	    },
	    pybind11::arg("ob"));

	// AddOutputColumnMaxHistogramObserver(net_name, out_file_name,
	//     observe_column_max_for_blobs, dump_freq=-1, bin_nums=16,
	//     mul_nets=False, delimiter=" ")
	//
	// Attaches an OutputColumnMaxHistogramNetObserver to the net called
	// `net_name` in the current workspace and returns the attached observer as
	// a Python object. Raises (CAFFE_ENFORCE) if there is no current workspace,
	// the net does not exist, or AttachObserver returns null.
	m.def(
	    "AddOutputColumnMaxHistogramObserver",
	    [](const string& net_name,
	       const string& out_file_name,
	       const std::vector<std::string>& observe_column_max_for_blobs,
	       int dump_freq,
	       int bin_nums,
	       bool mul_nets,
	       string delimiter) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      NetBase* net = gWorkspace->GetNet(net_name);
	      CAFFE_ENFORCE(net, "Can't find net ", net_name);

	      const Observable<NetBase>::Observer* observer = nullptr;
	      {
	        // Release the GIL only around the C++ work. pybind11::cast below
	        // creates a Python object and therefore needs the GIL re-acquired.
	        pybind11::gil_scoped_release g;
	        // The constructor receives bin_nums before dump_freq, which is
	        // the reverse of the Python argument order.
	        observer = net->AttachObserver(
	            make_unique<OutputColumnMaxHistogramNetObserver>(
	                net,
	                out_file_name,
	                observe_column_max_for_blobs,
	                bin_nums,
	                dump_freq,
	                mul_nets,
	                delimiter));
	      }

	      CAFFE_ENFORCE(observer != nullptr);
	      return pybind11::cast(observer);
	    },
	    pybind11::arg("net_name"),
	    pybind11::arg("out_file_name"),
	    pybind11::arg("observe_column_max_for_blobs"),
	    pybind11::arg("dump_freq") = -1,
	    pybind11::arg("bin_nums") = 16,
	    pybind11::arg("mul_nets") = false,
	    pybind11::arg("delimiter") = " ");

	// ChooseQuantizationParams(blob_name) -> (scale, zero_point)
	//
	// Asks the default QuantizationFactory for quantization parameters of the
	// float CPU tensor stored in blob `blob_name`, treating it as a weight.
	// If the blob is missing, is not a CPU tensor, or is not a float tensor, a
	// warning is logged and (1.0, 0) is returned.
	// Raises (CAFFE_ENFORCE) only when there is no current workspace.
	m.def(
	    "ChooseQuantizationParams",
	    [](const std::string& blob_name) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      pybind11::gil_scoped_release g;

	      const auto* blob = gWorkspace->GetBlob(blob_name);
	      if (blob == nullptr) {
	        LOG(WARNING) << "Can't find blob " << blob_name;
	      } else if (!BlobIsTensorType(*blob, CPU)) {
	        LOG(WARNING) << "Blob " << blob_name << " is not a tensor";
	      } else {
	        const auto& tensor = blob->template Get<Tensor>();
	        if (tensor.IsType<float>()) {
	          dnnlowp::QuantizationFactory* qfactory =
	              dnnlowp::QuantizationFactory::GetDefaultInstance();
	          // The inline argument label must be a real comment; the bare
	          // "/weight/" form is parsed as division and does not compile.
	          dnnlowp::TensorQuantizationParams qparams =
	              qfactory->ChooseQuantizationParams(
	                  tensor.data<float>(), tensor.size(), true /*weight*/);
	          return std::tuple<float, int>(qparams.scale, qparams.zero_point);
	        } else {
	          LOG(WARNING) << "Blob " << blob_name << " is not a float tensor";
	        }
	      }
	      // Fallback for every warning path above.
	      return std::tuple<float, int>(1.0, 0);
	    },
	    pybind11::arg("blob_name"));

	// RegisterQuantizationParams(min_max_file_name, is_weight=False,
	//                            qparams_output_file_name="")
	//
	// Registers a global net-observer creator that builds a
	// RegisterQuantizationParamsNetObserver for each net it is invoked with.
	// The arguments are captured by value for that later invocation.
	m.def(
	    "RegisterQuantizationParams",
	    [](const string& min_max_file_name,
	       bool is_weight,
	       const string& qparams_output_file_name) {
	      auto create_observer =
	          [min_max_file_name, is_weight, qparams_output_file_name](
	              NetBase* net) {
	            return make_unique<RegisterQuantizationParamsNetObserver>(
	                net, min_max_file_name, is_weight, qparams_output_file_name);
	          };
	      AddGlobalNetObserverCreator(create_observer);
	    },
	    pybind11::arg("min_max_file_name"),
	    pybind11::arg("is_weight") = false,
	    pybind11::arg("qparams_output_file_name") = "");

	// RegisterQuantizationParamsWithHistogram(histogram_file_name,
	//     is_weight=False, qparams_output_file_name="")
	//
	// Registers a global net-observer creator that builds a
	// RegisterQuantizationParamsWithHistogramNetObserver for each net it is
	// invoked with. The arguments are captured by value for that later call.
	m.def(
	    "RegisterQuantizationParamsWithHistogram",
	    [](const string& histogram_file_name,
	       bool is_weight,
	       const string& qparams_output_file_name) {
	      using ObserverT = RegisterQuantizationParamsWithHistogramNetObserver;
	      auto create_observer =
	          [histogram_file_name, is_weight, qparams_output_file_name](
	              NetBase* net) {
	            return make_unique<ObserverT>(
	                net,
	                histogram_file_name,
	                is_weight,
	                qparams_output_file_name);
	          };
	      AddGlobalNetObserverCreator(create_observer);
	    },
	    pybind11::arg("histogram_file_name"),
	    pybind11::arg("is_weight") = false,
	    pybind11::arg("qparams_output_file_name") = "");

	// AddRegisterQuantizationParamsWithHistogramObserver(net_name,
	//     histogram_file_name, is_weight=False, qparams_output_file_name="")
	//
	// Attaches a RegisterQuantizationParamsWithHistogramNetObserver to the net
	// called `net_name` in the current workspace and returns the attached
	// observer as a Python object. Raises (CAFFE_ENFORCE) if there is no
	// current workspace, the net does not exist, or AttachObserver returns null.
	m.def(
	    "AddRegisterQuantizationParamsWithHistogramObserver",
	    [](const string& net_name,
	       const string& histogram_file_name,
	       // bool, matching the `false` default below and the is_weight
	       // parameter of the RegisterQuantizationParams* bindings.
	       bool is_weight,
	       const string& qparams_output_file_name) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      NetBase* net = gWorkspace->GetNet(net_name);
	      CAFFE_ENFORCE(net, "Can't find net ", net_name);

	      const Observable<NetBase>::Observer* observer = nullptr;
	      {
	        // Release the GIL only around the C++ work. pybind11::cast below
	        // creates a Python object and therefore needs the GIL re-acquired.
	        pybind11::gil_scoped_release g;
	        observer = net->AttachObserver(
	            make_unique<RegisterQuantizationParamsWithHistogramNetObserver>(
	                net,
	                histogram_file_name,
	                is_weight,
	                qparams_output_file_name));
	      }

	      CAFFE_ENFORCE(observer != nullptr);
	      return pybind11::cast(observer);
	    },
	    pybind11::arg("net_name"),
	    pybind11::arg("histogram_file_name"),
	    pybind11::arg("is_weight") = false,
	    pybind11::arg("qparams_output_file_name") = "");

	// AddScaleZeroOffsetArgumentsWithHistogram(net_def_bytes,
	//                                          histogram_file_name) -> bytes
	//
	// Parses `net_def_bytes` as a NetDef, passes it together with
	// `histogram_file_name` to dnnlowp::AddScaleZeroOffsetArgumentsWithHistogram
	// and returns the serialized result.
	// Raises (CAFFE_ENFORCE) if parsing or serialization fails.
	m.def(
	    "AddScaleZeroOffsetArgumentsWithHistogram",
	    [](const pybind11::bytes& net_def_bytes,
	       const string& histogram_file_name) {
	      NetDef def;
	      CAFFE_ENFORCE(
	          ParseProtoFromLargeString(net_def_bytes.cast<string>(), &def));

	      string protob;
	      {
	        // Release the GIL only around the C++ transformation. Building
	        // the pybind11::bytes result below requires holding the GIL.
	        pybind11::gil_scoped_release g;
	        auto transformed_net =
	            dnnlowp::AddScaleZeroOffsetArgumentsWithHistogram(
	                def, histogram_file_name);
	        CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
	      }
	      return pybind11::bytes(protob);
	    });

	pybind11::class_<dnnlowp::TensorQuantizationParams>(m, "QueryTensorQparam")
	.def(pybind11::init<float, std::int32_t, int>())
	.def_property_readonly(
	"scale",
	[](dnnlowp::TensorQuantizationParams& qparam) {
	return qparam.scale;
	})
	.def_property_readonly(
	"zero_point",
	[](dnnlowp::TensorQuantizationParams& qparam) {
	return qparam.zero_point;
	})
	.def_property_readonly(
	"precision",
	[](dnnlowp::TensorQuantizationParams& qparam) {
	return qparam.precision;
	})
	.def_property_readonly(
	"min",
	[](dnnlowp::TensorQuantizationParams& qparam) {
	return qparam.Min();
	})
	.def_property_readonly(
	"max", [](dnnlowp::TensorQuantizationParams& qparam) {
	return qparam.Max();
	});
	// get_fakefp16_mapping(use_fp16_acc, use_nnpi): returns whatever
	// caffe2::opt::getFakeFp16OpMapping produces for the two flags, by value.
	m.def(
	    "get_fakefp16_mapping",
	    [](bool use_fp16_acc, bool use_nnpi) {
	      auto mapping =
	          caffe2::opt::getFakeFp16OpMapping(use_fp16_acc, use_nnpi);
	      return mapping;
	    });
	// freeze_quantization_params(net_def_bytes) -> bytes
	//
	// Parses `net_def_bytes` as a NetDef, hands it together with the current
	// workspace to freezeQuantizationParams (which receives the NetDef by
	// pointer), and returns the NetDef re-serialized.
	// Raises (CAFFE_ENFORCE) if parsing fails, there is no current workspace,
	// or serialization fails.
	m.def(
	    "freeze_quantization_params",
	    [](const pybind11::bytes& net_def_bytes) {
	      NetDef def;
	      CAFFE_ENFORCE(
	          ParseProtoFromLargeString(net_def_bytes.cast<string>(), &def));

	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      freezeQuantizationParams(&def, gWorkspace);

	      string protob;
	      CAFFE_ENFORCE(def.SerializeToString(&protob));
	      return pybind11::bytes(protob);
	    });
	// ChooseStaticQuantizationParams(min, max, bins, preserve_sparsity=True,
	//     precision=8, quant_scheme="min_max", p99_threshold=0.99,
	//     is_weight=False)
	//
	// Builds a dnnlowp::Histogram from (min, max, bins) and asks the default
	// QuantizationFactory to choose quantization parameters for it.
	//   quant_scheme: one of "L2_MIN_QUANTIZATION",
	//     "L2_MIN_QUANTIZATION_APPROX", "KL_MIN_QUANTIZATION",
	//     "P99_QUANTIZATION", "L1_MIN_QUANTIZATION". Any other value (including
	//     the default "min_max") selects MIN_MAX_QUANTIZATION and logs an INFO
	//     message.
	//   p99_threshold: stored on the default factory instance as the weight
	//     or activation P99 threshold, depending on is_weight.
	m.def(
	    "ChooseStaticQuantizationParams",
	    [](float min,
	       float max,
	       const std::vector<uint64_t>& bins,
	       bool preserve_sparsity,
	       int precision,
	       const std::string& quant_scheme,
	       float p99_threshold,
	       bool is_weight) {
	      using Factory = dnnlowp::QuantizationFactory;

	      dnnlowp::Histogram hist(min, max, bins);

	      Factory::QuantizationKind quant_kind = Factory::MIN_MAX_QUANTIZATION;
	      if (quant_scheme == "L2_MIN_QUANTIZATION") {
	        quant_kind = Factory::L2_MIN_QUANTIZATION;
	      } else if (quant_scheme == "L2_MIN_QUANTIZATION_APPROX") {
	        quant_kind = Factory::L2_MIN_QUANTIZATION_APPROX;
	      } else if (quant_scheme == "KL_MIN_QUANTIZATION") {
	        quant_kind = Factory::KL_MIN_QUANTIZATION;
	      } else if (quant_scheme == "P99_QUANTIZATION") {
	        quant_kind = Factory::P99_QUANTIZATION;
	      } else if (quant_scheme == "L1_MIN_QUANTIZATION") {
	        quant_kind = Factory::L1_MIN_QUANTIZATION;
	      } else {
	        LOG(INFO) << "Using DNNLOWP default MIN_MAX_QUANTIZATION";
	      }

	      // The threshold is written to the shared default instance, so it
	      // stays set after this call returns.
	      Factory* qfactory = Factory::GetDefaultInstance();
	      if (!is_weight) {
	        qfactory->SetActivationP99Threshold(p99_threshold);
	      } else {
	        qfactory->SetWeightP99Threshold(p99_threshold);
	      }
	      return qfactory->ChooseQuantizationParams(
	          hist, quant_kind, precision, preserve_sparsity, is_weight);
	    },
	    pybind11::arg("min"),
	    pybind11::arg("max"),
	    pybind11::arg("bins"),
	    pybind11::arg("preserve_sparsity") = true,
	    pybind11::arg("precision") = 8,
	    pybind11::arg("quant_scheme") = "min_max",
	    pybind11::arg("p99_threshold") = 0.99,
	    pybind11::arg("is_weight") = false);
	// ObserveFp16FCPackedWeights(blob_name, weights_out_file)
	//
	// Writes the fp16 matrix held by blob `blob_name` to `weights_out_file` as
	// text: one line per row index, each value converted to float, values
	// separated by a single space.
	//   - Raises (CAFFE_ENFORCE) if there is no current workspace, the blob is
	//     missing, or the blob holds an empty unique_ptr.
	//   - Logs a warning and returns if the output file cannot be opened.
	m.def(
	    "ObserveFp16FCPackedWeights",
	    [](const string& blob_name, const string& weights_out_file) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      const auto* blob = gWorkspace->GetBlob(blob_name);
	      CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
	      fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
	          blob->template Get<unique_ptr<fbgemm::PackedGemmMatrixFP16>>()
	              .get();
	      // Guard against a blob that holds an empty unique_ptr.
	      CAFFE_ENFORCE(
	          packedGemmMatrixPtr != nullptr,
	          "Blob ",
	          blob_name,
	          " does not hold a packed fp16 matrix");
	      const uint64_t nrow = packedGemmMatrixPtr->numRows();
	      const uint64_t ncol = packedGemmMatrixPtr->numCols();
	      fbgemm::float16* unpacked_mat_ptr = nullptr;
	      vector<fbgemm::float16> unpacked_mat;

	      if (!packedGemmMatrixPtr->packed()) {
	        // Not packed: read the matrix storage directly.
	        unpacked_mat_ptr = packedGemmMatrixPtr->pmat();
	      } else {
	        unpacked_mat.resize(nrow * ncol);
	        packedGemmMatrixPtr->unpack(
	            unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
	        unpacked_mat_ptr = unpacked_mat.data();
	      }

	      ofstream fout(weights_out_file);
	      if (!fout) {
	        LOG(WARNING) << "Can't open output file to dump fp16 weights "
	                     << weights_out_file;
	        return;
	      }
	      // Index type matches nrow/ncol to avoid signed/unsigned comparison
	      // and int overflow. Element (i, j) is read at offset i + nrow * j.
	      for (uint64_t i = 0; i < nrow; ++i) {
	        for (uint64_t j = 0; j < ncol; ++j) {
	          if (j > 0) {
	            fout << " ";
	          }
	          fout << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]);
	        }
	        fout << '\n';
	      }
	      // Flush once at the end instead of after every row.
	      fout.close();
	      LOG(INFO) << "Written unpacked blob " << blob_name << " to "
	                << weights_out_file;
	    },
	    pybind11::arg("blob_name"),
	    pybind11::arg("weights_out_file"));
	// ObserveInt8FCPackedWeights(blob_name, weights_out_file)
	//
	// Writes the contents of the Int8FCDNNLowPPackedWeightBlob stored in
	// `blob_name` to `weights_out_file` as text:
	//   line 1: "scale zero_point" for every entry of qparams, space separated
	//   then:   one line per row of the unpacked int8 matrix.
	//   - Raises (CAFFE_ENFORCE) if there is no current workspace, the
	//     original tensor is not 2-D, or the packed matrix pointer is null.
	//   - Logs a warning and returns if the blob is missing or the output file
	//     cannot be opened.
	m.def(
	    "ObserveInt8FCPackedWeights",
	    [](const string& blob_name, const string& weights_out_file) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      const auto* blob = gWorkspace->GetBlob(blob_name);
	      if (blob == nullptr) {
	        LOG(WARNING) << "Can't find blob " << blob_name;
	        return;
	      }
	      const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
	          blob->template Get<Int8FCDNNLowPPackedWeightBlob>();
	      auto& qparams = packedInt8Blob.qparams;
	      auto& unpacked_tensor = packedInt8Blob.original_tensor;
	      auto& packed_tensor = packedInt8Blob.W;

	      auto shape = unpacked_tensor.sizes();
	      CAFFE_ENFORCE(shape.size() == 2);
	      // Guard against a blob whose packed matrix was never set.
	      CAFFE_ENFORCE(
	          packed_tensor != nullptr,
	          "Blob ",
	          blob_name,
	          " does not hold a packed int8 matrix");
	      const int64_t nrow = shape[0];
	      const int64_t ncol = shape[1];
	      vector<int8_t> unpacked_int8_data;
	      unpacked_int8_data.resize(nrow * ncol);
	      packed_tensor->unpack(unpacked_int8_data.data());

	      ofstream fout(weights_out_file);
	      if (!fout) {
	        LOG(WARNING) << "Can't open output file to dump int8 weights "
	                     << weights_out_file;
	        return;
	      }
	      // size_t index avoids the signed/unsigned comparison with size().
	      for (size_t i = 0; i < qparams.size(); ++i) {
	        if (i > 0) {
	          fout << " ";
	        }
	        fout << to_string(qparams[i].scale) << " "
	             << to_string(qparams[i].zero_point);
	      }
	      fout << '\n';
	      for (int64_t i = 0; i < nrow; ++i) {
	        for (int64_t j = 0; j < ncol; ++j) {
	          if (j > 0) {
	            fout << " ";
	          }
	          // to_string promotes the int8 value to int, so it is written
	          // as a number rather than as a character.
	          fout << to_string(unpacked_int8_data[i * ncol + j]);
	        }
	        fout << '\n';
	      }
	      // Flush once at the end instead of after every row.
	      fout.close();
	      LOG(INFO) << "Written int8 qparams and weights for " << blob_name
	                << " to " << weights_out_file;
	    },
	    pybind11::arg("blob_name"),
	    pybind11::arg("weights_out_file"));
	// CreateInt8QuantSchemeBlob(quant_scheme_blob_name, quantization_kind,
	//                           preserve_sparsity)
	//
	// Looks up blob `quant_scheme_blob_name` in the current workspace, creating
	// it if absent, and stores in it a freshly built Int8QuantSchemeBlob
	// constructed from (quantization_kind, preserve_sparsity).
	// Raises (CAFFE_ENFORCE) if there is no current workspace.
	m.def(
	    "CreateInt8QuantSchemeBlob",
	    [](std::string quant_scheme_blob_name,
	       std::string quantization_kind,
	       bool preserve_sparsity) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);

	      auto* target_blob = gWorkspace->GetBlob(quant_scheme_blob_name);
	      if (!target_blob) {
	        target_blob = gWorkspace->CreateBlob(quant_scheme_blob_name);
	      }

	      // Replace whatever the blob's unique_ptr currently owns.
	      auto* holder =
	          target_blob->GetMutable<unique_ptr<Int8QuantSchemeBlob>>();
	      *holder = make_unique<Int8QuantSchemeBlob>(
	          quantization_kind, preserve_sparsity);
	    },
	    pybind11::arg("quant_scheme_blob_name"),
	    pybind11::arg("quantization_kind"),
	    pybind11::arg("preserve_sparsity"));
	// CreateInt8QuantParamsBlob(quant_param_blob_name, scale, zero_point)
	//
	// Looks up the named blob in the current workspace, creating it if absent,
	// and stores in it a freshly built Int8QuantParamsBlob constructed from
	// (scale, zero_point).
	// Raises (CAFFE_ENFORCE) if there is no current workspace.
	m.def(
	    "CreateInt8QuantParamsBlob",
	    [](std::string quant_params_blob_name, float scale, int zero_point) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);

	      auto* target_blob = gWorkspace->GetBlob(quant_params_blob_name);
	      if (!target_blob) {
	        target_blob = gWorkspace->CreateBlob(quant_params_blob_name);
	      }

	      // Replace whatever the blob's unique_ptr currently owns.
	      auto* holder =
	          target_blob->GetMutable<unique_ptr<Int8QuantParamsBlob>>();
	      *holder = make_unique<Int8QuantParamsBlob>(scale, zero_point);
	    },
	    // NOTE(review): the Python keyword is "quant_param_blob_name"
	    // (singular), unlike ObserveInt8QuantParamsBlob's
	    // "quant_params_blob_name". Kept as-is so keyword callers keep working.
	    pybind11::arg("quant_param_blob_name"),
	    pybind11::arg("scale"),
	    pybind11::arg("zero_point"));
	// ObserveInt8QuantParamsBlob(quant_params_blob_name) -> (scale, zero_point)
	//
	// Reads the Int8QuantParamsBlob stored in the named blob of the current
	// workspace and returns its qparam.scale and qparam.zero_point.
	// Raises (CAFFE_ENFORCE) if there is no current workspace, the blob does
	// not exist, or the blob holds an empty unique_ptr.
	m.def(
	    "ObserveInt8QuantParamsBlob",
	    [](std::string quant_params_blob_name) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      auto* quant_params_blob = gWorkspace->GetBlob(quant_params_blob_name);
	      CAFFE_ENFORCE(
	          quant_params_blob, "Can't find blob ", quant_params_blob_name);
	      auto* quant_params_blob_data =
	          quant_params_blob->Get<unique_ptr<Int8QuantParamsBlob>>().get();
	      // Guard against a blob that holds an empty unique_ptr.
	      CAFFE_ENFORCE(
	          quant_params_blob_data != nullptr,
	          "Blob ",
	          quant_params_blob_name,
	          " does not hold quantization params");
	      return std::tuple<float, int>(
	          quant_params_blob_data->qparam.scale,
	          quant_params_blob_data->qparam.zero_point);
	    },
	    pybind11::arg("quant_params_blob_name"));
	// ObserveInt8QuantSchemeBlob(quant_scheme_blob_name)
	//     -> (quantization_kind, preserve_sparsity)
	//
	// Reads the Int8QuantSchemeBlob stored in the named blob of the current
	// workspace and returns its quantization_kind_ and preserve_sparsity_.
	// Raises (CAFFE_ENFORCE) if there is no current workspace, the blob does
	// not exist, or the blob holds an empty unique_ptr.
	m.def(
	    "ObserveInt8QuantSchemeBlob",
	    [](std::string quant_scheme_blob_name) {
	      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
	      CAFFE_ENFORCE(gWorkspace);
	      auto* quant_scheme_blob = gWorkspace->GetBlob(quant_scheme_blob_name);
	      CAFFE_ENFORCE(
	          quant_scheme_blob, "Can't find blob ", quant_scheme_blob_name);
	      auto* quant_scheme_blob_data =
	          quant_scheme_blob->Get<unique_ptr<Int8QuantSchemeBlob>>().get();
	      // Guard against a blob that holds an empty unique_ptr.
	      CAFFE_ENFORCE(
	          quant_scheme_blob_data != nullptr,
	          "Blob ",
	          quant_scheme_blob_name,
	          " does not hold a quantization scheme");
	      return std::tuple<std::string, bool>(
	          quant_scheme_blob_data->quantization_kind_,
	          quant_scheme_blob_data->preserve_sparsity_);
	    },
	    pybind11::arg("quant_scheme_blob_name"));
	}