| #include "caffe2/core/logging.h" |
| #include "caffe2/utils/cpuid.h" |
| #include "l2_minimization.h" |
| |
| #include <cassert> |
| #include <cmath> |
| #include <limits> |
| |
| #include <immintrin.h> |
| |
| #include <c10/util/irange.h> |
| |
| using namespace std; |
| |
| namespace dnnlowp { |
| |
| #undef NDEBUG |
| |
| // Use fp16_min as the small scale cutoff because we don't want to use scales in fp16 subnormal range. |
| // This is to be consistent with Glow and FakeLowP implementation for NNPI. |
| constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f; |
| |
| static float |
| GetNorm(float begin, float end, float density, NormMinimization::Kind kind) { |
| float norm = 0; |
| |
| // assume values are uniformly distributed within each histogram bin |
| if (NormMinimization::L2 == kind) { |
| // err = density * (integral_{begin, end} x^2) |
| // = density * (end^3 - begin^3) / 3 |
| norm = (end * end * end - begin * begin * begin) / 3; |
| // for begin = d/2 and end = -d/2, this leads to d^3/12 |
| } else { |
| // err = density * (integral_{begin, end} |x|) |
| // = density * (end^2 - begin^2) / 2 |
| float left_begin = std::min(0.0f, begin); |
| float left_end = std::min(0.0f, end); |
| assert(left_begin * left_begin >= left_end * left_end); |
| norm += (left_begin * left_begin - left_end * left_end) / 2; |
| |
| float right_begin = std::max(0.0f, begin); |
| float right_end = std::max(0.0f, end); |
| assert(right_end * right_end >= right_begin * right_begin); |
| norm += (right_end * right_end - right_begin * right_begin) / 2; |
| } |
| |
| return density * norm; |
| } |
| |
| // Filter out outliers in input distributions |
| // Exploit the input distributions for the quick search |
| TensorQuantizationParams NormMinimization::NonlinearQuantizationParamsSearch( |
| const Histogram& hist, |
| bool preserve_sparsity, |
| int precision) { |
| if (preserve_sparsity) { |
| VLOG(2) << "l2_approx with symmetric quantization falls back to L2"; |
| return ChooseQuantizationParams(hist, preserve_sparsity, precision); |
| } |
| VLOG(2) << "Using the nonlinear quantile search"; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
| float min, max; |
| vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max)); |
| int nbins = bins_f.size(); |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| float bin_width = (max - min) / nbins; |
| float scale = (max - min) / float((1 << precision) - 1); |
| if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) { |
| QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance(); |
| return qfactory->ChooseQuantizationParams( |
| min, max, precision, preserve_sparsity); |
| } |
| int dst_nbins = 1 << precision; |
| |
| float org_max = max; |
| float org_min = min; |
| // calculate the CDF |
| uint64_t total = 0; |
| for (uint64_t x : bins_f) { |
| total += x; |
| } |
| vector<uint64_t> CDF; |
| uint64_t sum = 0; |
| for (uint64_t x : bins_f) { |
| sum += x; |
| CDF.push_back(sum); |
| } |
| |
| double stepsize = 0.00001; // experiment on the granularity |
| double alpha = 0.0f, beta = 1.0f; // lowerbound and upperbound |
| int start_bin = 0; |
| int end_bin = nbins - 1; |
| double norm_min = numeric_limits<double>::max(); |
| |
| while (alpha < beta) { |
| // find the next step |
| double next_alpha = alpha + stepsize; |
| double next_beta = beta - stepsize; |
| |
| // find the left and right bins between the quantile bounds |
| int i = start_bin, j = end_bin; |
| while (i < end_bin && CDF[i] < next_alpha * total) |
| i++; |
| while (j > start_bin && CDF[j] > next_beta * total) |
| j--; |
| |
| // decide the next move |
| // cout << i << ", " << j << endl; |
| int next_start_bin = start_bin, next_end_bin = end_bin; |
| if ((i - start_bin) > (end_bin - j)) { |
| // move the start_bin |
| next_start_bin = i; |
| alpha = next_alpha; |
| } else { |
| // move the end_bin |
| next_end_bin = j; |
| beta = next_beta; |
| } |
| |
| if (next_start_bin == start_bin && next_end_bin == end_bin) |
| continue; |
| // calculate the norm |
| double norm = 0; |
| double dst_bin_width = |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| bin_width * (next_end_bin - next_start_bin + 1) / dst_nbins; |
| |
| // go over each histogram bin and accumulate errors |
| for (int src_bin = 0; src_bin < nbins; ++src_bin) { |
| // distances from the beginning of first dst_bin to the beginning and |
| // end of src_bin |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| double src_bin_begin = (src_bin - next_start_bin) * bin_width; |
| double src_bin_end = src_bin_begin + bin_width; |
| |
| // which dst_bins the beginning and end of src_bin belong to? |
| int dst_bin_of_begin = std::min( |
| (1 << precision) - 1., |
| std::max(0., floor(src_bin_begin / dst_bin_width))); |
| int dst_bin_of_end = std::min( |
| (1 << precision) - 1., |
| std::max(0., floor(src_bin_end / dst_bin_width))); |
| |
| double dst_bin_of_begin_center = |
| dst_bin_of_begin * dst_bin_width + dst_bin_width / 2; |
| double density = bins_f[src_bin] / bin_width; |
| if (dst_bin_of_begin == dst_bin_of_end) { |
| // if src_bin is entirely within 1 dst_bin |
| double delta_begin = src_bin_begin - dst_bin_of_begin_center; |
| double delta_end = src_bin_end - dst_bin_of_begin_center; |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| } else { |
| double delta_begin = src_bin_begin - dst_bin_of_begin_center; |
| double delta_end = dst_bin_width / 2; |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| norm += (dst_bin_of_end - dst_bin_of_begin - 1) * |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| GetNorm(-dst_bin_width / 2, dst_bin_width / 2, density, kind_); |
| |
| double dst_bin_of_end_center = |
| dst_bin_of_end * dst_bin_width + dst_bin_width / 2; |
| delta_begin = -dst_bin_width / 2; |
| delta_end = src_bin_end - dst_bin_of_end_center; |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| } |
| } |
| if (norm > norm_min) |
| break; |
| norm_min = norm; |
| start_bin = next_start_bin; |
| end_bin = next_end_bin; |
| } |
| VLOG(2) << "best quantization range " << start_bin << "," << end_bin + 1 |
| << "," << norm_min; |
| |
| double selected_sum = 0; |
| for (int i = start_bin; i < end_bin + 1; ++i) { |
| selected_sum += bins_f[i]; |
| } |
| VLOG(2) << "best quantization range covers " |
| << (double)selected_sum / total * 100 << " %%"; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| max = min + bin_width * (end_bin + 1); |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| min = min + bin_width * start_bin; |
| |
| VLOG(2) << "Org min " << org_min << " org max " << org_max << " found min " |
| << min << " max " << max << " with minimal norm " << norm_min; |
| QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance(); |
| return qfactory->ChooseQuantizationParams( |
| min, max, precision, preserve_sparsity); |
| } |
| |
| TensorQuantizationParams NormMinimization::ChooseQuantizationParams( |
| const Histogram& hist, |
| bool preserve_sparsity, |
| int precision) { |
| VLOG(2) << "Using the brute force search"; |
| // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
| float min, max; |
| vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max)); |
| int nbins = bins_f.size(); |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| float bin_width = (max - min) / nbins; |
| |
| float scale = (max - min) / float((1 << precision) - 1); |
| if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) { |
| QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance(); |
| return qfactory->ChooseQuantizationParams( |
| min, max, precision, preserve_sparsity); |
| } |
| int dst_nbins = 1 << precision; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| int zero_bin = round(-min / bin_width); |
| |
| vector<pair<int, float>> best_start_bins(nbins + 1); |
| |
| // Look at mapping [start_bin, start_bin + nbins_selected) to |
| // [0, 1 << precision) for every (start_bin, nbins_selected) combination and |
| // pick the one with smallest L2 quantization error |
| #ifdef _OPENMP |
| #pragma omp parallel for schedule(dynamic) |
| #endif |
| for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) { |
| float norm_min = numeric_limits<float>::max(); |
| int best_start_bin = 0; |
| |
| int start_bin_begin = 0, start_bin_end = nbins - nbins_selected + 1; |
| if (preserve_sparsity) { |
| // when preserving sparsity we only check the range |
| // starting from 0 (when min is 0) or symmetric around 0. |
| if (min == 0) { |
| start_bin_begin = 0; |
| start_bin_end = 1; |
| } else { |
| start_bin_begin = zero_bin - nbins_selected / 2; |
| start_bin_end = start_bin_begin + 1; |
| } |
| } |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| float dst_bin_width = bin_width * nbins_selected / dst_nbins; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
| int start_bin; |
| for (start_bin = start_bin_begin; start_bin < start_bin_end; ++start_bin) { |
| float norm = 0; |
| |
| // go over each histogram bin and accumulate errors |
| caffe2::CpuId cpuid = caffe2::GetCpuId(); |
| if (kind_ == NormMinimization::L2 && cpuid.avx2() && cpuid.fma()) { |
| norm = internal::L2MinimizationKernelAVX2( |
| precision, |
| bins_f.data(), |
| nbins, |
| bin_width, |
| dst_bin_width, |
| start_bin); |
| } else { |
| for (int src_bin = 0; src_bin < nbins; ++src_bin) { |
| // distances from the beginning of first dst_bin to the beginning and |
| // end of src_bin |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| float src_bin_begin = (src_bin - start_bin) * bin_width; |
| float src_bin_end = src_bin_begin + bin_width; |
| |
| // which dst_bins the beginning and end of src_bin belong to? |
| int dst_bin_of_begin = std::min( |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| (1 << precision) - 1.0f, |
| std::max(0.0f, floorf(src_bin_begin / dst_bin_width))); |
| int dst_bin_of_end = std::min( |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| (1 << precision) - 1.0f, |
| std::max(0.0f, floorf(src_bin_end / dst_bin_width))); |
| |
| float dst_bin_of_begin_center = |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| dst_bin_of_begin * dst_bin_width + dst_bin_width / 2; |
| float density = bins_f[src_bin] / bin_width; |
| float delta_begin = src_bin_begin - dst_bin_of_begin_center; |
| if (dst_bin_of_begin == dst_bin_of_end) { |
| // if src_bin is entirely within 1 dst_bin |
| float delta_end = src_bin_end - dst_bin_of_begin_center; |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| } else { |
| float delta_end = dst_bin_width / 2; |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| norm += (dst_bin_of_end - dst_bin_of_begin - 1) * |
| GetNorm(-dst_bin_width / 2, dst_bin_width / 2, density, kind_); |
| |
| float dst_bin_of_end_center = |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| dst_bin_of_end * dst_bin_width + dst_bin_width / 2; |
| delta_begin = -dst_bin_width / 2; |
| delta_end = src_bin_end - dst_bin_of_end_center; |
| norm += GetNorm(delta_begin, delta_end, density, kind_); |
| } |
| } |
| } |
| |
| if (norm < norm_min) { |
| norm_min = norm; |
| best_start_bin = start_bin; |
| } |
| } // for each start_bin |
| |
| best_start_bins[nbins_selected] = {best_start_bin, norm_min}; |
| } // for each nbins_selected |
| |
| float norm_min = numeric_limits<float>::max(); |
| int best_nbins_selected = 1, best_start_bin = 0; |
| for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) { |
| float norm = best_start_bins[nbins_selected].second; |
| if (norm < norm_min) { |
| norm_min = norm; |
| best_start_bin = best_start_bins[nbins_selected].first; |
| best_nbins_selected = nbins_selected; |
| } |
| } |
| |
| float total_sum = 0; |
| for (const auto i : c10::irange(bins_f.size())) { |
| total_sum += bins_f[i]; |
| } |
| float selected_sum = 0; |
| int i_begin = std::max(0, best_start_bin); |
| int i_end = std::min(nbins, best_start_bin + best_nbins_selected); |
| for (int i = i_begin; i < i_end; ++i) { |
| selected_sum += bins_f[i]; |
| } |
| VLOG(2) << "best quantization range covers " << selected_sum / total_sum * 100 |
| << " %%"; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| max = min + bin_width * (best_start_bin + best_nbins_selected); |
| // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) |
| min = min + bin_width * (best_start_bin); |
| |
| QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance(); |
| return qfactory->ChooseQuantizationParams( |
| min, max, precision, preserve_sparsity); |
| } // ChooseQuantizationParams |
| |
| } // namespace dnnlowp |