// c10/util/generic_math.h - platform/external/pytorch - Git at Google

 #pragma once

 #include <c10/macros/Macros.h>
 #include <c10/util/TypeSafeSignMath.h>
 #include <cmath>

 #if defined(__CUDA_ARCH__)
 #include <c10/cuda/CUDAMathCompat.h>
 #define C10_COMPAT_COPYSIGN c10::cuda::compat::copysign
 #elif defined(__HIPCC__)
 #include <c10/hip/HIPMathCompat.h>
 #define C10_COMPAT_COPYSIGN c10::hip::compat::copysign
 #else
 #include <c10/util/copysign.h>
 #define C10_COMPAT_COPYSIGN c10::copysign
 #endif

 // The functions in this file should be header-only because the file is used
 // under ABI-compatibility mode.

 namespace c10 {

 // NOTE: [Floor Division in Python]
 // Python's __floordiv__ operator is more complicated than just floor(a / b).
 // It aims to maintain the property: a == (a // b) * b + remainder(a, b)
 // which can otherwise fail due to rounding errors in the remainder.
 // So, instead it is calculated as: a // b = (a - remainder(a, b)) / b
 // With some additional fix-ups added to the result.
 //
 // For reference, see CPython's implementation:
 // https://github.com/python/cpython/blob/ace008c531dd685a30c1dd68f9b5ba35f20171cf/Objects/floatobject.c#L636

 // Floor division for floating-point operands, built from the fmod remainder
 // as outlined in NOTE: [Floor Division in Python].
 //
 // a: dividend
 // b: divisor
 // Returns the floored quotient; when b == 0 the plain a / b is returned.
 template <typename scalar_t>
 inline C10_HOST_DEVICE scalar_t div_floor_floating(scalar_t a, scalar_t b)
     __ubsan_ignore_float_divide_by_zero__ {
   if (C10_UNLIKELY(b == 0)) {
     // Division by zero: hand back the plain quotient (standard IEEE result).
     return a / b;
   }

   const auto remainder = std::fmod(a, b);
   auto quotient = (a - remainder) / b;

   // A nonzero remainder whose sign disagrees with the divisor's means the
   // quotient derived from it must be stepped down by one.
   const bool divisor_negative = b < 0;
   const bool remainder_negative = remainder < 0;
   if (remainder != 0 && divisor_negative != remainder_negative) {
     quotient -= scalar_t(1);
   }

   scalar_t result;
   if (quotient == 0) {
     // Zero quotient: carry over the sign of the direct division a / b.
     result = C10_COMPAT_COPYSIGN(scalar_t(0), a / b);
   } else {
     // Snap to the nearest integer: if flooring discarded more than one half,
     // move back up by one.
     result = std::floor(quotient);
     if (quotient - result > scalar_t(0.5)) {
       result += scalar_t(1.0);
     }
   }
   return result;
 }

 // Floor division for integer operands.
 //
 // a: dividend
 // b: divisor
 // Returns a / b, reduced by one when the operands' signs differ (per
 // c10::signs_differ) and the division leaves a nonzero remainder.
 template <typename scalar_t>
 inline C10_HOST_DEVICE scalar_t div_floor_integer(scalar_t a, scalar_t b) {
   const auto truncated = a / b;
   // The remainder is only inspected when the signs differ; otherwise the
   // truncated quotient is returned unchanged.
   const bool step_down = c10::signs_differ(a, b) && (a % b) != 0;
   return step_down ? truncated - 1 : truncated;
 }

 } // namespace c10
	#pragma once

	#include <c10/macros/Macros.h>
	#include <c10/util/TypeSafeSignMath.h>
	#include <cmath>

	#if defined(__CUDA_ARCH__)
	#include <c10/cuda/CUDAMathCompat.h>
	#define C10_COMPAT_COPYSIGN c10::cuda::compat::copysign
	#elif defined(__HIPCC__)
	#include <c10/hip/HIPMathCompat.h>
	#define C10_COMPAT_COPYSIGN c10::hip::compat::copysign
	#else
	#include <c10/util/copysign.h>
	#define C10_COMPAT_COPYSIGN c10::copysign
	#endif

	// The functions in this file should be header-only because the file is used
	// under ABI-compatibility mode.

	namespace c10 {

	// NOTE: [Floor Division in Python]
	// Python's __floordiv__ operator is more complicated than just floor(a / b).
	// It aims to maintain the property: a == (a // b) * b + remainder(a, b)
	// which can otherwise fail due to rounding errors in the remainder.
	// So, instead it is calculated as: a // b = (a - remainder(a, b)) / b
	// With some additional fix-ups added to the result.
	//
	// For reference, see CPython's implementation:
	// https://github.com/python/cpython/blob/ace008c531dd685a30c1dd68f9b5ba35f20171cf/Objects/floatobject.c#L636

	// Floor division for floating-point operands, built from the fmod remainder
	// as outlined in NOTE: [Floor Division in Python].
	//
	// a: dividend
	// b: divisor
	// Returns the floored quotient; when b == 0 the plain a / b is returned.
	template <typename scalar_t>
	inline C10_HOST_DEVICE scalar_t div_floor_floating(scalar_t a, scalar_t b)
	    __ubsan_ignore_float_divide_by_zero__ {
	  if (C10_UNLIKELY(b == 0)) {
	    // Division by zero: hand back the plain quotient (standard IEEE result).
	    return a / b;
	  }

	  const auto remainder = std::fmod(a, b);
	  auto quotient = (a - remainder) / b;

	  // A nonzero remainder whose sign disagrees with the divisor's means the
	  // quotient derived from it must be stepped down by one.
	  const bool divisor_negative = b < 0;
	  const bool remainder_negative = remainder < 0;
	  if (remainder != 0 && divisor_negative != remainder_negative) {
	    quotient -= scalar_t(1);
	  }

	  scalar_t result;
	  if (quotient == 0) {
	    // Zero quotient: carry over the sign of the direct division a / b.
	    result = C10_COMPAT_COPYSIGN(scalar_t(0), a / b);
	  } else {
	    // Snap to the nearest integer: if flooring discarded more than one half,
	    // move back up by one.
	    result = std::floor(quotient);
	    if (quotient - result > scalar_t(0.5)) {
	      result += scalar_t(1.0);
	    }
	  }
	  return result;
	}

	// Floor division for integer operands.
	//
	// a: dividend
	// b: divisor
	// Returns a / b, reduced by one when the operands' signs differ (per
	// c10::signs_differ) and the division leaves a nonzero remainder.
	template <typename scalar_t>
	inline C10_HOST_DEVICE scalar_t div_floor_integer(scalar_t a, scalar_t b) {
	  const auto truncated = a / b;
	  // The remainder is only inspected when the signs differ; otherwise the
	  // truncated quotient is returned unchanged.
	  const bool step_down = c10::signs_differ(a, b) && (a % b) != 0;
	  return step_down ? truncated - 1 : truncated;
	}

	} // namespace c10