third-party/npy-halffloat.h - platform/external/FP16 - Git at Google

 /*
  * This implementation is extracted from numpy:
  *   Repo: github.com/numpy/numpy
  *   File: numpy/core/src/npymath/halffloat.c
  *   Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
  * Functions are made "static inline" for performance, and
  * non-conversion functions are removed, and generation of
  * exceptions is disabled.
  */

 #include <cstdint>
 typedef uint16_t npy_uint16;
 typedef uint32_t npy_uint32;
 typedef uint64_t npy_uint64;

 /*
  * This chooses between 'ties to even' and 'ties away from zero'.
  */
 #define NPY_HALF_ROUND_TIES_TO_EVEN 1
 /*
  * If these are 1, the conversions try to trigger underflow,
  * overflow, and invalid exceptions in the FP system when needed.
  */
 #define NPY_HALF_GENERATE_OVERFLOW 0
 #define NPY_HALF_GENERATE_UNDERFLOW 0
 #define NPY_HALF_GENERATE_INVALID 0

 /*
  ********************************************************************
  *                     BIT-LEVEL CONVERSIONS                        *
  ********************************************************************
  */

 /*
  * Converts the bit pattern of an IEEE 754 single-precision float into the
  * bit pattern of a half-precision float.
  *
  *   f: raw bits of the float to convert.
  *
  * Returns the raw half bits.  Infinities stay infinite; NaNs stay NaN (the
  * top ten significand bits are carried over, and the lowest half bit is set
  * if those are all zero); exponents too large for a half overflow to signed
  * infinity; exponents too small give a subnormal half or a signed zero.
  * The rounding mode is selected at compile time by
  * NPY_HALF_ROUND_TIES_TO_EVEN.
  */
 static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
 {
     npy_uint32 f_exp, f_sig;
     npy_uint16 h_sgn, h_exp, h_sig;

     h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
     f_exp = (f&0x7f800000u);

     /* Exponent overflow/NaN converts to signed inf/NaN */
     if (f_exp >= 0x47800000u) {
         if (f_exp == 0x7f800000u) {
             /* Inf or NaN */
             f_sig = (f&0x007fffffu);
             if (f_sig != 0) {
                 /* NaN - propagate the flag in the significand... */
                 npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
                 /* ...but make sure it stays a NaN */
                 if (ret == 0x7c00u) {
                     ret++;
                 }
                 return (npy_uint16) (h_sgn + ret);
             } else {
                 /* signed inf */
                 return (npy_uint16) (h_sgn + 0x7c00u);
             }
         } else {
             /* overflow to signed inf */
 #if NPY_HALF_GENERATE_OVERFLOW
             npy_set_floatstatus_overflow();
 #endif
             return (npy_uint16) (h_sgn + 0x7c00u);
         }
     }

     /* Exponent underflow converts to a subnormal half or signed zero */
     if (f_exp <= 0x38000000u) {
         /*
          * Signed zeros, subnormal floats, and floats with small
          * exponents all convert to signed zero halfs.
          */
         if (f_exp < 0x33000000u) {
 #if NPY_HALF_GENERATE_UNDERFLOW
             /* If f != 0, it underflowed to 0 */
             if ((f&0x7fffffff) != 0) {
                 npy_set_floatstatus_underflow();
             }
 #endif
             return h_sgn;
         }
         /* Make the subnormal significand */
         f_exp >>= 23;
         f_sig = (0x00800000u + (f&0x007fffffu));
 #if NPY_HALF_GENERATE_UNDERFLOW
         /* If it's not exactly represented, it underflowed */
         if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
             npy_set_floatstatus_underflow();
         }
 #endif
         /*
          * On top of the usual 13-bit narrowing, a subnormal result needs an
          * extra right shift of 1 (f_exp == 112) up to 11 (f_exp == 102)
          * bits to line the significand up with the fixed half exponent.
          */
         f_sig >>= (113 - f_exp);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
          * If the last bit in the half significand is 0 (already even), and
          * the remaining bit pattern is 1000...0, then we do not add one
          * to the bit after the half significand.  The shift above may have
          * dropped up to 11 low bits of f, and the value is only an exact
          * tie when those are zero too, so they are tested on the original
          * f.  Without that test a float just above the midpoint would be
          * rounded down to the even neighbour.  In all other cases, add one.
          */
         if (((f_sig&0x00003fffu) != 0x00001000u) || ((f&0x000007ffu) != 0)) {
             f_sig += 0x00001000u;
         }
 #else
         f_sig += 0x00001000u;
 #endif
         h_sig = (npy_uint16) (f_sig >> 13);
         /*
          * If the rounding causes a bit to spill into h_exp, it will
          * increment h_exp from zero to one and h_sig will be zero.
          * This is the correct result.
          */
         return (npy_uint16) (h_sgn + h_sig);
     }

     /* Regular case with no overflow or underflow */
     h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
     /* Handle rounding by adding 1 to the bit beyond half precision */
     f_sig = (f&0x007fffffu);
 #if NPY_HALF_ROUND_TIES_TO_EVEN
     /*
      * If the last bit in the half significand is 0 (already even), and
      * the remaining bit pattern is 1000...0, then we do not add one
      * to the bit after the half significand.  In all other cases, we do.
      * No bits were shifted out here, so f_sig alone decides the tie.
      */
     if ((f_sig&0x00003fffu) != 0x00001000u) {
         f_sig += 0x00001000u;
     }
 #else
     f_sig += 0x00001000u;
 #endif
     h_sig = (npy_uint16) (f_sig >> 13);
     /*
      * If the rounding causes a bit to spill into h_exp, it will
      * increment h_exp by one and h_sig will be zero.  This is the
      * correct result.  h_exp may increment to 15, at greatest, in
      * which case the result overflows to a signed inf.
      */
 #if NPY_HALF_GENERATE_OVERFLOW
     h_sig += h_exp;
     if (h_sig == 0x7c00u) {
         npy_set_floatstatus_overflow();
     }
     return (npy_uint16) (h_sgn + h_sig);
 #else
     return (npy_uint16) (h_sgn + h_exp + h_sig);
 #endif
 }

 /*
  * Converts the bit pattern of an IEEE 754 double-precision float into the
  * bit pattern of a half-precision float.
  *
  *   d: raw bits of the double to convert.
  *
  * Returns the raw half bits.  Infinities stay infinite; NaNs stay NaN (the
  * top ten significand bits are carried over, and the lowest half bit is set
  * if those are all zero); exponents too large for a half overflow to signed
  * infinity; exponents too small give a subnormal half or a signed zero.
  * The rounding mode is selected at compile time by
  * NPY_HALF_ROUND_TIES_TO_EVEN.
  */
 static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
 {
     npy_uint64 d_exp, d_sig;
     npy_uint16 h_sgn, h_exp, h_sig;

     h_sgn = (npy_uint16) ((d&0x8000000000000000ULL) >> 48);
     d_exp = (d&0x7ff0000000000000ULL);

     /* Exponent overflow/NaN converts to signed inf/NaN */
     if (d_exp >= 0x40f0000000000000ULL) {
         if (d_exp == 0x7ff0000000000000ULL) {
             /* Inf or NaN */
             d_sig = (d&0x000fffffffffffffULL);
             if (d_sig != 0) {
                 /* NaN - propagate the flag in the significand... */
                 npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
                 /* ...but make sure it stays a NaN */
                 if (ret == 0x7c00u) {
                     ret++;
                 }
                 return (npy_uint16) (h_sgn + ret);
             } else {
                 /* signed inf */
                 return (npy_uint16) (h_sgn + 0x7c00u);
             }
         } else {
             /* overflow to signed inf */
 #if NPY_HALF_GENERATE_OVERFLOW
             npy_set_floatstatus_overflow();
 #endif
             return (npy_uint16) (h_sgn + 0x7c00u);
         }
     }

     /* Exponent underflow converts to subnormal half or signed zero */
     if (d_exp <= 0x3f00000000000000ULL) {
         /*
          * Signed zeros, subnormal doubles, and doubles with small
          * exponents all convert to signed zero halfs.
          */
         if (d_exp < 0x3e60000000000000ULL) {
 #if NPY_HALF_GENERATE_UNDERFLOW
             /* If d != 0, it underflowed to 0 */
             if ((d&0x7fffffffffffffffULL) != 0) {
                 npy_set_floatstatus_underflow();
             }
 #endif
             return h_sgn;
         }
         /* Make the subnormal significand */
         d_exp >>= 52;
         d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
 #if NPY_HALF_GENERATE_UNDERFLOW
         /* If it's not exactly represented, it underflowed */
         if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
             npy_set_floatstatus_underflow();
         }
 #endif
         /*
          * On top of the usual 42-bit narrowing, a subnormal result needs an
          * extra right shift of 1 (d_exp == 1008) up to 11 (d_exp == 998)
          * bits to line the significand up with the fixed half exponent.
          */
         d_sig >>= (1009 - d_exp);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
          * If the last bit in the half significand is 0 (already even), and
          * the remaining bit pattern is 1000...0, then we do not add one
          * to the bit after the half significand.  The shift above may have
          * dropped up to 11 low bits of d, and the value is only an exact
          * tie when those are zero too, so they are tested on the original
          * d.  Without that test a double just above the midpoint would be
          * rounded down to the even neighbour.  In all other cases, add one.
          */
         if (((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) ||
                 ((d&0x00000000000007ffULL) != 0)) {
             d_sig += 0x0000020000000000ULL;
         }
 #else
         d_sig += 0x0000020000000000ULL;
 #endif
         h_sig = (npy_uint16) (d_sig >> 42);
         /*
          * If the rounding causes a bit to spill into h_exp, it will
          * increment h_exp from zero to one and h_sig will be zero.
          * This is the correct result.
          */
         return (npy_uint16) (h_sgn + h_sig);
     }

     /* Regular case with no overflow or underflow */
     h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
     /* Handle rounding by adding 1 to the bit beyond half precision */
     d_sig = (d&0x000fffffffffffffULL);
 #if NPY_HALF_ROUND_TIES_TO_EVEN
     /*
      * If the last bit in the half significand is 0 (already even), and
      * the remaining bit pattern is 1000...0, then we do not add one
      * to the bit after the half significand.  In all other cases, we do.
      * No bits were shifted out here, so d_sig alone decides the tie.
      */
     if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
         d_sig += 0x0000020000000000ULL;
     }
 #else
     d_sig += 0x0000020000000000ULL;
 #endif
     h_sig = (npy_uint16) (d_sig >> 42);

     /*
      * If the rounding causes a bit to spill into h_exp, it will
      * increment h_exp by one and h_sig will be zero.  This is the
      * correct result.  h_exp may increment to 15, at greatest, in
      * which case the result overflows to a signed inf.
      */
 #if NPY_HALF_GENERATE_OVERFLOW
     h_sig += h_exp;
     if (h_sig == 0x7c00u) {
         npy_set_floatstatus_overflow();
     }
     return (npy_uint16) (h_sgn + h_sig);
 #else
     return (npy_uint16) (h_sgn + h_exp + h_sig);
 #endif
 }

 /*
  * Expands the bit pattern of a half-precision float into the bit pattern
  * of the single-precision float with the same value.
  *
  *   h: raw bits of the half to convert.
  *
  * Returns the raw float bits.  Signed zero maps to signed zero, subnormal
  * halfs are renormalised, and for inf/NaN the half significand is copied
  * into the top bits of the float significand.
  */
 static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
 {
     const npy_uint32 sign_bit = ((npy_uint32)h & 0x8000u) << 16;
     const npy_uint32 exp_field = (npy_uint32)h & 0x7c00u;
     npy_uint32 mantissa = (npy_uint32)h & 0x03ffu;
     npy_uint32 extra_shifts = 0;

     /* inf or NaN: all-ones exponent plus a copy of the significand */
     if (exp_field == 0x7c00u) {
         return sign_bit + 0x7f800000u + (mantissa << 13);
     }

     /* normalized: rebias the exponent and widen in a single step */
     if (exp_field != 0) {
         return sign_bit + ((((npy_uint32)h & 0x7fffu) + 0x1c000u) << 13);
     }

     /* signed zero */
     if (mantissa == 0) {
         return sign_bit;
     }

     /*
      * Subnormal: shift left until the leading one reaches bit 10, counting
      * every shift after the first; each one lowers the float exponent.
      */
     for (mantissa <<= 1; (mantissa & 0x0400u) == 0; mantissa <<= 1) {
         extra_shifts++;
     }

     return sign_bit
          + ((127u - 15u - extra_shifts) << 23)
          + ((mantissa & 0x03ffu) << 13);
 }

 /*
  * Expands the bit pattern of a half-precision float into the bit pattern
  * of the double-precision float with the same value.
  *
  *   h: raw bits of the half to convert.
  *
  * Returns the raw double bits.  Signed zero maps to signed zero, subnormal
  * halfs are renormalised, and for inf/NaN the half significand is copied
  * into the top bits of the double significand.
  */
 static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
 {
     const npy_uint64 sign_bit = ((npy_uint64)h & 0x8000u) << 48;
     const npy_uint64 exp_field = (npy_uint64)h & 0x7c00u;
     npy_uint64 mantissa = (npy_uint64)h & 0x03ffu;
     npy_uint64 extra_shifts = 0;

     /* inf or NaN: all-ones exponent plus a copy of the significand */
     if (exp_field == 0x7c00u) {
         return sign_bit + 0x7ff0000000000000ULL + (mantissa << 42);
     }

     /* normalized: rebias the exponent and widen in a single step */
     if (exp_field != 0) {
         return sign_bit + ((((npy_uint64)h & 0x7fffu) + 0xfc000u) << 42);
     }

     /* signed zero */
     if (mantissa == 0) {
         return sign_bit;
     }

     /*
      * Subnormal: shift left until the leading one reaches bit 10, counting
      * every shift after the first; each one lowers the double exponent.
      */
     for (mantissa <<= 1; (mantissa & 0x0400u) == 0; mantissa <<= 1) {
         extra_shifts++;
     }

     return sign_bit
          + ((1023u - 15u - extra_shifts) << 52)
          + ((mantissa & 0x03ffu) << 42);
 }
	/*
	* This implementation is extracted from numpy:
	* Repo: github.com/numpy/numpy
	* File: numpy/core/src/npymath/halffloat.c
	* Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
	* Functions are made "static inline" for performance, and
	* non-conversion functions are removed, and generation of
	* exceptions is disabled.
	*/

	#include <cstdint>
	typedef uint16_t npy_uint16;
	typedef uint32_t npy_uint32;
	typedef uint64_t npy_uint64;

	/*
	* This chooses between 'ties to even' and 'ties away from zero'.
	*/
	#define NPY_HALF_ROUND_TIES_TO_EVEN 1
	/*
	* If these are 1, the conversions try to trigger underflow,
	* overflow, and invalid exceptions in the FP system when needed.
	*/
	#define NPY_HALF_GENERATE_OVERFLOW 0
	#define NPY_HALF_GENERATE_UNDERFLOW 0
	#define NPY_HALF_GENERATE_INVALID 0

	/*
	********************************************************************
	* BIT-LEVEL CONVERSIONS *
	********************************************************************
	*/

	/*
	 * Converts the bit pattern of an IEEE 754 single-precision float into the
	 * bit pattern of a half-precision float.
	 *
	 *   f: raw bits of the float to convert.
	 *
	 * Returns the raw half bits.  Infinities stay infinite; NaNs stay NaN (the
	 * top ten significand bits are carried over, and the lowest half bit is set
	 * if those are all zero); exponents too large for a half overflow to signed
	 * infinity; exponents too small give a subnormal half or a signed zero.
	 * The rounding mode is selected at compile time by
	 * NPY_HALF_ROUND_TIES_TO_EVEN.
	 */
	static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
	{
	    npy_uint32 f_exp, f_sig;
	    npy_uint16 h_sgn, h_exp, h_sig;

	    h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
	    f_exp = (f&0x7f800000u);

	    /* Exponent overflow/NaN converts to signed inf/NaN */
	    if (f_exp >= 0x47800000u) {
	        if (f_exp == 0x7f800000u) {
	            /* Inf or NaN */
	            f_sig = (f&0x007fffffu);
	            if (f_sig != 0) {
	                /* NaN - propagate the flag in the significand... */
	                npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
	                /* ...but make sure it stays a NaN */
	                if (ret == 0x7c00u) {
	                    ret++;
	                }
	                return (npy_uint16) (h_sgn + ret);
	            } else {
	                /* signed inf */
	                return (npy_uint16) (h_sgn + 0x7c00u);
	            }
	        } else {
	            /* overflow to signed inf */
	#if NPY_HALF_GENERATE_OVERFLOW
	            npy_set_floatstatus_overflow();
	#endif
	            return (npy_uint16) (h_sgn + 0x7c00u);
	        }
	    }

	    /* Exponent underflow converts to a subnormal half or signed zero */
	    if (f_exp <= 0x38000000u) {
	        /*
	         * Signed zeros, subnormal floats, and floats with small
	         * exponents all convert to signed zero halfs.
	         */
	        if (f_exp < 0x33000000u) {
	#if NPY_HALF_GENERATE_UNDERFLOW
	            /* If f != 0, it underflowed to 0 */
	            if ((f&0x7fffffff) != 0) {
	                npy_set_floatstatus_underflow();
	            }
	#endif
	            return h_sgn;
	        }
	        /* Make the subnormal significand */
	        f_exp >>= 23;
	        f_sig = (0x00800000u + (f&0x007fffffu));
	#if NPY_HALF_GENERATE_UNDERFLOW
	        /* If it's not exactly represented, it underflowed */
	        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
	            npy_set_floatstatus_underflow();
	        }
	#endif
	        /*
	         * On top of the usual 13-bit narrowing, a subnormal result needs an
	         * extra right shift of 1 (f_exp == 112) up to 11 (f_exp == 102)
	         * bits to line the significand up with the fixed half exponent.
	         */
	        f_sig >>= (113 - f_exp);
	        /* Handle rounding by adding 1 to the bit beyond half precision */
	#if NPY_HALF_ROUND_TIES_TO_EVEN
	        /*
	         * If the last bit in the half significand is 0 (already even), and
	         * the remaining bit pattern is 1000...0, then we do not add one
	         * to the bit after the half significand.  The shift above may have
	         * dropped up to 11 low bits of f, and the value is only an exact
	         * tie when those are zero too, so they are tested on the original
	         * f.  Without that test a float just above the midpoint would be
	         * rounded down to the even neighbour.  In all other cases, add one.
	         */
	        if (((f_sig&0x00003fffu) != 0x00001000u) || ((f&0x000007ffu) != 0)) {
	            f_sig += 0x00001000u;
	        }
	#else
	        f_sig += 0x00001000u;
	#endif
	        h_sig = (npy_uint16) (f_sig >> 13);
	        /*
	         * If the rounding causes a bit to spill into h_exp, it will
	         * increment h_exp from zero to one and h_sig will be zero.
	         * This is the correct result.
	         */
	        return (npy_uint16) (h_sgn + h_sig);
	    }

	    /* Regular case with no overflow or underflow */
	    h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
	    /* Handle rounding by adding 1 to the bit beyond half precision */
	    f_sig = (f&0x007fffffu);
	#if NPY_HALF_ROUND_TIES_TO_EVEN
	    /*
	     * If the last bit in the half significand is 0 (already even), and
	     * the remaining bit pattern is 1000...0, then we do not add one
	     * to the bit after the half significand.  In all other cases, we do.
	     * No bits were shifted out here, so f_sig alone decides the tie.
	     */
	    if ((f_sig&0x00003fffu) != 0x00001000u) {
	        f_sig += 0x00001000u;
	    }
	#else
	    f_sig += 0x00001000u;
	#endif
	    h_sig = (npy_uint16) (f_sig >> 13);
	    /*
	     * If the rounding causes a bit to spill into h_exp, it will
	     * increment h_exp by one and h_sig will be zero.  This is the
	     * correct result.  h_exp may increment to 15, at greatest, in
	     * which case the result overflows to a signed inf.
	     */
	#if NPY_HALF_GENERATE_OVERFLOW
	    h_sig += h_exp;
	    if (h_sig == 0x7c00u) {
	        npy_set_floatstatus_overflow();
	    }
	    return (npy_uint16) (h_sgn + h_sig);
	#else
	    return (npy_uint16) (h_sgn + h_exp + h_sig);
	#endif
	}

	/*
	 * Converts the bit pattern of an IEEE 754 double-precision float into the
	 * bit pattern of a half-precision float.
	 *
	 *   d: raw bits of the double to convert.
	 *
	 * Returns the raw half bits.  Infinities stay infinite; NaNs stay NaN (the
	 * top ten significand bits are carried over, and the lowest half bit is set
	 * if those are all zero); exponents too large for a half overflow to signed
	 * infinity; exponents too small give a subnormal half or a signed zero.
	 * The rounding mode is selected at compile time by
	 * NPY_HALF_ROUND_TIES_TO_EVEN.
	 */
	static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
	{
	    npy_uint64 d_exp, d_sig;
	    npy_uint16 h_sgn, h_exp, h_sig;

	    h_sgn = (npy_uint16) ((d&0x8000000000000000ULL) >> 48);
	    d_exp = (d&0x7ff0000000000000ULL);

	    /* Exponent overflow/NaN converts to signed inf/NaN */
	    if (d_exp >= 0x40f0000000000000ULL) {
	        if (d_exp == 0x7ff0000000000000ULL) {
	            /* Inf or NaN */
	            d_sig = (d&0x000fffffffffffffULL);
	            if (d_sig != 0) {
	                /* NaN - propagate the flag in the significand... */
	                npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
	                /* ...but make sure it stays a NaN */
	                if (ret == 0x7c00u) {
	                    ret++;
	                }
	                return (npy_uint16) (h_sgn + ret);
	            } else {
	                /* signed inf */
	                return (npy_uint16) (h_sgn + 0x7c00u);
	            }
	        } else {
	            /* overflow to signed inf */
	#if NPY_HALF_GENERATE_OVERFLOW
	            npy_set_floatstatus_overflow();
	#endif
	            return (npy_uint16) (h_sgn + 0x7c00u);
	        }
	    }

	    /* Exponent underflow converts to subnormal half or signed zero */
	    if (d_exp <= 0x3f00000000000000ULL) {
	        /*
	         * Signed zeros, subnormal doubles, and doubles with small
	         * exponents all convert to signed zero halfs.
	         */
	        if (d_exp < 0x3e60000000000000ULL) {
	#if NPY_HALF_GENERATE_UNDERFLOW
	            /* If d != 0, it underflowed to 0 */
	            if ((d&0x7fffffffffffffffULL) != 0) {
	                npy_set_floatstatus_underflow();
	            }
	#endif
	            return h_sgn;
	        }
	        /* Make the subnormal significand */
	        d_exp >>= 52;
	        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
	#if NPY_HALF_GENERATE_UNDERFLOW
	        /* If it's not exactly represented, it underflowed */
	        if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
	            npy_set_floatstatus_underflow();
	        }
	#endif
	        /*
	         * On top of the usual 42-bit narrowing, a subnormal result needs an
	         * extra right shift of 1 (d_exp == 1008) up to 11 (d_exp == 998)
	         * bits to line the significand up with the fixed half exponent.
	         */
	        d_sig >>= (1009 - d_exp);
	        /* Handle rounding by adding 1 to the bit beyond half precision */
	#if NPY_HALF_ROUND_TIES_TO_EVEN
	        /*
	         * If the last bit in the half significand is 0 (already even), and
	         * the remaining bit pattern is 1000...0, then we do not add one
	         * to the bit after the half significand.  The shift above may have
	         * dropped up to 11 low bits of d, and the value is only an exact
	         * tie when those are zero too, so they are tested on the original
	         * d.  Without that test a double just above the midpoint would be
	         * rounded down to the even neighbour.  In all other cases, add one.
	         */
	        if (((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) ||
	                ((d&0x00000000000007ffULL) != 0)) {
	            d_sig += 0x0000020000000000ULL;
	        }
	#else
	        d_sig += 0x0000020000000000ULL;
	#endif
	        h_sig = (npy_uint16) (d_sig >> 42);
	        /*
	         * If the rounding causes a bit to spill into h_exp, it will
	         * increment h_exp from zero to one and h_sig will be zero.
	         * This is the correct result.
	         */
	        return (npy_uint16) (h_sgn + h_sig);
	    }

	    /* Regular case with no overflow or underflow */
	    h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
	    /* Handle rounding by adding 1 to the bit beyond half precision */
	    d_sig = (d&0x000fffffffffffffULL);
	#if NPY_HALF_ROUND_TIES_TO_EVEN
	    /*
	     * If the last bit in the half significand is 0 (already even), and
	     * the remaining bit pattern is 1000...0, then we do not add one
	     * to the bit after the half significand.  In all other cases, we do.
	     * No bits were shifted out here, so d_sig alone decides the tie.
	     */
	    if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
	        d_sig += 0x0000020000000000ULL;
	    }
	#else
	    d_sig += 0x0000020000000000ULL;
	#endif
	    h_sig = (npy_uint16) (d_sig >> 42);

	    /*
	     * If the rounding causes a bit to spill into h_exp, it will
	     * increment h_exp by one and h_sig will be zero.  This is the
	     * correct result.  h_exp may increment to 15, at greatest, in
	     * which case the result overflows to a signed inf.
	     */
	#if NPY_HALF_GENERATE_OVERFLOW
	    h_sig += h_exp;
	    if (h_sig == 0x7c00u) {
	        npy_set_floatstatus_overflow();
	    }
	    return (npy_uint16) (h_sgn + h_sig);
	#else
	    return (npy_uint16) (h_sgn + h_exp + h_sig);
	#endif
	}

	/*
	 * Expands the bit pattern of a half-precision float into the bit pattern
	 * of the single-precision float with the same value.
	 *
	 *   h: raw bits of the half to convert.
	 *
	 * Returns the raw float bits.  Signed zero maps to signed zero, subnormal
	 * halfs are renormalised, and for inf/NaN the half significand is copied
	 * into the top bits of the float significand.
	 */
	static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
	{
	    const npy_uint32 sign_bit = ((npy_uint32)h & 0x8000u) << 16;
	    const npy_uint32 exp_field = (npy_uint32)h & 0x7c00u;
	    npy_uint32 mantissa = (npy_uint32)h & 0x03ffu;
	    npy_uint32 extra_shifts = 0;

	    /* inf or NaN: all-ones exponent plus a copy of the significand */
	    if (exp_field == 0x7c00u) {
	        return sign_bit + 0x7f800000u + (mantissa << 13);
	    }

	    /* normalized: rebias the exponent and widen in a single step */
	    if (exp_field != 0) {
	        return sign_bit + ((((npy_uint32)h & 0x7fffu) + 0x1c000u) << 13);
	    }

	    /* signed zero */
	    if (mantissa == 0) {
	        return sign_bit;
	    }

	    /*
	     * Subnormal: shift left until the leading one reaches bit 10, counting
	     * every shift after the first; each one lowers the float exponent.
	     */
	    for (mantissa <<= 1; (mantissa & 0x0400u) == 0; mantissa <<= 1) {
	        extra_shifts++;
	    }

	    return sign_bit
	         + ((127u - 15u - extra_shifts) << 23)
	         + ((mantissa & 0x03ffu) << 13);
	}

	/*
	 * Expands the bit pattern of a half-precision float into the bit pattern
	 * of the double-precision float with the same value.
	 *
	 *   h: raw bits of the half to convert.
	 *
	 * Returns the raw double bits.  Signed zero maps to signed zero, subnormal
	 * halfs are renormalised, and for inf/NaN the half significand is copied
	 * into the top bits of the double significand.
	 */
	static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
	{
	    const npy_uint64 sign_bit = ((npy_uint64)h & 0x8000u) << 48;
	    const npy_uint64 exp_field = (npy_uint64)h & 0x7c00u;
	    npy_uint64 mantissa = (npy_uint64)h & 0x03ffu;
	    npy_uint64 extra_shifts = 0;

	    /* inf or NaN: all-ones exponent plus a copy of the significand */
	    if (exp_field == 0x7c00u) {
	        return sign_bit + 0x7ff0000000000000ULL + (mantissa << 42);
	    }

	    /* normalized: rebias the exponent and widen in a single step */
	    if (exp_field != 0) {
	        return sign_bit + ((((npy_uint64)h & 0x7fffu) + 0xfc000u) << 42);
	    }

	    /* signed zero */
	    if (mantissa == 0) {
	        return sign_bit;
	    }

	    /*
	     * Subnormal: shift left until the leading one reaches bit 10, counting
	     * every shift after the first; each one lowers the double exponent.
	     */
	    for (mantissa <<= 1; (mantissa & 0x0400u) == 0; mantissa <<= 1) {
	        extra_shifts++;
	    }

	    return sign_bit
	         + ((1023u - 15u - extra_shifts) << 52)
	         + ((mantissa & 0x03ffu) << 42);
	}