| /* |
| * Double-precision erf(x) function. |
| * |
| * Copyright (c) 2020, Arm Limited. |
| * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception |
| */ |
| |
| #include "math_config.h" |
| #include <math.h> |
| #include <stdint.h> |
| |
| #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 |
| #define C 0x1.b0ac16p-1 |
| #define PA __erf_data.erf_poly_A |
| #define NA __erf_data.erf_ratio_N_A |
| #define DA __erf_data.erf_ratio_D_A |
| #define NB __erf_data.erf_ratio_N_B |
| #define DB __erf_data.erf_ratio_D_B |
| #define PC __erf_data.erfc_poly_C |
| #define PD __erf_data.erfc_poly_D |
| #define PE __erf_data.erfc_poly_E |
| #define PF __erf_data.erfc_poly_F |
| |
| /* Top 32 bits of a double. */ |
| static inline uint32_t |
| top32 (double x) |
| { |
| return asuint64 (x) >> 32; |
| } |
| |
| /* Fast erf implementation using a mix of |
| rational and polynomial approximations. |
| Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */ |
| double |
| erf (double x) |
| { |
| /* Get top word and sign. */ |
| uint32_t ix = top32 (x); |
| uint32_t ia = ix & 0x7fffffff; |
| uint32_t sign = ix >> 31; |
| |
| /* Normalized and subnormal cases */ |
| if (ia < 0x3feb0000) |
| { /* a = |x| < 0.84375. */ |
| |
| if (ia < 0x3e300000) |
| { /* a < 2^(-28). */ |
| if (ia < 0x00800000) |
| { /* a < 2^(-1015). */ |
| double y = fma (TwoOverSqrtPiMinusOne, x, x); |
| return check_uflow (y); |
| } |
| return x + TwoOverSqrtPiMinusOne * x; |
| } |
| |
| double x2 = x * x; |
| |
| if (ia < 0x3fe00000) |
| { /* a < 0.5 - Use polynomial approximation. */ |
| double r1 = fma (x2, PA[1], PA[0]); |
| double r2 = fma (x2, PA[3], PA[2]); |
| double r3 = fma (x2, PA[5], PA[4]); |
| double r4 = fma (x2, PA[7], PA[6]); |
| double r5 = fma (x2, PA[9], PA[8]); |
| double x4 = x2 * x2; |
| double r = r5; |
| r = fma (x4, r, r4); |
| r = fma (x4, r, r3); |
| r = fma (x4, r, r2); |
| r = fma (x4, r, r1); |
| return fma (r, x, x); /* This fma is crucial for accuracy. */ |
| } |
| else |
| { /* 0.5 <= a < 0.84375 - Use rational approximation. */ |
| double x4, x8, r1n, r2n, r1d, r2d, r3d; |
| |
| r1n = fma (x2, NA[1], NA[0]); |
| x4 = x2 * x2; |
| r2n = fma (x2, NA[3], NA[2]); |
| x8 = x4 * x4; |
| r1d = fma (x2, DA[0], 1.0); |
| r2d = fma (x2, DA[2], DA[1]); |
| r3d = fma (x2, DA[4], DA[3]); |
| double P = r1n + x4 * r2n + x8 * NA[4]; |
| double Q = r1d + x4 * r2d + x8 * r3d; |
| return fma (P / Q, x, x); |
| } |
| } |
| else if (ia < 0x3ff40000) |
| { /* 0.84375 <= |x| < 1.25. */ |
| double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d; |
| double a = fabs (x) - 1.0; |
| r1n = fma (a, NB[1], NB[0]); |
| a2 = a * a; |
| r1d = fma (a, DB[0], 1.0); |
| a4 = a2 * a2; |
| r2n = fma (a, NB[3], NB[2]); |
| a6 = a4 * a2; |
| r2d = fma (a, DB[2], DB[1]); |
| r3n = fma (a, NB[5], NB[4]); |
| r3d = fma (a, DB[4], DB[3]); |
| r4n = NB[6]; |
| r4d = DB[5]; |
| double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n; |
| double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d; |
| if (sign) |
| return -C - P / Q; |
| else |
| return C + P / Q; |
| } |
| else if (ia < 0x40000000) |
| { /* 1.25 <= |x| < 2.0. */ |
| double a = fabs (x); |
| a = a - 1.25; |
| |
| double r1 = fma (a, PC[1], PC[0]); |
| double r2 = fma (a, PC[3], PC[2]); |
| double r3 = fma (a, PC[5], PC[4]); |
| double r4 = fma (a, PC[7], PC[6]); |
| double r5 = fma (a, PC[9], PC[8]); |
| double r6 = fma (a, PC[11], PC[10]); |
| double r7 = fma (a, PC[13], PC[12]); |
| double r8 = fma (a, PC[15], PC[14]); |
| |
| double a2 = a * a; |
| |
| double r = r8; |
| r = fma (a2, r, r7); |
| r = fma (a2, r, r6); |
| r = fma (a2, r, r5); |
| r = fma (a2, r, r4); |
| r = fma (a2, r, r3); |
| r = fma (a2, r, r2); |
| r = fma (a2, r, r1); |
| |
| if (sign) |
| return -1.0 + r; |
| else |
| return 1.0 - r; |
| } |
| else if (ia < 0x400a0000) |
| { /* 2 <= |x| < 3.25. */ |
| double a = fabs (x); |
| a = fma (0.5, a, -1.0); |
| |
| double r1 = fma (a, PD[1], PD[0]); |
| double r2 = fma (a, PD[3], PD[2]); |
| double r3 = fma (a, PD[5], PD[4]); |
| double r4 = fma (a, PD[7], PD[6]); |
| double r5 = fma (a, PD[9], PD[8]); |
| double r6 = fma (a, PD[11], PD[10]); |
| double r7 = fma (a, PD[13], PD[12]); |
| double r8 = fma (a, PD[15], PD[14]); |
| double r9 = fma (a, PD[17], PD[16]); |
| |
| double a2 = a * a; |
| |
| double r = r9; |
| r = fma (a2, r, r8); |
| r = fma (a2, r, r7); |
| r = fma (a2, r, r6); |
| r = fma (a2, r, r5); |
| r = fma (a2, r, r4); |
| r = fma (a2, r, r3); |
| r = fma (a2, r, r2); |
| r = fma (a2, r, r1); |
| |
| if (sign) |
| return -1.0 + r; |
| else |
| return 1.0 - r; |
| } |
| else if (ia < 0x40100000) |
| { /* 3.25 <= |x| < 4.0. */ |
| double a = fabs (x); |
| a = a - 3.25; |
| |
| double r1 = fma (a, PE[1], PE[0]); |
| double r2 = fma (a, PE[3], PE[2]); |
| double r3 = fma (a, PE[5], PE[4]); |
| double r4 = fma (a, PE[7], PE[6]); |
| double r5 = fma (a, PE[9], PE[8]); |
| double r6 = fma (a, PE[11], PE[10]); |
| double r7 = fma (a, PE[13], PE[12]); |
| |
| double a2 = a * a; |
| |
| double r = r7; |
| r = fma (a2, r, r6); |
| r = fma (a2, r, r5); |
| r = fma (a2, r, r4); |
| r = fma (a2, r, r3); |
| r = fma (a2, r, r2); |
| r = fma (a2, r, r1); |
| |
| if (sign) |
| return -1.0 + r; |
| else |
| return 1.0 - r; |
| } |
| else if (ia < 0x4017a000) |
| { /* 4 <= |x| < 5.90625. */ |
| double a = fabs (x); |
| a = fma (0.5, a, -2.0); |
| |
| double r1 = fma (a, PF[1], PF[0]); |
| double r2 = fma (a, PF[3], PF[2]); |
| double r3 = fma (a, PF[5], PF[4]); |
| double r4 = fma (a, PF[7], PF[6]); |
| double r5 = fma (a, PF[9], PF[8]); |
| double r6 = fma (a, PF[11], PF[10]); |
| double r7 = fma (a, PF[13], PF[12]); |
| double r8 = fma (a, PF[15], PF[14]); |
| double r9 = PF[16]; |
| |
| double a2 = a * a; |
| |
| double r = r9; |
| r = fma (a2, r, r8); |
| r = fma (a2, r, r7); |
| r = fma (a2, r, r6); |
| r = fma (a2, r, r5); |
| r = fma (a2, r, r4); |
| r = fma (a2, r, r3); |
| r = fma (a2, r, r2); |
| r = fma (a2, r, r1); |
| |
| if (sign) |
| return -1.0 + r; |
| else |
| return 1.0 - r; |
| } |
| else |
| { |
| /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */ |
| if (unlikely (ia >= 0x7ff00000)) |
| return (double) (1.0 - (sign << 1)) + 1.0 / x; |
| |
| if (sign) |
| return -1.0; |
| else |
| return 1.0; |
| } |
| } |