Update prebuilts to go1.8rc1 ab/3640477
Bug: 32982374
Test: m -j blueprint_tools
Change-Id: Ife36ed9cf5b2617ccc8fdf0fdd178d19378546cd
diff --git a/src/math/all_test.go b/src/math/all_test.go
index d9ea1fd..3d8cd72 100644
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@@ -1165,21 +1165,88 @@
{NaN(), 0},
}
-var vfgammaSC = []float64{
- Inf(-1),
- -3,
- Copysign(0, -1),
- 0,
- Inf(1),
- NaN(),
-}
-var gammaSC = []float64{
- NaN(),
- NaN(),
- Inf(-1),
- Inf(1),
- Inf(1),
- NaN(),
+var vfgamma = [][2]float64{
+ {Inf(1), Inf(1)},
+ {Inf(-1), NaN()},
+ {0, Inf(1)},
+ {Copysign(0, -1), Inf(-1)},
+ {NaN(), NaN()},
+ {-1, NaN()},
+ {-2, NaN()},
+ {-3, NaN()},
+ {-1e16, NaN()},
+ {-1e300, NaN()},
+ {1.7e308, Inf(1)},
+
+ // Test inputs inspired by Python test suite.
+ // Outputs computed at high precision by PARI/GP.
+ // If recomputing table entries, be careful to use
+ // high-precision (%.1000g) formatting of the float64 inputs.
+ // For example, -2.0000000000000004 is the float64 with exact value
+ // -2.00000000000000044408920985626161695, and
+ // gamma(-2.0000000000000004) = -1249999999999999.5386078562728167651513, while
+ // gamma(-2.00000000000000044408920985626161695) = -1125899906826907.2044875028130093136826.
+ // Thus the table lists -1.1258999068426235e+15 as the answer.
+ {0.5, 1.772453850905516},
+ {1.5, 0.886226925452758},
+ {2.5, 1.329340388179137},
+ {3.5, 3.3233509704478426},
+ {-0.5, -3.544907701811032},
+ {-1.5, 2.363271801207355},
+ {-2.5, -0.9453087204829419},
+ {-3.5, 0.2700882058522691},
+ {0.1, 9.51350769866873},
+ {0.01, 99.4325851191506},
+ {1e-08, 9.999999942278434e+07},
+ {1e-16, 1e+16},
+ {0.001, 999.4237724845955},
+ {1e-16, 1e+16},
+ {1e-308, 1e+308},
+ {5.6e-309, 1.7857142857142864e+308},
+ {5.5e-309, Inf(1)},
+ {1e-309, Inf(1)},
+ {1e-323, Inf(1)},
+ {5e-324, Inf(1)},
+ {-0.1, -10.686287021193193},
+ {-0.01, -100.58719796441078},
+ {-1e-08, -1.0000000057721567e+08},
+ {-1e-16, -1e+16},
+ {-0.001, -1000.5782056293586},
+ {-1e-16, -1e+16},
+ {-1e-308, -1e+308},
+ {-5.6e-309, -1.7857142857142864e+308},
+ {-5.5e-309, Inf(-1)},
+ {-1e-309, Inf(-1)},
+ {-1e-323, Inf(-1)},
+ {-5e-324, Inf(-1)},
+ {-0.9999999999999999, -9.007199254740992e+15},
+ {-1.0000000000000002, 4.5035996273704955e+15},
+ {-1.9999999999999998, 2.2517998136852485e+15},
+ {-2.0000000000000004, -1.1258999068426235e+15},
+ {-100.00000000000001, -7.540083334883109e-145},
+ {-99.99999999999999, 7.540083334884096e-145},
+ {17, 2.0922789888e+13},
+ {171, 7.257415615307999e+306},
+ {171.6, 1.5858969096672565e+308},
+ {171.624, 1.7942117599248104e+308},
+ {171.625, Inf(1)},
+ {172, Inf(1)},
+ {2000, Inf(1)},
+ {-100.5, -3.3536908198076787e-159},
+ {-160.5, -5.255546447007829e-286},
+ {-170.5, -3.3127395215386074e-308},
+ {-171.5, 1.9316265431712e-310},
+ {-176.5, -1.196e-321},
+ {-177.5, 5e-324},
+ {-178.5, Copysign(0, -1)},
+ {-179.5, 0},
+ {-201.0001, 0},
+ {-202.9999, Copysign(0, -1)},
+ {-1000.5, Copysign(0, -1)},
+ {-1.0000000003e+09, Copysign(0, -1)},
+ {-4.5035996273704955e+15, 0},
+ {-63.349078729022985, 4.177797167776188e-88},
+ {-127.45117632943295, 1.183111089623681e-214},
}
var vfhypotSC = [][2]float64{
@@ -1735,6 +1802,12 @@
}
func tolerance(a, b, e float64) bool {
+ // Multiplying by e here can underflow denormal values to zero.
+ // Check a==b so that at least if a and b are small and identical
+ // we say they match.
+ if a == b {
+ return true
+ }
d := a - b
if d < 0 {
d = -d
@@ -1974,7 +2047,7 @@
func testExp(t *testing.T, Exp func(float64) float64, name string) {
for i := 0; i < len(vf); i++ {
- if f := Exp(vf[i]); !close(exp[i], f) {
+ if f := Exp(vf[i]); !veryclose(exp[i], f) {
t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i])
}
}
@@ -2147,9 +2220,18 @@
t.Errorf("Gamma(%g) = %g, want %g", vf[i], f, gamma[i])
}
}
- for i := 0; i < len(vfgammaSC); i++ {
- if f := Gamma(vfgammaSC[i]); !alike(gammaSC[i], f) {
- t.Errorf("Gamma(%g) = %g, want %g", vfgammaSC[i], f, gammaSC[i])
+ for _, g := range vfgamma {
+ f := Gamma(g[0])
+ var ok bool
+ if IsNaN(g[1]) || IsInf(g[1], 0) || g[1] == 0 || f == 0 {
+ ok = alike(g[1], f)
+ } else if g[0] > -50 && g[0] <= 171 {
+ ok = veryclose(g[1], f)
+ } else {
+ ok = close(g[1], f)
+ }
+ if !ok {
+ t.Errorf("Gamma(%g) = %g, want %g", g[0], f, g[1])
}
}
}
@@ -3000,14 +3082,6 @@
var Global float64
-func BenchmarkSqrt(b *testing.B) {
- x, y := 0.0, 10.0
- for i := 0; i < b.N; i++ {
- x += Sqrt(y)
- }
- Global = x
-}
-
func BenchmarkSqrtIndirect(b *testing.B) {
x, y := 0.0, 10.0
f := Sqrt
@@ -3017,10 +3091,27 @@
Global = x
}
-func BenchmarkSqrtGo(b *testing.B) {
- x, y := 0.0, 10.0
+func BenchmarkSqrtLatency(b *testing.B) {
+ x := 10.0
for i := 0; i < b.N; i++ {
- x += SqrtGo(y)
+ x = Sqrt(x)
+ }
+ Global = x
+}
+
+func BenchmarkSqrtIndirectLatency(b *testing.B) {
+ x := 10.0
+ f := Sqrt
+ for i := 0; i < b.N; i++ {
+ x = f(x)
+ }
+ Global = x
+}
+
+func BenchmarkSqrtGoLatency(b *testing.B) {
+ x := 10.0
+ for i := 0; i < b.N; i++ {
+ x = SqrtGo(x)
}
Global = x
}
diff --git a/src/math/arith_s390x.go b/src/math/arith_s390x.go
new file mode 100644
index 0000000..892935a
--- /dev/null
+++ b/src/math/arith_s390x.go
@@ -0,0 +1,29 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+func log10TrampolineSetup(x float64) float64
+func log10Asm(x float64) float64
+
+func cosTrampolineSetup(x float64) float64
+func cosAsm(x float64) float64
+
+func coshTrampolineSetup(x float64) float64
+func coshAsm(x float64) float64
+
+func sinTrampolineSetup(x float64) float64
+func sinAsm(x float64) float64
+
+func sinhTrampolineSetup(x float64) float64
+func sinhAsm(x float64) float64
+
+func tanhTrampolineSetup(x float64) float64
+func tanhAsm(x float64) float64
+
+// hasVectorFacility reports whether the machine has the z/Architecture
+// vector facility installed and enabled.
+func hasVectorFacility() bool
+
+var hasVX = hasVectorFacility()
diff --git a/src/math/arith_s390x_test.go b/src/math/arith_s390x_test.go
new file mode 100644
index 0000000..b4f3070
--- /dev/null
+++ b/src/math/arith_s390x_test.go
@@ -0,0 +1,144 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests whether the non vector routines are working, even when the tests are run on a
+// vector-capable machine.
+package math_test
+
+import (
+ . "math"
+ "testing"
+)
+
+func TestCosNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ if f := CosNoVec(vf[i]); !veryclose(cos[i], f) {
+ t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i])
+ }
+ }
+ for i := 0; i < len(vfcosSC); i++ {
+ if f := CosNoVec(vfcosSC[i]); !alike(cosSC[i], f) {
+ t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i])
+ }
+ }
+}
+
+func TestCoshNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ if f := CoshNoVec(vf[i]); !close(cosh[i], f) {
+ t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i])
+ }
+ }
+ for i := 0; i < len(vfcoshSC); i++ {
+ if f := CoshNoVec(vfcoshSC[i]); !alike(coshSC[i], f) {
+ t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, coshSC[i])
+ }
+ }
+}
+func TestSinNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ if f := SinNoVec(vf[i]); !veryclose(sin[i], f) {
+ t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i])
+ }
+ }
+ for i := 0; i < len(vfsinSC); i++ {
+ if f := SinNoVec(vfsinSC[i]); !alike(sinSC[i], f) {
+ t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i])
+ }
+ }
+}
+
+func TestSinhNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ if f := SinhNoVec(vf[i]); !close(sinh[i], f) {
+ t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i])
+ }
+ }
+ for i := 0; i < len(vfsinhSC); i++ {
+ if f := SinhNoVec(vfsinhSC[i]); !alike(sinhSC[i], f) {
+ t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i])
+ }
+ }
+}
+
+// Check that math functions of high angle values
+// return accurate results. [Since (vf[i] + large) - large != vf[i],
+// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
+// a multiple of 2*Pi, is misleading.]
+func TestLargeCosNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ large := float64(100000 * Pi)
+ for i := 0; i < len(vf); i++ {
+ f1 := cosLarge[i]
+ f2 := CosNoVec(vf[i] + large)
+ if !close(f1, f2) {
+ t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1)
+ }
+ }
+}
+
+func TestLargeSinNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ large := float64(100000 * Pi)
+ for i := 0; i < len(vf); i++ {
+ f1 := sinLarge[i]
+ f2 := SinNoVec(vf[i] + large)
+ if !close(f1, f2) {
+ t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1)
+ }
+ }
+}
+
+func TestTanhNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ if f := TanhNoVec(vf[i]); !veryclose(tanh[i], f) {
+ t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i])
+ }
+ }
+ for i := 0; i < len(vftanhSC); i++ {
+ if f := TanhNoVec(vftanhSC[i]); !alike(tanhSC[i], f) {
+ t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i])
+ }
+ }
+
+}
+
+func TestLog10Novec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ for i := 0; i < len(vf); i++ {
+ a := Abs(vf[i])
+ if f := Log10NoVec(a); !veryclose(log10[i], f) {
+ t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i])
+ }
+ }
+ if f := Log10NoVec(E); f != Log10E {
+ t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E)
+ }
+ for i := 0; i < len(vflogSC); i++ {
+ if f := Log10NoVec(vflogSC[i]); !alike(logSC[i], f) {
+ t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i])
+ }
+ }
+}
diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s
index b69a2c6..a7eba67 100644
--- a/src/math/big/arith_amd64.s
+++ b/src/math/big/arith_amd64.s
@@ -326,6 +326,41 @@
MOVQ r+56(FP), CX // c = r
MOVQ z_len+8(FP), R11
MOVQ $0, BX // i = 0
+
+ CMPQ R11, $4
+ JL E5
+
+U5: // i+4 <= n
+ // regular loop body unrolled 4x
+ MOVQ (0*8)(R8)(BX*8), AX
+ MULQ R9
+ ADDQ CX, AX
+ ADCQ $0, DX
+ MOVQ AX, (0*8)(R10)(BX*8)
+ MOVQ DX, CX
+ MOVQ (1*8)(R8)(BX*8), AX
+ MULQ R9
+ ADDQ CX, AX
+ ADCQ $0, DX
+ MOVQ AX, (1*8)(R10)(BX*8)
+ MOVQ DX, CX
+ MOVQ (2*8)(R8)(BX*8), AX
+ MULQ R9
+ ADDQ CX, AX
+ ADCQ $0, DX
+ MOVQ AX, (2*8)(R10)(BX*8)
+ MOVQ DX, CX
+ MOVQ (3*8)(R8)(BX*8), AX
+ MULQ R9
+ ADDQ CX, AX
+ ADCQ $0, DX
+ MOVQ AX, (3*8)(R10)(BX*8)
+ MOVQ DX, CX
+ ADDQ $4, BX // i += 4
+
+ LEAQ 4(BX), DX
+ CMPQ DX, R11
+ JLE U5
JMP E5
L5: MOVQ (R8)(BX*8), AX
diff --git a/src/math/big/arith_decl_s390x.go b/src/math/big/arith_decl_s390x.go
new file mode 100644
index 0000000..0f11481
--- /dev/null
+++ b/src/math/big/arith_decl_s390x.go
@@ -0,0 +1,23 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go
+
+package big
+
+func addVV_check(z, x, y []Word) (c Word)
+func addVV_vec(z, x, y []Word) (c Word)
+func addVV_novec(z, x, y []Word) (c Word)
+func subVV_check(z, x, y []Word) (c Word)
+func subVV_vec(z, x, y []Word) (c Word)
+func subVV_novec(z, x, y []Word) (c Word)
+func addVW_check(z, x []Word, y Word) (c Word)
+func addVW_vec(z, x []Word, y Word) (c Word)
+func addVW_novec(z, x []Word, y Word) (c Word)
+func subVW_check(z, x []Word, y Word) (c Word)
+func subVW_vec(z, x []Word, y Word) (c Word)
+func subVW_novec(z, x []Word, y Word) (c Word)
+func hasVectorFacility() bool
+
+var hasVX = hasVectorFacility()
diff --git a/src/math/big/arith_mipsx.s b/src/math/big/arith_mipsx.s
new file mode 100644
index 0000000..ac23114
--- /dev/null
+++ b/src/math/big/arith_mipsx.s
@@ -0,0 +1,46 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,mips !math_big_pure_go,mipsle
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·mulWW(SB),NOSPLIT,$0
+ JMP ·mulWW_g(SB)
+
+TEXT ·divWW(SB),NOSPLIT,$0
+ JMP ·divWW_g(SB)
+
+TEXT ·addVV(SB),NOSPLIT,$0
+ JMP ·addVV_g(SB)
+
+TEXT ·subVV(SB),NOSPLIT,$0
+ JMP ·subVV_g(SB)
+
+TEXT ·addVW(SB),NOSPLIT,$0
+ JMP ·addVW_g(SB)
+
+TEXT ·subVW(SB),NOSPLIT,$0
+ JMP ·subVW_g(SB)
+
+TEXT ·shlVU(SB),NOSPLIT,$0
+ JMP ·shlVU_g(SB)
+
+TEXT ·shrVU(SB),NOSPLIT,$0
+ JMP ·shrVU_g(SB)
+
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+ JMP ·mulAddVWW_g(SB)
+
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+ JMP ·addMulVVW_g(SB)
+
+TEXT ·divWVW(SB),NOSPLIT,$0
+ JMP ·divWVW_g(SB)
+
+TEXT ·bitLen(SB),NOSPLIT,$0
+ JMP ·bitLen_g(SB)
diff --git a/src/math/big/arith_ppc64.s b/src/math/big/arith_ppc64.s
new file mode 100644
index 0000000..47fe8f1
--- /dev/null
+++ b/src/math/big/arith_ppc64.s
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·divWW(SB), NOSPLIT, $0
+ BR ·divWW_g(SB)
+
diff --git a/src/math/big/arith_ppc64le.s b/src/math/big/arith_ppc64le.s
new file mode 100644
index 0000000..b78cdfe
--- /dev/null
+++ b/src/math/big/arith_ppc64le.s
@@ -0,0 +1,50 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64le
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB), NOSPLIT, $0
+ MOVD x1+0(FP), R4
+ MOVD x0+8(FP), R5
+ MOVD y+16(FP), R6
+
+ CMPU R4, R6
+ BGE divbigger
+
+ // from the programmer's note in ch. 3 of the ISA manual, p.74
+ DIVDEU R6, R4, R3
+ DIVDU R6, R5, R7
+ MULLD R6, R3, R8
+ MULLD R6, R7, R20
+ SUB R20, R5, R10
+ ADD R7, R3, R3
+ SUB R8, R10, R4
+ CMPU R4, R10
+ BLT adjust
+ CMPU R4, R6
+ BLT end
+
+adjust:
+ MOVD $1, R21
+ ADD R21, R3, R3
+ SUB R6, R4, R4
+
+end:
+ MOVD R3, q+24(FP)
+ MOVD R4, r+32(FP)
+
+ RET
+
+divbigger:
+ MOVD $-1, R7
+ MOVD R7, q+24(FP)
+ MOVD R7, r+32(FP)
+ RET
+
diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s
index d4d4171..89d1cbf 100644
--- a/src/math/big/arith_ppc64x.s
+++ b/src/math/big/arith_ppc64x.s
@@ -9,38 +9,178 @@
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
-TEXT ·mulWW(SB),NOSPLIT,$0
- BR ·mulWW_g(SB)
+// func mulWW(x, y Word) (z1, z0 Word)
+TEXT ·mulWW(SB), NOSPLIT, $0
+ MOVD x+0(FP), R4
+ MOVD y+8(FP), R5
+ MULHDU R4, R5, R6
+ MULLD R4, R5, R7
+ MOVD R6, z1+16(FP)
+ MOVD R7, z0+24(FP)
+ RET
-TEXT ·divWW(SB),NOSPLIT,$0
- BR ·divWW_g(SB)
-
-TEXT ·addVV(SB),NOSPLIT,$0
+TEXT ·addVV(SB), NOSPLIT, $0
BR ·addVV_g(SB)
-TEXT ·subVV(SB),NOSPLIT,$0
- BR ·subVV_g(SB)
+// func subVV(z, x, y []Word) (c Word)
+// z[i] = x[i] - y[i] for all i, carrying
+TEXT ·subVV(SB), NOSPLIT, $0
+ MOVD z_len+8(FP), R7
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z+0(FP), R10
-TEXT ·addVW(SB),NOSPLIT,$0
+ MOVD $0, R4 // c = 0
+ MOVD $0, R5 // i = 0
+ MOVD $1, R29 // work around lack of ADDI
+ MOVD $8, R28 // work around lack of scaled addressing
+
+ SUBC R0, R0 // clear CA
+ JMP sublend
+
+// amd64 saves and restores CF, but I believe they only have to do that because all of
+// their math operations clobber it - we should just be able to recover it at the end.
+subloop:
+ MULLD R5, R28, R6
+ MOVD (R8)(R6), R11 // x[i]
+ MOVD (R9)(R6), R12 // y[i]
+
+ SUBE R12, R11, R15
+ MOVD R15, (R10)(R6)
+
+ ADD R29, R5 // i++
+
+sublend:
+ CMP R5, R7
+ BLT subloop
+
+ ADDZE R4
+ XOR R29, R4
+ MOVD R4, c+72(FP)
+ RET
+
+TEXT ·addVW(SB), NOSPLIT, $0
BR ·addVW_g(SB)
-TEXT ·subVW(SB),NOSPLIT,$0
+TEXT ·subVW(SB), NOSPLIT, $0
BR ·subVW_g(SB)
-TEXT ·shlVU(SB),NOSPLIT,$0
+TEXT ·shlVU(SB), NOSPLIT, $0
BR ·shlVU_g(SB)
-TEXT ·shrVU(SB),NOSPLIT,$0
+TEXT ·shrVU(SB), NOSPLIT, $0
BR ·shrVU_g(SB)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
- BR ·mulAddVWW_g(SB)
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB), NOSPLIT, $0
+ MOVD z+0(FP), R10
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD r+56(FP), R4 // c = r
+ MOVD z_len+8(FP), R11
+ MOVD $0, R3 // i = 0
+ MOVD $8, R18
+ MOVD $1, R19
-TEXT ·addMulVVW(SB),NOSPLIT,$0
- BR ·addMulVVW_g(SB)
+ JMP e5
-TEXT ·divWVW(SB),NOSPLIT,$0
+l5:
+ MULLD R18, R3, R5
+ MOVD (R8)(R5), R20
+ MULLD R9, R20, R6
+ MULHDU R9, R20, R7
+ ADDC R4, R6
+ ADDZE R7
+ MOVD R6, (R10)(R5)
+ MOVD R7, R4
+ ADD R19, R3
+
+e5:
+ CMP R3, R11
+ BLT l5
+
+ MOVD R4, c+64(FP)
+ RET
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB), NOSPLIT, $0
+ MOVD z+0(FP), R10
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z_len+8(FP), R22
+
+ MOVD $0, R5 // i = 0
+ MOVD $0, R4 // c = 0
+ MOVD $8, R28
+ MOVD $-2, R23
+ AND R22, R23 // mask the last bit of z.len
+ MOVD $2, R24
+ CMP R23, R24
+ BGE unrolled
+ JMP end
+
+unrolled:
+ MOVD $8, R19 // no (RA)(RB*8) on power
+ MULLD R5, R19
+ MOVD (R10)(R19), R11 // R11 = z[i]
+ MOVD (R8)(R19), R16 // R16 = x[i]
+ ADD R28, R19, R25
+ MOVD (R10)(R25), R17
+ MOVD (R8)(R25), R18
+
+ MULLD R9, R16, R12
+ MULHDU R9, R16, R14
+ MULLD R9, R18, R6
+ MULHDU R9, R18, R7
+ ADDC R4, R12
+ ADDZE R14
+ ADDC R11, R12 // z[i] = (x[i]*y) + z[i] + carry
+ ADDZE R14 // carry = high order bits + add carry
+ MOVD R12, (R10)(R19)
+ ADDC R14, R6
+ ADDZE R7
+ ADDC R17, R6
+ ADDZE R7
+ MOVD R6, (R10)(R25)
+ MOVD R7, R4
+
+ ADD R24, R5
+ CMP R5, R23
+ BLT unrolled
+ JMP end
+
+loop:
+ MOVD $8, R19
+ MULLD R5, R19
+ MOVD (R10)(R19), R11
+ MOVD (R8)(R19), R16
+ MULLD R9, R16, R12
+ MULHDU R9, R16, R14
+ ADDC R4, R12
+ ADDZE R14
+ ADDC R11, R12
+ ADDZE R14
+ MOVD R12, (R10)(R19)
+ MOVD R14, R4
+
+ MOVD $1, R15
+ ADD R15, R5
+
+end:
+ CMP R5, R22
+ BLT loop
+
+ MOVD R4, c+56(FP)
+ RET
+
+TEXT ·divWVW(SB), NOSPLIT, $0
BR ·divWVW_g(SB)
-TEXT ·bitLen(SB),NOSPLIT,$0
- BR ·bitLen_g(SB)
+// func bitLen(x Word) int
+TEXT ·bitLen(SB), NOSPLIT, $0
+ MOVD x+0(FP), R4
+ CNTLZD R4, R4
+ MOVD $64, R5
+ SUB R4, R5
+ MOVD R5, n+8(FP)
+ RET
diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s
index a691970..bddfd9e 100644
--- a/src/math/big/arith_s390x.s
+++ b/src/math/big/arith_s390x.s
@@ -9,93 +9,464 @@
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
+TEXT ·hasVectorFacility(SB),NOSPLIT,$24-1
+ MOVD $x-24(SP), R1
+ XC $24, 0(R1), 0(R1) // clear the storage
+ MOVD $2, R0 // R0 is the number of double words stored -1
+ WORD $0xB2B01000 // STFLE 0(R1)
+ XOR R0, R0 // reset the value of R0
+ MOVBZ z-8(SP), R1
+ AND $0x40, R1
+ BEQ novector
+vectorinstalled:
+ // check if the vector instruction has been enabled
+ VLEIB $0, $0xF, V16
+ VLGVB $0, V16, R1
+ CMPBNE R1, $0xF, novector
+ MOVB $1, ret+0(FP) // have vx
+ RET
+novector:
+ MOVB $0, ret+0(FP) // no vx
+ RET
+
TEXT ·mulWW(SB),NOSPLIT,$0
- MOVD x+0(FP), R3
- MOVD y+8(FP), R4
- MULHDU R3, R4
- MOVD R10, z1+16(FP)
- MOVD R11, z0+24(FP)
+ MOVD x+0(FP), R3
+ MOVD y+8(FP), R4
+ MULHDU R3, R4
+ MOVD R10, z1+16(FP)
+ MOVD R11, z0+24(FP)
RET
// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),NOSPLIT,$0
- MOVD x1+0(FP), R10
- MOVD x0+8(FP), R11
- MOVD y+16(FP), R5
- WORD $0xb98700a5 // dlgr r10,r5
- MOVD R11, q+24(FP)
- MOVD R10, r+32(FP)
+ MOVD x1+0(FP), R10
+ MOVD x0+8(FP), R11
+ MOVD y+16(FP), R5
+ WORD $0xb98700a5 // dlgr r10,r5
+ MOVD R11, q+24(FP)
+ MOVD R10, r+32(FP)
RET
// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
- MOVD z_len+8(FP), R3
- MOVD x+24(FP), R8
- MOVD y+48(FP), R9
- MOVD z+0(FP), R2
- MOVD $0, R4 // c = 0
- MOVD $0, R0 // make sure it's zero
- MOVD $0, R10 // i = 0
+
+TEXT ·addVV(SB),NOSPLIT,$0
+ MOVD addvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·addVV_check(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $addvectorfacility+0x00(SB), R1
+ MOVD $·addVV_novec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·addVV_novec(SB), 0(R1)
+ BR ·addVV_novec(SB)
+vectorimpl:
+ MOVD $addvectorfacility+0x00(SB), R1
+ MOVD $·addVV_vec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·addVV_vec(SB), 0(R1)
+ BR ·addVV_vec(SB)
+
+GLOBL addvectorfacility+0x00(SB), NOPTR, $8
+DATA addvectorfacility+0x00(SB)/8, $·addVV_check(SB)
+
+TEXT ·addVV_vec(SB),NOSPLIT,$0
+ MOVD z_len+8(FP), R3
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z+0(FP), R2
+
+ MOVD $0, R4 // c = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0
+
// s/JL/JMP/ below to disable the unrolled loop
- SUB $4, R3 // n -= 4
- BLT v1 // if n < 0 goto v1
+ SUB $4, R3
+ BLT v1
+ SUB $12, R3 // n -= 16
+ BLT A1 // if n < 0 goto A1
+
+ MOVD R8, R5
+ MOVD R9, R6
+ MOVD R2, R7
+ // n >= 0
+ // regular loop body unrolled 16x
+ VZERO V0 // c = 0
+UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V8
+ ADD $64, R5
+ VPDI $0x4,V1,V1,V1 // flip the doublewords to big-endian order
+ VPDI $0x4,V2,V2,V2 // flip the doublewords to big-endian order
+
+
+ VLM 0(R6), V9, V12 // 64-bytes into V9..V16
+ ADD $64, R6
+ VPDI $0x4,V9,V9,V9 // flip the doublewords to big-endian order
+ VPDI $0x4,V10,V10,V10 // flip the doublewords to big-endian order
+
+ VACCCQ V1, V9, V0, V25
+ VACQ V1, V9, V0, V17
+ VACCCQ V2, V10, V25, V26
+ VACQ V2, V10, V25, V18
+
+
+ VLM 0(R5), V5, V6 // 32-bytes into V1..V8
+ VLM 0(R6), V13, V14 // 32-bytes into V9..V16
+ ADD $32, R5
+ ADD $32, R6
+
+ VPDI $0x4,V3,V3,V3 // flip the doublewords to big-endian order
+ VPDI $0x4,V4,V4,V4 // flip the doublewords to big-endian order
+ VPDI $0x4,V11,V11,V11 // flip the doublewords to big-endian order
+ VPDI $0x4,V12,V12,V12 // flip the doublewords to big-endian order
+
+ VACCCQ V3, V11, V26, V27
+ VACQ V3, V11, V26, V19
+ VACCCQ V4, V12, V27, V28
+ VACQ V4, V12, V27, V20
+
+ VLM 0(R5), V7, V8 // 32-bytes into V1..V8
+ VLM 0(R6), V15, V16 // 32-bytes into V9..V16
+ ADD $32, R5
+ ADD $32, R6
+
+ VPDI $0x4,V5,V5,V5 // flip the doublewords to big-endian order
+ VPDI $0x4,V6,V6,V6 // flip the doublewords to big-endian order
+ VPDI $0x4,V13,V13,V13 // flip the doublewords to big-endian order
+ VPDI $0x4,V14,V14,V14 // flip the doublewords to big-endian order
+
+ VACCCQ V5, V13, V28, V29
+ VACQ V5, V13, V28, V21
+ VACCCQ V6, V14, V29, V30
+ VACQ V6, V14, V29, V22
+
+ VPDI $0x4,V7,V7,V7 // flip the doublewords to big-endian order
+ VPDI $0x4,V8,V8,V8 // flip the doublewords to big-endian order
+ VPDI $0x4,V15,V15,V15 // flip the doublewords to big-endian order
+ VPDI $0x4,V16,V16,V16 // flip the doublewords to big-endian order
+
+ VACCCQ V7, V15, V30, V31
+ VACQ V7, V15, V30, V23
+ VACCCQ V8, V16, V31, V0 //V0 has carry-over
+ VACQ V8, V16, V31, V24
+
+ VPDI $0x4,V17,V17,V17 // flip the doublewords to big-endian order
+ VPDI $0x4,V18,V18,V18 // flip the doublewords to big-endian order
+ VPDI $0x4,V19,V19,V19 // flip the doublewords to big-endian order
+ VPDI $0x4,V20,V20,V20 // flip the doublewords to big-endian order
+ VPDI $0x4,V21,V21,V21 // flip the doublewords to big-endian order
+ VPDI $0x4,V22,V22,V22 // flip the doublewords to big-endian order
+ VPDI $0x4,V23,V23,V23 // flip the doublewords to big-endian order
+ VPDI $0x4,V24,V24,V24 // flip the doublewords to big-endian order
+ VSTM V17, V24, 0(R7) // 128-bytes into z
+ ADD $128, R7
+ ADD $128, R10 // i += 16
+ SUB $16, R3 // n -= 16
+ BGE UU1 // if n >= 0 goto U1
+ VLGVG $1, V0, R4 // put cf into R4
+ NEG R4, R4 // save cf
+
+A1: ADD $12, R3 // n += 16
+
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ BLT v1 // if n < 0 goto v1
U1: // n >= 0
// regular loop body unrolled 4x
- MOVD 0(R8)(R10*1), R5
- MOVD 8(R8)(R10*1), R6
- MOVD 16(R8)(R10*1), R7
- MOVD 24(R8)(R10*1), R1
- ADDC R4, R4 // restore CF
- MOVD 0(R9)(R10*1), R11
- ADDE R11, R5
- MOVD 8(R9)(R10*1), R11
- ADDE R11, R6
- MOVD 16(R9)(R10*1), R11
- ADDE R11, R7
- MOVD 24(R9)(R10*1), R11
- ADDE R11, R1
- MOVD R0, R4
- ADDE R4, R4 // save CF
- NEG R4, R4
- MOVD R5, 0(R2)(R10*1)
- MOVD R6, 8(R2)(R10*1)
- MOVD R7, 16(R2)(R10*1)
- MOVD R1, 24(R2)(R10*1)
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ ADDC R4, R4 // restore CF
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD 8(R9)(R10*1), R11
+ ADDE R11, R6
+ MOVD 16(R9)(R10*1), R11
+ ADDE R11, R7
+ MOVD 24(R9)(R10*1), R11
+ ADDE R11, R1
+ MOVD R0, R4
+ ADDE R4, R4 // save CF
+ NEG R4, R4
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
- ADD $32, R10 // i += 4
- SUB $4, R3 // n -= 4
- BGE U1 // if n >= 0 goto U1
+ ADD $32, R10 // i += 4
+ SUB $4, R3 // n -= 4
+ BGE U1 // if n >= 0 goto U1
-v1: ADD $4, R3 // n += 4
- BLE E1 // if n <= 0 goto E1
+v1: ADD $4, R3 // n += 4
+ BLE E1 // if n <= 0 goto E1
L1: // n > 0
- ADDC R4, R4 // restore CF
- MOVD 0(R8)(R10*1), R5
- MOVD 0(R9)(R10*1), R11
- ADDE R11, R5
- MOVD R5, 0(R2)(R10*1)
- MOVD R0, R4
- ADDE R4, R4 // save CF
- NEG R4, R4
+ ADDC R4, R4 // restore CF
+ MOVD 0(R8)(R10*1), R5
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R0, R4
+ ADDE R4, R4 // save CF
+ NEG R4, R4
- ADD $8, R10 // i++
- SUB $1, R3 // n--
- BGT L1 // if n > 0 goto L1
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L1 // if n > 0 goto L1
-E1: NEG R4, R4
- MOVD R4, c+72(FP) // return c
+E1: NEG R4, R4
+ MOVD R4, c+72(FP) // return c
RET
+TEXT ·addVV_novec(SB),NOSPLIT,$0
+novec:
+ MOVD z_len+8(FP), R3
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z+0(FP), R2
+
+ MOVD $0, R4 // c = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v1n // if n < 0 goto v1n
+U1n: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ ADDC R4, R4 // restore CF
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD 8(R9)(R10*1), R11
+ ADDE R11, R6
+ MOVD 16(R9)(R10*1), R11
+ ADDE R11, R7
+ MOVD 24(R9)(R10*1), R11
+ ADDE R11, R1
+ MOVD R0, R4
+ ADDE R4, R4 // save CF
+ NEG R4, R4
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+
+ ADD $32, R10 // i += 4
+ SUB $4, R3 // n -= 4
+ BGE U1n // if n >= 0 goto U1n
+
+v1n: ADD $4, R3 // n += 4
+ BLE E1n // if n <= 0 goto E1n
+
+L1n: // n > 0
+ ADDC R4, R4 // restore CF
+ MOVD 0(R8)(R10*1), R5
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R0, R4
+ ADDE R4, R4 // save CF
+ NEG R4, R4
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L1n // if n > 0 goto L1n
+
+E1n: NEG R4, R4
+ MOVD R4, c+72(FP) // return c
+ RET
+
+
+TEXT ·subVV(SB),NOSPLIT,$0
+ MOVD subvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·subVV_check(SB),NOSPLIT,$0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $subvectorfacility+0x00(SB), R1
+ MOVD $·subVV_novec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·subVV_novec(SB), 0(R1)
+ BR ·subVV_novec(SB)
+vectorimpl:
+ MOVD $subvectorfacility+0x00(SB), R1
+ MOVD $·subVV_vec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·subVV_vec(SB), 0(R1)
+ BR ·subVV_vec(SB)
+
+GLOBL subvectorfacility+0x00(SB), NOPTR, $8
+DATA subvectorfacility+0x00(SB)/8, $·subVV_check(SB)
+
// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names)
-TEXT ·subVV(SB),NOSPLIT,$0
+TEXT ·subVV_vec(SB),NOSPLIT,$0
+ MOVD z_len+8(FP), R3
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z+0(FP), R2
+ MOVD $0, R4 // c = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v1 // if n < 0 goto v1
+ SUB $12, R3 // n -= 16
+ BLT A1 // if n < 0 goto A1
+
+ MOVD R8, R5
+ MOVD R9, R6
+ MOVD R2, R7
+
+ // n >= 0
+ // regular loop body unrolled 16x
+ VZERO V0 // cf = 0
+ MOVD $1, R4 // for 390 subtraction cf starts as 1 (no borrow)
+ VLVGG $1, R4, V0 //put carry into V0
+
+UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V8
+ ADD $64, R5
+ VPDI $0x4,V1,V1,V1 // flip the doublewords to big-endian order
+ VPDI $0x4,V2,V2,V2 // flip the doublewords to big-endian order
+
+
+ VLM 0(R6), V9, V12 // 64-bytes into V9..V16
+ ADD $64, R6
+ VPDI $0x4,V9,V9,V9 // flip the doublewords to big-endian order
+ VPDI $0x4,V10,V10,V10 // flip the doublewords to big-endian order
+
+ VSBCBIQ V1, V9, V0, V25
+ VSBIQ V1, V9, V0, V17
+ VSBCBIQ V2, V10, V25, V26
+ VSBIQ V2, V10, V25, V18
+
+
+ VLM 0(R5), V5, V6 // 32-bytes into V1..V8
+ VLM 0(R6), V13, V14 // 32-bytes into V9..V16
+ ADD $32, R5
+ ADD $32, R6
+
+ VPDI $0x4,V3,V3,V3 // flip the doublewords to big-endian order
+ VPDI $0x4,V4,V4,V4 // flip the doublewords to big-endian order
+ VPDI $0x4,V11,V11,V11 // flip the doublewords to big-endian order
+ VPDI $0x4,V12,V12,V12 // flip the doublewords to big-endian order
+
+ VSBCBIQ V3, V11, V26, V27
+ VSBIQ V3, V11, V26, V19
+ VSBCBIQ V4, V12, V27, V28
+ VSBIQ V4, V12, V27, V20
+
+ VLM 0(R5), V7, V8 // 32-bytes into V1..V8
+ VLM 0(R6), V15, V16 // 32-bytes into V9..V16
+ ADD $32, R5
+ ADD $32, R6
+
+ VPDI $0x4,V5,V5,V5 // flip the doublewords to big-endian order
+ VPDI $0x4,V6,V6,V6 // flip the doublewords to big-endian order
+ VPDI $0x4,V13,V13,V13 // flip the doublewords to big-endian order
+ VPDI $0x4,V14,V14,V14 // flip the doublewords to big-endian order
+
+ VSBCBIQ V5, V13, V28, V29
+ VSBIQ V5, V13, V28, V21
+ VSBCBIQ V6, V14, V29, V30
+ VSBIQ V6, V14, V29, V22
+
+ VPDI $0x4,V7,V7,V7 // flip the doublewords to big-endian order
+ VPDI $0x4,V8,V8,V8 // flip the doublewords to big-endian order
+ VPDI $0x4,V15,V15,V15 // flip the doublewords to big-endian order
+ VPDI $0x4,V16,V16,V16 // flip the doublewords to big-endian order
+
+ VSBCBIQ V7, V15, V30, V31
+ VSBIQ V7, V15, V30, V23
+ VSBCBIQ V8, V16, V31, V0 //V0 has carry-over
+ VSBIQ V8, V16, V31, V24
+
+ VPDI $0x4,V17,V17,V17 // flip the doublewords to big-endian order
+ VPDI $0x4,V18,V18,V18 // flip the doublewords to big-endian order
+ VPDI $0x4,V19,V19,V19 // flip the doublewords to big-endian order
+ VPDI $0x4,V20,V20,V20 // flip the doublewords to big-endian order
+ VPDI $0x4,V21,V21,V21 // flip the doublewords to big-endian order
+ VPDI $0x4,V22,V22,V22 // flip the doublewords to big-endian order
+ VPDI $0x4,V23,V23,V23 // flip the doublewords to big-endian order
+ VPDI $0x4,V24,V24,V24 // flip the doublewords to big-endian order
+ VSTM V17, V24, 0(R7) // 128-bytes into z
+ ADD $128, R7
+ ADD $128, R10 // i += 16
+ SUB $16, R3 // n -= 16
+ BGE UU1 // if n >= 0 goto U1
+ VLGVG $1, V0, R4 // put cf into R4
+ SUB $1, R4 // save cf
+
+A1: ADD $12, R3 // n += 16
+ BLT v1 // if n < 0 goto v1
+
+U1: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ MOVD R0, R11
+ SUBC R4, R11 // restore CF
+ MOVD 0(R9)(R10*1), R11
+ SUBE R11, R5
+ MOVD 8(R9)(R10*1), R11
+ SUBE R11, R6
+ MOVD 16(R9)(R10*1), R11
+ SUBE R11, R7
+ MOVD 24(R9)(R10*1), R11
+ SUBE R11, R1
+ MOVD R0, R4
+ SUBE R4, R4 // save CF
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+ ADD $32, R10 // i += 4
+ SUB $4, R3 // n -= 4
+ BGE U1 // if n >= 0 goto U1n
+
+v1: ADD $4, R3 // n += 4
+ BLE E1 // if n <= 0 goto E1
+
+L1: // n > 0
+ MOVD R0, R11
+ SUBC R4, R11 // restore CF
+ MOVD 0(R8)(R10*1), R5
+ MOVD 0(R9)(R10*1), R11
+ SUBE R11, R5
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R0, R4
+ SUBE R4, R4 // save CF
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L1 // if n > 0 goto L1n
+
+E1: NEG R4, R4
+ MOVD R4, c+72(FP) // return c
+ RET
+
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names)
+TEXT ·subVV_novec(SB),NOSPLIT,$0
MOVD z_len+8(FP), R3
MOVD x+24(FP), R8
MOVD y+48(FP), R9
@@ -158,9 +529,163 @@
MOVD R4, c+72(FP) // return c
RET
-
-// func addVW(z, x []Word, y Word) (c Word)
TEXT ·addVW(SB),NOSPLIT,$0
+ MOVD addwvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·addVW_check(SB),NOSPLIT,$0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $addwvectorfacility+0x00(SB), R1
+ MOVD $·addVW_novec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·addVW_novec(SB), 0(R1)
+ BR ·addVW_novec(SB)
+vectorimpl:
+ MOVD $addwvectorfacility+0x00(SB), R1
+ MOVD $·addVW_vec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·addVW_vec(SB), 0(R1)
+ BR ·addVW_vec(SB)
+
+GLOBL addwvectorfacility+0x00(SB), NOPTR, $8
+DATA addwvectorfacility+0x00(SB)/8, $·addVW_check(SB)
+
+
+// func addVW_vec(z, x []Word, y Word) (c Word)
+TEXT ·addVW_vec(SB),NOSPLIT,$0
+ MOVD z_len+8(FP), R3
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R4 // c = y
+ MOVD z+0(FP), R2
+
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0
+ MOVD R8, R5
+ MOVD R2, R7
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v10 // if n < 0 goto v10
+ SUB $12, R3
+ BLT A10
+
+ // n >= 0
+ // regular loop body unrolled 16x
+
+ VZERO V0 // prepare V0 to be final carry register
+ VZERO V9 // to ensure upper half is zero
+ VLVGG $1, R4, V9
+UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V4
+ ADD $64, R5
+ VPDI $0x4,V1,V1,V1 // flip the doublewords to big-endian order
+ VPDI $0x4,V2,V2,V2 // flip the doublewords to big-endian order
+
+
+ VACCCQ V1, V9, V0, V25
+ VACQ V1, V9, V0, V17
+ VZERO V9
+ VACCCQ V2, V9, V25, V26
+ VACQ V2, V9, V25, V18
+
+
+ VLM 0(R5), V5, V6 // 32-bytes into V5..V6
+ ADD $32, R5
+
+ VPDI $0x4,V3,V3,V3 // flip the doublewords to big-endian order
+ VPDI $0x4,V4,V4,V4 // flip the doublewords to big-endian order
+
+ VACCCQ V3, V9, V26, V27
+ VACQ V3, V9, V26, V19
+ VACCCQ V4, V9, V27, V28
+ VACQ V4, V9, V27, V20
+
+ VLM 0(R5), V7, V8 // 32-bytes into V7..V8
+ ADD $32, R5
+
+ VPDI $0x4,V5,V5,V5 // flip the doublewords to big-endian order
+ VPDI $0x4,V6,V6,V6 // flip the doublewords to big-endian order
+
+ VACCCQ V5, V9, V28, V29
+ VACQ V5, V9, V28, V21
+ VACCCQ V6, V9, V29, V30
+ VACQ V6, V9, V29, V22
+
+ VPDI $0x4,V7,V7,V7 // flip the doublewords to big-endian order
+ VPDI $0x4,V8,V8,V8 // flip the doublewords to big-endian order
+
+ VACCCQ V7, V9, V30, V31
+ VACQ V7, V9, V30, V23
+ VACCCQ V8, V9, V31, V0 //V0 has carry-over
+ VACQ V8, V9, V31, V24
+
+ VPDI $0x4,V17,V17,V17 // flip the doublewords to big-endian order
+ VPDI $0x4,V18,V18,V18 // flip the doublewords to big-endian order
+ VPDI $0x4,V19,V19,V19 // flip the doublewords to big-endian order
+ VPDI $0x4,V20,V20,V20 // flip the doublewords to big-endian order
+ VPDI $0x4,V21,V21,V21 // flip the doublewords to big-endian order
+ VPDI $0x4,V22,V22,V22 // flip the doublewords to big-endian order
+ VPDI $0x4,V23,V23,V23 // flip the doublewords to big-endian order
+ VPDI $0x4,V24,V24,V24 // flip the doublewords to big-endian order
+ VSTM V17, V24, 0(R7) // 128-bytes into z
+ ADD $128, R7
+ ADD $128, R10 // i += 16
+ SUB $16, R3 // n -= 16
+ BGE UU1 // if n >= 0 goto U1
+ VLGVG $1, V0, R4 // put cf into R4 in case we branch to v10
+
+A10: ADD $12, R3 // n += 16
+
+
+ // s/JL/JMP/ below to disable the unrolled loop
+
+ BLT v10 // if n < 0 goto v10
+
+
+U4: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ ADDC R4, R5
+ ADDE R0, R6
+ ADDE R0, R7
+ ADDE R0, R1
+ ADDE R0, R0
+ MOVD R0, R4 // save CF
+ SUB R0, R0
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+ ADD $32, R10 // i += 4 -> i +=32
+ SUB $4, R3 // n -= 4
+ BGE U4 // if n >= 0 goto U4
+
+v10: ADD $4, R3 // n += 4
+ BLE E10 // if n <= 0 goto E4
+
+
+L4: // n > 0
+ MOVD 0(R8)(R10*1), R5
+ ADDC R4, R5
+ ADDE R0, R0
+ MOVD R0, R4 // save CF
+ SUB R0, R0
+ MOVD R5, 0(R2)(R10*1)
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L4 // if n > 0 goto L4
+
+E10: MOVD R4, c+56(FP) // return c
+
+ RET
+
+
+TEXT ·addVW_novec(SB),NOSPLIT,$0
//DI = R3, CX = R4, SI = r10, r8 = r8, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0)
MOVD z_len+8(FP), R3
MOVD x+24(FP), R8
@@ -214,10 +739,166 @@
RET
+TEXT ·subVW(SB),NOSPLIT,$0
+ MOVD subwvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·subVW_check(SB),NOSPLIT,$0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $subwvectorfacility+0x00(SB), R1
+ MOVD $·subVW_novec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·subVW_novec(SB), 0(R1)
+ BR ·subVW_novec(SB)
+vectorimpl:
+ MOVD $subwvectorfacility+0x00(SB), R1
+ MOVD $·subVW_vec(SB), R2
+ MOVD R2, 0(R1)
+ //MOVD $·subVW_vec(SB), 0(R1)
+ BR ·subVW_vec(SB)
+
+GLOBL subwvectorfacility+0x00(SB), NOPTR, $8
+DATA subwvectorfacility+0x00(SB)/8, $·subVW_check(SB)
+
+// func subVW(z, x []Word, y Word) (c Word)
+TEXT ·subVW_vec(SB),NOSPLIT,$0
+ MOVD z_len+8(FP), R3
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R4 // c = y
+ MOVD z+0(FP), R2
+
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0
+ MOVD R8, R5
+ MOVD R2, R7
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v11 // if n < 0 goto v11
+ SUB $12, R3
+ BLT A11
+
+ VZERO V0
+ MOVD $1, R6 // prepare V0 to be final carry register
+ VLVGG $1, R6, V0 // borrow is initially "no borrow"
+ VZERO V9 // to ensure upper half is zero
+ VLVGG $1, R4, V9
+
+ // n >= 0
+ // regular loop body unrolled 16x
+
+
+UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V4
+ ADD $64, R5
+ VPDI $0x4,V1,V1,V1 // flip the doublewords to big-endian order
+ VPDI $0x4,V2,V2,V2 // flip the doublewords to big-endian order
+
+
+ VSBCBIQ V1, V9, V0, V25
+ VSBIQ V1, V9, V0, V17
+ VZERO V9
+ VSBCBIQ V2, V9, V25, V26
+ VSBIQ V2, V9, V25, V18
+
+ VLM 0(R5), V5, V6 // 32-bytes into V5..V6
+ ADD $32, R5
+
+ VPDI $0x4,V3,V3,V3 // flip the doublewords to big-endian order
+ VPDI $0x4,V4,V4,V4 // flip the doublewords to big-endian order
+
+
+ VSBCBIQ V3, V9, V26, V27
+ VSBIQ V3, V9, V26, V19
+ VSBCBIQ V4, V9, V27, V28
+ VSBIQ V4, V9, V27, V20
+
+ VLM 0(R5), V7, V8 // 32-bytes into V7..V8
+ ADD $32, R5
+
+ VPDI $0x4,V5,V5,V5 // flip the doublewords to big-endian order
+ VPDI $0x4,V6,V6,V6 // flip the doublewords to big-endian order
+
+ VSBCBIQ V5, V9, V28, V29
+ VSBIQ V5, V9, V28, V21
+ VSBCBIQ V6, V9, V29, V30
+ VSBIQ V6, V9, V29, V22
+
+ VPDI $0x4,V7,V7,V7 // flip the doublewords to big-endian order
+ VPDI $0x4,V8,V8,V8 // flip the doublewords to big-endian order
+
+ VSBCBIQ V7, V9, V30, V31
+ VSBIQ V7, V9, V30, V23
+ VSBCBIQ V8, V9, V31, V0 // V0 has carry-over
+ VSBIQ V8, V9, V31, V24
+
+ VPDI $0x4,V17,V17,V17 // flip the doublewords to big-endian order
+ VPDI $0x4,V18,V18,V18 // flip the doublewords to big-endian order
+ VPDI $0x4,V19,V19,V19 // flip the doublewords to big-endian order
+ VPDI $0x4,V20,V20,V20 // flip the doublewords to big-endian order
+ VPDI $0x4,V21,V21,V21 // flip the doublewords to big-endian order
+ VPDI $0x4,V22,V22,V22 // flip the doublewords to big-endian order
+ VPDI $0x4,V23,V23,V23 // flip the doublewords to big-endian order
+ VPDI $0x4,V24,V24,V24 // flip the doublewords to big-endian order
+ VSTM V17, V24, 0(R7) // 128-bytes into z
+ ADD $128, R7
+ ADD $128, R10 // i += 16
+ SUB $16, R3 // n -= 16
+ BGE UU1 // if n >= 0 goto U1
+ VLGVG $1, V0, R4 // put cf into R4 in case we branch to v10
+ SUB $1, R4 // save cf
+ NEG R4, R4
+A11: ADD $12, R3 // n += 16
+
+ BLT v11 // if n < 0 goto v11
+
+ // n >= 0
+ // regular loop body unrolled 4x
+
+U4: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ SUBC R4, R5 //SLGR -> SUBC
+ SUBE R0, R6 //SLBGR -> SUBE
+ SUBE R0, R7
+ SUBE R0, R1
+ SUBE R4, R4 // save CF
+ NEG R4, R4
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+ ADD $32, R10 // i += 4 -> i +=32
+ SUB $4, R3 // n -= 4
+ BGE U4 // if n >= 0 goto U4
+
+v11: ADD $4, R3 // n += 4
+ BLE E11 // if n <= 0 goto E4
+
+L4: // n > 0
+
+ MOVD 0(R8)(R10*1), R5
+ SUBC R4, R5
+ SUBE R4, R4 // save CF
+ NEG R4, R4
+ MOVD R5, 0(R2)(R10*1)
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L4 // if n > 0 goto L4
+
+E11: MOVD R4, c+56(FP) // return c
+
+ RET
+
//DI = R3, CX = R4, SI = r10, r8 = r8, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0)
// func subVW(z, x []Word, y Word) (c Word)
// (same as addVW except for SUBC/SUBE instead of ADDC/ADDE and label names)
-TEXT ·subVW(SB),NOSPLIT,$0
+TEXT ·subVW_novec(SB),NOSPLIT,$0
MOVD z_len+8(FP), R3
MOVD x+24(FP), R8
MOVD y+48(FP), R4 // c = y
@@ -270,296 +951,299 @@
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),NOSPLIT,$0
- MOVD z_len+8(FP), R5
- SUB $1, R5 // n--
- BLT X8b // n < 0 (n <= 0)
+ MOVD z_len+8(FP), R5
+ MOVD $0, R0
+ SUB $1, R5 // n--
+ BLT X8b // n < 0 (n <= 0)
// n > 0
- MOVD s+48(FP), R4
- CMPBEQ R0, R4, Z80 //handle 0 case beq
- MOVD $64, R6
- CMPBEQ R6, R4, Z864 //handle 64 case beq
- MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
- SUB R4, R6, R7
- MOVD (R8)(R5*1), R10 // w1 = x[i-1]
- SRD R7, R10, R3
- MOVD R3, c+56(FP)
+ MOVD s+48(FP), R4
+ CMPBEQ R0, R4, Z80 //handle 0 case beq
+ MOVD $64, R6
+ CMPBEQ R6, R4, Z864 //handle 64 case beq
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
+ SUB R4, R6, R7
+ MOVD (R8)(R5*1), R10 // w1 = x[i-1]
+ SRD R7, R10, R3
+ MOVD R3, c+56(FP)
- MOVD $0, R1 // i = 0
- BR E8
+ MOVD $0, R1 // i = 0
+ BR E8
// i < n-1
-L8: MOVD R10, R3 // w = w1
- MOVD -8(R8)(R5*1), R10 // w1 = x[i+1]
+L8: MOVD R10, R3 // w = w1
+ MOVD -8(R8)(R5*1), R10 // w1 = x[i+1]
- SLD R4, R3 // w<<s | w1>>ŝ
- SRD R7, R10, R6
- OR R6, R3
- MOVD R3, (R2)(R5*1) // z[i] = w<<s | w1>>ŝ
- SUB $8, R5 // i--
+ SLD R4, R3 // w<<s | w1>>ŝ
+ SRD R7, R10, R6
+ OR R6, R3
+ MOVD R3, (R2)(R5*1) // z[i] = w<<s | w1>>ŝ
+ SUB $8, R5 // i--
-E8: CMPBGT R5, R0, L8 // i < n-1
+E8: CMPBGT R5, R0, L8 // i < n-1
// i >= n-1
-X8a: SLD R4, R10 // w1<<s
- MOVD R10, (R2) // z[0] = w1<<s
+X8a: SLD R4, R10 // w1<<s
+ MOVD R10, (R2) // z[0] = w1<<s
RET
-X8b: MOVD R0, c+56(FP)
+X8b: MOVD R0, c+56(FP)
RET
-Z80: MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
+Z80: MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
- MOVD (R8), R10
- MOVD $0, R3
- MOVD R3, c+56(FP)
+ MOVD (R8), R10
+ MOVD $0, R3
+ MOVD R3, c+56(FP)
- MOVD $0, R1 // i = 0
- BR E8Z
+ MOVD $0, R1 // i = 0
+ BR E8Z
// i < n-1
-L8Z: MOVD R10, R3
- MOVD 8(R8)(R1*1), R10
+L8Z: MOVD R10, R3
+ MOVD 8(R8)(R1*1), R10
- MOVD R3, (R2)(R1*1)
- ADD $8, R1
+ MOVD R3, (R2)(R1*1)
+ ADD $8, R1
-E8Z: CMPBLT R1, R5, L8Z
+E8Z: CMPBLT R1, R5, L8Z
// i >= n-1
- MOVD R10, (R2)(R5*1)
+ MOVD R10, (R2)(R5*1)
RET
-Z864: MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
- MOVD (R8)(R5*1), R3 // w1 = x[n-1]
- MOVD R3, c+56(FP) // z[i] = x[n-1]
+Z864: MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
+ MOVD (R8)(R5*1), R3 // w1 = x[n-1]
+ MOVD R3, c+56(FP) // z[i] = x[n-1]
- BR E864
+ BR E864
// i < n-1
-L864: MOVD -8(R8)(R5*1), R3
+L864: MOVD -8(R8)(R5*1), R3
- MOVD R3, (R2)(R5*1) // z[i] = x[n-1]
- SUB $8, R5 // i--
+ MOVD R3, (R2)(R5*1) // z[i] = x[n-1]
+ SUB $8, R5 // i--
-E864: CMPBGT R5, R0, L864 // i < n-1
+E864: CMPBGT R5, R0, L864 // i < n-1
- MOVD R0, (R2) // z[n-1] = 0
+ MOVD R0, (R2) // z[n-1] = 0
RET
// CX = R4, r8 = r8, r10 = r2 , r11 = r5, DX = r3, AX = r10 , BX = R1 , 64-count = r7 (R0 set to 0) temp = R6
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),NOSPLIT,$0
- MOVD z_len+8(FP), R5
- SUB $1, R5 // n--
- BLT X9b // n < 0 (n <= 0)
+ MOVD z_len+8(FP), R5
+ MOVD $0, R0
+ SUB $1, R5 // n--
+ BLT X9b // n < 0 (n <= 0)
// n > 0
- MOVD s+48(FP), R4
- CMPBEQ R0, R4, ZB0 //handle 0 case beq
- MOVD $64, R6
- CMPBEQ R6, R4, ZB64 //handle 64 case beq
- MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
- SUB R4, R6, R7
- MOVD (R8), R10 // w1 = x[0]
- SLD R7, R10, R3
- MOVD R3, c+56(FP)
+ MOVD s+48(FP), R4
+ CMPBEQ R0, R4, ZB0 //handle 0 case beq
+ MOVD $64, R6
+ CMPBEQ R6, R4, ZB64 //handle 64 case beq
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
+ SUB R4, R6, R7
+ MOVD (R8), R10 // w1 = x[0]
+ SLD R7, R10, R3
+ MOVD R3, c+56(FP)
- MOVD $0, R1 // i = 0
- BR E9
+ MOVD $0, R1 // i = 0
+ BR E9
// i < n-1
-L9: MOVD R10, R3 // w = w1
- MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
+L9: MOVD R10, R3 // w = w1
+ MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
- SRD R4, R3 // w>>s | w1<<s
- SLD R7, R10, R6
- OR R6, R3
- MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
- ADD $8, R1 // i++
+ SRD R4, R3 // w>>s | w1<<s
+ SLD R7, R10, R6
+ OR R6, R3
+ MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
+ ADD $8, R1 // i++
-E9: CMPBLT R1, R5, L9 // i < n-1
+E9: CMPBLT R1, R5, L9 // i < n-1
// i >= n-1
-X9a: SRD R4, R10 // w1>>s
- MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
+X9a: SRD R4, R10 // w1>>s
+ MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
RET
-X9b: MOVD R0, c+56(FP)
+X9b: MOVD R0, c+56(FP)
RET
-ZB0: MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
+ZB0: MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
- MOVD (R8), R10 // w1 = x[0]
- MOVD $0, R3 // R10 << 64
- MOVD R3, c+56(FP)
+ MOVD (R8), R10 // w1 = x[0]
+ MOVD $0, R3 // R10 << 64
+ MOVD R3, c+56(FP)
- MOVD $0, R1 // i = 0
- BR E9Z
+ MOVD $0, R1 // i = 0
+ BR E9Z
// i < n-1
-L9Z: MOVD R10, R3 // w = w1
- MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
+L9Z: MOVD R10, R3 // w = w1
+ MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
- MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
- ADD $8, R1 // i++
+ MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
+ ADD $8, R1 // i++
-E9Z: CMPBLT R1, R5, L9Z // i < n-1
+E9Z: CMPBLT R1, R5, L9Z // i < n-1
// i >= n-1
- MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
+ MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
RET
-ZB64: MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- SLD $3, R5 // n = n*8
- MOVD (R8), R3 // w1 = x[0]
- MOVD R3, c+56(FP)
+ZB64: MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // n = n*8
+ MOVD (R8), R3 // w1 = x[0]
+ MOVD R3, c+56(FP)
- MOVD $0, R1 // i = 0
- BR E964
+ MOVD $0, R1 // i = 0
+ BR E964
// i < n-1
-L964: MOVD 8(R8)(R1*1), R3 // w1 = x[i+1]
+L964: MOVD 8(R8)(R1*1), R3 // w1 = x[i+1]
- MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
- ADD $8, R1 // i++
+ MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<s
+ ADD $8, R1 // i++
-E964: CMPBLT R1, R5, L964 // i < n-1
+E964: CMPBLT R1, R5, L964 // i < n-1
// i >= n-1
- MOVD $0, R10 // w1>>s
- MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
+ MOVD $0, R10 // w1>>s
+ MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
RET
// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, DX = r3, AX = r6 , BX = R1 , (R0 set to 0) + use R11 + use R7 for i
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
TEXT ·mulAddVWW(SB),NOSPLIT,$0
- MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- MOVD y+48(FP), R9
- MOVD r+56(FP), R4 // c = r
- MOVD z_len+8(FP), R5
- MOVD $0, R1 // i = 0
- MOVD $0, R7 // i*8 = 0
- MOVD $0, R0 // make sure it's zero
- BR E5
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD r+56(FP), R4 // c = r
+ MOVD z_len+8(FP), R5
+ MOVD $0, R1 // i = 0
+ MOVD $0, R7 // i*8 = 0
+ MOVD $0, R0 // make sure it's zero
+ BR E5
-L5: MOVD (R8)(R1*1), R6
- MULHDU R9, R6
- ADDC R4, R11 //add to low order bits
- ADDE R0, R6
- MOVD R11, (R2)(R1*1)
- MOVD R6, R4
- ADD $8, R1 // i*8 + 8
- ADD $1, R7 // i++
+L5: MOVD (R8)(R1*1), R6
+ MULHDU R9, R6
+ ADDC R4, R11 //add to low order bits
+ ADDE R0, R6
+ MOVD R11, (R2)(R1*1)
+ MOVD R6, R4
+ ADD $8, R1 // i*8 + 8
+ ADD $1, R7 // i++
-E5: CMPBLT R7, R5, L5 // i < n
+E5: CMPBLT R7, R5, L5 // i < n
- MOVD R4, c+64(FP)
+ MOVD R4, c+64(FP)
RET
// func addMulVVW(z, x []Word, y Word) (c Word)
// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1 , (R0 set to 0) + use R11 + use R7 for i
TEXT ·addMulVVW(SB),NOSPLIT,$0
- MOVD z+0(FP), R2
- MOVD x+24(FP), R8
- MOVD y+48(FP), R9
- MOVD z_len+8(FP), R5
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z_len+8(FP), R5
- MOVD $0, R1 // i*8 = 0
- MOVD $0, R7 // i = 0
- MOVD $0, R0 // make sure it's zero
- MOVD $0, R4 // c = 0
+ MOVD $0, R1 // i*8 = 0
+ MOVD $0, R7 // i = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R4 // c = 0
- MOVD R5, R12
- AND $-2, R12
- CMPBGE R5, $2, A6
- BR E6
+ MOVD R5, R12
+ AND $-2, R12
+ CMPBGE R5, $2, A6
+ BR E6
-A6: MOVD (R8)(R1*1), R6
- MULHDU R9, R6
- MOVD (R2)(R1*1), R10
- ADDC R10, R11 //add to low order bits
- ADDE R0, R6
- ADDC R4, R11
- ADDE R0, R6
- MOVD R6, R4
- MOVD R11, (R2)(R1*1)
+A6: MOVD (R8)(R1*1), R6
+ MULHDU R9, R6
+ MOVD (R2)(R1*1), R10
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11
+ ADDE R0, R6
+ MOVD R6, R4
+ MOVD R11, (R2)(R1*1)
- MOVD (8)(R8)(R1*1), R6
- MULHDU R9, R6
- MOVD (8)(R2)(R1*1), R10
- ADDC R10, R11 //add to low order bits
- ADDE R0, R6
- ADDC R4, R11
- ADDE R0, R6
- MOVD R6, R4
- MOVD R11, (8)(R2)(R1*1)
+ MOVD (8)(R8)(R1*1), R6
+ MULHDU R9, R6
+ MOVD (8)(R2)(R1*1), R10
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11
+ ADDE R0, R6
+ MOVD R6, R4
+ MOVD R11, (8)(R2)(R1*1)
- ADD $16, R1 // i*8 + 8
- ADD $2, R7 // i++
+ ADD $16, R1 // i*8 + 8
+ ADD $2, R7 // i++
- CMPBLT R7, R12, A6
- BR E6
+ CMPBLT R7, R12, A6
+ BR E6
-L6: MOVD (R8)(R1*1), R6
- MULHDU R9, R6
- MOVD (R2)(R1*1), R10
- ADDC R10, R11 //add to low order bits
- ADDE R0, R6
- ADDC R4, R11
- ADDE R0, R6
- MOVD R6, R4
- MOVD R11, (R2)(R1*1)
+L6: MOVD (R8)(R1*1), R6
+ MULHDU R9, R6
+ MOVD (R2)(R1*1), R10
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11
+ ADDE R0, R6
+ MOVD R6, R4
+ MOVD R11, (R2)(R1*1)
- ADD $8, R1 // i*8 + 8
- ADD $1, R7 // i++
+ ADD $8, R1 // i*8 + 8
+ ADD $1, R7 // i++
-E6: CMPBLT R7, R5, L6 // i < n
+E6: CMPBLT R7, R5, L6 // i < n
- MOVD R4, c+56(FP)
+ MOVD R4, c+56(FP)
RET
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1(*8) , (R0 set to 0) + use R11 + use R7 for i
TEXT ·divWVW(SB),NOSPLIT,$0
- MOVD z+0(FP), R2
- MOVD xn+24(FP), R10 // r = xn
- MOVD x+32(FP), R8
- MOVD y+56(FP), R9
- MOVD z_len+8(FP), R7 // i = z
- SLD $3, R7, R1 // i*8
- MOVD $0, R0 // make sure it's zero
- BR E7
+ MOVD z+0(FP), R2
+ MOVD xn+24(FP), R10 // r = xn
+ MOVD x+32(FP), R8
+ MOVD y+56(FP), R9
+ MOVD z_len+8(FP), R7 // i = z
+ SLD $3, R7, R1 // i*8
+ MOVD $0, R0 // make sure it's zero
+ BR E7
-L7: MOVD (R8)(R1*1), R11
- WORD $0xB98700A9 //DLGR R10,R9
- MOVD R11, (R2)(R1*1)
+L7: MOVD (R8)(R1*1), R11
+ WORD $0xB98700A9 //DLGR R10,R9
+ MOVD R11, (R2)(R1*1)
-E7: SUB $1, R7 // i--
- SUB $8, R1
- BGE L7 // i >= 0
+E7: SUB $1, R7 // i--
+ SUB $8, R1
+ BGE L7 // i >= 0
- MOVD R10, r+64(FP)
+ MOVD R10, r+64(FP)
RET
// func bitLen(x Word) (n int)
TEXT ·bitLen(SB),NOSPLIT,$0
- MOVD x+0(FP), R2
- WORD $0xb9830022 // FLOGR R2,R2
- MOVD $64, R3
- SUB R2, R3
- MOVD R3, n+8(FP)
+ MOVD x+0(FP), R2
+ FLOGR R2, R2 // clobbers R3
+ MOVD $64, R3
+ SUB R2, R3
+ MOVD R3, n+8(FP)
RET
+
diff --git a/src/math/big/arith_s390x_test.go b/src/math/big/arith_s390x_test.go
new file mode 100644
index 0000000..31a777e
--- /dev/null
+++ b/src/math/big/arith_s390x_test.go
@@ -0,0 +1,44 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build s390x !math_big_pure_go
+
+package big
+
+import (
+ "testing"
+)
+
+// Tests whether the non vector routines are working, even when the tests are run on a
+// vector-capable machine
+
+func TestFunVVnovec(t *testing.T) {
+ if hasVX == true {
+ for _, a := range sumVV {
+ arg := a
+ testFunVV(t, "addVV_novec", addVV_novec, arg)
+
+ arg = argVV{a.z, a.y, a.x, a.c}
+ testFunVV(t, "addVV_novec symmetric", addVV_novec, arg)
+
+ arg = argVV{a.x, a.z, a.y, a.c}
+ testFunVV(t, "subVV_novec", subVV_novec, arg)
+
+ arg = argVV{a.y, a.z, a.x, a.c}
+ testFunVV(t, "subVV_novec symmetric", subVV_novec, arg)
+ }
+ }
+}
+
+func TestFunVWnovec(t *testing.T) {
+ if hasVX == true {
+ for _, a := range sumVW {
+ arg := a
+ testFunVW(t, "addVW_novec", addVW_novec, arg)
+
+ arg = argVW{a.x, a.z, a.y, a.c}
+ testFunVW(t, "subVW_novec", subVW_novec, arg)
+ }
+ }
+}
diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go
index 75862b4..f2b3083 100644
--- a/src/math/big/arith_test.go
+++ b/src/math/big/arith_test.go
@@ -6,10 +6,14 @@
import (
"fmt"
+ "internal/testenv"
"math/rand"
+ "strings"
"testing"
)
+var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race")
+
type funWW func(x, y, c Word) (z1, z0 Word)
type argWW struct {
x, y, c, z1, z0 Word
@@ -123,6 +127,9 @@
func BenchmarkAddVV(b *testing.B) {
for _, n := range benchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
x := rndV(n)
y := rndV(n)
z := make([]Word, n)
@@ -233,6 +240,9 @@
func BenchmarkAddVW(b *testing.B) {
for _, n := range benchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
x := rndV(n)
y := rndW()
z := make([]Word, n)
@@ -371,6 +381,9 @@
func BenchmarkAddMulVVW(b *testing.B) {
for _, n := range benchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
x := rndV(n)
y := rndW()
z := make([]Word, n)
diff --git a/src/math/big/decimal.go b/src/math/big/decimal.go
index 2c0c9da..2dfa032 100644
--- a/src/math/big/decimal.go
+++ b/src/math/big/decimal.go
@@ -125,11 +125,12 @@
// read a digit, write a digit
w := 0 // write index
+ mask := Word(1)<<s - 1
for r < len(x.mant) {
ch := Word(x.mant[r])
r++
d := n >> s
- n -= d << s
+ n &= mask // n -= d << s
x.mant[w] = byte(d + '0')
w++
n = n*10 + ch - '0'
@@ -138,7 +139,7 @@
// write extra digits that still fit
for n > 0 && w < len(x.mant) {
d := n >> s
- n -= d << s
+ n &= mask
x.mant[w] = byte(d + '0')
w++
n = n * 10
@@ -148,7 +149,7 @@
// append additional digits that didn't fit
for n > 0 {
d := n >> s
- n -= d << s
+ n &= mask
x.mant = append(x.mant, byte(d+'0'))
n = n * 10
}
diff --git a/src/math/big/decimal_test.go b/src/math/big/decimal_test.go
index 15bdb18..424811e 100644
--- a/src/math/big/decimal_test.go
+++ b/src/math/big/decimal_test.go
@@ -4,7 +4,10 @@
package big
-import "testing"
+import (
+ "fmt"
+ "testing"
+)
func TestDecimalString(t *testing.T) {
for _, test := range []struct {
@@ -105,12 +108,27 @@
}
}
+var sink string
+
func BenchmarkDecimalConversion(b *testing.B) {
for i := 0; i < b.N; i++ {
for shift := -100; shift <= +100; shift++ {
var d decimal
d.init(natOne, shift)
- d.String()
+ sink = d.String()
}
}
}
+
+func BenchmarkFloatString(b *testing.B) {
+ x := new(Float)
+ for _, prec := range []uint{1e2, 1e3, 1e4, 1e5} {
+ x.SetPrec(prec).SetRat(NewRat(1, 3))
+ b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ sink = x.String()
+ }
+ })
+ }
+}
diff --git a/src/math/big/doc.go b/src/math/big/doc.go
index a3c2375..65ed019 100644
--- a/src/math/big/doc.go
+++ b/src/math/big/doc.go
@@ -31,7 +31,7 @@
var z1 Int
z1.SetUint64(123) // z1 := 123
- z2 := new(Rat).SetFloat64(1.2) // z2 := 6/5
+ z2 := new(Rat).SetFloat64(1.25) // z2 := 5/4
z3 := new(Float).SetInt(z1) // z3 := 123.0
Setters, numeric operations and predicates are represented as methods of
diff --git a/src/math/big/example_test.go b/src/math/big/example_test.go
index ac79552..cfc7735 100644
--- a/src/math/big/example_test.go
+++ b/src/math/big/example_test.go
@@ -51,6 +51,19 @@
// Output: 18446744073709551617
}
+func ExampleFloat_Scan() {
+ // The Scan function is rarely used directly;
+ // the fmt package recognizes it as an implementation of fmt.Scanner.
+ f := new(big.Float)
+ _, err := fmt.Sscan("1.19282e99", f)
+ if err != nil {
+ log.Println("error scanning value:", err)
+ } else {
+ fmt.Println(f)
+ }
+ // Output: 1.19282e+99
+}
+
// This example demonstrates how to use big.Int to compute the smallest
// Fibonacci number with 100 decimal digits and to test whether it is prime.
func Example_fibonacci() {
diff --git a/src/math/big/float.go b/src/math/big/float.go
index 7a9c2b3..aabd7b4 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -1210,20 +1210,30 @@
ex := int64(x.exp) - int64(len(x.mant))*_W
ey := int64(y.exp) - int64(len(y.mant))*_W
+ al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
+
// TODO(gri) having a combined add-and-shift primitive
// could make this code significantly faster
switch {
case ex < ey:
- // cannot re-use z.mant w/o testing for aliasing
- t := nat(nil).shl(y.mant, uint(ey-ex))
- z.mant = z.mant.add(x.mant, t)
+ if al {
+ t := nat(nil).shl(y.mant, uint(ey-ex))
+ z.mant = z.mant.add(x.mant, t)
+ } else {
+ z.mant = z.mant.shl(y.mant, uint(ey-ex))
+ z.mant = z.mant.add(x.mant, z.mant)
+ }
default:
// ex == ey, no shift needed
z.mant = z.mant.add(x.mant, y.mant)
case ex > ey:
- // cannot re-use z.mant w/o testing for aliasing
- t := nat(nil).shl(x.mant, uint(ex-ey))
- z.mant = z.mant.add(t, y.mant)
+ if al {
+ t := nat(nil).shl(x.mant, uint(ex-ey))
+ z.mant = z.mant.add(t, y.mant)
+ } else {
+ z.mant = z.mant.shl(x.mant, uint(ex-ey))
+ z.mant = z.mant.add(z.mant, y.mant)
+ }
ex = ey
}
// len(z.mant) > 0
@@ -1247,18 +1257,28 @@
ex := int64(x.exp) - int64(len(x.mant))*_W
ey := int64(y.exp) - int64(len(y.mant))*_W
+ al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
+
switch {
case ex < ey:
- // cannot re-use z.mant w/o testing for aliasing
- t := nat(nil).shl(y.mant, uint(ey-ex))
- z.mant = t.sub(x.mant, t)
+ if al {
+ t := nat(nil).shl(y.mant, uint(ey-ex))
+ z.mant = t.sub(x.mant, t)
+ } else {
+ z.mant = z.mant.shl(y.mant, uint(ey-ex))
+ z.mant = z.mant.sub(x.mant, z.mant)
+ }
default:
// ex == ey, no shift needed
z.mant = z.mant.sub(x.mant, y.mant)
case ex > ey:
- // cannot re-use z.mant w/o testing for aliasing
- t := nat(nil).shl(x.mant, uint(ex-ey))
- z.mant = t.sub(t, y.mant)
+ if al {
+ t := nat(nil).shl(x.mant, uint(ex-ey))
+ z.mant = t.sub(t, y.mant)
+ } else {
+ z.mant = z.mant.shl(x.mant, uint(ex-ey))
+ z.mant = z.mant.sub(z.mant, y.mant)
+ }
ex = ey
}
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go
index 464619b..7d4bd31 100644
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@@ -5,6 +5,7 @@
package big
import (
+ "flag"
"fmt"
"math"
"strconv"
@@ -1495,12 +1496,14 @@
}
}
+var long = flag.Bool("long", false, "run very long tests")
+
// TestFloatQuoSmoke tests all divisions x/y for values x, y in the range [-n, +n];
// it serves as a smoke test for basic correctness of division.
func TestFloatQuoSmoke(t *testing.T) {
- n := 1000
- if testing.Short() {
- n = 10
+ n := 10
+ if *long {
+ n = 1000
}
const dprec = 3 // max. precision variation
@@ -1762,3 +1765,41 @@
}
}
}
+
+func BenchmarkFloatAdd(b *testing.B) {
+ x := new(Float)
+ y := new(Float)
+ z := new(Float)
+
+ for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} {
+ x.SetPrec(prec).SetRat(NewRat(1, 3))
+ y.SetPrec(prec).SetRat(NewRat(1, 6))
+ z.SetPrec(prec)
+
+ b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ z.Add(x, y)
+ }
+ })
+ }
+}
+
+func BenchmarkFloatSub(b *testing.B) {
+ x := new(Float)
+ y := new(Float)
+ z := new(Float)
+
+ for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} {
+ x.SetPrec(prec).SetRat(NewRat(1, 3))
+ y.SetPrec(prec).SetRat(NewRat(1, 6))
+ z.SetPrec(prec)
+
+ b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ z.Sub(x, y)
+ }
+ })
+ }
+}
diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go
index a884df6..95d1bf8 100644
--- a/src/math/big/floatconv.go
+++ b/src/math/big/floatconv.go
@@ -12,9 +12,13 @@
"strings"
)
+var floatZero Float
+
// SetString sets z to the value of s and returns z and a boolean indicating
// success. s must be a floating-point number of the same format as accepted
-// by Parse, with base argument 0.
+// by Parse, with base argument 0. The entire string (not just a prefix) must
+// be valid for success. If the operation failed, the value of z is undefined
+// but the returned value is nil.
func (z *Float) SetString(s string) (*Float, bool) {
if f, _, err := z.Parse(s, 0); err == nil {
return f, true
@@ -212,17 +216,18 @@
//
// It sets z to the (possibly rounded) value of the corresponding floating-
// point value, and returns z, the actual base b, and an error err, if any.
+// The entire string (not just a prefix) must be consumed for success.
// If z's precision is 0, it is changed to 64 before rounding takes effect.
// The number must be of the form:
//
// number = [ sign ] [ prefix ] mantissa [ exponent ] | infinity .
// sign = "+" | "-" .
-// prefix = "0" ( "x" | "X" | "b" | "B" ) .
+// prefix = "0" ( "x" | "X" | "b" | "B" ) .
// mantissa = digits | digits "." [ digits ] | "." digits .
// exponent = ( "E" | "e" | "p" ) [ sign ] digits .
// digits = digit { digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
-// infinity = [ sign ] ( "inf" | "Inf" ) .
+// infinity = [ sign ] ( "inf" | "Inf" ) .
//
// The base argument must be 0, 2, 10, or 16. Providing an invalid base
// argument will lead to a run-time panic.
@@ -273,3 +278,16 @@
func ParseFloat(s string, base int, prec uint, mode RoundingMode) (f *Float, b int, err error) {
return new(Float).SetPrec(prec).SetMode(mode).Parse(s, base)
}
+
+var _ fmt.Scanner = &floatZero // *Float must implement fmt.Scanner
+
+// Scan is a support routine for fmt.Scanner; it sets z to the value of
+// the scanned number. It accepts formats whose verbs are supported by
+// fmt.Scan for floating point values, which are:
+// 'b' (binary), 'e', 'E', 'f', 'F', 'g' and 'G'.
+// Scan doesn't handle ±Inf.
+func (z *Float) Scan(s fmt.ScanState, ch rune) error {
+ s.SkipSpace()
+ _, _, err := z.scan(byteReader{s}, 0)
+ return err
+}
diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go
index b2a1ab0..edcb2eb 100644
--- a/src/math/big/floatconv_test.go
+++ b/src/math/big/floatconv_test.go
@@ -5,6 +5,7 @@
package big
import (
+ "bytes"
"fmt"
"math"
"strconv"
@@ -665,3 +666,54 @@
}
}
}
+
+func TestFloatScan(t *testing.T) {
+ var floatScanTests = []struct {
+ input string
+ format string
+ output string
+ remaining int
+ wantErr bool
+ }{
+ 0: {"10.0", "%f", "10", 0, false},
+ 1: {"23.98+2.0", "%v", "23.98", 4, false},
+ 2: {"-1+1", "%v", "-1", 2, false},
+ 3: {" 00000", "%v", "0", 0, false},
+ 4: {"-123456p-78", "%b", "-4.084816388e-19", 0, false},
+ 5: {"+123", "%b", "123", 0, false},
+ 6: {"-1.234e+56", "%e", "-1.234e+56", 0, false},
+ 7: {"-1.234E-56", "%E", "-1.234e-56", 0, false},
+ 8: {"-1.234e+567", "%g", "-1.234e+567", 0, false},
+ 9: {"+1234567891011.234", "%G", "1.234567891e+12", 0, false},
+
+ // Scan doesn't handle ±Inf.
+ 10: {"Inf", "%v", "", 3, true},
+ 11: {"-Inf", "%v", "", 3, true},
+ 12: {"-Inf", "%v", "", 3, true},
+ }
+
+ var buf bytes.Buffer
+ for i, test := range floatScanTests {
+ x := new(Float)
+ buf.Reset()
+ buf.WriteString(test.input)
+ _, err := fmt.Fscanf(&buf, test.format, x)
+ if test.wantErr {
+ if err == nil {
+ t.Errorf("#%d want non-nil err", i)
+ }
+ continue
+ }
+
+ if err != nil {
+ t.Errorf("#%d error: %s", i, err)
+ }
+
+ if x.String() != test.output {
+ t.Errorf("#%d got %s; want %s", i, x.String(), test.output)
+ }
+ if buf.Len() != test.remaining {
+ t.Errorf("#%d got %d bytes remaining; want %d", i, buf.Len(), test.remaining)
+ }
+ }
+}
diff --git a/src/math/big/floatexample_test.go b/src/math/big/floatexample_test.go
index fb799d5..0c6668c 100644
--- a/src/math/big/floatexample_test.go
+++ b/src/math/big/floatexample_test.go
@@ -11,7 +11,7 @@
)
func ExampleFloat_Add() {
- // Operating on numbers of different precision.
+ // Operate on numbers of different precision.
var x, y, z big.Float
x.SetInt64(1000) // x is automatically set to 64bit precision
y.SetFloat64(2.718281828) // y is automatically set to 53bit precision
@@ -26,8 +26,8 @@
// z = 1002.718282 (0x.faadf854p+10, prec = 32, acc = Below)
}
-func Example_Shift() {
- // Implementing Float "shift" by modifying the (binary) exponents directly.
+func ExampleFloat_shift() {
+ // Implement Float "shift" by modifying the (binary) exponents directly.
for s := -5; s <= 5; s++ {
x := big.NewFloat(0.5)
x.SetMantExp(x, x.MantExp(nil)+s) // shift x by s
diff --git a/src/math/big/floatmarsh.go b/src/math/big/floatmarsh.go
index 3725d4b..d1c1dab 100644
--- a/src/math/big/floatmarsh.go
+++ b/src/math/big/floatmarsh.go
@@ -16,7 +16,7 @@
// GobEncode implements the gob.GobEncoder interface.
// The Float value and all its attributes (precision,
-// rounding mode, accuracy) are marshalled.
+// rounding mode, accuracy) are marshaled.
func (x *Float) GobEncode() ([]byte, error) {
if x == nil {
return nil, nil
diff --git a/src/math/big/ftoa.go b/src/math/big/ftoa.go
index 57b16e1..d2a8588 100644
--- a/src/math/big/ftoa.go
+++ b/src/math/big/ftoa.go
@@ -376,6 +376,8 @@
return y
}
+var _ fmt.Formatter = &floatZero // *Float must implement fmt.Formatter
+
// Format implements fmt.Formatter. It accepts all the regular
// formats for floating-point numbers ('b', 'e', 'E', 'f', 'F',
// 'g', 'G') as well as 'p' and 'v'. See (*Float).Text for the
diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go
index a929bf5..3cca2ec 100644
--- a/src/math/big/gcd_test.go
+++ b/src/math/big/gcd_test.go
@@ -20,6 +20,9 @@
}
func runGCD(b *testing.B, aSize, bSize uint) {
+ if isRaceBuilder && (aSize > 1000 || bSize > 1000) {
+ b.Skip("skipping on race builder")
+ }
b.Run("WithoutXY", func(b *testing.B) {
runGCDExt(b, aSize, bSize, false)
})
diff --git a/src/math/big/int.go b/src/math/big/int.go
index f2a75d1..1d8dabc 100644
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -361,7 +361,8 @@
}
// SetString sets z to the value of s, interpreted in the given base,
-// and returns z and a boolean indicating success. If SetString fails,
+// and returns z and a boolean indicating success. The entire string
+// (not just a prefix) must be valid for success. If SetString fails,
// the value of z is undefined but the returned value is nil.
//
// The base argument must be 0 or a value between 2 and MaxBase. If the base
@@ -371,12 +372,11 @@
//
func (z *Int) SetString(s string, base int) (*Int, bool) {
r := strings.NewReader(s)
- _, _, err := z.scan(r, base)
- if err != nil {
+ if _, _, err := z.scan(r, base); err != nil {
return nil, false
}
- _, err = r.ReadByte()
- if err != io.EOF {
+ // entire string must have been consumed
+ if _, err := r.ReadByte(); err != io.EOF {
return nil, false
}
return z, true // err == io.EOF => scan consumed all of s
@@ -404,8 +404,11 @@
// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z.
// If y <= 0, the result is 1 mod |m|; if m == nil or m == 0, z = x**y.
-// See Knuth, volume 2, section 4.6.3.
+//
+// Modular exponentation of inputs of a particular size is not a
+// cryptographically constant-time operation.
func (z *Int) Exp(x, y, m *Int) *Int {
+ // See Knuth, volume 2, section 4.6.3.
var yWords nat
if !y.neg {
yWords = y.abs
@@ -550,19 +553,6 @@
return z.Lsh(u, k)
}
-// ProbablyPrime performs n Miller-Rabin tests to check whether x is prime.
-// If x is prime, it returns true.
-// If x is not prime, it returns false with probability at least 1 - ¼ⁿ.
-//
-// It is not suitable for judging primes that an adversary may have crafted
-// to fool this test.
-func (x *Int) ProbablyPrime(n int) bool {
- if n <= 0 {
- panic("non-positive n for ProbablyPrime")
- }
- return !x.neg && x.abs.probablyPrime(n)
-}
-
// Rand sets z to a pseudo-random number in [0, n) and returns z.
func (z *Int) Rand(rnd *rand.Rand, n *Int) *Int {
z.neg = false
@@ -577,6 +567,11 @@
// ModInverse sets z to the multiplicative inverse of g in the ring ℤ/nℤ
// and returns z. If g and n are not relatively prime, the result is undefined.
func (z *Int) ModInverse(g, n *Int) *Int {
+ if g.neg {
+ // GCD expects parameters a and b to be > 0.
+ var g2 Int
+ g = g2.Mod(g, n)
+ }
var d Int
d.GCD(z, nil, g, n)
// x and y are such that g*x + n*y = d. Since g and n are
@@ -932,3 +927,14 @@
z.neg = true // z cannot be zero if x is positive
return z
}
+
+// Sqrt sets z to ⌊√x⌋, the largest integer such that z² ≤ x, and returns z.
+// It panics if x is negative.
+func (z *Int) Sqrt(x *Int) *Int {
+ if x.neg {
+ panic("square root of negative number")
+ }
+ z.neg = false
+ z.abs = z.abs.sqrt(x.abs)
+ return z
+}
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go
index 45a3765..b8e0778 100644
--- a/src/math/big/int_test.go
+++ b/src/math/big/int_test.go
@@ -9,6 +9,7 @@
"encoding/hex"
"fmt"
"math/rand"
+ "strings"
"testing"
"testing/quick"
)
@@ -478,6 +479,18 @@
}
}
+func BenchmarkQuoRem(b *testing.B) {
+ x, _ := new(Int).SetString("153980389784927331788354528594524332344709972855165340650588877572729725338415474372475094155672066328274535240275856844648695200875763869073572078279316458648124537905600131008790701752441155668003033945258023841165089852359980273279085783159654751552359397986180318708491098942831252291841441726305535546071", 0)
+ y, _ := new(Int).SetString("7746362281539803897849273317883545285945243323447099728551653406505888775727297253384154743724750941556720663282745352402758568446486952008757638690735720782793164586481245379056001310087907017524411556680030339452580238411650898523599802732790857831596547515523593979861803187084910989428312522918414417263055355460715745539358014631136245887418412633787074173796862711588221766398229333338511838891484974940633857861775630560092874987828057333663969469797013996401149696897591265769095952887917296740109742927689053276850469671231961384715398038978492733178835452859452433234470997285516534065058887757272972533841547437247509415567206632827453524027585684464869520087576386907357207827931645864812453790560013100879070175244115566800303394525802384116508985235998027327908578315965475155235939798618031870849109894283125229184144172630553554607112725169432413343763989564437170644270643461665184965150423819594083121075825", 0)
+ q := new(Int)
+ r := new(Int)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ q.QuoRem(y, x, r)
+ }
+}
+
var bitLenTests = []struct {
in string
out int
@@ -572,6 +585,19 @@
{"0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0"},
{"0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0"},
{"0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0"},
+
+ {
+ "2",
+ "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+ "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", // odd
+ "0x6AADD3E3E424D5B713FCAA8D8945B1E055166132038C57BBD2D51C833F0C5EA2007A2324CE514F8E8C2F008A2F36F44005A4039CB55830986F734C93DAF0EB4BAB54A6A8C7081864F44346E9BC6F0A3EB9F2C0146A00C6A05187D0C101E1F2D038CDB70CB5E9E05A2D188AB6CBB46286624D4415E7D4DBFAD3BCC6009D915C406EED38F468B940F41E6BEDC0430DD78E6F19A7DA3A27498A4181E24D738B0072D8F6ADB8C9809A5B033A09785814FD9919F6EF9F83EEA519BEC593855C4C10CBEEC582D4AE0792158823B0275E6AEC35242740468FAF3D5C60FD1E376362B6322F78B7ED0CA1C5BBCD2B49734A56C0967A1D01A100932C837B91D592CE08ABFF",
+ },
+ {
+ "2",
+ "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+ "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", // even
+ "0x7858794B5897C29F4ED0B40913416AB6C48588484E6A45F2ED3E26C941D878E923575AAC434EE2750E6439A6976F9BB4D64CEDB2A53CE8D04DD48CADCDF8E46F22747C6B81C6CEA86C0D873FBF7CEF262BAAC43A522BD7F32F3CDAC52B9337C77B3DCFB3DB3EDD80476331E82F4B1DF8EFDC1220C92656DFC9197BDC1877804E28D928A2A284B8DED506CBA304435C9D0133C246C98A7D890D1DE60CBC53A024361DA83A9B8775019083D22AC6820ED7C3C68F8E801DD4EC779EE0A05C6EB682EF9840D285B838369BA7E148FA27691D524FAEAF7C6ECE2A4B99A294B9F2C241857B5B90CC8BFFCFCF18DFA7D676131D5CD3855A5A3E8EBFA0CDFADB4D198B4A",
+ },
}
func TestExp(t *testing.T) {
@@ -614,6 +640,26 @@
}
}
+func BenchmarkExp(b *testing.B) {
+ x, _ := new(Int).SetString("11001289118363089646017359372117963499250546375269047542777928006103246876688756735760905680604646624353196869572752623285140408755420374049317646428185270079555372763503115646054602867593662923894140940837479507194934267532831694565516466765025434902348314525627418515646588160955862839022051353653052947073136084780742729727874803457643848197499548297570026926927502505634297079527299004267769780768565695459945235586892627059178884998772989397505061206395455591503771677500931269477503508150175717121828518985901959919560700853226255420793148986854391552859459511723547532575574664944815966793196961286234040892865", 0)
+ y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0)
+ n, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0)
+ out := new(Int)
+ for i := 0; i < b.N; i++ {
+ out.Exp(x, y, n)
+ }
+}
+
+func BenchmarkExp2(b *testing.B) {
+ x, _ := new(Int).SetString("2", 0)
+ y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0)
+ n, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0)
+ out := new(Int)
+ for i := 0; i < b.N; i++ {
+ out.Exp(x, y, n)
+ }
+}
+
func checkGcd(aBytes, bBytes []byte) bool {
x := new(Int)
y := new(Int)
@@ -715,85 +761,6 @@
}
}
-var primes = []string{
- "2",
- "3",
- "5",
- "7",
- "11",
-
- "13756265695458089029",
- "13496181268022124907",
- "10953742525620032441",
- "17908251027575790097",
-
- // https://golang.org/issue/638
- "18699199384836356663",
-
- "98920366548084643601728869055592650835572950932266967461790948584315647051443",
- "94560208308847015747498523884063394671606671904944666360068158221458669711639",
-
- // http://primes.utm.edu/lists/small/small3.html
- "449417999055441493994709297093108513015373787049558499205492347871729927573118262811508386655998299074566974373711472560655026288668094291699357843464363003144674940345912431129144354948751003607115263071543163",
- "230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593",
- "5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993",
- "203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123",
-
- // ECC primes: http://tools.ietf.org/html/draft-ladd-safecurves-02
- "3618502788666131106986593281521497120414687020801267626233049500247285301239", // Curve1174: 2^251-9
- "57896044618658097711785492504343953926634992332820282019728792003956564819949", // Curve25519: 2^255-19
- "9850501549098619803069760025035903451269934817616361666987073351061430442874302652853566563721228910201656997576599", // E-382: 2^382-105
- "42307582002575910332922579714097346549017899709713998034217522897561970639123926132812109468141778230245837569601494931472367", // Curve41417: 2^414-17
- "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", // E-521: 2^521-1
-}
-
-var composites = []string{
- "0",
- "1",
- "21284175091214687912771199898307297748211672914763848041968395774954376176754",
- "6084766654921918907427900243509372380954290099172559290432744450051395395951",
- "84594350493221918389213352992032324280367711247940675652888030554255915464401",
- "82793403787388584738507275144194252681",
-}
-
-func TestProbablyPrime(t *testing.T) {
- nreps := 20
- if testing.Short() {
- nreps = 1
- }
- for i, s := range primes {
- p, _ := new(Int).SetString(s, 10)
- if !p.ProbablyPrime(nreps) {
- t.Errorf("#%d prime found to be non-prime (%s)", i, s)
- }
- }
-
- for i, s := range composites {
- c, _ := new(Int).SetString(s, 10)
- if c.ProbablyPrime(nreps) {
- t.Errorf("#%d composite found to be prime (%s)", i, s)
- }
- if testing.Short() {
- break
- }
- }
-
- // check that ProbablyPrime panics if n <= 0
- c := NewInt(11) // a prime
- for _, n := range []int{-1, 0, 1} {
- func() {
- defer func() {
- if n <= 0 && recover() == nil {
- t.Fatalf("expected panic from ProbablyPrime(%d)", n)
- }
- }()
- if !c.ProbablyPrime(n) {
- t.Fatalf("%v should be a prime", c)
- }
- }()
- }
-}
-
type intShiftTest struct {
in string
shift uint
@@ -1229,6 +1196,9 @@
}
func BenchmarkModSqrt5430_Tonelli(b *testing.B) {
+ if isRaceBuilder {
+ b.Skip("skipping on race builder")
+ }
p := tri(5430)
x := new(Int).SetUint64(2)
for i := 0; i < b.N; i++ {
@@ -1238,6 +1208,9 @@
}
func BenchmarkModSqrt5430_3Mod4(b *testing.B) {
+ if isRaceBuilder {
+ b.Skip("skipping on race builder")
+ }
p := tri(5430)
x := new(Int).SetUint64(2)
for i := 0; i < b.N; i++ {
@@ -1303,6 +1276,7 @@
}{
{"1234567", "458948883992"},
{"239487239847", "2410312426921032588552076022197566074856950548502459942654116941958108831682612228890093858261341614673227141477904012196503648957050582631942730706805009223062734745341073406696246014589361659774041027169249453200378729434170325843778659198143763193776859869524088940195577346119843545301547043747207749969763750084308926339295559968882457872412993810129130294592999947926365264059284647209730384947211681434464714438488520940127459844288859336526896320919633919"},
+ {"-10", "13"}, // issue #16984
}
func TestModInverse(t *testing.T) {
@@ -1480,3 +1454,44 @@
n := NewInt(10)
n.Rand(rand.New(rand.NewSource(9)), n)
}
+
+func TestSqrt(t *testing.T) {
+ root := 0
+ r := new(Int)
+ for i := 0; i < 10000; i++ {
+ if (root+1)*(root+1) <= i {
+ root++
+ }
+ n := NewInt(int64(i))
+ r.SetInt64(-2)
+ r.Sqrt(n)
+ if r.Cmp(NewInt(int64(root))) != 0 {
+ t.Errorf("Sqrt(%v) = %v, want %v", n, r, root)
+ }
+ }
+
+ for i := 0; i < 1000; i += 10 {
+ n, _ := new(Int).SetString("1"+strings.Repeat("0", i), 10)
+ r := new(Int).Sqrt(n)
+ root, _ := new(Int).SetString("1"+strings.Repeat("0", i/2), 10)
+ if r.Cmp(root) != 0 {
+ t.Errorf("Sqrt(1e%d) = %v, want 1e%d", i, r, i/2)
+ }
+ }
+
+ // Test aliasing.
+ r.SetInt64(100)
+ r.Sqrt(r)
+ if r.Int64() != 10 {
+ t.Errorf("Sqrt(100) = %v, want 10 (aliased output)", r.Int64())
+ }
+}
+
+func BenchmarkSqrt(b *testing.B) {
+ n, _ := new(Int).SetString("1"+strings.Repeat("0", 1001), 10)
+ b.ResetTimer()
+ t := new(Int)
+ for i := 0; i < b.N; i++ {
+ t.Sqrt(n)
+ }
+}
diff --git a/src/math/big/intconv.go b/src/math/big/intconv.go
index daf674a..91a62ce 100644
--- a/src/math/big/intconv.go
+++ b/src/math/big/intconv.go
@@ -52,6 +52,8 @@
}
}
+var _ fmt.Formatter = intOne // *Int must implement fmt.Formatter
+
// Format implements fmt.Formatter. It accepts the formats
// 'b' (binary), 'o' (octal), 'd' (decimal), 'x' (lowercase
// hexadecimal), and 'X' (uppercase hexadecimal).
@@ -223,6 +225,8 @@
return r.UnreadRune()
}
+var _ fmt.Scanner = intOne // *Int must implement fmt.Scanner
+
// Scan is a support routine for fmt.Scanner; it sets z to the value of
// the scanned number. It accepts the formats 'b' (binary), 'o' (octal),
// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal).
diff --git a/src/math/big/intmarsh.go b/src/math/big/intmarsh.go
index 4ff57b6..ee1e414 100644
--- a/src/math/big/intmarsh.go
+++ b/src/math/big/intmarsh.go
@@ -59,7 +59,7 @@
return nil
}
-// The JSON marshallers are only here for API backward compatibility
+// The JSON marshalers are only here for API backward compatibility
// (programs that explicitly look for these two methods). JSON works
// fine with the TextMarshaler only.
@@ -70,5 +70,9 @@
// UnmarshalJSON implements the json.Unmarshaler interface.
func (z *Int) UnmarshalJSON(text []byte) error {
+ // Ignore null, like in the main JSON package.
+ if string(text) == "null" {
+ return nil
+ }
return z.UnmarshalText(text)
}
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index 2e65d2a..9b1a626 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -542,16 +542,21 @@
return
}
-// getNat returns a nat of len n. The contents may not be zero.
-func getNat(n int) nat {
- var z nat
+// getNat returns a *nat of len n. The contents may not be zero.
+// The pool holds *nat to avoid allocation when converting to interface{}.
+func getNat(n int) *nat {
+ var z *nat
if v := natPool.Get(); v != nil {
- z = v.(nat)
+ z = v.(*nat)
}
- return z.make(n)
+ if z == nil {
+ z = new(nat)
+ }
+ *z = z.make(n)
+ return z
}
-func putNat(x nat) {
+func putNat(x *nat) {
natPool.Put(x)
}
@@ -575,7 +580,8 @@
}
q = z.make(m + 1)
- qhatv := getNat(n + 1)
+ qhatvp := getNat(n + 1)
+ qhatv := *qhatvp
if alias(u, uIn) || alias(u, v) {
u = nil // u is an alias for uIn or v - cannot reuse
}
@@ -583,36 +589,40 @@
u.clear() // TODO(gri) no need to clear if we allocated a new u
// D1.
- var v1 nat
+ var v1p *nat
shift := nlz(v[n-1])
if shift > 0 {
// do not modify v, it may be used by another goroutine simultaneously
- v1 = getNat(n)
+ v1p = getNat(n)
+ v1 := *v1p
shlVU(v1, v, shift)
v = v1
}
u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
// D2.
+ vn1 := v[n-1]
for j := m; j >= 0; j-- {
// D3.
qhat := Word(_M)
- if u[j+n] != v[n-1] {
+ if ujn := u[j+n]; ujn != vn1 {
var rhat Word
- qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
+ qhat, rhat = divWW(ujn, u[j+n-1], vn1)
// x1 | x2 = q̂v_{n-2}
- x1, x2 := mulWW(qhat, v[n-2])
+ vn2 := v[n-2]
+ x1, x2 := mulWW(qhat, vn2)
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
- for greaterThan(x1, x2, rhat, u[j+n-2]) {
+ ujn2 := u[j+n-2]
+ for greaterThan(x1, x2, rhat, ujn2) {
qhat--
prevRhat := rhat
- rhat += v[n-1]
+ rhat += vn1
// v[n-1] >= 0, so this tests for overflow.
if rhat < prevRhat {
break
}
- x1, x2 = mulWW(qhat, v[n-2])
+ x1, x2 = mulWW(qhat, vn2)
}
}
@@ -628,10 +638,10 @@
q[j] = qhat
}
- if v1 != nil {
- putNat(v1)
+ if v1p != nil {
+ putNat(v1p)
}
- putNat(qhatv)
+ putNat(qhatvp)
q = q.norm()
shrVU(u, u, shift)
@@ -650,14 +660,14 @@
const deBruijn32 = 0x077CB531
-var deBruijn32Lookup = []byte{
+var deBruijn32Lookup = [...]byte{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
const deBruijn64 = 0x03f79d71b4ca8b09
-var deBruijn64Lookup = []byte{
+var deBruijn64Lookup = [...]byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
@@ -950,7 +960,7 @@
// (x^2...x^15) but then reduces the number of multiply-reduces by a
// third. Even for a 32-bit exponent, this reduces the number of
// operations. Uses Montgomery method for odd moduli.
- if len(x) > 1 && len(y) > 1 && len(m) > 0 {
+ if x.cmp(natOne) > 0 && len(y) > 1 && len(m) > 0 {
if m[0]&1 == 1 {
return z.expNNMontgomery(x, y, m)
}
@@ -1169,96 +1179,6 @@
return zz.norm()
}
-// probablyPrime performs n Miller-Rabin tests to check whether x is prime.
-// If x is prime, it returns true.
-// If x is not prime, it returns false with probability at least 1 - ¼ⁿ.
-//
-// It is not suitable for judging primes that an adversary may have crafted
-// to fool this test.
-func (n nat) probablyPrime(reps int) bool {
- if len(n) == 0 {
- return false
- }
-
- if len(n) == 1 {
- if n[0] < 2 {
- return false
- }
-
- if n[0]%2 == 0 {
- return n[0] == 2
- }
-
- // We have to exclude these cases because we reject all
- // multiples of these numbers below.
- switch n[0] {
- case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
- return true
- }
- }
-
- if n[0]&1 == 0 {
- return false // n is even
- }
-
- const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29}
- const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
-
- var r Word
- switch _W {
- case 32:
- r = n.modW(primesProduct32)
- case 64:
- r = n.modW(primesProduct64 & _M)
- default:
- panic("Unknown word size")
- }
-
- if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
- r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
- return false
- }
-
- if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
- r%43 == 0 || r%47 == 0 || r%53 == 0) {
- return false
- }
-
- nm1 := nat(nil).sub(n, natOne)
- // determine q, k such that nm1 = q << k
- k := nm1.trailingZeroBits()
- q := nat(nil).shr(nm1, k)
-
- nm3 := nat(nil).sub(nm1, natTwo)
- rand := rand.New(rand.NewSource(int64(n[0])))
-
- var x, y, quotient nat
- nm3Len := nm3.bitLen()
-
-NextRandom:
- for i := 0; i < reps; i++ {
- x = x.random(rand, nm3, nm3Len)
- x = x.add(x, natTwo)
- y = y.expNN(x, q, n)
- if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
- continue
- }
- for j := uint(1); j < k; j++ {
- y = y.mul(y, y)
- quotient, y = quotient.div(y, y, n)
- if y.cmp(nm1) == 0 {
- continue NextRandom
- }
- if y.cmp(natOne) == 0 {
- return false
- }
- }
- return false
- }
-
- return true
-}
-
// bytes writes the value of z into buf using big-endian encoding.
// len(buf) must be >= len(z)*_S. The value of z is encoded in the
// slice buf[i:]. The number i of unused bytes at the beginning of
@@ -1303,3 +1223,37 @@
return z.norm()
}
+
+// sqrt sets z = ⌊√x⌋
+func (z nat) sqrt(x nat) nat {
+ if x.cmp(natOne) <= 0 {
+ return z.set(x)
+ }
+ if alias(z, x) {
+ z = nil
+ }
+
+ // Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller.
+ // See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt).
+ // https://members.loria.fr/PZimmermann/mca/pub226.html
+ // If x is one less than a perfect square, the sequence oscillates between the correct z and z+1;
+ // otherwise it converges to the correct z and stays there.
+ var z1, z2 nat
+ z1 = z
+ z1 = z1.setUint64(1)
+ z1 = z1.shl(z1, uint(x.bitLen()/2+1)) // must be ≥ √x
+ for n := 0; ; n++ {
+ z2, _ = z2.div(nil, x, z1)
+ z2 = z2.add(z2, z1)
+ z2 = z2.shr(z2, 1)
+ if z2.cmp(z1) >= 0 {
+ // z1 is answer.
+ // Figure out whether z1 or z2 is currently aliased to z by looking at loop count.
+ if n&1 == 0 {
+ return z1
+ }
+ return z.set(z1)
+ }
+ z1, z2 = z2, z1
+ }
+}
diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go
index 79901d1..bdb60e6 100644
--- a/src/math/big/natconv_test.go
+++ b/src/math/big/natconv_test.go
@@ -278,6 +278,9 @@
const x = 10
for _, base := range []int{2, 8, 10, 16} {
for _, y := range []Word{10, 100, 1000, 10000, 100000} {
+ if isRaceBuilder && y > 1000 {
+ continue
+ }
b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
b.StopTimer()
var z nat
@@ -301,6 +304,9 @@
const x = 10
for _, base := range []int{2, 8, 10, 16} {
for _, y := range []Word{10, 100, 1000, 10000, 100000} {
+ if isRaceBuilder && y > 1000 {
+ continue
+ }
b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
b.StopTimer()
var z nat
diff --git a/src/math/big/prime.go b/src/math/big/prime.go
new file mode 100644
index 0000000..3e9690e
--- /dev/null
+++ b/src/math/big/prime.go
@@ -0,0 +1,320 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import "math/rand"
+
+// ProbablyPrime reports whether x is probably prime,
+// applying the Miller-Rabin test with n pseudorandomly chosen bases
+// as well as a Baillie-PSW test.
+//
+// If x is prime, ProbablyPrime returns true.
+// If x is chosen randomly and not prime, ProbablyPrime probably returns false.
+// The probability of returning true for a randomly chosen non-prime is at most ¼ⁿ.
+//
+// ProbablyPrime is 100% accurate for inputs less than 2⁶⁴.
+// See Menezes et al., Handbook of Applied Cryptography, 1997, pp. 145-149,
+// and FIPS 186-4 Appendix F for further discussion of the error probabilities.
+//
+// ProbablyPrime is not suitable for judging primes that an adversary may
+// have crafted to fool the test.
+//
+// As of Go 1.8, ProbablyPrime(0) is allowed and applies only a Baillie-PSW test.
+// Before Go 1.8, ProbablyPrime applied only the Miller-Rabin tests, and ProbablyPrime(0) panicked.
+func (x *Int) ProbablyPrime(n int) bool {
+ // Note regarding the doc comment above:
+ // It would be more precise to say that the Baillie-PSW test uses the
+ // extra strong Lucas test as its Lucas test, but since no one knows
+ // how to tell any of the Lucas tests apart inside a Baillie-PSW test
+ // (they all work equally well empirically), that detail need not be
+ // documented or implicitly guaranteed.
+ // The comment does avoid saying "the" Baillie-PSW test
+ // because of this general ambiguity.
+
+ if n < 0 {
+ panic("negative n for ProbablyPrime")
+ }
+ if x.neg || len(x.abs) == 0 {
+ return false
+ }
+
+ // primeBitMask records the primes < 64.
+ const primeBitMask uint64 = 1<<2 | 1<<3 | 1<<5 | 1<<7 |
+ 1<<11 | 1<<13 | 1<<17 | 1<<19 | 1<<23 | 1<<29 | 1<<31 |
+ 1<<37 | 1<<41 | 1<<43 | 1<<47 | 1<<53 | 1<<59 | 1<<61
+
+ w := x.abs[0]
+ if len(x.abs) == 1 && w < 64 {
+ return primeBitMask&(1<<w) != 0
+ }
+
+ if w&1 == 0 {
+ return false // n is even
+ }
+
+ const primesA = 3 * 5 * 7 * 11 * 13 * 17 * 19 * 23 * 37
+ const primesB = 29 * 31 * 41 * 43 * 47 * 53
+
+ var rA, rB uint32
+ switch _W {
+ case 32:
+ rA = uint32(x.abs.modW(primesA))
+ rB = uint32(x.abs.modW(primesB))
+ case 64:
+ r := x.abs.modW((primesA * primesB) & _M)
+ rA = uint32(r % primesA)
+ rB = uint32(r % primesB)
+ default:
+ panic("math/big: invalid word size")
+ }
+
+ if rA%3 == 0 || rA%5 == 0 || rA%7 == 0 || rA%11 == 0 || rA%13 == 0 || rA%17 == 0 || rA%19 == 0 || rA%23 == 0 || rA%37 == 0 ||
+ rB%29 == 0 || rB%31 == 0 || rB%41 == 0 || rB%43 == 0 || rB%47 == 0 || rB%53 == 0 {
+ return false
+ }
+
+ return x.abs.probablyPrimeMillerRabin(n+1, true) && x.abs.probablyPrimeLucas()
+}
+
+// probablyPrimeMillerRabin reports whether n passes reps rounds of the
+// Miller-Rabin primality test, using pseudo-randomly chosen bases.
+// If force2 is true, one of the rounds is forced to use base 2.
+// See Handbook of Applied Cryptography, p. 139, Algorithm 4.24.
+// The number n is known to be non-zero.
+func (n nat) probablyPrimeMillerRabin(reps int, force2 bool) bool {
+ nm1 := nat(nil).sub(n, natOne)
+ // determine q, k such that nm1 = q << k
+ k := nm1.trailingZeroBits()
+ q := nat(nil).shr(nm1, k)
+
+ nm3 := nat(nil).sub(nm1, natTwo)
+ rand := rand.New(rand.NewSource(int64(n[0])))
+
+ var x, y, quotient nat
+ nm3Len := nm3.bitLen()
+
+NextRandom:
+ for i := 0; i < reps; i++ {
+ if i == reps-1 && force2 {
+ x = x.set(natTwo)
+ } else {
+ x = x.random(rand, nm3, nm3Len)
+ x = x.add(x, natTwo)
+ }
+ y = y.expNN(x, q, n)
+ if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
+ continue
+ }
+ for j := uint(1); j < k; j++ {
+ y = y.mul(y, y)
+ quotient, y = quotient.div(y, y, n)
+ if y.cmp(nm1) == 0 {
+ continue NextRandom
+ }
+ if y.cmp(natOne) == 0 {
+ return false
+ }
+ }
+ return false
+ }
+
+ return true
+}
+
+// probablyPrimeLucas reports whether n passes the "almost extra strong" Lucas probable prime test,
+// using Baillie-OEIS parameter selection. This corresponds to "AESLPSP" on Jacobsen's tables (link below).
+// The combination of this test and a Miller-Rabin/Fermat test with base 2 gives a Baillie-PSW test.
+//
+// References:
+//
+// Baillie and Wagstaff, "Lucas Pseudoprimes", Mathematics of Computation 35(152),
+// October 1980, pp. 1391-1417, especially page 1401.
+// http://www.ams.org/journals/mcom/1980-35-152/S0025-5718-1980-0583518-6/S0025-5718-1980-0583518-6.pdf
+//
+// Grantham, "Frobenius Pseudoprimes", Mathematics of Computation 70(234),
+// March 2000, pp. 873-891.
+// http://www.ams.org/journals/mcom/2001-70-234/S0025-5718-00-01197-2/S0025-5718-00-01197-2.pdf
+//
+// Baillie, "Extra strong Lucas pseudoprimes", OEIS A217719, https://oeis.org/A217719.
+//
+// Jacobsen, "Pseudoprime Statistics, Tables, and Data", http://ntheory.org/pseudoprimes.html.
+//
+// Nicely, "The Baillie-PSW Primality Test", http://www.trnicely.net/misc/bpsw.html.
+// (Note that Nicely's definition of the "extra strong" test gives the wrong Jacobi condition,
+// as pointed out by Jacobsen.)
+//
+// Crandall and Pomerance, Prime Numbers: A Computational Perspective, 2nd ed.
+// Springer, 2005.
+func (n nat) probablyPrimeLucas() bool {
+ // Discard 0, 1.
+ if len(n) == 0 || n.cmp(natOne) == 0 {
+ return false
+ }
+ // Two is the only even prime.
+ // Already checked by caller, but here to allow testing in isolation.
+ if n[0]&1 == 0 {
+ return n.cmp(natTwo) == 0
+ }
+
+ // Baillie-OEIS "method C" for choosing D, P, Q,
+ // as in https://oeis.org/A217719/a217719.txt:
+ // try increasing P ≥ 3 such that D = P² - 4 (so Q = 1)
+ // until Jacobi(D, n) = -1.
+ // The search is expected to succeed for non-square n after just a few trials.
+ // After more than expected failures, check whether n is square
+ // (which would cause Jacobi(D, n) = 1 for all D not dividing n).
+ p := Word(3)
+ d := nat{1}
+ t1 := nat(nil) // temp
+ intD := &Int{abs: d}
+ intN := &Int{abs: n}
+ for ; ; p++ {
+ if p > 10000 {
+ // This is widely believed to be impossible.
+ // If we get a report, we'll want the exact number n.
+ panic("math/big: internal error: cannot find (D/n) = -1 for " + intN.String())
+ }
+ d[0] = p*p - 4
+ j := Jacobi(intD, intN)
+ if j == -1 {
+ break
+ }
+ if j == 0 {
+ // d = p²-4 = (p-2)(p+2).
+ // If (d/n) == 0 then d shares a prime factor with n.
+ // Since the loop proceeds in increasing p and starts with p-2==1,
+ // the shared prime factor must be p+2.
+ // If p+2 == n, then n is prime; otherwise p+2 is a proper factor of n.
+ return len(n) == 1 && n[0] == p+2
+ }
+ if p == 40 {
+ // We'll never find (d/n) = -1 if n is a square.
+ // If n is a non-square we expect to find a d in just a few attempts on average.
+ // After 40 attempts, take a moment to check if n is indeed a square.
+ t1 = t1.sqrt(n)
+ t1 = t1.mul(t1, t1)
+ if t1.cmp(n) == 0 {
+ return false
+ }
+ }
+ }
+
+ // Grantham definition of "extra strong Lucas pseudoprime", after Thm 2.3 on p. 876
+ // (D, P, Q above have become Δ, b, 1):
+ //
+ // Let U_n = U_n(b, 1), V_n = V_n(b, 1), and Δ = b²-4.
+ // An extra strong Lucas pseudoprime to base b is a composite n = 2^r s + Jacobi(Δ, n),
+ // where s is odd and gcd(n, 2*Δ) = 1, such that either (i) U_s ≡ 0 mod n and V_s ≡ ±2 mod n,
+ // or (ii) V_{2^t s} ≡ 0 mod n for some 0 ≤ t < r-1.
+ //
+ // We know gcd(n, Δ) = 1 or else we'd have found Jacobi(d, n) == 0 above.
+ // We know gcd(n, 2) = 1 because n is odd.
+ //
+ // Arrange s = (n - Jacobi(Δ, n)) / 2^r = (n+1) / 2^r.
+ s := nat(nil).add(n, natOne)
+ r := int(s.trailingZeroBits())
+ s = s.shr(s, uint(r))
+ nm2 := nat(nil).sub(n, natTwo) // n-2
+
+ // We apply the "almost extra strong" test, which checks the above conditions
+ // except for U_s ≡ 0 mod n, which allows us to avoid computing any U_k values.
+ // Jacobsen points out that maybe we should just do the full extra strong test:
+ // "It is also possible to recover U_n using Crandall and Pomerance equation 3.13:
+ // U_n = D^-1 (2V_{n+1} - PV_n) allowing us to run the full extra-strong test
+ // at the cost of a single modular inversion. This computation is easy and fast in GMP,
+ // so we can get the full extra-strong test at essentially the same performance as the
+ // almost extra strong test."
+
+ // Compute Lucas sequence V_s(b, 1), where:
+ //
+ // V(0) = 2
+ // V(1) = P
+ // V(k) = P V(k-1) - Q V(k-2).
+ //
+ // (Remember that due to method C above, P = b, Q = 1.)
+ //
+ // In general V(k) = α^k + β^k, where α and β are roots of x² - Px + Q.
+ // Crandall and Pomerance (p.147) observe that for 0 ≤ j ≤ k,
+ //
+ // V(j+k) = V(j)V(k) - V(k-j).
+ //
+ // So in particular, to quickly double the subscript:
+ //
+ // V(2k) = V(k)² - 2
+ // V(2k+1) = V(k) V(k+1) - P
+ //
+ // We can therefore start with k=0 and build up to k=s in log₂(s) steps.
+ natP := nat(nil).setWord(p)
+ vk := nat(nil).setWord(2)
+ vk1 := nat(nil).setWord(p)
+ t2 := nat(nil) // temp
+ for i := int(s.bitLen()); i >= 0; i-- {
+ if s.bit(uint(i)) != 0 {
+ // k' = 2k+1
+ // V(k') = V(2k+1) = V(k) V(k+1) - P.
+ t1 = t1.mul(vk, vk1)
+ t1 = t1.add(t1, n)
+ t1 = t1.sub(t1, natP)
+ t2, vk = t2.div(vk, t1, n)
+ // V(k'+1) = V(2k+2) = V(k+1)² - 2.
+ t1 = t1.mul(vk1, vk1)
+ t1 = t1.add(t1, nm2)
+ t2, vk1 = t2.div(vk1, t1, n)
+ } else {
+ // k' = 2k
+ // V(k'+1) = V(2k+1) = V(k) V(k+1) - P.
+ t1 = t1.mul(vk, vk1)
+ t1 = t1.add(t1, n)
+ t1 = t1.sub(t1, natP)
+ t2, vk1 = t2.div(vk1, t1, n)
+ // V(k') = V(2k) = V(k)² - 2
+ t1 = t1.mul(vk, vk)
+ t1 = t1.add(t1, nm2)
+ t2, vk = t2.div(vk, t1, n)
+ }
+ }
+
+ // Now k=s, so vk = V(s). Check V(s) ≡ ±2 (mod n).
+ if vk.cmp(natTwo) == 0 || vk.cmp(nm2) == 0 {
+ // Check U(s) ≡ 0.
+ // As suggested by Jacobsen, apply Crandall and Pomerance equation 3.13:
+ //
+ // U(k) = D⁻¹ (2 V(k+1) - P V(k))
+ //
+ // Since we are checking for U(k) == 0 it suffices to check 2 V(k+1) == P V(k) mod n,
+ // or P V(k) - 2 V(k+1) == 0 mod n.
+ t1 := t1.mul(vk, natP)
+ t2 := t2.shl(vk1, 1)
+ if t1.cmp(t2) < 0 {
+ t1, t2 = t2, t1
+ }
+ t1 = t1.sub(t1, t2)
+ t3 := vk1 // steal vk1, no longer needed below
+ vk1 = nil
+ _ = vk1
+ t2, t3 = t2.div(t3, t1, n)
+ if len(t3) == 0 {
+ return true
+ }
+ }
+
+ // Check V(2^t s) ≡ 0 mod n for some 0 ≤ t < r-1.
+ for t := 0; t < r-1; t++ {
+ if len(vk) == 0 { // vk == 0
+ return true
+ }
+ // Optimization: V(k) = 2 is a fixed point for V(k') = V(k)² - 2,
+ // so if V(k) = 2, we can stop: we will never find a future V(k) == 0.
+ if len(vk) == 1 && vk[0] == 2 { // vk == 2
+ return false
+ }
+ // k' = 2k
+ // V(k') = V(2k) = V(k)² - 2
+ t1 = t1.mul(vk, vk)
+ t1 = t1.sub(t1, natTwo)
+ t2, vk = t2.div(vk, t1, n)
+ }
+ return false
+}
diff --git a/src/math/big/prime_test.go b/src/math/big/prime_test.go
new file mode 100644
index 0000000..a2d3d18
--- /dev/null
+++ b/src/math/big/prime_test.go
@@ -0,0 +1,214 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+ "unicode"
+)
+
+var primes = []string{
+ "2",
+ "3",
+ "5",
+ "7",
+ "11",
+
+ "13756265695458089029",
+ "13496181268022124907",
+ "10953742525620032441",
+ "17908251027575790097",
+
+ // https://golang.org/issue/638
+ "18699199384836356663",
+
+ "98920366548084643601728869055592650835572950932266967461790948584315647051443",
+ "94560208308847015747498523884063394671606671904944666360068158221458669711639",
+
+ // http://primes.utm.edu/lists/small/small3.html
+ "449417999055441493994709297093108513015373787049558499205492347871729927573118262811508386655998299074566974373711472560655026288668094291699357843464363003144674940345912431129144354948751003607115263071543163",
+ "230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593",
+ "5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993",
+ "203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123",
+
+ // ECC primes: http://tools.ietf.org/html/draft-ladd-safecurves-02
+ "3618502788666131106986593281521497120414687020801267626233049500247285301239", // Curve1174: 2^251-9
+ "57896044618658097711785492504343953926634992332820282019728792003956564819949", // Curve25519: 2^255-19
+ "9850501549098619803069760025035903451269934817616361666987073351061430442874302652853566563721228910201656997576599", // E-382: 2^382-105
+ "42307582002575910332922579714097346549017899709713998034217522897561970639123926132812109468141778230245837569601494931472367", // Curve41417: 2^414-17
+ "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", // E-521: 2^521-1
+}
+
+var composites = []string{
+ "0",
+ "1",
+ "21284175091214687912771199898307297748211672914763848041968395774954376176754",
+ "6084766654921918907427900243509372380954290099172559290432744450051395395951",
+ "84594350493221918389213352992032324280367711247940675652888030554255915464401",
+ "82793403787388584738507275144194252681",
+
+ // Arnault, "Rabin-Miller Primality Test: Composite Numbers Which Pass It",
+ // Mathematics of Computation, 64(209) (January 1995), pp. 335-361.
+ "1195068768795265792518361315725116351898245581", // strong pseudoprime to prime bases 2 through 29
+ // strong pseudoprime to all prime bases up to 200
+ `
+ 80383745745363949125707961434194210813883768828755814583748891752229
+ 74273765333652186502336163960045457915042023603208766569966760987284
+ 0439654082329287387918508691668573282677617710293896977394701670823
+ 0428687109997439976544144845341155872450633409279022275296229414984
+ 2306881685404326457534018329786111298960644845216191652872597534901`,
+
+ // Extra-strong Lucas pseudoprimes. https://oeis.org/A217719
+ "989",
+ "3239",
+ "5777",
+ "10877",
+ "27971",
+ "29681",
+ "30739",
+ "31631",
+ "39059",
+ "72389",
+ "73919",
+ "75077",
+ "100127",
+ "113573",
+ "125249",
+ "137549",
+ "137801",
+ "153931",
+ "155819",
+ "161027",
+ "162133",
+ "189419",
+ "218321",
+ "231703",
+ "249331",
+ "370229",
+ "429479",
+ "430127",
+ "459191",
+ "473891",
+ "480689",
+ "600059",
+ "621781",
+ "632249",
+ "635627",
+
+ "3673744903",
+ "3281593591",
+ "2385076987",
+ "2738053141",
+ "2009621503",
+ "1502682721",
+ "255866131",
+ "117987841",
+ "587861",
+
+ "6368689",
+ "8725753",
+ "80579735209",
+ "105919633",
+}
+
+func cutSpace(r rune) rune {
+ if unicode.IsSpace(r) {
+ return -1
+ }
+ return r
+}
+
+func TestProbablyPrime(t *testing.T) {
+ nreps := 20
+ if testing.Short() {
+ nreps = 3
+ }
+ for i, s := range primes {
+ p, _ := new(Int).SetString(s, 10)
+ if !p.ProbablyPrime(nreps) || !p.ProbablyPrime(1) || !p.ProbablyPrime(0) {
+ t.Errorf("#%d prime found to be non-prime (%s)", i, s)
+ }
+ }
+
+ for i, s := range composites {
+ s = strings.Map(cutSpace, s)
+ c, _ := new(Int).SetString(s, 10)
+ if c.ProbablyPrime(nreps) || c.ProbablyPrime(1) || c.ProbablyPrime(0) {
+ t.Errorf("#%d composite found to be prime (%s)", i, s)
+ }
+ }
+
+ // check that ProbablyPrime panics if n <= 0
+ c := NewInt(11) // a prime
+ for _, n := range []int{-1, 0, 1} {
+ func() {
+ defer func() {
+ if n < 0 && recover() == nil {
+ t.Fatalf("expected panic from ProbablyPrime(%d)", n)
+ }
+ }()
+ if !c.ProbablyPrime(n) {
+ t.Fatalf("%v should be a prime", c)
+ }
+ }()
+ }
+}
+
+func BenchmarkProbablyPrime(b *testing.B) {
+ p, _ := new(Int).SetString("203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", 10)
+ for _, n := range []int{0, 1, 5, 10, 20} {
+ b.Run(fmt.Sprintf("n=%d", n), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ p.ProbablyPrime(n)
+ }
+ })
+ }
+
+ b.Run("Lucas", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ p.abs.probablyPrimeLucas()
+ }
+ })
+ b.Run("MillerRabinBase2", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ p.abs.probablyPrimeMillerRabin(1, true)
+ }
+ })
+}
+
+func TestMillerRabinPseudoprimes(t *testing.T) {
+ testPseudoprimes(t, "probablyPrimeMillerRabin",
+ func(n nat) bool { return n.probablyPrimeMillerRabin(1, true) && !n.probablyPrimeLucas() },
+ // https://oeis.org/A001262
+ []int{2047, 3277, 4033, 4681, 8321, 15841, 29341, 42799, 49141, 52633, 65281, 74665, 80581, 85489, 88357, 90751})
+}
+
+func TestLucasPseudoprimes(t *testing.T) {
+ testPseudoprimes(t, "probablyPrimeLucas",
+ func(n nat) bool { return n.probablyPrimeLucas() && !n.probablyPrimeMillerRabin(1, true) },
+ // https://oeis.org/A217719
+ []int{989, 3239, 5777, 10877, 27971, 29681, 30739, 31631, 39059, 72389, 73919, 75077})
+}
+
+func testPseudoprimes(t *testing.T, name string, cond func(nat) bool, want []int) {
+ n := nat{1}
+ for i := 3; i < 100000; i += 2 {
+ n[0] = Word(i)
+ pseudo := cond(n)
+ if pseudo && (len(want) == 0 || i != want[0]) {
+ t.Errorf("%s(%v, base=2) = %v, want false", name, i)
+ } else if !pseudo && len(want) >= 1 && i == want[0] {
+ t.Errorf("%s(%v, base=2) = false, want true", name, i)
+ }
+ if len(want) > 0 && i == want[0] {
+ want = want[1:]
+ }
+ }
+ if len(want) > 0 {
+ t.Fatalf("forgot to test %v", want)
+ }
+}
diff --git a/src/math/big/rat_test.go b/src/math/big/rat_test.go
index 3a06fca..afda686 100644
--- a/src/math/big/rat_test.go
+++ b/src/math/big/rat_test.go
@@ -382,9 +382,9 @@
9,
11,
}
- var winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64)
- if testing.Short() {
- winc, einc = 5, 15 // quick test (~60ms on x86-64)
+ var winc, einc = uint64(5), 15 // quick test (~60ms on x86-64)
+ if *long {
+ winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64)
}
for _, sign := range "+-" {
@@ -430,9 +430,9 @@
9,
11,
}
- var winc, einc = uint64(1), 1 // soak test (~75s on x86-64)
- if testing.Short() {
- winc, einc = 10, 500 // quick test (~12ms on x86-64)
+ var winc, einc = uint64(10), 500 // quick test (~12ms on x86-64)
+ if *long {
+ winc, einc = uint64(1), 1 // soak test (~75s on x86-64)
}
for _, sign := range "+-" {
diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go
index ef2b675..a6a401c 100644
--- a/src/math/big/ratconv.go
+++ b/src/math/big/ratconv.go
@@ -18,6 +18,9 @@
return strings.ContainsRune("+-/0123456789.eE", ch)
}
+var ratZero Rat
+var _ fmt.Scanner = &ratZero // *Rat must implement fmt.Scanner
+
// Scan is a support routine for fmt.Scanner. It accepts the formats
// 'e', 'E', 'f', 'F', 'g', 'G', and 'v'. All formats are equivalent.
func (z *Rat) Scan(s fmt.ScanState, ch rune) error {
@@ -36,8 +39,9 @@
// SetString sets z to the value of s and returns z and a boolean indicating
// success. s can be given as a fraction "a/b" or as a floating-point number
-// optionally followed by an exponent. If the operation failed, the value of
-// z is undefined but the returned value is nil.
+// optionally followed by an exponent. The entire string (not just a prefix)
+// must be valid for success. If the operation failed, the value of z is un-
+// defined but the returned value is nil.
func (z *Rat) SetString(s string) (*Rat, bool) {
if len(s) == 0 {
return nil, false
@@ -49,9 +53,13 @@
if _, ok := z.a.SetString(s[:sep], 0); !ok {
return nil, false
}
- s = s[sep+1:]
+ r := strings.NewReader(s[sep+1:])
var err error
- if z.b.abs, _, _, err = z.b.abs.scan(strings.NewReader(s), 0, false); err != nil {
+ if z.b.abs, _, _, err = z.b.abs.scan(r, 0, false); err != nil {
+ return nil, false
+ }
+ // entire string must have been consumed
+ if _, err = r.ReadByte(); err != io.EOF {
return nil, false
}
if len(z.b.abs) == 0 {
diff --git a/src/math/big/ratconv_test.go b/src/math/big/ratconv_test.go
index 35ad6cc..56ac8d7 100644
--- a/src/math/big/ratconv_test.go
+++ b/src/math/big/ratconv_test.go
@@ -50,6 +50,10 @@
{"204211327800791583.81095", "4084226556015831676219/20000", true},
{"0e9999999999", "0", true}, // issue #16176
{in: "1/0"},
+ {in: "4/3/2"}, // issue 17001
+ {in: "4/3/"},
+ {in: "4/3."},
+ {in: "4/"},
}
// These are not supported by fmt.Fscanf.
@@ -59,6 +63,7 @@
{"-010.", "-10", true},
{"0x10/0x20", "1/2", true},
{"0b1000/3", "8/3", true},
+ {in: "4/3x"},
// TODO(gri) add more tests
}
@@ -139,7 +144,7 @@
}
// Test inputs to Rat.SetString. The prefix "long:" causes the test
-// to be skipped in --test.short mode. (The threshold is about 500us.)
+// to be skipped except in -long mode. (The threshold is about 500us.)
var float64inputs = []string{
// Constants plundered from strconv/testfp.txt.
@@ -345,7 +350,7 @@
func TestFloat32SpecialCases(t *testing.T) {
for _, input := range float64inputs {
if strings.HasPrefix(input, "long:") {
- if testing.Short() {
+ if !*long {
continue
}
input = input[len("long:"):]
@@ -401,7 +406,7 @@
func TestFloat64SpecialCases(t *testing.T) {
for _, input := range float64inputs {
if strings.HasPrefix(input, "long:") {
- if testing.Short() {
+ if !*long {
continue
}
input = input[len("long:"):]
diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go
index d904be8..7a5c485 100644
--- a/src/math/cmplx/cmath_test.go
+++ b/src/math/cmplx/cmath_test.go
@@ -759,6 +759,14 @@
}
}
+// See issue 17577
+func TestInfiniteLoopIntanSeries(t *testing.T) {
+ want := Inf()
+ if got := Cot(0); got != want {
+ t.Errorf("Cot(0): got %g, want %g", got, want)
+ }
+}
+
func BenchmarkAbs(b *testing.B) {
for i := 0; i < b.N; i++ {
Abs(complex(2.5, 3.5))
diff --git a/src/math/cmplx/example_test.go b/src/math/cmplx/example_test.go
new file mode 100644
index 0000000..f0ed963
--- /dev/null
+++ b/src/math/cmplx/example_test.go
@@ -0,0 +1,28 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx_test
+
+import (
+ "fmt"
+ "math"
+ "math/cmplx"
+)
+
+func ExampleAbs() {
+ fmt.Printf("%.1f", cmplx.Abs(3+4i))
+ // Output: 5.0
+}
+
+// ExampleExp computes Euler's identity.
+func ExampleExp() {
+ fmt.Printf("%.1f", cmplx.Exp(1i*math.Pi)+1)
+ // Output: (0.0+0.0i)
+}
+
+func ExamplePolar() {
+ r, theta := cmplx.Polar(2i)
+ fmt.Printf("r: %.1f, θ: %.1f*π", r, theta/math.Pi)
+ // Output: r: 2.0, θ: 0.5*π
+}
diff --git a/src/math/cmplx/tan.go b/src/math/cmplx/tan.go
index 9485315..2990552 100644
--- a/src/math/cmplx/tan.go
+++ b/src/math/cmplx/tan.go
@@ -120,9 +120,9 @@
rn := 0.0
d := 0.0
for {
- rn += 1
+ rn++
f *= rn
- rn += 1
+ rn++
f *= rn
x2 *= x
y2 *= y
@@ -130,16 +130,18 @@
t /= f
d += t
- rn += 1
+ rn++
f *= rn
- rn += 1
+ rn++
f *= rn
x2 *= x
y2 *= y
t = y2 - x2
t /= f
d += t
- if math.Abs(t/d) <= MACHEP {
+ if !(math.Abs(t/d) > MACHEP) {
+ // Caution: Use ! and > instead of <= for correct behavior if t/d is NaN.
+ // See issue 17577.
break
}
}
diff --git a/src/math/cosh_s390x.s b/src/math/cosh_s390x.s
new file mode 100644
index 0000000..d061bd0
--- /dev/null
+++ b/src/math/cosh_s390x.s
@@ -0,0 +1,227 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants
+DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16
+DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00
+DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01
+DATA coshrodataL23<>+24(SB)/8, $704.E0
+GLOBL coshrodataL23<>+0(SB), RODATA, $32
+DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000
+GLOBL coshxinf<>+0(SB), RODATA, $8
+DATA coshxlim1<>+0(SB)/8, $800.E0
+GLOBL coshxlim1<>+0(SB), RODATA, $8
+DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf
+GLOBL coshxaddhy<>+0(SB), RODATA, $8
+DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000
+GLOBL coshx4ff<>+0(SB), RODATA, $8
+DATA coshe1<>+0(SB)/8, $0x3ff000000000000a
+GLOBL coshe1<>+0(SB), RODATA, $8
+
+// Log multiplier table
+DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01
+DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01
+DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01
+DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02
+DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02
+DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01
+DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01
+DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01
+DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01
+DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01
+DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02
+DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02
+DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02
+DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01
+DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01
+DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01
+GLOBL coshtab<>+0(SB), RODATA, $128
+
+// Minimax polynomial approximations
+DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00
+GLOBL coshe2<>+0(SB), RODATA, $8
+DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00
+GLOBL coshe3<>+0(SB), RODATA, $8
+DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01
+GLOBL coshe4<>+0(SB), RODATA, $8
+DATA coshe5<>+0(SB)/8, $0.833349307718286047e-02
+GLOBL coshe5<>+0(SB), RODATA, $8
+DATA coshe6<>+0(SB)/8, $0.138926439368309441e-02
+GLOBL coshe6<>+0(SB), RODATA, $8
+
+// Cosh returns the hyperbolic cosine of x.
+//
+// Special cases are:
+// Cosh(±0) = 1
+// Cosh(±Inf) = +Inf
+// Cosh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·coshAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ MOVD $coshrodataL23<>+0(SB), R9
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ MOVD $0x4086000000000000, R2
+ MOVD $0x4086000000000000, R3
+ BLTU L19
+ FMOVD F0, F4
+L2:
+ WORD $0xED409018 //cdb %f4,.L24-.L23(%r9)
+ BYTE $0x00
+ BYTE $0x19
+ BGE L14 //jnl .L14
+ BVS L14
+ WFCEDBS V4, V4, V2
+ BEQ L20
+L1:
+ FMOVD F0, ret+8(FP)
+ RET
+
+L14:
+ WFCEDBS V4, V4, V2
+ BVS L1
+ MOVD $coshxlim1<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFCHEDBS V4, V2, V2
+ BEQ L21
+ MOVD $coshxaddhy<>+0(SB), R1
+ FMOVD coshrodataL23<>+16(SB), F5
+ FMOVD 0(R1), F2
+ WFMSDB V0, V5, V2, V5
+ FMOVD coshrodataL23<>+8(SB), F3
+ FADD F5, F2
+ MOVD $coshe6<>+0(SB), R1
+ WFMSDB V2, V3, V0, V3
+ FMOVD 0(R1), F6
+ WFMDB V3, V3, V1
+ MOVD $coshe4<>+0(SB), R1
+ FMOVD coshrodataL23<>+0(SB), F7
+ WFMADB V2, V7, V3, V2
+ FMOVD 0(R1), F3
+ MOVD $coshe5<>+0(SB), R1
+ WFMADB V1, V6, V3, V6
+ FMOVD 0(R1), F7
+ MOVD $coshe3<>+0(SB), R1
+ FMOVD 0(R1), F3
+ WFMADB V1, V7, V3, V7
+ FNEG F2, F3
+ WORD $0xB3CD0015 //lgdr %r1,%f5
+ MOVD $coshe2<>+0(SB), R3
+ WFCEDBS V4, V0, V0
+ FMOVD 0(R3), F5
+ MOVD $coshe1<>+0(SB), R3
+ WFMADB V1, V6, V5, V6
+ FMOVD 0(R3), F5
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V1, V7, V5, V1
+ BVS L22
+ WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ MOVD $coshtab<>+0(SB), R3
+ WFMADB V3, V6, V1, V6
+ WORD $0x68043000 //ld %f0,0(%r4,%r3)
+ FMSUB F0, F3, F2, F2
+ WORD $0xA71AF000 //ahi %r1,-4096
+ WFMADB V2, V6, V0, V6
+L17:
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10022 //ldgr %f2,%r2
+ FMADD F2, F6, F2, F2
+ MOVD $coshx4ff<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMUL F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L19:
+ FNEG F0, F4
+ BR L2
+L20:
+ MOVD $coshxaddhy<>+0(SB), R1
+ FMOVD coshrodataL23<>+16(SB), F3
+ FMOVD 0(R1), F2
+ WFMSDB V0, V3, V2, V3
+ FMOVD coshrodataL23<>+8(SB), F4
+ FADD F3, F2
+ MOVD $coshe6<>+0(SB), R1
+ FMSUB F4, F2, F0, F0
+ FMOVD 0(R1), F6
+ WFMDB V0, V0, V1
+ MOVD $coshe4<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $coshe5<>+0(SB), R1
+ FMOVD coshrodataL23<>+0(SB), F5
+ WFMADB V1, V6, V4, V6
+ FMADD F5, F2, F0, F0
+ FMOVD 0(R1), F2
+ MOVD $coshe3<>+0(SB), R1
+ FMOVD 0(R1), F4
+ WFMADB V1, V2, V4, V2
+ MOVD $coshe2<>+0(SB), R1
+ FMOVD 0(R1), F5
+ FNEG F0, F4
+ WFMADB V1, V6, V5, V6
+ MOVD $coshe1<>+0(SB), R1
+ FMOVD 0(R1), F5
+ WFMADB V1, V2, V5, V1
+ WORD $0xB3CD0013 //lgdr %r1,%f3
+ MOVD $coshtab<>+0(SB), R5
+ WFMADB V4, V6, V1, V3
+ WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WFMSDB V4, V6, V1, V6
+ WORD $0x68145000 //ld %f1,0(%r4,%r5)
+ WFMSDB V4, V1, V0, V2
+ WORD $0xA7487FBE //lhi %r4,32702
+ FMADD F3, F2, F1, F1
+ SUBW R1, R4
+ WORD $0xECC439BC //risbg %r12,%r4,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WORD $0x682C5000 //ld %f2,0(%r12,%r5)
+ FMSUB F2, F4, F0, F0
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V0, V6, V2, V6
+ WORD $0xEC34000F //risbgn %r3,%r4,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10022 //ldgr %f2,%r2
+ WORD $0xB3C10003 //ldgr %f0,%r3
+ FMADD F2, F1, F2, F2
+ FMADD F0, F6, F0, F0
+ FADD F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L22:
+ WORD $0xA7387FBE //lhi %r3,32702
+ MOVD $coshtab<>+0(SB), R4
+ SUBW R1, R3
+ WFMSDB V3, V6, V1, V6
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WORD $0x68034000 //ld %f0,0(%r3,%r4)
+ FMSUB F0, F3, F2, F2
+ WORD $0xA7386FBE //lhi %r3,28606
+ WFMADB V2, V6, V0, V6
+ SUBW R1, R3, R1
+ BR L17
+L21:
+ MOVD $coshxinf<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
diff --git a/src/math/dim_arm64.s b/src/math/dim_arm64.s
new file mode 100644
index 0000000..4b6b592
--- /dev/null
+++ b/src/math/dim_arm64.s
@@ -0,0 +1,78 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000
+#define NaN 0x7FF8000000000001
+#define NegInf 0xFFF0000000000000
+
+// func Dim(x, y float64) float64
+TEXT ·Dim(SB),NOSPLIT,$0
+ // (+Inf, +Inf) special case
+ MOVD $PosInf, R0
+ MOVD x+0(FP), R1
+ MOVD y+8(FP), R2
+ CMP R0, R1
+ BNE dim2
+ CMP R0, R2
+ BEQ bothInf
+dim2: // (-Inf, -Inf) special case
+ MOVD $NegInf, R0
+ CMP R0, R1
+ BNE dim3
+ CMP R0, R2
+ BEQ bothInf
+dim3: // normal case
+ FMOVD R1, F0
+ FMOVD R2, F1
+ FMOVD $0.0, F2
+ FSUBD F1, F0
+ FMAXD F0, F2, F0
+ FMOVD F0, ret+16(FP)
+ RET
+bothInf:
+ MOVD $NaN, R0
+ MOVD R0, ret+16(FP)
+ RET
+
+// func ·Max(x, y float64) float64
+TEXT ·Max(SB),NOSPLIT,$0
+ // +Inf special cases
+ MOVD $PosInf, R0
+ MOVD x+0(FP), R1
+ CMP R0, R1
+ BEQ isPosInf
+ MOVD y+8(FP), R2
+ CMP R0, R2
+ BEQ isPosInf
+ // normal case
+ FMOVD R1, F0
+ FMOVD R2, F1
+ FMAXD F0, F1, F0
+ FMOVD F0, ret+16(FP)
+ RET
+isPosInf: // return +Inf
+ MOVD R0, ret+16(FP)
+ RET
+
+// func Min(x, y float64) float64
+TEXT ·Min(SB),NOSPLIT,$0
+ // -Inf special cases
+ MOVD $NegInf, R0
+ MOVD x+0(FP), R1
+ CMP R0, R1
+ BEQ isNegInf
+ MOVD y+8(FP), R2
+ CMP R0, R2
+ BEQ isNegInf
+ // normal case
+ FMOVD R1, F0
+ FMOVD R2, F1
+ FMIND F0, F1, F0
+ FMOVD F0, ret+16(FP)
+ RET
+isNegInf: // return -Inf
+ MOVD R0, ret+16(FP)
+ RET
diff --git a/src/math/exp_386.s b/src/math/exp_386.s
index 18a92ef..9d63295 100644
--- a/src/math/exp_386.s
+++ b/src/math/exp_386.s
@@ -6,36 +6,6 @@
// func Exp(x float64) float64
TEXT ·Exp(SB),NOSPLIT,$0
-// test bits for not-finite
- MOVL x_hi+4(FP), AX
- ANDL $0x7ff00000, AX
- CMPL AX, $0x7ff00000
- JEQ not_finite
- FLDL2E // F0=log2(e)
- FMULD x+0(FP), F0 // F0=x*log2(e)
- FMOVD F0, F1 // F0=x*log2(e), F1=x*log2(e)
- FRNDINT // F0=int(x*log2(e)), F1=x*log2(e)
- FSUBD F0, F1 // F0=int(x*log2(e)), F1=x*log2(e)-int(x*log2(e))
- FXCHD F0, F1 // F0=x*log2(e)-int(x*log2(e)), F1=int(x*log2(e))
- F2XM1 // F0=2**(x*log2(e)-int(x*log2(e)))-1, F1=int(x*log2(e))
- FLD1 // F0=1, F1=2**(x*log2(e)-int(x*log2(e)))-1, F2=int(x*log2(e))
- FADDDP F0, F1 // F0=2**(x*log2(e)-int(x*log2(e))), F1=int(x*log2(e))
- FSCALE // F0=e**x, F1=int(x*log2(e))
- FMOVDP F0, F1 // F0=e**x
- FMOVDP F0, ret+8(FP)
- RET
-not_finite:
-// test bits for -Inf
- MOVL x_hi+4(FP), BX
- MOVL x_lo+0(FP), CX
- CMPL BX, $0xfff00000
- JNE not_neginf
- CMPL CX, $0
- JNE not_neginf
- FLDZ // F0=0
- FMOVDP F0, ret+8(FP)
- RET
-not_neginf:
- MOVL CX, ret_lo+8(FP)
- MOVL BX, ret_hi+12(FP)
- RET
+ // Used to use 387 assembly (FLDL2E+F2XM1) here,
+ // but it was both slower and less accurate than the portable Go code.
+ JMP ·exp(SB)
diff --git a/src/math/expm1.go b/src/math/expm1.go
index 8ce67e5..7dd75a8 100644
--- a/src/math/expm1.go
+++ b/src/math/expm1.go
@@ -229,7 +229,7 @@
}
t := Float64frombits(uint64(0x3ff-k) << 52) // 2**-k
y := x - (e + t)
- y += 1
+ y++
y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
return y
}
diff --git a/src/math/export_s390x_test.go b/src/math/export_s390x_test.go
new file mode 100644
index 0000000..3fdbd86
--- /dev/null
+++ b/src/math/export_s390x_test.go
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Export internal functions and variable for testing.
+var Log10NoVec = log10
+var CosNoVec = cos
+var CoshNoVec = cosh
+var SinNoVec = sin
+var SinhNoVec = sinh
+var TanhNoVec = tanh
+var HasVX = hasVX
diff --git a/src/math/floor_arm64.s b/src/math/floor_arm64.s
new file mode 100644
index 0000000..6d240d4
--- /dev/null
+++ b/src/math/floor_arm64.s
@@ -0,0 +1,26 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Floor(x float64) float64
+TEXT ·Floor(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRINTMD F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+// func Ceil(x float64) float64
+TEXT ·Ceil(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRINTPD F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+// func Trunc(x float64) float64
+TEXT ·Trunc(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRINTZD F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/floor_ppc64x.s b/src/math/floor_ppc64x.s
new file mode 100644
index 0000000..2ab011d
--- /dev/null
+++ b/src/math/floor_ppc64x.s
@@ -0,0 +1,25 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+TEXT ·Floor(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRIM F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+TEXT ·Ceil(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRIP F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+TEXT ·Trunc(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FRIZ F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/floor_s390x.s b/src/math/floor_s390x.s
new file mode 100644
index 0000000..896e79b
--- /dev/null
+++ b/src/math/floor_s390x.s
@@ -0,0 +1,26 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Floor(x float64) float64
+TEXT ·Floor(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FIDBR $7, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+// func Ceil(x float64) float64
+TEXT ·Ceil(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FIDBR $6, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+// func Trunc(x float64) float64
+TEXT ·Trunc(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F0
+ FIDBR $5, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/gamma.go b/src/math/gamma.go
index 841ec11..cc9e869 100644
--- a/src/math/gamma.go
+++ b/src/math/gamma.go
@@ -91,23 +91,31 @@
}
// Gamma function computed by Stirling's formula.
-// The polynomial is valid for 33 <= x <= 172.
-func stirling(x float64) float64 {
+// The pair of results must be multiplied together to get the actual answer.
+// The multiplication is left to the caller so that, if careful, the caller can avoid
+// infinity for 172 <= x <= 180.
+// The polynomial is valid for 33 <= x <= 172; larger values are only used
+// in reciprocal and produce denormalized floats. The lower precision there
+// masks any imprecision in the polynomial.
+func stirling(x float64) (float64, float64) {
+ if x > 200 {
+ return Inf(1), 1
+ }
const (
SqrtTwoPi = 2.506628274631000502417
MaxStirling = 143.01608
)
w := 1 / x
w = 1 + w*((((_gamS[0]*w+_gamS[1])*w+_gamS[2])*w+_gamS[3])*w+_gamS[4])
- y := Exp(x)
+ y1 := Exp(x)
+ y2 := 1.0
if x > MaxStirling { // avoid Pow() overflow
v := Pow(x, 0.5*x-0.25)
- y = v * (v / y)
+ y1, y2 = v, v/y1
} else {
- y = Pow(x, x-0.5) / y
+ y1 = Pow(x, x-0.5) / y1
}
- y = SqrtTwoPi * y * w
- return y
+ return y1, SqrtTwoPi * w * y2
}
// Gamma returns the Gamma function of x.
@@ -125,22 +133,26 @@
switch {
case isNegInt(x) || IsInf(x, -1) || IsNaN(x):
return NaN()
+ case IsInf(x, 1):
+ return Inf(1)
case x == 0:
if Signbit(x) {
return Inf(-1)
}
return Inf(1)
- case x < -170.5674972726612 || x > 171.61447887182298:
- return Inf(1)
}
q := Abs(x)
p := Floor(q)
if q > 33 {
if x >= 0 {
- return stirling(x)
+ y1, y2 := stirling(x)
+ return y1 * y2
}
+ // Note: x is negative but (checked above) not a negative integer,
+ // so x must be small enough to be in range for conversion to int64.
+ // If |x| were >= 2⁶³ it would have to be an integer.
signgam := 1
- if ip := int(p); ip&1 == 0 {
+ if ip := int64(p); ip&1 == 0 {
signgam = -1
}
z := q - p
@@ -152,7 +164,14 @@
if z == 0 {
return Inf(signgam)
}
- z = Pi / (Abs(z) * stirling(q))
+ sq1, sq2 := stirling(q)
+ absz := Abs(z)
+ d := absz * sq1 * sq2
+ if IsInf(d, 0) {
+ z = Pi / absz / sq1 / sq2
+ } else {
+ z = Pi / d
+ }
return float64(signgam) * z
}
diff --git a/src/math/j0.go b/src/math/j0.go
index cbef7aa..fe26791 100644
--- a/src/math/j0.go
+++ b/src/math/j0.go
@@ -305,20 +305,20 @@
}
func pzero(x float64) float64 {
- var p [6]float64
- var q [5]float64
+ var p *[6]float64
+ var q *[5]float64
if x >= 8 {
- p = p0R8
- q = p0S8
+ p = &p0R8
+ q = &p0S8
} else if x >= 4.5454 {
- p = p0R5
- q = p0S5
+ p = &p0R5
+ q = &p0S5
} else if x >= 2.8571 {
- p = p0R3
- q = p0S3
+ p = &p0R3
+ q = &p0S3
} else if x >= 2 {
- p = p0R2
- q = p0S2
+ p = &p0R2
+ q = &p0S2
}
z := 1 / (x * x)
r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
@@ -408,19 +408,19 @@
}
func qzero(x float64) float64 {
- var p, q [6]float64
+ var p, q *[6]float64
if x >= 8 {
- p = q0R8
- q = q0S8
+ p = &q0R8
+ q = &q0S8
} else if x >= 4.5454 {
- p = q0R5
- q = q0S5
+ p = &q0R5
+ q = &q0S5
} else if x >= 2.8571 {
- p = q0R3
- q = q0S3
+ p = &q0R3
+ q = &q0S3
} else if x >= 2 {
- p = q0R2
- q = q0S2
+ p = &q0R2
+ q = &q0S2
}
z := 1 / (x * x)
r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
diff --git a/src/math/j1.go b/src/math/j1.go
index d359d90..f1adcb6 100644
--- a/src/math/j1.go
+++ b/src/math/j1.go
@@ -298,20 +298,20 @@
}
func pone(x float64) float64 {
- var p [6]float64
- var q [5]float64
+ var p *[6]float64
+ var q *[5]float64
if x >= 8 {
- p = p1R8
- q = p1S8
+ p = &p1R8
+ q = &p1S8
} else if x >= 4.5454 {
- p = p1R5
- q = p1S5
+ p = &p1R5
+ q = &p1S5
} else if x >= 2.8571 {
- p = p1R3
- q = p1S3
+ p = &p1R3
+ q = &p1S3
} else if x >= 2 {
- p = p1R2
- q = p1S2
+ p = &p1R2
+ q = &p1S2
}
z := 1 / (x * x)
r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
@@ -401,19 +401,19 @@
}
func qone(x float64) float64 {
- var p, q [6]float64
+ var p, q *[6]float64
if x >= 8 {
- p = q1R8
- q = q1S8
+ p = &q1R8
+ q = &q1S8
} else if x >= 4.5454 {
- p = q1R5
- q = q1S5
+ p = &q1R5
+ q = &q1S5
} else if x >= 2.8571 {
- p = q1R3
- q = q1S3
+ p = &q1R3
+ q = &q1S3
} else if x >= 2 {
- p = q1R2
- q = q1S2
+ p = &q1R2
+ q = &q1S2
}
z := 1 / (x * x)
r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
diff --git a/src/math/jn.go b/src/math/jn.go
index 721112f..3422782 100644
--- a/src/math/jn.go
+++ b/src/math/jn.go
@@ -174,7 +174,7 @@
q1 := w*z - 1
k := 1
for q1 < 1e9 {
- k += 1
+ k++
z += h
q0, q1 = q1, z*q1-q0
}
diff --git a/src/math/log10_s390x.s b/src/math/log10_s390x.s
new file mode 100644
index 0000000..460bcd9
--- /dev/null
+++ b/src/math/log10_s390x.s
@@ -0,0 +1,170 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00
+DATA log10rodataL19<>+8(SB)/8, $-1.0
+DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000 //+NanN
+DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749
+DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04
+DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898
+DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01
+DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02
+DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03
+DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03
+DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04
+DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06
+DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05
+DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08
+DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07
+DATA log10rodataL19<>+120(SB)/8, $-5.5
+DATA log10rodataL19<>+128(SB)/8, $18446744073709551616.
+GLOBL log10rodataL19<>+0(SB), RODATA, $136
+
+// Table of log10 correction terms
+DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01
+DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01
+DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01
+DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02
+DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02
+DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02
+DATA log10tab2074<>+48(SB)/8, $0
+DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03
+DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03
+DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03
+DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02
+DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02
+DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02
+DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01
+DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01
+DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01
+GLOBL log10tab2074<>+0(SB), RODATA, $128
+
+// Log10 returns the decimal logarithm of the argument.
+//
+// Special cases are:
+// Log(+Inf) = +Inf
+// Log(0) = -Inf
+// Log(x < 0) = NaN
+// Log(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·log10Asm(SB),NOSPLIT,$8-16
+ FMOVD x+0(FP), F0
+ MOVD $log10rodataL19<>+0(SB), R9
+ FMOVD F0, x-8(SP)
+ WORD $0xC0298006 //iilf %r2,2147909631
+ BYTE $0x7F
+ BYTE $0xFF
+ WORD $0x5840F008 //l %r4, 8(%r15)
+ SUBW R4, R2, R3
+ WORD $0xEC5320AF //risbg %r5,%r3,32,128+47,0
+ BYTE $0x00
+ BYTE $0x55
+ MOVH $0x0, R1
+ WORD $0xEC15001F //risbgn %r1,%r5,64-64+0,64-64+0+32-1,64-0-32
+ BYTE $0x20
+ BYTE $0x59
+ WORD $0xC0590016 //iilf %r5,1507327
+ BYTE $0xFF
+ BYTE $0xFF
+ MOVW R4, R10
+ MOVW R5, R11
+ CMPBLE R10, R11, L2
+ WORD $0xC0297FEF //iilf %r2,2146435071
+ BYTE $0xFF
+ BYTE $0xFF
+ MOVW R4, R10
+ MOVW R2, R11
+ CMPBLE R10, R11, L16
+L3:
+L1:
+ FMOVD F0, ret+8(FP)
+ RET
+
+L2:
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLEU L13
+ WORD $0xED009080 //mdb %f0,.L20-.L19(%r9)
+ BYTE $0x00
+ BYTE $0x1C
+ FMOVD F0, x-8(SP)
+ WORD $0x5B20F008 //s %r2, 8(%r15)
+ WORD $0xEC3239BC //risbg %r3,%r2,57,128+60,64-13
+ BYTE $0x33
+ BYTE $0x55
+ ANDW $0xFFFF0000, R2
+ WORD $0xEC12001F //risbgn %r1,%r2,64-64+0,64-64+0+32-1,64-0-32
+ BYTE $0x20
+ BYTE $0x59
+ ADDW $0x4000000, R2
+ BLEU L17
+L8:
+ SRW $8, R2, R2
+ ORW $0x45000000, R2
+L4:
+ FMOVD log10rodataL19<>+120(SB), F2
+ WORD $0xB3C10041 //ldgr %f4,%r1
+ WFMADB V4, V0, V2, V0
+ FMOVD log10rodataL19<>+112(SB), F4
+ FMOVD log10rodataL19<>+104(SB), F6
+ WFMADB V0, V6, V4, V6
+ FMOVD log10rodataL19<>+96(SB), F4
+ FMOVD log10rodataL19<>+88(SB), F1
+ WFMADB V0, V1, V4, V1
+ WFMDB V0, V0, V4
+ FMOVD log10rodataL19<>+80(SB), F2
+ WFMADB V6, V4, V1, V6
+ FMOVD log10rodataL19<>+72(SB), F1
+ WFMADB V0, V2, V1, V2
+ FMOVD log10rodataL19<>+64(SB), F1
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,0
+ BYTE $0x00
+ BYTE $0x55
+ WFMADB V4, V6, V2, V6
+ FMOVD log10rodataL19<>+56(SB), F2
+ WFMADB V0, V1, V2, V1
+ VLVGF $0, R2, V2
+ WFMADB V4, V6, V1, V4
+ LDEBR F2, F2
+ FMOVD log10rodataL19<>+48(SB), F6
+ WFMADB V0, V4, V6, V4
+ FMOVD log10rodataL19<>+40(SB), F1
+ FMOVD log10rodataL19<>+32(SB), F6
+ MOVD $log10tab2074<>+0(SB), R1
+ WFMADB V2, V1, V6, V2
+ WORD $0x68331000 //ld %f3,0(%r3,%r1)
+ WFMADB V0, V4, V3, V0
+ FMOVD log10rodataL19<>+24(SB), F4
+ FMADD F4, F2, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L16:
+ WORD $0xEC2328B7 //risbg %r2,%r3,40,128+55,64-8
+ BYTE $0x38
+ BYTE $0x55
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
+ BYTE $0x33
+ BYTE $0x55
+ ORW $0x45000000, R2
+ BR L4
+L13:
+ BGE L18 //jnl .L18
+ BVS L18
+ FMOVD log10rodataL19<>+16(SB), F0
+ BR L1
+L17:
+ SRAW $1, R2, R2
+ SUBW $0x40000000, R2
+ BR L8
+L18:
+ FMOVD log10rodataL19<>+8(SB), F0
+ WORD $0xED009000 //ddb %f0,.L36-.L19(%r9)
+ BYTE $0x00
+ BYTE $0x1D
+ BR L1
diff --git a/src/math/log1p.go b/src/math/log1p.go
index d1bddfb..b128a16 100644
--- a/src/math/log1p.go
+++ b/src/math/log1p.go
@@ -167,7 +167,7 @@
if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
u = Float64frombits(iu | 0x3ff0000000000000) // normalize u
} else {
- k += 1
+ k++
u = Float64frombits(iu | 0x3fe0000000000000) // normalize u/2
iu = (0x0010000000000000 - iu) >> 2
}
@@ -179,10 +179,9 @@
if f == 0 {
if k == 0 {
return 0
- } else {
- c += float64(k) * Ln2Lo
- return float64(k)*Ln2Hi + c
}
+ c += float64(k) * Ln2Lo
+ return float64(k)*Ln2Hi + c
}
R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
if k == 0 {
diff --git a/src/math/modf_386.s b/src/math/modf_386.s
index d9b1eeb..e916073 100644
--- a/src/math/modf_386.s
+++ b/src/math/modf_386.s
@@ -7,16 +7,16 @@
// func Modf(f float64) (int float64, frac float64)
TEXT ·Modf(SB),NOSPLIT,$0
// special case for f == -0.0
- MOVL f+4(FP), DX // high word
- MOVL f+0(FP), AX // low word
+ MOVL f_hi+4(FP), DX // high word
+ MOVL f_lo+0(FP), AX // low word
CMPL DX, $(1<<31) // beginning of -0.0
JNE notNegativeZero
CMPL AX, $0 // could be denormalized
JNE notNegativeZero
- MOVL AX, int+8(FP)
- MOVL DX, int+12(FP)
- MOVL AX, frac+16(FP)
- MOVL DX, frac+20(FP)
+ MOVL AX, int_lo+8(FP)
+ MOVL DX, int_hi+12(FP)
+ MOVL AX, frac_lo+16(FP)
+ MOVL DX, frac_hi+20(FP)
RET
notNegativeZero:
FMOVD f+0(FP), F0 // F0=f
diff --git a/src/math/modf_arm64.s b/src/math/modf_arm64.s
new file mode 100644
index 0000000..7c70ef3
--- /dev/null
+++ b/src/math/modf_arm64.s
@@ -0,0 +1,18 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Modf(f float64) (int float64, frac float64)
+TEXT ·Modf(SB),NOSPLIT,$0
+ MOVD f+0(FP), R0
+ FMOVD R0, F0
+ FRINTZD F0, F1
+ FMOVD F1, int+8(FP)
+ FSUBD F1, F0
+ FMOVD F0, R1
+ AND $(1<<63), R0
+ ORR R0, R1 // must have same sign
+ MOVD R1, frac+16(FP)
+ RET
diff --git a/src/math/rand/gen_cooked.go b/src/math/rand/gen_cooked.go
new file mode 100644
index 0000000..567b7a8
--- /dev/null
+++ b/src/math/rand/gen_cooked.go
@@ -0,0 +1,89 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// This program computes the value of rng_cooked in rng.go,
+// which is used for seeding all instances of rand.Source.
+// a 64bit and a 63bit version of the array is printed to
+// the standard output.
+
+package main
+
+import "fmt"
+
+const (
+ length = 607
+ tap = 273
+ mask = (1 << 63) - 1
+ a = 48271
+ m = (1 << 31) - 1
+ q = 44488
+ r = 3399
+)
+
+var (
+ rngVec [length]int64
+ rngTap, rngFeed int
+)
+
+func seedrand(x int32) int32 {
+ hi := x / q
+ lo := x % q
+ x = a*lo - r*hi
+ if x < 0 {
+ x += m
+ }
+ return x
+}
+
+func srand(seed int32) {
+ rngTap = 0
+ rngFeed = length - tap
+ seed %= m
+ if seed < 0 {
+ seed += m
+ } else if seed == 0 {
+ seed = 89482311
+ }
+ x := seed
+ for i := -20; i < length; i++ {
+ x = seedrand(x)
+ if i >= 0 {
+ var u int64
+ u = int64(x) << 20
+ x = seedrand(x)
+ u ^= int64(x) << 10
+ x = seedrand(x)
+ u ^= int64(x)
+ rngVec[i] = u
+ }
+ }
+}
+
+func vrand() int64 {
+ rngTap--
+ if rngTap < 0 {
+ rngTap += length
+ }
+ rngFeed--
+ if rngFeed < 0 {
+ rngFeed += length
+ }
+ x := (rngVec[rngFeed] + rngVec[rngTap])
+ rngVec[rngFeed] = x
+ return x
+}
+
+func main() {
+ srand(1)
+ for i := uint64(0); i < 7.8e12; i++ {
+ vrand()
+ }
+ fmt.Printf("rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec)
+ for i := range rngVec {
+ rngVec[i] &= mask
+ }
+ fmt.Printf("lower 63bit of rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec)
+}
diff --git a/src/math/rand/race_test.go b/src/math/rand/race_test.go
index 48f6c29..186c716 100644
--- a/src/math/rand/race_test.go
+++ b/src/math/rand/race_test.go
@@ -33,6 +33,7 @@
seed += int64(Int63n(Int63()))
seed += int64(NormFloat64())
seed += int64(Uint32())
+ seed += int64(Uint64())
for _, p := range Perm(10) {
seed += int64(p)
}
diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go
index dd8d43c..9fe1cbd 100644
--- a/src/math/rand/rand.go
+++ b/src/math/rand/rand.go
@@ -23,7 +23,20 @@
Seed(seed int64)
}
+// A Source64 is a Source that can also generate
+// uniformly-distributed pseudo-random uint64 values in
+// the range [0, 1<<64) directly.
+// If a Rand r's underlying Source s implements Source64,
+// then r.Uint64 returns the result of one call to s.Uint64
+// instead of making two calls to s.Int63.
+type Source64 interface {
+ Source
+ Uint64() uint64
+}
+
// NewSource returns a new pseudo-random Source seeded with the given value.
+// Unlike the default Source used by top-level functions, this source is not
+// safe for concurrent use by multiple goroutines.
func NewSource(seed int64) Source {
var rng rngSource
rng.Seed(seed)
@@ -33,6 +46,7 @@
// A Rand is a source of random numbers.
type Rand struct {
src Source
+ s64 Source64 // non-nil if src is source64
// readVal contains remainder of 63-bit integer used for bytes
// generation during most recent Read call.
@@ -46,7 +60,10 @@
// New returns a new Rand that uses random values from src
// to generate other random values.
-func New(src Source) *Rand { return &Rand{src: src} }
+func New(src Source) *Rand {
+ s64, _ := src.(Source64)
+ return &Rand{src: src, s64: s64}
+}
// Seed uses the provided seed value to initialize the generator to a deterministic state.
// Seed should not be called concurrently with any other Rand method.
@@ -66,6 +83,14 @@
// Uint32 returns a pseudo-random 32-bit value as a uint32.
func (r *Rand) Uint32() uint32 { return uint32(r.Int63() >> 31) }
+// Uint64 returns a pseudo-random 64-bit value as a uint64.
+func (r *Rand) Uint64() uint64 {
+ if r.s64 != nil {
+ return r.s64.Uint64()
+ }
+ return uint64(r.Int63())>>31 | uint64(r.Int63())<<32
+}
+
// Int31 returns a non-negative pseudo-random 31-bit integer as an int32.
func (r *Rand) Int31() int32 { return int32(r.Int63() >> 32) }
@@ -207,7 +232,7 @@
* Top-level convenience functions
*/
-var globalRand = New(&lockedSource{src: NewSource(1)})
+var globalRand = New(&lockedSource{src: NewSource(1).(Source64)})
// Seed uses the provided seed value to initialize the default Source to a
// deterministic state. If Seed is not called, the generator behaves as
@@ -224,6 +249,10 @@
// from the default Source.
func Uint32() uint32 { return globalRand.Uint32() }
+// Uint64 returns a pseudo-random 64-bit value as a uint64
+// from the default Source.
+func Uint64() uint64 { return globalRand.Uint64() }
+
// Int31 returns a non-negative pseudo-random 31-bit integer as an int32
// from the default Source.
func Int31() int32 { return globalRand.Int31() }
@@ -286,7 +315,7 @@
type lockedSource struct {
lk sync.Mutex
- src Source
+ src Source64
}
func (r *lockedSource) Int63() (n int64) {
@@ -296,6 +325,13 @@
return
}
+func (r *lockedSource) Uint64() (n uint64) {
+ r.lk.Lock()
+ n = r.src.Uint64()
+ r.lk.Unlock()
+ return
+}
+
func (r *lockedSource) Seed(seed int64) {
r.lk.Lock()
r.src.Seed(seed)
diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go
index 6f31279..bf509e0 100644
--- a/src/math/rand/rand_test.go
+++ b/src/math/rand/rand_test.go
@@ -328,13 +328,26 @@
}
}
+func hasSlowFloatingPoint() bool {
+ switch runtime.GOARCH {
+ case "arm":
+ return os.Getenv("GOARM") == "5"
+ case "mips", "mipsle", "mips64", "mips64le":
+ // Be conservative and assume that all mips boards
+ // have emulated floating point.
+ // TODO: detect what it actually has.
+ return true
+ }
+ return false
+}
+
func TestFloat32(t *testing.T) {
// For issue 6721, the problem came after 7533753 calls, so check 10e6.
num := int(10e6)
// But do the full amount only on builders (not locally).
// But ARM5 floating point emulation is slow (Issue 10749), so
// do less for that builder:
- if testing.Short() && (testenv.Builder() == "" || runtime.GOARCH == "arm" && os.Getenv("GOARM") == "5") {
+ if testing.Short() && (testenv.Builder() == "" || hasSlowFloatingPoint()) {
num /= 100 // 1.72 seconds instead of 172 seconds
}
diff --git a/src/math/rand/regress_test.go b/src/math/rand/regress_test.go
index 4dd965c..e31e6c5 100644
--- a/src/math/rand/regress_test.go
+++ b/src/math/rand/regress_test.go
@@ -381,4 +381,24 @@
uint32(75079301), // Uint32()
uint32(3380456901), // Uint32()
uint32(3433369789), // Uint32()
+ uint64(8717895732742165505), // Uint64()
+ uint64(2259404117704393152), // Uint64()
+ uint64(6050128673802995827), // Uint64()
+ uint64(9724605487393973602), // Uint64()
+ uint64(12613765599614152010), // Uint64()
+ uint64(11893357769247901871), // Uint64()
+ uint64(1774932891286980153), // Uint64()
+ uint64(15267744271532198264), // Uint64()
+ uint64(17498302081433670737), // Uint64()
+ uint64(1543572285742637646), // Uint64()
+ uint64(11885104867954719224), // Uint64()
+ uint64(17548432336275752516), // Uint64()
+ uint64(7837839688282259259), // Uint64()
+ uint64(2518412263346885298), // Uint64()
+ uint64(5617773211005988520), // Uint64()
+ uint64(11562935753659892057), // Uint64()
+ uint64(16368296284793757383), // Uint64()
+ uint64(161231572858529631), // Uint64()
+ uint64(16482847956365694147), // Uint64()
+ uint64(16596477517051940556), // Uint64()
}
diff --git a/src/math/rand/rng.go b/src/math/rand/rng.go
index 947c49f..f922417 100644
--- a/src/math/rand/rng.go
+++ b/src/math/rand/rng.go
@@ -23,161 +23,159 @@
)
var (
- // cooked random numbers
- // the state of the rng
- // after 780e10 iterations
+ // Used for seeding. See gen_cooked.go for details.
rng_cooked [_LEN]int64 = [...]int64{
- 5041579894721019882, 4646389086726545243, 1395769623340756751, 5333664234075297259,
- 2875692520355975054, 9033628115061424579, 7143218595135194537, 4812947590706362721,
- 7937252194349799378, 5307299880338848416, 8209348851763925077, 2115741599318814044,
- 4593015457530856296, 8140875735541888011, 3319429241265089026, 8619815648190321034,
- 1727074043483619500, 113108499721038619, 4569519971459345583, 5062833859075314731,
- 2387618771259064424, 2716131344356686112, 6559392774825876886, 7650093201692370310,
- 7684323884043752161, 257867835996031390, 6593456519409015164, 271327514973697897,
- 2789386447340118284, 1065192797246149621, 3344507881999356393, 4459797941780066633,
- 7465081662728599889, 1014950805555097187, 4449440729345990775, 3481109366438502643,
+ -4181792142133755926, -4576982950128230565, 1395769623340756751, 5333664234075297259,
+ -6347679516498800754, 9033628115061424579, 7143218595135194537, 4812947590706362721,
+ 7937252194349799378, 5307299880338848416, 8209348851763925077, -7107630437535961764,
+ 4593015457530856296, 8140875735541888011, -5903942795589686782, -603556388664454774,
+ -7496297993371156308, 113108499721038619, 4569519971459345583, -4160538177779461077,
+ -6835753265595711384, -6507240692498089696, 6559392774825876886, 7650093201692370310,
+ 7684323884043752161, -8965504200858744418, -2629915517445760644, 271327514973697897,
+ -6433985589514657524, 1065192797246149621, 3344507881999356393, -4763574095074709175,
+ 7465081662728599889, 1014950805555097187, -4773931307508785033, -5742262670416273165,
2418672789110888383, 5796562887576294778, 4484266064449540171, 3738982361971787048,
- 4523597184512354423, 10530508058128498, 8633833783282346118, 2625309929628791628,
- 8660405965245884302, 10162832508971942, 6540714680961817391, 7031802312784620857,
- 6240911277345944669, 831864355460801054, 8004434137542152891, 2116287251661052151,
+ -4699774852342421385, 10530508058128498, -589538253572429690, -6598062107225984180,
+ 8660405965245884302, 10162832508971942, -2682657355892958417, 7031802312784620857,
+ 6240911277345944669, 831864355460801054, -1218937899312622917, 2116287251661052151,
2202309800992166967, 9161020366945053561, 4069299552407763864, 4936383537992622449,
- 457351505131524928, 342195045928179354, 2847771682816600509, 2068020115986376518,
- 4368649989588021065, 887231587095185257, 5563591506886576496, 6816225200251950296,
- 5616972787034086048, 8471809303394836566, 1686575021641186857, 4045484338074262002,
- 4244156215201778923, 7848217333783577387, 5632136521049761902, 833283142057835272,
- 9029726508369077193, 3243583134664087292, 4316371101804477087, 8937849979965997980,
- 6446940406810434101, 1679342092332374735, 6050638460742422078, 6993520719509581582,
- 7640877852514293609, 5881353426285907985, 812786550756860885, 4541845584483343330,
- 2725470216277009086, 4980675660146853729, 5210769080603236061, 8894283318990530821,
- 6326442804750084282, 1495812843684243920, 7069751578799128019, 7370257291860230865,
- 6756929275356942261, 4706794511633873654, 7824520467827898663, 8549875090542453214,
- 33650829478596156, 1328918435751322643, 7297902601803624459, 1011190183918857495,
- 2238025036817854944, 5147159997473910359, 896512091560522982, 2659470849286379941,
- 6097729358393448602, 1731725986304753684, 4106255841983812711, 8327155210721535508,
- 8477511620686074402, 5803876044675762232, 8435417780860221662, 5988852856651071244,
- 4715837297103951910, 7566171971264485114, 505808562678895611, 5070098180695063370,
- 842110666775871513, 572156825025677802, 1791881013492340891, 3393267094866038768,
- 3778721850472236509, 2352769483186201278, 1292459583847367458, 8897907043675088419,
- 5781809037144163536, 2733958794029492513, 5092019688680754699, 8996124554772526841,
- 4234737173186232084, 5027558287275472836, 4635198586344772304, 8687338893267139351,
- 5907508150730407386, 784756255473944452, 972392927514829904, 5422057694808175112,
- 5158420642969283891, 9048531678558643225, 2407211146698877100, 7583282216521099569,
- 3940796514530962282, 3341174631045206375, 3095313889586102949, 7405321895688238710,
- 5832080132947175283, 7890064875145919662, 8184139210799583195, 1149859861409226130,
- 1464597243840211302, 4641648007187991873, 3516491885471466898, 956288521791657692,
+ 457351505131524928, -8881176990926596454, -6375600354038175299, -7155351920868399290,
+ 4368649989588021065, 887231587095185257, -3659780529968199312, -2407146836602825512,
+ 5616972787034086048, -751562733459939242, 1686575021641186857, -5177887698780513806,
+ -4979215821652996885, -1375154703071198421, 5632136521049761902, -8390088894796940536,
+ -193645528485698615, -5979788902190688516, -4907000935050298721, -285522056888777828,
+ -2776431630044341707, 1679342092332374735, 6050638460742422078, -2229851317345194226,
+ -1582494184340482199, 5881353426285907985, 812786550756860885, 4541845584483343330,
+ -6497901820577766722, 4980675660146853729, -4012602956251539747, -329088717864244987,
+ -2896929232104691526, 1495812843684243920, -2153620458055647789, 7370257291860230865,
+ -2466442761497833547, 4706794511633873654, -1398851569026877145, 8549875090542453214,
+ -9189721207376179652, -7894453601103453165, 7297902601803624459, 1011190183918857495,
+ -6985347000036920864, 5147159997473910359, -8326859945294252826, 2659470849286379941,
+ 6097729358393448602, -7491646050550022124, -5117116194870963097, -896216826133240300,
+ -745860416168701406, 5803876044675762232, -787954255994554146, -3234519180203704564,
+ -4507534739750823898, -1657200065590290694, 505808562678895611, -4153273856159712438,
+ -8381261370078904295, 572156825025677802, 1791881013492340891, 3393267094866038768,
+ -5444650186382539299, 2352769483186201278, -7930912453007408350, -325464993179687389,
+ -3441562999710612272, -6489413242825283295, 5092019688680754699, -227247482082248967,
+ 4234737173186232084, 5027558287275472836, 4635198586344772304, -536033143587636457,
+ 5907508150730407386, -8438615781380831356, 972392927514829904, -3801314342046600696,
+ -4064951393885491917, -174840358296132583, 2407211146698877100, -1640089820333676239,
+ 3940796514530962282, -5882197405809569433, 3095313889586102949, -1818050141166537098,
+ 5832080132947175283, 7890064875145919662, 8184139210799583195, -8073512175445549678,
+ -7758774793014564506, -4581724029666783935, 3516491885471466898, -8267083515063118116,
6657089965014657519, 5220884358887979358, 1796677326474620641, 5340761970648932916,
1147977171614181568, 5066037465548252321, 2574765911837859848, 1085848279845204775,
- 3350107529868390359, 6116438694366558490, 2107701075971293812, 1803294065921269267,
- 2469478054175558874, 7368243281019965984, 3791908367843677526, 185046971116456637,
- 2257095756513439648, 7217693971077460129, 909049953079504259, 7196649268545224266,
- 5637660345400869599, 3955544945427965183, 8057528650917418961, 4139268440301127643,
- 6621926588513568059, 1373361136802681441, 6527366231383600011, 3507654575162700890,
- 9202058512774729859, 1954818376891585542, 6640380907130175705, 8299563319178235687,
- 3901867355218954373, 7046310742295574065, 6847195391333990232, 1572638100518868053,
- 8850422670118399721, 3631909142291992901, 5158881091950831288, 2882958317343121593,
- 4763258931815816403, 6280052734341785344, 4243789408204964850, 2043464728020827976,
- 6545300466022085465, 4562580375758598164, 5495451168795427352, 1738312861590151095,
- 553004618757816492, 6895160632757959823, 8233623922264685171, 7139506338801360852,
- 8550891222387991669, 5535668688139305547, 2430933853350256242, 5401941257863201076,
- 8159640039107728799, 6157493831600770366, 7632066283658143750, 6308328381617103346,
+ -5873264506986385449, 6116438694366558490, 2107701075971293812, -7420077970933506541,
+ 2469478054175558874, -1855128755834809824, -5431463669011098282, -9038325065738319171,
+ -6966276280341336160, 7217693971077460129, -8314322083775271549, 7196649268545224266,
+ -3585711691453906209, -5267827091426810625, 8057528650917418961, -5084103596553648165,
+ -2601445448341207749, -7850010900052094367, 6527366231383600011, 3507654575162700890,
+ 9202058512774729859, 1954818376891585542, -2582991129724600103, 8299563319178235687,
+ -5321504681635821435, 7046310742295574065, -2376176645520785576, -7650733936335907755,
+ 8850422670118399721, 3631909142291992901, 5158881091950831288, -6340413719511654215,
+ 4763258931815816403, 6280052734341785344, -4979582628649810958, 2043464728020827976,
+ -2678071570832690343, 4562580375758598164, 5495451168795427352, -7485059175264624713,
+ 553004618757816492, 6895160632757959823, -989748114590090637, 7139506338801360852,
+ -672480814466784139, 5535668688139305547, 2430933853350256242, -3821430778991574732,
+ -1063731997747047009, -3065878205254005442, 7632066283658143750, 6308328381617103346,
3681878764086140361, 3289686137190109749, 6587997200611086848, 244714774258135476,
- 4079788377417136100, 8090302575944624335, 2945117363431356361, 864324395848741045,
- 3009039260312620700, 8430027460082534031, 401084700045993341, 7254622446438694921,
- 4707864159563588614, 5640248530963493951, 5982507712689997893, 3315098242282210105,
- 5503847578771918426, 3941971367175193882, 8118566580304798074, 3839261274019871296,
- 7062410411742090847, 741381002980207668, 6027994129690250817, 2497829994150063930,
- 6251390334426228834, 1368930247903518833, 8809096399316380241, 6492004350391900708,
- 2462145737463489636, 404828418920299174, 4153026434231690595, 261785715255475940,
- 5464715384600071357, 592710404378763017, 6764129236657751224, 8513655718539357449,
- 5820343663801914208, 385298524683789911, 5224135003438199467, 6303131641338802145,
- 7150122561309371392, 368107899140673753, 3115186834558311558, 2915636353584281051,
+ -5143583659437639708, 8090302575944624335, 2945117363431356361, -8359047641006034763,
+ 3009039260312620700, -793344576772241777, 401084700045993341, -1968749590416080887,
+ 4707864159563588614, -3583123505891281857, -3240864324164777915, -5908273794572565703,
+ -3719524458082857382, -5281400669679581926, 8118566580304798074, 3839261274019871296,
+ 7062410411742090847, -8481991033874568140, 6027994129690250817, -6725542042704711878,
+ -2971981702428546974, -7854441788951256975, 8809096399316380241, 6492004350391900708,
+ 2462145737463489636, -8818543617934476634, -5070345602623085213, -8961586321599299868,
+ -3758656652254704451, -8630661632476012791, 6764129236657751224, -709716318315418359,
+ -3403028373052861600, -8838073512170985897, -3999237033416576341, -2920240395515973663,
+ -2073249475545404416, 368107899140673753, -6108185202296464250, -6307735683270494757,
4782583894627718279, 6718292300699989587, 8387085186914375220, 3387513132024756289,
- 4654329375432538231, 8930667561363381602, 5374373436876319273, 7623042350483453954,
- 7725442901813263321, 9186225467561587250, 4091027289597503355, 2357631606492579800,
- 2530936820058611833, 1636551876240043639, 5564664674334965799, 1452244145334316253,
- 2061642381019690829, 1279580266495294036, 9108481583171221009, 6023278686734049809,
- 5007630032676973346, 2153168792952589781, 6720334534964750538, 6041546491134794105,
- 3433922409283786309, 2285479922797300912, 3110614940896576130, 6366559590722842893,
- 5418791419666136509, 7163298419643543757, 4891138053923696990, 580618510277907015,
- 1684034065251686769, 4429514767357295841, 330346578555450005, 1119637995812174675,
- 7177515271653460134, 4589042248470800257, 7693288629059004563, 143607045258444228,
- 246994305896273627, 866417324803099287, 6473547110565816071, 3092379936208876896,
- 2058427839513754051, 5133784708526867938, 8785882556301281247, 6149332666841167611,
- 8585842181454472135, 6137678347805511274, 2070447184436970006, 5708223427705576541,
- 5999657892458244504, 4358391411789012426, 325123008708389849, 6837621693887290924,
- 4843721905315627004, 6010651222149276415, 5398352198963874652, 4602025990114250980,
- 1044646352569048800, 9106614159853161675, 829256115228593269, 4919284369102997000,
- 2681532557646850893, 3681559472488511871, 5307999518958214035, 6334130388442829274,
- 2658708232916537604, 1163313865052186287, 581945337509520675, 3648778920718647903,
- 4423673246306544414, 1620799783996955743, 220828013409515943, 8150384699999389761,
- 4287360518296753003, 4590000184845883843, 5513660857261085186, 6964829100392774275,
- 478991688350776035, 8746140185685648781, 228500091334420247, 1356187007457302238,
- 3019253992034194581, 3152601605678500003, 430152752706002213, 5559581553696971176,
- 4916432985369275664, 663574931734554391, 3420773838927732076, 2868348622579915573,
- 1999319134044418520, 3328689518636282723, 2587672709781371173, 1517255313529399333,
- 3092343956317362483, 3662252519007064108, 972445599196498113, 7664865435875959367,
- 1708913533482282562, 6917817162668868494, 3217629022545312900, 2570043027221707107,
- 8739788839543624613, 2488075924621352812, 4694002395387436668, 4559628481798514356,
+ 4654329375432538231, -292704475491394206, -3848998599978456535, 7623042350483453954,
+ 7725442901813263321, 9186225467561587250, -5132344747257272453, -6865740430362196008,
+ 2530936820058611833, 1636551876240043639, -3658707362519810009, 1452244145334316253,
+ -7161729655835084979, -7943791770359481772, 9108481583171221009, -3200093350120725999,
+ 5007630032676973346, 2153168792952589781, 6720334534964750538, -3181825545719981703,
+ 3433922409283786309, 2285479922797300912, 3110614940896576130, -2856812446131932915,
+ -3804580617188639299, 7163298419643543757, 4891138053923696990, 580618510277907015,
+ 1684034065251686769, 4429514767357295841, -8893025458299325803, -8103734041042601133,
+ 7177515271653460134, 4589042248470800257, -1530083407795771245, 143607045258444228,
+ 246994305896273627, -8356954712051676521, 6473547110565816071, 3092379936208876896,
+ 2058427839513754051, -4089587328327907870, 8785882556301281247, -3074039370013608197,
+ -637529855400303673, 6137678347805511274, -7152924852417805802, 5708223427705576541,
+ -3223714144396531304, 4358391411789012426, 325123008708389849, 6837621693887290924,
+ 4843721905315627004, -3212720814705499393, -3825019837890901156, 4602025990114250980,
+ 1044646352569048800, 9106614159853161675, -8394115921626182539, -4304087667751778808,
+ 2681532557646850893, 3681559472488511871, -3915372517896561773, -2889241648411946534,
+ -6564663803938238204, -8060058171802589521, 581945337509520675, 3648778920718647903,
+ -4799698790548231394, -7602572252857820065, 220828013409515943, -1072987336855386047,
+ 4287360518296753003, -4633371852008891965, 5513660857261085186, -2258542936462001533,
+ -8744380348503999773, 8746140185685648781, 228500091334420247, 1356187007457302238,
+ 3019253992034194581, 3152601605678500003, -8793219284148773595, 5559581553696971176,
+ 4916432985369275664, -8559797105120221417, -5802598197927043732, 2868348622579915573,
+ -7224052902810357288, -5894682518218493085, 2587672709781371173, -7706116723325376475,
+ 3092343956317362483, -5561119517847711700, 972445599196498113, -1558506600978816441,
+ 1708913533482282562, -2305554874185907314, -6005743014309462908, -6653329009633068701,
+ -483583197311151195, 2488075924621352812, -4529369641467339140, -4663743555056261452,
2997203966153298104, 1282559373026354493, 240113143146674385, 8665713329246516443,
- 628141331766346752, 4571950817186770476, 1472811188152235408, 7596648026010355826,
- 6091219417754424743, 7834161864828164065, 7103445518877254909, 4390861237357459201,
- 4442653864240571734, 8903482404847331368, 622261699494173647, 6037261250297213248,
- 504404948065709118, 7275215526217113061, 1011176780856001400, 2194750105623461063,
- 2623071828615234808, 5157313728073836108, 3738405111966602044, 2539767524076729570,
- 2467284396349269342, 5256026990536851868, 7841086888628396109, 6640857538655893162,
- 1202087339038317498, 2113514992440715978, 7534350895342931403, 4925284734898484745,
- 5145623771477493805, 8225140880134972332, 2719520354384050532, 9132346697815513771,
- 4332154495710163773, 7137789594094346916, 6994721091344268833, 6667228574869048934,
- 655440045726677499, 59934747298466858, 6124974028078036405, 8957774780655365418,
- 2332206071942466437, 1701056712286369627, 3154897383618636503, 1637766181387607527,
- 2460521277767576533, 197309393502684135, 643677854385267315, 2543179307861934850,
- 4350769010207485119, 4754652089410667672, 2015595502641514512, 7999059458976458608,
- 4287946071480840813, 8362686366770308971, 6486469209321732151, 3617727845841796026,
- 7554353525834302244, 4450022655153542367, 1605195740213535749, 5327014565305508387,
- 4626575813550328320, 2692222020597705149, 241045573717249868, 5098046974627094010,
- 7916882295460730264, 884817090297530579, 5329160409530630596, 7790979528857726136,
- 4955070238059373407, 4918537275422674302, 3008076183950404629, 3007769226071157901,
- 2470346235617803020, 8928702772696731736, 7856187920214445904, 4474874585391974885,
- 7900176660600710914, 2140571127916226672, 2425445057265199971, 2486055153341847830,
- 4186670094382025798, 1883939007446035042, 8808666044074867985, 3734134241178479257,
- 4065968871360089196, 6953124200385847784, 1305686814738899057, 1637739099014457647,
- 3656125660947993209, 3966759634633167020, 3106378204088556331, 6328899822778449810,
- 4565385105440252958, 1979884289539493806, 2331793186920865425, 3783206694208922581,
- 8464961209802336085, 2843963751609577687, 3030678195484896323, 4793717574095772604,
+ 628141331766346752, -4651421219668005332, -7750560848702540400, 7596648026010355826,
+ -3132152619100351065, 7834161864828164065, 7103445518877254909, 4390861237357459201,
+ -4780718172614204074, -319889632007444440, 622261699494173647, -3186110786557562560,
+ -8718967088789066690, -1948156510637662747, -8212195255998774408, -7028621931231314745,
+ 2623071828615234808, -4066058308780939700, -5484966924888173764, -6683604512778046238,
+ -6756087640505506466, 5256026990536851868, 7841086888628396109, 6640857538655893162,
+ -8021284697816458310, -7109857044414059830, -1689021141511844405, -4298087301956291063,
+ -4077748265377282003, -998231156719803476, 2719520354384050532, 9132346697815513771,
+ 4332154495710163773, -2085582442760428892, 6994721091344268833, -2556143461985726874,
+ -8567931991128098309, 59934747298466858, -3098398008776739403, -265597256199410390,
+ 2332206071942466437, -7522315324568406181, 3154897383618636503, -7585605855467168281,
+ -6762850759087199275, 197309393502684135, -8579694182469508493, 2543179307861934850,
+ 4350769010207485119, -4468719947444108136, -7207776534213261296, -1224312577878317200,
+ 4287946071480840813, 8362686366770308971, 6486469209321732151, -5605644191012979782,
+ -1669018511020473564, 4450022655153542367, -7618176296641240059, -3896357471549267421,
+ -4596796223304447488, -6531150016257070659, -8982326463137525940, -4125325062227681798,
+ -1306489741394045544, -8338554946557245229, 5329160409530630596, 7790979528857726136,
+ 4955070238059373407, -4304834761432101506, -6215295852904371179, 3007769226071157901,
+ -6753025801236972788, 8928702772696731736, 7856187920214445904, -4748497451462800923,
+ 7900176660600710914, -7082800908938549136, -6797926979589575837, -6737316883512927978,
+ 4186670094382025798, 1883939007446035042, -414705992779907823, 3734134241178479257,
+ 4065968871360089196, 6953124200385847784, -7917685222115876751, -7585632937840318161,
+ -5567246375906782599, -5256612402221608788, 3106378204088556331, -2894472214076325998,
+ 4565385105440252958, 1979884289539493806, -6891578849933910383, 3783206694208922581,
+ 8464961209802336085, 2843963751609577687, 3030678195484896323, -4429654462759003204,
4459239494808162889, 402587895800087237, 8057891408711167515, 4541888170938985079,
- 1042662272908816815, 5557303057122568958, 2647678726283249984, 2144477441549833761,
- 5806352215355387087, 7117771003473903623, 5916597177708541638, 462597715452321361,
- 8833658097025758785, 5970273481425315300, 563813119381731307, 2768349550652697015,
- 1598828206250873866, 5206393647403558110, 6235043485709261823, 3152217402014639496,
- 8469693267274066490, 125672920241807416, 5311079624024060938, 6663754932310491587,
- 8736848295048751716, 4488039774992061878, 5923302823487327109, 140891791083103236,
- 7414942793393574290, 7990420780896957397, 4317817392807076702, 3625184369705367340,
- 2740722765288122703, 5743100009702758344, 5997898640509039159, 8854493341352484163,
- 5242208035432907801, 701338899890987198, 7609280429197514109, 3020985755112334161,
- 6651322707055512866, 2635195723621160615, 5144520864246028816, 1035086515727829828,
- 1567242097116389047, 8172389260191636581, 6337820351429292273, 2163012566996458925,
- 2743190902890262681, 1906367633221323427, 6011544915663598137, 5932255307352610768,
- 2241128460406315459, 895504896216695588, 3094483003111372717, 4583857460292963101,
- 9079887171656594975, 8839289181930711403, 5762740387243057873, 4225072055348026230,
- 1838220598389033063, 3801620336801580414, 8823526620080073856, 1776617605585100335,
- 7899055018877642622, 5421679761463003041, 5521102963086275121, 4248279443559365898,
- 8735487530905098534, 1760527091573692978, 7142485049657745894, 8222656872927218123,
- 4969531564923704323, 3394475942196872480, 6424174453260338141, 359248545074932887,
- 3273651282831730598, 6797106199797138596, 3030918217665093212, 145600834617314036,
- 6036575856065626233, 740416251634527158, 7080427635449935582, 6951781370868335478,
- 399922722363687927, 294902314447253185, 7844950936339178523, 880320858634709042,
- 6192655680808675579, 411604686384710388, 9026808440365124461, 6440783557497587732,
+ 1042662272908816815, -3666068979732206850, 2647678726283249984, 2144477441549833761,
+ -3417019821499388721, -2105601033380872185, 5916597177708541638, -8760774321402454447,
+ 8833658097025758785, 5970273481425315300, 563813119381731307, -6455022486202078793,
+ 1598828206250873866, -4016978389451217698, -2988328551145513985, -6071154634840136312,
+ 8469693267274066490, 125672920241807416, -3912292412830714870, -2559617104544284221,
+ -486523741806024092, -4735332261862713930, 5923302823487327109, -9082480245771672572,
+ -1808429243461201518, 7990420780896957397, 4317817392807076702, 3625184369705367340,
+ -6482649271566653105, -3480272027152017464, -3225473396345736649, -368878695502291645,
+ -3981164001421868007, -8522033136963788610, 7609280429197514109, 3020985755112334161,
+ -2572049329799262942, 2635195723621160615, 5144520864246028816, -8188285521126945980,
+ 1567242097116389047, 8172389260191636581, -2885551685425483535, -7060359469858316883,
+ -6480181133964513127, -7317004403633452381, 6011544915663598137, 5932255307352610768,
+ 2241128460406315459, -8327867140638080220, 3094483003111372717, 4583857460292963101,
+ 9079887171656594975, -384082854924064405, -3460631649611717935, 4225072055348026230,
+ -7385151438465742745, 3801620336801580414, -399845416774701952, -7446754431269675473,
+ 7899055018877642622, 5421679761463003041, 5521102963086275121, -4975092593295409910,
+ 8735487530905098534, -7462844945281082830, -2080886987197029914, -1000715163927557685,
+ -4253840471931071485, -5828896094657903328, 6424174453260338141, 359248545074932887,
+ -5949720754023045210, -2426265837057637212, 3030918217665093212, -9077771202237461772,
+ -3186796180789149575, 740416251634527158, -2142944401404840226, 6951781370868335478,
+ 399922722363687927, -8928469722407522623, -1378421100515597285, -8343051178220066766,
+ -3030716356046100229, -8811767350470065420, 9026808440365124461, 6440783557497587732,
4615674634722404292, 539897290441580544, 2096238225866883852, 8751955639408182687,
- 1907224908052289603, 7381039757301768559, 6157238513393239656, 7749994231914157575,
+ -7316147128802486205, 7381039757301768559, 6157238513393239656, -1473377804940618233,
8629571604380892756, 5280433031239081479, 7101611890139813254, 2479018537985767835,
- 7169176924412769570, 7942066497793203302, 1357759729055557688, 2278447439451174845,
- 3625338785743880657, 6477479539006708521, 8976185375579272206, 5511371554711836120,
+ 7169176924412769570, -1281305539061572506, -7865612307799218120, 2278447439451174845,
+ 3625338785743880657, 6477479539006708521, 8976185375579272206, -3712000482142939688,
1326024180520890843, 7537449876596048829, 5464680203499696154, 3189671183162196045,
- 6346751753565857109, 241159987320630307, 3095793449658682053, 8978332846736310159,
- 2902794662273147216, 7208698530190629697, 7276901792339343736, 1732385229314443140,
- 4133292154170828382, 2918308698224194548, 1519461397937144458, 5293934712616591764,
- 4922828954023452664, 2879211533496425641, 5896236396443472108, 8465043815351752425,
- 7329020396871624740, 8915471717014488588, 2944902635677463047, 7052079073493465134,
+ 6346751753565857109, -8982212049534145501, -6127578587196093755, -245039190118465649,
+ -6320577374581628592, 7208698530190629697, 7276901792339343736, -7490986807540332668,
+ 4133292154170828382, 2918308698224194548, -7703910638917631350, -3929437324238184044,
+ -4300543082831323144, -6344160503358350167, 5896236396443472108, -758328221503023383,
+ -1894351639983151068, -307900319840287220, -6278469401177312761, -2171292963361310674,
8382142935188824023, 9103922860780351547, 4152330101494654406,
}
)
@@ -223,13 +221,18 @@
x = seedrand(x)
u ^= int64(x)
u ^= rng_cooked[i]
- rng.vec[i] = u & _MASK
+ rng.vec[i] = u
}
}
}
// Int63 returns a non-negative pseudo-random 63-bit integer as an int64.
func (rng *rngSource) Int63() int64 {
+ return int64(rng.Uint64() & _MASK)
+}
+
+// Uint64 returns a non-negative pseudo-random 64-bit integer as an uint64.
+func (rng *rngSource) Uint64() uint64 {
rng.tap--
if rng.tap < 0 {
rng.tap += _LEN
@@ -240,7 +243,7 @@
rng.feed += _LEN
}
- x := (rng.vec[rng.feed] + rng.vec[rng.tap]) & _MASK
+ x := rng.vec[rng.feed] + rng.vec[rng.tap]
rng.vec[rng.feed] = x
- return x
+ return uint64(x)
}
diff --git a/src/math/sin.go b/src/math/sin.go
index ed85f21..7a75a5f 100644
--- a/src/math/sin.go
+++ b/src/math/sin.go
@@ -140,8 +140,8 @@
// map zeros to origin
if j&1 == 1 {
- j += 1
- y += 1
+ j++
+ y++
}
j &= 7 // octant modulo 2Pi radians (360 degrees)
if j > 3 {
@@ -200,8 +200,8 @@
// map zeros to origin
if j&1 == 1 {
- j += 1
- y += 1
+ j++
+ y++
}
j &= 7 // octant modulo 2Pi radians (360 degrees)
// reflect in x axis
diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s
new file mode 100644
index 0000000..5dc823c
--- /dev/null
+++ b/src/math/sin_s390x.s
@@ -0,0 +1,356 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Various constants
+DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000
+GLOBL sincosxnan<>+0(SB), RODATA, $8
+DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19
+GLOBL sincosxlim<>+0(SB), RODATA, $8
+DATA sincosxadd<>+0(SB)/8, $0xc338000000000000
+GLOBL sincosxadd<>+0(SB), RODATA, $8
+DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31
+GLOBL sincosxpi2l<>+0(SB), RODATA, $8
+DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16
+GLOBL sincosxpi2m<>+0(SB), RODATA, $8
+DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01
+GLOBL sincosxpi2h<>+0(SB), RODATA, $8
+DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00
+GLOBL sincosrpi2<>+0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincosc0<>+0(SB), RODATA, $8
+DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
+GLOBL sincosc1<>+0(SB), RODATA, $8
+DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
+GLOBL sincosc2<>+0(SB), RODATA, $8
+DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
+GLOBL sincosc3<>+0(SB), RODATA, $8
+DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
+GLOBL sincosc4<>+0(SB), RODATA, $8
+DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
+GLOBL sincosc5<>+0(SB), RODATA, $8
+DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
+GLOBL sincosc6<>+0(SB), RODATA, $8
+DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
+GLOBL sincosc7<>+0(SB), RODATA, $8
+DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincoss0<>+0(SB), RODATA, $8
+DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
+GLOBL sincoss1<>+0(SB), RODATA, $8
+DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
+GLOBL sincoss2<>+0(SB), RODATA, $8
+DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
+GLOBL sincoss3<>+0(SB), RODATA, $8
+DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
+GLOBL sincoss4<>+0(SB), RODATA, $8
+DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
+GLOBL sincoss5<>+0(SB), RODATA, $8
+DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
+GLOBL sincoss6<>+0(SB), RODATA, $8
+DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
+GLOBL sincoss7<>+0(SB), RODATA, $8
+
+// Sin returns the sine of the radian argument x.
+//
+// Special cases are:
+// Sin(±0) = ±0
+// Sin(±Inf) = NaN
+// Sin(NaN) = NaN
+// The algorithm used is minimax polynomial approximation.
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·sinAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ //special case Sin(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ sinIsZero
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLTU L17
+ FMOVD F0, F5
+L2:
+ MOVD $sincoss7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincoss6<>+0(SB), R1
+ FMOVD 0(R1), F1
+ MOVD $sincoss5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincoss4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincoss2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincoss3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincoss1<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFCHDBS V2, V5, V2
+ BEQ L18
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMSDB V0, V3, V2, V3
+ FMOVD 0(R1), F6
+ FADD F3, F6
+ MOVD $sincosxpi2h<>+0(SB), R1
+ FMOVD 0(R1), F2
+ FMSUB F2, F6, F0, F0
+ MOVD $sincosxpi2m<>+0(SB), R1
+ FMOVD 0(R1), F4
+ FMADD F4, F6, F0, F0
+ MOVD $sincosxpi2l<>+0(SB), R1
+ WFMDB V0, V0, V1
+ FMOVD 0(R1), F7
+ WFMDB V1, V1, V2
+ WORD $0xB3CD0013 //lgdr %r1,%f3
+ MOVD $sincosxlim<>+0(SB), R2
+ WORD $0xA7110001 //tmll %r1,1
+ BEQ L6
+ FMOVD 0(R2), F0
+ WFCHDBS V0, V5, V0
+ BNE L14
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V1, V0, V4, V0
+ FMOVD 0(R2), F6
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V1, V0, V6, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F6
+ WFMADB V2, V4, V6, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V2, V0, V3, V0
+ FMOVD 0(R2), F6
+ WFMADB V1, V4, V6, V4
+ WORD $0xA7110002 //tmll %r1,2
+ WFMADB V2, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMADB V1, V0, V2, V0
+ BNE L15
+ FMOVD F0, ret+8(FP)
+ RET
+
+L6:
+ FMOVD 0(R2), F4
+ WFCHDBS V4, V5, V4
+ BNE L14
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V1, V4, V3, V4
+ WFMADB V6, V7, V0, V6
+ FMOVD 0(R2), F0
+ MOVD $sincoss4<>+0(SB), R2
+ FMADD F4, F1, F0, F0
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss3<>+0(SB), R2
+ WFMADB V2, V3, V4, V3
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F4
+ WFMADB V1, V3, V4, V3
+ FNEG F6, F4
+ WFMADB V2, V0, V3, V2
+ WFMDB V4, V1, V0
+ WORD $0xA7110002 //tmll %r1,2
+ WFMSDB V0, V2, V6, V0
+ BNE L15
+ FMOVD F0, ret+8(FP)
+ RET
+
+L14:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L18:
+ WFMDB V0, V0, V2
+ WFMADB V2, V4, V1, V4
+ WFMDB V2, V2, V1
+ WFMADB V2, V4, V18, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V1, V4, V7, V4
+ WFMADB V2, V6, V3, V6
+ FMUL F0, F2
+ WFMADB V1, V4, V6, V4
+ FMADD F4, F2, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L17:
+ FNEG F0, F5
+ BR L2
+L15:
+ FNEG F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+
+sinIsZero:
+ FMOVD F0, ret+8(FP)
+ RET
+
+// Cos returns the cosine of the radian argument.
+//
+// Special cases are:
+// Cos(±Inf) = NaN
+// Cos(NaN) = NaN
+// The algorithm used is minimax polynomial approximation.
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·cosAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLTU L35
+ FMOVD F0, F1
+L21:
+ MOVD $sincosc7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincosc6<>+0(SB), R1
+ VLEG $0, 0(R1), V20
+ MOVD $sincosc5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincosc4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincosc2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincosc3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincosc1<>+0(SB), R1
+ FMOVD 0(R1), F5
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F2
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ WFMSDB V0, V2, V3, V2
+ FMOVD 0(R1), F3
+ WFCHDBS V3, V1, V3
+ WORD $0xB3CD0012 //lgdr %r1,%f2
+ BEQ L36
+ MOVD $sincosxadd<>+0(SB), R2
+ FMOVD 0(R2), F4
+ FADD F2, F4
+ MOVD $sincosxpi2h<>+0(SB), R2
+ FMOVD 0(R2), F2
+ WFMSDB V4, V2, V0, V2
+ MOVD $sincosxpi2m<>+0(SB), R2
+ FMOVD 0(R2), F0
+ WFMADB V4, V0, V2, V0
+ MOVD $sincosxpi2l<>+0(SB), R2
+ WFMDB V0, V0, V2
+ FMOVD 0(R2), F5
+ WFMDB V2, V2, V6
+ MOVD $sincosxlim<>+0(SB), R2
+ WORD $0xA7110001 //tmll %r1,1
+ BNE L25
+ FMOVD 0(R2), F0
+ WFCHDBS V0, V1, V0
+ BNE L33
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F1
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V2, V0, V1, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F1
+ WFMADB V6, V4, V1, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V6, V0, V3, V0
+ FMOVD 0(R2), F1
+ WFMADB V2, V4, V1, V4
+ WORD $0xA7110002 //tmll %r1,2
+ WFMADB V6, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F4
+ WFMADB V2, V0, V4, V0
+ BNE L34
+ FMOVD F0, ret+8(FP)
+ RET
+
+L25:
+ FMOVD 0(R2), F3
+ WFCHDBS V3, V1, V1
+ BNE L33
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F1
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss4<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F7
+ WFMADB V6, V3, V7, V3
+ MOVD $sincoss3<>+0(SB), R2
+ FMADD F5, F4, F0, F0
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ FMADD F1, F6, F4, F4
+ FMOVD 0(R2), F1
+ FMADD F3, F2, F1, F1
+ FMUL F0, F2
+ WFMADB V6, V4, V1, V6
+ WORD $0xA7110002 //tmll %r1,2
+ FMADD F6, F2, F0, F0
+ BNE L34
+ FMOVD F0, ret+8(FP)
+ RET
+
+L33:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L36:
+ FMUL F0, F0
+ MOVD $sincosc0<>+0(SB), R1
+ WFMDB V0, V0, V1
+ WFMADB V0, V4, V20, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V0, V4, V18, V4
+ WFMADB V0, V6, V5, V6
+ WFMADB V1, V4, V7, V4
+ FMOVD 0(R1), F2
+ WFMADB V1, V4, V6, V4
+ WFMADB V0, V4, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L35:
+ FNEG F0, F1
+ BR L21
+L34:
+ FNEG F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/sincos.go b/src/math/sincos.go
index 7180303..6e663d0 100644
--- a/src/math/sincos.go
+++ b/src/math/sincos.go
@@ -40,8 +40,8 @@
y := float64(j) // integer part of x/(Pi/4), as float
if j&1 == 1 { // map zeros to origin
- j += 1
- y += 1
+ j++
+ y++
}
j &= 7 // octant modulo 2Pi radians (360 degrees)
if j > 3 { // reflect in x axis
diff --git a/src/math/sinh.go b/src/math/sinh.go
index 139b911..2bdd7b1 100644
--- a/src/math/sinh.go
+++ b/src/math/sinh.go
@@ -22,7 +22,9 @@
// Sinh(±0) = ±0
// Sinh(±Inf) = ±Inf
// Sinh(NaN) = NaN
-func Sinh(x float64) float64 {
+func Sinh(x float64) float64
+
+func sinh(x float64) float64 {
// The coefficients are #2029 from Hart & Cheney. (20.36D)
const (
P0 = -0.6307673640497716991184787251e+6
@@ -66,7 +68,9 @@
// Cosh(±0) = 1
// Cosh(±Inf) = +Inf
// Cosh(NaN) = NaN
-func Cosh(x float64) float64 {
+func Cosh(x float64) float64
+
+func cosh(x float64) float64 {
if x < 0 {
x = -x
}
diff --git a/src/math/sinh_s390x.s b/src/math/sinh_s390x.s
new file mode 100644
index 0000000..e492415
--- /dev/null
+++ b/src/math/sinh_s390x.s
@@ -0,0 +1,261 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+#include "textflag.h"
+
+// Constants
+DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16
+DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00
+DATA sinhrodataL21<>+16(SB)/8, $704.E0
+GLOBL sinhrodataL21<>+0(SB), RODATA, $24
+DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000
+GLOBL sinhrlog2<>+0(SB), RODATA, $8
+DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000
+GLOBL sinhxinf<>+0(SB), RODATA, $8
+DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6
+GLOBL sinhxinit<>+0(SB), RODATA, $8
+DATA sinhxlim1<>+0(SB)/8, $800.E0
+GLOBL sinhxlim1<>+0(SB), RODATA, $8
+DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb
+GLOBL sinhxadd<>+0(SB), RODATA, $8
+DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000
+GLOBL sinhx4ff<>+0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe0<>+0(SB), RODATA, $8
+DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe1<>+0(SB), RODATA, $8
+DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00
+GLOBL sinhe2<>+0(SB), RODATA, $8
+DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00
+GLOBL sinhe3<>+0(SB), RODATA, $8
+DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01
+GLOBL sinhe4<>+0(SB), RODATA, $8
+DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02
+GLOBL sinhe5<>+0(SB), RODATA, $8
+DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02
+GLOBL sinhe6<>+0(SB), RODATA, $8
+DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03
+GLOBL sinhe7<>+0(SB), RODATA, $8
+DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04
+GLOBL sinhe8<>+0(SB), RODATA, $8
+DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05
+GLOBL sinhe9<>+0(SB), RODATA, $8
+
+// Sinh returns the hyperbolic sine of the argument.
+//
+// Special cases are:
+// Sinh(±0) = ±0
+// Sinh(±Inf) = ±Inf
+// Sinh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·sinhAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ //specail case Sinh(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ sinhIsZero
+ //specail case Sinh(±Inf = ±Inf
+ FMOVD $1.797693134862315708145274237317043567981e+308, F1
+ FCMPU F1, F0
+ BLEU sinhIsInf
+ FMOVD $-1.797693134862315708145274237317043567981e+308, F1
+ FCMPU F1, F0
+ BGT sinhIsInf
+
+ MOVD $sinhrodataL21<>+0(SB), R5
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ MOVD sinhxinit<>+0(SB), R1
+ FMOVD F0, F4
+ MOVD R1, R3
+ BLTU L19
+ FMOVD F0, F2
+L2:
+ WORD $0xED205010 //cdb %f2,.L22-.L21(%r5)
+ BYTE $0x00
+ BYTE $0x19
+ BGE L15 //jnl .L15
+ BVS L15
+ WFCEDBS V2, V2, V0
+ BEQ L20
+L12:
+ FMOVD F4, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L15:
+ WFCEDBS V2, V2, V0
+ BVS L12
+ MOVD $sinhxlim1<>+0(SB), R2
+ FMOVD 0(R2), F0
+ WFCHDBS V0, V2, V0
+ BEQ L6
+ WFCHEDBS V4, V2, V6
+ MOVD $sinhxinf<>+0(SB), R1
+ FMOVD 0(R1), F0
+ BNE LEXITTAGsinh
+ WFCHDBS V2, V4, V2
+ BNE L16
+ FNEG F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L19:
+ FNEG F0, F2
+ BR L2
+L6:
+ MOVD $sinhxadd<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD sinhrlog2<>+0(SB), R2
+ WORD $0xB3C10062 //ldgr %f6,%r2
+ WFMSDB V4, V6, V0, V16
+ FMOVD sinhrodataL21<>+8(SB), F6
+ WFADB V0, V16, V0
+ FMOVD sinhrodataL21<>+0(SB), F3
+ WFMSDB V0, V6, V4, V6
+ MOVD $sinhe9<>+0(SB), R2
+ WFMADB V0, V3, V6, V0
+ FMOVD 0(R2), F1
+ MOVD $sinhe7<>+0(SB), R2
+ WFMDB V0, V0, V6
+ FMOVD 0(R2), F5
+ MOVD $sinhe8<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sinhe6<>+0(SB), R2
+ WFMADB V6, V1, V5, V1
+ FMOVD 0(R2), F5
+ MOVD $sinhe5<>+0(SB), R2
+ FMOVD 0(R2), F7
+ MOVD $sinhe3<>+0(SB), R2
+ WFMADB V6, V3, V5, V3
+ FMOVD 0(R2), F5
+ MOVD $sinhe4<>+0(SB), R2
+ WFMADB V6, V7, V5, V7
+ FMOVD 0(R2), F5
+ MOVD $sinhe2<>+0(SB), R2
+ VLEG $0, 0(R2), V20
+ WFMDB V6, V6, V18
+ WFMADB V6, V5, V20, V5
+ WFMADB V1, V18, V7, V1
+ FNEG F0, F0
+ WFMADB V3, V18, V5, V3
+ MOVD $sinhe1<>+0(SB), R3
+ WFCEDBS V2, V4, V2
+ FMOVD 0(R3), F5
+ MOVD $sinhe0<>+0(SB), R3
+ WFMADB V6, V1, V5, V1
+ FMOVD 0(R3), F5
+ VLGVG $0, V16, R2
+ WFMADB V6, V3, V5, V6
+ RLL $3, R2, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ BEQ L9
+ WFMSDB V0, V1, V6, V0
+ MOVD $sinhx4ff<>+0(SB), R3
+ FNEG F0, F0
+ FMOVD 0(R3), F2
+ FMUL F2, F0
+ ANDW $0xFFFF, R2
+ WORD $0xA53FEFB6 //llill %r3,61366
+ SUBW R2, R3, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10021 //ldgr %f2,%r1
+ FMUL F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L20:
+ MOVD $sinhxadd<>+0(SB), R2
+ FMOVD 0(R2), F2
+ MOVD sinhrlog2<>+0(SB), R2
+ WORD $0xB3C10002 //ldgr %f0,%r2
+ WFMSDB V4, V0, V2, V6
+ FMOVD sinhrodataL21<>+8(SB), F0
+ FADD F6, F2
+ MOVD $sinhe9<>+0(SB), R2
+ FMSUB F0, F2, F4, F4
+ FMOVD 0(R2), F1
+ FMOVD sinhrodataL21<>+0(SB), F3
+ MOVD $sinhe7<>+0(SB), R2
+ FMADD F3, F2, F4, F4
+ FMOVD 0(R2), F0
+ MOVD $sinhe8<>+0(SB), R2
+ WFMDB V4, V4, V2
+ FMOVD 0(R2), F3
+ MOVD $sinhe6<>+0(SB), R2
+ FMOVD 0(R2), F5
+ WORD $0xB3CD0026 //lgdr %r2,%f6
+ RLL $3, R2, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V2, V1, V0, V1
+ WORD $0xB3C10001 //ldgr %f0,%r1
+ MOVD $sinhe5<>+0(SB), R1
+ WFMADB V2, V3, V5, V3
+ FMOVD 0(R1), F5
+ MOVD $sinhe3<>+0(SB), R1
+ FMOVD 0(R1), F6
+ WFMDB V2, V2, V7
+ WFMADB V2, V5, V6, V5
+ WORD $0xA7487FB6 //lhi %r4,32694
+ FNEG F4, F4
+ ANDW $0xFFFF, R2
+ SUBW R2, R4, R2
+ WORD $0xEC32000F //risbgn %r3,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10063 //ldgr %f6,%r3
+ WFADB V0, V6, V16
+ MOVD $sinhe4<>+0(SB), R1
+ WFMADB V1, V7, V5, V1
+ WFMDB V4, V16, V4
+ FMOVD 0(R1), F5
+ MOVD $sinhe2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sinhe1<>+0(SB), R1
+ WFMADB V2, V5, V16, V5
+ VLEG $0, 0(R1), V16
+ WFMADB V3, V7, V5, V3
+ WFMADB V2, V1, V16, V1
+ FSUB F6, F0
+ FMUL F1, F4
+ MOVD $sinhe0<>+0(SB), R1
+ FMOVD 0(R1), F6
+ WFMADB V2, V3, V6, V2
+ WFMADB V0, V2, V4, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L9:
+ WFMADB V0, V1, V6, V0
+ MOVD $sinhx4ff<>+0(SB), R3
+ FMOVD 0(R3), F2
+ FMUL F2, F0
+ WORD $0xA72AF000 //ahi %r2,-4096
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10021 //ldgr %f2,%r1
+ FMUL F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L16:
+ FMOVD F0, ret+8(FP)
+ RET
+
+LEXITTAGsinh:
+sinhIsInf:
+sinhIsZero:
+ FMOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/sinh_stub.s b/src/math/sinh_stub.s
new file mode 100644
index 0000000..4caaa0c
--- /dev/null
+++ b/src/math/sinh_stub.s
@@ -0,0 +1,17 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32 arm
+
+#include "textflag.h"
+
+TEXT ·Sinh(SB),NOSPLIT,$0
+ JMP ·sinh(SB)
+
+TEXT ·Cosh(SB),NOSPLIT,$0
+ JMP ·cosh(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ JMP ·tanh(SB)
+
diff --git a/src/math/sqrt_amd64.s b/src/math/sqrt_amd64.s
index f8d825d..1102903 100644
--- a/src/math/sqrt_amd64.s
+++ b/src/math/sqrt_amd64.s
@@ -5,7 +5,8 @@
#include "textflag.h"
// func Sqrt(x float64) float64
-TEXT ·Sqrt(SB),NOSPLIT,$0
+TEXT ·Sqrt(SB), NOSPLIT, $0
+ XORPS X0, X0 // break dependency
SQRTSD x+0(FP), X0
- MOVSD X0, ret+8(FP)
+ MOVSD X0, ret+8(FP)
RET
diff --git a/src/math/sqrt_mipsx.s b/src/math/sqrt_mipsx.s
new file mode 100644
index 0000000..1b27d49
--- /dev/null
+++ b/src/math/sqrt_mipsx.s
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips mipsle
+
+#include "textflag.h"
+
+// func Sqrt(x float64) float64
+TEXT ·Sqrt(SB),NOSPLIT,$0
+ MOVD x+0(FP), F0
+ SQRTD F0, F0
+ MOVD F0, ret+8(FP)
+ RET
diff --git a/src/math/stubs_arm64.s b/src/math/stubs_arm64.s
index 04de911..d8c9aa8 100644
--- a/src/math/stubs_arm64.s
+++ b/src/math/stubs_arm64.s
@@ -18,33 +18,18 @@
TEXT ·Atan(SB),NOSPLIT,$0
B ·atan(SB)
-TEXT ·Dim(SB),NOSPLIT,$0
- B ·dim(SB)
-
-TEXT ·Min(SB),NOSPLIT,$0
- B ·min(SB)
-
-TEXT ·Max(SB),NOSPLIT,$0
- B ·max(SB)
-
TEXT ·Exp2(SB),NOSPLIT,$0
B ·exp2(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0
+ B ·cosh(SB)
+
TEXT ·Expm1(SB),NOSPLIT,$0
B ·expm1(SB)
TEXT ·Exp(SB),NOSPLIT,$0
B ·exp(SB)
-TEXT ·Floor(SB),NOSPLIT,$0
- B ·floor(SB)
-
-TEXT ·Ceil(SB),NOSPLIT,$0
- B ·ceil(SB)
-
-TEXT ·Trunc(SB),NOSPLIT,$0
- B ·trunc(SB)
-
TEXT ·Frexp(SB),NOSPLIT,$0
B ·frexp(SB)
@@ -66,9 +51,6 @@
TEXT ·Log(SB),NOSPLIT,$0
B ·log(SB)
-TEXT ·Modf(SB),NOSPLIT,$0
- B ·modf(SB)
-
TEXT ·Mod(SB),NOSPLIT,$0
B ·mod(SB)
@@ -81,8 +63,14 @@
TEXT ·Sin(SB),NOSPLIT,$0
B ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0
+ B ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
B ·cos(SB)
TEXT ·Tan(SB),NOSPLIT,$0
B ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ B ·tanh(SB)
diff --git a/src/math/stubs_mips64x.s b/src/math/stubs_mips64x.s
index 97e6e4c..21df5cc 100644
--- a/src/math/stubs_mips64x.s
+++ b/src/math/stubs_mips64x.s
@@ -81,11 +81,20 @@
TEXT ·Sin(SB),NOSPLIT,$0
JMP ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0
+ JMP ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
JMP ·cos(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0
+ JMP ·cosh(SB)
+
TEXT ·Sqrt(SB),NOSPLIT,$0
JMP ·sqrt(SB)
TEXT ·Tan(SB),NOSPLIT,$0
JMP ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ JMP ·tanh(SB)
diff --git a/src/math/stubs_mipsx.s b/src/math/stubs_mipsx.s
new file mode 100644
index 0000000..b869768
--- /dev/null
+++ b/src/math/stubs_mipsx.s
@@ -0,0 +1,98 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips mipsle
+
+#include "textflag.h"
+
+TEXT ·Asin(SB),NOSPLIT,$0
+ JMP ·asin(SB)
+
+TEXT ·Acos(SB),NOSPLIT,$0
+ JMP ·acos(SB)
+
+TEXT ·Atan2(SB),NOSPLIT,$0
+ JMP ·atan2(SB)
+
+TEXT ·Atan(SB),NOSPLIT,$0
+ JMP ·atan(SB)
+
+TEXT ·Dim(SB),NOSPLIT,$0
+ JMP ·dim(SB)
+
+TEXT ·Min(SB),NOSPLIT,$0
+ JMP ·min(SB)
+
+TEXT ·Max(SB),NOSPLIT,$0
+ JMP ·max(SB)
+
+TEXT ·Exp2(SB),NOSPLIT,$0
+ JMP ·exp2(SB)
+
+TEXT ·Expm1(SB),NOSPLIT,$0
+ JMP ·expm1(SB)
+
+TEXT ·Exp(SB),NOSPLIT,$0
+ JMP ·exp(SB)
+
+TEXT ·Floor(SB),NOSPLIT,$0
+ JMP ·floor(SB)
+
+TEXT ·Ceil(SB),NOSPLIT,$0
+ JMP ·ceil(SB)
+
+TEXT ·Trunc(SB),NOSPLIT,$0
+ JMP ·trunc(SB)
+
+TEXT ·Frexp(SB),NOSPLIT,$0
+ JMP ·frexp(SB)
+
+TEXT ·Hypot(SB),NOSPLIT,$0
+ JMP ·hypot(SB)
+
+TEXT ·Ldexp(SB),NOSPLIT,$0
+ JMP ·ldexp(SB)
+
+TEXT ·Log10(SB),NOSPLIT,$0
+ JMP ·log10(SB)
+
+TEXT ·Log2(SB),NOSPLIT,$0
+ JMP ·log2(SB)
+
+TEXT ·Log1p(SB),NOSPLIT,$0
+ JMP ·log1p(SB)
+
+TEXT ·Log(SB),NOSPLIT,$0
+ JMP ·log(SB)
+
+TEXT ·Modf(SB),NOSPLIT,$0
+ JMP ·modf(SB)
+
+TEXT ·Mod(SB),NOSPLIT,$0
+ JMP ·mod(SB)
+
+TEXT ·Remainder(SB),NOSPLIT,$0
+ JMP ·remainder(SB)
+
+TEXT ·Sincos(SB),NOSPLIT,$0
+ JMP ·sincos(SB)
+
+TEXT ·Sin(SB),NOSPLIT,$0
+ JMP ·sin(SB)
+
+TEXT ·Sinh(SB),NOSPLIT,$0
+ JMP ·sinh(SB)
+
+TEXT ·Cos(SB),NOSPLIT,$0
+ JMP ·cos(SB)
+
+TEXT ·Cosh(SB),NOSPLIT,$0
+ JMP ·cosh(SB)
+
+TEXT ·Tan(SB),NOSPLIT,$0
+ JMP ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ JMP ·tanh(SB)
+
diff --git a/src/math/stubs_ppc64x.s b/src/math/stubs_ppc64x.s
index a57357e..b622016 100644
--- a/src/math/stubs_ppc64x.s
+++ b/src/math/stubs_ppc64x.s
@@ -36,15 +36,6 @@
TEXT ·Exp(SB),NOSPLIT,$0
BR ·exp(SB)
-TEXT ·Floor(SB),NOSPLIT,$0
- BR ·floor(SB)
-
-TEXT ·Ceil(SB),NOSPLIT,$0
- BR ·ceil(SB)
-
-TEXT ·Trunc(SB),NOSPLIT,$0
- BR ·trunc(SB)
-
TEXT ·Frexp(SB),NOSPLIT,$0
BR ·frexp(SB)
@@ -81,8 +72,17 @@
TEXT ·Sin(SB),NOSPLIT,$0
BR ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0
+ BR ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
BR ·cos(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0
+ BR ·cosh(SB)
+
TEXT ·Tan(SB),NOSPLIT,$0
BR ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ BR ·tanh(SB)
diff --git a/src/math/stubs_s390x.s b/src/math/stubs_s390x.s
index 7686844..8da55c5 100644
--- a/src/math/stubs_s390x.s
+++ b/src/math/stubs_s390x.s
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "../runtime/textflag.h"
+#include "textflag.h"
TEXT ·Asin(SB),NOSPLIT,$0
BR ·asin(SB)
@@ -25,15 +25,6 @@
TEXT ·Exp(SB),NOSPLIT,$0
BR ·exp(SB)
-TEXT ·Floor(SB),NOSPLIT,$0
- BR ·floor(SB)
-
-TEXT ·Ceil(SB),NOSPLIT,$0
- BR ·ceil(SB)
-
-TEXT ·Trunc(SB),NOSPLIT,$0
- BR ·trunc(SB)
-
TEXT ·Frexp(SB),NOSPLIT,$0
BR ·frexp(SB)
@@ -43,9 +34,6 @@
TEXT ·Ldexp(SB),NOSPLIT,$0
BR ·ldexp(SB)
-TEXT ·Log10(SB),NOSPLIT,$0
- BR ·log10(SB)
-
TEXT ·Log2(SB),NOSPLIT,$0
BR ·log2(SB)
@@ -67,11 +55,154 @@
TEXT ·Sincos(SB),NOSPLIT,$0
BR ·sincos(SB)
-TEXT ·Sin(SB),NOSPLIT,$0
- BR ·sin(SB)
-
-TEXT ·Cos(SB),NOSPLIT,$0
- BR ·cos(SB)
-
TEXT ·Tan(SB),NOSPLIT,$0
BR ·tan(SB)
+
+//if go assembly use vector instruction
+TEXT ·hasVectorFacility(SB),NOSPLIT,$24-1
+ MOVD $x-24(SP), R1
+ XC $24, 0(R1), 0(R1) // clear the storage
+ MOVD $2, R0 // R0 is the number of double words stored -1
+ WORD $0xB2B01000 // STFLE 0(R1)
+ XOR R0, R0 // reset the value of R0
+ MOVBZ z-8(SP), R1
+ AND $0x40, R1
+ BEQ novector
+vectorinstalled:
+ // check if the vector instruction has been enabled
+ VLEIB $0, $0xF, V16
+ VLGVB $0, V16, R1
+ CMPBNE R1, $0xF, novector
+ MOVB $1, ret+0(FP) // have vx
+ RET
+novector:
+ MOVB $0, ret+0(FP) // no vx
+ RET
+
+TEXT ·Log10(SB),NOSPLIT,$0
+ MOVD log10vectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·log10TrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $log10vectorfacility+0x00(SB), R1
+ MOVD $·log10(SB), R2
+ MOVD R2, 0(R1)
+ BR ·log10(SB)
+vectorimpl:
+ MOVD $log10vectorfacility+0x00(SB), R1
+ MOVD $·log10Asm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·log10Asm(SB)
+
+GLOBL log10vectorfacility+0x00(SB), NOPTR, $8
+DATA log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB)
+
+
+TEXT ·Cos(SB),NOSPLIT,$0
+ MOVD cosvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·cosTrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $cosvectorfacility+0x00(SB), R1
+ MOVD $·cos(SB), R2
+ MOVD R2, 0(R1)
+ BR ·cos(SB)
+vectorimpl:
+ MOVD $cosvectorfacility+0x00(SB), R1
+ MOVD $·cosAsm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·cosAsm(SB)
+
+GLOBL cosvectorfacility+0x00(SB), NOPTR, $8
+DATA cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB)
+
+
+TEXT ·Cosh(SB),NOSPLIT,$0
+ MOVD coshvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·coshTrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $coshvectorfacility+0x00(SB), R1
+ MOVD $·cosh(SB), R2
+ MOVD R2, 0(R1)
+ BR ·cosh(SB)
+vectorimpl:
+ MOVD $coshvectorfacility+0x00(SB), R1
+ MOVD $·coshAsm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·coshAsm(SB)
+
+GLOBL coshvectorfacility+0x00(SB), NOPTR, $8
+DATA coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB)
+
+
+TEXT ·Sin(SB),NOSPLIT,$0
+ MOVD sinvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·sinTrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $sinvectorfacility+0x00(SB), R1
+ MOVD $·sin(SB), R2
+ MOVD R2, 0(R1)
+ BR ·sin(SB)
+vectorimpl:
+ MOVD $sinvectorfacility+0x00(SB), R1
+ MOVD $·sinAsm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·sinAsm(SB)
+
+GLOBL sinvectorfacility+0x00(SB), NOPTR, $8
+DATA sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB)
+
+
+TEXT ·Sinh(SB),NOSPLIT,$0
+ MOVD sinhvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·sinhTrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $sinhvectorfacility+0x00(SB), R1
+ MOVD $·sinh(SB), R2
+ MOVD R2, 0(R1)
+ BR ·sinh(SB)
+vectorimpl:
+ MOVD $sinhvectorfacility+0x00(SB), R1
+ MOVD $·sinhAsm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·sinhAsm(SB)
+
+GLOBL sinhvectorfacility+0x00(SB), NOPTR, $8
+DATA sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB)
+
+
+
+TEXT ·Tanh(SB),NOSPLIT,$0
+ MOVD tanhvectorfacility+0x00(SB),R1
+ BR (R1)
+
+TEXT ·tanhTrampolineSetup(SB),NOSPLIT, $0
+ MOVB ·hasVX(SB), R1
+ CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
+ MOVD $tanhvectorfacility+0x00(SB), R1
+ MOVD $·tanh(SB), R2
+ MOVD R2, 0(R1)
+ BR ·tanh(SB)
+vectorimpl:
+ MOVD $tanhvectorfacility+0x00(SB), R1
+ MOVD $·tanhAsm(SB), R2
+ MOVD R2, 0(R1)
+ BR ·tanhAsm(SB)
+
+GLOBL tanhvectorfacility+0x00(SB), NOPTR, $8
+DATA tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB)
+
+
diff --git a/src/math/tan.go b/src/math/tan.go
index 285eff1..aa2fb37 100644
--- a/src/math/tan.go
+++ b/src/math/tan.go
@@ -108,8 +108,8 @@
/* map zeros and singularities to origin */
if j&1 == 1 {
- j += 1
- y += 1
+ j++
+ y++
}
z := ((x - y*PI4A) - y*PI4B) - y*PI4C
diff --git a/src/math/tanh.go b/src/math/tanh.go
index cf0ffa1..eaa0e4c 100644
--- a/src/math/tanh.go
+++ b/src/math/tanh.go
@@ -71,7 +71,9 @@
// Tanh(±0) = ±0
// Tanh(±Inf) = ±1
// Tanh(NaN) = NaN
-func Tanh(x float64) float64 {
+func Tanh(x float64) float64
+
+func tanh(x float64) float64 {
const MAXLOG = 8.8029691931113054295988e+01 // log(2**127)
z := Abs(x)
switch {
diff --git a/src/math/tanh_s390x.s b/src/math/tanh_s390x.s
new file mode 100644
index 0000000..1b76c14
--- /dev/null
+++ b/src/math/tanh_s390x.s
@@ -0,0 +1,173 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximations
+DATA tanhrodataL18<>+0(SB)/8, $-1.0
+DATA tanhrodataL18<>+8(SB)/8, $-2.0
+DATA tanhrodataL18<>+16(SB)/8, $1.0
+DATA tanhrodataL18<>+24(SB)/8, $2.0
+DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01
+DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01
+DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00
+DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00
+DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01
+DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01
+DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00
+DATA tanhrodataL18<>+88(SB)/8, $20.E0
+GLOBL tanhrodataL18<>+0(SB), RODATA, $96
+
+// Constants
+DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000
+GLOBL tanhrlog2<>+0(SB), RODATA, $8
+DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0
+GLOBL tanhxadd<>+0(SB), RODATA, $8
+DATA tanhxmone<>+0(SB)/8, $-1.0
+GLOBL tanhxmone<>+0(SB), RODATA, $8
+DATA tanhxzero<>+0(SB)/8, $0
+GLOBL tanhxzero<>+0(SB), RODATA, $8
+
+// Polynomial coefficients
+DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00
+DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01
+DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01
+DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01
+DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01
+DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01
+DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01
+DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01
+DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01
+DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01
+DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01
+DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01
+DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01
+DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01
+DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01
+DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01
+GLOBL tanhtab<>+0(SB), RODATA, $128
+
+// Tanh returns the hyperbolic tangent of the argument.
+//
+// Special cases are:
+// Tanh(±0) = ±0
+// Tanh(±Inf) = ±1
+// Tanh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT ·tanhAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ //specail case Tanh(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ tanhIsZero
+ MOVD $tanhrodataL18<>+0(SB), R5
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ MOVD $0x4034000000000000, R1
+ BLTU L15
+ FMOVD F0, F1
+L2:
+ MOVD $tanhxadd<>+0(SB), R2
+ FMOVD 0(R2), F2
+ MOVD tanhrlog2<>+0(SB), R2
+ WORD $0xB3C10042 //ldgr %f4,%r2
+ WFMSDB V0, V4, V2, V4
+ MOVD $tanhtab<>+0(SB), R3
+ WORD $0xB3CD0024 //lgdr %r2,%f4
+ WORD $0xEC4239BC //risbg %r4,%r2,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WORD $0xED105058 //cdb %f1,.L19-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x19
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0x68543000 //ld %f5,0(%r4,%r3)
+ WORD $0xB3C10061 //ldgr %f6,%r1
+ BLT L3
+ MOVD $tanhxzero<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFCHDBS V0, V2, V4
+ BEQ L9
+ WFCHDBS V2, V0, V2
+ BNE L1
+ MOVD $tanhxmone<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L3:
+ FADD F4, F2
+ FMOVD tanhrodataL18<>+80(SB), F4
+ FMADD F4, F2, F0, F0
+ FMOVD tanhrodataL18<>+72(SB), F1
+ WFMDB V0, V0, V3
+ FMOVD tanhrodataL18<>+64(SB), F2
+ WFMADB V0, V1, V2, V1
+ FMOVD tanhrodataL18<>+56(SB), F4
+ FMOVD tanhrodataL18<>+48(SB), F2
+ WFMADB V1, V3, V4, V1
+ FMOVD tanhrodataL18<>+40(SB), F4
+ WFMADB V3, V2, V4, V2
+ FMOVD tanhrodataL18<>+32(SB), F4
+ WORD $0xB9270022 //lhr %r2,%r2
+ WFMADB V3, V1, V4, V1
+ FMOVD tanhrodataL18<>+24(SB), F4
+ WFMADB V3, V2, V4, V3
+ WFMADB V0, V5, V0, V2
+ WFMADB V0, V1, V3, V0
+ WORD $0xA7183ECF //lhi %r1,16079
+ WFMADB V0, V2, V5, V2
+ FMUL F6, F2
+ MOVW R2, R10
+ MOVW R1, R11
+ CMPBLE R10, R11, L16
+ FMOVD F6, F0
+ WORD $0xED005010 //adb %f0,.L28-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x1A
+ WORD $0xA7184330 //lhi %r1,17200
+ FADD F2, F0
+ MOVW R2, R10
+ MOVW R1, R11
+ CMPBGT R10, R11, L17
+ WORD $0xED605010 //sdb %f6,.L28-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x1B
+ FADD F6, F2
+ WFDDB V0, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L9:
+ FMOVD tanhrodataL18<>+16(SB), F0
+L1:
+ FMOVD F0, ret+8(FP)
+ RET
+
+L15:
+ FNEG F0, F1
+ BR L2
+L16:
+ FADD F6, F2
+ FMOVD tanhrodataL18<>+8(SB), F0
+ FMADD F4, F2, F0, F0
+ FMOVD tanhrodataL18<>+0(SB), F4
+ FNEG F0, F0
+ WFMADB V0, V2, V4, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L17:
+ WFDDB V0, V4, V0
+ FMOVD tanhrodataL18<>+16(SB), F2
+ WFSDB V0, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+tanhIsZero: //return ±0
+ FMOVD F0, ret+8(FP)
+ RET