Update prebuilts to go1.10 ab/4625579
Test: m -j blueprint_tools
Change-Id: I12d0286a2978fcbafa50880625700ba69c4581d8
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
index 8d388da..89125f4 100644
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -47,26 +47,25 @@
func memhash0(p unsafe.Pointer, h uintptr) uintptr {
return h
}
+
func memhash8(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 1)
}
+
func memhash16(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 2)
}
-func memhash32(p unsafe.Pointer, h uintptr) uintptr {
- return memhash(p, h, 4)
-}
-func memhash64(p unsafe.Pointer, h uintptr) uintptr {
- return memhash(p, h, 8)
-}
+
func memhash128(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 16)
}
-// memhash_varlen is defined in assembly because it needs access
-// to the closure. It appears here to provide an argument
-// signature for the assembly routine.
-func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr
+//go:nosplit
+func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
+ ptr := getclosureptr()
+ size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h)))
+ return memhash(p, h, size)
+}
var algarray = [alg_max]typeAlg{
alg_NOEQ: {nil, nil},
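
Note on the alg.go hunk above: memhash_varlen used to be a per-architecture
assembly stub (see the deletions in the asm_*.s files below) because it needs
the element size that the compiler stores in the hash closure. In go1.10 it is
plain Go: getclosureptr() returns the closure pointer, and the size sits one
word past it, right after the code pointer. A minimal user-level sketch of the
same "size rides along in the closure" idea (illustrative only; hashFor and the
toy hash are not runtime APIs, and the runtime reads the captured word directly
rather than calling through a Go closure):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // hashFor captures size the way the compiler-built closure does for
    // memhash_varlen: the returned func hashes exactly size bytes at p.
    func hashFor(size uintptr, hash func(p unsafe.Pointer, h, n uintptr) uintptr) func(unsafe.Pointer, uintptr) uintptr {
        return func(p unsafe.Pointer, h uintptr) uintptr {
            return hash(p, h, size)
        }
    }

    func main() {
        // Toy byte-at-a-time hash standing in for runtime.memhash.
        toy := func(p unsafe.Pointer, h, n uintptr) uintptr {
            for i := uintptr(0); i < n; i++ {
                h = h*31 + uintptr(*(*byte)(unsafe.Pointer(uintptr(p) + i)))
            }
            return h
        }
        var x [8]byte
        hash8 := hashFor(unsafe.Sizeof(x), toy)
        fmt.Println(hash8(unsafe.Pointer(&x), 0))
    }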
diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go
index 6bd8f3b..ef1e812 100644
--- a/src/runtime/append_test.go
+++ b/src/runtime/append_test.go
@@ -18,42 +18,52 @@
}
}
-func BenchmarkGrowSliceBytes(b *testing.B) {
- b.StopTimer()
- var x = make([]byte, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]byte(nil), x...)
- }
-}
+type (
+ struct24 struct{ a, b, c int64 }
+ struct32 struct{ a, b, c, d int64 }
+ struct40 struct{ a, b, c, d, e int64 }
+)
-func BenchmarkGrowSliceInts(b *testing.B) {
- b.StopTimer()
- var x = make([]int, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]int(nil), x...)
- }
-}
+func BenchmarkGrowSlice(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ x := make([]byte, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]byte(nil), x...)
+ }
+ })
+ b.Run("Int", func(b *testing.B) {
+ x := make([]int, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]int(nil), x...)
+ }
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ x := make([]*byte, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]*byte(nil), x...)
+ }
+ })
+ b.Run("Struct", func(b *testing.B) {
+ b.Run("24", func(b *testing.B) {
+ x := make([]struct24, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct24(nil), x...)
+ }
+ })
+ b.Run("32", func(b *testing.B) {
+ x := make([]struct32, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct32(nil), x...)
+ }
+ })
+ b.Run("40", func(b *testing.B) {
+ x := make([]struct40, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct40(nil), x...)
+ }
+ })
-func BenchmarkGrowSlicePtr(b *testing.B) {
- b.StopTimer()
- var x = make([]*byte, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]*byte(nil), x...)
- }
-}
-
-type struct24 struct{ a, b, c int64 }
-
-func BenchmarkGrowSliceStruct24Bytes(b *testing.B) {
- b.StopTimer()
- var x = make([]struct24, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]struct24(nil), x...)
- }
+ })
}
func BenchmarkAppend(b *testing.B) {
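
Note on the append_test.go hunk above: the per-type GrowSlice functions are
folded into one benchmark with b.Run sub-benchmarks, and two new struct sizes
(32 and 40 bytes) are added alongside the existing 24-byte case. Individual
cases can still be selected with the usual slash-separated -bench pattern,
e.g. go test -run='^$' -bench='BenchmarkGrowSlice/Struct/24' runtime.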
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 5bbf286..80a1451 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -7,10 +7,93 @@
#include "funcdata.h"
#include "textflag.h"
+// _rt0_386 is common startup code for most 386 systems when using
+// internal linking. This is the entry point for the program from the
+// kernel for an ordinary -buildmode=exe program. The stack holds the
+// number of arguments and the C-style argv.
+TEXT _rt0_386(SB),NOSPLIT,$8
+ MOVL 8(SP), AX // argc
+ LEAL 12(SP), BX // argv
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ JMP runtime·rt0_go(SB)
+
+// _rt0_386_lib is common startup code for most 386 systems when
+// using -buildmode=c-archive or -buildmode=c-shared. The linker will
+// arrange to invoke this function as a global constructor (for
+// c-archive) or when the shared library is loaded (for c-shared).
+// We expect argc and argv to be passed on the stack following the
+// usual C ABI.
+TEXT _rt0_386_lib(SB),NOSPLIT,$0
+ PUSHL BP
+ MOVL SP, BP
+ PUSHL BX
+ PUSHL SI
+ PUSHL DI
+
+ MOVL 8(BP), AX
+ MOVL AX, _rt0_386_lib_argc<>(SB)
+ MOVL 12(BP), AX
+ MOVL AX, _rt0_386_lib_argv<>(SB)
+
+ // Synchronous initialization.
+ CALL runtime·libpreinit(SB)
+
+ SUBL $8, SP
+
+ // Create a new thread to do the runtime initialization.
+ MOVL _cgo_sys_thread_create(SB), AX
+ TESTL AX, AX
+ JZ nocgo
+
+ // Align stack to call C function.
+ // We moved SP to BP above, but BP was clobbered by the libpreinit call.
+ MOVL SP, BP
+ ANDL $~15, SP
+
+ MOVL $_rt0_386_lib_go(SB), BX
+ MOVL BX, 0(SP)
+ MOVL $0, 4(SP)
+
+ CALL AX
+
+ MOVL BP, SP
+
+ JMP restore
+
+nocgo:
+ MOVL $0x800000, 0(SP) // stacksize = 8192KB
+ MOVL $_rt0_386_lib_go(SB), AX
+ MOVL AX, 4(SP) // fn
+ CALL runtime·newosproc0(SB)
+
+restore:
+ ADDL $8, SP
+ POPL DI
+ POPL SI
+ POPL BX
+ POPL BP
+ RET
+
+// _rt0_386_lib_go initializes the Go runtime.
+// This is started in a separate thread by _rt0_386_lib.
+TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
+ MOVL _rt0_386_lib_argc<>(SB), AX
+ MOVL AX, 0(SP)
+ MOVL _rt0_386_lib_argv<>(SB), AX
+ MOVL AX, 4(SP)
+ JMP runtime·rt0_go(SB)
+
+DATA _rt0_386_lib_argc<>(SB)/4, $0
+GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
+DATA _rt0_386_lib_argv<>(SB)/4, $0
+GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
+
TEXT runtime·rt0_go(SB),NOSPLIT,$0
- // copy arguments forward on an even stack
- MOVL argc+0(FP), AX
- MOVL argv+4(FP), BX
+ // Copy arguments forward on an even stack.
+ // Users of this function jump to it, they don't call it.
+ MOVL 0(SP), AX
+ MOVL 4(SP), BX
SUBL $128, SP // plenty of scratch
ANDL $~15, SP
MOVL AX, 120(SP) // save argc, argv away
@@ -279,18 +362,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $8-4
MOVL buf+0(FP), BX // gobuf
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVL gobuf_ctxt(BX), DX
- TESTL DX, DX
- JZ nilctxt
- LEAL gobuf_ctxt(BX), AX
- MOVL AX, 0(SP)
- MOVL $0, 4(SP)
- CALL runtime·writebarrierptr_prewrite(SB)
- MOVL buf+0(FP), BX
-
-nilctxt:
MOVL gobuf_g(BX), DX
MOVL 0(DX), CX // make sure g != nil
get_tls(CX)
@@ -403,11 +474,12 @@
RET
noswitch:
- // already on system stack, just call directly
+ // already on system stack; tail call the function
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVL DI, DX
MOVL 0(DI), DI
- CALL DI
- RET
+ JMP DI
/*
* support for morestack
@@ -453,7 +525,7 @@
MOVL SI, (g_sched+gobuf_g)(SI)
LEAL 4(SP), AX // f's SP
MOVL AX, (g_sched+gobuf_sp)(SI)
- // newstack will fill gobuf.ctxt.
+ MOVL DX, (g_sched+gobuf_ctxt)(SI)
// Call newstack on m->g0's stack.
MOVL m_g0(BX), BP
@@ -461,10 +533,8 @@
MOVL (g_sched+gobuf_sp)(BP), AX
MOVL -4(AX), BX // fault if CALL would, before smashing SP
MOVL AX, SP
- PUSHL DX // ctxt argument
CALL runtime·newstack(SB)
MOVL $0, 0x1003 // crash if newstack returns
- POPL DX // keep balance check happy
RET
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
@@ -849,12 +919,6 @@
INT $3
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
- MOVL argp+0(FP),AX // addr of first arg
- MOVL -4(AX),AX // get calling pc
- MOVL AX, ret+4(FP)
- RET
-
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
CMPB runtime·support_sse2(SB), $1
@@ -885,23 +949,6 @@
TEXT runtime·emptyfunc(SB),0,$0-0
RET
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVL p+0(FP), AX
- MOVL h+4(FP), BX
- MOVL 4(DX), CX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- MOVL CX, 8(SP)
- CALL runtime·memhash(SB)
- MOVL 12(SP), AX
- MOVL AX, ret+8(FP)
- RET
-
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
MOVL p+0(FP), AX // ptr to data
@@ -1323,23 +1370,6 @@
MOVB $1, ret+8(FP)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-17
- MOVL s1_base+0(FP), SI
- MOVL s2_base+8(FP), DI
- CMPL SI, DI
- JEQ same
- MOVL s1_len+4(FP), BX
- LEAL ret+16(FP), AX
- JMP runtime·memeqbody(SB)
-same:
- MOVB $1, ret+16(FP)
- RET
-
TEXT bytes·Equal(SB),NOSPLIT,$0-25
MOVL a_len+4(FP), BX
MOVL b_len+16(FP), CX
@@ -1637,19 +1667,6 @@
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
-// Prefetching doesn't seem to help.
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
- RET
-
// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 6405be9..576a61c 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -7,6 +7,83 @@
#include "funcdata.h"
#include "textflag.h"
+// _rt0_amd64 is common startup code for most amd64 systems when using
+// internal linking. This is the entry point for the program from the
+// kernel for an ordinary -buildmode=exe program. The stack holds the
+// number of arguments and the C-style argv.
+TEXT _rt0_amd64(SB),NOSPLIT,$-8
+ MOVQ 0(SP), DI // argc
+ LEAQ 8(SP), SI // argv
+ JMP runtime·rt0_go(SB)
+
+// main is common startup code for most amd64 systems when using
+// external linking. The C startup code will call the symbol "main"
+// passing argc and argv in the usual C ABI registers DI and SI.
+TEXT main(SB),NOSPLIT,$-8
+ JMP runtime·rt0_go(SB)
+
+// _rt0_amd64_lib is common startup code for most amd64 systems when
+// using -buildmode=c-archive or -buildmode=c-shared. The linker will
+// arrange to invoke this function as a global constructor (for
+// c-archive) or when the shared library is loaded (for c-shared).
+// We expect argc and argv to be passed in the usual C ABI registers
+// DI and SI.
+TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
+ // Align stack per ELF ABI requirements.
+ MOVQ SP, AX
+ ANDQ $~15, SP
+ // Save C ABI callee-saved registers, as caller may need them.
+ MOVQ BX, 0x10(SP)
+ MOVQ BP, 0x18(SP)
+ MOVQ R12, 0x20(SP)
+ MOVQ R13, 0x28(SP)
+ MOVQ R14, 0x30(SP)
+ MOVQ R15, 0x38(SP)
+ MOVQ AX, 0x40(SP)
+
+ MOVQ DI, _rt0_amd64_lib_argc<>(SB)
+ MOVQ SI, _rt0_amd64_lib_argv<>(SB)
+
+ // Synchronous initialization.
+ CALL runtime·libpreinit(SB)
+
+ // Create a new thread to finish Go runtime initialization.
+ MOVQ _cgo_sys_thread_create(SB), AX
+ TESTQ AX, AX
+ JZ nocgo
+ MOVQ $_rt0_amd64_lib_go(SB), DI
+ MOVQ $0, SI
+ CALL AX
+ JMP restore
+
+nocgo:
+ MOVQ $0x800000, 0(SP) // stacksize
+ MOVQ $_rt0_amd64_lib_go(SB), AX
+ MOVQ AX, 8(SP) // fn
+ CALL runtime·newosproc0(SB)
+
+restore:
+ MOVQ 0x10(SP), BX
+ MOVQ 0x18(SP), BP
+ MOVQ 0x20(SP), R12
+ MOVQ 0x28(SP), R13
+ MOVQ 0x30(SP), R14
+ MOVQ 0x38(SP), R15
+ MOVQ 0x40(SP), SP
+ RET
+
+// _rt0_amd64_lib_go initializes the Go runtime.
+// This is started in a separate thread by _rt0_amd64_lib.
+TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
+ MOVQ _rt0_amd64_lib_argc<>(SB), DI
+ MOVQ _rt0_amd64_lib_argv<>(SB), SI
+ JMP runtime·rt0_go(SB)
+
+DATA _rt0_amd64_lib_argc<>(SB)/8, $0
+GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
+DATA _rt0_amd64_lib_argv<>(SB)/8, $0
+GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
+
TEXT runtime·rt0_go(SB),NOSPLIT,$0
// copy arguments forward on an even stack
MOVQ DI, AX // argc
@@ -227,18 +304,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
MOVQ buf+0(FP), BX // gobuf
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVQ gobuf_ctxt(BX), AX
- TESTQ AX, AX
- JZ nilctxt
- LEAQ gobuf_ctxt(BX), AX
- MOVQ AX, 0(SP)
- MOVQ $0, 8(SP)
- CALL runtime·writebarrierptr_prewrite(SB)
- MOVQ buf+0(FP), BX
-
-nilctxt:
MOVQ gobuf_g(BX), DX
MOVQ 0(DX), CX // make sure g != nil
get_tls(CX)
@@ -354,11 +419,12 @@
RET
noswitch:
- // already on m stack, just call directly
+ // already on m stack; tail call the function
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVQ DI, DX
MOVQ 0(DI), DI
- CALL DI
- RET
+ JMP DI
/*
* support for morestack
@@ -405,16 +471,14 @@
LEAQ 8(SP), AX // f's SP
MOVQ AX, (g_sched+gobuf_sp)(SI)
MOVQ BP, (g_sched+gobuf_bp)(SI)
- // newstack will fill gobuf.ctxt.
+ MOVQ DX, (g_sched+gobuf_ctxt)(SI)
// Call newstack on m->g0's stack.
MOVQ m_g0(BX), BX
MOVQ BX, g(CX)
MOVQ (g_sched+gobuf_sp)(BX), SP
- PUSHQ DX // ctxt argument
CALL runtime·newstack(SB)
MOVQ $0, 0x1003 // crash if newstack returns
- POPQ DX // keep balance check happy
RET
// morestack but not preserving ctxt.
@@ -833,12 +897,6 @@
INT $3
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
- MOVQ argp+0(FP),AX // addr of first arg
- MOVQ -8(AX),AX // get calling pc
- MOVQ AX, ret+8(FP)
- RET
-
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
CMPB runtime·lfenceBeforeRdtsc(SB), $1
@@ -854,23 +912,6 @@
MOVQ AX, ret+0(FP)
RET
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVQ p+0(FP), AX
- MOVQ h+8(FP), BX
- MOVQ 8(DX), CX
- MOVQ AX, 0(SP)
- MOVQ BX, 8(SP)
- MOVQ CX, 16(SP)
- CALL runtime·memhash(SB)
- MOVQ 24(SP), AX
- MOVQ AX, ret+16(FP)
- RET
-
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-32
MOVQ p+0(FP), AX // ptr to data
@@ -1343,23 +1384,6 @@
MOVB $1, ret+16(FP)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33
- MOVQ s1_base+0(FP), SI
- MOVQ s2_base+16(FP), DI
- CMPQ SI, DI
- JEQ eq
- MOVQ s1_len+8(FP), BX
- LEAQ ret+32(FP), AX
- JMP runtime·memeqbody(SB)
-eq:
- MOVB $1, ret+32(FP)
- RET
-
// a in SI
// b in DI
// count in BX
@@ -2339,26 +2363,6 @@
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- PREFETCHT0 (AX)
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- PREFETCHT1 (AX)
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- PREFETCHT2 (AX)
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- PREFETCHNTA (AX)
- RET
-
// This is called from .init_array and follows the platform, not Go, ABI.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
@@ -2367,3 +2371,87 @@
MOVQ DI, runtime·lastmoduledatap(SB)
POPQ R15
RET
+
+// gcWriteBarrier performs a heap pointer write and informs the GC.
+//
+// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
+// - DI is the destination of the write
+// - AX is the value being written at DI
+// It clobbers FLAGS. It does not clobber any general-purpose registers,
+// but may clobber others (e.g., SSE registers).
+TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
+ // Save the registers clobbered by the fast path. This is slightly
+ // faster than having the caller spill these.
+ MOVQ R14, 104(SP)
+ MOVQ R13, 112(SP)
+ // TODO: Consider passing g.m.p in as an argument so they can be shared
+ // across a sequence of write barriers.
+ get_tls(R13)
+ MOVQ g(R13), R13
+ MOVQ g_m(R13), R13
+ MOVQ m_p(R13), R13
+ MOVQ (p_wbBuf+wbBuf_next)(R13), R14
+ // Increment wbBuf.next position.
+ LEAQ 16(R14), R14
+ MOVQ R14, (p_wbBuf+wbBuf_next)(R13)
+ CMPQ R14, (p_wbBuf+wbBuf_end)(R13)
+ // Record the write.
+ MOVQ AX, -16(R14) // Record value
+ MOVQ (DI), R13 // TODO: This turns bad writes into bad reads.
+ MOVQ R13, -8(R14) // Record *slot
+ // Is the buffer full? (flags set in CMPQ above)
+ JEQ flush
+ret:
+ MOVQ 104(SP), R14
+ MOVQ 112(SP), R13
+ // Do the write.
+ MOVQ AX, (DI)
+ RET
+
+flush:
+ // Save all general purpose registers since these could be
+ // clobbered by wbBufFlush and were not saved by the caller.
+ // It is possible for wbBufFlush to clobber other registers
+ // (e.g., SSE registers), but the compiler takes care of saving
+ // those in the caller if necessary. This strikes a balance
+ // with registers that are likely to be used.
+ //
+ // We don't have type information for these, but all code under
+ // here is NOSPLIT, so nothing will observe these.
+ //
+ // TODO: We could strike a different balance; e.g., saving X0
+ // and not saving GP registers that are less likely to be used.
+ MOVQ DI, 0(SP) // Also first argument to wbBufFlush
+ MOVQ AX, 8(SP) // Also second argument to wbBufFlush
+ MOVQ BX, 16(SP)
+ MOVQ CX, 24(SP)
+ MOVQ DX, 32(SP)
+ // DI already saved
+ MOVQ SI, 40(SP)
+ MOVQ BP, 48(SP)
+ MOVQ R8, 56(SP)
+ MOVQ R9, 64(SP)
+ MOVQ R10, 72(SP)
+ MOVQ R11, 80(SP)
+ MOVQ R12, 88(SP)
+ // R13 already saved
+ // R14 already saved
+ MOVQ R15, 96(SP)
+
+ // This takes arguments DI and AX
+ CALL runtime·wbBufFlush(SB)
+
+ MOVQ 0(SP), DI
+ MOVQ 8(SP), AX
+ MOVQ 16(SP), BX
+ MOVQ 24(SP), CX
+ MOVQ 32(SP), DX
+ MOVQ 40(SP), SI
+ MOVQ 48(SP), BP
+ MOVQ 56(SP), R8
+ MOVQ 64(SP), R9
+ MOVQ 72(SP), R10
+ MOVQ 80(SP), R11
+ MOVQ 88(SP), R12
+ MOVQ 96(SP), R15
+ JMP ret
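
Note on the gcWriteBarrier routine added above: it is the assembly fast path of
go1.10's buffered write barrier. Each P keeps a small buffer of recorded
pointers (two words per barrier hit: the value being written and the old
contents of the slot); the common case just appends to it and performs the
store, and only a full buffer spills into runtime.wbBufFlush. A rough Go sketch
of that buffering discipline, with illustrative names (wbBuf here is a
stand-in, not the runtime's type):

    package main

    // wbBuf is an illustrative stand-in for the per-P buffer that
    // gcWriteBarrier appends to; the real layout and sizes differ.
    type wbBuf struct {
        next int       // next free slot in buf
        buf  []uintptr // recorded words, two per barrier hit
    }

    // record mirrors the assembly fast path: note the new value and the
    // old contents of the slot, flush only when the buffer is full, and
    // perform the actual pointer write last.
    func (b *wbBuf) record(slot *uintptr, val uintptr) {
        b.buf[b.next] = val     // "Record value"
        b.buf[b.next+1] = *slot // "Record *slot"
        b.next += 2
        if b.next == len(b.buf) {
            b.flush()
        }
        *slot = val // "Do the write."
    }

    // flush would hand the batched pointers to the collector;
    // the sketch just resets the buffer.
    func (b *wbBuf) flush() {
        b.next = 0
    }

    func main() {
        var slots [4]uintptr
        b := &wbBuf{buf: make([]uintptr, 4)}
        for i := range slots {
            b.record(&slots[i], uintptr(i+1))
        }
    }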
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index 6367b3f..7fee79a 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -198,18 +198,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $8-4
MOVL buf+0(FP), BX // gobuf
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVL gobuf_ctxt(BX), DX
- TESTL DX, DX
- JZ nilctxt
- LEAL gobuf_ctxt(BX), AX
- MOVL AX, 0(SP)
- MOVL $0, 4(SP)
- CALL runtime·writebarrierptr_prewrite(SB)
- MOVL buf+0(FP), BX
-
-nilctxt:
MOVL gobuf_g(BX), DX
MOVL 0(DX), CX // make sure g != nil
get_tls(CX)
@@ -318,10 +306,11 @@
noswitch:
// already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVL DI, DX
MOVL 0(DI), DI
- CALL DI
- RET
+ JMP DI
/*
* support for morestack
@@ -368,16 +357,14 @@
MOVL SI, (g_sched+gobuf_g)(SI)
LEAL 8(SP), AX // f's SP
MOVL AX, (g_sched+gobuf_sp)(SI)
- // newstack will fill gobuf.ctxt.
+ MOVL DX, (g_sched+gobuf_ctxt)(SI)
// Call newstack on m->g0's stack.
MOVL m_g0(BX), BX
MOVL BX, g(CX)
MOVL (g_sched+gobuf_sp)(BX), SP
- PUSHQ DX // ctxt argument
CALL runtime·newstack(SB)
MOVL $0, 0x1003 // crash if newstack returns
- POPQ DX // keep balance check happy
RET
// morestack trampolines
@@ -559,30 +546,6 @@
MOVL 0, AX
RET
-TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
- MOVL ptr+0(FP), DI
- MOVL n+4(FP), CX
- MOVQ CX, BX
- ANDQ $3, BX
- SHRQ $2, CX
- MOVQ $0, AX
- CLD
- REP
- STOSL
- MOVQ BX, CX
- REP
- STOSB
- // Note: we zero only 4 bytes at a time so that the tail is at most
- // 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
- // See issue 13160.
- RET
-
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
- MOVL argp+0(FP),AX // addr of first arg
- MOVL -8(AX),AX // get calling pc
- MOVL AX, ret+8(FP)
- RET
-
// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
RDTSC
@@ -591,23 +554,6 @@
MOVQ AX, ret+0(FP)
RET
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVL p+0(FP), AX
- MOVL h+4(FP), BX
- MOVL 4(DX), CX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- MOVL CX, 8(SP)
- CALL runtime·memhash(SB)
- MOVL 16(SP), AX
- MOVL AX, ret+8(FP)
- RET
-
// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
@@ -658,24 +604,6 @@
MOVB $1, ret+8(FP)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-17
- MOVL s1_base+0(FP), SI
- MOVL s2_base+8(FP), DI
- CMPL SI, DI
- JEQ same
- MOVL s1_len+4(FP), BX
- CALL runtime·memeqbody(SB)
- MOVB AX, ret+16(FP)
- RET
-same:
- MOVB $1, ret+16(FP)
- RET
-
// a in SI
// b in DI
// count in BX
@@ -1042,27 +970,6 @@
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
- MOVL addr+0(FP), AX
- PREFETCHT0 (AX)
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
- MOVL addr+0(FP), AX
- PREFETCHT1 (AX)
- RET
-
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
- MOVL addr+0(FP), AX
- PREFETCHT2 (AX)
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
- MOVL addr+0(FP), AX
- PREFETCHNTA (AX)
- RET
-
TEXT ·checkASM(SB),NOSPLIT,$0-1
MOVB $1, ret+0(FP)
RET
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index 09b6759..306984e 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -7,14 +7,112 @@
#include "funcdata.h"
#include "textflag.h"
+// _rt0_arm is common startup code for most ARM systems when using
+// internal linking. This is the entry point for the program from the
+// kernel for an ordinary -buildmode=exe program. The stack holds the
+// number of arguments and the C-style argv.
+TEXT _rt0_arm(SB),NOSPLIT,$-4
+ MOVW (R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ B runtime·rt0_go(SB)
+
+// main is common startup code for most ARM systems when using
+// external linking. The C startup code will call the symbol "main"
+// passing argc and argv in the usual C ABI registers R0 and R1.
+TEXT main(SB),NOSPLIT,$-4
+ B runtime·rt0_go(SB)
+
+// _rt0_arm_lib is common startup code for most ARM systems when
+// using -buildmode=c-archive or -buildmode=c-shared. The linker will
+// arrange to invoke this function as a global constructor (for
+// c-archive) or when the shared library is loaded (for c-shared).
+// We expect argc and argv to be passed in the usual C ABI registers
+// R0 and R1.
+TEXT _rt0_arm_lib(SB),NOSPLIT,$104
+ // Preserve callee-save registers. Raspberry Pi's dlopen(), for example,
+ // actually cares that R11 is preserved.
+ MOVW R4, 12(R13)
+ MOVW R5, 16(R13)
+ MOVW R6, 20(R13)
+ MOVW R7, 24(R13)
+ MOVW R8, 28(R13)
+ MOVW R11, 32(R13)
+
+ // Skip floating point registers on GOARM < 6.
+ MOVB runtime·goarm(SB), R11
+ CMP $6, R11
+ BLT skipfpsave
+ MOVD F8, (32+8*1)(R13)
+ MOVD F9, (32+8*2)(R13)
+ MOVD F10, (32+8*3)(R13)
+ MOVD F11, (32+8*4)(R13)
+ MOVD F12, (32+8*5)(R13)
+ MOVD F13, (32+8*6)(R13)
+ MOVD F14, (32+8*7)(R13)
+ MOVD F15, (32+8*8)(R13)
+skipfpsave:
+ // Save argc/argv.
+ MOVW R0, _rt0_arm_lib_argc<>(SB)
+ MOVW R1, _rt0_arm_lib_argv<>(SB)
+
+ // Synchronous initialization.
+ CALL runtime·libpreinit(SB)
+
+ // Create a new thread to do the runtime initialization.
+ MOVW _cgo_sys_thread_create(SB), R2
+ CMP $0, R2
+ BEQ nocgo
+ MOVW $_rt0_arm_lib_go<>(SB), R0
+ MOVW $0, R1
+ BL (R2)
+ B rr
+nocgo:
+ MOVW $0x800000, R0 // stacksize = 8192KB
+ MOVW $_rt0_arm_lib_go<>(SB), R1 // fn
+ MOVW R0, 4(R13)
+ MOVW R1, 8(R13)
+ BL runtime·newosproc0(SB)
+rr:
+ // Restore callee-save registers and return.
+ MOVB runtime·goarm(SB), R11
+ CMP $6, R11
+ BLT skipfprest
+ MOVD (32+8*1)(R13), F8
+ MOVD (32+8*2)(R13), F9
+ MOVD (32+8*3)(R13), F10
+ MOVD (32+8*4)(R13), F11
+ MOVD (32+8*5)(R13), F12
+ MOVD (32+8*6)(R13), F13
+ MOVD (32+8*7)(R13), F14
+ MOVD (32+8*8)(R13), F15
+skipfprest:
+ MOVW 12(R13), R4
+ MOVW 16(R13), R5
+ MOVW 20(R13), R6
+ MOVW 24(R13), R7
+ MOVW 28(R13), R8
+ MOVW 32(R13), R11
+ RET
+
+// _rt0_arm_lib_go initializes the Go runtime.
+// This is started in a separate thread by _rt0_arm_lib.
+TEXT _rt0_arm_lib_go<>(SB),NOSPLIT,$8
+ MOVW _rt0_arm_lib_argc<>(SB), R0
+ MOVW _rt0_arm_lib_argv<>(SB), R1
+ B runtime·rt0_go(SB)
+
+DATA _rt0_arm_lib_argc<>(SB)/4,$0
+GLOBL _rt0_arm_lib_argc<>(SB),NOPTR,$4
+DATA _rt0_arm_lib_argv<>(SB)/4,$0
+GLOBL _rt0_arm_lib_argv<>(SB),NOPTR,$4
+
// using frame size $-4 means do not save LR on stack.
+// argc is in R0, argv is in R1.
TEXT runtime·rt0_go(SB),NOSPLIT,$-4
MOVW $0xcafebabe, R12
// copy arguments forward on an even stack
// use R13 instead of SP to avoid linker rewriting the offsets
- MOVW 0(R13), R0 // argc
- MOVW 4(R13), R1 // argv
SUB $64, R13 // plenty of scratch
AND $~7, R13
MOVW R0, 60(R13) // save argc, argv away
@@ -129,19 +227,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB),NOSPLIT,$8-4
MOVW buf+0(FP), R1
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVW gobuf_ctxt(R1), R0
- CMP $0, R0
- B.EQ nilctxt
- MOVW $gobuf_ctxt(R1), R0
- MOVW R0, 4(R13)
- MOVW $0, R0
- MOVW R0, 8(R13)
- BL runtime·writebarrierptr_prewrite(SB)
- MOVW buf+0(FP), R1
-
-nilctxt:
MOVW gobuf_g(R1), R0
BL setg<>(SB)
@@ -273,10 +358,12 @@
RET
noswitch:
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVW R0, R7
MOVW 0(R0), R0
- BL (R0)
- RET
+ MOVW.P 4(R13), R14 // restore LR
+ B (R0)
/*
* support for morestack
@@ -314,7 +401,7 @@
MOVW R13, (g_sched+gobuf_sp)(g)
MOVW LR, (g_sched+gobuf_pc)(g)
MOVW R3, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVW R7, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's caller.
@@ -328,8 +415,7 @@
BL setg<>(SB)
MOVW (g_sched+gobuf_sp)(g), R13
MOVW $0, R0
- MOVW.W R0, -8(R13) // create a call frame on g0
- MOVW R7, 4(R13) // ctxt argument
+ MOVW.W R0, -4(R13) // create a call frame on g0 (saved LR)
BL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -677,9 +763,9 @@
MOVW g, R0
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
- MOVW 8(R13), R0 // LR saved by caller
- MOVW R0, ret+4(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4
+ MOVW 0(R13), R0 // LR saved by caller
+ MOVW R0, ret+0(FP)
RET
TEXT runtime·emptyfunc(SB),0,$0-0
@@ -719,23 +805,6 @@
MOVW $0, R0
MOVW (R0), R1
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVW p+0(FP), R0
- MOVW h+4(FP), R1
- MOVW 4(R7), R2
- MOVW R0, 4(R13)
- MOVW R1, 8(R13)
- MOVW R2, 12(R13)
- BL runtime·memhash(SB)
- MOVW 16(R13), R0
- MOVW R0, ret+8(FP)
- RET
-
// memequal(p, q unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$-4-13
MOVW a+0(FP), R1
@@ -830,31 +899,6 @@
MOVW R0, (R7)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
- MOVW s1_base+0(FP), R2
- MOVW s2_base+8(FP), R3
- MOVW $1, R8
- MOVB R8, ret+16(FP)
- CMP R2, R3
- RET.EQ
- MOVW s1_len+4(FP), R0
- ADD R2, R0, R6
-loop:
- CMP R2, R6
- RET.EQ
- MOVBU.P 1(R2), R4
- MOVBU.P 1(R3), R5
- CMP R4, R5
- BEQ loop
- MOVW $0, R8
- MOVB R8, ret+16(FP)
- RET
-
// TODO: share code with memequal?
TEXT bytes·Equal(SB),NOSPLIT,$0-25
MOVW a_len+4(FP), R1
@@ -973,18 +1017,6 @@
// traceback from goexit1 must hit code range of goexit
MOVW R0, R0 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
- RET
-
// x -> x/1000000, x%1000000, called from Go with args, results on stack.
TEXT runtime·usplit(SB),NOSPLIT,$0-12
MOVW x+0(FP), R0
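
Note on the recurring getcallerpc changes: in go1.10 the function no longer
takes an argp argument. On 386 and amd64 the assembly body is deleted outright
(the call is now expanded by the compiler as an intrinsic), while on the LR
architectures (arm above, and arm64, mips, mips64x, ppc64x below) the stub
becomes a frameless leaf that simply returns the link register the caller saved
at the bottom of its frame.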
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 30ecec7..9bf0646 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -122,18 +122,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $24-8
MOVD buf+0(FP), R5
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVD gobuf_ctxt(R5), R0
- CMP $0, R0
- BEQ nilctxt
- MOVD $gobuf_ctxt(R5), R0
- MOVD R0, 8(RSP)
- MOVD ZR, 16(RSP)
- BL runtime·writebarrierptr_prewrite(SB)
- MOVD buf+0(FP), R5
-
-nilctxt:
MOVD gobuf_g(R5), g
BL runtime·save_g(SB)
@@ -251,9 +239,11 @@
noswitch:
// already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVD 0(R26), R3 // code pointer
- BL (R3)
- RET
+ MOVD.P 16(RSP), R30 // restore LR
+ B (R3)
/*
* support for morestack
@@ -289,7 +279,7 @@
MOVD R0, (g_sched+gobuf_sp)(g)
MOVD LR, (g_sched+gobuf_pc)(g)
MOVD R3, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVD R26, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's callers.
@@ -303,8 +293,7 @@
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R0
MOVD R0, RSP
- MOVD.W $0, -16(RSP) // create a call frame on g0
- MOVD R26, 8(RSP) // ctxt argument
+ MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
BL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -368,16 +357,26 @@
NO_LOCAL_POINTERS; \
/* copy arguments to stack */ \
MOVD arg+16(FP), R3; \
- MOVWU argsize+24(FP), R4; \
- MOVD RSP, R5; \
- ADD $(8-1), R5; \
- SUB $1, R3; \
- ADD R5, R4; \
- CMP R5, R4; \
- BEQ 4(PC); \
- MOVBU.W 1(R3), R6; \
- MOVBU.W R6, 1(R5); \
- B -4(PC); \
+ MOVWU argsize+24(FP), R4; \
+ ADD $8, RSP, R5; \
+ BIC $0xf, R4, R6; \
+ CBZ R6, 6(PC); \
+ /* if R6=(argsize&~15) != 0 */ \
+ ADD R6, R5, R6; \
+ /* copy 16 bytes a time */ \
+ LDP.P 16(R3), (R7, R8); \
+ STP.P (R7, R8), 16(R5); \
+ CMP R5, R6; \
+ BNE -3(PC); \
+ AND $0xf, R4, R6; \
+ CBZ R6, 6(PC); \
+ /* if R6=(argsize&15) != 0 */ \
+ ADD R6, R5, R6; \
+ /* copy 1 byte a time for the rest */ \
+ MOVBU.P 1(R3), R7; \
+ MOVBU.P R7, 1(R5); \
+ CMP R5, R6; \
+ BNE -3(PC); \
/* call function */ \
MOVD f+8(FP), R26; \
MOVD (R26), R0; \
@@ -704,52 +703,27 @@
MOVD savedR27-8(SP), R27
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
- MOVD 16(RSP), R0 // LR saved by caller
- MOVD R0, ret+8(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8
+ MOVD 0(RSP), R0 // LR saved by caller
+ MOVD R0, ret+0(FP)
RET
TEXT runtime·abort(SB),NOSPLIT,$-8-0
B (ZR)
UNDEF
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVD p+0(FP), R3
- MOVD h+8(FP), R4
- MOVD 8(R26), R5
- MOVD R3, 8(RSP)
- MOVD R4, 16(RSP)
- MOVD R5, 24(RSP)
- BL runtime·memhash(SB)
- MOVD 32(RSP), R3
- MOVD R3, ret+16(FP)
- RET
-
-// memequal(p, q unsafe.Pointer, size uintptr) bool
+// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$-8-25
- MOVD a+0(FP), R1
+ MOVD size+16(FP), R1
+ // short path to handle 0-byte case
+ CBZ R1, equal
+ MOVD a+0(FP), R0
MOVD b+8(FP), R2
- MOVD size+16(FP), R3
- ADD R1, R3, R6
+ MOVD $ret+24(FP), R8
+ B runtime·memeqbody<>(SB)
+equal:
MOVD $1, R0
MOVB R0, ret+24(FP)
- CMP R1, R2
- BEQ done
-loop:
- CMP R1, R6
- BEQ done
- MOVBU.P 1(R1), R4
- MOVBU.P 1(R2), R5
- CMP R4, R5
- BEQ loop
-
- MOVB $0, ret+24(FP)
-done:
RET
// memequal_varlen(a, b unsafe.Pointer) bool
@@ -823,103 +797,235 @@
MOVD R4, (R7)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33
- MOVD s1_base+0(FP), R0
- MOVD s1_len+8(FP), R1
- MOVD s2_base+16(FP), R2
- ADD R0, R1 // end
-loop:
- CMP R0, R1
- BEQ equal // reaches the end
- MOVBU.P 1(R0), R4
- MOVBU.P 1(R2), R5
- CMP R4, R5
- BEQ loop
-notequal:
- MOVB ZR, ret+32(FP)
- RET
-equal:
- MOVD $1, R0
- MOVB R0, ret+32(FP)
- RET
-
//
// functions for other packages
//
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
MOVD b+0(FP), R0
- MOVD b_len+8(FP), R1
- MOVBU c+24(FP), R2 // byte to find
- MOVD R0, R4 // store base for later
- ADD R0, R1 // end
-loop:
- CMP R0, R1
- BEQ notfound
- MOVBU.P 1(R0), R3
- CMP R2, R3
- BNE loop
-
- SUB $1, R0 // R0 will be one beyond the position we want
- SUB R4, R0 // remove base
- MOVD R0, ret+32(FP)
- RET
-
-notfound:
- MOVD $-1, R0
- MOVD R0, ret+32(FP)
- RET
+ MOVD b_len+8(FP), R2
+ MOVBU c+24(FP), R1
+ MOVD $ret+32(FP), R8
+ B runtime·indexbytebody<>(SB)
TEXT strings·IndexByte(SB),NOSPLIT,$0-32
MOVD s+0(FP), R0
- MOVD s_len+8(FP), R1
- MOVBU c+16(FP), R2 // byte to find
- MOVD R0, R4 // store base for later
- ADD R0, R1 // end
+ MOVD s_len+8(FP), R2
+ MOVBU c+16(FP), R1
+ MOVD $ret+24(FP), R8
+ B runtime·indexbytebody<>(SB)
+
+// input:
+// R0: data
+// R1: byte to search
+// R2: data len
+// R8: address to put result
+TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0
+ // Core algorithm:
+ // For each 32-byte chunk we calculate a 64-bit syndrome value,
+ // with two bits per byte. For each tuple, bit 0 is set if the
+ // relevant byte matched the requested character and bit 1 is
+ // not used (faster than using a 32bit syndrome). Since the bits
+ // in the syndrome reflect exactly the order in which things occur
+ // in the original string, counting trailing zeros allows to
+ // identify exactly which byte has matched.
+
+ CBZ R2, fail
+ MOVD R0, R11
+ // Magic constant 0x40100401 allows us to identify
+ // which lane matches the requested byte.
+ // 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
+ // Different bytes have different bit masks (i.e: 1, 4, 16, 64)
+ MOVD $0x40100401, R5
+ VMOV R1, V0.B16
+ // Work with aligned 32-byte chunks
+ BIC $0x1f, R0, R3
+ VMOV R5, V5.S4
+ ANDS $0x1f, R0, R9
+ AND $0x1f, R2, R10
+ BEQ loop
+
+ // Input string is not 32-byte aligned. We calculate the
+ // syndrome value for the aligned 32 bytes block containing
+ // the first bytes and mask off the irrelevant part.
+ VLD1.P (R3), [V1.B16, V2.B16]
+ SUB $0x20, R9, R4
+ ADDS R4, R2, R2
+ VCMEQ V0.B16, V1.B16, V3.B16
+ VCMEQ V0.B16, V2.B16, V4.B16
+ VAND V5.B16, V3.B16, V3.B16
+ VAND V5.B16, V4.B16, V4.B16
+ VADDP V4.B16, V3.B16, V6.B16 // 256->128
+ VADDP V6.B16, V6.B16, V6.B16 // 128->64
+ VMOV V6.D[0], R6
+ // Clear the irrelevant lower bits
+ LSL $1, R9, R4
+ LSR R4, R6, R6
+ LSL R4, R6, R6
+ // The first block can also be the last
+ BLS masklast
+ // Have we found something already?
+ CBNZ R6, tail
+
loop:
- CMP R0, R1
- BEQ notfound
- MOVBU.P 1(R0), R3
- CMP R2, R3
- BNE loop
+ VLD1.P (R3), [V1.B16, V2.B16]
+ SUBS $0x20, R2, R2
+ VCMEQ V0.B16, V1.B16, V3.B16
+ VCMEQ V0.B16, V2.B16, V4.B16
+ // If we're out of data we finish regardless of the result
+ BLS end
+ // Use a fast check for the termination condition
+ VORR V4.B16, V3.B16, V6.B16
+ VADDP V6.D2, V6.D2, V6.D2
+ VMOV V6.D[0], R6
+ // We're not out of data, loop if we haven't found the character
+ CBZ R6, loop
- SUB $1, R0 // R0 will be one beyond the position we want
- SUB R4, R0 // remove base
- MOVD R0, ret+24(FP)
+end:
+ // Termination condition found, let's calculate the syndrome value
+ VAND V5.B16, V3.B16, V3.B16
+ VAND V5.B16, V4.B16, V4.B16
+ VADDP V4.B16, V3.B16, V6.B16
+ VADDP V6.B16, V6.B16, V6.B16
+ VMOV V6.D[0], R6
+ // Only do the clear for the last possible block with less than 32 bytes
+ // Condition flags come from SUBS in the loop
+ BHS tail
+
+masklast:
+ // Clear the irrelevant upper bits
+ ADD R9, R10, R4
+ AND $0x1f, R4, R4
+ SUB $0x20, R4, R4
+ NEG R4<<1, R4
+ LSL R4, R6, R6
+ LSR R4, R6, R6
+
+tail:
+ // Check that we have found a character
+ CBZ R6, fail
+ // Count the trailing zeros using bit reversing
+ RBIT R6, R6
+ // Compensate the last post-increment
+ SUB $0x20, R3, R3
+ // And count the leading zeros
+ CLZ R6, R6
+ // R6 is twice the offset into the fragment
+ ADD R6>>1, R3, R0
+ // Compute the offset result
+ SUB R11, R0, R0
+ MOVD R0, (R8)
RET
-notfound:
+fail:
MOVD $-1, R0
- MOVD R0, ret+24(FP)
+ MOVD R0, (R8)
RET
-// TODO: share code with memequal?
+// Equal(a, b []byte) bool
TEXT bytes·Equal(SB),NOSPLIT,$0-49
MOVD a_len+8(FP), R1
MOVD b_len+32(FP), R3
- CMP R1, R3 // unequal lengths are not equal
- BNE notequal
+ CMP R1, R3
+ // unequal lengths are not equal
+ BNE not_equal
+ // short path to handle 0-byte case
+ CBZ R1, equal
MOVD a+0(FP), R0
MOVD b+24(FP), R2
- ADD R0, R1 // end
-loop:
- CMP R0, R1
- BEQ equal // reaches the end
- MOVBU.P 1(R0), R4
- MOVBU.P 1(R2), R5
- CMP R4, R5
- BEQ loop
-notequal:
- MOVB ZR, ret+48(FP)
- RET
+ MOVD $ret+48(FP), R8
+ B runtime·memeqbody<>(SB)
equal:
MOVD $1, R0
MOVB R0, ret+48(FP)
RET
+not_equal:
+ MOVB ZR, ret+48(FP)
+ RET
+
+// input:
+// R0: pointer a
+// R1: data len
+// R2: pointer b
+// R8: address to put result
+TEXT runtime·memeqbody<>(SB),NOSPLIT,$0
+ CMP $1, R1
+ // handle 1-byte special case for better performance
+ BEQ one
+ CMP $16, R1
+ // handle specially if length < 16
+ BLO tail
+ BIC $0x3f, R1, R3
+ CBZ R3, chunk16
+ // work with 64-byte chunks
+ ADD R3, R0, R6 // end of chunks
+chunk64_loop:
+ VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2]
+ VLD1.P (R2), [V4.D2, V5.D2, V6.D2, V7.D2]
+ VCMEQ V0.D2, V4.D2, V8.D2
+ VCMEQ V1.D2, V5.D2, V9.D2
+ VCMEQ V2.D2, V6.D2, V10.D2
+ VCMEQ V3.D2, V7.D2, V11.D2
+ VAND V8.B16, V9.B16, V8.B16
+ VAND V8.B16, V10.B16, V8.B16
+ VAND V8.B16, V11.B16, V8.B16
+ CMP R0, R6
+ VMOV V8.D[0], R4
+ VMOV V8.D[1], R5
+ CBZ R4, not_equal
+ CBZ R5, not_equal
+ BNE chunk64_loop
+ AND $0x3f, R1, R1
+ CBZ R1, equal
+chunk16:
+ // work with 16-byte chunks
+ BIC $0xf, R1, R3
+ CBZ R3, tail
+ ADD R3, R0, R6 // end of chunks
+chunk16_loop:
+ VLD1.P (R0), [V0.D2]
+ VLD1.P (R2), [V1.D2]
+ VCMEQ V0.D2, V1.D2, V2.D2
+ CMP R0, R6
+ VMOV V2.D[0], R4
+ VMOV V2.D[1], R5
+ CBZ R4, not_equal
+ CBZ R5, not_equal
+ BNE chunk16_loop
+ AND $0xf, R1, R1
+ CBZ R1, equal
+tail:
+ // special compare of tail with length < 16
+ TBZ $3, R1, lt_8
+ MOVD.P 8(R0), R4
+ MOVD.P 8(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_8:
+ TBZ $2, R1, lt_4
+ MOVWU.P 4(R0), R4
+ MOVWU.P 4(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_4:
+ TBZ $1, R1, lt_2
+ MOVHU.P 2(R0), R4
+ MOVHU.P 2(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_2:
+ TBZ $0, R1, equal
+one:
+ MOVBU (R0), R4
+ MOVBU (R2), R5
+ CMP R4, R5
+ BNE not_equal
+equal:
+ MOVD $1, R0
+ MOVB R0, (R8)
+ RET
+not_equal:
+ MOVB ZR, (R8)
+ RET
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R0
@@ -931,19 +1037,6 @@
MOVD R0, R0 // NOP
BL runtime·goexit1(SB) // does not return
-// TODO(aram): use PRFM here.
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
- RET
-
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
RET
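
Note on the arm64 rewrites above: memeqbody and indexbytebody share one shape —
process the bulk of the input in wide SIMD chunks (64- and 16-byte compares,
32-byte scans), then fall back to a short scalar tail. A portable sketch of
that chunk-then-tail structure for the equality case, with plain 8-byte loads
standing in for the NEON compares (sketch only, not the runtime code):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    // equalChunked compares wide chunks first, then finishes the
    // remainder byte by byte, mirroring the new memeqbody layout.
    func equalChunked(a, b []byte) bool {
        if len(a) != len(b) {
            return false
        }
        for len(a) >= 8 {
            if binary.LittleEndian.Uint64(a) != binary.LittleEndian.Uint64(b) {
                return false
            }
            a, b = a[8:], b[8:]
        }
        for i := range a {
            if a[i] != b[i] {
                return false
            }
        }
        return true
    }

    func main() {
        fmt.Println(equalChunked([]byte("hello, arm64"), []byte("hello, arm64")))
    }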
diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s
index 57d4578..12cea00 100644
--- a/src/runtime/asm_mips64x.s
+++ b/src/runtime/asm_mips64x.s
@@ -108,17 +108,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
MOVV buf+0(FP), R3
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVV gobuf_ctxt(R3), R1
- BEQ R1, nilctxt
- MOVV $gobuf_ctxt(R3), R1
- MOVV R1, 8(R29)
- MOVV R0, 16(R29)
- JAL runtime·writebarrierptr_prewrite(SB)
- MOVV buf+0(FP), R3
-
-nilctxt:
MOVV gobuf_g(R3), g // make sure g is not nil
JAL runtime·save_g(SB)
@@ -225,9 +214,12 @@
noswitch:
// already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVV 0(REGCTXT), R4 // code pointer
- JAL (R4)
- RET
+ MOVV 0(R29), R31 // restore LR
+ ADDV $8, R29
+ JMP (R4)
/*
* support for morestack
@@ -260,7 +252,7 @@
MOVV R29, (g_sched+gobuf_sp)(g)
MOVV R31, (g_sched+gobuf_pc)(g)
MOVV R3, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVV REGCTXT, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's caller.
@@ -273,9 +265,8 @@
JAL runtime·save_g(SB)
MOVV (g_sched+gobuf_sp)(g), R29
// Create a stack frame on g0 to call newstack.
- MOVV R0, -16(R29) // Zero saved LR in frame
- ADDV $-16, R29
- MOVV REGCTXT, 8(R29) // ctxt argument
+ MOVV R0, -8(R29) // Zero saved LR in frame
+ ADDV $-8, R29
JAL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -616,32 +607,15 @@
JAL runtime·save_g(SB)
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
- MOVV 16(R29), R1 // LR saved by caller
- MOVV R1, ret+8(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8
+ MOVV 0(R29), R1 // LR saved by caller
+ MOVV R1, ret+0(FP)
RET
TEXT runtime·abort(SB),NOSPLIT,$-8-0
MOVW (R0), R0
UNDEF
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVV p+0(FP), R1
- MOVV h+8(FP), R2
- MOVV 8(REGCTXT), R3
- MOVV R1, 8(R29)
- MOVV R2, 16(R29)
- MOVV R3, 24(R29)
- JAL runtime·memhash(SB)
- MOVV 32(R29), R1
- MOVV R1, ret+16(FP)
- RET
-
// AES hashing not implemented for mips64
TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
MOVW (R0), R1
@@ -696,31 +670,6 @@
MOVB R1, ret+16(FP)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33
- MOVV s1_base+0(FP), R1
- MOVV s2_base+16(FP), R2
- MOVV $1, R3
- MOVB R3, ret+32(FP)
- BNE R1, R2, 2(PC)
- RET
- MOVV s1_len+8(FP), R3
- ADDV R1, R3, R4
-loop:
- BNE R1, R4, 2(PC)
- RET
- MOVBU (R1), R6
- ADDV $1, R1
- MOVBU (R2), R7
- ADDV $1, R2
- BEQ R6, R7, loop
- MOVB R0, ret+32(FP)
- RET
-
// TODO: share code with memequal?
TEXT bytes·Equal(SB),NOSPLIT,$0-49
MOVV a_len+8(FP), R3
@@ -823,18 +772,6 @@
// traceback from goexit1 must hit code range of goexit
NOR R0, R0 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
- RET
-
TEXT ·checkASM(SB),NOSPLIT,$0-1
MOVW $1, R1
MOVB R1, ret+0(FP)
diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s
index 536c315..bba6a95 100644
--- a/src/runtime/asm_mipsx.s
+++ b/src/runtime/asm_mipsx.s
@@ -109,17 +109,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB),NOSPLIT,$8-4
MOVW buf+0(FP), R3
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVW gobuf_ctxt(R3), R1
- BEQ R1, nilctxt
- MOVW $gobuf_ctxt(R3), R1
- MOVW R1, 4(R29)
- MOVW R0, 8(R29)
- JAL runtime·writebarrierptr_prewrite(SB)
- MOVW buf+0(FP), R3
-
-nilctxt:
MOVW gobuf_g(R3), g // make sure g is not nil
JAL runtime·save_g(SB)
@@ -226,9 +215,12 @@
noswitch:
// already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVW 0(REGCTXT), R4 // code pointer
- JAL (R4)
- RET
+ MOVW 0(R29), R31 // restore LR
+ ADD $4, R29
+ JMP (R4)
/*
* support for morestack
@@ -261,7 +253,7 @@
MOVW R29, (g_sched+gobuf_sp)(g)
MOVW R31, (g_sched+gobuf_pc)(g)
MOVW R3, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVW REGCTXT, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's caller.
@@ -274,9 +266,8 @@
JAL runtime·save_g(SB)
MOVW (g_sched+gobuf_sp)(g), R29
// Create a stack frame on g0 to call newstack.
- MOVW R0, -8(R29) // Zero saved LR in frame
- ADDU $-8, R29
- MOVW REGCTXT, 4(R29) // ctxt argument
+ MOVW R0, -4(R29) // Zero saved LR in frame
+ ADDU $-4, R29
JAL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -619,31 +610,14 @@
JAL runtime·save_g(SB)
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
- MOVW 8(R29), R1 // LR saved by caller
- MOVW R1, ret+4(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4
+ MOVW 0(R29), R1 // LR saved by caller
+ MOVW R1, ret+0(FP)
RET
TEXT runtime·abort(SB),NOSPLIT,$0-0
UNDEF
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVW p+0(FP), R1
- MOVW h+4(FP), R2
- MOVW 4(REGCTXT), R3
- MOVW R1, 4(R29)
- MOVW R2, 8(R29)
- MOVW R3, 12(R29)
- JAL runtime·memhash(SB)
- MOVW 16(R29), R1
- MOVW R1, ret+8(FP)
- RET
-
// Not implemented.
TEXT runtime·aeshash(SB),NOSPLIT,$0
UNDEF
@@ -712,31 +686,6 @@
MOVB R1, ret+8(FP)
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-17
- MOVW s1_base+0(FP), R1
- MOVW s2_base+8(FP), R2
- MOVW $1, R3
- MOVBU R3, ret+16(FP)
- BNE R1, R2, 2(PC)
- RET
- MOVW s1_len+4(FP), R3
- ADDU R1, R3, R4
-loop:
- BNE R1, R4, 2(PC)
- RET
- MOVBU (R1), R6
- ADDU $1, R1
- MOVBU (R2), R7
- ADDU $1, R2
- BEQ R6, R7, loop
- MOVB R0, ret+16(FP)
- RET
-
TEXT bytes·Equal(SB),NOSPLIT,$0-25
MOVW a_len+4(FP), R3
MOVW b_len+16(FP), R4
@@ -903,18 +852,6 @@
// traceback from goexit1 must hit code range of goexit
NOR R0, R0 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
- RET
-
TEXT ·checkASM(SB),NOSPLIT,$0-1
MOVW $1, R1
MOVB R1, ret+0(FP)
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 616861e..e02ca16 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -133,18 +133,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
MOVD buf+0(FP), R5
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVD gobuf_ctxt(R5), R3
- CMP R0, R3
- BEQ nilctxt
- MOVD $gobuf_ctxt(R5), R3
- MOVD R3, FIXED_FRAME+0(R1)
- MOVD R0, FIXED_FRAME+8(R1)
- BL runtime·writebarrierptr_prewrite(SB)
- MOVD buf+0(FP), R5
-
-nilctxt:
MOVD gobuf_g(R5), g // make sure g is not nil
BL runtime·save_g(SB)
@@ -277,6 +265,9 @@
noswitch:
// already on m stack, just call directly
+ // On other arches we do a tail call here, but it appears to be
+ // impossible to tail call a function pointer in shared mode on
+ // ppc64 because the caller is responsible for restoring the TOC.
MOVD 0(R11), R12 // code pointer
MOVD R12, CTR
BL (CTR)
@@ -317,7 +308,7 @@
MOVD LR, R8
MOVD R8, (g_sched+gobuf_pc)(g)
MOVD R5, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVD R11, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's caller.
@@ -329,8 +320,7 @@
MOVD m_g0(R7), g
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R1
- MOVDU R0, -(FIXED_FRAME+8)(R1) // create a call frame on g0
- MOVD R11, FIXED_FRAME+0(R1) // ctxt argument
+ MOVDU R0, -(FIXED_FRAME+0)(R1) // create a call frame on g0
BL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -714,9 +704,9 @@
MOVD R4, LR
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
- MOVD FIXED_FRAME+8(R1), R3 // LR saved by caller
- MOVD R3, ret+8(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD 0(R1), R3 // LR saved by caller
+ MOVD R3, ret+0(FP)
RET
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
@@ -738,23 +728,6 @@
MOVD R3, ret+0(FP)
RET
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVD p+0(FP), R3
- MOVD h+8(FP), R4
- MOVD 8(R11), R5
- MOVD R3, FIXED_FRAME+0(R1)
- MOVD R4, FIXED_FRAME+8(R1)
- MOVD R5, FIXED_FRAME+16(R1)
- BL runtime·memhash(SB)
- MOVD FIXED_FRAME+24(R1), R3
- MOVD R3, ret+16(FP)
- RET
-
// AES hashing not implemented for ppc64
TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R1
@@ -1074,24 +1047,6 @@
MOVD $1, R9
RET
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33
- MOVD s1_base+0(FP), R3
- MOVD s2_base+16(FP), R4
- MOVD $1, R5
- MOVB R5, ret+32(FP)
- CMP R3, R4
- BNE 2(PC)
- RET
- MOVD s1_len+8(FP), R5
- BL runtime·memeqbody(SB)
- MOVB R9, ret+32(FP)
- RET
-
TEXT bytes·Equal(SB),NOSPLIT,$0-49
MOVD a_len+8(FP), R4
MOVD b_len+32(FP), R5
@@ -1129,24 +1084,17 @@
TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
DCBT (R3) // Prepare cache line.
- MOVD R3,R10 // Save base address for calculating the index later.
+ MOVD R3,R17 // Save base address for calculating the index later.
RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8.
RLDIMI $8,R5,$48,R5 // Replicating the byte across the register.
-
- // Calculate last acceptable address and check for possible overflow
- // using a saturated add.
- // Overflows set last acceptable address to 0xffffffffffffffff.
- ADD R4,R3,R7
- SUBC R3,R7,R6
- SUBE R0,R0,R9
- MOVW R9,R6
- OR R6,R7,R7
+ ADD R4,R3,R7 // Last acceptable address in R7.
RLDIMI $16,R5,$32,R5
CMPU R4,$32 // Check if it's a small string (<32 bytes). Those will be processed differently.
MOVD $-1,R9
- WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28).
+ WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28).
RLDIMI $32,R5,$0,R5
+ MOVD R7,R10 // Save last acceptable address in R10 for later.
ADD $-1,R7,R7
#ifdef GOARCH_ppc64le
SLD R6,R9,R9 // Prepare mask for Little Endian
@@ -1155,56 +1103,142 @@
#endif
BLE small_string // Jump to the small string case if it's <32 bytes.
- // Case for length >32 bytes
+ // If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
+ // in V0, V1 and V10, then branch to the preloop.
+ ANDCC $63,R3,R11
+ BEQ CR0,qw_align
+ RLDICL $0,R3,$61,R11
+
MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8.
CMPB R12,R5,R3 // Check for a match.
AND R9,R3,R3 // Mask bytes below s_base
- RLDICL $0,R7,$61,R4 // length-1
+ RLDICL $0,R7,$61,R6 // length-1
RLDICR $0,R7,$60,R7 // Last doubleword in R7
CMPU R3,$0,CR7 // If we have a match, jump to the final computation
BNE CR7,done
+ ADD $8,R8,R8
+ ADD $-8,R4,R4
+ ADD R4,R11,R4
- // Check for doubleword alignment and jump to the loop setup if aligned.
- MOVFL R8,CR7
- BC 12,28,loop_setup
+ // Check for quadword alignment
+ ANDCC $15,R8,R11
+ BEQ CR0,qw_align
- // Not aligned, so handle the second doubleword
- MOVDU 8(R8),R12
+ // Not aligned, so handle the next doubleword
+ MOVD 0(R8),R12
CMPB R12,R5,R3
CMPU R3,$0,CR7
BNE CR7,done
+ ADD $8,R8,R8
+ ADD $-8,R4,R4
-loop_setup:
- // We are now aligned to a 16-byte boundary. We will load two doublewords
- // per loop iteration. The last doubleword is in R7, so our loop counter
- // starts at (R7-R8)/16.
- SUB R8,R7,R6
- SRD $4,R6,R6
- MOVD R6,CTR
+ // Either quadword aligned or 64-byte at this point. We can use LVX.
+qw_align:
- // Note: when we have an align directive, align this loop to 32 bytes so
- // it fits in a single icache sector.
+ // Set up auxiliary data for the vectorized algorithm.
+ VSPLTISB $0,V0 // Replicate 0 across V0
+ VSPLTISB $3,V10 // Use V10 as control for VBPERMQ
+ MTVRD R5,V1
+ LVSL (R0+R0),V11
+ VSLB V11,V10,V10
+ VSPLTB $7,V1,V1 // Replicate byte across V1
+ CMPU R4, $64 // If len <= 64, don't use the vectorized loop
+ BLE tail
+
+ // We will load 4 quadwords per iteration in the loop, so check for

+ // 64-byte alignment. If 64-byte aligned, then branch to the preloop.
+ ANDCC $63,R8,R11
+ BEQ CR0,preloop
+
+ // Not 64-byte aligned. Load one quadword at a time until aligned.
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6 // Check for byte in V4
+ BNE CR6,found_qw_align
+ ADD $16,R8,R8
+ ADD $-16,R4,R4
+
+ ANDCC $63,R8,R11
+ BEQ CR0,preloop
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6 // Check for byte in V4
+ BNE CR6,found_qw_align
+ ADD $16,R8,R8
+ ADD $-16,R4,R4
+
+ ANDCC $63,R8,R11
+ BEQ CR0,preloop
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6 // Check for byte in V4
+ BNE CR6,found_qw_align
+ ADD $-16,R4,R4
+ ADD $16,R8,R8
+
+ // 64-byte aligned. Prepare for the main loop.
+preloop:
+ CMPU R4,$64
+ BLE tail // If len <= 64, don't use the vectorized loop
+
+ // We are now aligned to a 64-byte boundary. We will load 4 quadwords
+ // per loop iteration. The last doubleword is in R10, so our loop counter
+ // starts at (R10-R8)/64.
+ SUB R8,R10,R6
+ SRD $6,R6,R9 // Loop counter in R9
+ MOVD R9,CTR
+
+ MOVD $16,R11 // Load offsets for the vector loads
+ MOVD $32,R9
+ MOVD $48,R7
+
+ // Main loop we will load 64 bytes per iteration
loop:
- // Load two doublewords, then compare and merge in a single register. We
- // will check two doublewords per iteration, then find out which of them
- // contains the byte later. This speeds up the search.
- MOVD 8(R8),R12
- MOVDU 16(R8),R11
- CMPB R12,R5,R3
- CMPB R11,R5,R9
- OR R3,R9,R6
- CMPU R6,$0,CR7
- BNE CR7,found
- BC 16,0,loop
+ LVX (R8+R0),V2 // Load 4 16-byte vectors
+ LVX (R11+R8),V3
+ LVX (R9+R8),V4
+ LVX (R7+R8),V5
+ VCMPEQUB V1,V2,V6 // Look for byte in each vector
+ VCMPEQUB V1,V3,V7
+ VCMPEQUB V1,V4,V8
+ VCMPEQUB V1,V5,V9
+ VOR V6,V7,V11 // Compress the result in a single vector
+ VOR V8,V9,V12
+ VOR V11,V12,V11
+ VCMPEQUBCC V0,V11,V11 // Check for byte
+ BGE CR6,found
+ ADD $64,R8,R8
+ BC 16,0,loop // bdnz loop
- // Counter zeroed, but we may have another doubleword to read
- CMPU R8,R7
- BEQ notfound
+ // Handle the tailing bytes or R4 <= 64
+ RLDICL $0,R6,$58,R4
+tail:
+ CMPU R4,$0
+ BEQ notfound
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6
+ BNE CR6,found_qw_align
+ ADD $16,R8,R8
+ CMPU R4,$16,CR6
+ BLE CR6,notfound
+ ADD $-16,R4,R4
- MOVDU 8(R8),R12
- CMPB R12,R5,R3
- CMPU R3,$0,CR6
- BNE CR6,done
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6
+ BNE CR6,found_qw_align
+ ADD $16,R8,R8
+ CMPU R4,$16,CR6
+ BLE CR6,notfound
+ ADD $-16,R4,R4
+
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6
+ BNE CR6,found_qw_align
+ ADD $16,R8,R8
+ CMPU R4,$16,CR6
+ BLE CR6,notfound
+ ADD $-16,R4,R4
+
+ LVX (R8+R0),V4
+ VCMPEQUBCC V1,V4,V6
+ BNE CR6,found_qw_align
notfound:
MOVD $-1,R3
@@ -1212,15 +1246,68 @@
RET
found:
- // One of the doublewords from the loop contains the byte we are looking
- // for. Check the first doubleword and adjust the address if found.
- CMPU R3,$0,CR6
- ADD $-8,R8,R8
- BNE CR6,done
+ // We will now compress the results into a single doubleword,
+ // so it can be moved to a GPR for the final index calculation.
- // Not found, so it must be in the second doubleword of the merged pair.
- MOVD R9,R3
- ADD $8,R8,R8
+ // The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
+ // first bit of each byte into bits 48-63.
+ VBPERMQ V6,V10,V6
+ VBPERMQ V7,V10,V7
+ VBPERMQ V8,V10,V8
+ VBPERMQ V9,V10,V9
+
+ // Shift each 16-bit component into its correct position for
+ // merging into a single doubleword.
+#ifdef GOARCH_ppc64le
+ VSLDOI $2,V7,V7,V7
+ VSLDOI $4,V8,V8,V8
+ VSLDOI $6,V9,V9,V9
+#else
+ VSLDOI $6,V6,V6,V6
+ VSLDOI $4,V7,V7,V7
+ VSLDOI $2,V8,V8,V8
+#endif
+
+ // Merge V6-V9 into a single doubleword and move to a GPR.
+ VOR V6,V7,V11
+ VOR V8,V9,V4
+ VOR V4,V11,V4
+ MFVRD V4,R3
+
+#ifdef GOARCH_ppc64le
+ ADD $-1,R3,R11
+ ANDN R3,R11,R11
+ POPCNTD R11,R11 // Count trailing zeros (Little Endian).
+#else
+ CNTLZD R3,R11 // Count leading zeros (Big Endian).
+#endif
+ ADD R8,R11,R3 // Calculate byte address
+
+return:
+ SUB R17,R3
+ MOVD R3,(R14)
+ RET
+
+found_qw_align:
+ // Use the same algorithm as above. Compress the result into
+ // a single doubleword and move it to a GPR for the final
+ // calculation.
+ VBPERMQ V6,V10,V6
+
+#ifdef GOARCH_ppc64le
+ MFVRD V6,R3
+ ADD $-1,R3,R11
+ ANDN R3,R11,R11
+ POPCNTD R11,R11
+#else
+ VSLDOI $6,V6,V6,V6
+ MFVRD V6,R3
+ CNTLZD R3,R11
+#endif
+ ADD R8,R11,R3
+ CMPU R11,R4
+ BLT return
+ BR notfound
done:
// At this point, R3 has 0xFF in the same position as the byte we are
@@ -1236,17 +1323,10 @@
CMPU R8,R7 // Check if we are at the last doubleword.
SRD $3,R11 // Convert trailing zeros to bytes.
ADD R11,R8,R3
- CMPU R11,R4,CR7 // If at the last doubleword, check the byte offset.
+ CMPU R11,R6,CR7 // If at the last doubleword, check the byte offset.
BNE return
BLE CR7,return
- MOVD $-1,R3
- MOVD R3,(R14)
- RET
-
-return:
- SUB R10,R3 // Calculate index.
- MOVD R3,(R14)
- RET
+ BR notfound
small_string:
// We unroll this loop for better performance.
@@ -1257,9 +1337,9 @@
CMPB R12,R5,R3 // Check for a match.
AND R9,R3,R3 // Mask bytes below s_base.
CMPU R3,$0,CR7 // If we have a match, jump to the final computation.
- RLDICL $0,R7,$61,R4 // length-1
+ RLDICL $0,R7,$61,R6 // length-1
RLDICR $0,R7,$60,R7 // Last doubleword in R7.
- CMPU R8,R7
+ CMPU R8,R7
BNE CR7,done
BEQ notfound // Hit length.
@@ -1287,34 +1367,70 @@
MOVDU 8(R8),R12
CMPB R12,R5,R3
CMPU R3,$0,CR6
- CMPU R8,R7
BNE CR6,done
BR notfound
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R5
- MOVD s1_len+8(FP), R3
MOVD s2_base+16(FP), R6
+ MOVD s1_len+8(FP), R3
+ CMP R5,R6,CR7
MOVD s2_len+24(FP), R4
MOVD $ret+32(FP), R7
+ CMP R3,R4,CR6
+ BEQ CR7,equal
+
+notequal:
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
+equal:
+ BEQ CR6,done
+ MOVD $1, R8
+ BGT CR6,greater
+ NEG R8
+
+greater:
+ MOVD R8, (R7)
+ RET
+
+done:
+ MOVD $0, (R7)
+ RET
+
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD s1+0(FP), R5
- MOVD s1+8(FP), R3
MOVD s2+24(FP), R6
+ MOVD s1+8(FP), R3
+ CMP R5,R6,CR7
MOVD s2+32(FP), R4
MOVD $ret+48(FP), R7
+ CMP R3,R4,CR6
+ BEQ CR7,equal
+
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
+equal:
+ BEQ CR6,done
+ MOVD $1, R8
+ BGT CR6,greater
+ NEG R8
+
+greater:
+ MOVD R8, (R7)
+ RET
+
+done:
+ MOVD $0, (R7)
+ RET
+
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R3
RET
@@ -1353,18 +1469,6 @@
// traceback from goexit1 must hit code range of goexit
MOVD R0, R0 // NOP
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
- RET
-
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
RET
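
Note: the found/found_qw_align paths above compress the 0x00/0xFF compare-result vectors into one 64-bit mask and then count trailing (little-endian) or leading (big-endian) zeros to recover the byte offset. A minimal Go sketch of the same mask-and-count idea, using math/bits and an illustrative helper name (not runtime code), valid for blocks of at most 64 bytes:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // maskIndex mimics the vector trick in plain Go: build a bitmask with
    // one bit per matching byte position, then use the count of trailing
    // zeros as the index of the first match. Valid for blocks of up to
    // 64 bytes; illustrative sketch only.
    func maskIndex(buf []byte, c byte) int {
        var mask uint64
        for i, b := range buf {
            if b == c {
                mask |= 1 << uint(i) // bit i set means buf[i] == c
            }
        }
        if mask == 0 {
            return -1 // no match in this block
        }
        return bits.TrailingZeros64(mask) // lowest set bit = first match
    }

    func main() {
        fmt.Println(maskIndex([]byte("runtime"), 't')) // 3
        fmt.Println(maskIndex([]byte("runtime"), 'z')) // -1
    }
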
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
index 20e740b..6b71830 100644
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@@ -7,6 +7,83 @@
#include "funcdata.h"
#include "textflag.h"
+// _rt0_s390x_lib is common startup code for s390x systems when
+// using -buildmode=c-archive or -buildmode=c-shared. The linker will
+// arrange to invoke this function as a global constructor (for
+// c-archive) or when the shared library is loaded (for c-shared).
+// We expect argc and argv to be passed in the usual C ABI registers
+// R2 and R3.
+TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0
+ MOVD R2, _rt0_s390x_lib_argc<>(SB)
+ MOVD R3, _rt0_s390x_lib_argv<>(SB)
+
+ // Save R6-R15 in the register save area of the calling function.
+ STMG R6, R15, 48(R15)
+
+ // Allocate 80 bytes on the stack.
+ MOVD $-80(R15), R15
+
+ // Save F8-F15 in our stack frame.
+ FMOVD F8, 16(R15)
+ FMOVD F9, 24(R15)
+ FMOVD F10, 32(R15)
+ FMOVD F11, 40(R15)
+ FMOVD F12, 48(R15)
+ FMOVD F13, 56(R15)
+ FMOVD F14, 64(R15)
+ FMOVD F15, 72(R15)
+
+ // Synchronous initialization.
+ MOVD $runtime·libpreinit(SB), R1
+ BL R1
+
+ // Create a new thread to finish Go runtime initialization.
+ MOVD _cgo_sys_thread_create(SB), R1
+ CMP R1, $0
+ BEQ nocgo
+ MOVD $_rt0_s390x_lib_go(SB), R2
+ MOVD $0, R3
+ BL R1
+ BR restore
+
+nocgo:
+ MOVD $0x800000, R1 // stacksize
+ MOVD R1, 0(R15)
+ MOVD $_rt0_s390x_lib_go(SB), R1
+ MOVD R1, 8(R15) // fn
+ MOVD $runtime·newosproc(SB), R1
+ BL R1
+
+restore:
+ // Restore F8-F15 from our stack frame.
+ FMOVD 16(R15), F8
+ FMOVD 24(R15), F9
+ FMOVD 32(R15), F10
+ FMOVD 40(R15), F11
+ FMOVD 48(R15), F12
+ FMOVD 56(R15), F13
+ FMOVD 64(R15), F14
+ FMOVD 72(R15), F15
+ MOVD $80(R15), R15
+
+ // Restore R6-R15.
+ LMG 48(R15), R6, R15
+ RET
+
+// _rt0_s390x_lib_go initializes the Go runtime.
+// This is started in a separate thread by _rt0_s390x_lib.
+TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0
+ MOVD _rt0_s390x_lib_argc<>(SB), R2
+ MOVD _rt0_s390x_lib_argv<>(SB), R3
+ MOVD $runtime·rt0_go(SB), R1
+ BR R1
+
+DATA _rt0_s390x_lib_argc<>(SB)/8, $0
+GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8
+DATA _rt0_s390x_lib_argv<>(SB)/8, $0
+GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8
+
TEXT runtime·rt0_go(SB),NOSPLIT,$0
// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
// C TLS base pointer in AR0:AR1
@@ -116,17 +193,6 @@
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
MOVD buf+0(FP), R5
-
- // If ctxt is not nil, invoke deletion barrier before overwriting.
- MOVD gobuf_ctxt(R5), R1
- CMPBEQ R1, $0, nilctxt
- MOVD $gobuf_ctxt(R5), R1
- MOVD R1, 8(R15)
- MOVD R0, 16(R15)
- BL runtime·writebarrierptr_prewrite(SB)
- MOVD buf+0(FP), R5
-
-nilctxt:
MOVD gobuf_g(R5), g // make sure g is not nil
BL runtime·save_g(SB)
@@ -235,9 +301,12 @@
noswitch:
// already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
MOVD 0(R12), R3 // code pointer
- BL (R3)
- RET
+ MOVD 0(R15), LR // restore LR
+ ADD $8, R15
+ BR (R3)
/*
* support for morestack
@@ -272,7 +341,7 @@
MOVD LR, R8
MOVD R8, (g_sched+gobuf_pc)(g)
MOVD R5, (g_sched+gobuf_lr)(g)
- // newstack will fill gobuf.ctxt.
+ MOVD R12, (g_sched+gobuf_ctxt)(g)
// Called from f.
// Set m->morebuf to f's caller.
@@ -285,9 +354,8 @@
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R15
// Create a stack frame on g0 to call newstack.
- MOVD $0, -16(R15) // Zero saved LR in frame
- SUB $16, R15
- MOVD R12, 8(R15) // ctxt argument
+ MOVD $0, -8(R15) // Zero saved LR in frame
+ SUB $8, R15
BL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
@@ -656,9 +724,9 @@
MOVD R1, LR
RET
-TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
- MOVD 16(R15), R3 // LR saved by caller
- MOVD R3, ret+8(FP)
+TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD 0(R15), R3 // LR saved by caller
+ MOVD R3, ret+0(FP)
RET
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
@@ -678,23 +746,6 @@
MOVD R3, ret+0(FP)
RET
-// memhash_varlen(p unsafe.Pointer, h seed) uintptr
-// redirects to memhash(p, h, size) using the size
-// stored in the closure.
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
- GO_ARGS
- NO_LOCAL_POINTERS
- MOVD p+0(FP), R3
- MOVD h+8(FP), R4
- MOVD 8(R12), R5
- MOVD R3, 8(R15)
- MOVD R4, 16(R15)
- MOVD R5, 24(R15)
- BL runtime·memhash(SB)
- MOVD 32(R15), R3
- MOVD R3, ret+16(FP)
- RET
-
// AES hashing not implemented for s390x
TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R15
@@ -721,18 +772,6 @@
LA ret+16(FP), R7
BR runtime·memeqbody(SB)
-// eqstring tests whether two strings are equal.
-// The compiler guarantees that strings passed
-// to eqstring have equal length.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
- MOVD s1_base+0(FP), R3
- MOVD s1_len+8(FP), R6
- MOVD s2_base+16(FP), R5
- LA ret+32(FP), R7
- BR runtime·memeqbody(SB)
-
TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
MOVD a_len+8(FP), R2
MOVD b_len+32(FP), R6
@@ -949,23 +988,12 @@
// traceback from goexit1 must hit code range of goexit
BYTE $0x07; BYTE $0x00; // 2-byte nop
-TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
- RET
-
-TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
- RET
-
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
RET
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
- SYNC
+ // Stores are already ordered on s390x, so this is just a
+ // compile barrier.
RET
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
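
Note: the publicationBarrier change above relies on s390x's strong store ordering, so only a compile-time barrier is needed. At the Go source level, the portable way to publish an initialized value to other goroutines is an atomic store paired with an atomic load; a minimal sketch using sync/atomic (illustrative types, not runtime internals):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type config struct{ name string }

    var current atomic.Value // holds *config

    // publish stores a fully initialized *config; a reader that observes
    // the store is guaranteed to see the initialized fields.
    func publish(c *config) { current.Store(c) }

    func load() *config {
        c, _ := current.Load().(*config)
        return c
    }

    func main() {
        publish(&config{name: "example"})
        fmt.Println(load().name)
    }
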
diff --git a/src/runtime/cgo/asm_386.s b/src/runtime/cgo/asm_386.s
index dc8897d..7293c20 100644
--- a/src/runtime/cgo/asm_386.s
+++ b/src/runtime/cgo/asm_386.s
@@ -7,26 +7,23 @@
// Called by C code generated by cmd/cgo.
// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr)
// Saves C callee-saved registers and calls fn with three arguments.
-TEXT crosscall2(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- PUSHL BX
- PUSHL SI
- PUSHL DI
-
- SUBL $12, SP
- MOVL 20(BP), AX
+TEXT crosscall2(SB),NOSPLIT,$28-16
+ MOVL BP, 24(SP)
+ MOVL BX, 20(SP)
+ MOVL SI, 16(SP)
+ MOVL DI, 12(SP)
+
+ MOVL ctxt+12(FP), AX
MOVL AX, 8(SP)
- MOVL 16(BP), AX
+ MOVL n+8(FP), AX
MOVL AX, 4(SP)
- MOVL 12(BP), AX
+ MOVL a+4(FP), AX
MOVL AX, 0(SP)
- MOVL 8(BP), AX
+ MOVL fn+0(FP), AX
CALL AX
- ADDL $12, SP
-
- POPL DI
- POPL SI
- POPL BX
- POPL BP
+
+ MOVL 12(SP), DI
+ MOVL 16(SP), SI
+ MOVL 20(SP), BX
+ MOVL 24(SP), BP
RET
diff --git a/src/runtime/cgo/asm_amd64.s b/src/runtime/cgo/asm_amd64.s
index 541bd9e..0e33fc4 100644
--- a/src/runtime/cgo/asm_amd64.s
+++ b/src/runtime/cgo/asm_amd64.s
@@ -7,14 +7,12 @@
// Called by C code generated by cmd/cgo.
// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr)
// Saves C callee-saved registers and calls fn with three arguments.
-TEXT crosscall2(SB),NOSPLIT,$0
#ifndef GOOS_windows
- SUBQ $0x58, SP /* keeps stack pointer 32-byte aligned */
+TEXT crosscall2(SB),NOSPLIT,$0x50-0 /* keeps stack pointer 32-byte aligned */
#else
- SUBQ $0x118, SP /* also need to save xmm6 - xmm15 */
+TEXT crosscall2(SB),NOSPLIT,$0x110-0 /* also need to save xmm6 - xmm15 */
#endif
MOVQ BX, 0x18(SP)
- MOVQ BP, 0x20(SP)
MOVQ R12, 0x28(SP)
MOVQ R13, 0x30(SP)
MOVQ R14, 0x38(SP)
@@ -62,15 +60,9 @@
#endif
MOVQ 0x18(SP), BX
- MOVQ 0x20(SP), BP
MOVQ 0x28(SP), R12
MOVQ 0x30(SP), R13
MOVQ 0x38(SP), R14
MOVQ 0x40(SP), R15
-#ifndef GOOS_windows
- ADDQ $0x58, SP
-#else
- ADDQ $0x118, SP
-#endif
RET
diff --git a/src/runtime/cgo/asm_mipsx.s b/src/runtime/cgo/asm_mipsx.s
index dd16af6..2483bdd 100644
--- a/src/runtime/cgo/asm_mipsx.s
+++ b/src/runtime/cgo/asm_mipsx.s
@@ -20,7 +20,11 @@
// Space for 9 caller-saved GPR + LR + 6 caller-saved FPR.
// O32 ABI allows us to smash 16 bytes argument area of caller frame.
+#ifndef GOMIPS_softfloat
SUBU $(4*14+8*6-16), R29
+#else
+ SUBU $(4*14-16), R29 // For soft-float, no FPR.
+#endif
MOVW R5, (4*1)(R29)
MOVW R6, (4*2)(R29)
MOVW R7, (4*3)(R29)
@@ -34,14 +38,14 @@
MOVW R23, (4*11)(R29)
MOVW g, (4*12)(R29)
MOVW R31, (4*13)(R29)
-
+#ifndef GOMIPS_softfloat
MOVD F20, (4*14)(R29)
MOVD F22, (4*14+8*1)(R29)
MOVD F24, (4*14+8*2)(R29)
MOVD F26, (4*14+8*3)(R29)
MOVD F28, (4*14+8*4)(R29)
MOVD F30, (4*14+8*5)(R29)
-
+#endif
JAL runtime·load_g(SB)
JAL (R4)
@@ -55,7 +59,7 @@
MOVW (4*11)(R29), R23
MOVW (4*12)(R29), g
MOVW (4*13)(R29), R31
-
+#ifndef GOMIPS_softfloat
MOVD (4*14)(R29), F20
MOVD (4*14+8*1)(R29), F22
MOVD (4*14+8*2)(R29), F24
@@ -64,4 +68,7 @@
MOVD (4*14+8*5)(R29), F30
ADDU $(4*14+8*6-16), R29
+#else
+ ADDU $(4*14-16), R29
+#endif
RET
diff --git a/src/runtime/cgo/gcc_android_386.c b/src/runtime/cgo/gcc_android_386.c
index 23a15f1..28f553c 100644
--- a/src/runtime/cgo/gcc_android_386.c
+++ b/src/runtime/cgo/gcc_android_386.c
@@ -36,7 +36,7 @@
*/
ntofree = 0;
for(;;) {
- if(pthread_key_create(&k, nil) < 0) {
+ if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
@@ -77,7 +77,10 @@
ts = *(ThreadStart*)v;
free(v);
- pthread_setspecific(k1, (void*)ts.g);
+ if (pthread_setspecific(k1, (void*)ts.g) != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
+ abort();
+ }
crosscall_386(ts.fn);
return nil;
diff --git a/src/runtime/cgo/gcc_android_amd64.c b/src/runtime/cgo/gcc_android_amd64.c
index e006c49..6f92d90 100644
--- a/src/runtime/cgo/gcc_android_amd64.c
+++ b/src/runtime/cgo/gcc_android_amd64.c
@@ -41,7 +41,7 @@
*/
ntofree = 0;
for(;;) {
- if(pthread_key_create(&k, nil) < 0) {
+ if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
@@ -82,7 +82,10 @@
ts = *(ThreadStart*)v;
free(v);
- pthread_setspecific(k1, (void*)ts.g);
+ if (pthread_setspecific(k1, (void*)ts.g) != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
+ abort();
+ }
crosscall_amd64(ts.fn);
return nil;
diff --git a/src/runtime/cgo/gcc_darwin_386.c b/src/runtime/cgo/gcc_darwin_386.c
index 4ab3267..7d3c55c 100644
--- a/src/runtime/cgo/gcc_darwin_386.c
+++ b/src/runtime/cgo/gcc_darwin_386.c
@@ -39,8 +39,8 @@
*
* The linker and runtime hard-code this constant offset
* from %gs where we expect to find g.
- * Known to ../../../liblink/sym.c:/468
- * and to ../sys_darwin_386.s:/468
+ * Known to src/cmd/link/internal/ld/sym.go:/0x468
+ * and to src/runtime/sys_darwin_386.s:/0x468
*
* This is truly disgusting and a bit fragile, but taking care
* of it here protects the rest of the system from damage.
@@ -64,7 +64,7 @@
*/
ntofree = 0;
for(;;) {
- if(pthread_key_create(&k, nil) < 0) {
+ if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
@@ -142,7 +142,10 @@
ts = *(ThreadStart*)v;
free(v);
- pthread_setspecific(k1, (void*)ts.g);
+ if (pthread_setspecific(k1, (void*)ts.g) != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
+ abort();
+ }
crosscall_386(ts.fn);
return nil;
diff --git a/src/runtime/cgo/gcc_darwin_amd64.c b/src/runtime/cgo/gcc_darwin_amd64.c
index 181d0ab..c57608c 100644
--- a/src/runtime/cgo/gcc_darwin_amd64.c
+++ b/src/runtime/cgo/gcc_darwin_amd64.c
@@ -28,14 +28,14 @@
*
* The linker and runtime hard-code this constant offset
* from %gs where we expect to find g.
- * Known to ../../../liblink/sym.c:/8a0
- * and to ../sys_darwin_amd64.s:/8a0
+ * Known to src/cmd/link/internal/ld/sym.go:/0x8a0
+ * and to src/runtime/sys_darwin_amd64.s:/0x8a0
*
* As disgusting as on the 386; same justification.
*/
ntofree = 0;
for(;;) {
- if(pthread_key_create(&k, nil) < 0) {
+ if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
@@ -113,7 +113,10 @@
ts = *(ThreadStart*)v;
free(v);
- pthread_setspecific(k1, (void*)ts.g);
+ if (pthread_setspecific(k1, (void*)ts.g) != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
+ abort();
+ }
crosscall_amd64(ts.fn);
return nil;
diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c
index 31594ad..3dc5bde 100644
--- a/src/runtime/cgo/gcc_libinit.c
+++ b/src/runtime/cgo/gcc_libinit.c
@@ -98,6 +98,10 @@
for (tries = 0; tries < 20; tries++) {
err = pthread_create(thread, attr, pfn, arg);
+ if (err == 0) {
+ pthread_detach(*thread);
+ return 0;
+ }
if (err != EAGAIN) {
return err;
}
diff --git a/src/runtime/cgo/gcc_mipsx.S b/src/runtime/cgo/gcc_mipsx.S
index c51c36a..54f4b82 100644
--- a/src/runtime/cgo/gcc_mipsx.S
+++ b/src/runtime/cgo/gcc_mipsx.S
@@ -14,8 +14,11 @@
.globl crosscall1
.set noat
crosscall1:
+#ifndef __mips_soft_float
addiu $29, $29, -88
-
+#else
+ addiu $29, $29, -40 // For soft-float, no need to make room for FP registers
+#endif
sw $31, 0($29)
sw $16, 4($29)
sw $17, 8($29)
@@ -27,14 +30,14 @@
sw $23, 32($29)
sw $30, 36($29)
+#ifndef __mips_soft_float
sdc1 $f20, 40($29)
sdc1 $f22, 48($29)
sdc1 $f24, 56($29)
sdc1 $f26, 64($29)
sdc1 $f28, 72($29)
sdc1 $f30, 80($29)
-
-
+#endif
move $20, $4 // save R4
move $4, $6
jalr $5 // call setg_gcc
@@ -49,16 +52,20 @@
lw $22, 28($29)
lw $23, 32($29)
lw $30, 36($29)
+#ifndef __mips_soft_float
ldc1 $f20, 40($29)
ldc1 $f22, 48($29)
ldc1 $f24, 56($29)
ldc1 $f26, 64($29)
ldc1 $f28, 72($29)
ldc1 $f30, 80($29)
-
+#endif
lw $31, 0($29)
-
+#ifndef __mips_soft_float
addiu $29, $29, 88
+#else
+ addiu $29, $29, 40
+#endif
jr $31
.set at
diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c
index 29acd3c..5cf6bdf 100644
--- a/src/runtime/cgo/gcc_mmap.c
+++ b/src/runtime/cgo/gcc_mmap.c
@@ -11,7 +11,7 @@
#include "libcgo.h"
-void *
+uintptr_t
x_cgo_mmap(void *addr, uintptr_t length, int32_t prot, int32_t flags, int32_t fd, uint32_t offset) {
void *p;
@@ -20,9 +20,9 @@
_cgo_tsan_release();
if (p == MAP_FAILED) {
/* This is what the Go code expects on failure. */
- p = (void *) (uintptr_t) errno;
+ return (uintptr_t)errno;
}
- return p;
+ return (uintptr_t)p;
}
void
diff --git a/src/runtime/cgo/gcc_signal2_darwin_armx.c b/src/runtime/cgo/gcc_signal2_darwin_armx.c
new file mode 100644
index 0000000..54b7e32
--- /dev/null
+++ b/src/runtime/cgo/gcc_signal2_darwin_armx.c
@@ -0,0 +1,13 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build lldb
+// +build darwin
+// +build arm arm64
+
+// Used by gcc_signal_darwin_armx.c when doing the test build during cgo.
+// We hope that for real binaries the definition provided by Go will take precedence
+// and the linker will drop this .o file altogether, which is why this definition
+// is all by itself in its own file.
+void __attribute__((weak)) xx_cgo_panicmem(void) {}
diff --git a/src/runtime/cgo/gcc_signal_darwin_armx.c b/src/runtime/cgo/gcc_signal_darwin_armx.c
index a2d520b..3ab1d8b 100644
--- a/src/runtime/cgo/gcc_signal_darwin_armx.c
+++ b/src/runtime/cgo/gcc_signal_darwin_armx.c
@@ -39,7 +39,8 @@
#include "libcgo.h"
#include "libcgo_unix.h"
-uintptr_t x_cgo_panicmem;
+void xx_cgo_panicmem(void);
+uintptr_t x_cgo_panicmem = (uintptr_t)xx_cgo_panicmem;
static pthread_mutex_t mach_exception_handler_port_set_mu;
static mach_port_t mach_exception_handler_port_set = MACH_PORT_NULL;
diff --git a/src/runtime/cgo/gcc_signal_darwin_lldb.c b/src/runtime/cgo/gcc_signal_darwin_lldb.c
index 12cc388..54d91f6 100644
--- a/src/runtime/cgo/gcc_signal_darwin_lldb.c
+++ b/src/runtime/cgo/gcc_signal_darwin_lldb.c
@@ -8,7 +8,5 @@
#include <stdint.h>
-uintptr_t x_cgo_panicmem;
-
void darwin_arm_init_thread_exception_port() {}
void darwin_arm_init_mach_exception_handler() {}
diff --git a/src/runtime/cgo/gcc_util.c b/src/runtime/cgo/gcc_util.c
index 2d5382a..3fcb48c 100644
--- a/src/runtime/cgo/gcc_util.c
+++ b/src/runtime/cgo/gcc_util.c
@@ -29,6 +29,10 @@
#include <string.h>
+char x_cgo_yield_strncpy_src = 0;
+char x_cgo_yield_strncpy_dst = 0;
+size_t x_cgo_yield_strncpy_n = 0;
+
/*
Stub for allowing libc interceptors to execute.
@@ -50,9 +54,14 @@
So we choose strncpy(_, _, 0): it requires an extra header,
but it's standard and should be very efficient.
+
+ GCC 7 has an unfortunate habit of optimizing out strncpy calls (see
+ https://golang.org/issue/21196), so the arguments here need to be global
+ variables with external linkage in order to ensure that the call traps all the
+ way down into libc.
*/
- char nothing = 0;
- strncpy(&nothing, &nothing, 0);
+ strncpy(&x_cgo_yield_strncpy_dst, &x_cgo_yield_strncpy_src,
+ x_cgo_yield_strncpy_n);
}
void(* const _cgo_yield)() = &x_cgo_yield;
diff --git a/src/runtime/cgo/signal_darwin_arm.s b/src/runtime/cgo/signal_darwin_arm.s
index ee5c3d3..f886e4b 100644
--- a/src/runtime/cgo/signal_darwin_arm.s
+++ b/src/runtime/cgo/signal_darwin_arm.s
@@ -4,13 +4,13 @@
#include "textflag.h"
-// panicmem is the entrypoint for SIGSEGV as intercepted via a
+// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a
// mach thread port as EXC_BAD_ACCESS. As the segfault may have happened
-// in C code, we first need to load_g then call panicmem.
+// in C code, we first need to load_g then call xx_cgo_panicmem.
//
// R1 - LR at moment of fault
// R2 - PC at moment of fault
-TEXT ·panicmem(SB),NOSPLIT,$-4
+TEXT xx_cgo_panicmem(SB),NOSPLIT,$-4
// If in external C code, we need to load the g register.
BL runtime·load_g(SB)
CMP $0, g
diff --git a/src/runtime/cgo/signal_darwin_arm64.s b/src/runtime/cgo/signal_darwin_arm64.s
index 75aefd4..17781cf 100644
--- a/src/runtime/cgo/signal_darwin_arm64.s
+++ b/src/runtime/cgo/signal_darwin_arm64.s
@@ -4,13 +4,13 @@
#include "textflag.h"
-// panicmem is the entrypoint for SIGSEGV as intercepted via a
+// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a
// mach thread port as EXC_BAD_ACCESS. As the segfault may have happened
-// in C code, we first need to load_g then call panicmem.
+// in C code, we first need to load_g then call xx_cgo_panicmem.
//
// R1 - LR at moment of fault
// R2 - PC at moment of fault
-TEXT ·panicmem(SB),NOSPLIT,$-8
+TEXT xx_cgo_panicmem(SB),NOSPLIT,$-8
// If in external C code, we need to load the g register.
BL runtime·load_g(SB)
CMP $0, g
diff --git a/src/runtime/cgo/signal_darwin_armx.go b/src/runtime/cgo/signal_darwin_armx.go
index 9f6741e..9f4b462 100644
--- a/src/runtime/cgo/signal_darwin_armx.go
+++ b/src/runtime/cgo/signal_darwin_armx.go
@@ -7,29 +7,7 @@
package cgo
-import "unsafe"
+import _ "unsafe"
-//go:cgo_import_static x_cgo_panicmem
-//go:linkname x_cgo_panicmem x_cgo_panicmem
-var x_cgo_panicmem uintptr
-
-// use a pointer to avoid relocation of external symbol in __TEXT
-// make linker happy
-var _cgo_panicmem = &x_cgo_panicmem
-
-// TODO(crawshaw): move this into x_cgo_init, it will not run until
-// runtime has finished loading, which may be after its use.
-func init() {
- *_cgo_panicmem = funcPC(panicmem)
-}
-
-func funcPC(f interface{}) uintptr {
- var ptrSize = unsafe.Sizeof(uintptr(0))
- return **(**uintptr)(add(unsafe.Pointer(&f), ptrSize))
-}
-
-func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
- return unsafe.Pointer(uintptr(p) + x)
-}
-
-func panicmem()
+//go:cgo_export_static xx_cgo_panicmem xx_cgo_panicmem
+func xx_cgo_panicmem()
diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go
index aa531b9..b7c70c6 100644
--- a/src/runtime/cgo_mmap.go
+++ b/src/runtime/cgo_mmap.go
@@ -20,19 +20,21 @@
//go:linkname _cgo_munmap _cgo_munmap
var _cgo_munmap unsafe.Pointer
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer {
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) {
if _cgo_mmap != nil {
// Make ret a uintptr so that writing to it in the
// function literal does not trigger a write barrier.
// A write barrier here could break because of the way
// that mmap uses the same value both as a pointer and
// an errno value.
- // TODO: Fix mmap to return two values.
var ret uintptr
systemstack(func() {
ret = callCgoMmap(addr, n, prot, flags, fd, off)
})
- return unsafe.Pointer(ret)
+ if ret < 4096 {
+ return nil, int(ret)
+ }
+ return unsafe.Pointer(ret), 0
}
return sysMmap(addr, n, prot, flags, fd, off)
}
@@ -46,7 +48,7 @@
}
// sysMmap calls the mmap system call. It is implemented in assembly.
-func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int)
// callCgoMmap calls the mmap function in the runtime/cgo package
// using the GCC calling convention. It is implemented in assembly.
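
Note: the rewritten mmap wrapper above folds the address and the errno into a single return word from the C helper: values below one page (4096) can never be valid mappings and are decoded as error numbers. A hedged sketch of just that decoding step (decodeMmapResult is an illustrative name; the runtime does this inline):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // decodeMmapResult splits the combined return word: addresses below
    // one page (4096 bytes) can never be valid mappings, so they are
    // treated as error numbers instead.
    func decodeMmapResult(ret uintptr) (unsafe.Pointer, int) {
        if ret < 4096 {
            return nil, int(ret) // errno, e.g. 12 (ENOMEM) on Linux
        }
        return unsafe.Pointer(ret), 0
    }

    func main() {
        p, errno := decodeMmapResult(12)
        fmt.Println(p, errno) // <nil> 12
    }
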
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index 755269e..02c4cb3 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -8,9 +8,9 @@
// runtime.cgocall(_cgo_Cfunc_f, frame), where _cgo_Cfunc_f is a
// gcc-compiled function written by cgo.
//
-// runtime.cgocall (below) locks g to m, calls entersyscall
-// so as not to block other goroutines or the garbage collector,
-// and then calls runtime.asmcgocall(_cgo_Cfunc_f, frame).
+// runtime.cgocall (below) calls entersyscall so as not to block
+// other goroutines or the garbage collector, and then calls
+// runtime.asmcgocall(_cgo_Cfunc_f, frame).
//
// runtime.asmcgocall (in asm_$GOARCH.s) switches to the m->g0 stack
// (assumed to be an operating system-allocated stack, so safe to run
@@ -104,13 +104,9 @@
racereleasemerge(unsafe.Pointer(&racecgosync))
}
- // Lock g to m to ensure we stay on the same stack if we do a
- // cgo callback. In case of panic, unwindm calls endcgo.
- lockOSThread()
mp := getg().m
mp.ncgocall++
mp.ncgo++
- mp.incgo = true
// Reset traceback.
mp.cgoCallers[0] = 0
@@ -130,7 +126,14 @@
// and then re-enter the "system call" reusing the PC and SP
// saved by entersyscall here.
entersyscall(0)
+
+ mp.incgo = true
errno := asmcgocall(fn, arg)
+
+ // Call endcgo before exitsyscall because exitsyscall may
+ // reschedule us on to a different M.
+ endcgo(mp)
+
exitsyscall(0)
// From the garbage collector's perspective, time can move
@@ -145,8 +148,8 @@
// GC by forcing them to stay live across this time warp.
KeepAlive(fn)
KeepAlive(arg)
+ KeepAlive(mp)
- endcgo(mp)
return errno
}
@@ -158,8 +161,6 @@
if raceenabled {
raceacquire(unsafe.Pointer(&racecgosync))
}
-
- unlockOSThread() // invalidates mp
}
// Call from C back to Go.
@@ -171,6 +172,12 @@
exit(2)
}
+ // The call from C is on gp.m's g0 stack, so we must ensure
+ // that we stay on that M. We have to do this before calling
+ // exitsyscall, since it would otherwise be free to move us to
+ // a different M. The call to unlockOSThread is in unwindm.
+ lockOSThread()
+
// Save current syscall parameters, so m.syscall can be
// used again if callback decide to make syscall.
syscall := gp.m.syscall
@@ -186,6 +193,10 @@
cgocallbackg1(ctxt)
+ // At this point unlockOSThread has been called.
+ // The following code must not change to a different m.
+ // This is enforced by checking incgo in the schedule function.
+
gp.m.incgo = true
// going back to cgo call
reentersyscall(savedpc, uintptr(savedsp))
@@ -321,32 +332,35 @@
}
func unwindm(restore *bool) {
- if !*restore {
- return
- }
- // Restore sp saved by cgocallback during
- // unwind of g's stack (see comment at top of file).
- mp := acquirem()
- sched := &mp.g0.sched
- switch GOARCH {
- default:
- throw("unwindm not implemented")
- case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle":
- sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize))
- case "arm64":
- sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16))
+ if *restore {
+ // Restore sp saved by cgocallback during
+ // unwind of g's stack (see comment at top of file).
+ mp := acquirem()
+ sched := &mp.g0.sched
+ switch GOARCH {
+ default:
+ throw("unwindm not implemented")
+ case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle":
+ sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize))
+ case "arm64":
+ sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16))
+ }
+
+ // Call endcgo to do the accounting that cgocall will not have a
+ // chance to do during an unwind.
+ //
+ // In the case where a Go call originates from C, ncgo is 0
+ // and there is no matching cgocall to end.
+ if mp.ncgo > 0 {
+ endcgo(mp)
+ }
+
+ releasem(mp)
}
- // Call endcgo to do the accounting that cgocall will not have a
- // chance to do during an unwind.
- //
- // In the case where a a Go call originates from C, ncgo is 0
- // and there is no matching cgocall to end.
- if mp.ncgo > 0 {
- endcgo(mp)
- }
-
- releasem(mp)
+ // Undo the call to lockOSThread in cgocallbackg.
+ // We must still stay on the same m.
+ unlockOSThread()
}
// called from assembly
@@ -580,10 +594,8 @@
// No more possible pointers.
break
}
- if hbits.isPointer() {
- if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
- panic(errorString(msg))
- }
+ if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
+ panic(errorString(msg))
}
hbits = hbits.next()
}
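
Note: the cgocall.go changes above move the thread pinning from cgocall into cgocallbackg, which must call lockOSThread before exitsyscall so the callback keeps running on the same M; unwindm undoes the lock. The user-level counterpart of this pinning is the public runtime.LockOSThread API; a minimal sketch (pinToThread is an illustrative helper, not part of the runtime):

    package main

    import (
        "fmt"
        "runtime"
    )

    // pinToThread runs f with the calling goroutine pinned to its current
    // OS thread, the user-level counterpart of the lockOSThread call that
    // cgocallbackg now performs internally.
    func pinToThread(f func()) {
        runtime.LockOSThread()
        defer runtime.UnlockOSThread()
        f()
    }

    func main() {
        pinToThread(func() {
            fmt.Println("running pinned to one OS thread")
        })
    }
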
diff --git a/src/runtime/cgocheck.go b/src/runtime/cgocheck.go
index 61aaa0a..ea1ab97 100644
--- a/src/runtime/cgocheck.go
+++ b/src/runtime/cgocheck.go
@@ -16,6 +16,10 @@
// cgoCheckWriteBarrier is called whenever a pointer is stored into memory.
// It throws if the program is storing a Go pointer into non-Go memory.
+//
+// This is called from the write barrier, so its entire call tree must
+// be nosplit.
+//
//go:nosplit
//go:nowritebarrier
func cgoCheckWriteBarrier(dst *uintptr, src uintptr) {
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 6294678..41ae803 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -55,11 +55,19 @@
}
//go:linkname reflect_makechan reflect.makechan
-func reflect_makechan(t *chantype, size int64) *hchan {
+func reflect_makechan(t *chantype, size int) *hchan {
return makechan(t, size)
}
-func makechan(t *chantype, size int64) *hchan {
+func makechan64(t *chantype, size int64) *hchan {
+ if int64(int(size)) != size {
+ panic(plainError("makechan: size out of range"))
+ }
+
+ return makechan(t, int(size))
+}
+
+func makechan(t *chantype, size int) *hchan {
elem := t.elem
// compiler checks this but be safe.
@@ -69,29 +77,33 @@
if hchanSize%maxAlign != 0 || elem.align > maxAlign {
throw("makechan: bad alignment")
}
- if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) {
+
+ if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize {
panic(plainError("makechan: size out of range"))
}
+ // Hchan does not contain pointers interesting for GC when elements stored in buf do not contain pointers.
+ // buf points into the same allocation, elemtype is persistent.
+ // SudoG's are referenced from their owning thread so they can't be collected.
+ // TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
var c *hchan
- if elem.kind&kindNoPointers != 0 || size == 0 {
- // Allocate memory in one call.
- // Hchan does not contain pointers interesting for GC in this case:
- // buf points into the same allocation, elemtype is persistent.
- // SudoG's are referenced from their owning thread so they can't be collected.
- // TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
+ switch {
+ case size == 0 || elem.size == 0:
+ // Queue or element size is zero.
+ c = (*hchan)(mallocgc(hchanSize, nil, true))
+ // Race detector uses this location for synchronization.
+ c.buf = unsafe.Pointer(c)
+ case elem.kind&kindNoPointers != 0:
+ // Elements do not contain pointers.
+ // Allocate hchan and buf in one call.
c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true))
- if size > 0 && elem.size != 0 {
- c.buf = add(unsafe.Pointer(c), hchanSize)
- } else {
- // race detector uses this location for synchronization
- // Also prevents us from pointing beyond the allocation (see issue 9401).
- c.buf = unsafe.Pointer(c)
- }
- } else {
+ c.buf = add(unsafe.Pointer(c), hchanSize)
+ default:
+ // Elements contain pointers.
c = new(hchan)
- c.buf = newarray(elem, int(size))
+ c.buf = mallocgc(uintptr(size)*elem.size, elem, true)
}
+
c.elemsize = uint16(elem.size)
c.elemtype = elem
c.dataqsiz = uint(size)
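
Note: makechan now takes an int, and makechan64 above rejects any 64-bit size whose round trip through int does not preserve the value. A minimal sketch of that overflow guard in isolation (checkedInt is an illustrative name):

    package main

    import "fmt"

    // checkedInt applies the guard used by makechan64: accept a 64-bit
    // size only if the round trip through int preserves the value, which
    // rejects overflow on 32-bit platforms.
    func checkedInt(size int64) (int, error) {
        if int64(int(size)) != size {
            return 0, fmt.Errorf("size %d out of range", size)
        }
        return int(size), nil
    }

    func main() {
        fmt.Println(checkedInt(8))       // 8 <nil>
        fmt.Println(checkedInt(1 << 40)) // fails only on 32-bit platforms
    }
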
@@ -110,7 +122,7 @@
// entry point for c <- x from compiled code
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
- chansend(c, elem, true, getcallerpc(unsafe.Pointer(&c)))
+ chansend(c, elem, true, getcallerpc())
}
/*
@@ -214,7 +226,7 @@
mysg.elem = ep
mysg.waitlink = nil
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.waiting = mysg
gp.param = nil
@@ -322,7 +334,7 @@
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&c))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(c), callerpc, funcPC(closechan))
racerelease(unsafe.Pointer(c))
}
@@ -499,7 +511,7 @@
mysg.waitlink = nil
gp.waiting = mysg
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
@@ -594,7 +606,7 @@
// }
//
func selectnbsend(c *hchan, elem unsafe.Pointer) (selected bool) {
- return chansend(c, elem, false, getcallerpc(unsafe.Pointer(&c)))
+ return chansend(c, elem, false, getcallerpc())
}
// compiler implements
@@ -644,7 +656,7 @@
//go:linkname reflect_chansend reflect.chansend
func reflect_chansend(c *hchan, elem unsafe.Pointer, nb bool) (selected bool) {
- return chansend(c, elem, !nb, getcallerpc(unsafe.Pointer(&c)))
+ return chansend(c, elem, !nb, getcallerpc())
}
//go:linkname reflect_chanrecv reflect.chanrecv
@@ -703,10 +715,16 @@
sgp.next = nil // mark as removed (see dequeueSudog)
}
- // if sgp participates in a select and is already signaled, ignore it
- if sgp.selectdone != nil {
- // claim the right to signal
- if *sgp.selectdone != 0 || !atomic.Cas(sgp.selectdone, 0, 1) {
+ // if a goroutine was put on this queue because of a
+ // select, there is a small window between the goroutine
+ // being woken up by a different case and it grabbing the
+ // channel locks. Once it has the lock
+ // it removes itself from the queue, so we won't see it after that.
+ // We use a flag in the G struct to tell us when someone
+ // else has won the race to signal this goroutine but the goroutine
+ // hasn't removed itself from the queue yet.
+ if sgp.isSelect {
+ if !atomic.Cas(&sgp.g.selectDone, 0, 1) {
continue
}
}
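
Note: the dequeue comment above describes a race between a select waking on one case and other cases trying to signal the same goroutine; whoever wins the compare-and-swap on g.selectDone owns the wakeup. A minimal sketch of the same first-CAS-wins pattern with sync/atomic (illustrative code, not the sudog machinery):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
    )

    // claim is the first-CAS-wins pattern: whichever caller flips done
    // from 0 to 1 owns the wakeup, everyone else backs off.
    func claim(done *uint32) bool {
        return atomic.CompareAndSwapUint32(done, 0, 1)
    }

    func main() {
        var done uint32
        var winners int32
        var wg sync.WaitGroup
        for i := 0; i < 4; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                if claim(&done) {
                    atomic.AddInt32(&winners, 1)
                }
            }()
        }
        wg.Wait()
        fmt.Println("winners:", winners) // always exactly 1
    }
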
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
index a75fa1b..b6188f5 100644
--- a/src/runtime/chan_test.go
+++ b/src/runtime/chan_test.go
@@ -5,6 +5,8 @@
package runtime_test
import (
+ "internal/testenv"
+ "math"
"runtime"
"sync"
"sync/atomic"
@@ -430,6 +432,65 @@
wg.Wait()
}
+func TestSelectFairness(t *testing.T) {
+ const trials = 10000
+ if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64le" {
+ testenv.SkipFlaky(t, 22047)
+ }
+ c1 := make(chan byte, trials+1)
+ c2 := make(chan byte, trials+1)
+ for i := 0; i < trials+1; i++ {
+ c1 <- 1
+ c2 <- 2
+ }
+ c3 := make(chan byte)
+ c4 := make(chan byte)
+ out := make(chan byte)
+ done := make(chan byte)
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for {
+ var b byte
+ select {
+ case b = <-c3:
+ case b = <-c4:
+ case b = <-c1:
+ case b = <-c2:
+ }
+ select {
+ case out <- b:
+ case <-done:
+ return
+ }
+ }
+ }()
+ cnt1, cnt2 := 0, 0
+ for i := 0; i < trials; i++ {
+ switch b := <-out; b {
+ case 1:
+ cnt1++
+ case 2:
+ cnt2++
+ default:
+ t.Fatalf("unexpected value %d on channel", b)
+ }
+ }
+ // If the select in the goroutine is fair,
+ // cnt1 and cnt2 should be about the same value.
+ // With 10,000 trials, the expected margin of error at
+ // a confidence level of five nines is 4.4172 / (2 * Sqrt(10000)).
+ r := float64(cnt1) / trials
+ e := math.Abs(r - 0.5)
+ t.Log(cnt1, cnt2, r, e)
+ if e > 4.4172/(2*math.Sqrt(trials)) {
+ t.Errorf("unfair select: in %d trials, results were %d, %d", trials, cnt1, cnt2)
+ }
+ close(done)
+ wg.Wait()
+}
+
func TestChanSendInterface(t *testing.T) {
type mt struct{}
m := &mt{}
@@ -669,6 +730,55 @@
<-ready2
}
+type struct0 struct{}
+
+func BenchmarkMakeChan(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ var x chan byte
+ for i := 0; i < b.N; i++ {
+ x = make(chan byte, 8)
+ }
+ close(x)
+ })
+ b.Run("Int", func(b *testing.B) {
+ var x chan int
+ for i := 0; i < b.N; i++ {
+ x = make(chan int, 8)
+ }
+ close(x)
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ var x chan *byte
+ for i := 0; i < b.N; i++ {
+ x = make(chan *byte, 8)
+ }
+ close(x)
+ })
+ b.Run("Struct", func(b *testing.B) {
+ b.Run("0", func(b *testing.B) {
+ var x chan struct0
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct0, 8)
+ }
+ close(x)
+ })
+ b.Run("32", func(b *testing.B) {
+ var x chan struct32
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct32, 8)
+ }
+ close(x)
+ })
+ b.Run("40", func(b *testing.B) {
+ var x chan struct40
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct40, 8)
+ }
+ close(x)
+ })
+ })
+}
+
func BenchmarkChanNonblocking(b *testing.B) {
myc := make(chan int)
b.RunParallel(func(pb *testing.PB) {
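
Note: TestSelectFairness above bounds the observed proportion by a normal-approximation margin of error, z * 0.5/sqrt(n) with z ≈ 4.4172 for a two-sided five-nines confidence level. A small sketch of the same arithmetic (fairnessBound is an illustrative name):

    package main

    import (
        "fmt"
        "math"
    )

    // fairnessBound reproduces the threshold used by TestSelectFairness:
    // for n fair trials the observed proportion should stay within
    // z*0.5/sqrt(n) of 0.5, where z ≈ 4.4172 for five-nines confidence.
    func fairnessBound(n float64) float64 {
        return 4.4172 / (2 * math.Sqrt(n))
    }

    func main() {
        n := 10000.0
        bound := fairnessBound(n) // ≈ 0.0221 for 10000 trials
        fmt.Printf("allowed deviation: %.4f\n", bound)
        // Example: 5100 of 10000 gives |0.51-0.5| = 0.0100 < 0.0221, so it passes.
    }
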
diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go
index fb841a9..e00dcb1 100644
--- a/src/runtime/cpuprof.go
+++ b/src/runtime/cpuprof.go
@@ -160,6 +160,7 @@
funcPC(_ExternalCode) + sys.PCQuantum,
}
cpuprof.log.write(nil, 0, hdr[:], lostStk[:])
+ p.lostExtra = 0
}
}
diff --git a/src/runtime/cputicks.go b/src/runtime/cputicks.go
index ccc3947..de97d5b 100644
--- a/src/runtime/cputicks.go
+++ b/src/runtime/cputicks.go
@@ -11,6 +11,6 @@
package runtime
-// careful: cputicks is not guaranteed to be monotonic! In particular, we have
+// careful: cputicks is not guaranteed to be monotonic! In particular, we have
// noticed drift between cpus on certain os/arch combinations. See issue 8976.
func cputicks() int64
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
index a5cbbad..3b9fedc 100644
--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -13,6 +13,7 @@
"os"
"os/exec"
"runtime"
+ "strconv"
"strings"
"testing"
"time"
@@ -113,7 +114,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
if err != nil {
t.Fatalf("exit status: %v\n%s", err, got)
}
@@ -136,7 +137,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
if err != nil {
t.Fatalf("exit status: %v\n%s", err, got)
}
@@ -203,14 +204,14 @@
const tries = 10
var tot1, tot2 time.Duration
for i := 0; i < tries; i++ {
- cmd := testEnv(exec.Command(exe, "CgoCheckBytes"))
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes"))
cmd.Env = append(cmd.Env, "GODEBUG=cgocheck=0", fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i))
start := time.Now()
cmd.Run()
d1 := time.Since(start)
- cmd = testEnv(exec.Command(exe, "CgoCheckBytes"))
+ cmd = testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes"))
cmd.Env = append(cmd.Env, fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i))
start = time.Now()
@@ -251,7 +252,7 @@
func TestCgoCrashTraceback(t *testing.T) {
t.Parallel()
- if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
+ if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") {
t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH)
}
got := runTestProg(t, "testprogcgo", "CrashTraceback")
@@ -273,7 +274,7 @@
func testCgoPprof(t *testing.T, buildArg, runArg string) {
t.Parallel()
- if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
+ if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") {
t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH)
}
testenv.MustHaveGoRun(t)
@@ -283,7 +284,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, runArg)).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, runArg)).CombinedOutput()
if err != nil {
if testenv.Builder() == "linux-amd64-alpine" {
// See Issue 18243 and Issue 19938.
@@ -295,7 +296,7 @@
defer os.Remove(fn)
for try := 0; try < 2; try++ {
- cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1"))
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1"))
// Check that pprof works both with and without explicit executable on command line.
if try == 0 {
cmd.Args = append(cmd.Args, exe, fn)
@@ -330,7 +331,7 @@
}
func TestCgoPprofPIE(t *testing.T) {
- testCgoPprof(t, "-ldflags=-extldflags=-pie", "CgoPprof")
+ testCgoPprof(t, "-buildmode=pie", "CgoPprof")
}
func TestCgoPprofThread(t *testing.T) {
@@ -359,7 +360,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput()
if err != nil {
t.Fatal(err)
}
@@ -388,7 +389,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput()
if err != nil {
t.Logf("%s\n", got)
t.Fatal(err)
@@ -411,3 +412,93 @@
t.Errorf("expected %q got %v", want, got)
}
}
+
+func TestCatchPanic(t *testing.T) {
+ t.Parallel()
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no signals on %s", runtime.GOOS)
+ case "darwin":
+ if runtime.GOARCH == "amd64" {
+ t.Skipf("crash() on darwin/amd64 doesn't raise SIGABRT")
+ }
+ }
+
+ testenv.MustHaveGoRun(t)
+
+ exe, err := buildTestProg(t, "testprogcgo")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for _, early := range []bool{true, false} {
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCatchPanic"))
+ // Make sure a panic results in a crash.
+ cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
+ if early {
+ // Tell testprogcgo to install an early signal handler for SIGABRT
+ cmd.Env = append(cmd.Env, "CGOCATCHPANIC_EARLY_HANDLER=1")
+ }
+ if out, err := cmd.CombinedOutput(); err != nil {
+ t.Errorf("testprogcgo CgoCatchPanic failed: %v\n%s", err, out)
+ }
+ }
+}
+
+func TestCgoLockOSThreadExit(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no pthreads on %s", runtime.GOOS)
+ }
+ t.Parallel()
+ testLockOSThreadExit(t, "testprogcgo")
+}
+
+func TestWindowsStackMemoryCgo(t *testing.T) {
+ if runtime.GOOS != "windows" {
+ t.Skip("skipping windows specific test")
+ }
+ testenv.SkipFlaky(t, 22575)
+ o := runTestProg(t, "testprogcgo", "StackMemory")
+ stackUsage, err := strconv.Atoi(o)
+ if err != nil {
+ t.Fatalf("Failed to read stack usage: %v", err)
+ }
+ if expected, got := 100<<10, stackUsage; got > expected {
+ t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got)
+ }
+}
+
+func TestSigStackSwapping(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no sigaltstack on %s", runtime.GOOS)
+ }
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", "SigStack")
+ want := "OK\n"
+ if got != want {
+ t.Errorf("expected %q got %v", want, got)
+ }
+}
+
+func TestCgoTracebackSigpanic(t *testing.T) {
+ // Test unwinding over a sigpanic in C code without a C
+ // symbolizer. See issue #23576.
+ if runtime.GOOS == "windows" {
+ // On Windows if we get an exception in C code, we let
+ // the Windows exception handler unwind it, rather
+ // than injecting a sigpanic.
+ t.Skip("no sigpanic in C on windows")
+ }
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", "TracebackSigpanic")
+ want := "runtime.sigpanic"
+ if !strings.Contains(got, want) {
+ t.Fatalf("want failure containing %q. output:\n%s\n", want, got)
+ }
+ nowant := "unexpected return pc"
+ if strings.Contains(got, nowant) {
+ t.Fatalf("failure incorrectly contains %q. output:\n%s\n", nowant, got)
+ }
+}
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 7753809..0254ebd 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -32,25 +32,6 @@
os.Exit(status)
}
-func testEnv(cmd *exec.Cmd) *exec.Cmd {
- if cmd.Env != nil {
- panic("environment already set")
- }
- for _, env := range os.Environ() {
- // Exclude GODEBUG from the environment to prevent its output
- // from breaking tests that are trying to parse other command output.
- if strings.HasPrefix(env, "GODEBUG=") {
- continue
- }
- // Exclude GOTRACEBACK for the same reason.
- if strings.HasPrefix(env, "GOTRACEBACK=") {
- continue
- }
- cmd.Env = append(cmd.Env, env)
- }
- return cmd
-}
-
var testprog struct {
sync.Mutex
dir string
@@ -62,7 +43,11 @@
err error
}
-func runTestProg(t *testing.T, binary, name string) string {
+func runTestProg(t *testing.T, binary, name string, env ...string) string {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
testenv.MustHaveGoBuild(t)
exe, err := buildTestProg(t, binary)
@@ -70,7 +55,11 @@
t.Fatal(err)
}
- cmd := testEnv(exec.Command(exe, name))
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, name))
+ cmd.Env = append(cmd.Env, env...)
+ if testing.Short() {
+ cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1")
+ }
var b bytes.Buffer
cmd.Stdout = &b
cmd.Stderr = &b
@@ -111,6 +100,10 @@
}
func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
checkStaleRuntime(t)
testprog.Lock()
@@ -139,7 +132,7 @@
exe := filepath.Join(testprog.dir, name+".exe")
cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...)
cmd.Dir = "testdata/" + binary
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out)
testprog.target[name] = target
@@ -158,14 +151,14 @@
func checkStaleRuntime(t *testing.T) {
staleRuntimeOnce.Do(func() {
// 'go run' uses the installed copy of runtime.a, which may be out of date.
- out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput()
if err != nil {
staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out))
return
}
if string(out) != "false\n" {
t.Logf("go list -f {{.Stale}} runtime:\n%s", out)
- out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
if err != nil {
t.Logf("go list -f {{.StaleReason}} failed: %v", err)
}
@@ -468,7 +461,7 @@
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "MemProf")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "MemProf")).CombinedOutput()
if err != nil {
t.Fatal(err)
}
@@ -476,7 +469,7 @@
defer os.Remove(fn)
for try := 0; try < 2; try++ {
- cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top"))
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top"))
// Check that pprof works both with and without explicit executable on command line.
if try == 0 {
cmd.Args = append(cmd.Args, exe, fn)
@@ -586,7 +579,7 @@
const tries = 10
retry:
for i := 0; i < tries; i++ {
- got, err := testEnv(exec.Command(exe, "PanicRace")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "PanicRace")).CombinedOutput()
if err == nil {
t.Logf("try %d: program exited successfully, should have failed", i+1)
continue
@@ -614,3 +607,17 @@
}
t.Errorf("test ran %d times without producing expected output", tries)
}
+
+func TestBadTraceback(t *testing.T) {
+ output := runTestProg(t, "testprog", "BadTraceback")
+ for _, want := range []string{
+ "runtime: unexpected return pc",
+ "called from 0xbad",
+ "00000bad", // Smashed LR in hex dump
+ "<main.badLR", // Symbolization in hex dump (badLR1 or badLR2)
+ } {
+ if !strings.Contains(output, want) {
+ t.Errorf("output does not contain %q:\n%s", want, output)
+ }
+ }
+}
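
Note: the tests above switch from the removed local testEnv helper to testenv.CleanCmdEnv, which likewise strips GODEBUG and GOTRACEBACK so child output stays parseable. internal/testenv is not importable outside the standard library; an equivalent helper for external test code might look like this (sketch based on the removed testEnv):

    package main

    import (
        "fmt"
        "os"
        "os/exec"
        "strings"
    )

    // cleanCmdEnv mirrors the removed testEnv helper: copy the current
    // environment into cmd but drop GODEBUG and GOTRACEBACK so their
    // effects do not interfere with parsing the child's output.
    func cleanCmdEnv(cmd *exec.Cmd) *exec.Cmd {
        if cmd.Env != nil {
            panic("environment already set")
        }
        for _, env := range os.Environ() {
            if strings.HasPrefix(env, "GODEBUG=") || strings.HasPrefix(env, "GOTRACEBACK=") {
                continue
            }
            cmd.Env = append(cmd.Env, env)
        }
        return cmd
    }

    func main() {
        out, err := cleanCmdEnv(exec.Command("go", "version")).CombinedOutput()
        fmt.Println(string(out), err)
    }
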
diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go
index cbaa1f6..af9e643 100644
--- a/src/runtime/crash_unix_test.go
+++ b/src/runtime/crash_unix_test.go
@@ -65,13 +65,13 @@
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
cmd.Dir = dir
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source: %v\n%s", err, out)
}
cmd = exec.Command(filepath.Join(dir, "a.exe"))
- cmd = testEnv(cmd)
+ cmd = testenv.CleanCmdEnv(cmd)
cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
// Set GOGC=off. Because of golang.org/issue/10958, the tight
@@ -184,7 +184,7 @@
t.Parallel()
cmd := exec.Command(os.Args[0], "testPanicSystemstackInternal")
- cmd = testEnv(cmd)
+ cmd = testenv.CleanCmdEnv(cmd)
cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
pr, pw, err := os.Pipe()
if err != nil {
@@ -249,7 +249,7 @@
if err != nil {
t.Fatal(err)
}
- err = testEnv(exec.Command(exe, "SignalExitStatus")).Run()
+ err = testenv.CleanCmdEnv(exec.Command(exe, "SignalExitStatus")).Run()
if err == nil {
t.Error("test program succeeded unexpectedly")
} else if ee, ok := err.(*exec.ExitError); !ok {
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 0e798fc..feacfb6 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -15,9 +15,6 @@
// The number of logical CPUs on the local machine can be queried with NumCPU.
// This call will go away when the scheduler improves.
func GOMAXPROCS(n int) int {
- if n > _MaxGomaxprocs {
- n = _MaxGomaxprocs
- }
lock(&sched.lock)
ret := int(gomaxprocs)
unlock(&sched.lock)
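
Note: debug.go above drops the clamp to _MaxGomaxprocs, so large GOMAXPROCS values are no longer silently capped. Querying without changing the setting still uses a non-positive argument; a minimal usage sketch of the public API:

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        // A non-positive argument queries the current setting without
        // changing it.
        cur := runtime.GOMAXPROCS(0)
        fmt.Println("current GOMAXPROCS:", cur)

        // Setting a value returns the previous one; with the clamp above
        // removed, values larger than the old _MaxGomaxprocs limit are
        // accepted as-is.
        prev := runtime.GOMAXPROCS(cur)
        fmt.Println("previous:", prev)
    }
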
diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go
index 66f07ce..c26f417 100644
--- a/src/runtime/defs1_netbsd_386.go
+++ b/src/runtime/defs1_netbsd_386.go
@@ -79,6 +79,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = 0x0
_EVFILT_WRITE = 0x1
)
diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go
index 9e31471..0704cd4 100644
--- a/src/runtime/defs1_netbsd_amd64.go
+++ b/src/runtime/defs1_netbsd_amd64.go
@@ -79,6 +79,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = 0x0
_EVFILT_WRITE = 0x1
)
diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go
index db8e4c6..d2a13ad 100644
--- a/src/runtime/defs1_netbsd_arm.go
+++ b/src/runtime/defs1_netbsd_arm.go
@@ -79,6 +79,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = 0x0
_EVFILT_WRITE = 0x1
)
diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go
index 78df4e7..f7d65e7 100644
--- a/src/runtime/defs_darwin.go
+++ b/src/runtime/defs_darwin.go
@@ -139,6 +139,7 @@
EV_CLEAR = C.EV_CLEAR
EV_RECEIPT = C.EV_RECEIPT
EV_ERROR = C.EV_ERROR
+ EV_EOF = C.EV_EOF
EVFILT_READ = C.EVFILT_READ
EVFILT_WRITE = C.EVFILT_WRITE
)
diff --git a/src/runtime/defs_darwin_386.go b/src/runtime/defs_darwin_386.go
index 1a5967b..f6dbcc5 100644
--- a/src/runtime/defs_darwin_386.go
+++ b/src/runtime/defs_darwin_386.go
@@ -118,6 +118,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go
index a4ab090..245fe15 100644
--- a/src/runtime/defs_darwin_amd64.go
+++ b/src/runtime/defs_darwin_amd64.go
@@ -118,6 +118,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_darwin_arm.go b/src/runtime/defs_darwin_arm.go
index 3f8dbbf..f89aee6 100644
--- a/src/runtime/defs_darwin_arm.go
+++ b/src/runtime/defs_darwin_arm.go
@@ -120,6 +120,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_darwin_arm64.go b/src/runtime/defs_darwin_arm64.go
index c25a41b..a0ca7f1 100644
--- a/src/runtime/defs_darwin_arm64.go
+++ b/src/runtime/defs_darwin_arm64.go
@@ -118,6 +118,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_dragonfly.go b/src/runtime/defs_dragonfly.go
index ed00be0..95014fe 100644
--- a/src/runtime/defs_dragonfly.go
+++ b/src/runtime/defs_dragonfly.go
@@ -103,6 +103,7 @@
EV_DELETE = C.EV_DELETE
EV_CLEAR = C.EV_CLEAR
EV_ERROR = C.EV_ERROR
+ EV_EOF = C.EV_EOF
EVFILT_READ = C.EVFILT_READ
EVFILT_WRITE = C.EVFILT_WRITE
)
diff --git a/src/runtime/defs_dragonfly_amd64.go b/src/runtime/defs_dragonfly_amd64.go
index fc70103..c30da80 100644
--- a/src/runtime/defs_dragonfly_amd64.go
+++ b/src/runtime/defs_dragonfly_amd64.go
@@ -82,6 +82,7 @@
_EV_DELETE = 0x2
_EV_CLEAR = 0x20
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go
index 0a11d09..9d55111 100644
--- a/src/runtime/defs_freebsd.go
+++ b/src/runtime/defs_freebsd.go
@@ -125,6 +125,7 @@
EV_CLEAR = C.EV_CLEAR
EV_RECEIPT = C.EV_RECEIPT
EV_ERROR = C.EV_ERROR
+ EV_EOF = C.EV_EOF
EVFILT_READ = C.EVFILT_READ
EVFILT_WRITE = C.EVFILT_WRITE
)
diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go
index 92b0550..49bcbb1 100644
--- a/src/runtime/defs_freebsd_386.go
+++ b/src/runtime/defs_freebsd_386.go
@@ -95,6 +95,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go
index 645e205..0e1c675 100644
--- a/src/runtime/defs_freebsd_amd64.go
+++ b/src/runtime/defs_freebsd_amd64.go
@@ -95,6 +95,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go
index c8a198f..71684fe 100644
--- a/src/runtime/defs_freebsd_arm.go
+++ b/src/runtime/defs_freebsd_arm.go
@@ -95,6 +95,7 @@
_EV_CLEAR = 0x20
_EV_RECEIPT = 0x40
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go
index 56db1f0..41aa07a 100644
--- a/src/runtime/defs_netbsd.go
+++ b/src/runtime/defs_netbsd.go
@@ -105,6 +105,7 @@
EV_CLEAR = C.EV_CLEAR
EV_RECEIPT = 0
EV_ERROR = C.EV_ERROR
+ EV_EOF = C.EV_EOF
EVFILT_READ = C.EVFILT_READ
EVFILT_WRITE = C.EVFILT_WRITE
)
diff --git a/src/runtime/defs_openbsd.go b/src/runtime/defs_openbsd.go
index 7e72150..9ff13df 100644
--- a/src/runtime/defs_openbsd.go
+++ b/src/runtime/defs_openbsd.go
@@ -100,6 +100,7 @@
EV_DELETE = C.EV_DELETE
EV_CLEAR = C.EV_CLEAR
EV_ERROR = C.EV_ERROR
+ EV_EOF = C.EV_EOF
EVFILT_READ = C.EVFILT_READ
EVFILT_WRITE = C.EVFILT_WRITE
)
diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go
index ce08111..1185530 100644
--- a/src/runtime/defs_openbsd_386.go
+++ b/src/runtime/defs_openbsd_386.go
@@ -80,6 +80,7 @@
_EV_DELETE = 0x2
_EV_CLEAR = 0x20
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go
index ea07098..4bb8eac 100644
--- a/src/runtime/defs_openbsd_amd64.go
+++ b/src/runtime/defs_openbsd_amd64.go
@@ -80,6 +80,7 @@
_EV_DELETE = 0x2
_EV_CLEAR = 0x20
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/defs_openbsd_arm.go b/src/runtime/defs_openbsd_arm.go
index b0fb639..38b77c9 100644
--- a/src/runtime/defs_openbsd_arm.go
+++ b/src/runtime/defs_openbsd_arm.go
@@ -80,6 +80,7 @@
_EV_DELETE = 0x2
_EV_CLEAR = 0x20
_EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
_EVFILT_READ = -0x1
_EVFILT_WRITE = -0x2
)
diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s
index a1112a4..44dc75d 100644
--- a/src/runtime/duff_amd64.s
+++ b/src/runtime/duff_amd64.s
@@ -9,97 +9,97 @@
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
- ADDQ $64,DI
+ LEAQ 64(DI),DI
RET
diff --git a/src/runtime/duff_arm64.s b/src/runtime/duff_arm64.s
index 60a0e26..21619ff 100644
--- a/src/runtime/duff_arm64.s
+++ b/src/runtime/duff_arm64.s
@@ -5,134 +5,70 @@
#include "textflag.h"
TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
- MOVD.W ZR, 8(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP.P (ZR, ZR), 16(R16)
+ STP (ZR, ZR), (R16)
RET
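
The arm64 duffzero above now stores the zero register in pairs with post-indexed STP.P (16 bytes per instruction) instead of 128 pre-indexed single MOVD.W stores (8 bytes each), roughly halving the instruction count for the same 1 KiB of zeroing. A quick standalone arithmetic check of that equivalence, counting the stores in this hunk:

```go
package main

import "fmt"

func main() {
	oldBytes := 128 * 8    // 128 MOVD.W stores of ZR, 8 bytes each
	newBytes := 63*16 + 16 // 63 STP.P pair stores plus one final STP, 16 bytes each
	fmt.Println(oldBytes, newBytes, oldBytes == newBytes) // 1024 1024 true
}
```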
TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
diff --git a/src/runtime/error.go b/src/runtime/error.go
index eafcc9b..6048272 100644
--- a/src/runtime/error.go
+++ b/src/runtime/error.go
@@ -72,8 +72,7 @@
return e._type.string()
}
-// For calling from C.
-// Prints an argument passed to panic.
+// printany prints an argument passed to panic.
func printany(i interface{}) {
switch v := i.(type) {
case nil:
@@ -126,34 +125,31 @@
//go:linkname stringsIndexByte strings.IndexByte
func stringsIndexByte(s string, c byte) int
-// called from generated code
+// panicwrap generates a panic for a call to a wrapped value method
+// with a nil pointer receiver.
+//
+// It is called from the generated wrapper code.
func panicwrap() {
- pc := make([]uintptr, 1)
- n := Callers(2, pc)
- if n == 0 {
- throw("panicwrap: Callers failed")
- }
- frames := CallersFrames(pc)
- frame, _ := frames.Next()
- name := frame.Function
+ pc := getcallerpc()
+ name := funcname(findfunc(pc))
// name is something like "main.(*T).F".
// We want to extract pkg ("main"), typ ("T"), and meth ("F").
// Do it by finding the parens.
i := stringsIndexByte(name, '(')
if i < 0 {
- throw("panicwrap: no ( in " + frame.Function)
+ throw("panicwrap: no ( in " + name)
}
pkg := name[:i-1]
if i+2 >= len(name) || name[i-1:i+2] != ".(*" {
- throw("panicwrap: unexpected string after package name: " + frame.Function)
+ throw("panicwrap: unexpected string after package name: " + name)
}
name = name[i+2:]
i = stringsIndexByte(name, ')')
if i < 0 {
- throw("panicwrap: no ) in " + frame.Function)
+ throw("panicwrap: no ) in " + name)
}
if i+2 >= len(name) || name[i:i+2] != ")." {
- throw("panicwrap: unexpected string after type name: " + frame.Function)
+ throw("panicwrap: unexpected string after type name: " + name)
}
typ := name[:i]
meth := name[i+2:]
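
panicwrap now recovers its caller's symbol name directly via getcallerpc and funcname instead of allocating a slice for Callers/CallersFrames, then splits a name like "main.(*T).F" on the parentheses. A standalone sketch of that splitting logic, using the strings package and error returns where the runtime uses stringsIndexByte and throw (illustrative only):

```go
package main

import (
	"fmt"
	"strings"
)

// splitWrapperName splits a wrapper symbol such as "main.(*T).F" into its
// package, type, and method components, mirroring the checks in panicwrap.
func splitWrapperName(name string) (pkg, typ, meth string, err error) {
	i := strings.IndexByte(name, '(')
	if i < 1 || i+2 >= len(name) || name[i-1:i+2] != ".(*" {
		return "", "", "", fmt.Errorf("unexpected symbol %q", name)
	}
	pkg = name[:i-1]
	rest := name[i+2:]
	j := strings.IndexByte(rest, ')')
	if j < 0 || j+2 >= len(rest) || rest[j:j+2] != ")." {
		return "", "", "", fmt.Errorf("unexpected symbol %q", name)
	}
	return pkg, rest[:j], rest[j+2:], nil
}

func main() {
	fmt.Println(splitWrapperName("main.(*T).F")) // main T F <nil>
}
```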
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index c929bd4..385c569 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -152,12 +152,19 @@
}
}
-var StringHash = stringHash
-var BytesHash = bytesHash
-var Int32Hash = int32Hash
-var Int64Hash = int64Hash
-var EfaceHash = efaceHash
-var IfaceHash = ifaceHash
+var (
+ StringHash = stringHash
+ BytesHash = bytesHash
+ Int32Hash = int32Hash
+ Int64Hash = int64Hash
+ MemHash = memhash
+ MemHash32 = memhash32
+ MemHash64 = memhash64
+ EfaceHash = efaceHash
+ IfaceHash = ifaceHash
+)
+
+var UseAeshash = &useAeshash
func MemclrBytes(b []byte) {
s := (*slice)(unsafe.Pointer(&b))
@@ -369,3 +376,40 @@
func (rw *RWMutex) Unlock() {
rw.rw.unlock()
}
+
+func MapBucketsCount(m map[int]int) int {
+ h := *(**hmap)(unsafe.Pointer(&m))
+ return 1 << h.B
+}
+
+func MapBucketsPointerIsNil(m map[int]int) bool {
+ h := *(**hmap)(unsafe.Pointer(&m))
+ return h.buckets == nil
+}
+
+func LockOSCounts() (external, internal uint32) {
+ g := getg()
+ if g.m.lockedExt+g.m.lockedInt == 0 {
+ if g.lockedm != 0 {
+ panic("lockedm on non-locked goroutine")
+ }
+ } else {
+ if g.lockedm == 0 {
+ panic("nil lockedm on locked goroutine")
+ }
+ }
+ return g.m.lockedExt, g.m.lockedInt
+}
+
+//go:noinline
+func TracebackSystemstack(stk []uintptr, i int) int {
+ if i == 0 {
+ pc, sp := getcallerpc(), getcallersp(unsafe.Pointer(&stk))
+ return gentraceback(pc, sp, 0, getg(), 0, &stk[0], len(stk), nil, nil, _TraceJumpStack)
+ }
+ n := 0
+ systemstack(func() {
+ n = TracebackSystemstack(stk, i-1)
+ })
+ return n
+}
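
The new exports above (MemHash*, MapBuckets*, LockOSCounts, TracebackSystemstack) exist solely so tests in package runtime_test can inspect internals. A hypothetical test along those lines, relying only on the documented 6.5 average-load threshold (the real coverage lives in the map and hash tests):

```go
package runtime_test

import (
	"runtime"
	"testing"
)

func TestMapBucketsSketch(t *testing.T) {
	m := make(map[int]int)
	t.Logf("empty map, buckets allocated lazily? %v", runtime.MapBucketsPointerIsNil(m))
	for i := 0; i < 1000; i++ {
		m[i] = i
	}
	// 1000 entries at an average load of at most 6.5 per bucket need at least
	// ceil(1000/6.5) ≈ 154 buckets, which rounds up to 2^8 = 256.
	if got := runtime.MapBucketsCount(m); got < 256 {
		t.Errorf("got %d buckets for 1000 entries, want >= 256", got)
	}
}
```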
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 6e6c674..2c20e0d 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -178,11 +178,11 @@
// We asked for one extra, so skip that one. If this is sigpanic,
// stepping over this frame will set up state in Frames so the
// next frame is correct.
- callers, _, ok = stackExpander.next(callers)
+ callers, _, ok = stackExpander.next(callers, true)
if !ok {
return
}
- _, frame, _ := stackExpander.next(callers)
+ _, frame, _ := stackExpander.next(callers, true)
pc = frame.PC
file = frame.File
line = frame.Line
@@ -212,8 +212,8 @@
return callers(skip, pc)
}
-// GOROOT returns the root of the Go tree.
-// It uses the GOROOT environment variable, if set,
+// GOROOT returns the root of the Go tree. It uses the
+// GOROOT environment variable, if set at process start,
// or else the root used during the Go build.
func GOROOT() string {
s := gogetenv("GOROOT")
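
The reworded GOROOT comment pins down that only the environment at process start matters; under that reading, changing GOROOT later via os.Setenv should not affect the returned value. A small sketch of that behavior (expected outcome noted as a comment, not verified here):

```go
package main

import (
	"fmt"
	"os"
	"runtime"
)

func main() {
	before := runtime.GOROOT()
	os.Setenv("GOROOT", "/definitely/not/a/goroot")
	// runtime.GOROOT reads the environment captured at startup, so the
	// value should be unchanged.
	fmt.Println(before == runtime.GOROOT()) // expected: true
}
```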
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 03acc8a..1b1db25 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -10,6 +10,7 @@
"reflect"
"runtime"
"runtime/debug"
+ "sync/atomic"
"testing"
"time"
"unsafe"
@@ -170,7 +171,7 @@
// slack if things are slow.
var numGCs uint32
const want = 2
- for i := 0; i < 20 && numGCs < want; i++ {
+ for i := 0; i < 200 && numGCs < want; i++ {
time.Sleep(5 * time.Millisecond)
// Test that periodic GC actually happened.
@@ -499,3 +500,142 @@
hugeSink = nil
}
+
+func TestUserForcedGC(t *testing.T) {
+ // Test that runtime.GC() triggers a GC even if GOGC=off.
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
+
+ var ms1, ms2 runtime.MemStats
+ runtime.ReadMemStats(&ms1)
+ runtime.GC()
+ runtime.ReadMemStats(&ms2)
+ if ms1.NumGC == ms2.NumGC {
+ t.Fatalf("runtime.GC() did not trigger GC")
+ }
+ if ms1.NumForcedGC == ms2.NumForcedGC {
+ t.Fatalf("runtime.GC() was not accounted in NumForcedGC")
+ }
+}
+
+func writeBarrierBenchmark(b *testing.B, f func()) {
+ runtime.GC()
+ var ms runtime.MemStats
+ runtime.ReadMemStats(&ms)
+ //b.Logf("heap size: %d MB", ms.HeapAlloc>>20)
+
+ // Keep GC running continuously during the benchmark, which in
+ // turn keeps the write barrier on continuously.
+ var stop uint32
+ done := make(chan bool)
+ go func() {
+ for atomic.LoadUint32(&stop) == 0 {
+ runtime.GC()
+ }
+ close(done)
+ }()
+ defer func() {
+ atomic.StoreUint32(&stop, 1)
+ <-done
+ }()
+
+ b.ResetTimer()
+ f()
+ b.StopTimer()
+}
+
+func BenchmarkWriteBarrier(b *testing.B) {
+ if runtime.GOMAXPROCS(-1) < 2 {
+ // We don't want GC to take our time.
+ b.Skip("need GOMAXPROCS >= 2")
+ }
+
+ // Construct a large tree both so the GC runs for a while and
+ // so we have a data structure to manipulate the pointers of.
+ type node struct {
+ l, r *node
+ }
+ var wbRoots []*node
+ var mkTree func(level int) *node
+ mkTree = func(level int) *node {
+ if level == 0 {
+ return nil
+ }
+ n := &node{mkTree(level - 1), mkTree(level - 1)}
+ if level == 10 {
+ // Seed GC with enough early pointers so it
+ // doesn't accidentally switch to mark 2 when
+ // it only has the top of the tree.
+ wbRoots = append(wbRoots, n)
+ }
+ return n
+ }
+ const depth = 22 // 64 MB
+ root := mkTree(22)
+
+ writeBarrierBenchmark(b, func() {
+ var stack [depth]*node
+ tos := -1
+
+ // There are two write barriers per iteration, so i+=2.
+ for i := 0; i < b.N; i += 2 {
+ if tos == -1 {
+ stack[0] = root
+ tos = 0
+ }
+
+ // Perform one step of reversing the tree.
+ n := stack[tos]
+ if n.l == nil {
+ tos--
+ } else {
+ n.l, n.r = n.r, n.l
+ stack[tos] = n.l
+ stack[tos+1] = n.r
+ tos++
+ }
+
+ if i%(1<<12) == 0 {
+ // Avoid non-preemptible loops (see issue #10958).
+ runtime.Gosched()
+ }
+ }
+ })
+
+ runtime.KeepAlive(wbRoots)
+}
+
+func BenchmarkBulkWriteBarrier(b *testing.B) {
+ if runtime.GOMAXPROCS(-1) < 2 {
+ // We don't want GC to take our time.
+ b.Skip("need GOMAXPROCS >= 2")
+ }
+
+ // Construct a large set of objects we can copy around.
+ const heapSize = 64 << 20
+ type obj [16]*byte
+ ptrs := make([]*obj, heapSize/unsafe.Sizeof(obj{}))
+ for i := range ptrs {
+ ptrs[i] = new(obj)
+ }
+
+ writeBarrierBenchmark(b, func() {
+ const blockSize = 1024
+ var pos int
+ for i := 0; i < b.N; i += blockSize {
+ // Rotate block.
+ block := ptrs[pos : pos+blockSize]
+ first := block[0]
+ copy(block, block[1:])
+ block[blockSize-1] = first
+
+ pos += blockSize
+ if pos+blockSize > len(ptrs) {
+ pos = 0
+ }
+
+ runtime.Gosched()
+ }
+ })
+
+ runtime.KeepAlive(ptrs)
+}
diff --git a/src/runtime/hash32.go b/src/runtime/hash32.go
index be59076..5574923 100644
--- a/src/runtime/hash32.go
+++ b/src/runtime/hash32.go
@@ -81,6 +81,32 @@
return uintptr(h)
}
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint32(seed + 4*hashkey[0])
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= h >> 17
+ h *= m3
+ h ^= h >> 13
+ h *= m4
+ h ^= h >> 16
+ return uintptr(h)
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint32(seed + 8*hashkey[0])
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, 4))
+ h = rotl_15(h*m1) * m2
+ h ^= h >> 17
+ h *= m3
+ h ^= h >> 13
+ h *= m4
+ h ^= h >> 16
+ return uintptr(h)
+}
+
// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
diff --git a/src/runtime/hash64.go b/src/runtime/hash64.go
index d61f114..3cf3f46 100644
--- a/src/runtime/hash64.go
+++ b/src/runtime/hash64.go
@@ -81,6 +81,28 @@
return uintptr(h)
}
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint64(seed + 4*hashkey[0])
+ v := uint64(readUnaligned32(p))
+ h ^= v
+ h ^= v << 32
+ h = rotl_31(h*m1) * m2
+ h ^= h >> 29
+ h *= m3
+ h ^= h >> 32
+ return uintptr(h)
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint64(seed + 8*hashkey[0])
+ h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32
+ h = rotl_31(h*m1) * m2
+ h ^= h >> 29
+ h *= m3
+ h ^= h >> 32
+ return uintptr(h)
+}
+
// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go
index a6f3cdb..1400579 100644
--- a/src/runtime/hash_test.go
+++ b/src/runtime/hash_test.go
@@ -14,6 +14,40 @@
"unsafe"
)
+func TestMemHash32Equality(t *testing.T) {
+ if *UseAeshash {
+ t.Skip("skipping since AES hash implementation is used")
+ }
+ var b [4]byte
+ r := rand.New(rand.NewSource(1234))
+ seed := uintptr(r.Uint64())
+ for i := 0; i < 100; i++ {
+ randBytes(r, b[:])
+ got := MemHash32(unsafe.Pointer(&b), seed)
+ want := MemHash(unsafe.Pointer(&b), seed, 4)
+ if got != want {
+ t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
+ }
+ }
+}
+
+func TestMemHash64Equality(t *testing.T) {
+ if *UseAeshash {
+ t.Skip("skipping since AES hash implementation is used")
+ }
+ var b [8]byte
+ r := rand.New(rand.NewSource(1234))
+ seed := uintptr(r.Uint64())
+ for i := 0; i < 100; i++ {
+ randBytes(r, b[:])
+ got := MemHash64(unsafe.Pointer(&b), seed)
+ want := MemHash(unsafe.Pointer(&b), seed, 8)
+ if got != want {
+ t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
+ }
+ }
+}
+
// Smhasher is a torture test for hash functions.
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 11ce0cb..dee5dd5 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -64,8 +64,10 @@
bucketCntBits = 3
bucketCnt = 1 << bucketCntBits
- // Maximum average load of a bucket that triggers growth.
- loadFactor = 6.5
+ // Maximum average load of a bucket that triggers growth is 6.5.
+ // Represent as loadFactorNum/loadFactorDen, to allow integer math.
+ loadFactorNum = 13
+ loadFactorDen = 2
// Maximum key or value size to keep inline (instead of mallocing per element).
// Must fit in a uint8.
@@ -124,12 +126,13 @@
// If both key and value do not contain pointers and are inline, then we mark bucket
// type as containing no pointers. This avoids scanning such maps.
// However, bmap.overflow is a pointer. In order to keep overflow buckets
- // alive, we store pointers to all overflow buckets in hmap.overflow.
- // Overflow is used only if key and value do not contain pointers.
- // overflow[0] contains overflow buckets for hmap.buckets.
- // overflow[1] contains overflow buckets for hmap.oldbuckets.
+ // alive, we store pointers to all overflow buckets in hmap.overflow and hmap.oldoverflow.
+ // overflow and oldoverflow are only used if key and value do not contain pointers.
+ // overflow contains overflow buckets for hmap.buckets.
+ // oldoverflow contains overflow buckets for hmap.oldbuckets.
// The indirection allows storing a pointer to the slice in hiter.
- overflow [2]*[]*bmap
+ overflow *[]*bmap
+ oldoverflow *[]*bmap
// nextOverflow holds a pointer to a free overflow bucket.
nextOverflow *bmap
@@ -158,7 +161,8 @@
h *hmap
buckets unsafe.Pointer // bucket ptr at hash_iter initialization time
bptr *bmap // current bucket
- overflow [2]*[]*bmap // keeps overflow buckets alive
+ overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive
+ oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive
startBucket uintptr // bucket iteration started at
offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
wrapped bool // already wrapped around from end of bucket array to beginning
@@ -168,6 +172,28 @@
checkBucket uintptr
}
+// bucketShift returns 1<<b, optimized for code generation.
+func bucketShift(b uint8) uintptr {
+ if sys.GoarchAmd64|sys.GoarchAmd64p32|sys.Goarch386 != 0 {
+ b &= sys.PtrSize*8 - 1 // help x86 archs remove shift overflow checks
+ }
+ return uintptr(1) << b
+}
+
+// bucketMask returns 1<<b - 1, optimized for code generation.
+func bucketMask(b uint8) uintptr {
+ return bucketShift(b) - 1
+}
+
+// tophash calculates the tophash value for hash.
+func tophash(hash uintptr) uint8 {
+ top := uint8(hash >> (sys.PtrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ return top
+}
+
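The new bucketShift, bucketMask, and tophash helpers centralize index and hash-bit manipulation that was previously inlined at every call site. A tiny standalone illustration of what they compute, assuming a 64-bit platform (sys.PtrSize == 8) and this release's marker layout where values 0..3 are reserved and minTopHash is 4:

```go
package main

import "fmt"

const minTopHash = 4 // assumed marker boundary: empty/evacuated markers occupy 0..3

func bucketMask(b uint8) uintptr { return uintptr(1)<<b - 1 }

func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (8*8 - 8)) // top byte of a 64-bit hash
	if top < minTopHash {
		top += minTopHash
	}
	return top
}

func main() {
	hash := uintptr(0x12f3a47c9b5e8d01)
	// With B=5 there are 32 buckets; the low bits pick the bucket and the
	// high byte becomes the per-slot tophash used for cheap comparisons.
	fmt.Printf("bucket=%d top=%#x\n", hash&bucketMask(5), tophash(hash))
	// Output: bucket=1 top=0x12
}
```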
func evacuated(b *bmap) bool {
h := b.tophash[0]
return h > empty && h < minTopHash
@@ -181,6 +207,10 @@
*(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf
}
+func (b *bmap) keys() unsafe.Pointer {
+ return add(unsafe.Pointer(b), dataOffset)
+}
+
// incrnoverflow increments h.noverflow.
// noverflow counts the number of overflow buckets.
// This is used to trigger same-size map growth.
@@ -229,7 +259,7 @@
h.incrnoverflow()
if t.bucket.kind&kindNoPointers != 0 {
h.createOverflow()
- *h.extra.overflow[0] = append(*h.extra.overflow[0], ovf)
+ *h.extra.overflow = append(*h.extra.overflow, ovf)
}
b.setoverflow(t, ovf)
return ovf
@@ -239,96 +269,68 @@
if h.extra == nil {
h.extra = new(mapextra)
}
- if h.extra.overflow[0] == nil {
- h.extra.overflow[0] = new([]*bmap)
+ if h.extra.overflow == nil {
+ h.extra.overflow = new([]*bmap)
}
}
-// makemap implements a Go map creation make(map[k]v, hint)
+func makemap64(t *maptype, hint int64, h *hmap) *hmap {
+ if int64(int(hint)) != hint {
+ hint = 0
+ }
+ return makemap(t, int(hint), h)
+}
+
+// makemap_small implements Go map creation for make(map[k]v) and
+// make(map[k]v, hint) when hint is known to be at most bucketCnt
+// at compile time and the map needs to be allocated on the heap.
+func makemap_small() *hmap {
+ h := new(hmap)
+ h.hash0 = fastrand()
+ return h
+}
+
+// makemap implements Go map creation for make(map[k]v, hint).
// If the compiler has determined that the map or the first bucket
// can be created on the stack, h and/or bucket may be non-nil.
// If h != nil, the map can be created directly in h.
-// If bucket != nil, bucket can be used as the first bucket.
-func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
- if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != t.hmap.size {
+// If h.buckets != nil, the bucket pointed to can be used as the first bucket.
+func makemap(t *maptype, hint int, h *hmap) *hmap {
+ // The size of hmap should be 48 bytes on 64-bit
+ // and 28 bytes on 32-bit platforms.
+ if sz := unsafe.Sizeof(hmap{}); sz != 8+5*sys.PtrSize {
println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
throw("bad hmap size")
}
- if hint < 0 || hint > int64(maxSliceCap(t.bucket.size)) {
+ if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {
hint = 0
}
- if !ismapkey(t.key) {
- throw("runtime.makemap: unsupported map key type")
- }
-
- // check compiler's and reflect's math
- if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) ||
- t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) {
- throw("key size wrong")
- }
- if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) ||
- t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) {
- throw("value size wrong")
- }
-
- // invariants we depend on. We should probably check these at compile time
- // somewhere, but for now we'll do it here.
- if t.key.align > bucketCnt {
- throw("key align too big")
- }
- if t.elem.align > bucketCnt {
- throw("value align too big")
- }
- if t.key.size%uintptr(t.key.align) != 0 {
- throw("key size not a multiple of key align")
- }
- if t.elem.size%uintptr(t.elem.align) != 0 {
- throw("value size not a multiple of value align")
- }
- if bucketCnt < 8 {
- throw("bucketsize too small for proper alignment")
- }
- if dataOffset%uintptr(t.key.align) != 0 {
- throw("need padding in bucket (key)")
- }
- if dataOffset%uintptr(t.elem.align) != 0 {
- throw("need padding in bucket (value)")
- }
-
- // find size parameter which will hold the requested # of elements
- B := uint8(0)
- for ; overLoadFactor(hint, B); B++ {
- }
-
- // allocate initial hash table
- // if B == 0, the buckets field is allocated lazily later (in mapassign)
- // If hint is large zeroing this memory could take a while.
- buckets := bucket
- var extra *mapextra
- if B != 0 {
- var nextOverflow *bmap
- buckets, nextOverflow = makeBucketArray(t, B)
- if nextOverflow != nil {
- extra = new(mapextra)
- extra.nextOverflow = nextOverflow
- }
- }
-
// initialize Hmap
if h == nil {
h = (*hmap)(newobject(t.hmap))
}
- h.count = 0
- h.B = B
- h.extra = extra
- h.flags = 0
h.hash0 = fastrand()
- h.buckets = buckets
- h.oldbuckets = nil
- h.nevacuate = 0
- h.noverflow = 0
+
+ // find size parameter which will hold the requested # of elements
+ B := uint8(0)
+ for overLoadFactor(hint, B) {
+ B++
+ }
+ h.B = B
+
+ // allocate initial hash table
+ // if B == 0, the buckets field is allocated lazily later (in mapassign)
+ // If hint is large zeroing this memory could take a while.
+ if h.B != 0 {
+ var nextOverflow *bmap
+ h.buckets, nextOverflow = makeBucketArray(t, h.B)
+ if nextOverflow != nil {
+ h.extra = new(mapextra)
+ h.extra.nextOverflow = nextOverflow
+ }
+ }
return h
}
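
Map construction is now split across three entry points: makemap_small for hints known at compile time to fit one bucket, makemap for general hints (now taking an int), and makemap64 for hint expressions wider than int, which falls back to a zero hint when the value would overflow. A rough sketch of how make calls map onto them (assumed typical lowering; the exact choice also depends on constant folding and escape analysis):

```go
package main

func main() {
	_ = make(map[string]int)       // no hint: makemap_small, or a fully stack-allocated map if it does not escape
	_ = make(map[string]int, 8)    // hint <= bucketCnt (8): still makemap_small, buckets allocated lazily
	_ = make(map[string]int, 1000) // larger hint: makemap sizes the bucket array up front
	var n int64 = 1 << 33
	_ = make(map[string]int, n) // hint wider than int: routed through makemap64
}
```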
@@ -340,7 +342,7 @@
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
pc := funcPC(mapaccess1)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -356,7 +358,7 @@
}
alg := t.key.alg
hash := alg.hash(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -368,11 +370,8 @@
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -389,16 +388,13 @@
return v
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
- }
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
pc := funcPC(mapaccess2)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -414,7 +410,7 @@
}
alg := t.key.alg
hash := alg.hash(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -426,11 +422,8 @@
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -447,11 +440,8 @@
return v, true
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
- }
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
// returns both key and value. Used by map iterator
@@ -461,7 +451,7 @@
}
alg := t.key.alg
hash := alg.hash(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -473,11 +463,8 @@
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -494,11 +481,8 @@
return k, v
}
}
- b = b.overflow(t)
- if b == nil {
- return nil, nil
- }
}
+ return nil, nil
}
func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer {
@@ -523,7 +507,7 @@
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
pc := funcPC(mapassign)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -542,19 +526,16 @@
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
growWork(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
+ top := tophash(hash)
var inserti *uint8
var insertk unsafe.Pointer
@@ -594,7 +575,7 @@
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
@@ -634,7 +615,7 @@
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
pc := funcPC(mapdelete)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -656,16 +637,14 @@
// in which case we have not actually done a write (delete).
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
growWork(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+search:
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -678,53 +657,44 @@
if !alg.equal(key, k2) {
continue
}
+ // Only clear key if there are pointers in it.
if t.indirectkey {
*(*unsafe.Pointer)(k) = nil
- } else {
- typedmemclr(t.key, k)
+ } else if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
}
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize))
- if t.indirectvalue {
- *(*unsafe.Pointer)(v) = nil
- } else {
- typedmemclr(t.elem, v)
+ // Only clear value if there are pointers in it.
+ if t.indirectvalue || t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(v) = nil
+ } else {
+ memclrHasPointers(v, t.elem.size)
+ }
}
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
h.flags &^= hashWriting
}
+// mapiterinit initializes the hiter struct used for ranging over maps.
+// The hiter struct pointed to by 'it' is allocated on the stack
+// by the compiler's order pass or on the heap by reflect_mapiterinit.
+// Both need to have a zeroed hiter since the struct contains pointers.
func mapiterinit(t *maptype, h *hmap, it *hiter) {
- // Clear pointer fields so garbage collector does not complain.
- it.key = nil
- it.value = nil
- it.t = nil
- it.h = nil
- it.buckets = nil
- it.bptr = nil
- it.overflow[0] = nil
- it.overflow[1] = nil
-
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit))
}
if h == nil || h.count == 0 {
- it.key = nil
- it.value = nil
return
}
@@ -744,6 +714,7 @@
// while we are iterating.
h.createOverflow()
it.overflow = h.extra.overflow
+ it.oldoverflow = h.extra.oldoverflow
}
// decide where to start
@@ -751,16 +722,14 @@
if h.B > 31-bucketCntBits {
r += uintptr(fastrand()) << 31
}
- it.startBucket = r & (uintptr(1)<<h.B - 1)
+ it.startBucket = r & bucketMask(h.B)
it.offset = uint8(r >> h.B & (bucketCnt - 1))
// iterator state
it.bucket = it.startBucket
- it.wrapped = false
- it.bptr = nil
// Remember we have an iterator.
- // Can run concurrently with another hash_iter_init().
+ // Can run concurrently with another mapiterinit().
if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
atomic.Or8(&h.flags, iterator|oldIterator)
}
@@ -771,7 +740,7 @@
func mapiternext(it *hiter) {
h := it.h
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&it))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext))
}
if h.flags&hashWriting != 0 {
@@ -810,7 +779,7 @@
checkBucket = noCheck
}
bucket++
- if bucket == uintptr(1)<<it.B {
+ if bucket == bucketShift(it.B) {
bucket = 0
it.wrapped = true
}
@@ -818,90 +787,75 @@
}
for ; i < bucketCnt; i++ {
offi := (i + it.offset) & (bucketCnt - 1)
- k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
- v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
- if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty {
- if checkBucket != noCheck && !h.sameSizeGrow() {
- // Special case: iterator was started during a grow to a larger size
- // and the grow is not done yet. We're working on a bucket whose
- // oldbucket has not been evacuated yet. Or at least, it wasn't
- // evacuated when we started the bucket. So we're iterating
- // through the oldbucket, skipping any keys that will go
- // to the other new bucket (each oldbucket expands to two
- // buckets during a grow).
- k2 := k
- if t.indirectkey {
- k2 = *((*unsafe.Pointer)(k2))
- }
- if t.reflexivekey || alg.equal(k2, k2) {
- // If the item in the oldbucket is not destined for
- // the current new bucket in the iteration, skip it.
- hash := alg.hash(k2, uintptr(h.hash0))
- if hash&(uintptr(1)<<it.B-1) != checkBucket {
- continue
- }
- } else {
- // Hash isn't repeatable if k != k (NaNs). We need a
- // repeatable and randomish choice of which direction
- // to send NaNs during evacuation. We'll use the low
- // bit of tophash to decide which way NaNs go.
- // NOTE: this case is why we need two evacuate tophash
- // values, evacuatedX and evacuatedY, that differ in
- // their low bit.
- if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
- continue
- }
- }
- }
- if b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY {
- // this is the golden data, we can return it.
- if t.indirectkey {
- k = *((*unsafe.Pointer)(k))
- }
- it.key = k
- if t.indirectvalue {
- v = *((*unsafe.Pointer)(v))
- }
- it.value = v
- } else {
- // The hash table has grown since the iterator was started.
- // The golden data for this key is now somewhere else.
- k2 := k
- if t.indirectkey {
- k2 = *((*unsafe.Pointer)(k2))
- }
- if t.reflexivekey || alg.equal(k2, k2) {
- // Check the current hash table for the data.
- // This code handles the case where the key
- // has been deleted, updated, or deleted and reinserted.
- // NOTE: we need to regrab the key as it has potentially been
- // updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
- rk, rv := mapaccessK(t, h, k2)
- if rk == nil {
- continue // key has been deleted
- }
- it.key = rk
- it.value = rv
- } else {
- // if key!=key then the entry can't be deleted or
- // updated, so we can just return it. That's lucky for
- // us because when key!=key we can't look it up
- // successfully in the current table.
- it.key = k2
- if t.indirectvalue {
- v = *((*unsafe.Pointer)(v))
- }
- it.value = v
- }
- }
- it.bucket = bucket
- if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
- it.bptr = b
- }
- it.i = i + 1
- it.checkBucket = checkBucket
- return
+ if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty {
+ continue
}
+ k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
+ if checkBucket != noCheck && !h.sameSizeGrow() {
+ // Special case: iterator was started during a grow to a larger size
+ // and the grow is not done yet. We're working on a bucket whose
+ // oldbucket has not been evacuated yet. Or at least, it wasn't
+ // evacuated when we started the bucket. So we're iterating
+ // through the oldbucket, skipping any keys that will go
+ // to the other new bucket (each oldbucket expands to two
+ // buckets during a grow).
+ if t.reflexivekey || alg.equal(k, k) {
+ // If the item in the oldbucket is not destined for
+ // the current new bucket in the iteration, skip it.
+ hash := alg.hash(k, uintptr(h.hash0))
+ if hash&bucketMask(it.B) != checkBucket {
+ continue
+ }
+ } else {
+ // Hash isn't repeatable if k != k (NaNs). We need a
+ // repeatable and randomish choice of which direction
+ // to send NaNs during evacuation. We'll use the low
+ // bit of tophash to decide which way NaNs go.
+ // NOTE: this case is why we need two evacuate tophash
+ // values, evacuatedX and evacuatedY, that differ in
+ // their low bit.
+ if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
+ continue
+ }
+ }
+ }
+ if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
+ !(t.reflexivekey || alg.equal(k, k)) {
+ // This is the golden data, we can return it.
+ // OR
+ // key!=key, so the entry can't be deleted or updated, and we can just return it.
+ // That's lucky for us because when key!=key we can't look it up successfully.
+ it.key = k
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ it.value = v
+ } else {
+ // The hash table has grown since the iterator was started.
+ // The golden data for this key is now somewhere else.
+ // Check the current hash table for the data.
+ // This code handles the case where the key
+ // has been deleted, updated, or deleted and reinserted.
+ // NOTE: we need to regrab the key as it has potentially been
+ // updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
+ rk, rv := mapaccessK(t, h, k)
+ if rk == nil {
+ continue // key has been deleted
+ }
+ it.key = rk
+ it.value = rv
+ }
+ it.bucket = bucket
+ if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
+ it.bptr = b
+ }
+ it.i = i + 1
+ it.checkBucket = checkBucket
+ return
}
b = b.overflow(t)
i = 0
@@ -909,7 +863,7 @@
}
func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) {
- base := uintptr(1 << b)
+ base := bucketShift(b)
nbuckets := base
// For small b, overflow buckets are unlikely.
// Avoid the overhead of the calculation.
@@ -917,7 +871,7 @@
// Add on the estimated number of overflow buckets
// required to insert the median number of elements
// used with this value of b.
- nbuckets += 1 << (b - 4)
+ nbuckets += bucketShift(b - 4)
sz := t.bucket.size * nbuckets
up := roundupsize(sz)
if up != sz {
@@ -943,7 +897,7 @@
// Otherwise, there are too many overflow buckets,
// so keep the same number of buckets and "grow" laterally.
bigger := uint8(1)
- if !overLoadFactor(int64(h.count), h.B) {
+ if !overLoadFactor(h.count+1, h.B) {
bigger = 0
h.flags |= sameSizeGrow
}
@@ -962,13 +916,13 @@
h.nevacuate = 0
h.noverflow = 0
- if h.extra != nil && h.extra.overflow[0] != nil {
+ if h.extra != nil && h.extra.overflow != nil {
// Promote current overflow buckets to the old generation.
- if h.extra.overflow[1] != nil {
- throw("overflow is not nil")
+ if h.extra.oldoverflow != nil {
+ throw("oldoverflow is not nil")
}
- h.extra.overflow[1] = h.extra.overflow[0]
- h.extra.overflow[0] = nil
+ h.extra.oldoverflow = h.extra.overflow
+ h.extra.overflow = nil
}
if nextOverflow != nil {
if h.extra == nil {
@@ -982,9 +936,8 @@
}
// overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor.
-func overLoadFactor(count int64, B uint8) bool {
- // TODO: rewrite to use integer math and comparison?
- return count >= bucketCnt && float32(count) >= loadFactor*float32((uint64(1)<<B))
+func overLoadFactor(count int, B uint8) bool {
+ return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
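
The load-factor check now uses the integer pair loadFactorNum/loadFactorDen (13/2) instead of comparing against the float 6.5, and the call sites pass count+1 so growth is decided before an insert rather than after. A quick standalone check that the integer form preserves the 6.5 threshold exactly for every B >= 1:

```go
package main

import "fmt"

func main() {
	const loadFactorNum, loadFactorDen = 13, 2
	for B := uint(1); B <= 16; B++ {
		intThreshold := loadFactorNum * ((1 << B) / loadFactorDen)
		floatThreshold := 6.5 * float64(int(1)<<B)
		if float64(intThreshold) != floatThreshold {
			fmt.Println("mismatch at B =", B)
			return
		}
	}
	fmt.Println("integer and float thresholds agree for B in [1,16]")
}
```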
// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets.
@@ -995,10 +948,11 @@
// If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory.
// "too many" means (approximately) as many overflow buckets as regular buckets.
// See incrnoverflow for more details.
- if B < 16 {
- return noverflow >= uint16(1)<<B
+ if B > 15 {
+ B = 15
}
- return noverflow >= 1<<15
+ // The compiler doesn't see here that B < 16; mask B to generate shorter shift code.
+ return noverflow >= uint16(1)<<(B&15)
}
// growing reports whether h is growing. The growth may be to the same size or bigger.
@@ -1017,7 +971,7 @@
if !h.sameSizeGrow() {
oldB--
}
- return uintptr(1) << oldB
+ return bucketShift(oldB)
}
// oldbucketmask provides a mask that can be applied to calculate n % noldbuckets().
@@ -1041,32 +995,37 @@
return evacuated(b)
}
+// evacDst is an evacuation destination.
+type evacDst struct {
+ b *bmap // current destination bucket
+ i int // key/val index into b
+ k unsafe.Pointer // pointer to current key storage
+ v unsafe.Pointer // pointer to current value storage
+}
+
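evacuate previously kept two parallel sets of locals (x*, y*) and duplicated the copy logic in an if/else; the evacDst struct lets it index a two-element array with a 0/1 flag instead. A minimal illustration of that pattern in isolation (not runtime code):

```go
package main

import "fmt"

type dst struct{ n int }

func main() {
	var xy [2]dst
	for _, goesHigh := range []bool{false, true, true, false} {
		var useY uint8
		if goesHigh {
			useY = 1
		}
		d := &xy[useY] // one code path for both destinations
		d.n++
	}
	fmt.Println(xy[0].n, xy[1].n) // 2 2
}
```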
func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
newbit := h.noldbuckets()
- alg := t.key.alg
if !evacuated(b) {
// TODO: reuse overflow buckets instead of using new ones, if there
// is no iterator using the old buckets. (If !oldIterator.)
- var (
- x, y *bmap // current low/high buckets in new map
- xi, yi int // key/val indices into x and y
- xk, yk unsafe.Pointer // pointers to current x and y key storage
- xv, yv unsafe.Pointer // pointers to current x and y value storage
- )
- x = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
- xi = 0
- xk = add(unsafe.Pointer(x), dataOffset)
- xv = add(xk, bucketCnt*uintptr(t.keysize))
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*uintptr(t.keysize))
+
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
// Otherwise GC can see bad pointers.
- y = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
- yi = 0
- yk = add(unsafe.Pointer(y), dataOffset)
- yv = add(yk, bucketCnt*uintptr(t.keysize))
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*uintptr(t.keysize))
}
+
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
v := add(k, bucketCnt*uintptr(t.keysize))
@@ -1083,122 +1042,102 @@
if t.indirectkey {
k2 = *((*unsafe.Pointer)(k2))
}
- useX := true
+ var useY uint8
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/value to bucket x or bucket y).
- hash := alg.hash(k2, uintptr(h.hash0))
- if h.flags&iterator != 0 {
- if !t.reflexivekey && !alg.equal(k2, k2) {
- // If key != key (NaNs), then the hash could be (and probably
- // will be) entirely different from the old hash. Moreover,
- // it isn't reproducible. Reproducibility is required in the
- // presence of iterators, as our evacuation decision must
- // match whatever decision the iterator made.
- // Fortunately, we have the freedom to send these keys either
- // way. Also, tophash is meaningless for these kinds of keys.
- // We let the low bit of tophash drive the evacuation decision.
- // We recompute a new random tophash for the next level so
- // these keys will get evenly distributed across all buckets
- // after multiple grows.
- if top&1 != 0 {
- hash |= newbit
- } else {
- hash &^= newbit
- }
- top = uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
+ hash := t.key.alg.hash(k2, uintptr(h.hash0))
+ if h.flags&iterator != 0 && !t.reflexivekey && !t.key.alg.equal(k2, k2) {
+ // If key != key (NaNs), then the hash could be (and probably
+ // will be) entirely different from the old hash. Moreover,
+ // it isn't reproducible. Reproducibility is required in the
+ // presence of iterators, as our evacuation decision must
+ // match whatever decision the iterator made.
+ // Fortunately, we have the freedom to send these keys either
+ // way. Also, tophash is meaningless for these kinds of keys.
+ // We let the low bit of tophash drive the evacuation decision.
+ // We recompute a new random tophash for the next level so
+ // these keys will get evenly distributed across all buckets
+ // after multiple grows.
+ useY = top & 1
+ top = tophash(hash)
+ } else {
+ if hash&newbit != 0 {
+ useY = 1
}
}
- useX = hash&newbit == 0
}
- if useX {
- b.tophash[i] = evacuatedX
- if xi == bucketCnt {
- newx := h.newoverflow(t, x)
- x = newx
- xi = 0
- xk = add(unsafe.Pointer(x), dataOffset)
- xv = add(xk, bucketCnt*uintptr(t.keysize))
- }
- x.tophash[xi] = top
- if t.indirectkey {
- *(*unsafe.Pointer)(xk) = k2 // copy pointer
- } else {
- typedmemmove(t.key, xk, k) // copy value
- }
- if t.indirectvalue {
- *(*unsafe.Pointer)(xv) = *(*unsafe.Pointer)(v)
- } else {
- typedmemmove(t.elem, xv, v)
- }
- xi++
- xk = add(xk, uintptr(t.keysize))
- xv = add(xv, uintptr(t.valuesize))
+
+ if evacuatedX+1 != evacuatedY {
+ throw("bad evacuatedN")
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*uintptr(t.keysize))
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+ if t.indirectkey {
+ *(*unsafe.Pointer)(dst.k) = k2 // copy pointer
} else {
- b.tophash[i] = evacuatedY
- if yi == bucketCnt {
- newy := h.newoverflow(t, y)
- y = newy
- yi = 0
- yk = add(unsafe.Pointer(y), dataOffset)
- yv = add(yk, bucketCnt*uintptr(t.keysize))
- }
- y.tophash[yi] = top
- if t.indirectkey {
- *(*unsafe.Pointer)(yk) = k2
- } else {
- typedmemmove(t.key, yk, k)
- }
- if t.indirectvalue {
- *(*unsafe.Pointer)(yv) = *(*unsafe.Pointer)(v)
- } else {
- typedmemmove(t.elem, yv, v)
- }
- yi++
- yk = add(yk, uintptr(t.keysize))
- yv = add(yv, uintptr(t.valuesize))
+ typedmemmove(t.key, dst.k, k) // copy value
}
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v)
+ } else {
+ typedmemmove(t.elem, dst.v, v)
+ }
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, uintptr(t.keysize))
+ dst.v = add(dst.v, uintptr(t.valuesize))
}
}
// Unlink the overflow buckets & clear key/value to help GC.
- if h.flags&oldIterator == 0 {
- b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
// Preserve b.tophash because the evacuation
// state is maintained there.
- if t.bucket.kind&kindNoPointers == 0 {
- memclrHasPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset)
- } else {
- memclrNoHeapPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset)
- }
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
}
}
- // Advance evacuation mark
if oldbucket == h.nevacuate {
- h.nevacuate = oldbucket + 1
- // Experiments suggest that 1024 is overkill by at least an order of magnitude.
- // Put it in there as a safeguard anyway, to ensure O(1) behavior.
- stop := h.nevacuate + 1024
- if stop > newbit {
- stop = newbit
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
+
+func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {
+ h.nevacuate++
+ // Experiments suggest that 1024 is overkill by at least an order of magnitude.
+ // Put it in there as a safeguard anyway, to ensure O(1) behavior.
+ stop := h.nevacuate + 1024
+ if stop > newbit {
+ stop = newbit
+ }
+ for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {
+ h.nevacuate++
+ }
+ if h.nevacuate == newbit { // newbit == # of oldbuckets
+ // Growing is all done. Free old main bucket array.
+ h.oldbuckets = nil
+ // Can discard old overflow buckets as well.
+ // If they are still referenced by an iterator,
+ // then the iterator holds a pointer to the slice.
+ if h.extra != nil {
+ h.extra.oldoverflow = nil
}
- for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {
- h.nevacuate++
- }
- if h.nevacuate == newbit { // newbit == # of oldbuckets
- // Growing is all done. Free old main bucket array.
- h.oldbuckets = nil
- // Can discard old overflow buckets as well.
- // If they are still referenced by an iterator,
- // then the iterator holds a pointers to the slice.
- if h.extra != nil {
- h.extra.overflow[1] = nil
- }
- h.flags &^= sameSizeGrow
- }
+ h.flags &^= sameSizeGrow
}
}
@@ -1210,7 +1149,45 @@
//go:linkname reflect_makemap reflect.makemap
func reflect_makemap(t *maptype, cap int) *hmap {
- return makemap(t, int64(cap), nil, nil)
+ // Check invariants and reflect's math.
+ if sz := unsafe.Sizeof(hmap{}); sz != t.hmap.size {
+ println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
+ throw("bad hmap size")
+ }
+ if !ismapkey(t.key) {
+ throw("runtime.reflect_makemap: unsupported map key type")
+ }
+ if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) ||
+ t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) {
+ throw("key size wrong")
+ }
+ if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) ||
+ t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) {
+ throw("value size wrong")
+ }
+ if t.key.align > bucketCnt {
+ throw("key align too big")
+ }
+ if t.elem.align > bucketCnt {
+ throw("value align too big")
+ }
+ if t.key.size%uintptr(t.key.align) != 0 {
+ throw("key size not a multiple of key align")
+ }
+ if t.elem.size%uintptr(t.elem.align) != 0 {
+ throw("value size not a multiple of value align")
+ }
+ if bucketCnt < 8 {
+ throw("bucketsize too small for proper alignment")
+ }
+ if dataOffset%uintptr(t.key.align) != 0 {
+ throw("need padding in bucket (key)")
+ }
+ if dataOffset%uintptr(t.elem.align) != 0 {
+ throw("need padding in bucket (value)")
+ }
+
+ return makemap(t, cap, nil)
}
//go:linkname reflect_mapaccess reflect.mapaccess
@@ -1257,7 +1234,7 @@
return 0
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&h))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen))
}
return h.count
diff --git a/src/runtime/hashmap_fast.go b/src/runtime/hashmap_fast.go
index 67b9787..2de3814 100644
--- a/src/runtime/hashmap_fast.go
+++ b/src/runtime/hashmap_fast.go
@@ -11,7 +11,7 @@
func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32))
}
if h == nil || h.count == 0 {
@@ -26,7 +26,7 @@
b = (*bmap)(h.buckets)
} else {
hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -39,28 +39,19 @@
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if *(*uint32)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
}
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32))
}
if h == nil || h.count == 0 {
@@ -75,7 +66,7 @@
b = (*bmap)(h.buckets)
} else {
hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -88,28 +79,19 @@
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if *(*uint32)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
}
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64))
}
if h == nil || h.count == 0 {
@@ -124,7 +106,7 @@
b = (*bmap)(h.buckets)
} else {
hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -137,28 +119,19 @@
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if *(*uint64)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
}
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64))
}
if h == nil || h.count == 0 {
@@ -173,7 +146,7 @@
b = (*bmap)(h.buckets)
} else {
hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -186,28 +159,19 @@
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if *(*uint64)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
}
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
}
if h == nil || h.count == 0 {
@@ -222,13 +186,9 @@
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
@@ -239,13 +199,9 @@
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str {
@@ -275,7 +231,7 @@
}
dohash:
hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -287,34 +243,24 @@
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x != top {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
- }
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
}
if h == nil || h.count == 0 {
@@ -329,13 +275,9 @@
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
@@ -346,13 +288,9 @@
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str {
@@ -382,7 +320,7 @@
}
dohash:
hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -394,29 +332,19 @@
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x != top {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
- }
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
@@ -424,7 +352,7 @@
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
@@ -436,30 +364,26 @@
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast32(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- var inserti *uint8
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*4)
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ inserti = i
+ insertb = b
}
continue
}
@@ -467,7 +391,8 @@
if k != key {
continue
}
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ inserti = i
+ insertb = b
goto done
}
ovf := b.overflow(t)
@@ -481,25 +406,112 @@
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*4)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
- // store new key/value at insert position
- typedmemmove(t.key, insertk, unsafe.Pointer(&key))
- *inserti = top
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
+ // store new key at insert position
+ *(*uint32)(insertk) = key
+
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize))
+ if h.flags&hashWriting == 0 {
+ throw("concurrent map writes")
+ }
+ h.flags &^= hashWriting
+ return val
+}
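
mapassign_fast32 now remembers the insertion slot as a bucket pointer plus an index (insertb, inserti) rather than raw pointers into the bucket, computes the key and value addresses once at the end, and masks the index when writing the tophash byte. A small sketch, not runtime code, of why that mask removes the bounds check:

package sketch

const bucketCnt = 8 // same value the runtime uses

type bucket struct {
	tophash [bucketCnt]uint8
}

// setTop stores a tophash byte. Masking with bucketCnt-1 keeps the index in
// 0..7 by construction, so the compiler can elide the bounds check even
// though i is only known at run time.
func setTop(b *bucket, i uintptr, top uint8) {
	b.tophash[i&(bucketCnt-1)] = top
}
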
+
+func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
+ if h == nil {
+ panic(plainError("assignment to entry in nil map"))
+ }
+ if raceenabled {
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
+ }
+ if h.flags&hashWriting != 0 {
+ throw("concurrent map writes")
+ }
+ hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+
+ // Set hashWriting after calling alg.hash for consistency with mapassign.
+ h.flags |= hashWriting
+
+ if h.buckets == nil {
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
+ }
+
+again:
+ bucket := hash & bucketMask(h.B)
+ if h.growing() {
+ growWork_fast32(t, h, bucket)
+ }
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+
+ var insertb *bmap
+ var inserti uintptr
+ var insertk unsafe.Pointer
+
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ inserti = i
+ insertb = b
+ }
+ continue
+ }
+ k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4)))
+ if k != key {
+ continue
+ }
+ inserti = i
+ insertb = b
+ goto done
+ }
+ ovf := b.overflow(t)
+ if ovf == nil {
+ break
+ }
+ b = ovf
+ }
+
+ // Did not find mapping for key. Allocate new cell & add entry.
+
+ // If we hit the max load factor or we have too many overflow buckets,
+ // and we're not already in the middle of growing, start growing.
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ hashGrow(t, h)
+ goto again // Growing the table invalidates everything, so try again
+ }
+
+ if insertb == nil {
+ // all current buckets are full, allocate a new one.
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
+ }
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
+ // store new key at insert position
+ *(*unsafe.Pointer)(insertk) = key
+
+ h.count++
+
+done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -512,7 +524,7 @@
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
@@ -524,30 +536,26 @@
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- var inserti *uint8
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*8)
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ insertb = b
+ inserti = i
}
continue
}
@@ -555,7 +563,8 @@
if k != key {
continue
}
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ insertb = b
+ inserti = i
goto done
}
ovf := b.overflow(t)
@@ -569,25 +578,26 @@
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*8)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
- // store new key/value at insert position
- typedmemmove(t.key, insertk, unsafe.Pointer(&key))
- *inserti = top
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
+ // store new key at insert position
+ *(*uint64)(insertk) = key
+
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -595,48 +605,131 @@
return val
}
-func mapassign_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
+func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if h == nil {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- key := stringStructOf(&ky)
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
+ hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
// Set hashWriting after calling alg.hash for consistency with mapassign.
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
+
+ var insertb *bmap
+ var inserti uintptr
+ var insertk unsafe.Pointer
+
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ insertb = b
+ inserti = i
+ }
+ continue
+ }
+ k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8)))
+ if k != key {
+ continue
+ }
+ insertb = b
+ inserti = i
+ goto done
+ }
+ ovf := b.overflow(t)
+ if ovf == nil {
+ break
+ }
+ b = ovf
}
- var inserti *uint8
+ // Did not find mapping for key. Allocate new cell & add entry.
+
+ // If we hit the max load factor or we have too many overflow buckets,
+ // and we're not already in the middle of growing, start growing.
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ hashGrow(t, h)
+ goto again // Growing the table invalidates everything, so try again
+ }
+
+ if insertb == nil {
+ // all current buckets are full, allocate a new one.
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
+ }
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
+ // store new key at insert position
+ *(*unsafe.Pointer)(insertk) = key
+
+ h.count++
+
+done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize))
+ if h.flags&hashWriting == 0 {
+ throw("concurrent map writes")
+ }
+ h.flags &^= hashWriting
+ return val
+}
+
+func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
+ if h == nil {
+ panic(plainError("assignment to entry in nil map"))
+ }
+ if raceenabled {
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
+ }
+ if h.flags&hashWriting != 0 {
+ throw("concurrent map writes")
+ }
+ key := stringStructOf(&s)
+ hash := t.key.alg.hash(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
+
+ // Set hashWriting after calling alg.hash for consistency with mapassign.
+ h.flags |= hashWriting
+
+ if h.buckets == nil {
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
+ }
+
+again:
+ bucket := hash & bucketMask(h.B)
+ if h.growing() {
+ growWork_faststr(t, h, bucket)
+ }
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty && insertb == nil {
+ insertb = b
+ inserti = i
}
continue
}
@@ -648,7 +741,8 @@
continue
}
// already have a mapping for key. Update it.
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
+ inserti = i
+ insertb = b
goto done
}
ovf := b.overflow(t)
@@ -662,25 +756,25 @@
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*2*sys.PtrSize)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks
- // store new key/value at insert position
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize)
+ // store new key at insert position
*((*stringStruct)(insertk)) = *key
- *inserti = top
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -690,7 +784,7 @@
func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32))
}
if h == nil || h.count == 0 {
@@ -705,38 +799,32 @@
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast32(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if key != *(*uint32)(k) || b.tophash[i] == empty {
continue
}
- k := (*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))
- if key != *k {
- continue
+ // Only clear key if there are pointers in it.
+ if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*4 + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
+ }
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -745,7 +833,7 @@
func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64))
}
if h == nil || h.count == 0 {
@@ -760,38 +848,32 @@
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if key != *(*uint64)(k) || b.tophash[i] == empty {
continue
}
- k := (*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))
- if key != *k {
- continue
+ // Only clear key if there are pointers in it.
+ if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*8 + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
+ }
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -800,7 +882,7 @@
func mapdelete_faststr(t *maptype, h *hmap, ky string) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr))
}
if h == nil || h.count == 0 {
@@ -816,43 +898,340 @@
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_faststr(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
continue
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*2*sys.PtrSize + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
+ // Clear key's pointer.
+ k.str = nil
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
+ }
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
h.flags &^= hashWriting
}
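
For string keys, deletion now clears only the key's data pointer and zeroes the value just when its type can hold pointers, where the old code called typedmemclr unconditionally. A hedged illustration with sketch types (the byte loop stands in for memclrHasPointers):

package sketch

import "unsafe"

type strKey struct {
	str unsafe.Pointer
	len int
}

// clearSlot removes everything the GC cares about and nothing more: the
// key's data pointer, and the value bytes only if the value holds pointers.
func clearSlot(k *strKey, val []byte, elemHasPointers bool) {
	k.str = nil // k.len is a plain integer; leaving it behind is harmless
	if elemHasPointers {
		for i := range val {
			val[i] = 0
		}
	}
}
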
+
+func growWork_fast32(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_fast32(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_fast32(t, h, h.nevacuate)
+ }
+}
+
+func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*4)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*4)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*4)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.alg.hash(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*4)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
+ writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+ } else {
+ *(*uint32)(dst.k) = *(*uint32)(k)
+ }
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 4)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
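
During a size-doubling grow, each old bucket is split between an x bucket (same index) and a y bucket (index plus newbit), decided by the single extra hash bit that the larger table starts to use. A tiny sketch of that decision with made-up numbers:

package sketch

// evacDest returns the destination bucket index for a key that lived in
// oldbucket of a table that is doubling; newbit is the old bucket count.
func evacDest(oldbucket, newbit, hash uintptr) uintptr {
	if hash&newbit != 0 {
		return oldbucket + newbit // the "y" (high) destination
	}
	return oldbucket // the "x" (low) destination
}

// Example: growing from 4 to 8 buckets, newbit is 4, so a key from old
// bucket 1 moves to new bucket 1 or 5 depending on bit 2 of its hash.
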
+
+func growWork_fast64(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_fast64(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_fast64(t, h, h.nevacuate)
+ }
+}
+
+func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*8)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*8)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*8)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.alg.hash(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*8)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
+ if sys.PtrSize == 8 {
+ writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+ } else {
+ // There are three ways to squeeze at least one 32 bit pointer into 64 bits.
+ // Give up and call typedmemmove.
+ typedmemmove(t.key, dst.k, k)
+ }
+ } else {
+ *(*uint64)(dst.k) = *(*uint64)(k)
+ }
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 8)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
+
+func growWork_faststr(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_faststr(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_faststr(t, h, h.nevacuate)
+ }
+}
+
+func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*2*sys.PtrSize)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*2*sys.PtrSize)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*2*sys.PtrSize)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.alg.hash(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*2*sys.PtrSize)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ *(*string)(dst.k) = *(*string)(k)
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 2*sys.PtrSize)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 35f6124..2b51758 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -200,7 +200,6 @@
// dump an object
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
- dumpbvtypes(&bv, obj)
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
dumpmemrange(obj, size)
@@ -261,14 +260,9 @@
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
- // Dump any types we will need to resolve Efaces.
- if child.args.n >= 0 {
- dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff))
- }
var bv bitvector
if stkmap != nil && stkmap.n > 0 {
bv = stackmapdata(stkmap, pcdata)
- dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n*sys.PtrSize)))
} else {
bv.n = -1
}
@@ -423,14 +417,12 @@
func dumproots() {
// TODO(mwhudson): dump datamask etc from all objects
// data segment
- dumpbvtypes(&firstmoduledata.gcdatamask, unsafe.Pointer(firstmoduledata.data))
dumpint(tagData)
dumpint(uint64(firstmoduledata.data))
dumpmemrange(unsafe.Pointer(firstmoduledata.data), firstmoduledata.edata-firstmoduledata.data)
dumpfields(firstmoduledata.gcdatamask)
// bss segment
- dumpbvtypes(&firstmoduledata.gcbssmask, unsafe.Pointer(firstmoduledata.bss))
dumpint(tagBSS)
dumpint(uint64(firstmoduledata.bss))
dumpmemrange(unsafe.Pointer(firstmoduledata.bss), firstmoduledata.ebss-firstmoduledata.bss)
@@ -677,16 +669,6 @@
dumpint(fieldKindEol)
}
-// The heap dump reader needs to be able to disambiguate
-// Eface entries. So it needs to know every type that might
-// appear in such an entry. The following routine accomplishes that.
-// TODO(rsc, khr): Delete - no longer possible.
-
-// Dump all the types that appear in the type field of
-// any Eface described by this bit vector.
-func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
-}
-
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
nptr := size / sys.PtrSize
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 58ed61e..7c5d3a0 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -10,21 +10,24 @@
"unsafe"
)
-const (
- hashSize = 1009
-)
+const itabInitSize = 512
var (
- ifaceLock mutex // lock for accessing hash
- hash [hashSize]*itab
+ itabLock mutex // lock for accessing itab table
+ itabTable = &itabTableInit // pointer to current table
+ itabTableInit = itabTableType{size: itabInitSize} // starter table
)
-func itabhash(inter *interfacetype, typ *_type) uint32 {
+// Note: change the formula in the mallocgc call in itabAdd if you change these fields.
+type itabTableType struct {
+ size uintptr // length of entries array. Always a power of 2.
+ count uintptr // current number of filled entries.
+ entries [itabInitSize]*itab // really [size] large
+}
+
+func itabHashFunc(inter *interfacetype, typ *_type) uintptr {
// compiler has provided some good hash codes for us.
- h := inter.typ.hash
- h += 17 * typ.hash
- // TODO(rsc): h += 23 * x.mhash ?
- return h % hashSize
+ return uintptr(inter.typ.hash ^ typ.hash)
}
func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
@@ -41,50 +44,137 @@
panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), name.name()})
}
- h := itabhash(inter, typ)
-
- // look twice - once without lock, once with.
- // common case will be no lock contention.
var m *itab
- var locked int
- for locked = 0; locked < 2; locked++ {
- if locked != 0 {
- lock(&ifaceLock)
- }
- for m = (*itab)(atomic.Loadp(unsafe.Pointer(&hash[h]))); m != nil; m = m.link {
- if m.inter == inter && m._type == typ {
- if m.bad {
- if !canfail {
- // this can only happen if the conversion
- // was already done once using the , ok form
- // and we have a cached negative result.
- // the cached result doesn't record which
- // interface function was missing, so try
- // adding the itab again, which will throw an error.
- additab(m, locked != 0, false)
- }
- m = nil
- }
- if locked != 0 {
- unlock(&ifaceLock)
- }
- return m
- }
- }
+
+ // First, look in the existing table to see if we can find the itab we need.
+ // This is by far the most common case, so do it without locks.
+ // Use atomic to ensure we see any previous writes done by the thread
+ // that updates the itabTable field (with atomic.Storep in itabAdd).
+ t := (*itabTableType)(atomic.Loadp(unsafe.Pointer(&itabTable)))
+ if m = t.find(inter, typ); m != nil {
+ goto finish
}
+ // Not found. Grab the lock and try again.
+ lock(&itabLock)
+ if m = itabTable.find(inter, typ); m != nil {
+ unlock(&itabLock)
+ goto finish
+ }
+
+ // Entry doesn't exist yet. Make a new entry & add it.
m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys))
m.inter = inter
m._type = typ
- additab(m, true, canfail)
- unlock(&ifaceLock)
- if m.bad {
+ m.init()
+ itabAdd(m)
+ unlock(&itabLock)
+finish:
+ if m.fun[0] != 0 {
+ return m
+ }
+ if canfail {
return nil
}
- return m
+ // this can only happen if the conversion
+ // was already done once using the , ok form
+ // and we have a cached negative result.
+ // The cached result doesn't record which
+ // interface function was missing, so initialize
+ // the itab again to get the missing function name.
+ panic(&TypeAssertionError{concreteString: typ.string(), assertedString: inter.typ.string(), missingMethod: m.init()})
}
-func additab(m *itab, locked, canfail bool) {
+// find finds the given interface/type pair in t.
+// Returns nil if the given interface/type pair isn't present.
+func (t *itabTableType) find(inter *interfacetype, typ *_type) *itab {
+ // Implemented using quadratic probing.
+ // Probe sequence is h(i) = h0 + i*(i+1)/2 mod 2^k.
+ // We're guaranteed to hit all table entries using this probe sequence.
+ mask := t.size - 1
+ h := itabHashFunc(inter, typ) & mask
+ for i := uintptr(1); ; i++ {
+ p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize))
+ // Use atomic read here so if we see m != nil, we also see
+ // the initializations of the fields of m.
+ // m := *p
+ m := (*itab)(atomic.Loadp(unsafe.Pointer(p)))
+ if m == nil {
+ return nil
+ }
+ if m.inter == inter && m._type == typ {
+ return m
+ }
+ h += i
+ h &= mask
+ }
+}
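
The probe sequence in find relies on the table size being a power of two: adding increments of 1, 2, 3, ... yields offsets of i*(i+1)/2, which visit every slot exactly once before the sequence repeats. A small self-contained check of that property (an illustration, not part of the patch):

package main

import "fmt"

func main() {
	const size = 8 // a power of two, like itabTableType.size
	mask := uintptr(size - 1)
	h := uintptr(5) & mask // arbitrary starting hash
	seen := make(map[uintptr]bool)
	for i := uintptr(1); i <= size; i++ {
		seen[h] = true
		h = (h + i) & mask // same stepping as find and add
	}
	fmt.Println(len(seen) == size) // true: the sequence covers every slot
}
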
+
+// itabAdd adds the given itab to the itab hash table.
+// itabLock must be held.
+func itabAdd(m *itab) {
+ t := itabTable
+ if t.count >= 3*(t.size/4) { // 75% load factor
+ // Grow hash table.
+ // t2 = new(itabTableType) + some additional entries
+ // We lie and tell malloc we want pointer-free memory because
+ // all the pointed-to values are not in the heap.
+ t2 := (*itabTableType)(mallocgc((2+2*t.size)*sys.PtrSize, nil, true))
+ t2.size = t.size * 2
+
+ // Copy over entries.
+ // Note: while copying, other threads may look for an itab and
+ // fail to find it. That's ok, they will then try to get the itab lock
+ // and as a consequence wait until this copying is complete.
+ iterate_itabs(t2.add)
+ if t2.count != t.count {
+ throw("mismatched count during itab table copy")
+ }
+ // Publish new hash table. Use an atomic write: see comment in getitab.
+ atomicstorep(unsafe.Pointer(&itabTable), unsafe.Pointer(t2))
+ // Adopt the new table as our own.
+ t = itabTable
+ // Note: the old table can be GC'ed here.
+ }
+ t.add(m)
+}
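
The mallocgc size in itabAdd follows the layout noted above itabTableType: two uintptr header words (size and count) followed by one pointer slot per entry, with the new table holding twice as many entries. A hedged sketch of the arithmetic, with the pointer size fixed at 8 only for the example:

package sketch

// itabTableBytes returns the bytes needed for a table with the given number
// of entry slots: two header words plus one pointer-sized slot per entry.
// itabAdd grows to 2*t.size entries, hence its (2+2*t.size)*sys.PtrSize.
func itabTableBytes(entries uintptr) uintptr {
	const ptrSize = 8 // assuming a 64-bit platform for this example
	return (2 + entries) * ptrSize
}
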
+
+// add adds the given itab to itab table t.
+// itabLock must be held.
+func (t *itabTableType) add(m *itab) {
+ // See comment in find about the probe sequence.
+ // Insert new itab in the first empty spot in the probe sequence.
+ mask := t.size - 1
+ h := itabHashFunc(m.inter, m._type) & mask
+ for i := uintptr(1); ; i++ {
+ p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize))
+ m2 := *p
+ if m2 == m {
+ // A given itab may be used in more than one module
+ // and thanks to the way global symbol resolution works, the
+ // pointed-to itab may already have been inserted into the
+ // global 'hash'.
+ return
+ }
+ if m2 == nil {
+ // Use atomic write here so if a reader sees m, it also
+ // sees the correctly initialized fields of m.
+ // NoWB is ok because m is not in heap memory.
+ // *p = m
+ atomic.StorepNoWB(unsafe.Pointer(p), unsafe.Pointer(m))
+ t.count++
+ return
+ }
+ h += i
+ h &= mask
+ }
+}
+
+// init fills in the m.fun array with all the code pointers for
+// the m.inter/m._type pair. If the type does not implement the interface,
+// it sets m.fun[0] to 0 and returns the name of an interface function that is missing.
+// It is ok to call this multiple times on the same m, even concurrently.
+func (m *itab) init() string {
inter := m.inter
typ := m._type
x := typ.uncommon()
@@ -97,6 +187,7 @@
nt := int(x.mcount)
xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt]
j := 0
+imethods:
for k := 0; k < ni; k++ {
i := &inter.mhdr[k]
itype := inter.typ.typeOff(i.ityp)
@@ -119,45 +210,26 @@
ifn := typ.textOff(t.ifn)
*(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = ifn
}
- goto nextimethod
+ continue imethods
}
}
}
// didn't find method
- if !canfail {
- if locked {
- unlock(&ifaceLock)
- }
- panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), iname})
- }
- m.bad = true
- break
- nextimethod:
+ m.fun[0] = 0
+ return iname
}
- if !locked {
- throw("invalid itab locking")
- }
- h := itabhash(inter, typ)
- m.link = hash[h]
- m.inhash = true
- atomicstorep(unsafe.Pointer(&hash[h]), unsafe.Pointer(m))
+ m.hash = typ.hash
+ return ""
}
func itabsinit() {
- lock(&ifaceLock)
+ lock(&itabLock)
for _, md := range activeModules() {
for _, i := range md.itablinks {
- // itablinks is a slice of pointers to the itabs used in this
- // module. A given itab may be used in more than one module
- // and thanks to the way global symbol resolution works, the
- // pointed-to itab may already have been inserted into the
- // global 'hash'.
- if !i.inhash {
- additab(i, true, false)
- }
+ itabAdd(i)
}
}
- unlock(&ifaceLock)
+ unlock(&itabLock)
}
// panicdottypeE is called when doing an e.(T) conversion and the conversion fails.
@@ -200,7 +272,7 @@
func convT2E(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E))
}
if msanenabled {
msanread(elem, t.size)
@@ -216,7 +288,7 @@
func convT2E16(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E16))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E16))
}
if msanenabled {
msanread(elem, t.size)
@@ -235,7 +307,7 @@
func convT2E32(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E32))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E32))
}
if msanenabled {
msanread(elem, t.size)
@@ -254,7 +326,7 @@
func convT2E64(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E64))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E64))
}
if msanenabled {
msanread(elem, t.size)
@@ -273,7 +345,7 @@
func convT2Estring(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Estring))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Estring))
}
if msanenabled {
msanread(elem, t.size)
@@ -292,7 +364,7 @@
func convT2Eslice(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Eslice))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Eslice))
}
if msanenabled {
msanread(elem, t.size)
@@ -311,7 +383,7 @@
func convT2Enoptr(t *_type, elem unsafe.Pointer) (e eface) {
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Enoptr))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Enoptr))
}
if msanenabled {
msanread(elem, t.size)
@@ -326,7 +398,7 @@
func convT2I(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I))
}
if msanenabled {
msanread(elem, t.size)
@@ -341,7 +413,7 @@
func convT2I16(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I16))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I16))
}
if msanenabled {
msanread(elem, t.size)
@@ -361,7 +433,7 @@
func convT2I32(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I32))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I32))
}
if msanenabled {
msanread(elem, t.size)
@@ -381,7 +453,7 @@
func convT2I64(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I64))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I64))
}
if msanenabled {
msanread(elem, t.size)
@@ -401,7 +473,7 @@
func convT2Istring(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Istring))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Istring))
}
if msanenabled {
msanread(elem, t.size)
@@ -421,7 +493,7 @@
func convT2Islice(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Islice))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Islice))
}
if msanenabled {
msanread(elem, t.size)
@@ -441,7 +513,7 @@
func convT2Inoptr(tab *itab, elem unsafe.Pointer) (i iface) {
t := tab._type
if raceenabled {
- raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Inoptr))
+ raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Inoptr))
}
if msanenabled {
msanread(elem, t.size)
@@ -533,9 +605,13 @@
}
func iterate_itabs(fn func(*itab)) {
- for _, h := range &hash {
- for ; h != nil; h = h.link {
- fn(h)
+ // Note: only runs during stop the world or with itabLock held,
+ // so no other locks/atomics needed.
+ t := itabTable
+ for i := uintptr(0); i < t.size; i++ {
+ m := *(**itab)(add(unsafe.Pointer(&t.entries), i*sys.PtrSize))
+ if m != nil {
+ fn(m)
}
}
}
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index 879a82f..b697aa8 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -52,7 +52,7 @@
// Tests that xadduintptr correctly updates 64-bit values. The place where
// we actually do so is mstats.go, functions mSysStat{Inc,Dec}.
func TestXadduintptrOnUint64(t *testing.T) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
// On big endian architectures, we never use xadduintptr to update
// 64-bit values and hence we skip the test. (Note that functions
// mSysStat{Inc,Dec} in mstats.go have explicit checks for
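
With BigEndian now a typed bool rather than 0 or 1, call sites such as the test above branch on it directly, and the constant still folds away at compile time. A hedged sketch of a byte-order helper written against the new constant (load32 is invented for illustration and is not runtime code; runtime/internal/sys is only importable from inside the runtime tree):

package sketch

import "runtime/internal/sys"

// load32 decodes b[0:4] in the machine's byte order. Because sys.BigEndian
// is a constant, the untaken branch is removed at compile time.
func load32(b []byte) uint32 {
	if sys.BigEndian {
		return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
	}
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
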
diff --git a/src/runtime/internal/sys/arch_386.go b/src/runtime/internal/sys/arch_386.go
index 61d6722..5fb1fba 100644
--- a/src/runtime/internal/sys/arch_386.go
+++ b/src/runtime/internal/sys/arch_386.go
@@ -6,7 +6,7 @@
const (
ArchFamily = I386
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 64
DefaultPhysPageSize = GoosNacl*65536 + (1-GoosNacl)*4096 // 4k normally; 64k on NaCl
PCQuantum = 1
diff --git a/src/runtime/internal/sys/arch_amd64.go b/src/runtime/internal/sys/arch_amd64.go
index 1f2114a..2f32bc4 100644
--- a/src/runtime/internal/sys/arch_amd64.go
+++ b/src/runtime/internal/sys/arch_amd64.go
@@ -6,7 +6,7 @@
const (
ArchFamily = AMD64
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 64
DefaultPhysPageSize = 4096
PCQuantum = 1
diff --git a/src/runtime/internal/sys/arch_amd64p32.go b/src/runtime/internal/sys/arch_amd64p32.go
index 0779855..c560907 100644
--- a/src/runtime/internal/sys/arch_amd64p32.go
+++ b/src/runtime/internal/sys/arch_amd64p32.go
@@ -6,7 +6,7 @@
const (
ArchFamily = AMD64
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 64
DefaultPhysPageSize = 65536*GoosNacl + 4096*(1-GoosNacl)
PCQuantum = 1
diff --git a/src/runtime/internal/sys/arch_arm.go b/src/runtime/internal/sys/arch_arm.go
index 899010b..f383d82 100644
--- a/src/runtime/internal/sys/arch_arm.go
+++ b/src/runtime/internal/sys/arch_arm.go
@@ -6,7 +6,7 @@
const (
ArchFamily = ARM
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 32
DefaultPhysPageSize = 65536
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_arm64.go b/src/runtime/internal/sys/arch_arm64.go
index 2d57dda..cb83ecc 100644
--- a/src/runtime/internal/sys/arch_arm64.go
+++ b/src/runtime/internal/sys/arch_arm64.go
@@ -6,8 +6,8 @@
const (
ArchFamily = ARM64
- BigEndian = 0
- CacheLineSize = 32
+ BigEndian = false
+ CacheLineSize = 64
DefaultPhysPageSize = 65536
PCQuantum = 4
Int64Align = 8
diff --git a/src/runtime/internal/sys/arch_mips.go b/src/runtime/internal/sys/arch_mips.go
index 65fc4f8..e12f32d 100644
--- a/src/runtime/internal/sys/arch_mips.go
+++ b/src/runtime/internal/sys/arch_mips.go
@@ -6,7 +6,7 @@
const (
ArchFamily = MIPS
- BigEndian = 1
+ BigEndian = true
CacheLineSize = 32
DefaultPhysPageSize = 65536
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_mips64.go b/src/runtime/internal/sys/arch_mips64.go
index 0f6de74..973ec10 100644
--- a/src/runtime/internal/sys/arch_mips64.go
+++ b/src/runtime/internal/sys/arch_mips64.go
@@ -6,7 +6,7 @@
const (
ArchFamily = MIPS64
- BigEndian = 1
+ BigEndian = true
CacheLineSize = 32
DefaultPhysPageSize = 16384
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_mips64le.go b/src/runtime/internal/sys/arch_mips64le.go
index 4ced35b..e96d962 100644
--- a/src/runtime/internal/sys/arch_mips64le.go
+++ b/src/runtime/internal/sys/arch_mips64le.go
@@ -6,7 +6,7 @@
const (
ArchFamily = MIPS64
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 32
DefaultPhysPageSize = 16384
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_mipsle.go b/src/runtime/internal/sys/arch_mipsle.go
index 33e9764..25742ae 100644
--- a/src/runtime/internal/sys/arch_mipsle.go
+++ b/src/runtime/internal/sys/arch_mipsle.go
@@ -6,7 +6,7 @@
const (
ArchFamily = MIPS
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 32
DefaultPhysPageSize = 65536
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_ppc64.go b/src/runtime/internal/sys/arch_ppc64.go
index 80595ee..a538bbd 100644
--- a/src/runtime/internal/sys/arch_ppc64.go
+++ b/src/runtime/internal/sys/arch_ppc64.go
@@ -6,7 +6,7 @@
const (
ArchFamily = PPC64
- BigEndian = 1
+ BigEndian = true
CacheLineSize = 128
DefaultPhysPageSize = 65536
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_ppc64le.go b/src/runtime/internal/sys/arch_ppc64le.go
index f68e777..aa50689 100644
--- a/src/runtime/internal/sys/arch_ppc64le.go
+++ b/src/runtime/internal/sys/arch_ppc64le.go
@@ -6,7 +6,7 @@
const (
ArchFamily = PPC64
- BigEndian = 0
+ BigEndian = false
CacheLineSize = 128
DefaultPhysPageSize = 65536
PCQuantum = 4
diff --git a/src/runtime/internal/sys/arch_s390x.go b/src/runtime/internal/sys/arch_s390x.go
index 4ec4bf8..e42c420 100644
--- a/src/runtime/internal/sys/arch_s390x.go
+++ b/src/runtime/internal/sys/arch_s390x.go
@@ -6,7 +6,7 @@
const (
ArchFamily = S390X
- BigEndian = 1
+ BigEndian = true
CacheLineSize = 256
DefaultPhysPageSize = 4096
PCQuantum = 2
diff --git a/src/runtime/internal/sys/gengoos.go b/src/runtime/internal/sys/gengoos.go
index 4c45c0a..d078921 100644
--- a/src/runtime/internal/sys/gengoos.go
+++ b/src/runtime/internal/sys/gengoos.go
@@ -30,14 +30,14 @@
if strings.HasPrefix(line, goosPrefix) {
text, err := strconv.Unquote(strings.TrimPrefix(line, goosPrefix))
if err != nil {
- log.Fatalf("parsing goosList %#q: %v", strings.TrimPrefix(line, goosPrefix), err)
+ log.Fatalf("parsing goosList: %v", err)
}
gooses = strings.Fields(text)
}
if strings.HasPrefix(line, goarchPrefix) {
text, err := strconv.Unquote(strings.TrimPrefix(line, goarchPrefix))
if err != nil {
- log.Fatal("parsing goarchList: %v", err)
+ log.Fatalf("parsing goarchList: %v", err)
}
goarches = strings.Fields(text)
}
@@ -45,10 +45,11 @@
for _, target := range gooses {
var buf bytes.Buffer
- fmt.Fprintf(&buf, "// generated by gengoos.go using 'go generate'\n\n")
+ fmt.Fprintf(&buf, "// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.\n\n")
if target == "linux" {
- fmt.Fprintf(&buf, "// +build !android\n\n") // must explicitly exclude android for linux
+ fmt.Fprintf(&buf, "// +build !android\n") // must explicitly exclude android for linux
}
+ fmt.Fprintf(&buf, "// +build %s\n\n", target) // must explicitly include target for bootstrapping purposes
fmt.Fprintf(&buf, "package sys\n\n")
fmt.Fprintf(&buf, "const GOOS = `%s`\n\n", target)
for _, goos := range gooses {
@@ -66,7 +67,8 @@
for _, target := range goarches {
var buf bytes.Buffer
- fmt.Fprintf(&buf, "// generated by gengoos.go using 'go generate'\n\n")
+ fmt.Fprintf(&buf, "// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.\n\n")
+ fmt.Fprintf(&buf, "// +build %s\n\n", target) // must explicitly include target for bootstrapping purposes
fmt.Fprintf(&buf, "package sys\n\n")
fmt.Fprintf(&buf, "const GOARCH = `%s`\n\n", target)
for _, goarch := range goarches {
diff --git a/src/runtime/internal/sys/stubs.go b/src/runtime/internal/sys/stubs.go
index 0a94502..5328023 100644
--- a/src/runtime/internal/sys/stubs.go
+++ b/src/runtime/internal/sys/stubs.go
@@ -9,3 +9,5 @@
const PtrSize = 4 << (^uintptr(0) >> 63) // unsafe.Sizeof(uintptr(0)) but an ideal const
const RegSize = 4 << (^Uintreg(0) >> 63) // unsafe.Sizeof(uintreg(0)) but an ideal const
const SpAlign = 1*(1-GoarchArm64) + 16*GoarchArm64 // SP alignment: 1 normally, 16 for ARM64
+
+var DefaultGoroot string // set at link time
diff --git a/src/runtime/internal/sys/sys.go b/src/runtime/internal/sys/sys.go
index 586a763..9d9ac45 100644
--- a/src/runtime/internal/sys/sys.go
+++ b/src/runtime/internal/sys/sys.go
@@ -6,9 +6,9 @@
// constants used by the runtime.
package sys
-// The next line makes 'go generate' write the zgen_*.go files with
+// The next line makes 'go generate' write the zgo*.go files with
// per-OS and per-arch information, including constants
-// named goos_$GOOS and goarch_$GOARCH for every
+// named Goos$GOOS and Goarch$GOARCH for every
// known GOOS and GOARCH. The constant is 1 on the
// current system, 0 otherwise; multiplying by them is
// useful for defining GOOS- or GOARCH-specific constants.
diff --git a/src/runtime/internal/sys/zgoarch_386.go b/src/runtime/internal/sys/zgoarch_386.go
index 3bcf83b..b07abbe 100644
--- a/src/runtime/internal/sys/zgoarch_386.go
+++ b/src/runtime/internal/sys/zgoarch_386.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build 386
package sys
diff --git a/src/runtime/internal/sys/zgoarch_amd64.go b/src/runtime/internal/sys/zgoarch_amd64.go
index 699f191..bfdcb00 100644
--- a/src/runtime/internal/sys/zgoarch_amd64.go
+++ b/src/runtime/internal/sys/zgoarch_amd64.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build amd64
package sys
diff --git a/src/runtime/internal/sys/zgoarch_amd64p32.go b/src/runtime/internal/sys/zgoarch_amd64p32.go
index cc2d658..b61617d 100644
--- a/src/runtime/internal/sys/zgoarch_amd64p32.go
+++ b/src/runtime/internal/sys/zgoarch_amd64p32.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build amd64p32
package sys
diff --git a/src/runtime/internal/sys/zgoarch_arm.go b/src/runtime/internal/sys/zgoarch_arm.go
index a5fd789..79595d5 100644
--- a/src/runtime/internal/sys/zgoarch_arm.go
+++ b/src/runtime/internal/sys/zgoarch_arm.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build arm
package sys
diff --git a/src/runtime/internal/sys/zgoarch_arm64.go b/src/runtime/internal/sys/zgoarch_arm64.go
index 084d2c7..c839b8f 100644
--- a/src/runtime/internal/sys/zgoarch_arm64.go
+++ b/src/runtime/internal/sys/zgoarch_arm64.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build arm64
package sys
diff --git a/src/runtime/internal/sys/zgoarch_arm64be.go b/src/runtime/internal/sys/zgoarch_arm64be.go
new file mode 100644
index 0000000..58b4ef1
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_arm64be.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build arm64be
+
+package sys
+
+const GOARCH = `arm64be`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 1
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_armbe.go b/src/runtime/internal/sys/zgoarch_armbe.go
new file mode 100644
index 0000000..e9e2c31
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_armbe.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build armbe
+
+package sys
+
+const GOARCH = `armbe`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 1
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_mips.go b/src/runtime/internal/sys/zgoarch_mips.go
index 2f733d2..b0bf4ff 100644
--- a/src/runtime/internal/sys/zgoarch_mips.go
+++ b/src/runtime/internal/sys/zgoarch_mips.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mips
package sys
diff --git a/src/runtime/internal/sys/zgoarch_mips64.go b/src/runtime/internal/sys/zgoarch_mips64.go
index 2ad62bd..093e88c 100644
--- a/src/runtime/internal/sys/zgoarch_mips64.go
+++ b/src/runtime/internal/sys/zgoarch_mips64.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mips64
package sys
diff --git a/src/runtime/internal/sys/zgoarch_mips64le.go b/src/runtime/internal/sys/zgoarch_mips64le.go
index 047c8b4..3bad7cf 100644
--- a/src/runtime/internal/sys/zgoarch_mips64le.go
+++ b/src/runtime/internal/sys/zgoarch_mips64le.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mips64le
package sys
diff --git a/src/runtime/internal/sys/zgoarch_mips64p32.go b/src/runtime/internal/sys/zgoarch_mips64p32.go
new file mode 100644
index 0000000..c5f69fc
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_mips64p32.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mips64p32
+
+package sys
+
+const GOARCH = `mips64p32`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 1
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_mips64p32le.go b/src/runtime/internal/sys/zgoarch_mips64p32le.go
new file mode 100644
index 0000000..014ef84
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_mips64p32le.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mips64p32le
+
+package sys
+
+const GOARCH = `mips64p32le`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 1
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_mipsle.go b/src/runtime/internal/sys/zgoarch_mipsle.go
index 95f3d5a..75814be 100644
--- a/src/runtime/internal/sys/zgoarch_mipsle.go
+++ b/src/runtime/internal/sys/zgoarch_mipsle.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build mipsle
package sys
diff --git a/src/runtime/internal/sys/zgoarch_ppc.go b/src/runtime/internal/sys/zgoarch_ppc.go
new file mode 100644
index 0000000..2a891b8
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_ppc.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build ppc
+
+package sys
+
+const GOARCH = `ppc`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 1
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_ppc64.go b/src/runtime/internal/sys/zgoarch_ppc64.go
index 748b5b5..847db4b 100644
--- a/src/runtime/internal/sys/zgoarch_ppc64.go
+++ b/src/runtime/internal/sys/zgoarch_ppc64.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build ppc64
package sys
diff --git a/src/runtime/internal/sys/zgoarch_ppc64le.go b/src/runtime/internal/sys/zgoarch_ppc64le.go
index d3dcba4..5195797 100644
--- a/src/runtime/internal/sys/zgoarch_ppc64le.go
+++ b/src/runtime/internal/sys/zgoarch_ppc64le.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build ppc64le
package sys
diff --git a/src/runtime/internal/sys/zgoarch_s390.go b/src/runtime/internal/sys/zgoarch_s390.go
new file mode 100644
index 0000000..cd215da
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_s390.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build s390
+
+package sys
+
+const GOARCH = `s390`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 1
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_s390x.go b/src/runtime/internal/sys/zgoarch_s390x.go
index 1ead5d5..b9368ff 100644
--- a/src/runtime/internal/sys/zgoarch_s390x.go
+++ b/src/runtime/internal/sys/zgoarch_s390x.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build s390x
package sys
diff --git a/src/runtime/internal/sys/zgoarch_sparc.go b/src/runtime/internal/sys/zgoarch_sparc.go
new file mode 100644
index 0000000..e9afe01
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_sparc.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build sparc
+
+package sys
+
+const GOARCH = `sparc`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 1
+const GoarchSparc64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_sparc64.go b/src/runtime/internal/sys/zgoarch_sparc64.go
new file mode 100644
index 0000000..b6004ef
--- /dev/null
+++ b/src/runtime/internal/sys/zgoarch_sparc64.go
@@ -0,0 +1,28 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build sparc64
+
+package sys
+
+const GOARCH = `sparc64`
+
+const Goarch386 = 0
+const GoarchAmd64 = 0
+const GoarchAmd64p32 = 0
+const GoarchArm = 0
+const GoarchArmbe = 0
+const GoarchArm64 = 0
+const GoarchArm64be = 0
+const GoarchPpc64 = 0
+const GoarchPpc64le = 0
+const GoarchMips = 0
+const GoarchMipsle = 0
+const GoarchMips64 = 0
+const GoarchMips64le = 0
+const GoarchMips64p32 = 0
+const GoarchMips64p32le = 0
+const GoarchPpc = 0
+const GoarchS390 = 0
+const GoarchS390x = 0
+const GoarchSparc = 0
+const GoarchSparc64 = 1
diff --git a/src/runtime/internal/sys/zgoos_android.go b/src/runtime/internal/sys/zgoos_android.go
index 6503b15..01ebe75 100644
--- a/src/runtime/internal/sys/zgoos_android.go
+++ b/src/runtime/internal/sys/zgoos_android.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build android
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_darwin.go b/src/runtime/internal/sys/zgoos_darwin.go
index 6a28598..1303d71 100644
--- a/src/runtime/internal/sys/zgoos_darwin.go
+++ b/src/runtime/internal/sys/zgoos_darwin.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build darwin
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_dragonfly.go b/src/runtime/internal/sys/zgoos_dragonfly.go
index 886ac26..64325c7 100644
--- a/src/runtime/internal/sys/zgoos_dragonfly.go
+++ b/src/runtime/internal/sys/zgoos_dragonfly.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build dragonfly
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_freebsd.go b/src/runtime/internal/sys/zgoos_freebsd.go
index 0bf2403..3744971 100644
--- a/src/runtime/internal/sys/zgoos_freebsd.go
+++ b/src/runtime/internal/sys/zgoos_freebsd.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build freebsd
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_linux.go b/src/runtime/internal/sys/zgoos_linux.go
index c8664db..c726465 100644
--- a/src/runtime/internal/sys/zgoos_linux.go
+++ b/src/runtime/internal/sys/zgoos_linux.go
@@ -1,6 +1,7 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
// +build !android
+// +build linux
package sys
@@ -17,3 +18,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_nacl.go b/src/runtime/internal/sys/zgoos_nacl.go
index 0541226..53b394c 100644
--- a/src/runtime/internal/sys/zgoos_nacl.go
+++ b/src/runtime/internal/sys/zgoos_nacl.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build nacl
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_netbsd.go b/src/runtime/internal/sys/zgoos_netbsd.go
index 5c509a1..8bfdf45 100644
--- a/src/runtime/internal/sys/zgoos_netbsd.go
+++ b/src/runtime/internal/sys/zgoos_netbsd.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build netbsd
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_openbsd.go b/src/runtime/internal/sys/zgoos_openbsd.go
index dc43157..fc6acb7 100644
--- a/src/runtime/internal/sys/zgoos_openbsd.go
+++ b/src/runtime/internal/sys/zgoos_openbsd.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build openbsd
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_plan9.go b/src/runtime/internal/sys/zgoos_plan9.go
index 4b0934f..75baeb3 100644
--- a/src/runtime/internal/sys/zgoos_plan9.go
+++ b/src/runtime/internal/sys/zgoos_plan9.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build plan9
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 1
const GoosSolaris = 0
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_solaris.go b/src/runtime/internal/sys/zgoos_solaris.go
index 42511a3..c18f34f 100644
--- a/src/runtime/internal/sys/zgoos_solaris.go
+++ b/src/runtime/internal/sys/zgoos_solaris.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build solaris
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 1
const GoosWindows = 0
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_windows.go b/src/runtime/internal/sys/zgoos_windows.go
index d77f62c..b9f0d4e 100644
--- a/src/runtime/internal/sys/zgoos_windows.go
+++ b/src/runtime/internal/sys/zgoos_windows.go
@@ -1,4 +1,6 @@
-// generated by gengoos.go using 'go generate'
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build windows
package sys
@@ -15,3 +17,4 @@
const GoosPlan9 = 0
const GoosSolaris = 0
const GoosWindows = 1
+const GoosZos = 0
diff --git a/src/runtime/internal/sys/zgoos_zos.go b/src/runtime/internal/sys/zgoos_zos.go
new file mode 100644
index 0000000..2563ebe
--- /dev/null
+++ b/src/runtime/internal/sys/zgoos_zos.go
@@ -0,0 +1,20 @@
+// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
+
+// +build zos
+
+package sys
+
+const GOOS = `zos`
+
+const GoosAndroid = 0
+const GoosDarwin = 0
+const GoosDragonfly = 0
+const GoosFreebsd = 0
+const GoosLinux = 0
+const GoosNacl = 0
+const GoosNetbsd = 0
+const GoosOpenbsd = 0
+const GoosPlan9 = 0
+const GoosSolaris = 0
+const GoosWindows = 0
+const GoosZos = 1
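
Editor's note: the regenerated zgoarch_*/zgoos_* files above turn every known GOOS/GOARCH into an integer constant so platform checks can be folded away at compile time. A minimal sketch of that use, assuming it lives inside the runtime tree (runtime/internal/sys is an internal package and is not importable from ordinary code); osTag is a hypothetical helper:

package example

import "runtime/internal/sys" // only importable from inside the runtime; shown for illustration

// osTag picks a label using the generated constants. Because sys.GoosZos and
// sys.GoosWindows are untyped constants (0 or 1), the dead branches are
// removed at compile time instead of being tested at run time.
func osTag() string {
	if sys.GoosZos == 1 {
		return "zos"
	}
	if sys.GoosWindows == 1 {
		return "windows"
	}
	return sys.GOOS // the string constant, e.g. `linux`
}
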
diff --git a/src/runtime/internal/sys/zversion.go b/src/runtime/internal/sys/zversion.go
index 3a529d4..3c54824 100644
--- a/src/runtime/internal/sys/zversion.go
+++ b/src/runtime/internal/sys/zversion.go
@@ -1,9 +1,7 @@
-// auto generated by go tool dist
+// Code generated by go tool dist; DO NOT EDIT.
package sys
-const DefaultGoroot = `./prebuilts/go/linux-x86`
-const TheVersion = `go1.9`
+const TheVersion = `go1.10`
const Goexperiment = ``
const StackGuardMultiplier = 1
-
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index 5b0169d..b41f805 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -71,7 +71,7 @@
// for this lock, chained through m->nextwaitm.
// Queue this M.
for {
- gp.m.nextwaitm = v &^ locked
+ gp.m.nextwaitm = muintptr(v &^ locked)
if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
break
}
@@ -103,8 +103,8 @@
} else {
// Other M's are waiting for the lock.
// Dequeue an M.
- mp = (*m)(unsafe.Pointer(v &^ locked))
- if atomic.Casuintptr(&l.key, v, mp.nextwaitm) {
+ mp = muintptr(v &^ locked).ptr()
+ if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) {
// Dequeued an M. Wake it.
semawakeup(mp)
break
@@ -140,7 +140,7 @@
case v == 0:
// Nothing was waiting. Done.
case v == locked:
- // Two notewakeups! Not allowed.
+ // Two notewakeups! Not allowed.
throw("notewakeup - double wakeup")
default:
// Must be the waiting m. Wake it up.
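
Editor's note: the lock_sema.go hunk above stores the head of the waiting-M list as a muintptr and keeps a low "locked" bit in the same word (v &^ locked strips it). A standalone sketch of that tagged-word idea with hypothetical names (node, lockedBit, pack, unpack); hiding a pointer in a uintptr is only safe when, as in the runtime, something else keeps the object alive:

package example

import "unsafe"

const lockedBit uintptr = 1 // low bit of the word marks "held"

type node struct{ next uintptr }

// pack combines a *node and the locked flag in one word. Nodes must be at
// least 2-byte aligned so the low bit is free for the flag.
func pack(n *node) uintptr {
	return uintptr(unsafe.Pointer(n)) | lockedBit
}

// unpack strips the flag and recovers the pointer, mirroring
// muintptr(v &^ locked).ptr() above.
func unpack(v uintptr) *node {
	return (*node)(unsafe.Pointer(v &^ lockedBit))
}
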
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 0ebd2c0..72b8f40 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -529,9 +529,8 @@
}
s.allocCache >>= uint(theBit + 1)
s.freeindex = freeidx
- v := gclinkptr(result*s.elemsize + s.base())
s.allocCount++
- return v
+ return gclinkptr(result*s.elemsize + s.base())
}
}
return 0
@@ -847,6 +846,9 @@
// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.Pointer {
+ if n == 1 {
+ return mallocgc(typ.size, typ, true)
+ }
if n < 0 || uintptr(n) > maxSliceCap(typ.size) {
panic(plainError("runtime: allocation size out of range"))
}
@@ -863,11 +865,13 @@
mProf_Malloc(x, size)
}
-// nextSample returns the next sampling point for heap profiling.
-// It produces a random variable with a geometric distribution and
-// mean MemProfileRate. This is done by generating a uniformly
-// distributed random number and applying the cumulative distribution
-// function for an exponential.
+// nextSample returns the next sampling point for heap profiling. The goal is
+// to sample allocations on average every MemProfileRate bytes, but with a
+// completely random distribution over the allocation timeline; this
+// corresponds to a Poisson process with parameter MemProfileRate. In Poisson
+// processes, the distance between two samples follows the exponential
+// distribution (exp(MemProfileRate)), so the best return value is a random
+// number taken from an exponential distribution whose mean is MemProfileRate.
func nextSample() int32 {
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
@@ -876,25 +880,29 @@
}
}
- period := MemProfileRate
+ return fastexprand(MemProfileRate)
+}
- // make nextSample not overflow. Maximum possible step is
- // -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period.
+// fastexprand returns a random number from an exponential distribution with
+// the specified mean.
+func fastexprand(mean int) int32 {
+ // Avoid overflow. Maximum possible step is
+ // -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean.
switch {
- case period > 0x7000000:
- period = 0x7000000
- case period == 0:
+ case mean > 0x7000000:
+ mean = 0x7000000
+ case mean == 0:
return 0
}
- // Let m be the sample rate,
- // the probability distribution function is m*exp(-mx), so the CDF is
- // p = 1 - exp(-mx), so
- // q = 1 - p == exp(-mx)
- // log_e(q) = -mx
- // -log_e(q)/m = x
- // x = -log_e(q) * period
- // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency
+ // Take a random sample of the exponential distribution exp(-mean*x).
+ // The probability distribution function is mean*exp(-mean*x), so the CDF is
+ // p = 1 - exp(-mean*x), so
+ // q = 1 - p == exp(-mean*x)
+ // log_e(q) = -mean*x
+ // -log_e(q)/mean = x
+ // x = -log_e(q) * mean
+ // x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency
const randomBitCount = 26
q := fastrand()%(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
@@ -902,7 +910,7 @@
qlog = 0
}
const minusLog2 = -0.6931471805599453 // -ln(2)
- return int32(qlog*(minusLog2*float64(period))) + 1
+ return int32(qlog*(minusLog2*float64(mean))) + 1
}
// nextSampleNoFP is similar to nextSample, but uses older,
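
Editor's note: the inverse-CDF derivation in the fastexprand comment can be checked outside the runtime with the standard library; math/rand and math.Log2 stand in here for fastrand and fastlog2, and expSample is a hypothetical name:

package example

import (
	"math"
	"math/rand"
)

// expSample mirrors the derivation above: q is uniform in (0, 2^26],
// log2(q)-26 is log2 of a uniform (0,1] variable, and scaling by
// -ln(2)*mean yields an exponential sample with the given mean.
func expSample(mean int) int32 {
	const randomBitCount = 26
	q := rand.Uint32()%(1<<randomBitCount) + 1
	qlog := math.Log2(float64(q)) - randomBitCount
	if qlog > 0 {
		qlog = 0
	}
	const minusLog2 = -0.6931471805599453 // -ln(2)
	return int32(qlog*(minusLog2*float64(mean))) + 1
}
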
@@ -920,7 +928,7 @@
}
type persistentAlloc struct {
- base unsafe.Pointer
+ base *notInHeap
off uintptr
}
@@ -937,17 +945,17 @@
//
// Consider marking persistentalloc'd types go:notinheap.
func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
- var p unsafe.Pointer
+ var p *notInHeap
systemstack(func() {
p = persistentalloc1(size, align, sysStat)
})
- return p
+ return unsafe.Pointer(p)
}
// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
//go:systemstack
-func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
const (
chunk = 256 << 10
maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
@@ -968,7 +976,7 @@
}
if size >= maxBlock {
- return sysAlloc(size, sysStat)
+ return (*notInHeap)(sysAlloc(size, sysStat))
}
mp := acquirem()
@@ -981,7 +989,7 @@
}
persistent.off = round(persistent.off, align)
if persistent.off+size > chunk || persistent.base == nil {
- persistent.base = sysAlloc(chunk, &memstats.other_sys)
+ persistent.base = (*notInHeap)(sysAlloc(chunk, &memstats.other_sys))
if persistent.base == nil {
if persistent == &globalAlloc.persistentAlloc {
unlock(&globalAlloc.mutex)
@@ -990,7 +998,7 @@
}
persistent.off = 0
}
- p := add(persistent.base, persistent.off)
+ p := persistent.base.add(persistent.off)
persistent.off += size
releasem(mp)
if persistent == &globalAlloc.persistentAlloc {
@@ -1003,3 +1011,19 @@
}
return p
}
+
+// notInHeap is off-heap memory allocated by a lower-level allocator
+// like sysAlloc or persistentAlloc.
+//
+// In general, it's better to use real types marked as go:notinheap,
+// but this serves as a generic type for situations where that isn't
+// possible (like in the allocators).
+//
+// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
+//
+//go:notinheap
+type notInHeap struct{}
+
+func (p *notInHeap) add(bytes uintptr) *notInHeap {
+ return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
+}
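
Editor's note: persistentalloc1 above is a bump allocator over sysAlloc'd chunks, with notInHeap.add supplying the pointer arithmetic. The same bump-allocation pattern over an ordinary byte slice, as a sketch (arena and its fields are made up, and there is no off-heap memory here):

package example

// arena hands out sub-slices of a chunk and grabs a fresh chunk when the
// current one runs out, like persistentalloc1 (which also has a separate
// path for requests at or above 64K that this sketch omits).
type arena struct {
	buf []byte
	off int
}

func (a *arena) alloc(size, align int) []byte {
	const chunk = 256 << 10
	a.off = (a.off + align - 1) &^ (align - 1) // round up; align must be a power of two
	if a.buf == nil || a.off+size > len(a.buf) {
		a.buf = make([]byte, chunk)
		a.off = 0
	}
	p := a.buf[a.off : a.off+size : a.off+size]
	a.off += size
	return p
}
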
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index d9487ee..93aa56d 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -46,9 +46,6 @@
}
// Of the uint fields, HeapReleased, HeapIdle can be 0.
// PauseTotalNs can be 0 if timer resolution is poor.
- //
- // TODO: Test that GCCPUFraction is <= 0.99. This currently
- // fails on windows/386. (Issue #19319)
fields := map[string][]func(interface{}) error{
"Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)},
"Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)},
@@ -61,7 +58,7 @@
"NextGC": {nz, le(1e10)}, "LastGC": {nz},
"PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil,
"NumGC": {nz, le(1e9)}, "NumForcedGC": {nz, le(1e9)},
- "GCCPUFraction": nil, "EnableGC": {eq(true)}, "DebugGC": {eq(false)},
+ "GCCPUFraction": {le(0.99)}, "EnableGC": {eq(true)}, "DebugGC": {eq(false)},
"BySize": nil,
}
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index 81f05a0..6ed655d 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -244,7 +244,7 @@
numGrowStep := 250
numReader := 16
if testing.Short() {
- numLoop, numGrowStep = 2, 500
+ numLoop, numGrowStep = 2, 100
}
for i := 0; i < numLoop; i++ {
m := make(map[int]int, 0)
@@ -596,6 +596,134 @@
}
}
+var mapSink map[int]int
+
+var mapBucketTests = [...]struct {
+ n int // n is the number of map elements
+ noescape int // number of expected buckets for non-escaping map
+ escape int // number of expected buckets for escaping map
+}{
+ {-(1 << 30), 1, 1},
+ {-1, 1, 1},
+ {0, 1, 1},
+ {1, 1, 1},
+ {8, 1, 1},
+ {9, 2, 2},
+ {13, 2, 2},
+ {14, 4, 4},
+ {26, 4, 4},
+}
+
+func TestMapBuckets(t *testing.T) {
+ // Test that maps of different sizes have the right number of buckets.
+ // Non-escaping maps with small buckets (like map[int]int) never
+ // have a nil bucket pointer due to starting with preallocated buckets
+ // on the stack. Escaping maps start with a non-nil bucket pointer if
+ // hint size is above bucketCnt and thereby have more than one bucket.
+ // These tests depend on bucketCnt and loadFactor* in hashmap.go.
+ t.Run("mapliteral", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := map[int]int{}
+ if runtime.MapBucketsPointerIsNil(localMap) {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := map[int]int{}
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("nohint", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int)
+ if runtime.MapBucketsPointerIsNil(localMap) {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("makemap", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int, tt.n)
+ if runtime.MapBucketsPointerIsNil(localMap) {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int, tt.n)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("makemap64", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int, int64(tt.n))
+ if runtime.MapBucketsPointerIsNil(localMap) {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int, tt.n)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+
+}
+
func benchmarkMapPop(b *testing.B, n int) {
m := map[int]int{}
for i := 0; i < b.N; i++ {
@@ -617,14 +745,38 @@
func BenchmarkMapPop1000(b *testing.B) { benchmarkMapPop(b, 1000) }
func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) }
+var testNonEscapingMapVariable int = 8
+
func TestNonEscapingMap(t *testing.T) {
n := testing.AllocsPerRun(1000, func() {
+ m := map[int]int{}
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("mapliteral: want 0 allocs, got %v", n)
+ }
+ n = testing.AllocsPerRun(1000, func() {
m := make(map[int]int)
m[0] = 0
})
if n != 0 {
- t.Fatalf("want 0 allocs, got %v", n)
+ t.Fatalf("no hint: want 0 allocs, got %v", n)
}
+ n = testing.AllocsPerRun(1000, func() {
+ m := make(map[int]int, 8)
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("with small hint: want 0 allocs, got %v", n)
+ }
+ n = testing.AllocsPerRun(1000, func() {
+ m := make(map[int]int, testNonEscapingMapVariable)
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("with variable hint: want 0 allocs, got %v", n)
+ }
+
}
func benchmarkMapAssignInt32(b *testing.B, n int) {
@@ -635,12 +787,16 @@
}
func benchmarkMapDeleteInt32(b *testing.B, n int) {
- a := make(map[int32]int)
- for i := 0; i < n*b.N; i++ {
- a[int32(i)] = i
- }
+ a := make(map[int32]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := i; j < i+n; j++ {
+ a[int32(j)] = j
+ }
+ b.StartTimer()
+ }
delete(a, int32(i))
}
}
@@ -653,12 +809,16 @@
}
func benchmarkMapDeleteInt64(b *testing.B, n int) {
- a := make(map[int64]int)
- for i := 0; i < n*b.N; i++ {
- a[int64(i)] = i
- }
+ a := make(map[int64]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := i; j < i+n; j++ {
+ a[int64(j)] = j
+ }
+ b.StartTimer()
+ }
delete(a, int64(i))
}
}
@@ -676,17 +836,23 @@
}
func benchmarkMapDeleteStr(b *testing.B, n int) {
- k := make([]string, n*b.N)
- for i := 0; i < n*b.N; i++ {
- k[i] = strconv.Itoa(i)
+ i2s := make([]string, n)
+ for i := 0; i < n; i++ {
+ i2s[i] = strconv.Itoa(i)
}
- a := make(map[string]int)
- for i := 0; i < n*b.N; i++ {
- a[k[i]] = i
- }
+ a := make(map[string]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
- delete(a, k[i])
+ k := 0
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := 0; j < n; j++ {
+ a[i2s[j]] = j
+ }
+ k = i
+ b.StartTimer()
+ }
+ delete(a, i2s[i-k])
}
}
@@ -705,7 +871,7 @@
}
func BenchmarkMapDelete(b *testing.B) {
- b.Run("Int32", runWith(benchmarkMapDeleteInt32, 1, 2, 4))
- b.Run("Int64", runWith(benchmarkMapDeleteInt64, 1, 2, 4))
- b.Run("Str", runWith(benchmarkMapDeleteStr, 1, 2, 4))
+ b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000))
+ b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000))
+ b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000))
}
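
Editor's note: the rewritten map delete benchmarks above keep the timed loop at exactly b.N deletes and refill the map off the clock whenever it drains, instead of pre-building n*b.N entries. The same pattern in a generic benchmark (benchmarkDrain is a made-up name; only the testing package API is assumed):

package example

import "testing"

// benchmarkDrain deletes one key per iteration and pauses the timer to
// refill the map each time it empties, so setup cost never skews the result.
func benchmarkDrain(b *testing.B, n int) {
	m := make(map[int]int, n)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if len(m) == 0 {
			b.StopTimer()
			for j := i; j < i+n; j++ {
				m[j] = j
			}
			b.StartTimer()
		}
		delete(m, i)
	}
}
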
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index 3713c50..e28bdb8 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -182,6 +182,8 @@
func writebarrierptr_prewrite1(dst *uintptr, src uintptr) {
mp := acquirem()
if mp.inwb || mp.dying > 0 {
+ // We explicitly allow write barriers in startpanic_m,
+ // since we're going down anyway. Ignore them here.
releasem(mp)
return
}
@@ -237,6 +239,10 @@
// typedmemmove copies a value of type t to dst from src.
// Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if typ.kind&kindNoPointers == 0 {
@@ -258,8 +264,8 @@
//go:linkname reflect_typedmemmove reflect.typedmemmove
func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if raceenabled {
- raceWriteObjectPC(typ, dst, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove))
- raceReadObjectPC(typ, src, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove))
+ raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove))
+ raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove))
}
if msanenabled {
msanwrite(dst, typ.size)
@@ -320,8 +326,12 @@
dstp := dst.array
srcp := src.array
+ // The compiler emits calls to typedslicecopy before
+ // instrumentation runs, so unlike the other copying and
+ // assignment operations, it's not instrumented in the calling
+ // code and needs its own instrumentation.
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&typ))
+ callerpc := getcallerpc()
pc := funcPC(slicecopy)
racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc)
racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
@@ -339,41 +349,13 @@
// compiler only emits calls to typedslicecopy for types with pointers,
// and growslice and reflect_typedslicecopy check for pointers
// before calling typedslicecopy.
- if !writeBarrier.needed {
- memmove(dstp, srcp, uintptr(n)*typ.size)
- return n
+ size := uintptr(n) * typ.size
+ if writeBarrier.needed {
+ bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size)
}
-
- systemstack(func() {
- if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) {
- // Overlap with src before dst.
- // Copy backward, being careful not to move dstp/srcp
- // out of the array they point into.
- dstp = add(dstp, uintptr(n-1)*typ.size)
- srcp = add(srcp, uintptr(n-1)*typ.size)
- i := 0
- for {
- typedmemmove(typ, dstp, srcp)
- if i++; i >= n {
- break
- }
- dstp = add(dstp, -typ.size)
- srcp = add(srcp, -typ.size)
- }
- } else {
- // Copy forward, being careful not to move dstp/srcp
- // out of the array they point into.
- i := 0
- for {
- typedmemmove(typ, dstp, srcp)
- if i++; i >= n {
- break
- }
- dstp = add(dstp, typ.size)
- srcp = add(srcp, typ.size)
- }
- }
- })
+ // See typedmemmove for a discussion of the race between the
+ // barrier and memmove.
+ memmove(dstp, srcp, size)
return n
}
@@ -390,7 +372,7 @@
size := uintptr(n) * elemType.size
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&elemType))
+ callerpc := getcallerpc()
pc := funcPC(reflect_typedslicecopy)
racewriterangepc(dst.array, size, callerpc, pc)
racereadrangepc(src.array, size, callerpc, pc)
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 2a9f1b8..3a88f17 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -449,11 +449,6 @@
return
}
-// prefetch the bits.
-func (h heapBits) prefetch() {
- prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
-}
-
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
@@ -480,6 +475,9 @@
// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
+//
+// nosplit because it is used during write barriers and must not be preempted.
+//go:nosplit
func (h heapBits) bits() uint32 {
// The (shift & 31) eliminates a test and conditional branch
// from the generated code.
@@ -528,12 +526,13 @@
atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
}
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -583,12 +582,15 @@
return
}
+ buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst)
if src == 0 {
for i := uintptr(0); i < size; i += sys.PtrSize {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
- writebarrierptr_prewrite1(dstx, 0)
+ if !buf.putFast(*dstx, 0) {
+ wbBufFlush(nil, 0)
+ }
}
h = h.next()
}
@@ -597,7 +599,9 @@
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
- writebarrierptr_prewrite1(dstx, *srcx)
+ if !buf.putFast(*dstx, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
h = h.next()
}
@@ -617,6 +621,7 @@
bits = addb(bits, word/8)
mask := uint8(1) << (word % 8)
+ buf := &getg().m.p.ptr().wbBuf
for i := uintptr(0); i < size; i += sys.PtrSize {
if mask == 0 {
bits = addb(bits, 1)
@@ -630,10 +635,14 @@
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
- writebarrierptr_prewrite1(dstx, 0)
+ if !buf.putFast(*dstx, 0) {
+ wbBufFlush(nil, 0)
+ }
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
- writebarrierptr_prewrite1(dstx, *srcx)
+ if !buf.putFast(*dstx, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
}
mask <<= 1
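
Editor's note: bulkBarrierPreWrite now pushes (dst, src) pairs into the per-P write barrier buffer with putFast and only calls wbBufFlush when the buffer fills. The buffering split in isolation, with illustrative names (pairBuf is not the runtime's wbBuf):

package example

// pairBuf batches (dst, src) words and hands them to a slow-path flush only
// when full, mirroring the putFast/wbBufFlush split used above.
type pairBuf struct {
	next int
	buf  [256]uintptr
}

// putFast records a pair and reports whether there is still room; a false
// return tells the caller to run the (expensive) flush before adding more.
func (b *pairBuf) putFast(dst, src uintptr) bool {
	b.buf[b.next] = dst
	b.buf[b.next+1] = src
	b.next += 2
	return b.next < len(b.buf)
}

func (b *pairBuf) flush(process func(dst, src uintptr)) {
	for i := 0; i < b.next; i += 2 {
		process(b.buf[i], b.buf[i+1])
	}
	b.next = 0
}
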
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 96fb273..6c24650 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -104,7 +104,7 @@
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
-func (c *mcache) refill(spc spanClass) *mspan {
+func (c *mcache) refill(spc spanClass) {
_g_ := getg()
_g_.m.locks++
@@ -131,7 +131,6 @@
c.alloc[spc] = s
_g_.m.locks--
- return s
}
func (c *mcache) releaseAll() {
diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go
index e0d2347..23872b9 100644
--- a/src/runtime/mem_bsd.go
+++ b/src/runtime/mem_bsd.go
@@ -15,8 +15,8 @@
// which prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
- v := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(v) < 4096 {
+ v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return nil
}
mSysStatInc(sysStat, n)
@@ -51,8 +51,8 @@
return v
}
- p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
+ p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return nil
}
*reserved = true
@@ -76,22 +76,22 @@
// to do this - we do not on other platforms.
flags |= _MAP_FIXED
}
- p := mmap(v, n, _PROT_READ|_PROT_WRITE, flags, -1, 0)
- if uintptr(p) == _ENOMEM || (GOOS == "solaris" && uintptr(p) == _sunosEAGAIN) {
+ p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, flags, -1, 0)
+ if err == _ENOMEM || (GOOS == "solaris" && err == _sunosEAGAIN) {
throw("runtime: out of memory")
}
- if p != v {
- print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+ if p != v || err != 0 {
+ print("runtime: address space conflict: map(", v, ") = ", p, "(err ", err, ")\n")
throw("runtime: address space conflict")
}
return
}
- p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
- if uintptr(p) == _ENOMEM || (GOOS == "solaris" && uintptr(p) == _sunosEAGAIN) {
+ p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+ if err == _ENOMEM || (GOOS == "solaris" && err == _sunosEAGAIN) {
throw("runtime: out of memory")
}
- if p != v {
+ if p != v || err != 0 {
throw("runtime: cannot map pages in arena address space")
}
}
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
index 3f1c4d7..e41452a 100644
--- a/src/runtime/mem_darwin.go
+++ b/src/runtime/mem_darwin.go
@@ -10,8 +10,8 @@
// which prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
- v := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(v) < 4096 {
+ v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return nil
}
mSysStatInc(sysStat, n)
@@ -40,8 +40,8 @@
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
*reserved = true
- p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
+ p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return nil
}
return p
@@ -53,11 +53,11 @@
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
mSysStatInc(sysStat, n)
- p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
- if uintptr(p) == _ENOMEM {
+ p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+ if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v {
+ if p != v || err != 0 {
throw("runtime: cannot map pages in arena address space")
}
}
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index 094658d..16f4443 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -41,30 +41,30 @@
return true
}
-func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
- p := mmap(v, n, prot, flags, fd, offset)
+func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) (unsafe.Pointer, int) {
+ p, err := mmap(v, n, prot, flags, fd, offset)
// On some systems, mmap ignores v without
// MAP_FIXED, so retry if the address space is free.
if p != v && addrspace_free(v, n) {
- if uintptr(p) > 4096 {
+ if err == 0 {
munmap(p, n)
}
- p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
+ p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
}
- return p
+ return p, err
}
// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
- p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
- if uintptr(p) == _EACCES {
+ p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
+ if err == _EACCES {
print("runtime: mmap: access denied\n")
exit(2)
}
- if uintptr(p) == _EAGAIN {
+ if err == _EAGAIN {
print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
exit(2)
}
@@ -186,9 +186,9 @@
// if we can reserve at least 64K and check the assumption in SysMap.
// Only user-mode Linux (UML) rejects these requests.
if sys.PtrSize == 8 && uint64(n) > 1<<32 {
- p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if p != v {
- if uintptr(p) >= 4096 {
+ p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if p != v || err != 0 {
+ if err == 0 {
munmap(p, 64<<10)
}
return nil
@@ -198,8 +198,8 @@
return v
}
- p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
+ p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return nil
}
*reserved = true
@@ -211,22 +211,22 @@
// On 64-bit, we don't actually have v reserved, so tread carefully.
if !reserved {
- p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) == _ENOMEM {
+ p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v {
- print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+ if p != v || err != 0 {
+ print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n")
throw("runtime: address space conflict")
}
return
}
- p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
- if uintptr(p) == _ENOMEM {
+ p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+ if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v {
+ if p != v || err != 0 {
throw("runtime: cannot map pages in arena address space")
}
}
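
Editor's note: the mem_*.go hunks all follow from changing the runtime's internal mmap wrapper to return the errno as a second result instead of encoding failures in the pointer (the old uintptr(p) < 4096 checks). A hedged, Unix-only sketch of the same shape using package syscall (anonMap is a made-up helper):

package example

import "syscall"

// anonMap maps n bytes of anonymous memory and reports the errno separately,
// mirroring the (pointer, err) signature adopted above. 0 means success.
func anonMap(n int) ([]byte, syscall.Errno) {
	p, err := syscall.Mmap(-1, 0, n,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		if errno, ok := err.(syscall.Errno); ok {
			return nil, errno
		}
		return nil, syscall.EINVAL // unexpected error type; treat as failure
	}
	return p, 0
}
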
diff --git a/src/runtime/memclr_amd64p32.s b/src/runtime/memclr_amd64p32.s
new file mode 100644
index 0000000..26171bf
--- /dev/null
+++ b/src/runtime/memclr_amd64p32.s
@@ -0,0 +1,23 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
+ MOVL ptr+0(FP), DI
+ MOVL n+4(FP), CX
+ MOVQ CX, BX
+ ANDQ $3, BX
+ SHRQ $2, CX
+ MOVQ $0, AX
+ CLD
+ REP
+ STOSL
+ MOVQ BX, CX
+ REP
+ STOSB
+ // Note: we zero only 4 bytes at a time so that the tail is at most
+ // 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
+ // See issue 13160.
+ RET
diff --git a/src/runtime/memclr_arm64.s b/src/runtime/memclr_arm64.s
index 9d756bc..bf954e0 100644
--- a/src/runtime/memclr_arm64.s
+++ b/src/runtime/memclr_arm64.s
@@ -6,32 +6,54 @@
// void runtime·memclrNoHeapPointers(void*, uintptr)
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
- MOVD ptr+0(FP), R3
- MOVD n+8(FP), R4
- // TODO(mwhudson): this is written this way to avoid tickling
- // warnings from addpool when written as AND $7, R4, R6 (see
- // https://golang.org/issue/12708)
- AND $~7, R4, R5 // R5 is N&~7
- SUB R5, R4, R6 // R6 is N&7
+ MOVD ptr+0(FP), R0
+ MOVD n+8(FP), R1
+ // If size is less than 16 bytes, use tail_zero to zero what remains
+ CMP $16, R1
+ BLT tail_zero
+ // Get buffer offset into 16 byte aligned address for better performance
+ ANDS $15, R0, ZR
+ BNE unaligned_to_16
+aligned_to_16:
+ LSR $4, R1, R2
+zero_by_16:
+ STP.P (ZR, ZR), 16(R0)
+ SUBS $1, R2, R2
+ BNE zero_by_16
- CMP $0, R5
- BEQ nowords
+ ANDS $15, R1, R1
+ BEQ ending
- ADD R3, R5, R5
+ // Zero buffer with size=R1 < 16
+tail_zero:
+ TBZ $3, R1, tail_zero_4
+ MOVD.P ZR, 8(R0)
-wordloop: // TODO: Optimize for unaligned ptr.
- MOVD.P $0, 8(R3)
- CMP R3, R5
- BNE wordloop
-nowords:
- CMP $0, R6
- BEQ done
+tail_zero_4:
+ TBZ $2, R1, tail_zero_2
+ MOVW.P ZR, 4(R0)
- ADD R3, R6, R6
+tail_zero_2:
+ TBZ $1, R1, tail_zero_1
+ MOVH.P ZR, 2(R0)
-byteloop:
- MOVBU.P $0, 1(R3)
- CMP R3, R6
- BNE byteloop
-done:
+tail_zero_1:
+ TBZ $0, R1, ending
+ MOVB ZR, (R0)
+
+ending:
RET
+
+unaligned_to_16:
+ MOVD R0, R2
+head_loop:
+ MOVBU.P ZR, 1(R0)
+ ANDS $15, R0, ZR
+ BNE head_loop
+ // Adjust length for what remains
+ SUB R2, R0, R3
+ SUB R3, R1
+ // If size is less than 16 bytes, use tail_zero to zero what remains
+ CMP $16, R1
+ BLT tail_zero
+ B aligned_to_16
diff --git a/src/runtime/memmove_nacl_amd64p32.s b/src/runtime/memmove_amd64p32.s
similarity index 100%
rename from src/runtime/memmove_nacl_amd64p32.s
rename to src/runtime/memmove_amd64p32.s
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 74b8753..62de604 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -9,6 +9,7 @@
"encoding/binary"
"fmt"
"internal/race"
+ "internal/testenv"
. "runtime"
"testing"
)
@@ -88,6 +89,10 @@
}
func TestMemmoveLarge0x180000(t *testing.T) {
+ if testing.Short() && testenv.Builder() == "" {
+ t.Skip("-short")
+ }
+
t.Parallel()
if race.Enabled {
t.Skip("skipping large memmove test under race detector")
@@ -96,6 +101,10 @@
}
func TestMemmoveOverlapLarge0x120000(t *testing.T) {
+ if testing.Short() && testenv.Builder() == "" {
+ t.Skip("-short")
+ }
+
t.Parallel()
if race.Enabled {
t.Skip("skipping large memmove test under race detector")
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index a8729b1..c11a6f1 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -461,11 +461,7 @@
return
}
-// Mark KeepAlive as noinline so that the current compiler will ensure
-// that the argument is alive at the point of the function call.
-// If it were inlined, it would disappear, and there would be nothing
-// keeping the argument alive. Perhaps a future compiler will recognize
-// runtime.KeepAlive specially and do something more efficient.
+// Mark KeepAlive as noinline so that it is easily detectable as an intrinsic.
//go:noinline
// KeepAlive marks its argument as currently reachable.
@@ -487,4 +483,11 @@
// Without the KeepAlive call, the finalizer could run at the start of
// syscall.Read, closing the file descriptor before syscall.Read makes
// the actual system call.
-func KeepAlive(interface{}) {}
+func KeepAlive(x interface{}) {
+ // Introduce a use of x that the compiler can't eliminate.
+ // This makes sure x is alive on entry. We need x to be alive
+ // on entry for "defer runtime.KeepAlive(x)"; see issue 21402.
+ if cgoAlwaysFalse {
+ println(x)
+ }
+}
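
Editor's note: KeepAlive now has a body (a use of x behind cgoAlwaysFalse) so that even `defer runtime.KeepAlive(x)` keeps x live for the whole function; see issue 21402 and the test added below. The canonical usage from its doc comment, written out with stand-in names (file, d, read); the finalizer that would close d is assumed to be set elsewhere with runtime.SetFinalizer:

package example

import (
	"runtime"
	"syscall"
)

type file struct{ d int }

func (f *file) read(buf []byte) (int, error) {
	// A finalizer on f may close f.d as soon as f becomes unreachable.
	// KeepAlive pins f until after the raw descriptor has been used.
	n, err := syscall.Read(f.d, buf)
	runtime.KeepAlive(f)
	return n, err
}
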
diff --git a/src/runtime/mfinal_test.go b/src/runtime/mfinal_test.go
index e9e3601..3ca8d31 100644
--- a/src/runtime/mfinal_test.go
+++ b/src/runtime/mfinal_test.go
@@ -241,3 +241,24 @@
Foo2 = &Object2{}
Foo1 = &Object1{}
)
+
+func TestDeferKeepAlive(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
+ // See issue 21402.
+ t.Parallel()
+ type T *int // needs to be a pointer base type to avoid tinyalloc and its never-finalized behavior.
+ x := new(T)
+ finRun := false
+ runtime.SetFinalizer(x, func(x *T) {
+ finRun = true
+ })
+ defer runtime.KeepAlive(x)
+ runtime.GC()
+ time.Sleep(time.Second)
+ if finRun {
+ t.Errorf("finalizer ran prematurely")
+ }
+}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 111fa78..ab90c28 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -230,6 +230,24 @@
// Update pacing in response to gcpercent change.
gcSetTriggerRatio(memstats.triggerRatio)
unlock(&mheap_.lock)
+
+ // If we just disabled GC, wait for any concurrent GC to
+ // finish so we always return with no GC running.
+ if in < 0 {
+ // Disable phase transitions.
+ lock(&work.sweepWaiters.lock)
+ if gcphase == _GCmark {
+ // GC is active. Wait until we reach sweeping.
+ gp := getg()
+ gp.schedlink = work.sweepWaiters.head
+ work.sweepWaiters.head.set(gp)
+ goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1)
+ } else {
+ // GC isn't active.
+ unlock(&work.sweepWaiters.lock)
+ }
+ }
+
return out
}
@@ -299,10 +317,10 @@
// gcMarkWorkerFractionalMode indicates that a P is currently
// running the "fractional" mark worker. The fractional worker
- // is necessary when GOMAXPROCS*gcGoalUtilization is not an
- // integer. The fractional worker should run until it is
+ // is necessary when GOMAXPROCS*gcBackgroundUtilization is not
+ // an integer. The fractional worker should run until it is
// preempted and will be scheduled to pick up the fractional
- // part of GOMAXPROCS*gcGoalUtilization.
+ // part of GOMAXPROCS*gcBackgroundUtilization.
gcMarkWorkerFractionalMode
// gcMarkWorkerIdleMode indicates that a P is running the mark
@@ -396,23 +414,18 @@
assistBytesPerWork float64
// fractionalUtilizationGoal is the fraction of wall clock
- // time that should be spent in the fractional mark worker.
- // For example, if the overall mark utilization goal is 25%
- // and GOMAXPROCS is 6, one P will be a dedicated mark worker
- // and this will be set to 0.5 so that 50% of the time some P
- // is in a fractional mark worker. This is computed at the
- // beginning of each cycle.
+ // time that should be spent in the fractional mark worker on
+ // each P that isn't running a dedicated worker.
+ //
+ // For example, if the utilization goal is 25% and there are
+ // no dedicated workers, this will be 0.25. If the goal is
+ // 25%, there is one dedicated worker, and GOMAXPROCS is 5,
+ // this will be 0.05 to make up the missing 5%.
+ //
+ // If this is zero, no fractional workers are needed.
fractionalUtilizationGoal float64
_ [sys.CacheLineSize]byte
-
- // fractionalMarkWorkersNeeded is the number of fractional
- // mark workers that need to be started. This is either 0 or
- // 1. This is potentially updated atomically at every
- // scheduling point (hence it gets its own cache line).
- fractionalMarkWorkersNeeded int64
-
- _ [sys.CacheLineSize]byte
}
// startCycle resets the GC controller's state and computes estimates
@@ -453,23 +466,33 @@
memstats.next_gc = memstats.heap_live + 1024*1024
}
- // Compute the total mark utilization goal and divide it among
- // dedicated and fractional workers.
- totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization
- c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal)
- c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)
- if c.fractionalUtilizationGoal > 0 {
- c.fractionalMarkWorkersNeeded = 1
+ // Compute the background mark utilization goal. In general,
+ // this may not come out exactly. We round the number of
+ // dedicated workers so that the utilization is closest to
+ // 25%. For small GOMAXPROCS, this would introduce too much
+ // error, so we add fractional workers in that case.
+ totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization
+ c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5)
+ utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
+ const maxUtilError = 0.3
+ if utilError < -maxUtilError || utilError > maxUtilError {
+ // Rounding put us more than 30% off our goal. With
+ // gcBackgroundUtilization of 25%, this happens for
+ // GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional
+ // workers to compensate.
+ if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
+ // Too many dedicated workers.
+ c.dedicatedMarkWorkersNeeded--
+ }
+ c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs)
} else {
- c.fractionalMarkWorkersNeeded = 0
+ c.fractionalUtilizationGoal = 0
}
// Clear per-P state
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp {
p.gcAssistTime = 0
+ p.gcFractionalMarkTime = 0
}
// Compute initial values for controls that are updated
@@ -482,7 +505,7 @@
work.initialHeapLive>>20, "->",
memstats.next_gc>>20, " MB)",
" workers=", c.dedicatedMarkWorkersNeeded,
- "+", c.fractionalMarkWorkersNeeded, "\n")
+ "+", c.fractionalUtilizationGoal, "\n")
}
}
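
Editor's note: startCycle above rounds GOMAXPROCS*25% to a whole number of dedicated mark workers and only falls back to a fractional worker when that rounding lands more than 30% off target. The arithmetic in isolation (markWorkers is a hypothetical helper; the constants are copied from the code above):

package example

// markWorkers reproduces the rounding rule above for a given GOMAXPROCS:
// it returns the dedicated worker count and the per-P fractional
// utilization goal (0 when rounding is close enough on its own).
func markWorkers(gomaxprocs int) (dedicated int64, fractional float64) {
	const gcBackgroundUtilization = 0.25
	total := float64(gomaxprocs) * gcBackgroundUtilization
	dedicated = int64(total + 0.5)
	utilError := float64(dedicated)/total - 1
	const maxUtilError = 0.3
	if utilError < -maxUtilError || utilError > maxUtilError {
		if float64(dedicated) > total {
			dedicated--
		}
		fractional = (total - float64(dedicated)) / float64(gomaxprocs)
	}
	return
}

// For example, markWorkers(6) yields 1 dedicated worker plus a fractional
// goal of 1/12 per P (the GOMAXPROCS=6 case called out above), while
// markWorkers(4) yields exactly 1 dedicated worker and no fractional work.
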
@@ -495,47 +518,73 @@
// is when assists are enabled and the necessary statistics are
// available).
func (c *gcControllerState) revise() {
- // Compute the expected scan work remaining.
+ gcpercent := gcpercent
+ if gcpercent < 0 {
+ // If GC is disabled but we're running a forced GC,
+ // act like GOGC is huge for the below calculations.
+ gcpercent = 100000
+ }
+ live := atomic.Load64(&memstats.heap_live)
+
+ var heapGoal, scanWorkExpected int64
+ if live <= memstats.next_gc {
+ // We're under the soft goal. Pace GC to complete at
+ // next_gc assuming the heap is in steady-state.
+ heapGoal = int64(memstats.next_gc)
+
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heap_scan.)
+ scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+ } else {
+ // We're past the soft goal. Pace GC so that in the
+ // worst case it will complete by the hard goal.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = int64(memstats.heap_scan)
+ }
+
+ // Compute the remaining scan work estimate.
//
// Note that we currently count allocations during GC as both
// scannable heap (heap_scan) and scan work completed
- // (scanWork), so this difference won't be changed by
- // allocations during GC.
- //
- // This particular estimate is a strict upper bound on the
- // possible remaining scan work for the current heap.
- // You might consider dividing this by 2 (or by
- // (100+GOGC)/100) to counter this over-estimation, but
- // benchmarks show that this has almost no effect on mean
- // mutator utilization, heap size, or assist time and it
- // introduces the danger of under-estimating and letting the
- // mutator outpace the garbage collector.
- scanWorkExpected := int64(memstats.heap_scan) - c.scanWork
- if scanWorkExpected < 1000 {
+ // (scanWork), so allocation will change this difference
+ // slowly in the soft regime and not at all in the hard
+ // regime.
+ scanWorkRemaining := scanWorkExpected - c.scanWork
+ if scanWorkRemaining < 1000 {
// We set a somewhat arbitrary lower bound on
// remaining scan work since if we aim a little high,
// we can miss by a little.
//
// We *do* need to enforce that this is at least 1,
// since marking is racy and double-scanning objects
- // may legitimately make the expected scan work
- // negative.
- scanWorkExpected = 1000
+ // may legitimately make the remaining scan work
+ // negative, even in the hard goal regime.
+ scanWorkRemaining = 1000
}
// Compute the heap distance remaining.
- heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live))
- if heapDistance <= 0 {
+ heapRemaining := heapGoal - int64(live)
+ if heapRemaining <= 0 {
// This shouldn't happen, but if it does, avoid
// dividing by zero or setting the assist negative.
- heapDistance = 1
+ heapRemaining = 1
}
// Compute the mutator assist ratio so by the time the mutator
// allocates the remaining heap bytes up to next_gc, it will
// have done (or stolen) the remaining amount of scan work.
- c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance)
- c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected)
+ c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
+ c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
}
// endCycle computes the trigger ratio for the next cycle.
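
Editor's note: revise above chooses a heap goal and an expected amount of remaining scan work, then converts them into the assist ratio. A sketch of the two regimes with made-up parameter names (the 1.1 hard-goal overshoot and the 100/(100+GOGC) scaling follow the code above):

package example

// assistRatio mirrors the soft/hard split in revise: under the soft goal the
// scan estimate is scaled by 100/(100+GOGC); past it, the full scannable
// heap is assumed and the goal is stretched by 10%.
func assistRatio(heapLive, nextGC, heapScan, scanDone int64, gogc int) float64 {
	var heapGoal, scanExpected int64
	if heapLive <= nextGC {
		heapGoal = nextGC
		scanExpected = int64(float64(heapScan) * 100 / float64(100+gogc))
	} else {
		heapGoal = int64(float64(nextGC) * 1.1)
		scanExpected = heapScan
	}
	scanRemaining := scanExpected - scanDone
	if scanRemaining < 1000 {
		scanRemaining = 1000 // same lower bound as revise
	}
	heapRemaining := heapGoal - heapLive
	if heapRemaining <= 0 {
		heapRemaining = 1 // avoid dividing by zero, as revise does
	}
	return float64(scanRemaining) / float64(heapRemaining)
}
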
@@ -569,7 +618,7 @@
assistDuration := nanotime() - c.markStartTime
// Assume background mark hit its utilization goal.
- utilization := gcGoalUtilization
+ utilization := gcBackgroundUtilization
// Add assist utilization; avoid divide by zero.
if assistDuration > 0 {
utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
@@ -688,51 +737,20 @@
// This P is now dedicated to marking until the end of
// the concurrent mark phase.
_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
+ } else if c.fractionalUtilizationGoal == 0 {
+ // No need for fractional workers.
+ return nil
} else {
- if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
- // No more workers are need right now.
+ // Is this P behind on the fractional utilization
+ // goal?
+ //
+ // This should be kept in sync with pollFractionalWorkerExit.
+ delta := nanotime() - gcController.markStartTime
+ if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
+ // Nope. No need to run a fractional worker.
return nil
}
-
- // This P has picked the token for the fractional worker.
- // Is the GC currently under or at the utilization goal?
- // If so, do more work.
- //
- // We used to check whether doing one time slice of work
- // would remain under the utilization goal, but that has the
- // effect of delaying work until the mutator has run for
- // enough time slices to pay for the work. During those time
- // slices, write barriers are enabled, so the mutator is running slower.
- // Now instead we do the work whenever we're under or at the
- // utilization work and pay for it by letting the mutator run later.
- // This doesn't change the overall utilization averages, but it
- // front loads the GC work so that the GC finishes earlier and
- // write barriers can be turned off sooner, effectively giving
- // the mutator a faster machine.
- //
- // The old, slower behavior can be restored by setting
- // gcForcePreemptNS = forcePreemptNS.
- const gcForcePreemptNS = 0
-
- // TODO(austin): We could fast path this and basically
- // eliminate contention on c.fractionalMarkWorkersNeeded by
- // precomputing the minimum time at which it's worth
- // next scheduling the fractional worker. Then Ps
- // don't have to fight in the window where we've
- // passed that deadline and no one has started the
- // worker yet.
- //
- // TODO(austin): Shorter preemption interval for mark
- // worker to improve fairness and give this
- // finer-grained control over schedule?
- now := nanotime() - gcController.markStartTime
- then := now + gcForcePreemptNS
- timeUsed := c.fractionalMarkTime + gcForcePreemptNS
- if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal {
- // Nope, we'd overshoot the utilization goal
- atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1)
- return nil
- }
+ // Run a fractional worker.
_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
}
@@ -745,6 +763,24 @@
return gp
}
+// pollFractionalWorkerExit returns true if a fractional mark worker
+// should self-preempt. It assumes it is called from the fractional
+// worker.
+func pollFractionalWorkerExit() bool {
+ // This should be kept in sync with the fractional worker
+ // scheduler logic in findRunnableGCWorker.
+ now := nanotime()
+ delta := now - gcController.markStartTime
+ if delta <= 0 {
+ return true
+ }
+ p := getg().m.p.ptr()
+ selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime)
+ // Add some slack to the utilization goal so that the
+ // fractional worker isn't behind again the instant it exits.
+ return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal
+}
+
// gcSetTriggerRatio sets the trigger ratio and updates everything
// derived from it: the absolute trigger, the heap goal, mark pacing,
// and sweep pacing.
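// A hedged arithmetic sketch of the self-preemption test in
// pollFractionalWorkerExit above; times are plain int64 nanoseconds here,
// standing in for nanotime-based values.
package main

import "fmt"

// shouldExit mirrors the check above: the worker self-preempts once its own
// time share exceeds the goal by a 20% slack margin, so it does not
// immediately qualify to run again after exiting.
func shouldExit(selfTimeNS, sinceMarkStartNS int64, goal float64) bool {
	if sinceMarkStartNS <= 0 {
		return true
	}
	return float64(selfTimeNS)/float64(sinceMarkStartNS) > 1.2*goal
}

func main() {
	// With a 5% goal the effective exit threshold is 6%:
	fmt.Println(shouldExit(5500000, 100000000, 0.05)) // 5.5% <= 6%: keep draining (false)
	fmt.Println(shouldExit(6500000, 100000000, 0.05)) // 6.5% > 6%: exit (true)
}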
@@ -859,9 +895,22 @@
}
}
-// gcGoalUtilization is the goal CPU utilization for background
+// gcGoalUtilization is the goal CPU utilization for
// marking as a fraction of GOMAXPROCS.
-const gcGoalUtilization = 0.25
+const gcGoalUtilization = 0.30
+
+// gcBackgroundUtilization is the fixed CPU utilization for background
+// marking. It must be <= gcGoalUtilization. The difference between
+// gcGoalUtilization and gcBackgroundUtilization will be made up by
+// mark assists. The scheduler will aim to use within 50% of this
+// goal.
+//
+// Setting this to < gcGoalUtilization avoids saturating the trigger
+// feedback controller when there are no assists, which allows it to
+// better control CPU and heap growth. However, the larger the gap,
+// the more mutator assists are expected to happen, which impact
+// mutator latency.
+const gcBackgroundUtilization = 0.25
// gcCreditSlack is the amount of scan work credit that can
// accumulate locally before updating gcController.scanWork and,
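// A back-of-the-envelope sketch of what the two utilization constants above
// mean for a concrete GOMAXPROCS value. Illustrative only; the actual split
// into dedicated and fractional workers is computed elsewhere in the pacer.
package main

import "fmt"

func main() {
	const (
		gcGoalUtilization       = 0.30 // total mark CPU goal (fraction of GOMAXPROCS)
		gcBackgroundUtilization = 0.25 // fixed background-worker share
	)
	gomaxprocs := 8
	background := gcBackgroundUtilization * float64(gomaxprocs)
	assists := (gcGoalUtilization - gcBackgroundUtilization) * float64(gomaxprocs)
	// Roughly 2.0 Ps worth of CPU from background workers, with about
	// 0.4 Ps worth expected to come from mutator assists.
	fmt.Printf("background=%.1f Ps, expected assists=%.1f Ps\n", background, assists)
}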
@@ -1158,7 +1207,7 @@
if t.kind == gcTriggerAlways {
return true
}
- if gcphase != _GCoff || gcpercent < 0 {
+ if gcphase != _GCoff {
return false
}
switch t.kind {
@@ -1169,6 +1218,9 @@
// own write.
return memstats.heap_live >= memstats.gc_trigger
case gcTriggerTime:
+ if gcpercent < 0 {
+ return false
+ }
lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime))
return lastgc != 0 && t.now-lastgc > forcegcperiod
case gcTriggerCycle:
@@ -1235,7 +1287,7 @@
}
}
- // Ok, we're doing it! Stop everybody else
+ // Ok, we're doing it! Stop everybody else
semacquire(&worldsema)
if trace.enabled {
@@ -1248,7 +1300,12 @@
gcResetMarkState()
- work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs
+ work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs
+ if work.stwprocs > ncpu {
+ // This is used to compute CPU time of the STW phases,
+ // so it can't be more than ncpu, even if GOMAXPROCS is.
+ work.stwprocs = ncpu
+ }
work.heap0 = atomic.Load64(&memstats.heap_live)
work.pauseNS = 0
work.mode = mode
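// A small sketch of the stwprocs clamp above, using plain parameters rather
// than the runtime's globals.
package main

import "fmt"

// stwCPU mirrors the clamp above: STW CPU time is charged for at most ncpu
// processors, even when GOMAXPROCS is set higher than the machine has CPUs.
func stwCPU(pauseNS int64, gomaxprocs, ncpu int32) int64 {
	stwprocs := gomaxprocs
	if stwprocs > ncpu {
		stwprocs = ncpu
	}
	return pauseNS * int64(stwprocs)
}

func main() {
	// A 100µs pause with GOMAXPROCS=32 on an 8-CPU machine is charged as
	// 800µs of CPU time, not 3.2ms.
	fmt.Println(stwCPU(100000, 32, 8))
}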
@@ -1256,6 +1313,9 @@
now := nanotime()
work.tSweepTerm = now
work.pauseStart = now
+ if trace.enabled {
+ traceGCSTWStart(1)
+ }
systemstack(stopTheWorldWithSema)
// Finish sweep before we start concurrent scan.
systemstack(func() {
@@ -1308,11 +1368,17 @@
gcController.markStartTime = now
// Concurrent mark.
- systemstack(startTheWorldWithSema)
- now = nanotime()
+ systemstack(func() {
+ now = startTheWorldWithSema(trace.enabled)
+ })
work.pauseNS += now - work.pauseStart
work.tMark = now
} else {
+ if trace.enabled {
+ // Switch to mark termination STW.
+ traceGCSTWDone()
+ traceGCSTWStart(0)
+ }
t := nanotime()
work.tMark, work.tMarkTerm = t, t
work.heapGoal = work.heap0
@@ -1355,7 +1421,8 @@
// TODO(austin): Should dedicated workers keep an eye on this
// and exit gcDrain promptly?
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff)
+ prevFractionalGoal := gcController.fractionalUtilizationGoal
+ gcController.fractionalUtilizationGoal = 0
if !gcBlackenPromptly {
// Transition from mark 1 to mark 2.
@@ -1382,6 +1449,7 @@
// workers have exited their loop so we can
// start new mark 2 workers.
forEachP(func(_p_ *p) {
+ wbBufFlush1(_p_)
_p_.gcw.dispose()
})
})
@@ -1398,7 +1466,7 @@
// Now we can start up mark 2 workers.
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff)
+ gcController.fractionalUtilizationGoal = prevFractionalGoal
incnwait := atomic.Xadd(&work.nwait, +1)
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
@@ -1413,6 +1481,9 @@
work.tMarkTerm = now
work.pauseStart = now
getg().m.preemptoff = "gcing"
+ if trace.enabled {
+ traceGCSTWStart(0)
+ }
systemstack(stopTheWorldWithSema)
// The gcphase is _GCmark, it will transition to _GCmarktermination
// below. The important thing is that the wb remains active until
@@ -1573,7 +1644,7 @@
// so events don't leak into the wrong cycle.
mProf_NextCycle()
- systemstack(startTheWorldWithSema)
+ systemstack(func() { startTheWorldWithSema(true) })
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
@@ -1647,10 +1718,7 @@
func gcBgMarkStartWorkers() {
// Background marking is performed by per-P G's. Ensure that
// each P has a background GC G.
- for _, p := range &allp {
- if p == nil || p.status == _Pdead {
- break
- }
+ for _, p := range allp {
if p.gcBgMarkWorker == 0 {
go gcBgMarkWorker(p)
notetsleepg(&work.bgMarkReady, -1)
@@ -1750,6 +1818,7 @@
}
startTime := nanotime()
+ _p_.gcMarkWorkerStartTime = startTime
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
@@ -1791,7 +1860,7 @@
// without preemption.
gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
case gcMarkWorkerFractionalMode:
- gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
case gcMarkWorkerIdleMode:
gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
}
@@ -1816,7 +1885,7 @@
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
case gcMarkWorkerFractionalMode:
atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1)
+ atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration)
case gcMarkWorkerIdleMode:
atomic.Xaddint64(&gcController.idleMarkTime, duration)
}
@@ -1914,10 +1983,6 @@
work.helperDrainBlock = true
}
- if trace.enabled {
- traceGCScanStart()
- }
-
if work.nproc > 1 {
noteclear(&work.alldone)
helpgc(int32(work.nproc))
@@ -1951,8 +2016,8 @@
// Double-check that all gcWork caches are empty. This should
// be ensured by mark 2 before we enter mark termination.
- for i := 0; i < int(gomaxprocs); i++ {
- gcw := &allp[i].gcw
+ for _, p := range allp {
+ gcw := &p.gcw
if !gcw.empty() {
throw("P has cached GC work at end of mark termination")
}
@@ -1961,10 +2026,6 @@
}
}
- if trace.enabled {
- traceGCScanDone()
- }
-
cachestats()
// Update the marked heap stat.
@@ -2094,18 +2155,19 @@
unlock(&sched.deferlock)
}
-// Timing
-
-//go:nowritebarrier
+// gchelper runs mark termination tasks on Ps other than the P
+// coordinating mark termination.
+//
+// The caller is responsible for ensuring that this has a P to run on,
+// even though it's running during STW. Because of this, it's allowed
+// to have write barriers.
+//
+//go:yeswritebarrierrec
func gchelper() {
_g_ := getg()
_g_.m.traceback = 2
gchelperstart()
- if trace.enabled {
- traceGCScanStart()
- }
-
// Parallel mark over GC roots and heap
if gcphase == _GCmarktermination {
gcw := &_g_.m.p.ptr().gcw
@@ -2117,10 +2179,6 @@
gcw.dispose()
}
- if trace.enabled {
- traceGCScanDone()
- }
-
nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone
if atomic.Xadd(&work.ndone, +1) == nproc-1 {
notewakeup(&work.alldone)
@@ -2139,6 +2197,8 @@
}
}
+// Timing
+
// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
i := len(buf) - 1
diff --git a/src/runtime/mgclarge.go b/src/runtime/mgclarge.go
index 757e88d..fe437bf 100644
--- a/src/runtime/mgclarge.go
+++ b/src/runtime/mgclarge.go
@@ -164,11 +164,10 @@
}
}
-func (root *mTreap) removeNode(t *treapNode) *mspan {
+func (root *mTreap) removeNode(t *treapNode) {
if t.spanKey.npages != t.npagesKey {
throw("span and treap node npages do not match")
}
- result := t.spanKey
// Rotate t down to be leaf of tree for removal, respecting priorities.
for t.right != nil || t.left != nil {
@@ -192,7 +191,6 @@
t.spanKey = nil
t.npagesKey = 0
mheap_.treapalloc.free(unsafe.Pointer(t))
- return result
}
// remove searches for, finds, removes from the treap, and returns the smallest
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 9029d19..5664390 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -34,13 +34,13 @@
// span base.
maxObletBytes = 128 << 10
- // idleCheckThreshold specifies how many units of work to do
- // between run queue checks in an idle worker. Assuming a scan
+ // drainCheckThreshold specifies how many units of work to do
+ // between self-preemption checks in gcDrain. Assuming a scan
// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
// overhead in the scan loop (the scheduler check may perform
// a syscall, so its overhead is nontrivial). Higher values
// make the system less responsive to incoming work.
- idleCheckThreshold = 100000
+ drainCheckThreshold = 100000
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
@@ -770,6 +770,13 @@
shrinkstack(gp)
}
+ // Scan the saved context register. This is effectively a live
+ // register that gets moved back and forth between the
+ // register and sched.ctxt without a write barrier.
+ if gp.sched.ctxt != nil {
+ scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw)
+ }
+
// Scan the stack.
var cache pcvalueCache
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
@@ -861,6 +868,7 @@
gcDrainNoBlock
gcDrainFlushBgCredit
gcDrainIdle
+ gcDrainFractional
// gcDrainBlock means neither gcDrainUntilPreempt nor
// gcDrainNoBlock. It is the default, but callers should use
@@ -877,6 +885,10 @@
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do. This implies gcDrainNoBlock.
//
+// If flags&gcDrainFractional != 0, gcDrain self-preempts when
+// pollFractionalWorkerExit() returns true. This implies
+// gcDrainNoBlock.
+//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
@@ -893,14 +905,24 @@
gp := getg().m.curg
preemptible := flags&gcDrainUntilPreempt != 0
- blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
+ blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
initScanWork := gcw.scanWork
- // idleCheck is the scan work at which to perform the next
- // idle check with the scheduler.
- idleCheck := initScanWork + idleCheckThreshold
+
+ // checkWork is the scan work before performing the next
+ // self-preempt check.
+ checkWork := int64(1<<63 - 1)
+ var check func() bool
+ if flags&(gcDrainIdle|gcDrainFractional) != 0 {
+ checkWork = initScanWork + drainCheckThreshold
+ if idle {
+ check = pollWork
+ } else if flags&gcDrainFractional != 0 {
+ check = pollFractionalWorkerExit
+ }
+ }
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
@@ -910,7 +932,7 @@
break
}
markroot(gcw, job)
- if idle && pollWork() {
+ if check != nil && check() {
goto done
}
}
@@ -951,12 +973,12 @@
gcFlushBgCredit(gcw.scanWork - initScanWork)
initScanWork = 0
}
- idleCheck -= gcw.scanWork
+ checkWork -= gcw.scanWork
gcw.scanWork = 0
- if idle && idleCheck <= 0 {
- idleCheck += idleCheckThreshold
- if pollWork() {
+ if checkWork <= 0 {
+ checkWork += drainCheckThreshold
+ if check != nil && check() {
break
}
}
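// A hedged sketch of the periodic self-preemption pattern gcDrain uses above:
// do work in units, and every checkThreshold units call a check function
// (pollWork for idle workers, pollFractionalWorkerExit for fractional ones)
// that can ask the loop to stop. The helper names here are illustrative.
package main

import "fmt"

func drainWithChecks(totalWork, checkThreshold int, check func() bool) int {
	done := 0
	checkWork := checkThreshold
	for done < totalWork {
		done++ // one unit of scan work
		checkWork--
		if checkWork <= 0 {
			checkWork += checkThreshold
			if check != nil && check() {
				break // self-preempt
			}
		}
	}
	return done
}

func main() {
	calls := 0
	stopAfterThirdCheck := func() bool { calls++; return calls >= 3 }
	fmt.Println(drainWithChecks(1000000, 100000, stopAfterThirdCheck)) // 300000
}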
@@ -1212,6 +1234,9 @@
// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
+//
+// See also wbBufFlush1, which partially duplicates this logic.
+//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr) {
// obj should be start of allocation, and so must be at least pointer-aligned.
@@ -1356,10 +1381,7 @@
//
// The world must be stopped.
func gcMarkTinyAllocs() {
- for _, p := range &allp {
- if p == nil || p.status == _Pdead {
- break
- }
+ for _, p := range allp {
c := p.mcache
if c == nil || c.tiny == 0 {
continue
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index 461679b..c6634fc 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -85,6 +85,13 @@
scanWork int64
}
+// Most of the methods of gcWork are go:nowritebarrierrec because the
+// write barrier itself can invoke gcWork methods but the methods are
+// not generally re-entrant. Hence, if a gcWork method invoked the
+// write barrier while the gcWork was in an inconsistent state, and
+// the write barrier in turn invoked a gcWork method, it could
+// permanently corrupt the gcWork.
+
func (w *gcWork) init() {
w.wbuf1 = getempty()
wbuf2 := trygetfull()
@@ -96,7 +103,7 @@
// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
flushed := false
wbuf := w.wbuf1
@@ -129,7 +136,7 @@
// putFast does a put and returns true if it can be done quickly
// otherwise it returns false and the caller needs to call put.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
wbuf := w.wbuf1
if wbuf == nil {
@@ -143,12 +150,45 @@
return true
}
+// putBatch performs a put on every pointer in obj. See put for
+// constraints on these pointers.
+//
+//go:nowritebarrierrec
+func (w *gcWork) putBatch(obj []uintptr) {
+ if len(obj) == 0 {
+ return
+ }
+
+ flushed := false
+ wbuf := w.wbuf1
+ if wbuf == nil {
+ w.init()
+ wbuf = w.wbuf1
+ }
+
+ for len(obj) > 0 {
+ for wbuf.nobj == len(wbuf.obj) {
+ putfull(wbuf)
+ w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
+ wbuf = w.wbuf1
+ flushed = true
+ }
+ n := copy(wbuf.obj[wbuf.nobj:], obj)
+ wbuf.nobj += n
+ obj = obj[n:]
+ }
+
+ if flushed && gcphase == _GCmark {
+ gcController.enlistWorker()
+ }
+}
+
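// A simplified sketch of the putBatch loop above: copy as much of the batch
// as fits into the current fixed-size buffer, swap in a fresh buffer when it
// fills, and repeat until the batch is consumed. Plain slices stand in for
// workbufs and for the putfull/getempty calls.
package main

import "fmt"

func batchFill(batch []uintptr, bufSize int) (buffers [][]uintptr) {
	cur := make([]uintptr, 0, bufSize)
	for len(batch) > 0 {
		if len(cur) == cap(cur) {
			buffers = append(buffers, cur)    // hand the full buffer off ("putfull")
			cur = make([]uintptr, 0, bufSize) // and start a fresh one ("getempty")
		}
		n := copy(cur[len(cur):cap(cur)], batch)
		cur = cur[:len(cur)+n]
		batch = batch[n:]
	}
	buffers = append(buffers, cur)
	return buffers
}

func main() {
	ptrs := make([]uintptr, 10)
	for i := range ptrs {
		ptrs[i] = uintptr(i)
	}
	bufs := batchFill(ptrs, 4)
	fmt.Println(len(bufs)) // 3 buffers: 4 + 4 + 2 pointers
}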
// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -177,7 +217,7 @@
// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -194,7 +234,7 @@
// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved. get returns 0 if there are no pointers remaining.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) get() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -228,7 +268,7 @@
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) dispose() {
if wbuf := w.wbuf1; wbuf != nil {
if wbuf.nobj == 0 {
@@ -262,7 +302,7 @@
// balance moves some work that's cached in this gcWork back on the
// global queue.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) balance() {
if w.wbuf1 == nil {
return
@@ -282,7 +322,7 @@
}
// empty returns true if w has no mark work available.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) empty() bool {
return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 893587e..12cf29a 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -56,6 +56,12 @@
// Internal pages map to an arbitrary span.
// For pages that have never been allocated, spans entries are nil.
//
+ // Modifications are protected by mheap.lock. Reads can be
+ // performed without locking, but ONLY from indexes that are
+ // known to contain in-use or stack spans. This means there
+ // must not be a safe-point between establishing that an
+ // address is live and looking it up in the spans array.
+ //
// This is backed by a reserved region of the address space so
// it can grow without moving. The memory up to len(spans) is
// mapped. cap(spans) indicates the total reserved memory.
@@ -154,6 +160,8 @@
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.
+
+ unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
}
var mheap_ mheap
@@ -311,6 +319,17 @@
return
}
+// recordspan adds a newly allocated span to h.allspans.
+//
+// This only happens the first time a span is allocated from
+// mheap.spanalloc (it is not called when a span is reused).
+//
+// Write barriers are disallowed here because it can be called from
+// gcWork when allocating new workbufs. However, because it's an
+// indirect call from the fixalloc initializer, the compiler can't see
+// this.
+//
+//go:nowritebarrierrec
func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
h := (*mheap)(vh)
s := (*mspan)(p)
@@ -331,12 +350,13 @@
copy(new, h.allspans)
}
oldAllspans := h.allspans
- h.allspans = new
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
if len(oldAllspans) != 0 {
sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
}
}
- h.allspans = append(h.allspans, s)
+ h.allspans = h.allspans[:len(h.allspans)+1]
+ h.allspans[len(h.allspans)-1] = s
}
// A spanClass represents the size class and noscan-ness of a span.
@@ -857,7 +877,7 @@
// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
// 1TBytes is 1 million. Experimentation using random sizes indicates that the depth of
// the tree is less than 2x that of a perfectly balanced tree. 1TByte can be referenced
-// by a perfectly balanced tree with a a depth of 20. Twice that is an acceptable 40.
+// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
func (h *mheap) isLargeSpan(npages uintptr) bool {
return npages >= uintptr(len(h.free))
}
@@ -1123,34 +1143,35 @@
var sumreleased uintptr
for s := list.first; s != nil; s = s.next {
- if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
- start := s.base()
- end := start + s.npages<<_PageShift
- if physPageSize > _PageSize {
- // We can only release pages in
- // physPageSize blocks, so round start
- // and end in. (Otherwise, madvise
- // will round them *out* and release
- // more memory than we want.)
- start = (start + physPageSize - 1) &^ (physPageSize - 1)
- end &^= physPageSize - 1
- if end <= start {
- // start and end don't span a
- // whole physical page.
- continue
- }
- }
- len := end - start
-
- released := len - (s.npreleased << _PageShift)
- if physPageSize > _PageSize && released == 0 {
+ if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
+ continue
+ }
+ start := s.base()
+ end := start + s.npages<<_PageShift
+ if physPageSize > _PageSize {
+ // We can only release pages in
+ // physPageSize blocks, so round start
+ // and end in. (Otherwise, madvise
+ // will round them *out* and release
+ // more memory than we want.)
+ start = (start + physPageSize - 1) &^ (physPageSize - 1)
+ end &^= physPageSize - 1
+ if end <= start {
+ // start and end don't span a
+ // whole physical page.
continue
}
- memstats.heap_released += uint64(released)
- sumreleased += released
- s.npreleased = len >> _PageShift
- sysUnused(unsafe.Pointer(start), len)
}
+ len := end - start
+
+ released := len - (s.npreleased << _PageShift)
+ if physPageSize > _PageSize && released == 0 {
+ continue
+ }
+ memstats.heap_released += uint64(released)
+ sumreleased += released
+ s.npreleased = len >> _PageShift
+ sysUnused(unsafe.Pointer(start), len)
}
return sumreleased
}
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index d15f1f7..fb7cbc2 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -70,7 +70,7 @@
fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)")
fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)")
fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)")
- fmt.Fprintln(w, "\tADDQ\t$64,DI")
+ fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags
fmt.Fprintln(w)
}
fmt.Fprintln(w, "\tRET")
@@ -151,12 +151,13 @@
func zeroARM64(w io.Writer) {
// ZR: always zero
- // R16 (aka REGRT1): ptr to memory to be zeroed - 8
+ // R16 (aka REGRT1): ptr to memory to be zeroed
// On return, R16 points to the last zeroed dword.
fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
- for i := 0; i < 128; i++ {
- fmt.Fprintln(w, "\tMOVD.W\tZR, 8(R16)")
+ for i := 0; i < 63; i++ {
+ fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
}
+ fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
fmt.Fprintln(w, "\tRET")
}
diff --git a/src/runtime/mksizeclasses.go b/src/runtime/mksizeclasses.go
index 0cb2b33..b146dbc 100644
--- a/src/runtime/mksizeclasses.go
+++ b/src/runtime/mksizeclasses.go
@@ -24,8 +24,8 @@
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
+// For really small sizes, alignment constraints force the
+// overhead higher.
package main
@@ -242,15 +242,18 @@
}
func printComment(w io.Writer, classes []class) {
- fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-11s\n", "class", "bytes/obj", "bytes/span", "objects", "waste bytes")
+ fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste")
+ prevSize := 0
for i, c := range classes {
if i == 0 {
continue
}
spanSize := c.npages * pageSize
objects := spanSize / c.size
- waste := spanSize - c.size*(spanSize/c.size)
- fmt.Fprintf(w, "// %5d %9d %10d %7d %11d\n", i, c.size, spanSize, objects, waste)
+ tailWaste := spanSize - c.size*(spanSize/c.size)
+ maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
+ prevSize = c.size
+ fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%%\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste)
}
fmt.Fprintf(w, "\n")
}
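// A worked example of the new max-waste column above, for the 48-byte size
// class (previous class 32 bytes, one 8 KiB page per span). Standalone
// arithmetic only; the values are recomputed here, not copied from the table.
package main

import "fmt"

func main() {
	const (
		size     = 48
		prevSize = 32
		spanSize = 8192
	)
	objects := spanSize / size           // 170 objects per span
	tailWaste := spanSize - size*objects // 32 bytes unusable at the end of the span
	maxWaste := float64((size-prevSize-1)*objects+tailWaste) / float64(spanSize)
	// Worst case: every object is a (prevSize+1)-byte allocation rounded up
	// to size, plus the unusable tail, about 31.52% of the span.
	fmt.Printf("objects=%d tailWaste=%d maxWaste=%.2f%%\n", objects, tailWaste, 100*maxWaste)
}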
diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go
index 62f3780..e1333c6 100644
--- a/src/runtime/mmap.go
+++ b/src/runtime/mmap.go
@@ -16,7 +16,8 @@
// We only pass the lower 32 bits of file offset to the
// assembly routine; the higher bits (if required) should be provided
// by the assembly routine as 0.
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+// The err result is an OS error code such as ENOMEM.
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int)
// munmap calls the munmap system call. It is implemented in assembly.
func munmap(addr unsafe.Pointer, n uintptr)
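// A hedged sketch of the new calling convention: callers check a separate
// errno result instead of testing whether the returned pointer encodes a
// small error value. fakeMmap and its values are hypothetical stand-ins, not
// the runtime's mmap.
package main

import "fmt"

func fakeMmap(n uintptr) (p uintptr, err int) {
	if n == 0 {
		return 0, 22 // e.g. EINVAL, for illustration
	}
	return 0x7f0000000000, 0
}

func main() {
	p, err := fakeMmap(1 << 20)
	if err != 0 {
		fmt.Println("mmap failed with errno", err)
		return
	}
	fmt.Printf("mapped at %#x\n", p)
}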
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 2bd09b6..259473c 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -596,7 +596,7 @@
r.AllocObjects = int64(mp.active.allocs)
r.FreeObjects = int64(mp.active.frees)
if raceenabled {
- racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(unsafe.Pointer(&r)), funcPC(MemProfile))
+ racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(MemProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
@@ -644,7 +644,7 @@
r.Count = bp.count
r.Cycles = bp.cycles
if raceenabled {
- racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(unsafe.Pointer(&p)), funcPC(BlockProfile))
+ racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(BlockProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
@@ -741,7 +741,7 @@
// Save current goroutine.
sp := getcallersp(unsafe.Pointer(&p))
- pc := getcallerpc(unsafe.Pointer(&p))
+ pc := getcallerpc()
systemstack(func() {
saveg(pc, sp, gp, &r[0])
})
@@ -786,7 +786,7 @@
if len(buf) > 0 {
gp := getg()
sp := getcallersp(unsafe.Pointer(&buf))
- pc := getcallerpc(unsafe.Pointer(&buf))
+ pc := getcallerpc()
systemstack(func() {
g0 := getg()
// Force traceback=1 to override GOTRACEBACK setting,
@@ -826,7 +826,7 @@
}
if gp.m.curg == nil || gp == gp.m.curg {
goroutineheader(gp)
- pc := getcallerpc(unsafe.Pointer(&p))
+ pc := getcallerpc()
sp := getcallersp(unsafe.Pointer(&p))
systemstack(func() {
traceback(pc, sp, 0, gp)
@@ -846,7 +846,7 @@
gp.m.traceback = 2
print("tracefree(", p, ", ", hex(size), ")\n")
goroutineheader(gp)
- pc := getcallerpc(unsafe.Pointer(&p))
+ pc := getcallerpc()
sp := getcallersp(unsafe.Pointer(&p))
systemstack(func() {
traceback(pc, sp, 0, gp)
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 1cb44a1..e6f2f00 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -262,7 +262,7 @@
// can only be used for other objects of roughly the same
// size.
//
- // HeapInuse minus HeapAlloc esimates the amount of memory
+ // HeapInuse minus HeapAlloc estimates the amount of memory
// that has been dedicated to particular size classes, but is
// not currently being used. This is an upper bound on
// fragmentation, but in general this memory can be reused
@@ -589,12 +589,13 @@
memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
+// cachestats flushes all mcache stats.
+//
+// The world must be stopped.
+//
//go:nowritebarrier
func cachestats() {
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp {
c := p.mcache
if c == nil {
continue
@@ -610,9 +611,6 @@
//go:nowritebarrier
func flushmcache(i int) {
p := allp[i]
- if p == nil {
- return
- }
c := p.mcache
if c == nil {
return
@@ -666,7 +664,7 @@
// overflow errors.
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
atomic.Xadd64(sysStat, int64(n))
return
}
@@ -680,7 +678,7 @@
// mSysStatInc apply.
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
atomic.Xadd64(sysStat, -int64(n))
return
}
diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go
new file mode 100644
index 0000000..4a2d1ad
--- /dev/null
+++ b/src/runtime/mwbbuf.go
@@ -0,0 +1,260 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This implements the write barrier buffer. The write barrier itself
+// is gcWriteBarrier and is implemented in assembly.
+//
+// The write barrier has a fast path and a slow path. The fast path
+// simply enqueues to a per-P write barrier buffer. It's written in
+// assembly and doesn't clobber any general purpose registers, so it
+// doesn't have the usual overheads of a Go call.
+//
+// When the buffer fills up, the write barrier invokes the slow path
+// (wbBufFlush) to flush the buffer to the GC work queues. In this
+// path, since the compiler didn't spill registers, we spill *all*
+// registers and disallow any GC safe points that could observe the
+// stack frame (since we don't know the types of the spilled
+// registers).
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// testSmallBuf forces a small write barrier buffer to stress write
+// barrier flushing.
+const testSmallBuf = false
+
+// wbBuf is a per-P buffer of pointers queued by the write barrier.
+// This buffer is flushed to the GC workbufs when it fills up and on
+// various GC transitions.
+//
+// This is closely related to a "sequential store buffer" (SSB),
+// except that SSBs are usually used for maintaining remembered sets,
+// while this is used for marking.
+type wbBuf struct {
+ // next points to the next slot in buf. It must not be a
+ // pointer type because it can point past the end of buf and
+ // must be updated without write barriers.
+ //
+ // This is a pointer rather than an index to optimize the
+ // write barrier assembly.
+ next uintptr
+
+ // end points to just past the end of buf. It must not be a
+ // pointer type because it points past the end of buf and must
+ // be updated without write barriers.
+ end uintptr
+
+ // buf stores a series of pointers to execute write barriers
+ // on. This must be a multiple of wbBufEntryPointers because
+ // the write barrier only checks for overflow once per entry.
+ buf [wbBufEntryPointers * wbBufEntries]uintptr
+}
+
+const (
+ // wbBufEntries is the number of write barriers between
+ // flushes of the write barrier buffer.
+ //
+ // This trades latency for throughput amortization. Higher
+ // values amortize flushing overhead more, but increase the
+ // latency of flushing. Higher values also increase the cache
+ // footprint of the buffer.
+ //
+ // TODO: What is the latency cost of this? Tune this value.
+ wbBufEntries = 256
+
+ // wbBufEntryPointers is the number of pointers added to the
+ // buffer by each write barrier.
+ wbBufEntryPointers = 2
+)
+
+// reset empties b by resetting its next and end pointers.
+func (b *wbBuf) reset() {
+ start := uintptr(unsafe.Pointer(&b.buf[0]))
+ b.next = start
+ if gcBlackenPromptly || writeBarrier.cgo {
+ // Effectively disable the buffer by forcing a flush
+ // on every barrier.
+ b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
+ } else if testSmallBuf {
+ // For testing, allow two barriers in the buffer. If
+ // we only did one, then barriers of non-heap pointers
+ // would be no-ops. This lets us combine a buffered
+ // barrier with a flush at a later time.
+ b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers]))
+ } else {
+ b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
+ }
+
+ if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 {
+ throw("bad write barrier buffer bounds")
+ }
+}
+
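// Back-of-the-envelope sizing for the buffer declared above, as a standalone
// sketch (the constants are restated locally for illustration).
package main

import (
	"fmt"
	"unsafe"
)

func main() {
	const (
		wbBufEntries       = 256 // barriers between flushes
		wbBufEntryPointers = 2   // pointers recorded per barrier
	)
	slots := wbBufEntries * wbBufEntryPointers
	bytes := uintptr(slots) * unsafe.Sizeof(uintptr(0))
	// On a 64-bit platform: 512 slots, 4096 bytes per P, flushed to the GC
	// work queues every 256 write barriers (sooner in the special modes
	// reset handles above).
	fmt.Println(slots, bytes)
}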
+// discard resets b's next pointer, but not its end pointer.
+//
+// This must be nosplit because it's called by wbBufFlush.
+//
+//go:nosplit
+func (b *wbBuf) discard() {
+ b.next = uintptr(unsafe.Pointer(&b.buf[0]))
+}
+
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+// buf := &getg().m.p.ptr().wbBuf
+// if !buf.putFast(old, new) {
+// wbBufFlush(...)
+// }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+ p := (*[2]uintptr)(unsafe.Pointer(b.next))
+ p[0] = old
+ p[1] = new
+ b.next += 2 * sys.PtrSize
+ return b.next != b.end
+}
+
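// A simplified sketch of the fast-path protocol above, using an index into a
// tiny buffer instead of raw next/end pointers: record the pair, advance the
// cursor, and return false exactly when the buffer just became full so the
// caller flushes. Illustrative only.
package main

import "fmt"

type sketchBuf struct {
	next int
	buf  [8]uintptr // room for 4 barriers in this example
}

func (b *sketchBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += 2
	return b.next != len(b.buf)
}

func main() {
	var b sketchBuf
	for i := 0; i < 4; i++ {
		ok := b.putFast(uintptr(i), uintptr(i+100))
		fmt.Println(ok) // true, true, true, false -> flush now
	}
}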
+// wbBufFlush flushes the current P's write barrier buffer to the GC
+// workbufs. It is passed the slot and value of the write barrier that
+// caused the flush so that it can implement cgocheck.
+//
+// This must not have write barriers because it is part of the write
+// barrier implementation.
+//
+// This and everything it calls must be nosplit because 1) the stack
+// contains untyped slots from gcWriteBarrier and 2) there must not be
+// a GC safe point between the write barrier test in the caller and
+// flushing the buffer.
+//
+// TODO: A "go:nosplitrec" annotation would be perfect for this.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func wbBufFlush(dst *uintptr, src uintptr) {
+ // Note: Every possible return from this function must reset
+ // the buffer's next pointer to prevent buffer overflow.
+
+ if getg().m.dying > 0 {
+ // We're going down. Not much point in write barriers
+ // and this way we can allow write barriers in the
+ // panic path.
+ getg().m.p.ptr().wbBuf.discard()
+ return
+ }
+
+ if writeBarrier.cgo && dst != nil {
+ // This must be called from the stack that did the
+ // write. It's nosplit all the way down.
+ cgoCheckWriteBarrier(dst, src)
+ if !writeBarrier.needed {
+ // We were only called for cgocheck.
+ getg().m.p.ptr().wbBuf.discard()
+ return
+ }
+ }
+
+ // Switch to the system stack so we don't have to worry about
+ // the untyped stack slots or safe points.
+ systemstack(func() {
+ wbBufFlush1(getg().m.p.ptr())
+ })
+}
+
+// wbBufFlush1 flushes p's write barrier buffer to the GC work queue.
+//
+// This must not have write barriers because it is part of the write
+// barrier implementation, so this may lead to infinite loops or
+// buffer corruption.
+//
+// This must be non-preemptible because it uses the P's workbuf.
+//
+//go:nowritebarrierrec
+//go:systemstack
+func wbBufFlush1(_p_ *p) {
+ // Get the buffered pointers.
+ start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0]))
+ n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0])
+ ptrs := _p_.wbBuf.buf[:n]
+
+ // Reset the buffer.
+ _p_.wbBuf.reset()
+
+ if useCheckmark {
+ // Slow path for checkmark mode.
+ for _, ptr := range ptrs {
+ shade(ptr)
+ }
+ return
+ }
+
+ // Mark all of the pointers in the buffer and record only the
+ // pointers we greyed. We use the buffer itself to temporarily
+ // record greyed pointers.
+ //
+ // TODO: Should scanobject/scanblock just stuff pointers into
+ // the wbBuf? Then this would become the sole greying path.
+ gcw := &_p_.gcw
+ pos := 0
+ arenaStart := mheap_.arena_start
+ for _, ptr := range ptrs {
+ if ptr < arenaStart {
+ // nil pointers are very common, especially
+ // for the "old" values. Filter out these and
+ // other "obvious" non-heap pointers ASAP.
+ //
+ // TODO: Should we filter out nils in the fast
+ // path to reduce the rate of flushes?
+ continue
+ }
+ // TODO: This doesn't use hbits, so calling
+ // heapBitsForObject seems a little silly. We could
+ // easily separate this out since heapBitsForObject
+ // just calls heapBitsForAddr(obj) to get hbits.
+ obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0)
+ if obj == 0 {
+ continue
+ }
+ // TODO: Consider making two passes where the first
+ // just prefetches the mark bits.
+ mbits := span.markBitsForIndex(objIndex)
+ if mbits.isMarked() {
+ continue
+ }
+ mbits.setMarked()
+ if span.spanclass.noscan() {
+ gcw.bytesMarked += uint64(span.elemsize)
+ continue
+ }
+ ptrs[pos] = obj
+ pos++
+ }
+
+ // Enqueue the greyed objects.
+ gcw.putBatch(ptrs[:pos])
+ if gcphase == _GCmarktermination || gcBlackenPromptly {
+ // Ps aren't allowed to cache work during mark
+ // termination.
+ gcw.dispose()
+ }
+}
diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go
index 71de98b..4d5d1a4 100644
--- a/src/runtime/netpoll_kqueue.go
+++ b/src/runtime/netpoll_kqueue.go
@@ -88,10 +88,23 @@
for i := 0; i < int(n); i++ {
ev := &events[i]
var mode int32
- if ev.filter == _EVFILT_READ {
+ switch ev.filter {
+ case _EVFILT_READ:
mode += 'r'
- }
- if ev.filter == _EVFILT_WRITE {
+
+ // On some systems when the read end of a pipe
+ // is closed the write end will not get a
+ // _EVFILT_WRITE event, but will get a
+ // _EVFILT_READ event with EV_EOF set.
+ // Note that setting 'w' here just means that we
+ // will wake up a goroutine waiting to write;
+ // that goroutine will try the write again,
+ // and the appropriate thing will happen based
+ // on what that write returns (success, EPIPE, EAGAIN).
+ if ev.flags&_EV_EOF != 0 {
+ mode += 'w'
+ }
+ case _EVFILT_WRITE:
mode += 'w'
}
if mode != 0 {
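// A hedged sketch of the dispatch above: translate a kevent filter and flags
// into the poller's 'r'/'w' readiness modes, treating EV_EOF on a read filter
// as also waking writers. The constant values are illustrative stand-ins for
// the platform definitions.
package main

import "fmt"

const (
	evfiltRead  = -1
	evfiltWrite = -2
	evEOF       = 0x8000
)

func readyModes(filter int16, flags uint16) (mode int32) {
	switch filter {
	case evfiltRead:
		mode += 'r'
		if flags&evEOF != 0 {
			// The read end of a pipe was closed: wake any waiting
			// writer so its next write observes EPIPE/EAGAIN.
			mode += 'w'
		}
	case evfiltWrite:
		mode += 'w'
	}
	return mode
}

func main() {
	fmt.Println(readyModes(evfiltRead, evEOF) == 'r'+'w') // true
}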
diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go
index 79dafb0..134071f 100644
--- a/src/runtime/netpoll_windows.go
+++ b/src/runtime/netpoll_windows.go
@@ -47,7 +47,7 @@
func netpollopen(fd uintptr, pd *pollDesc) int32 {
if stdcall4(_CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0 {
- return -int32(getlasterror())
+ return int32(getlasterror())
}
return 0
}
diff --git a/src/runtime/os3_plan9.go b/src/runtime/os3_plan9.go
index 5d4b5a6..0b313d7 100644
--- a/src/runtime/os3_plan9.go
+++ b/src/runtime/os3_plan9.go
@@ -45,6 +45,11 @@
break
}
}
+ if flags&_SigPanic != 0 && gp.throwsplit {
+ // We can't safely sigpanic because it may grow the
+ // stack. Abort in the signal handler instead.
+ flags = (flags &^ _SigPanic) | _SigThrow
+ }
if flags&_SigGoExit != 0 {
exits((*byte)(add(unsafe.Pointer(note), 9))) // Strip "go: exit " prefix.
}
@@ -153,3 +158,6 @@
// TODO: Enable profiling interrupts.
getg().m.profilehz = hz
}
+
+// gsignalStack is unused on Plan 9.
+type gsignalStack struct{}
diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go
index 067fb3b..c53f613 100644
--- a/src/runtime/os3_solaris.go
+++ b/src/runtime/os3_solaris.go
@@ -181,6 +181,12 @@
}
}
+func exitThread(wait *uint32) {
+ // We should never reach exitThread on Solaris because we let
+ // libc clean up threads.
+ throw("exitThread")
+}
+
var urandom_dev = []byte("/dev/urandom\x00")
//go:nosplit
@@ -396,12 +402,12 @@
}
//go:nosplit
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer {
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) {
p, err := doMmap(uintptr(addr), n, uintptr(prot), uintptr(flags), uintptr(fd), uintptr(off))
if p == ^uintptr(0) {
- return unsafe.Pointer(err)
+ return nil, int(err)
}
- return unsafe.Pointer(p)
+ return unsafe.Pointer(p), 0
}
//go:nosplit
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
index 1528167..580dffa 100644
--- a/src/runtime/os_darwin.go
+++ b/src/runtime/os_darwin.go
@@ -11,6 +11,8 @@
waitsema uint32 // semaphore for parking on locks
}
+var darwinVersion int
+
func bsdthread_create(stk, arg unsafe.Pointer, fn uintptr) int32
func bsdthread_register() int32
@@ -50,16 +52,35 @@
// can look at the environment first.
ncpu = getncpu()
-
physPageSize = getPageSize()
+ darwinVersion = getDarwinVersion()
}
const (
- _CTL_HW = 6
- _HW_NCPU = 3
- _HW_PAGESIZE = 7
+ _CTL_KERN = 1
+ _CTL_HW = 6
+ _KERN_OSRELEASE = 2
+ _HW_NCPU = 3
+ _HW_PAGESIZE = 7
)
+func getDarwinVersion() int {
+ // Use sysctl to fetch kern.osrelease
+ mib := [2]uint32{_CTL_KERN, _KERN_OSRELEASE}
+ var out [32]byte
+ nout := unsafe.Sizeof(out)
+ ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+ if ret >= 0 {
+ ver := 0
+ for i := 0; i < int(nout) && out[i] >= '0' && out[i] <= '9'; i++ {
+ ver *= 10
+ ver += int(out[i] - '0')
+ }
+ return ver
+ }
+ return 17 // should not happen: default to a newish version
+}
+
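// A worked example of the digit loop above: kern.osrelease is a string such
// as "17.4.0" and only the leading major number matters. This sketch works on
// a Go string; the runtime reads the bytes straight out of sysctl.
package main

import "fmt"

func parseLeadingInt(s string) int {
	ver := 0
	for i := 0; i < len(s) && s[i] >= '0' && s[i] <= '9'; i++ {
		ver = ver*10 + int(s[i]-'0')
	}
	return ver
}

func main() {
	fmt.Println(parseLeadingInt("17.4.0")) // 17 (Darwin 17, i.e. macOS 10.13)
}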
func getncpu() int32 {
// Use sysctl to fetch hw.ncpu.
mib := [2]uint32{_CTL_HW, _HW_NCPU}
@@ -135,7 +156,7 @@
// not safe to use after initialization as it does not pass an M as fnarg.
//
//go:nosplit
-func newosproc0(stacksize uintptr, fn unsafe.Pointer, fnarg uintptr) {
+func newosproc0(stacksize uintptr, fn uintptr) {
stack := sysAlloc(stacksize, &memstats.stacks_sys)
if stack == nil {
write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
@@ -145,7 +166,7 @@
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- errno := bsdthread_create(stk, fn, fnarg)
+ errno := bsdthread_create(stk, nil, fn)
sigprocmask(_SIG_SETMASK, &oset, nil)
if errno < 0 {
@@ -188,7 +209,11 @@
// Called from dropm to undo the effect of an minit.
//go:nosplit
func unminit() {
- unminitSignals()
+ // The alternate signal stack is buggy on arm and arm64.
+ // See minit.
+ if GOARCH != "arm" && GOARCH != "arm64" {
+ unminitSignals()
+ }
}
// Mach IPC, to get at semaphores
diff --git a/src/runtime/os_darwin_arm64.go b/src/runtime/os_darwin_arm64.go
index 01285af..8de132d 100644
--- a/src/runtime/os_darwin_arm64.go
+++ b/src/runtime/os_darwin_arm64.go
@@ -4,8 +4,6 @@
package runtime
-var supportCRC32 = false
-
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index 7c989de..31708e2 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -69,15 +69,19 @@
}
const (
- _CPU_SETSIZE_MAX = 32 // Limited by _MaxGomaxprocs(256) in runtime2.go.
_CPU_CURRENT_PID = -1 // Current process ID.
)
//go:noescape
func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
+//go:systemstack
func getncpu() int32 {
- var mask [_CPU_SETSIZE_MAX]byte
+ // Use a large buffer for the CPU mask. We're on the system
+ // stack, so this is fine, and we can't allocate memory for a
+ // dynamically-sized buffer at this point.
+ const maxCPUs = 64 * 1024
+ var mask [maxCPUs / 8]byte
var mib [_CTL_MAXNAME]uint32
// According to FreeBSD's /usr/src/sys/kern/kern_cpuset.c,
@@ -99,21 +103,20 @@
return 1
}
- size := maxcpus / _NBBY
- ptrsize := uint32(unsafe.Sizeof(uintptr(0)))
- if size < ptrsize {
- size = ptrsize
+ maskSize := int(maxcpus+7) / 8
+ if maskSize < sys.PtrSize {
+ maskSize = sys.PtrSize
}
- if size > _CPU_SETSIZE_MAX {
- return 1
+ if maskSize > len(mask) {
+ maskSize = len(mask)
}
if cpuset_getaffinity(_CPU_LEVEL_WHICH, _CPU_WHICH_PID, _CPU_CURRENT_PID,
- int(size), (*byte)(unsafe.Pointer(&mask[0]))) != 0 {
+ maskSize, (*byte)(unsafe.Pointer(&mask[0]))) != 0 {
return 1
}
n := int32(0)
- for _, v := range mask[:size] {
+ for _, v := range mask[:maskSize] {
for v != 0 {
n += int32(v & 1)
v >>= 1
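// A standalone sketch of the counting loop above: cpuset_getaffinity fills a
// bitmask with one bit per CPU the process may run on, and the CPU count is
// simply the number of set bits. The example mask is made up.
package main

import "fmt"

func countBits(mask []byte) int32 {
	n := int32(0)
	for _, v := range mask {
		for v != 0 {
			n += int32(v & 1)
			v >>= 1
		}
	}
	return n
}

func main() {
	// CPUs 0-3 and 6 allowed: 0b01001111 -> 5 CPUs.
	fmt.Println(countBits([]byte{0x4F})) // 5
}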
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 7889973..98e7f52 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -89,13 +89,13 @@
// buffers, but we don't have a dynamic memory allocator at the
// moment, so that's a bit tricky and seems like overkill.
const maxCPUs = 64 * 1024
- var buf [maxCPUs / (sys.PtrSize * 8)]uintptr
+ var buf [maxCPUs / 8]byte
r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
if r < 0 {
return 1
}
n := int32(0)
- for _, v := range buf[:r/sys.PtrSize] {
+ for _, v := range buf[:r] {
for v != 0 {
n += int32(v & 1)
v >>= 1
@@ -193,6 +193,8 @@
var procAuxv = []byte("/proc/self/auxv\x00")
+func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
+
func sysargs(argc int32, argv **byte) {
n := argc + 1
@@ -206,45 +208,46 @@
// now argv+n is auxv
auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
- if sysauxv(auxv[:]) == 0 {
- // In some situations we don't get a loader-provided
- // auxv, such as when loaded as a library on Android.
- // Fall back to /proc/self/auxv.
- fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
- if fd < 0 {
- // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to
- // try using mincore to detect the physical page size.
- // mincore should return EINVAL when address is not a multiple of system page size.
- const size = 256 << 10 // size of memory region to allocate
- p := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
- return
- }
- var n uintptr
- for n = 4 << 10; n < size; n <<= 1 {
- err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
- if err == 0 {
- physPageSize = n
- break
- }
- }
- if physPageSize == 0 {
- physPageSize = size
- }
- munmap(p, size)
- return
- }
- var buf [128]uintptr
- n := read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
- closefd(fd)
- if n < 0 {
- return
- }
- // Make sure buf is terminated, even if we didn't read
- // the whole file.
- buf[len(buf)-2] = _AT_NULL
- sysauxv(buf[:])
+ if sysauxv(auxv[:]) != 0 {
+ return
}
+ // In some situations we don't get a loader-provided
+ // auxv, such as when loaded as a library on Android.
+ // Fall back to /proc/self/auxv.
+ fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
+ if fd < 0 {
+ // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fall back to
+ // using mincore to detect the physical page size.
+ // mincore should return EINVAL when address is not a multiple of system page size.
+ const size = 256 << 10 // size of memory region to allocate
+ p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
+ return
+ }
+ var n uintptr
+ for n = 4 << 10; n < size; n <<= 1 {
+ err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
+ if err == 0 {
+ physPageSize = n
+ break
+ }
+ }
+ if physPageSize == 0 {
+ physPageSize = size
+ }
+ munmap(p, size)
+ return
+ }
+ var buf [128]uintptr
+ n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
+ closefd(fd)
+ if n < 0 {
+ return
+ }
+ // Make sure buf is terminated, even if we didn't read
+ // the whole file.
+ buf[len(buf)-2] = _AT_NULL
+ sysauxv(buf[:])
}
func sysauxv(auxv []uintptr) int {
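// A hedged sketch of the fallback probing above: mincore fails with EINVAL
// when its address is not page-aligned, so probing a mapped region at
// doubling offsets finds the smallest offset the kernel accepts, which is the
// physical page size. Here mincoreOK simulates the kernel check.
package main

import "fmt"

func probePageSize(regionSize uintptr, mincoreOK func(off uintptr) bool) uintptr {
	for n := uintptr(4 << 10); n < regionSize; n <<= 1 {
		if mincoreOK(n) {
			return n
		}
	}
	return regionSize
}

func main() {
	const truePageSize = 16 << 10 // pretend the kernel uses 16 KiB pages
	ok := func(off uintptr) bool { return off%truePageSize == 0 }
	fmt.Println(probePageSize(256<<10, ok)) // 16384
}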
@@ -382,7 +385,7 @@
func raiseproc(sig uint32)
//go:noescape
-func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+func sched_getaffinity(pid, len uintptr, buf *byte) int32
func osyield()
//go:nosplit
diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index 986a341..96827e7 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -2,14 +2,22 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build arm64
+
package runtime
-const (
- _ARM64_FEATURE_HAS_CRC32 = 0x80
-)
+// For go:linkname
+import _ "unsafe"
var randomNumber uint32
-var supportCRC32 bool
+
+// arm64 doesn't have a 'cpuid' instruction equivalent and relies on
+// HWCAP/HWCAP2 bits for hardware capabilities.
+
+//go:linkname cpu_hwcap internal/cpu.arm64_hwcap
+//go:linkname cpu_hwcap2 internal/cpu.arm64_hwcap2
+var cpu_hwcap uint
+var cpu_hwcap2 uint
func archauxv(tag, val uintptr) {
switch tag {
@@ -20,7 +28,9 @@
randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
case _AT_HWCAP:
- supportCRC32 = val&_ARM64_FEATURE_HAS_CRC32 != 0
+ cpu_hwcap = uint(val)
+ case _AT_HWCAP2:
+ cpu_hwcap2 = uint(val)
}
}
diff --git a/src/runtime/os_linux_noauxv.go b/src/runtime/os_linux_noauxv.go
index 5e9f031..db6e5a0 100644
--- a/src/runtime/os_linux_noauxv.go
+++ b/src/runtime/os_linux_noauxv.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
+// +build !386,!amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
package runtime
diff --git a/src/runtime/os_linux_ppc64x.go b/src/runtime/os_linux_ppc64x.go
index b0da98b..e37bfc4 100644
--- a/src/runtime/os_linux_ppc64x.go
+++ b/src/runtime/os_linux_ppc64x.go
@@ -6,55 +6,22 @@
package runtime
-import (
- "runtime/internal/sys"
-)
+// For go:linkname
+import _ "unsafe"
-const (
- // ISA level
- // Go currently requires POWER5 as a minimum for ppc64, so we need
- // to check for ISA 2.03 and beyond.
- _PPC_FEATURE_POWER5_PLUS = 0x00020000 // ISA 2.03 (POWER5+)
- _PPC_FEATURE_ARCH_2_05 = 0x00001000 // ISA 2.05 (POWER6)
- _PPC_FEATURE_POWER6_EXT = 0x00000200 // mffgpr/mftgpr extension (POWER6x)
- _PPC_FEATURE_ARCH_2_06 = 0x00000100 // ISA 2.06 (POWER7)
- _PPC_FEATURE2_ARCH_2_07 = 0x80000000 // ISA 2.07 (POWER8)
+// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on
+// HWCAP/HWCAP2 bits for hardware capabilities.
- // Standalone capabilities
- _PPC_FEATURE_HAS_ALTIVEC = 0x10000000 // SIMD/Vector unit
- _PPC_FEATURE_HAS_VSX = 0x00000080 // Vector scalar unit
-)
-
-type facilities struct {
- _ [sys.CacheLineSize]byte
- isPOWER5x bool // ISA 2.03
- isPOWER6 bool // ISA 2.05
- isPOWER6x bool // ISA 2.05 + mffgpr/mftgpr extension
- isPOWER7 bool // ISA 2.06
- isPOWER8 bool // ISA 2.07
- hasVMX bool // Vector unit
- hasVSX bool // Vector scalar unit
- _ [sys.CacheLineSize]byte
-}
-
-// cpu can be tested at runtime in go assembler code to check for
-// a certain ISA level or hardware capability, for example:
-// ·cpu+facilities_hasVSX(SB) for checking the availability of VSX
-// or
-// ·cpu+facilities_isPOWER7(SB) for checking if the processor implements
-// ISA 2.06 instructions.
-var cpu facilities
+//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap
+//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2
+var cpu_hwcap uint
+var cpu_hwcap2 uint
func archauxv(tag, val uintptr) {
switch tag {
case _AT_HWCAP:
- cpu.isPOWER5x = val&_PPC_FEATURE_POWER5_PLUS != 0
- cpu.isPOWER6 = val&_PPC_FEATURE_ARCH_2_05 != 0
- cpu.isPOWER6x = val&_PPC_FEATURE_POWER6_EXT != 0
- cpu.isPOWER7 = val&_PPC_FEATURE_ARCH_2_06 != 0
- cpu.hasVMX = val&_PPC_FEATURE_HAS_ALTIVEC != 0
- cpu.hasVSX = val&_PPC_FEATURE_HAS_VSX != 0
+ cpu_hwcap = uint(val)
case _AT_HWCAP2:
- cpu.isPOWER8 = val&_PPC_FEATURE2_ARCH_2_07 != 0
+ cpu_hwcap2 = uint(val)
}
}
diff --git a/src/runtime/os_nacl.go b/src/runtime/os_nacl.go
index 18e6ce6..6830da4 100644
--- a/src/runtime/os_nacl.go
+++ b/src/runtime/os_nacl.go
@@ -33,7 +33,7 @@
//go:noescape
func nacl_nanosleep(ts, extra *timespec) int32
func nanotime() int64
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int)
func exit(code int32)
func osyield()
@@ -168,6 +168,9 @@
}
}
+//go:noescape
+func exitThread(wait *uint32)
+
//go:nosplit
func semacreate(mp *m) {
if mp.waitsema != 0 {
@@ -285,6 +288,9 @@
func sigignore(uint32) {}
func closeonexec(int32) {}
+// gsignalStack is unused on nacl.
+type gsignalStack struct{}
+
var writelock uint32 // test-and-set spin lock for write
/*
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index c26c3c9..3778969 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -21,6 +21,9 @@
_UC_SIGMASK = 0x01
_UC_CPU = 0x04
+ // From <sys/lwp.h>
+ _LWP_DETACHED = 0x00000040
+
_EAGAIN = 35
)
@@ -55,7 +58,7 @@
func lwp_create(ctxt unsafe.Pointer, flags uintptr, lwpid unsafe.Pointer) int32
//go:noescape
-func lwp_park(abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32
+func lwp_park(clockid, flags int32, ts *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32
//go:noescape
func lwp_unpark(lwp int32, hint unsafe.Pointer) int32
@@ -73,6 +76,9 @@
_CLOCK_VIRTUAL = 1
_CLOCK_PROF = 2
_CLOCK_MONOTONIC = 3
+
+ _TIMER_RELTIME = 0
+ _TIMER_ABSTIME = 1
)
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
@@ -116,10 +122,9 @@
// Compute sleep deadline.
var tsp *timespec
+ var ts timespec
if ns >= 0 {
- var ts timespec
var nsec int32
- ns += nanotime()
ts.set_sec(timediv(ns, 1000000000, &nsec))
ts.set_nsec(nsec)
tsp = &ts
@@ -135,9 +140,18 @@
}
// Sleep until unparked by semawakeup or timeout.
- ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
+ ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
if ret == _ETIMEDOUT {
return -1
+ } else if ret == _EINTR && ns >= 0 {
+ // Avoid sleeping forever if we keep getting
+ // interrupted (for example by the profiling
+ // timer). It would be better if tsp upon return had the
+ // remaining time to sleep, but this is good enough.
+ var nsec int32
+ ns /= 2
+ ts.set_sec(timediv(ns, 1000000000, &nsec))
+ ts.set_nsec(nsec)
}
}
}
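// Illustrative arithmetic for the fallback above: without knowing how long
// lwp_park actually slept before EINTR, halving the requested timeout on each
// retry keeps the total requested sleep bounded (it converges to less than 2x
// the original request) instead of potentially sleeping forever.
package main

import "fmt"

func totalRequestedSleep(ns int64, interrupts int) (total int64) {
	for i := 0; i <= interrupts; i++ {
		total += ns
		ns /= 2
	}
	return total
}

func main() {
	// A 1ms sleep interrupted 4 times re-requests 1ms+0.5ms+0.25ms+...,
	// staying under 2ms in total.
	fmt.Println(totalRequestedSleep(1000000, 4)) // 1937500 ns
}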
@@ -182,7 +196,7 @@
lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp.g0, funcPC(netbsdMstart))
- ret := lwp_create(unsafe.Pointer(&uc), 0, unsafe.Pointer(&mp.procid))
+ ret := lwp_create(unsafe.Pointer(&uc), _LWP_DETACHED, unsafe.Pointer(&mp.procid))
sigprocmask(_SIG_SETMASK, &oset, nil)
if ret < 0 {
print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
index 45e881a..32fdabb 100644
--- a/src/runtime/os_plan9.go
+++ b/src/runtime/os_plan9.go
@@ -393,7 +393,7 @@
}
//go:nosplit
-func exit(e int) {
+func exit(e int32) {
var status []byte
if e == 0 {
status = emptystatus
@@ -421,6 +421,12 @@
}
}
+func exitThread(wait *uint32) {
+ // We should never reach exitThread on Plan 9 because we let
+ // the OS clean up threads.
+ throw("exitThread")
+}
+
//go:nosplit
func semacreate(mp *m) {
}
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 233cc16..7aeadd9 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -623,7 +623,7 @@
func newosproc(mp *m, stk unsafe.Pointer) {
const _STACK_SIZE_PARAM_IS_A_RESERVATION = 0x00010000
// stackSize must match SizeOfStackReserve in cmd/link/internal/ld/pe.go.
- const stackSize = 0x00200000*_64bit + 0x00020000*(1-_64bit)
+ const stackSize = 0x00200000*_64bit + 0x00100000*(1-_64bit)
thandle := stdcall6(_CreateThread, 0, stackSize,
funcPC(tstart_stdcall), uintptr(unsafe.Pointer(mp)),
_STACK_SIZE_PARAM_IS_A_RESERVATION, 0)
@@ -640,6 +640,9 @@
print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", getlasterror(), ")\n")
throw("runtime.newosproc")
}
+
+ // Close thandle to avoid leaking the thread object if it exits.
+ stdcall1(_CloseHandle, thandle)
}
// Used by the C library build mode. On Linux this function would allocate a
@@ -651,6 +654,12 @@
newosproc(mp, stk)
}
+func exitThread(wait *uint32) {
+ // We should never reach exitThread on Windows because we let
+ // the OS clean up threads.
+ throw("exitThread")
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
func mpreinit(mp *m) {
@@ -701,7 +710,7 @@
if mp.profilehz != 0 {
// leave pc/sp for cpu profiler
mp.libcallg.set(gp)
- mp.libcallpc = getcallerpc(unsafe.Pointer(&fn))
+ mp.libcallpc = getcallerpc()
// sp must be the last, because once async cpu profiler finds
// all three values to be non-zero, it will use them
mp.libcallsp = getcallersp(unsafe.Pointer(&fn))
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 43bfdd7..c51948b 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -83,7 +83,7 @@
// Until the copy completes, we can only call nosplit routines.
sp := getcallersp(unsafe.Pointer(&siz))
argp := uintptr(unsafe.Pointer(&fn)) + unsafe.Sizeof(fn)
- callerpc := getcallerpc(unsafe.Pointer(&siz))
+ callerpc := getcallerpc()
d := newdefer(siz)
if d._panic != nil {
@@ -244,36 +244,47 @@
freedeferfn()
}
sc := deferclass(uintptr(d.siz))
- if sc < uintptr(len(p{}.deferpool)) {
- pp := getg().m.p.ptr()
- if len(pp.deferpool[sc]) == cap(pp.deferpool[sc]) {
- // Transfer half of local cache to the central cache.
- //
- // Take this slow path on the system stack so
- // we don't grow freedefer's stack.
- systemstack(func() {
- var first, last *_defer
- for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 {
- n := len(pp.deferpool[sc])
- d := pp.deferpool[sc][n-1]
- pp.deferpool[sc][n-1] = nil
- pp.deferpool[sc] = pp.deferpool[sc][:n-1]
- if first == nil {
- first = d
- } else {
- last.link = d
- }
- last = d
- }
- lock(&sched.deferlock)
- last.link = sched.deferpool[sc]
- sched.deferpool[sc] = first
- unlock(&sched.deferlock)
- })
- }
- *d = _defer{}
- pp.deferpool[sc] = append(pp.deferpool[sc], d)
+ if sc >= uintptr(len(p{}.deferpool)) {
+ return
}
+ pp := getg().m.p.ptr()
+ if len(pp.deferpool[sc]) == cap(pp.deferpool[sc]) {
+ // Transfer half of local cache to the central cache.
+ //
+ // Take this slow path on the system stack so
+ // we don't grow freedefer's stack.
+ systemstack(func() {
+ var first, last *_defer
+ for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 {
+ n := len(pp.deferpool[sc])
+ d := pp.deferpool[sc][n-1]
+ pp.deferpool[sc][n-1] = nil
+ pp.deferpool[sc] = pp.deferpool[sc][:n-1]
+ if first == nil {
+ first = d
+ } else {
+ last.link = d
+ }
+ last = d
+ }
+ lock(&sched.deferlock)
+ last.link = sched.deferpool[sc]
+ sched.deferpool[sc] = first
+ unlock(&sched.deferlock)
+ })
+ }
+
+ // These lines used to be simply `*d = _defer{}` but that
+ // started causing a nosplit stack overflow via typedmemmove.
+ d.siz = 0
+ d.started = false
+ d.sp = 0
+ d.pc = 0
+ d.fn = nil
+ d._panic = nil
+ d.link = nil
+
+ pp.deferpool[sc] = append(pp.deferpool[sc], d)
}
// Separate function so that it can split stack.
@@ -336,7 +347,7 @@
// Goexit terminates the goroutine that calls it. No other goroutine is affected.
// Goexit runs all deferred calls before terminating the goroutine. Because Goexit
-// is not panic, however, any recover calls in those deferred functions will return nil.
+// is not a panic, any recover calls in those deferred functions will return nil.
//
// Calling Goexit from the main goroutine terminates that goroutine
// without func main returning. Since func main has not returned,
@@ -397,12 +408,15 @@
}
// Print all currently active panics. Used when crashing.
+// Should only be called after preprintpanics.
func printpanics(p *_panic) {
if p.link != nil {
printpanics(p.link)
print("\t")
}
print("panic: ")
+ // Because of preprintpanics, p.arg cannot be an error or
+ // stringer, so this won't call into user code.
printany(p.arg)
if p.recovered {
print(" [recovered]")
@@ -580,7 +594,7 @@
//go:nosplit
func dopanic(unused int) {
- pc := getcallerpc(unsafe.Pointer(&unused))
+ pc := getcallerpc()
sp := getcallersp(unsafe.Pointer(&unused))
gp := getg()
systemstack(func() {
@@ -643,14 +657,22 @@
gogo(&gp.sched)
}
+// startpanic_m prepares for an unrecoverable panic.
+//
+// It can have write barriers because the write barrier explicitly
+// ignores writes once dying > 0.
+//
+//go:yeswritebarrierrec
func startpanic_m() {
_g_ := getg()
if mheap_.cachealloc.size == 0 { // very early
print("runtime: panic before malloc heap initialized\n")
- _g_.m.mallocing = 1 // tell rest of panic not to try to malloc
- } else if _g_.m.mcache == nil { // can happen if called from signal handler or throw
- _g_.m.mcache = allocmcache()
}
+ // Disallow malloc during an unrecoverable panic. A panic
+ // could happen in a signal handler, or in a throw, or inside
+ // malloc itself. We want to catch if an allocation ever does
+ // happen (even if we're not in one of these situations).
+ _g_.m.mallocing++
switch _g_.m.dying {
case 0:
@@ -679,7 +701,7 @@
exit(4)
fallthrough
default:
- // Can't even print! Just exit.
+ // Can't even print! Just exit.
exit(5)
}
}
@@ -735,6 +757,9 @@
exit(2)
}
+// canpanic returns false if a signal should throw instead of
+// panicking.
+//
//go:nosplit
func canpanic(gp *g) bool {
// Note that g is m->gsignal, different from gp.
diff --git a/src/runtime/plugin.go b/src/runtime/plugin.go
index 682caac..5e05be7 100644
--- a/src/runtime/plugin.go
+++ b/src/runtime/plugin.go
@@ -7,22 +7,29 @@
import "unsafe"
//go:linkname plugin_lastmoduleinit plugin.lastmoduleinit
-func plugin_lastmoduleinit() (path string, syms map[string]interface{}, mismatchpkg string) {
- md := firstmoduledata.next
+func plugin_lastmoduleinit() (path string, syms map[string]interface{}, errstr string) {
+ var md *moduledata
+ for pmd := firstmoduledata.next; pmd != nil; pmd = pmd.next {
+ if pmd.bad {
+ md = nil // we only want the last module
+ continue
+ }
+ md = pmd
+ }
if md == nil {
throw("runtime: no plugin module data")
}
- for md.next != nil {
- md = md.next
+ if md.pluginpath == "" {
+ throw("runtime: plugin has empty pluginpath")
}
if md.typemap != nil {
- throw("runtime: plugin already initialized")
+ return "", nil, "plugin already loaded"
}
for _, pmd := range activeModules() {
if pmd.pluginpath == md.pluginpath {
- println("plugin: plugin", md.pluginpath, "already loaded")
- throw("plugin: plugin already loaded")
+ md.bad = true
+ return "", nil, "plugin already loaded"
}
if inRange(pmd.text, pmd.etext, md.text, md.etext) ||
@@ -43,7 +50,8 @@
}
for _, pkghash := range md.pkghashes {
if pkghash.linktimehash != *pkghash.runtimehash {
- return "", nil, pkghash.modulename
+ md.bad = true
+ return "", nil, "plugin was built with a different version of package " + pkghash.modulename
}
}
@@ -54,13 +62,11 @@
pluginftabverify(md)
moduledataverify1(md)
- lock(&ifaceLock)
+ lock(&itabLock)
for _, i := range md.itablinks {
- if !i.inhash {
- additab(i, true, false)
- }
+ itabAdd(i)
}
- unlock(&ifaceLock)
+ unlock(&itabLock)
// Build a map of symbol names to symbols. Here in the runtime
// we fill out the first word of the interface, the type. We
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 21ea25c..b7e5a1f 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -18,7 +18,7 @@
// To add equivalent profiling support to a standalone program, add
// code like the following to your main function:
//
-// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`")
+// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
// var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
//
// func main() {
@@ -319,7 +319,15 @@
p.mu.Unlock()
// Map order is non-deterministic; make output deterministic.
- sort.Sort(stackProfile(all))
+ sort.Slice(all, func(i, j int) bool {
+ t, u := all[i], all[j]
+ for k := 0; k < len(t) && k < len(u); k++ {
+ if t[k] != u[k] {
+ return t[k] < u[k]
+ }
+ }
+ return len(t) < len(u)
+ })
return printCountProfile(w, debug, p.name, stackProfile(all))
}
@@ -328,16 +336,6 @@
func (x stackProfile) Len() int { return len(x) }
func (x stackProfile) Stack(i int) []uintptr { return x[i] }
-func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-func (x stackProfile) Less(i, j int) bool {
- t, u := x[i], x[j]
- for k := 0; k < len(t) && k < len(u); k++ {
- if t[k] != u[k] {
- return t[k] < u[k]
- }
- }
- return len(t) < len(u)
-}
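
For reference, the inline sort.Slice comparator above implements the same lexicographic ordering the removed Less method provided. A standalone sketch of that ordering outside the runtime (the sample data is made up for illustration):

package main

import (
	"fmt"
	"sort"
)

func main() {
	// Each "stack" is a slice of program counters.
	stacks := [][]uintptr{
		{0x40, 0x50, 0x60},
		{0x40, 0x50},
		{0x10, 0x99},
	}
	// Lexicographic order: compare element by element; on a common
	// prefix, the shorter stack sorts first.
	sort.Slice(stacks, func(i, j int) bool {
		t, u := stacks[i], stacks[j]
		for k := 0; k < len(t) && k < len(u); k++ {
			if t[k] != u[k] {
				return t[k] < u[k]
			}
		}
		return len(t) < len(u)
	})
	fmt.Println(stacks) // [[16 153] [64 80] [64 80 96]]
}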
// A countProfile is a set of stack traces to be printed as counts
// grouped by stack trace. There are multiple implementations:
@@ -348,6 +346,43 @@
Stack(i int) []uintptr
}
+// printCountCycleProfile outputs block profile records (for block or mutex profiles)
+// in the pprof proto format. Cycle counts are translated to time durations
+// because the proto expects count and time (nanoseconds) instead of count
+// and number of cycles for block and mutex (contention) profiles.
+// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile.
+func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler func(int64, float64) (int64, float64), records []runtime.BlockProfileRecord) error {
+ // Output profile in protobuf form.
+ b := newProfileBuilder(w)
+ b.pbValueType(tagProfile_PeriodType, countName, "count")
+ b.pb.int64Opt(tagProfile_Period, 1)
+ b.pbValueType(tagProfile_SampleType, countName, "count")
+ b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")
+
+ cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9
+
+ values := []int64{0, 0}
+ var locs []uint64
+ for _, r := range records {
+ count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
+ values[0] = count
+ values[1] = int64(nanosec)
+ locs = locs[:0]
+ for _, addr := range r.Stack() {
+ // For count profiles, all stack addresses are
+ // return PCs, which is what locForPC expects.
+ l := b.locForPC(addr)
+ if l == 0 { // runtime.goexit
+ continue
+ }
+ locs = append(locs, l)
+ }
+ b.pbSample(values, locs, nil)
+ }
+ b.build()
+ return nil
+}
+
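
The cycle-to-nanosecond translation in printCountCycleProfile is plain arithmetic: nanoseconds = cycles / (cycles per second / 1e9). A tiny standalone illustration, using an assumed 2.4 GHz clock in place of runtime_cyclesPerSecond:

package main

import "fmt"

func main() {
	// Assumed clock rate for illustration only; the runtime asks
	// runtime_cyclesPerSecond() instead.
	cyclesPerSecond := int64(2400000000) // 2.4 GHz
	cpuGHz := float64(cyclesPerSecond) / 1e9

	cycles := int64(4800000) // cycles spent blocked in some record
	nanosec := float64(cycles) / cpuGHz
	fmt.Printf("%d cycles at %.1f GHz = %.0f ns\n", cycles, cpuGHz, nanosec)
	// Output: 4800000 cycles at 2.4 GHz = 2000000 ns
}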
// printCountProfile prints a countProfile at the specified debug level.
// The profile will be in compressed proto format unless debug is nonzero.
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
@@ -476,6 +511,14 @@
// writeHeap writes the current runtime heap profile to w.
func writeHeap(w io.Writer, debug int) error {
+ var memStats *runtime.MemStats
+ if debug != 0 {
+ // Read mem stats first, so that our other allocations
+ // do not appear in the statistics.
+ memStats = new(runtime.MemStats)
+ runtime.ReadMemStats(memStats)
+ }
+
// Find out how many records there are (MemProfile(nil, true)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
@@ -538,8 +581,7 @@
// Print memstats information too.
// Pprof will ignore, but useful for people
- s := new(runtime.MemStats)
- runtime.ReadMemStats(s)
+ s := memStats
fmt.Fprintf(w, "\n# runtime.MemStats\n")
fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
@@ -765,14 +807,14 @@
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
- b := bufio.NewWriter(w)
- var tw *tabwriter.Writer
- w = b
- if debug > 0 {
- tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
- w = tw
+ if debug <= 0 {
+ return printCountCycleProfile(w, "contentions", "delay", scaleBlockProfile, p)
}
+ b := bufio.NewWriter(w)
+ tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+
fmt.Fprintf(w, "--- contention:\n")
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
for i := range p {
@@ -793,6 +835,14 @@
return b.Flush()
}
+func scaleBlockProfile(cnt int64, ns float64) (int64, float64) {
+ // Do nothing.
+ // The current way of block profile sampling makes it
+ // hard to compute the unsampled number. The legacy block
+ // profile parser doesn't attempt to scale or unsample.
+ return cnt, ns
+}
+
// writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error {
// TODO(pjw): too much common code with writeBlock. FIX!
@@ -809,14 +859,14 @@
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
- b := bufio.NewWriter(w)
- var tw *tabwriter.Writer
- w = b
- if debug > 0 {
- tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
- w = tw
+ if debug <= 0 {
+ return printCountCycleProfile(w, "contentions", "delay", scaleMutexProfile, p)
}
+ b := bufio.NewWriter(w)
+ tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+
fmt.Fprintf(w, "--- mutex:\n")
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
@@ -838,4 +888,9 @@
return b.Flush()
}
+func scaleMutexProfile(cnt int64, ns float64) (int64, float64) {
+ period := runtime.SetMutexProfileFraction(-1)
+ return cnt * int64(period), ns * float64(period)
+}
+
func runtime_cyclesPerSecond() int64
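
With this change, writing the block or mutex profile at debug=0 goes through printCountCycleProfile and produces the compressed proto format, with mutex counts and delays scaled up by the sampling period. A sketch of how ordinary user code would exercise that path (the file name and sleep are illustrative):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	// Sample every contention event (period = 1), as the test does.
	old := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(old)

	// Produce one contention event.
	var mu sync.Mutex
	mu.Lock()
	go func() {
		time.Sleep(10 * time.Millisecond)
		mu.Unlock()
	}()
	mu.Lock() // blocks until the goroutine unlocks
	mu.Unlock()

	f, err := os.Create("mutex.pb.gz") // illustrative file name
	if err != nil {
		panic(err)
	}
	defer f.Close()
	// debug=0 now emits the compressed proto format.
	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
		panic(err)
	}
}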
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 955964c..96fcfc9 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -26,16 +26,18 @@
"time"
)
-func cpuHogger(f func() int, dur time.Duration) {
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
// We only need to get one 100 Hz clock tick, so we've got
// a large safety buffer.
// But do at least 500 iterations (which should take about 100ms),
// otherwise TestCPUProfileMultithreaded can fail if only one
// thread is scheduled during the testing period.
t0 := time.Now()
+ accum := *y
for i := 0; i < 500 || time.Since(t0) < dur; i++ {
- f()
+ accum = f(accum)
}
+ *y = accum
}
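
Threading the accumulator through a pointer keeps the hog's result live without touching package globals inside the hot loop (which would drag the race runtime into the samples). A minimal standalone version of the same pattern (names are illustrative):

package main

import "fmt"

// work mixes its input so the compiler cannot treat the loop as dead code.
func work(x int) int {
	for i := 0; i < 1e5; i++ {
		if x > 0 {
			x *= x
		} else {
			x = x*x + 1
		}
	}
	return x
}

// hog keeps the result live by accumulating through *y, mirroring the
// cpuHogger(f, &salt, dur) pattern above.
func hog(f func(int) int, y *int, iters int) {
	accum := *y
	for i := 0; i < iters; i++ {
		accum = f(accum)
	}
	*y = accum
}

func main() {
	var salt int
	hog(work, &salt, 10)
	fmt.Println(salt != 0) // true: the result was not optimized away
}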
var (
@@ -46,8 +48,8 @@
// The actual CPU hogging function.
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.
-func cpuHog1() int {
- foo := salt1
+func cpuHog1(x int) int {
+ foo := x
for i := 0; i < 1e5; i++ {
if foo > 0 {
foo *= foo
@@ -58,8 +60,8 @@
return foo
}
-func cpuHog2() int {
- foo := salt2
+func cpuHog2(x int) int {
+ foo := x
for i := 0; i < 1e5; i++ {
if foo > 0 {
foo *= foo
@@ -72,7 +74,7 @@
func TestCPUProfile(t *testing.T) {
testCPUProfile(t, []string{"runtime/pprof.cpuHog1"}, func(dur time.Duration) {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
})
}
@@ -81,29 +83,29 @@
testCPUProfile(t, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, func(dur time.Duration) {
c := make(chan int)
go func() {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
c <- 1
}()
- cpuHogger(cpuHog2, dur)
+ cpuHogger(cpuHog2, &salt2, dur)
<-c
})
}
func TestCPUProfileInlining(t *testing.T) {
testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) {
- cpuHogger(inlinedCaller, dur)
+ cpuHogger(inlinedCaller, &salt1, dur)
})
}
-func inlinedCaller() int {
- inlinedCallee()
- return 0
+func inlinedCaller(x int) int {
+ x = inlinedCallee(x)
+ return x
}
-func inlinedCallee() {
+func inlinedCallee(x int) int {
// We could just use cpuHog1, but for loops prevent inlining
// right now. :(
- foo := salt1
+ foo := x
i := 0
loop:
if foo > 0 {
@@ -114,7 +116,7 @@
if i++; i < 1e5 {
goto loop
}
- salt1 = foo
+ return foo
}
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
@@ -177,9 +179,9 @@
}
}
- if badOS[runtime.GOOS] {
+ switch runtime.GOOS {
+ case "darwin", "dragonfly", "netbsd", "solaris":
t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS)
- return
}
// Ignore the failure if the tests are running in a QEMU-based emulator,
// QEMU is not perfect at emulating everything.
@@ -187,7 +189,6 @@
// IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605.
if os.Getenv("IN_QEMU") == "1" {
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
- return
}
t.FailNow()
}
@@ -394,59 +395,107 @@
})
}
-// Operating systems that are expected to fail the tests. See issue 13841.
-var badOS = map[string]bool{
- "darwin": true,
- "netbsd": true,
- "plan9": true,
- "dragonfly": true,
- "solaris": true,
-}
-
func TestBlockProfile(t *testing.T) {
type TestCase struct {
name string
f func()
+ stk []string
re string
}
tests := [...]TestCase{
- {"chan recv", blockChanRecv, `
+ {
+ name: "chan recv",
+ f: blockChanRecv,
+ stk: []string{
+ "runtime.chanrecv1",
+ "runtime/pprof.blockChanRecv",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"chan send", blockChanSend, `
+ {
+ name: "chan send",
+ f: blockChanSend,
+ stk: []string{
+ "runtime.chansend1",
+ "runtime/pprof.blockChanSend",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"chan close", blockChanClose, `
+ {
+ name: "chan close",
+ f: blockChanClose,
+ stk: []string{
+ "runtime.chanrecv1",
+ "runtime/pprof.blockChanClose",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"select recv async", blockSelectRecvAsync, `
+ {
+ name: "select recv async",
+ f: blockSelectRecvAsync,
+ stk: []string{
+ "runtime.selectgo",
+ "runtime/pprof.blockSelectRecvAsync",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"select send sync", blockSelectSendSync, `
+ {
+ name: "select send sync",
+ f: blockSelectSendSync,
+ stk: []string{
+ "runtime.selectgo",
+ "runtime/pprof.blockSelectSendSync",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"mutex", blockMutex, `
+ {
+ name: "mutex",
+ f: blockMutex,
+ stk: []string{
+ "sync.(*Mutex).Lock",
+ "runtime/pprof.blockMutex",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"cond", blockCond, `
+ {
+ name: "cond",
+ f: blockCond,
+ stk: []string{
+ "sync.(*Cond).Wait",
+ "runtime/pprof.blockCond",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
@@ -454,28 +503,84 @@
`},
}
+ // Generate block profile
runtime.SetBlockProfileRate(1)
defer runtime.SetBlockProfileRate(0)
for _, test := range tests {
test.f()
}
- var w bytes.Buffer
- Lookup("block").WriteTo(&w, 1)
- prof := w.String()
- if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
- t.Fatalf("Bad profile header:\n%v", prof)
+ t.Run("debug=1", func(t *testing.T) {
+ var w bytes.Buffer
+ Lookup("block").WriteTo(&w, 1)
+ prof := w.String()
+
+ if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
+ t.Fatalf("Bad profile header:\n%v", prof)
+ }
+
+ if strings.HasSuffix(prof, "#\t0x0\n\n") {
+ t.Errorf("Useless 0 suffix:\n%v", prof)
+ }
+
+ for _, test := range tests {
+ if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) {
+ t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
+ }
+ }
+ })
+
+ t.Run("proto", func(t *testing.T) {
+ // proto format
+ var w bytes.Buffer
+ Lookup("block").WriteTo(&w, 0)
+ p, err := profile.Parse(&w)
+ if err != nil {
+ t.Fatalf("failed to parse profile: %v", err)
+ }
+ t.Logf("parsed proto: %s", p)
+ if err := p.CheckValid(); err != nil {
+ t.Fatalf("invalid profile: %v", err)
+ }
+
+ stks := stacks(p)
+ for _, test := range tests {
+ if !containsStack(stks, test.stk) {
+ t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk)
+ }
+ }
+ })
+
+}
+
+func stacks(p *profile.Profile) (res [][]string) {
+ for _, s := range p.Sample {
+ var stk []string
+ for _, l := range s.Location {
+ for _, line := range l.Line {
+ stk = append(stk, line.Function.Name)
+ }
+ }
+ res = append(res, stk)
}
+ return res
+}
- if strings.HasSuffix(prof, "#\t0x0\n\n") {
- t.Errorf("Useless 0 suffix:\n%v", prof)
- }
-
- for _, test := range tests {
- if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) {
- t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
+func containsStack(got [][]string, want []string) bool {
+ for _, stk := range got {
+ if len(stk) < len(want) {
+ continue
+ }
+ for i, f := range want {
+ if f != stk[i] {
+ break
+ }
+ if i == len(want)-1 {
+ return true
+ }
}
}
+ return false
}
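
The proto subtests above parse the profile back and search for expected call stacks using the vendored profile package. Outside the runtime tree, roughly the same check could be written against the external github.com/google/pprof/profile package (an assumption; the test itself uses the internal copy):

package main

import (
	"bytes"
	"fmt"
	"runtime"
	"runtime/pprof"
	"time"

	"github.com/google/pprof/profile" // external counterpart of the vendored package
)

func main() {
	runtime.SetBlockProfileRate(1)
	defer runtime.SetBlockProfileRate(0)

	// One blocking channel receive to populate the profile.
	ch := make(chan struct{})
	go func() {
		time.Sleep(10 * time.Millisecond)
		ch <- struct{}{}
	}()
	<-ch

	var buf bytes.Buffer
	if err := pprof.Lookup("block").WriteTo(&buf, 0); err != nil {
		panic(err)
	}
	p, err := profile.Parse(&buf)
	if err != nil {
		panic(err)
	}
	// Print every function name on every sampled stack.
	for _, s := range p.Sample {
		for _, loc := range s.Location {
			for _, line := range loc.Line {
				fmt.Println(line.Function.Name)
			}
		}
	}
}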
const blockDelay = 10 * time.Millisecond
@@ -567,6 +672,8 @@
}
func TestMutexProfile(t *testing.T) {
+ // Generate mutex profile
+
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
if old != 0 {
@@ -575,31 +682,57 @@
blockMutex()
- var w bytes.Buffer
- Lookup("mutex").WriteTo(&w, 1)
- prof := w.String()
+ t.Run("debug=1", func(t *testing.T) {
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 1)
+ prof := w.String()
+ t.Logf("received profile: %v", prof)
- if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
- t.Errorf("Bad profile header:\n%v", prof)
- }
- prof = strings.Trim(prof, "\n")
- lines := strings.Split(prof, "\n")
- if len(lines) != 6 {
- t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
- }
- if len(lines) < 6 {
- return
- }
- // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
- r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
- //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
- if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
- t.Errorf("%q didn't match %q", lines[3], r2)
- }
- r3 := "^#.*runtime/pprof.blockMutex.*$"
- if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
- t.Errorf("%q didn't match %q", lines[5], r3)
- }
+ if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
+ t.Errorf("Bad profile header:\n%v", prof)
+ }
+ prof = strings.Trim(prof, "\n")
+ lines := strings.Split(prof, "\n")
+ if len(lines) != 6 {
+ t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
+ }
+ if len(lines) < 6 {
+ return
+ }
+ // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
+ r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
+ //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
+ if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[3], r2)
+ }
+ r3 := "^#.*runtime/pprof.blockMutex.*$"
+ if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[5], r3)
+ }
+ t.Logf(prof)
+ })
+ t.Run("proto", func(t *testing.T) {
+ // proto format
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 0)
+ p, err := profile.Parse(&w)
+ if err != nil {
+ t.Fatalf("failed to parse profile: %v", err)
+ }
+ t.Logf("parsed proto: %s", p)
+ if err := p.CheckValid(); err != nil {
+ t.Fatalf("invalid profile: %v", err)
+ }
+
+ stks := stacks(p)
+ for _, want := range [][]string{
+ {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"},
+ } {
+ if !containsStack(stks, want) {
+ t.Errorf("No matching stack entry for %+v", want)
+ }
+ }
+ })
}
func func1(c chan int) { <-c }
@@ -712,7 +845,7 @@
func TestCPUProfileLabel(t *testing.T) {
testCPUProfile(t, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
})
})
}
@@ -725,14 +858,15 @@
start := time.Now()
var wg sync.WaitGroup
for time.Since(start) < dur {
+ var salts [10]int
for i := 0; i < 10; i++ {
wg.Add(1)
- go func() {
+ go func(j int) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
- cpuHogger(cpuHog1, time.Millisecond)
+ cpuHogger(cpuHog1, &salts[j], time.Millisecond)
})
wg.Done()
- }()
+ }(i)
}
wg.Wait()
}
diff --git a/src/runtime/print.go b/src/runtime/print.go
index 8fa3d39..7b2e4f4 100644
--- a/src/runtime/print.go
+++ b/src/runtime/print.go
@@ -6,6 +6,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -56,7 +57,7 @@
// The compiler emits calls to printlock and printunlock around
// the multiple calls that implement a single Go print or println
-// statement. Some of the print helpers (printsp, for example)
+// statement. Some of the print helpers (printslice, for example)
// call print recursively. There is also the problem of a crash
// happening during the print routines and needing to acquire
// the print lock to print information about the crash.
@@ -98,31 +99,31 @@
}
func printsp() {
- print(" ")
+ printstring(" ")
}
func printnl() {
- print("\n")
+ printstring("\n")
}
func printbool(v bool) {
if v {
- print("true")
+ printstring("true")
} else {
- print("false")
+ printstring("false")
}
}
func printfloat(v float64) {
switch {
case v != v:
- print("NaN")
+ printstring("NaN")
return
case v+v == v && v > 0:
- print("+Inf")
+ printstring("+Inf")
return
case v+v == v && v < 0:
- print("-Inf")
+ printstring("-Inf")
return
}
@@ -204,7 +205,7 @@
func printint(v int64) {
if v < 0 {
- print("-")
+ printstring("-")
v = -v
}
printuint(uint64(v))
@@ -249,3 +250,55 @@
func printiface(i iface) {
print("(", i.tab, ",", i.data, ")")
}
+
+// hexdumpWords prints a word-oriented hex dump of [p, end).
+//
+// If mark != nil, it will be called with each printed word's address
+// and should return a character mark to appear just before that
+// word's value. It can return 0 to indicate no mark.
+func hexdumpWords(p, end uintptr, mark func(uintptr) byte) {
+ p1 := func(x uintptr) {
+ var buf [2 * sys.PtrSize]byte
+ for i := len(buf) - 1; i >= 0; i-- {
+ if x&0xF < 10 {
+ buf[i] = byte(x&0xF) + '0'
+ } else {
+ buf[i] = byte(x&0xF) - 10 + 'a'
+ }
+ x >>= 4
+ }
+ gwrite(buf[:])
+ }
+
+ printlock()
+ var markbuf [1]byte
+ markbuf[0] = ' '
+ for i := uintptr(0); p+i < end; i += sys.PtrSize {
+ if i%16 == 0 {
+ if i != 0 {
+ println()
+ }
+ p1(p + i)
+ print(": ")
+ }
+
+ if mark != nil {
+ markbuf[0] = mark(p + i)
+ if markbuf[0] == 0 {
+ markbuf[0] = ' '
+ }
+ }
+ gwrite(markbuf[:])
+ val := *(*uintptr)(unsafe.Pointer(p + i))
+ p1(val)
+ print(" ")
+
+ // Can we symbolize val?
+ fn := findfunc(val)
+ if fn.valid() {
+ print("<", funcname(fn), "+", val-fn.entry, "> ")
+ }
+ }
+ println()
+ printunlock()
+}
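
hexdumpWords has to avoid allocation and writes through gwrite, but its shape is an ordinary word-oriented hex dump. A user-space approximation over a byte slice, assuming 8-byte words purely for the sketch:

package main

import (
	"encoding/binary"
	"fmt"
)

// hexdumpWords prints buf as 8-byte little-endian words, two words per
// line, with an optional mark character before each word. It mirrors
// the shape of the runtime helper but uses fmt instead of gwrite.
func hexdumpWords(buf []byte, mark func(off int) byte) {
	const wordSize = 8 // assume 64-bit words for this sketch
	for off := 0; off+wordSize <= len(buf); off += wordSize {
		if off%16 == 0 {
			if off != 0 {
				fmt.Println()
			}
			fmt.Printf("%016x: ", off)
		}
		m := byte(' ')
		if mark != nil {
			if c := mark(off); c != 0 {
				m = c
			}
		}
		fmt.Printf("%c%016x ", m, binary.LittleEndian.Uint64(buf[off:]))
	}
	fmt.Println()
}

func main() {
	buf := make([]byte, 64)
	for i := range buf {
		buf[i] = byte(i)
	}
	hexdumpWords(buf, func(off int) byte {
		if off == 16 {
			return '>' // mark one word of interest
		}
		return 0
	})
}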
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index ed333bb..2e958f7 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -96,6 +96,9 @@
//go:linkname main_main main.main
func main_main()
+// mainStarted indicates that the main M has started.
+var mainStarted bool
+
// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64
@@ -119,8 +122,8 @@
maxstacksize = 250000000
}
- // Record when the world started.
- runtimeInitTime = nanotime()
+ // Allow newproc to start new Ms.
+ mainStarted = true
systemstack(func() {
newm(sysmon, nil)
@@ -139,6 +142,9 @@
}
runtime_init() // must be before defer
+ if nanotime() == 0 {
+ throw("nanotime returning zero")
+ }
// Defer unlock so that runtime.Goexit during init does the unlock too.
needUnlock := true
@@ -148,6 +154,10 @@
}
}()
+ // Record when the world started. Must be after runtime_init
+ // because nanotime on some platforms depends on startNano.
+ runtimeInitTime = nanotime()
+
gcenable()
main_init_done = make(chan bool)
@@ -166,6 +176,9 @@
if _cgo_notify_runtime_init_done == nil {
throw("_cgo_notify_runtime_init_done missing")
}
+ // Start the template thread in case we enter Go from
+ // a C-created thread and need to create a new thread.
+ startTemplateThread()
cgocall(_cgo_notify_runtime_init_done, nil)
}
@@ -242,9 +255,10 @@
}
}
+//go:nosplit
+
// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
-//go:nosplit
func Gosched() {
mcall(gosched_m)
}
@@ -332,8 +346,8 @@
if s.elem != nil {
throw("runtime: sudog with non-nil elem")
}
- if s.selectdone != nil {
- throw("runtime: sudog with non-nil selectdone")
+ if s.isSelect {
+ throw("runtime: sudog with non-false isSelect")
}
if s.next != nil {
throw("runtime: sudog with non-nil next")
@@ -422,7 +436,7 @@
func lockedOSThread() bool {
gp := getg()
- return gp.lockedm != nil && gp.m.lockedg != nil
+ return gp.lockedm != 0 && gp.m.lockedg != 0
}
var (
@@ -488,13 +502,21 @@
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
- if procs > _MaxGomaxprocs {
- procs = _MaxGomaxprocs
- }
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
+ // For cgocheck > 1, we turn on the write barrier at all times
+ // and check all pointer writes. We can't do this until after
+ // procresize because the write barrier needs a P.
+ if debug.cgocheck > 1 {
+ writeBarrier.cgo = true
+ writeBarrier.enabled = true
+ for _, p := range allp {
+ p.wbBuf.reset()
+ }
+ }
+
if buildVersion == "" {
// Condition should never trigger. This code just serves
// to ensure runtime·buildVersion is kept in the resulting binary.
@@ -510,7 +532,7 @@
func checkmcount() {
// sched lock is held
- if sched.mcount > sched.maxmcount {
+ if mcount() > sched.maxmcount {
print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
throw("thread exhaustion")
}
@@ -524,15 +546,20 @@
callers(1, mp.createstack[:])
}
- mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
- if mp.fastrand == 0 {
- mp.fastrand = 0x49f6428a
+ lock(&sched.lock)
+ if sched.mnext+1 < sched.mnext {
+ throw("runtime: thread ID overflow")
+ }
+ mp.id = sched.mnext
+ sched.mnext++
+ checkmcount()
+
+ mp.fastrand[0] = 1597334677 * uint32(mp.id)
+ mp.fastrand[1] = uint32(cputicks())
+ if mp.fastrand[0]|mp.fastrand[1] == 0 {
+ mp.fastrand[1] = 1
}
- lock(&sched.lock)
- mp.id = sched.mcount
- sched.mcount++
- checkmcount()
mpreinit(mp)
if mp.gsignal != nil {
mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
@@ -755,8 +782,10 @@
// _Grunning or _Grunning|_Gscan; either way,
// we own gp.gcscanvalid, so it's safe to read.
// gp.gcscanvalid must not be true when we are running.
- print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
- throw("casgstatus")
+ systemstack(func() {
+ print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
+ throw("casgstatus")
+ })
}
// See http://golang.org/cl/21503 for justification of the yield delay.
@@ -941,7 +970,7 @@
// startTheWorld undoes the effects of stopTheWorld.
func startTheWorld() {
- systemstack(startTheWorldWithSema)
+ systemstack(func() { startTheWorldWithSema(false) })
// worldsema must be held over startTheWorldWithSema to ensure
// gomaxprocs cannot change while worldsema is held.
semrelease(&worldsema)
@@ -991,8 +1020,7 @@
_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
sched.stopwait--
// try to retake all P's in Psyscall status
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
s := p.status
if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
if trace.enabled {
@@ -1032,8 +1060,7 @@
if sched.stopwait != 0 {
bad = "stopTheWorld: not stopped (stopwait != 0)"
} else {
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
if p.status != _Pgcstop {
bad = "stopTheWorld: not stopped (status != _Pgcstop)"
}
@@ -1057,12 +1084,14 @@
_g_.m.helpgc = -1
}
-func startTheWorldWithSema() {
+func startTheWorldWithSema(emitTraceEvent bool) int64 {
_g_ := getg()
- _g_.m.locks++ // disable preemption because it can be holding p in a local var
- gp := netpoll(false) // non-blocking
- injectglist(gp)
+ _g_.m.locks++ // disable preemption because it can be holding p in a local var
+ if netpollinited() {
+ gp := netpoll(false) // non-blocking
+ injectglist(gp)
+ }
add := needaddgcproc()
lock(&sched.lock)
@@ -1097,6 +1126,12 @@
}
}
+ // Capture start-the-world time before doing clean-up tasks.
+ startTime := nanotime()
+ if emitTraceEvent {
+ traceGCSTWDone()
+ }
+
// Wakeup an additional proc in case we have excessive runnable goroutines
// in local queues or in the global queue. If we don't, the proc will park itself.
// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
@@ -1118,14 +1153,25 @@
if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
_g_.stackguard0 = stackPreempt
}
+
+ return startTime
}
// Called to start an M.
+//
+// This must not split the stack because we may not even have stack
+// bounds set up yet.
+//
+// May run during STW (because it doesn't have a P yet), so write
+// barriers are not allowed.
+//
//go:nosplit
+//go:nowritebarrierrec
func mstart() {
_g_ := getg()
- if _g_.stack.lo == 0 {
+ osStack := _g_.stack.lo == 0
+ if osStack {
// Initialize stack bounds from system stack.
// Cgo may have left stack size in stack.hi.
size := _g_.stack.hi
@@ -1139,33 +1185,37 @@
// both Go and C functions with stack growth prologues.
_g_.stackguard0 = _g_.stack.lo + _StackGuard
_g_.stackguard1 = _g_.stackguard0
- mstart1()
+ mstart1(0)
+
+ // Exit this thread.
+ if GOOS == "windows" || GOOS == "solaris" || GOOS == "plan9" {
+ // Windows, Solaris and Plan 9 always system-allocate
+ // the stack, but put it in _g_.stack before mstart,
+ // so the logic above hasn't set osStack yet.
+ osStack = true
+ }
+ mexit(osStack)
}
-func mstart1() {
+func mstart1(dummy int32) {
_g_ := getg()
if _g_ != _g_.m.g0 {
throw("bad runtime·mstart")
}
- // Record top of stack for use by mcall.
- // Once we call schedule we're never coming back,
- // so other calls can reuse this stack space.
- gosave(&_g_.m.g0.sched)
- _g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
+ // Record the caller for use as the top of stack in mcall and
+ // for terminating the thread.
+ // We're never coming back to mstart1 after we call schedule,
+ // so other calls can reuse the current frame.
+ save(getcallerpc(), getcallersp(unsafe.Pointer(&dummy)))
asminit()
minit()
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
if _g_.m == &m0 {
- // Create an extra M for callbacks on threads not created by Go.
- if iscgo && !cgoHasExtraM {
- cgoHasExtraM = true
- newextram()
- }
- initsig(false)
+ mstartm0()
}
if fn := _g_.m.mstartfn; fn != nil {
@@ -1182,6 +1232,114 @@
schedule()
}
+// mstartm0 implements part of mstart1 that only runs on the m0.
+//
+// Write barriers are allowed here because we know the GC can't be
+// running yet, so they'll be no-ops.
+//
+//go:yeswritebarrierrec
+func mstartm0() {
+ // Create an extra M for callbacks on threads not created by Go.
+ if iscgo && !cgoHasExtraM {
+ cgoHasExtraM = true
+ newextram()
+ }
+ initsig(false)
+}
+
+// mexit tears down and exits the current thread.
+//
+// Don't call this directly to exit the thread, since it must run at
+// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to
+// unwind the stack to the point that exits the thread.
+//
+// It is entered with m.p != nil, so write barriers are allowed. It
+// will release the P before exiting.
+//
+//go:yeswritebarrierrec
+func mexit(osStack bool) {
+ g := getg()
+ m := g.m
+
+ if m == &m0 {
+ // This is the main thread. Just wedge it.
+ //
+ // On Linux, exiting the main thread puts the process
+ // into a non-waitable zombie state. On Plan 9,
+ // exiting the main thread unblocks wait even though
+ // other threads are still running. On Solaris we can
+ // neither exitThread nor return from mstart. Other
+ // bad things probably happen on other platforms.
+ //
+ // We could try to clean up this M more before wedging
+ // it, but that complicates signal handling.
+ handoffp(releasep())
+ lock(&sched.lock)
+ sched.nmfreed++
+ checkdead()
+ unlock(&sched.lock)
+ notesleep(&m.park)
+ throw("locked m0 woke up")
+ }
+
+ sigblock()
+ unminit()
+
+ // Free the gsignal stack.
+ if m.gsignal != nil {
+ stackfree(m.gsignal.stack)
+ }
+
+ // Remove m from allm.
+ lock(&sched.lock)
+ for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
+ if *pprev == m {
+ *pprev = m.alllink
+ goto found
+ }
+ }
+ throw("m not found in allm")
+found:
+ if !osStack {
+ // Delay reaping m until it's done with the stack.
+ //
+ // If this is using an OS stack, the OS will free it
+ // so there's no need for reaping.
+ atomic.Store(&m.freeWait, 1)
+ // Put m on the free list, though it will not be reaped until
+ // freeWait is 0. Note that the free list must not be linked
+ // through alllink because some functions walk allm without
+ // locking, so may be using alllink.
+ m.freelink = sched.freem
+ sched.freem = m
+ }
+ unlock(&sched.lock)
+
+ // Release the P.
+ handoffp(releasep())
+ // After this point we must not have write barriers.
+
+ // Invoke the deadlock detector. This must happen after
+ // handoffp because it may have started a new M to take our
+ // P's work.
+ lock(&sched.lock)
+ sched.nmfreed++
+ checkdead()
+ unlock(&sched.lock)
+
+ if osStack {
+ // Return from mstart and let the system thread
+ // library free the g0 stack and terminate the thread.
+ return
+ }
+
+ // mstart is the thread's entry point, so there's nothing to
+ // return to. Exit the thread directly. exitThread will clear
+ // m.freeWait when it's done with the stack and the m can be
+ // reaped.
+ exitThread(&m.freeWait)
+}
+
// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
@@ -1205,7 +1363,7 @@
sched.safePointFn = fn
// Ask all Ps to run the safe point function.
- for _, p := range allp[:gomaxprocs] {
+ for _, p := range allp {
if p != _p_ {
atomic.Store(&p.runSafePointFn, 1)
}
@@ -1233,8 +1391,7 @@
// Force Ps currently in _Psyscall into _Pidle and hand them
// off to induce safe point function execution.
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
s := p.status
if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
if trace.enabled {
@@ -1263,8 +1420,7 @@
if sched.safePointWait != 0 {
throw("forEachP: not done")
}
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
if p.runSafePointFn != 0 {
throw("forEachP: P did not run fn")
}
@@ -1329,6 +1485,27 @@
if _g_.m.p == 0 {
acquirep(_p_) // temporarily borrow p for mallocs in this function
}
+
+ // Release the free M list. We need to do this somewhere and
+ // this may free up a stack we can use.
+ if sched.freem != nil {
+ lock(&sched.lock)
+ var newList *m
+ for freem := sched.freem; freem != nil; {
+ if freem.freeWait != 0 {
+ next := freem.freelink
+ freem.freelink = newList
+ newList = freem
+ freem = next
+ continue
+ }
+ stackfree(freem.g0.stack)
+ freem = freem.freelink
+ }
+ sched.freem = newList
+ unlock(&sched.lock)
+ }
+
mp := new(m)
mp.mstartfn = fn
mcommoninit(mp)
@@ -1488,9 +1665,9 @@
casgstatus(gp, _Gidle, _Gdead)
gp.m = mp
mp.curg = gp
- mp.locked = _LockInternal
- mp.lockedg = gp
- gp.lockedm = mp
+ mp.lockedInt++
+ mp.lockedg.set(gp)
+ gp.lockedm.set(mp)
gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
if raceenabled {
gp.racectx = racegostart(funcPC(newextram) + sys.PCQuantum)
@@ -1619,6 +1796,27 @@
// around exec'ing while creating/destroying threads. See issue #19546.
var execLock rwmutex
+// newmHandoff contains a list of m structures that need new OS threads.
+// This is used by newm in situations where newm itself can't safely
+// start an OS thread.
+var newmHandoff struct {
+ lock mutex
+
+ // newm points to a list of M structures that need new OS
+ // threads. The list is linked through m.schedlink.
+ newm muintptr
+
+ // waiting indicates that wake needs to be notified when an m
+ // is put on the list.
+ waiting bool
+ wake note
+
+ // haveTemplateThread indicates that the templateThread has
+ // been started. This is not protected by lock. Use cas to set
+ // to 1.
+ haveTemplateThread uint32
+}
+
// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
@@ -1627,6 +1825,35 @@
mp := allocm(_p_, fn)
mp.nextp.set(_p_)
mp.sigmask = initSigmask
+ if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
+ // We're on a locked M or a thread that may have been
+ // started by C. The kernel state of this thread may
+ // be strange (the user may have locked it for that
+ // purpose). We don't want to clone that into another
+ // thread. Instead, ask a known-good thread to create
+ // the thread for us.
+ //
+ // This is disabled on Plan 9. See golang.org/issue/22227.
+ //
+ // TODO: This may be unnecessary on Windows, which
+ // doesn't model thread creation off fork.
+ lock(&newmHandoff.lock)
+ if newmHandoff.haveTemplateThread == 0 {
+ throw("on a locked thread with no template thread")
+ }
+ mp.schedlink = newmHandoff.newm
+ newmHandoff.newm.set(mp)
+ if newmHandoff.waiting {
+ newmHandoff.waiting = false
+ notewakeup(&newmHandoff.wake)
+ }
+ unlock(&newmHandoff.lock)
+ return
+ }
+ newm1(mp)
+}
+
+func newm1(mp *m) {
if iscgo {
var ts cgothreadstart
if _cgo_thread_start == nil {
@@ -1648,6 +1875,56 @@
execLock.runlock()
}
+// startTemplateThread starts the template thread if it is not already
+// running.
+//
+// The calling thread must itself be in a known-good state.
+func startTemplateThread() {
+ if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
+ return
+ }
+ newm(templateThread, nil)
+}
+
+// templateThread is a thread in a known-good state that exists solely
+// to start new threads in known-good states when the calling thread
+// may not be in a good state.
+//
+// Many programs never need this, so templateThread is started lazily
+// when we first enter a state that might lead to running on a thread
+// in an unknown state.
+//
+// templateThread runs on an M without a P, so it must not have write
+// barriers.
+//
+//go:nowritebarrierrec
+func templateThread() {
+ lock(&sched.lock)
+ sched.nmsys++
+ checkdead()
+ unlock(&sched.lock)
+
+ for {
+ lock(&newmHandoff.lock)
+ for newmHandoff.newm != 0 {
+ newm := newmHandoff.newm.ptr()
+ newmHandoff.newm = 0
+ unlock(&newmHandoff.lock)
+ for newm != nil {
+ next := newm.schedlink.ptr()
+ newm.schedlink = 0
+ newm1(newm)
+ newm = next
+ }
+ lock(&newmHandoff.lock)
+ }
+ newmHandoff.waiting = true
+ noteclear(&newmHandoff.wake)
+ unlock(&newmHandoff.lock)
+ notesleep(&newmHandoff.wake)
+ }
+}
+
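
The templateThread loop is a handoff queue: newm pushes an M under newmHandoff.lock and wakes the note; the template thread drains the list, re-arms the note, and sleeps. A rough user-space analogue with sync.Cond standing in for the runtime note (illustrative only, not runtime code):

package main

import (
	"fmt"
	"sync"
	"time"
)

type handoff struct {
	mu      sync.Mutex
	cond    *sync.Cond
	queue   []string
	waiting bool
}

// push appends an item and wakes the server if it is parked,
// like the newm path that sets newmHandoff.newm and calls notewakeup.
func (h *handoff) push(item string) {
	h.mu.Lock()
	h.queue = append(h.queue, item)
	if h.waiting {
		h.waiting = false
		h.cond.Signal()
	}
	h.mu.Unlock()
}

// serve drains the queue, then re-arms and parks, like templateThread.
func (h *handoff) serve() {
	for {
		h.mu.Lock()
		for len(h.queue) > 0 {
			batch := h.queue
			h.queue = nil
			h.mu.Unlock()
			for _, item := range batch {
				fmt.Println("handling", item) // stands in for newm1
			}
			h.mu.Lock()
		}
		h.waiting = true // like noteclear + notesleep
		for h.waiting {
			h.cond.Wait()
		}
		h.mu.Unlock()
	}
}

func main() {
	h := &handoff{}
	h.cond = sync.NewCond(&h.mu)
	go h.serve()
	h.push("m1")
	h.push("m2")
	time.Sleep(100 * time.Millisecond)
}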
// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
@@ -1670,7 +1947,9 @@
notesleep(&_g_.m.park)
noteclear(&_g_.m.park)
if _g_.m.helpgc != 0 {
+ // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P.
gchelper()
+ // Undo the effects of helpgc().
_g_.m.helpgc = 0
_g_.m.mcache = nil
_g_.m.p = 0
@@ -1804,7 +2083,7 @@
func stoplockedm() {
_g_ := getg()
- if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
+ if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
throw("stoplockedm: inconsistent locking")
}
if _g_.m.p != 0 {
@@ -1816,7 +2095,7 @@
// Wait until another thread schedules lockedg again.
notesleep(&_g_.m.park)
noteclear(&_g_.m.park)
- status := readgstatus(_g_.m.lockedg)
+ status := readgstatus(_g_.m.lockedg.ptr())
if status&^_Gscan != _Grunnable {
print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
dumpgstatus(_g_)
@@ -1832,7 +2111,7 @@
func startlockedm(gp *g) {
_g_ := getg()
- mp := gp.lockedm
+ mp := gp.lockedm.ptr()
if mp == _g_.m {
throw("startlockedm: locked to me")
}
@@ -1958,11 +2237,12 @@
// Poll network.
// This netpoll is only an optimization before we resort to stealing.
- // We can safely skip it if there a thread blocked in netpoll already.
- // If there is any kind of logical race with that blocked thread
- // (e.g. it has already returned from netpoll, but does not set lastpoll yet),
- // this thread will do blocking netpoll below anyway.
- if netpollinited() && sched.lastpoll != 0 {
+ // We can safely skip it if there are no waiters or a thread is blocked
+ // in netpoll already. If there is any kind of logical race with that
+ // blocked thread (e.g. it has already returned from netpoll, but does
+ // not set lastpoll yet), this thread will do blocking netpoll below
+ // anyway.
+ if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
if gp := netpoll(false); gp != nil { // non-blocking
// netpoll returns list of goroutines linked by schedlink.
injectglist(gp.schedlink.ptr())
@@ -2019,6 +2299,12 @@
return gp, false
}
+ // Before we drop our P, make a snapshot of the allp slice,
+ // which can change underfoot once we no longer block
+ // safe-points. We don't need to snapshot the contents because
+ // everything up to cap(allp) is immutable.
+ allpSnapshot := allp
+
// return P and block
lock(&sched.lock)
if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
@@ -2058,9 +2344,8 @@
}
// check all runqueues once again
- for i := 0; i < int(gomaxprocs); i++ {
- _p_ := allp[i]
- if _p_ != nil && !runqempty(_p_) {
+ for _, _p_ := range allpSnapshot {
+ if !runqempty(_p_) {
lock(&sched.lock)
_p_ = pidleget()
unlock(&sched.lock)
@@ -2199,9 +2484,15 @@
throw("schedule: holding locks")
}
- if _g_.m.lockedg != nil {
+ if _g_.m.lockedg != 0 {
stoplockedm()
- execute(_g_.m.lockedg, false) // Never returns.
+ execute(_g_.m.lockedg.ptr(), false) // Never returns.
+ }
+
+ // We should not schedule away from a g that is executing a cgo call,
+ // since the cgo call is using the m's g0 stack.
+ if _g_.m.incgo {
+ throw("schedule: in cgo")
}
top:
@@ -2252,7 +2543,7 @@
resetspinning()
}
- if gp.lockedm != nil {
+ if gp.lockedm != 0 {
// Hands off own p to the locked m,
// then blocks waiting for a new p.
startlockedm(gp)
@@ -2371,8 +2662,9 @@
atomic.Xadd(&sched.ngsys, -1)
}
gp.m = nil
- gp.lockedm = nil
- _g_.m.lockedg = nil
+ locked := gp.lockedm != 0
+ gp.lockedm = 0
+ _g_.m.lockedg = 0
gp.paniconfault = false
gp._defer = nil // should be true already but just in case.
gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
@@ -2382,17 +2674,37 @@
gp.labels = nil
gp.timer = nil
+ if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
+ // Flush assist credit to the global pool. This gives
+ // better information to pacing if the application is
+ // rapidly creating and exiting goroutines.
+ scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes))
+ atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
+ gp.gcAssistBytes = 0
+ }
+
// Note that gp's stack scan is now "valid" because it has no
// stack.
gp.gcscanvalid = true
dropg()
- if _g_.m.locked&^_LockExternal != 0 {
- print("invalid m->locked = ", _g_.m.locked, "\n")
+ if _g_.m.lockedInt != 0 {
+ print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
throw("internal lockOSThread error")
}
- _g_.m.locked = 0
+ _g_.m.lockedExt = 0
gfput(_g_.m.p.ptr(), gp)
+ if locked {
+ // The goroutine may have locked this thread because
+ // it put it in an unusual kernel state. Kill it
+ // rather than returning it to the thread pool.
+
+ // Return to mstart, which will release the P and exit
+ // the thread.
+ if GOOS != "plan9" { // See golang.org/issue/22227.
+ gogo(&_g_.m.g0.sched)
+ }
+ }
schedule()
}
@@ -2522,7 +2834,7 @@
// Standard syscall entry used by the go syscall library and normal cgo calls.
//go:nosplit
func entersyscall(dummy int32) {
- reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+ reentersyscall(getcallerpc(), getcallersp(unsafe.Pointer(&dummy)))
}
func entersyscall_sysmon() {
@@ -2565,7 +2877,7 @@
_g_.m.p.ptr().syscalltick++
// Leave SP around for GC and traceback.
- pc := getcallerpc(unsafe.Pointer(&dummy))
+ pc := getcallerpc()
sp := getcallersp(unsafe.Pointer(&dummy))
save(pc, sp)
_g_.syscallsp = _g_.sched.sp
@@ -2590,7 +2902,7 @@
systemstack(entersyscallblock_handoff)
// Resave for traceback during blocked call.
- save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+ save(getcallerpc(), getcallersp(unsafe.Pointer(&dummy)))
_g_.m.locks--
}
@@ -2629,7 +2941,9 @@
oldp := _g_.m.p.ptr()
if exitsyscallfast() {
if _g_.m.mcache == nil {
- throw("lost mcache")
+ systemstack(func() {
+ throw("lost mcache")
+ })
}
if trace.enabled {
if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
@@ -2676,7 +2990,9 @@
mcall(exitsyscall0)
if _g_.m.mcache == nil {
- throw("lost mcache")
+ systemstack(func() {
+ throw("lost mcache")
+ })
}
// Scheduler returned, so we're allowed to run now.
@@ -2800,7 +3116,7 @@
acquirep(_p_)
execute(gp, false) // Never returns.
}
- if _g_.m.lockedg != nil {
+ if _g_.m.lockedg != 0 {
// Wait until another thread schedules gp and so m again.
stoplockedm()
execute(gp, false) // Never returns.
@@ -2918,17 +3234,16 @@
//go:nosplit
func newproc(siz int32, fn *funcval) {
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
- pc := getcallerpc(unsafe.Pointer(&siz))
+ pc := getcallerpc()
systemstack(func() {
- newproc1(fn, (*uint8)(argp), siz, 0, pc)
+ newproc1(fn, (*uint8)(argp), siz, pc)
})
}
// Create a new g running fn with narg bytes of arguments starting
-// at argp and returning nret bytes of results. callerpc is the
-// address of the go statement that created this. The new g is put
-// on the queue of g's waiting to run.
-func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
+// at argp. callerpc is the address of the go statement that created
+// this. The new g is put on the queue of g's waiting to run.
+func newproc1(fn *funcval, argp *uint8, narg int32, callerpc uintptr) {
_g_ := getg()
if fn == nil {
@@ -2936,7 +3251,7 @@
throw("go of nil func value")
}
_g_.m.locks++ // disable preemption because it can be holding p in a local var
- siz := narg + nret
+ siz := narg
siz = (siz + 7) &^ 7
// We could allocate a larger initial stack if necessary.
@@ -3024,14 +3339,13 @@
}
runqput(_p_, newg, true)
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 {
+ if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
wakep()
}
_g_.m.locks--
if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
_g_.stackguard0 = stackPreempt
}
- return newg
}
// Put on gfree list.
@@ -3150,23 +3464,41 @@
//go:nosplit
func dolockOSThread() {
_g_ := getg()
- _g_.m.lockedg = _g_
- _g_.lockedm = _g_.m
+ _g_.m.lockedg.set(_g_)
+ _g_.lockedm.set(_g_.m)
}
//go:nosplit
// LockOSThread wires the calling goroutine to its current operating system thread.
-// Until the calling goroutine exits or calls UnlockOSThread, it will always
-// execute in that thread, and no other goroutine can.
+// The calling goroutine will always execute in that thread,
+// and no other goroutine will execute in it,
+// until the calling goroutine has made as many calls to
+// UnlockOSThread as to LockOSThread.
+// If the calling goroutine exits without unlocking the thread,
+// the thread will be terminated.
+//
+// A goroutine should call LockOSThread before calling OS services or
+// non-Go library functions that depend on per-thread state.
func LockOSThread() {
- getg().m.locked |= _LockExternal
+ if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
+ // If we need to start a new thread from the locked
+ // thread, we need the template thread. Start it now
+ // while we're in a known-good state.
+ startTemplateThread()
+ }
+ _g_ := getg()
+ _g_.m.lockedExt++
+ if _g_.m.lockedExt == 0 {
+ _g_.m.lockedExt--
+ panic("LockOSThread nesting overflow")
+ }
dolockOSThread()
}
//go:nosplit
func lockOSThread() {
- getg().m.locked += _LockInternal
+ getg().m.lockedInt++
dolockOSThread()
}
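
Under the new semantics, LockOSThread calls nest: the thread stays wired until an equal number of UnlockOSThread calls, and a goroutine that exits while still locked terminates its thread (except on Plan 9). A small usage sketch:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done)

		// Nested locks: the goroutine stays wired to this OS thread
		// until the lock count returns to zero.
		runtime.LockOSThread()
		runtime.LockOSThread()

		// ... call into thread-sensitive OS services or a C library here ...
		fmt.Println("running on a wired thread")

		runtime.UnlockOSThread() // count 2 -> 1, still wired
		runtime.UnlockOSThread() // count 1 -> 0, thread released
	}()
	<-done
}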
@@ -3176,29 +3508,43 @@
//go:nosplit
func dounlockOSThread() {
_g_ := getg()
- if _g_.m.locked != 0 {
+ if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
return
}
- _g_.m.lockedg = nil
- _g_.lockedm = nil
+ _g_.m.lockedg = 0
+ _g_.lockedm = 0
}
//go:nosplit
-// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
-// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+// UnlockOSThread undoes an earlier call to LockOSThread.
+// If this drops the number of active LockOSThread calls on the
+// calling goroutine to zero, it unwires the calling goroutine from
+// its fixed operating system thread.
+// If there are no active LockOSThread calls, this is a no-op.
+//
+// Before calling UnlockOSThread, the caller must ensure that the OS
+// thread is suitable for running other goroutines. If the caller made
+// any permanent changes to the state of the thread that would affect
+// other goroutines, it should not call this function and thus leave
+// the goroutine locked to the OS thread until the goroutine (and
+// hence the thread) exits.
func UnlockOSThread() {
- getg().m.locked &^= _LockExternal
+ _g_ := getg()
+ if _g_.m.lockedExt == 0 {
+ return
+ }
+ _g_.m.lockedExt--
dounlockOSThread()
}
//go:nosplit
func unlockOSThread() {
_g_ := getg()
- if _g_.m.locked < _LockInternal {
+ if _g_.m.lockedInt == 0 {
systemstack(badunlockosthread)
}
- _g_.m.locked -= _LockInternal
+ _g_.m.lockedInt--
dounlockOSThread()
}
@@ -3208,10 +3554,7 @@
func gcount() int32 {
n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys))
- for _, _p_ := range &allp {
- if _p_ == nil {
- break
- }
+ for _, _p_ := range allp {
n -= _p_.gfreecnt
}
@@ -3224,7 +3567,7 @@
}
func mcount() int32 {
- return sched.mcount
+ return int32(sched.mnext - sched.nmfreed)
}
var prof struct {
@@ -3506,7 +3849,7 @@
// Returns list of Ps with local work, they need to be scheduled by the caller.
func procresize(nprocs int32) *p {
old := gomaxprocs
- if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs {
+ if old < 0 || nprocs <= 0 {
throw("procresize: invalid arg")
}
if trace.enabled {
@@ -3520,6 +3863,23 @@
}
sched.procresizetime = now
+ // Grow allp if necessary.
+ if nprocs > int32(len(allp)) {
+ // Synchronize with retake, which could be running
+ // concurrently since it doesn't run on a P.
+ lock(&allpLock)
+ if nprocs <= int32(cap(allp)) {
+ allp = allp[:nprocs]
+ } else {
+ nallp := make([]*p, nprocs)
+ // Copy everything up to allp's cap so we
+ // never lose old allocated Ps.
+ copy(nallp, allp[:cap(allp)])
+ allp = nallp
+ }
+ unlock(&allpLock)
+ }
+
// initialize new P's
for i := int32(0); i < nprocs; i++ {
pp := allp[i]
@@ -3531,6 +3891,7 @@
for i := range pp.deferpool {
pp.deferpool[i] = pp.deferpoolbuf[i][:0]
}
+ pp.wbBuf.reset()
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
if pp.mcache == nil {
@@ -3556,13 +3917,11 @@
// free unused P's
for i := nprocs; i < old; i++ {
p := allp[i]
- if trace.enabled {
- if p == getg().m.p.ptr() {
- // moving to p[0], pretend that we were descheduled
- // and then scheduled again to keep the trace sane.
- traceGoSched()
- traceProcStop(p)
- }
+ if trace.enabled && p == getg().m.p.ptr() {
+ // moving to p[0], pretend that we were descheduled
+ // and then scheduled again to keep the trace sane.
+ traceGoSched()
+ traceProcStop(p)
}
// move all runnable goroutines to the global queue
for p.runqhead != p.runqtail {
@@ -3588,6 +3947,11 @@
// world is stopped.
p.gcBgMarkWorker.set(nil)
}
+ // Flush p's write barrier buffer.
+ if gcphase != _GCoff {
+ wbBufFlush1(p)
+ p.gcw.dispose()
+ }
for i := range p.sudogbuf {
p.sudogbuf[i] = nil
}
@@ -3606,10 +3970,18 @@
raceprocdestroy(p.racectx)
p.racectx = 0
}
+ p.gcAssistTime = 0
p.status = _Pdead
// can't free P itself because it can be referenced by an M in syscall
}
+ // Trim allp.
+ if int32(len(allp)) != nprocs {
+ lock(&allpLock)
+ allp = allp[:nprocs]
+ unlock(&allpLock)
+ }
+
_g_ := getg()
if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
// continue to use the current P
@@ -3681,7 +4053,7 @@
throw("acquirep: already in go")
}
if _p_.m != 0 || _p_.status != _Pidle {
- id := int32(0)
+ id := int64(0)
if _p_.m != 0 {
id = _p_.m.ptr().id
}
@@ -3726,6 +4098,7 @@
// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
+// sched.lock must be held.
func checkdead() {
// For -buildmode=c-shared or -buildmode=c-archive it's OK if
// there are no running goroutines. The calling program is
@@ -3742,13 +4115,12 @@
return
}
- // -1 for sysmon
- run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
+ run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
if run > 0 {
return
}
if run < 0 {
- print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
+ print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
throw("checkdead: inconsistent counts")
}
@@ -3811,6 +4183,11 @@
//
//go:nowritebarrierrec
func sysmon() {
+ lock(&sched.lock)
+ sched.nmsys++
+ checkdead()
+ unlock(&sched.lock)
+
// If a heap span goes unused for 5 minutes after a garbage collection,
// we hand it back to the operating system.
scavengelimit := int64(5 * 60 * 1e9)
@@ -3850,15 +4227,11 @@
}
shouldRelax := true
if osRelaxMinNS > 0 {
- lock(&timers.lock)
- if timers.sleeping {
- now := nanotime()
- next := timers.sleepUntil
- if next-now < osRelaxMinNS {
- shouldRelax = false
- }
+ next := timeSleepUntil()
+ now := nanotime()
+ if next-now < osRelaxMinNS {
+ shouldRelax = false
}
- unlock(&timers.lock)
}
if shouldRelax {
osRelax(true)
@@ -3882,7 +4255,7 @@
// poll network if not polled for more than 10ms
lastpoll := int64(atomic.Load64(&sched.lastpoll))
now := nanotime()
- if lastpoll != 0 && lastpoll+10*1000*1000 < now {
+ if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
gp := netpoll(false) // non-blocking - returns list of goroutines
if gp != nil {
@@ -3939,9 +4312,17 @@
func retake(now int64) uint32 {
n := 0
- for i := int32(0); i < gomaxprocs; i++ {
+ // Prevent allp slice changes. This lock will be completely
+ // uncontended unless we're already stopping the world.
+ lock(&allpLock)
+ // We can't use a range loop over allp because we may
+ // temporarily drop the allpLock. Hence, we need to re-fetch
+ // allp each time around the loop.
+ for i := 0; i < len(allp); i++ {
_p_ := allp[i]
if _p_ == nil {
+ // This can happen if procresize has grown
+ // allp but not yet created new Ps.
continue
}
pd := &_p_.sysmontick
@@ -3960,6 +4341,8 @@
if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
continue
}
+ // Drop allpLock so we can take sched.lock.
+ unlock(&allpLock)
// Need to decrement number of idle locked M's
// (pretending that one more is running) before the CAS.
// Otherwise the M from which we retake can exit the syscall,
@@ -3975,6 +4358,7 @@
handoffp(_p_)
}
incidlelocked(1)
+ lock(&allpLock)
} else if s == _Prunning {
// Preempt G if it's running for too long.
t := int64(_p_.schedtick)
@@ -3989,6 +4373,7 @@
preemptone(_p_)
}
}
+ unlock(&allpLock)
return uint32(n)
}
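The loop shape used in retake, index-based with len(allp) re-read on every pass and allpLock dropped around the slow path, is the general recipe for walking a lock-guarded slice that may grow whenever the lock is released. A standalone sketch of the same idea, with illustrative names:

package sketch

import "sync"

type proc struct{ busy bool }

var (
	mu    sync.Mutex
	procs []*proc // may be grown by another goroutine that holds mu
)

func walk(slow func(*proc)) {
	mu.Lock()
	// Index-based loop: len(procs) is re-evaluated every iteration because it
	// can change whenever mu is dropped below.
	for i := 0; i < len(procs); i++ {
		p := procs[i]
		if p == nil {
			continue // slot grown but not yet populated
		}
		if p.busy {
			mu.Unlock() // release before doing work that takes other locks
			slow(p)
			mu.Lock()
		}
	}
	mu.Unlock()
}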
@@ -3999,9 +4384,8 @@
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
res := false
- for i := int32(0); i < gomaxprocs; i++ {
- _p_ := allp[i]
- if _p_ == nil || _p_.status != _Prunning {
+ for _, _p_ := range allp {
+ if _p_.status != _Prunning {
continue
}
if preemptone(_p_) {
@@ -4050,23 +4434,19 @@
}
lock(&sched.lock)
- print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+ print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
if detailed {
print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
}
// We must be careful while reading data from P's, M's and G's.
// Even if we hold schedlock, most data can be changed concurrently.
// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
- for i := int32(0); i < gomaxprocs; i++ {
- _p_ := allp[i]
- if _p_ == nil {
- continue
- }
+ for i, _p_ := range allp {
mp := _p_.m.ptr()
h := atomic.Load(&_p_.runqhead)
t := atomic.Load(&_p_.runqtail)
if detailed {
- id := int32(-1)
+ id := int64(-1)
if mp != nil {
id = mp.id
}
@@ -4079,7 +4459,7 @@
print("[")
}
print(t - h)
- if i == gomaxprocs-1 {
+ if i == len(allp)-1 {
print("]\n")
}
}
@@ -4093,7 +4473,7 @@
for mp := allm; mp != nil; mp = mp.alllink {
_p_ := mp.p.ptr()
gp := mp.curg
- lockedg := mp.lockedg
+ lockedg := mp.lockedg.ptr()
id1 := int32(-1)
if _p_ != nil {
id1 = _p_.id
@@ -4113,12 +4493,12 @@
for gi := 0; gi < len(allgs); gi++ {
gp := allgs[gi]
mp := gp.m
- lockedm := gp.lockedm
- id1 := int32(-1)
+ lockedm := gp.lockedm.ptr()
+ id1 := int64(-1)
if mp != nil {
id1 = mp.id
}
- id2 := int32(-1)
+ id2 := int64(-1)
if lockedm != nil {
id2 = lockedm.id
}
@@ -4400,22 +4780,25 @@
if stealRunNextG {
// Try to steal from _p_.runnext.
if next := _p_.runnext; next != 0 {
- // Sleep to ensure that _p_ isn't about to run the g we
- // are about to steal.
- // The important use case here is when the g running on _p_
- // ready()s another g and then almost immediately blocks.
- // Instead of stealing runnext in this window, back off
- // to give _p_ a chance to schedule runnext. This will avoid
- // thrashing gs between different Ps.
- // A sync chan send/recv takes ~50ns as of time of writing,
- // so 3us gives ~50x overshoot.
- if GOOS != "windows" {
- usleep(3)
- } else {
- // On windows system timer granularity is 1-15ms,
- // which is way too much for this optimization.
- // So just yield.
- osyield()
+ if _p_.status == _Prunning {
+ // Sleep to ensure that _p_ isn't about to run the g
+ // we are about to steal.
+ // The important use case here is when the g running
+ // on _p_ ready()s another g and then almost
+ // immediately blocks. Instead of stealing runnext
+ // in this window, back off to give _p_ a chance to
+ // schedule runnext. This will avoid thrashing gs
+ // between different Ps.
+ // A sync chan send/recv takes ~50ns as of time of
+ // writing, so 3us gives ~50x overshoot.
+ if GOOS != "windows" {
+ usleep(3)
+ } else {
+ // On windows system timer granularity is
+ // 1-15ms, which is way too much for this
+ // optimization. So just yield.
+ osyield()
+ }
}
if !_p_.runnext.cas(next, 0) {
continue
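The reshuffled block above narrows the backoff to victims that are actually running; in generic form the technique is a conditional pause before CAS-stealing a single-slot hint. A standalone sketch under those assumptions (names and the 3us figure are illustrative):

package sketch

import (
	"sync/atomic"
	"time"
)

type worker struct {
	running int32 // 1 while the worker is actively executing
	next    int64 // single-slot "run next" hint; 0 means empty
}

// steal takes the victim's hint with a CAS, but first backs off briefly when
// the victim is running and might be about to consume the hint itself.
func steal(victim *worker) int64 {
	next := atomic.LoadInt64(&victim.next)
	if next == 0 {
		return 0
	}
	if atomic.LoadInt32(&victim.running) == 1 {
		time.Sleep(3 * time.Microsecond) // on coarse-timer platforms a yield would do
	}
	if atomic.CompareAndSwapInt64(&victim.next, next, 0) {
		return next
	}
	return 0
}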
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 90a6cab..2ece829 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -655,6 +655,116 @@
_ = sum
}
+func benchmarkWakeupParallel(b *testing.B, spin func(time.Duration)) {
+ if runtime.GOMAXPROCS(0) == 1 {
+ b.Skip("skipping: GOMAXPROCS=1")
+ }
+
+ wakeDelay := 5 * time.Microsecond
+ for _, delay := range []time.Duration{
+ 0,
+ 1 * time.Microsecond,
+ 2 * time.Microsecond,
+ 5 * time.Microsecond,
+ 10 * time.Microsecond,
+ 20 * time.Microsecond,
+ 50 * time.Microsecond,
+ 100 * time.Microsecond,
+ } {
+ b.Run(delay.String(), func(b *testing.B) {
+ if b.N == 0 {
+ return
+ }
+ // Start two goroutines, which alternate between being
+ // sender and receiver in the following protocol:
+ //
+ // - The receiver spins for `delay` and then does a
+ // blocking receive on a channel.
+ //
+ // - The sender spins for `delay+wakeDelay` and then
+ // sends to the same channel. (The addition of
+ // `wakeDelay` improves the probability that the
+ // receiver will be blocking when the send occurs when
+ // the goroutines execute in parallel.)
+ //
+ // In each iteration of the benchmark, each goroutine
+ // acts once as sender and once as receiver, so each
+ // goroutine spins for delay twice.
+ //
+ // BenchmarkWakeupParallel is used to estimate how
+ // efficiently the scheduler parallelizes goroutines in
+ // the presence of blocking:
+ //
+ // - If both goroutines are executed on the same core,
+ // an increase in delay by N will increase the time per
+ // iteration by 4*N, because all 4 delays are
+ // serialized.
+ //
+ // - Otherwise, an increase in delay by N will increase
+ // the time per iteration by 2*N, and the time per
+ // iteration is 2 * (runtime overhead + chan
+ // send/receive pair + delay + wakeDelay). This allows
+ // the runtime overhead, including the time it takes
+ // for the unblocked goroutine to be scheduled, to be
+ // estimated.
+ ping, pong := make(chan struct{}), make(chan struct{})
+ start := make(chan struct{})
+ done := make(chan struct{})
+ go func() {
+ <-start
+ for i := 0; i < b.N; i++ {
+ // sender
+ spin(delay + wakeDelay)
+ ping <- struct{}{}
+ // receiver
+ spin(delay)
+ <-pong
+ }
+ done <- struct{}{}
+ }()
+ go func() {
+ for i := 0; i < b.N; i++ {
+ // receiver
+ spin(delay)
+ <-ping
+ // sender
+ spin(delay + wakeDelay)
+ pong <- struct{}{}
+ }
+ done <- struct{}{}
+ }()
+ b.ResetTimer()
+ start <- struct{}{}
+ <-done
+ <-done
+ })
+ }
+}
+
+func BenchmarkWakeupParallelSpinning(b *testing.B) {
+ benchmarkWakeupParallel(b, func(d time.Duration) {
+ end := time.Now().Add(d)
+ for time.Now().Before(end) {
+ // do nothing
+ }
+ })
+}
+
+// sysNanosleep is defined by OS-specific files (such as runtime_linux_test.go)
+// to sleep for the given duration. If nil, dependent tests are skipped.
+// The implementation should invoke a blocking system call and not
+// call time.Sleep, which would deschedule the goroutine.
+var sysNanosleep func(d time.Duration)
+
+func BenchmarkWakeupParallelSyscall(b *testing.B) {
+ if sysNanosleep == nil {
+ b.Skipf("skipping on %v; sysNanosleep not defined", runtime.GOOS)
+ }
+ benchmarkWakeupParallel(b, func(d time.Duration) {
+ sysNanosleep(d)
+ })
+}
+
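For reference, the OS-specific hook on Linux could plausibly look like the sketch below (an assumption about runtime_linux_test.go, not part of this diff): it blocks the thread in a system call instead of letting the scheduler park the goroutine.

// hypothetical OS-specific test file; assumes "syscall" and "time" are imported
func init() {
	sysNanosleep = func(d time.Duration) {
		ts := syscall.NsecToTimespec(d.Nanoseconds())
		// Blocks the calling thread in the kernel. time.Sleep would instead
		// deschedule the goroutine, which is exactly what this benchmark
		// wants to keep out of the measurement.
		syscall.Nanosleep(&ts, nil)
	}
}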
type Matrix [][]float64
func BenchmarkMatmult(b *testing.B) {
@@ -722,3 +832,44 @@
func TestStealOrder(t *testing.T) {
runtime.RunStealOrderTest()
}
+
+func TestLockOSThreadNesting(t *testing.T) {
+ go func() {
+ e, i := runtime.LockOSCounts()
+ if e != 0 || i != 0 {
+ t.Errorf("want locked counts 0, 0; got %d, %d", e, i)
+ return
+ }
+ runtime.LockOSThread()
+ runtime.LockOSThread()
+ runtime.UnlockOSThread()
+ e, i = runtime.LockOSCounts()
+ if e != 1 || i != 0 {
+ t.Errorf("want locked counts 1, 0; got %d, %d", e, i)
+ return
+ }
+ runtime.UnlockOSThread()
+ e, i = runtime.LockOSCounts()
+ if e != 0 || i != 0 {
+ t.Errorf("want locked counts 0, 0; got %d, %d", e, i)
+ return
+ }
+ }()
+}
+
+func TestLockOSThreadExit(t *testing.T) {
+ testLockOSThreadExit(t, "testprog")
+}
+
+func testLockOSThreadExit(t *testing.T, prog string) {
+ output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1")
+ want := "OK\n"
+ if output != want {
+ t.Errorf("want %s, got %s\n", want, output)
+ }
+
+ output = runTestProg(t, prog, "LockOSThreadAlt")
+ if output != want {
+ t.Errorf("want %s, got %s\n", want, output)
+ }
+}
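The semantics these tests pin down (LockOSThread calls nest, and the goroutine stays wired to its thread until every call is matched by UnlockOSThread) can be seen in a tiny standalone program; a sketch, not part of the CL:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.LockOSThread()
	runtime.LockOSThread() // nested: two outstanding locks
	runtime.UnlockOSThread()
	// Still locked here; one LockOSThread call has not been matched yet.
	runtime.UnlockOSThread()
	// Now fully unlocked; the goroutine may migrate between OS threads again.
	fmt.Println("done")
}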
diff --git a/src/runtime/race.go b/src/runtime/race.go
index 49495cc..2f5713d 100644
--- a/src/runtime/race.go
+++ b/src/runtime/race.go
@@ -4,14 +4,14 @@
// +build race
-// Public race detection API, present iff build with -race.
-
package runtime
import (
"unsafe"
)
+// Public race detection API, present iff built with -race.
+
func RaceRead(addr unsafe.Pointer)
func RaceWrite(addr unsafe.Pointer)
func RaceReadRange(addr unsafe.Pointer, len int)
@@ -23,7 +23,69 @@
return int(n)
}
-// private interface for the runtime
+//go:nosplit
+
+// RaceAcquire/RaceRelease/RaceReleaseMerge establish happens-before relations
+// between goroutines. These inform the race detector about actual synchronization
+// that it can't see for some reason (e.g. synchronization within RaceDisable/RaceEnable
+// sections of code).
+// RaceAcquire establishes a happens-before relation with the preceding
+// RaceReleaseMerge on addr up to and including the last RaceRelease on addr.
+// In terms of the C memory model (C11 §5.1.2.4, §7.17.3),
+// RaceAcquire is equivalent to atomic_load(memory_order_acquire).
+func RaceAcquire(addr unsafe.Pointer) {
+ raceacquire(addr)
+}
+
+//go:nosplit
+
+// RaceRelease performs a release operation on addr that
+// can synchronize with a later RaceAcquire on addr.
+//
+// In terms of the C memory model, RaceRelease is equivalent to
+// atomic_store(memory_order_release).
+func RaceRelease(addr unsafe.Pointer) {
+ racerelease(addr)
+}
+
+//go:nosplit
+
+// RaceReleaseMerge is like RaceRelease, but also establishes a happens-before
+// relation with the preceding RaceRelease or RaceReleaseMerge on addr.
+//
+// In terms of the C memory model, RaceReleaseMerge is equivalent to
+// atomic_exchange(memory_order_release).
+func RaceReleaseMerge(addr unsafe.Pointer) {
+ racereleasemerge(addr)
+}
+
+//go:nosplit
+
+// RaceDisable disables handling of race synchronization events in the current goroutine.
+// Handling is re-enabled with RaceEnable. RaceDisable/RaceEnable can be nested.
+// Non-synchronization events (memory accesses, function entry/exit) still affect
+// the race detector.
+func RaceDisable() {
+ _g_ := getg()
+ if _g_.raceignore == 0 {
+ racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
+ }
+ _g_.raceignore++
+}
+
+//go:nosplit
+
+// RaceEnable re-enables handling of race events in the current goroutine.
+func RaceEnable() {
+ _g_ := getg()
+ _g_.raceignore--
+ if _g_.raceignore == 0 {
+ racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
+ }
+}
+
+// Private interface for the runtime.
+
const raceenabled = true
// For all functions accepting callerpc and pc,
@@ -433,43 +495,3 @@
func racefingo() {
racecall(&__tsan_finalizer_goroutine, getg().racectx, 0, 0, 0)
}
-
-//go:nosplit
-
-func RaceAcquire(addr unsafe.Pointer) {
- raceacquire(addr)
-}
-
-//go:nosplit
-
-func RaceRelease(addr unsafe.Pointer) {
- racerelease(addr)
-}
-
-//go:nosplit
-
-func RaceReleaseMerge(addr unsafe.Pointer) {
- racereleasemerge(addr)
-}
-
-//go:nosplit
-
-// RaceDisable disables handling of race events in the current goroutine.
-func RaceDisable() {
- _g_ := getg()
- if _g_.raceignore == 0 {
- racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
- }
- _g_.raceignore++
-}
-
-//go:nosplit
-
-// RaceEnable re-enables handling of race events in the current goroutine.
-func RaceEnable() {
- _g_ := getg()
- _g_.raceignore--
- if _g_.raceignore == 0 {
- racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
- }
-}
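These annotations exist only in -race builds; a hedged usage sketch for a hand-rolled lock whose synchronization is hidden from the detector inside a RaceDisable/RaceEnable section (the spinlock itself is purely illustrative):

// +build race

package sketch

import (
	"runtime"
	"sync/atomic"
	"unsafe"
)

type spinLock struct{ state int32 }

func (l *spinLock) Lock() {
	runtime.RaceDisable() // hide the raw spinning from the detector
	for !atomic.CompareAndSwapInt32(&l.state, 0, 1) {
		runtime.Gosched()
	}
	runtime.RaceEnable()
	// Describe the resulting happens-before edge explicitly.
	runtime.RaceAcquire(unsafe.Pointer(l))
}

func (l *spinLock) Unlock() {
	runtime.RaceRelease(unsafe.Pointer(l))
	runtime.RaceDisable()
	atomic.StoreInt32(&l.state, 0)
	runtime.RaceEnable()
}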
diff --git a/src/runtime/race/output_test.go b/src/runtime/race/output_test.go
index 13dfc33..adf9ce8 100644
--- a/src/runtime/race/output_test.go
+++ b/src/runtime/race/output_test.go
@@ -19,6 +19,16 @@
)
func TestOutput(t *testing.T) {
+ pkgdir, err := ioutil.TempDir("", "go-build-race-output")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(pkgdir)
+ out, err := exec.Command(testenv.GoToolPath(t), "install", "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", "testing").CombinedOutput()
+ if err != nil {
+ t.Fatalf("go install -race: %v\n%s", err, out)
+ }
+
for _, test := range tests {
if test.goos != "" && test.goos != runtime.GOOS {
t.Logf("test %v runs only on %v, skipping: ", test.name, test.goos)
@@ -47,7 +57,7 @@
t.Fatalf("failed to close file: %v", err)
}
// Pass -l to the compiler to test stack traces.
- cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-gcflags=-l", src)
+ cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", src)
// GODEBUG spoils program output, GOMAXPROCS makes it flaky.
for _, env := range os.Environ() {
if strings.HasPrefix(env, "GODEBUG=") ||
@@ -175,6 +185,7 @@
func TestFail(t *testing.T) {
done := make(chan bool)
x := 0
+ _ = x
go func() {
x = 42
done <- true
@@ -186,7 +197,7 @@
`, `
==================
--- FAIL: TestFail \(0...s\)
-.*main_test.go:13: true
+.*main_test.go:14: true
.*testing.go:.*: race detected during execution of test
FAIL`},
@@ -253,7 +264,7 @@
main\.goCallback\(\)
.*/main\.go:27 \+0x[0-9,a-f]+
main._cgoexpwrap_[0-9a-z]+_goCallback\(\)
- .*/_cgo_gotypes\.go:[0-9]+ \+0x[0-9,a-f]+
+ .*_cgo_gotypes\.go:[0-9]+ \+0x[0-9,a-f]+
Goroutine [0-9] \(running\) created at:
runtime\.newextram\(\)
@@ -265,6 +276,7 @@
func TestFail(t *testing.T) {
done := make(chan bool)
x := 0
+ _ = x
go func() {
x = 42
done <- true
diff --git a/src/runtime/race/testdata/atomic_test.go b/src/runtime/race/testdata/atomic_test.go
index 232744b..769c8d7 100644
--- a/src/runtime/race/testdata/atomic_test.go
+++ b/src/runtime/race/testdata/atomic_test.go
@@ -14,6 +14,7 @@
func TestNoRaceAtomicAddInt64(t *testing.T) {
var x1, x2 int8
+ _ = x1 + x2
var s int64
ch := make(chan bool, 2)
go func() {
@@ -36,6 +37,7 @@
func TestRaceAtomicAddInt64(t *testing.T) {
var x1, x2 int8
+ _ = x1 + x2
var s int64
ch := make(chan bool, 2)
go func() {
@@ -58,6 +60,7 @@
func TestNoRaceAtomicAddInt32(t *testing.T) {
var x1, x2 int8
+ _ = x1 + x2
var s int32
ch := make(chan bool, 2)
go func() {
@@ -80,6 +83,7 @@
func TestNoRaceAtomicLoadAddInt32(t *testing.T) {
var x int64
+ _ = x
var s int32
go func() {
x = 2
@@ -93,6 +97,7 @@
func TestNoRaceAtomicLoadStoreInt32(t *testing.T) {
var x int64
+ _ = x
var s int32
go func() {
x = 2
@@ -106,6 +111,7 @@
func TestNoRaceAtomicStoreCASInt32(t *testing.T) {
var x int64
+ _ = x
var s int32
go func() {
x = 2
@@ -119,6 +125,7 @@
func TestNoRaceAtomicCASLoadInt32(t *testing.T) {
var x int64
+ _ = x
var s int32
go func() {
x = 2
@@ -134,6 +141,7 @@
func TestNoRaceAtomicCASCASInt32(t *testing.T) {
var x int64
+ _ = x
var s int32
go func() {
x = 2
@@ -149,6 +157,7 @@
func TestNoRaceAtomicCASCASInt32_2(t *testing.T) {
var x1, x2 int8
+ _ = x1 + x2
var s int32
ch := make(chan bool, 2)
go func() {
@@ -171,6 +180,7 @@
func TestNoRaceAtomicLoadInt64(t *testing.T) {
var x int32
+ _ = x
var s int64
go func() {
x = 2
@@ -184,6 +194,7 @@
func TestNoRaceAtomicCASCASUInt64(t *testing.T) {
var x int64
+ _ = x
var s uint64
go func() {
x = 2
@@ -199,6 +210,7 @@
func TestNoRaceAtomicLoadStorePointer(t *testing.T) {
var x int64
+ _ = x
var s unsafe.Pointer
var y int = 2
var p unsafe.Pointer = unsafe.Pointer(&y)
@@ -214,6 +226,7 @@
func TestNoRaceAtomicStoreCASUint64(t *testing.T) {
var x int64
+ _ = x
var s uint64
go func() {
x = 2
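The blank assignments added throughout these testdata files all follow one pattern: the variable is written racily but never read, and the "_ = x" presumably keeps tooling from flagging or discarding the unused value while leaving the intended race untouched. A representative sketch in the same style (and with the same testing import) as these files:

func TestRaceSketch(t *testing.T) {
	x := 0
	_ = x // mark the written-but-never-read variable as intentionally kept
	done := make(chan bool)
	go func() {
		x = 1 // racy write
		done <- true
	}()
	x = 2 // conflicting write with no synchronization against the goroutine
	<-done
}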
diff --git a/src/runtime/race/testdata/chan_test.go b/src/runtime/race/testdata/chan_test.go
index 4491916..7f349c4 100644
--- a/src/runtime/race/testdata/chan_test.go
+++ b/src/runtime/race/testdata/chan_test.go
@@ -12,6 +12,7 @@
func TestNoRaceChanSync(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
v = 1
@@ -23,6 +24,7 @@
func TestNoRaceChanSyncRev(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
c <- 0
@@ -34,6 +36,7 @@
func TestNoRaceChanAsync(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -45,6 +48,7 @@
func TestRaceChanAsyncRev(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
c <- 0
@@ -56,6 +60,7 @@
func TestNoRaceChanAsyncCloseRecv(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -72,6 +77,7 @@
func TestNoRaceChanAsyncCloseRecv2(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -83,6 +89,7 @@
func TestNoRaceChanAsyncCloseRecv3(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -95,6 +102,7 @@
func TestNoRaceChanSyncCloseRecv(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
v = 1
@@ -111,6 +119,7 @@
func TestNoRaceChanSyncCloseRecv2(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
v = 1
@@ -122,6 +131,7 @@
func TestNoRaceChanSyncCloseRecv3(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
v = 1
@@ -134,6 +144,7 @@
func TestRaceChanSyncCloseSend(t *testing.T) {
v := 0
+ _ = v
c := make(chan int)
go func() {
v = 1
@@ -150,6 +161,7 @@
func TestRaceChanAsyncCloseSend(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -170,6 +182,7 @@
compl := make(chan bool, 2)
v1 := 0
v2 := 0
+ _ = v1 + v2
c := make(chan int)
go func() {
defer func() {
@@ -197,6 +210,7 @@
func TestRaceChanSendLen(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
go func() {
v = 1
@@ -210,6 +224,7 @@
func TestRaceChanRecvLen(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
c <- 1
go func() {
@@ -226,6 +241,7 @@
compl := make(chan bool, 2)
v1 := 0
v2 := 0
+ _ = v1 + v2
c := make(chan int, 1)
go func() {
v1 = 1
@@ -264,6 +280,7 @@
func TestRaceChanWrongSend(t *testing.T) {
v1 := 0
v2 := 0
+ _ = v1 + v2
c := make(chan int, 2)
go func() {
v1 = 1
@@ -284,6 +301,7 @@
func TestRaceChanWrongClose(t *testing.T) {
v1 := 0
v2 := 0
+ _ = v1 + v2
c := make(chan int, 1)
done := make(chan bool)
go func() {
@@ -561,6 +579,7 @@
func TestRaceChanCloseLen(t *testing.T) {
v := 0
+ _ = v
c := make(chan int, 10)
c <- 0
go func() {
@@ -587,6 +606,7 @@
done := make(chan struct{})
mtx := make(chan struct{}, 1)
data := 0
+ _ = data
go func() {
mtx <- struct{}{}
data = 42
@@ -604,6 +624,7 @@
mtx := make(chan struct{}, 1)
aux := make(chan bool)
data := 0
+ _ = data
go func() {
select {
case mtx <- struct{}{}:
@@ -632,6 +653,7 @@
done := make(chan struct{})
mtx := make(chan bool, 2)
data := 0
+ _ = data
go func() {
mtx <- true
data = 42
diff --git a/src/runtime/race/testdata/finalizer_test.go b/src/runtime/race/testdata/finalizer_test.go
index 222cbf6..3ac33d2 100644
--- a/src/runtime/race/testdata/finalizer_test.go
+++ b/src/runtime/race/testdata/finalizer_test.go
@@ -53,6 +53,7 @@
func TestRaceFin(t *testing.T) {
c := make(chan bool)
y := 0
+ _ = y
go func() {
x := new(string)
runtime.SetFinalizer(x, func(x *string) {
diff --git a/src/runtime/race/testdata/map_test.go b/src/runtime/race/testdata/map_test.go
index a8d8148..88e735e 100644
--- a/src/runtime/race/testdata/map_test.go
+++ b/src/runtime/race/testdata/map_test.go
@@ -130,6 +130,7 @@
func TestRaceMapVariable(t *testing.T) {
ch := make(chan bool, 1)
m := make(map[int]int)
+ _ = m
go func() {
m = make(map[int]int)
ch <- true
@@ -230,6 +231,7 @@
conns[1] = []int{0}
ch := make(chan bool, 1)
var err error
+ _ = err
go func() {
conns[1][0], err = connect()
ch <- true
diff --git a/src/runtime/race/testdata/mop_test.go b/src/runtime/race/testdata/mop_test.go
index c96acb9..5d25ed4 100644
--- a/src/runtime/race/testdata/mop_test.go
+++ b/src/runtime/race/testdata/mop_test.go
@@ -60,6 +60,7 @@
func TestRaceIntRWClosures(t *testing.T) {
var x, y int
+ _ = y
ch := make(chan int, 2)
go func() {
@@ -76,6 +77,7 @@
func TestNoRaceIntRWClosures(t *testing.T) {
var x, y int
+ _ = y
ch := make(chan int, 1)
go func() {
@@ -93,6 +95,7 @@
func TestRaceInt32RWClosures(t *testing.T) {
var x, y int32
+ _ = y
ch := make(chan bool, 2)
go func() {
@@ -168,6 +171,7 @@
func TestRaceCaseBody(t *testing.T) {
var x, y int
+ _ = y
ch := make(chan int, 2)
go func() {
@@ -189,6 +193,7 @@
func TestNoRaceCaseFallthrough(t *testing.T) {
var x, y, z int
+ _ = y
ch := make(chan int, 2)
z = 1
@@ -210,6 +215,7 @@
func TestRaceCaseFallthrough(t *testing.T) {
var x, y, z int
+ _ = y
ch := make(chan int, 2)
z = 1
@@ -323,6 +329,7 @@
const N = 2
var a [N]int
var x, y int
+ _ = x + y
done := make(chan bool, N)
for i, v := range a {
go func(i int) {
@@ -433,6 +440,7 @@
func TestRacePlus(t *testing.T) {
var x, y, z int
+ _ = y
ch := make(chan int, 2)
go func() {
@@ -449,6 +457,7 @@
func TestRacePlus2(t *testing.T) {
var x, y, z int
+ _ = y
ch := make(chan int, 2)
go func() {
@@ -465,6 +474,7 @@
func TestNoRacePlus(t *testing.T) {
var x, y, z, f int
+ _ = x + y + f
ch := make(chan int, 2)
go func() {
@@ -481,6 +491,7 @@
func TestRaceComplement(t *testing.T) {
var x, y, z int
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -497,6 +508,7 @@
func TestRaceDiv(t *testing.T) {
var x, y, z int
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -513,6 +525,7 @@
func TestRaceDivConst(t *testing.T) {
var x, y, z uint32
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -529,6 +542,7 @@
func TestRaceMod(t *testing.T) {
var x, y, z int
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -545,6 +559,7 @@
func TestRaceModConst(t *testing.T) {
var x, y, z int
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -561,6 +576,7 @@
func TestRaceRotate(t *testing.T) {
var x, y, z uint32
+ _ = x
ch := make(chan int, 2)
go func() {
@@ -932,6 +948,7 @@
func TestRaceFuncVariableWW(t *testing.T) {
var f func(x int) int
+ _ = f
ch := make(chan bool, 1)
go func() {
f = func(x int) int {
@@ -948,6 +965,7 @@
// This one should not belong to mop_test
func TestRacePanic(t *testing.T) {
var x int
+ _ = x
var zero int = 0
ch := make(chan bool, 2)
go func() {
@@ -1284,6 +1302,7 @@
ch := make(chan bool, 1)
var mu sync.Mutex
x := 0
+ _ = x
go func() {
mu.Lock()
x = 42
@@ -1812,6 +1831,7 @@
c := make(chan bool, 1)
var mu sync.Mutex
x := 0
+ _ = x
go func() {
x = func() int { // Write of x must be under the mutex.
mu.Lock()
@@ -2042,6 +2062,7 @@
const P = 4
const N = 1e6
var tinySink *byte
+ _ = tinySink
done := make(chan bool)
for p := 0; p < P; p++ {
go func() {
diff --git a/src/runtime/race/testdata/mutex_test.go b/src/runtime/race/testdata/mutex_test.go
index 3cf03ae..cbed2d3 100644
--- a/src/runtime/race/testdata/mutex_test.go
+++ b/src/runtime/race/testdata/mutex_test.go
@@ -13,6 +13,7 @@
func TestNoRaceMutex(t *testing.T) {
var mu sync.Mutex
var x int16 = 0
+ _ = x
ch := make(chan bool, 2)
go func() {
mu.Lock()
@@ -33,6 +34,7 @@
func TestRaceMutex(t *testing.T) {
var mu sync.Mutex
var x int16 = 0
+ _ = x
ch := make(chan bool, 2)
go func() {
x = 1
@@ -54,6 +56,7 @@
var mu1 sync.Mutex
var mu2 sync.Mutex
var x int8 = 0
+ _ = x
ch := make(chan bool, 2)
go func() {
mu1.Lock()
@@ -74,6 +77,7 @@
func TestNoRaceMutexPureHappensBefore(t *testing.T) {
var mu sync.Mutex
var x int16 = 0
+ _ = x
ch := make(chan bool, 2)
go func() {
x = 1
@@ -96,6 +100,7 @@
var mu sync.Mutex
ch := make(chan bool, 2)
x := 0
+ _ = x
mu.Lock()
go func() {
x = 1
diff --git a/src/runtime/race/testdata/rwmutex_test.go b/src/runtime/race/testdata/rwmutex_test.go
index 7ac829d..39219e5 100644
--- a/src/runtime/race/testdata/rwmutex_test.go
+++ b/src/runtime/race/testdata/rwmutex_test.go
@@ -14,6 +14,7 @@
var mu1 sync.Mutex
var mu2 sync.RWMutex
var x int16 = 0
+ _ = x
ch := make(chan bool, 2)
go func() {
mu1.Lock()
@@ -34,6 +35,7 @@
func TestNoRaceRWMutex(t *testing.T) {
var mu sync.RWMutex
var x, y int64 = 0, 1
+ _ = y
ch := make(chan bool, 2)
go func() {
mu.Lock()
diff --git a/src/runtime/race/testdata/select_test.go b/src/runtime/race/testdata/select_test.go
index 9969f47..3827867 100644
--- a/src/runtime/race/testdata/select_test.go
+++ b/src/runtime/race/testdata/select_test.go
@@ -11,6 +11,7 @@
func TestNoRaceSelect1(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool)
c := make(chan bool)
c1 := make(chan bool)
@@ -36,6 +37,7 @@
func TestNoRaceSelect2(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool)
c := make(chan bool)
c1 := make(chan bool)
@@ -55,6 +57,7 @@
func TestNoRaceSelect3(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool)
c := make(chan bool, 10)
c1 := make(chan bool)
@@ -112,6 +115,7 @@
func TestNoRaceSelect5(t *testing.T) {
test := func(sel, needSched bool) {
var x int
+ _ = x
ch := make(chan bool)
c1 := make(chan bool)
@@ -158,6 +162,7 @@
func TestRaceSelect1(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool, 2)
c := make(chan bool)
c1 := make(chan bool)
@@ -182,6 +187,7 @@
func TestRaceSelect2(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool)
c := make(chan bool)
c1 := make(chan bool)
@@ -200,6 +206,7 @@
func TestRaceSelect3(t *testing.T) {
var x int
+ _ = x
compl := make(chan bool)
c := make(chan bool)
c1 := make(chan bool)
diff --git a/src/runtime/race/testdata/sync_test.go b/src/runtime/race/testdata/sync_test.go
index d48680d..2b2d95d 100644
--- a/src/runtime/race/testdata/sync_test.go
+++ b/src/runtime/race/testdata/sync_test.go
@@ -12,6 +12,7 @@
func TestNoRaceCond(t *testing.T) {
x := 0
+ _ = x
condition := 0
var mu sync.Mutex
cond := sync.NewCond(&mu)
@@ -35,6 +36,7 @@
var mu sync.Mutex
cond := sync.NewCond(&mu)
x := 0
+ _ = x
condition := 0
go func() {
time.Sleep(10 * time.Millisecond) // Enter cond.Wait loop
@@ -67,6 +69,7 @@
allDone := make(chan bool, N)
var x int
+ _ = x
var f, g, h func()
f = func() {
@@ -133,6 +136,7 @@
func TestNoRaceAfterFunc3(t *testing.T) {
c := make(chan bool, 1)
x := 0
+ _ = x
time.AfterFunc(1e7, func() {
x = 1
c <- true
@@ -143,6 +147,7 @@
func TestRaceAfterFunc3(t *testing.T) {
c := make(chan bool, 2)
x := 0
+ _ = x
time.AfterFunc(1e7, func() {
x = 1
c <- true
@@ -161,6 +166,7 @@
// comprehensible.
func TestRaceGoroutineCreationStack(t *testing.T) {
var x int
+ _ = x
var ch = make(chan bool, 1)
f1 := func() {
diff --git a/src/runtime/race/testdata/waitgroup_test.go b/src/runtime/race/testdata/waitgroup_test.go
index ff152b0..1693373 100644
--- a/src/runtime/race/testdata/waitgroup_test.go
+++ b/src/runtime/race/testdata/waitgroup_test.go
@@ -13,6 +13,7 @@
func TestNoRaceWaitGroup(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
n := 1
for i := 0; i < n; i++ {
@@ -28,6 +29,7 @@
func TestRaceWaitGroup(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
n := 2
for i := 0; i < n; i++ {
@@ -43,6 +45,7 @@
func TestNoRaceWaitGroup2(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
wg.Add(1)
go func() {
@@ -56,6 +59,7 @@
// incrementing counter in Add and locking wg's mutex
func TestRaceWaitGroupAsMutex(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
c := make(chan bool, 2)
go func() {
@@ -82,6 +86,7 @@
func TestRaceWaitGroupWrongWait(t *testing.T) {
c := make(chan bool, 2)
var x int
+ _ = x
var wg sync.WaitGroup
go func() {
wg.Add(1)
@@ -187,6 +192,7 @@
// Correct usage but still a race
func TestRaceWaitGroup2(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
wg.Add(2)
go func() {
@@ -202,6 +208,7 @@
func TestNoRaceWaitGroupPanicRecover(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
defer func() {
err := recover()
@@ -219,6 +226,7 @@
// Is it possible to get a race by synchronization via panic?
func TestNoRaceWaitGroupPanicRecover2(t *testing.T) {
var x int
+ _ = x
var wg sync.WaitGroup
ch := make(chan bool, 1)
var f func() = func() {
diff --git a/src/runtime/rt0_android_386.s b/src/runtime/rt0_android_386.s
index 9d20fc8..3a1b06b 100644
--- a/src/runtime/rt0_android_386.s
+++ b/src/runtime/rt0_android_386.s
@@ -4,18 +4,13 @@
#include "textflag.h"
-TEXT _rt0_386_android(SB),NOSPLIT,$8
- MOVL 8(SP), AX // argc
- LEAL 12(SP), BX // argv
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_android(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
TEXT _rt0_386_android_lib(SB),NOSPLIT,$0
PUSHL $_rt0_386_android_argv(SB) // argv
PUSHL $1 // argc
- CALL _rt0_386_linux_lib(SB)
+ CALL _rt0_386_lib(SB)
POPL AX
POPL AX
RET
diff --git a/src/runtime/rt0_android_amd64.s b/src/runtime/rt0_android_amd64.s
index 6420c9f..6bda3bf 100644
--- a/src/runtime/rt0_android_amd64.s
+++ b/src/runtime/rt0_android_amd64.s
@@ -5,16 +5,12 @@
#include "textflag.h"
TEXT _rt0_amd64_android(SB),NOSPLIT,$-8
- MOVQ 0(SP), DI // argc
- LEAQ 8(SP), SI // argv
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
TEXT _rt0_amd64_android_lib(SB),NOSPLIT,$0
MOVQ $1, DI // argc
MOVQ $_rt0_amd64_android_argv(SB), SI // argv
- MOVQ $_rt0_amd64_linux_lib(SB), AX
- JMP AX
+ JMP _rt0_amd64_lib(SB)
DATA _rt0_amd64_android_argv+0x00(SB)/8,$_rt0_amd64_android_argv0(SB)
DATA _rt0_amd64_android_argv+0x08(SB)/8,$0 // end argv
diff --git a/src/runtime/rt0_android_arm.s b/src/runtime/rt0_android_arm.s
index 189e290..1246238 100644
--- a/src/runtime/rt0_android_arm.s
+++ b/src/runtime/rt0_android_arm.s
@@ -10,13 +10,10 @@
MOVW $_rt0_arm_linux1(SB), R4
B (R4)
-// When building with -buildmode=c-shared, this symbol is called when the shared
-// library is loaded.
TEXT _rt0_arm_android_lib(SB),NOSPLIT,$0
MOVW $1, R0 // argc
MOVW $_rt0_arm_android_argv(SB), R1 // **argv
- BL _rt0_arm_linux_lib(SB)
- RET
+ B _rt0_arm_lib(SB)
DATA _rt0_arm_android_argv+0x00(SB)/4,$_rt0_arm_android_argv0(SB)
DATA _rt0_arm_android_argv+0x04(SB)/4,$0 // end argv
diff --git a/src/runtime/rt0_darwin_386.s b/src/runtime/rt0_darwin_386.s
index 6b404db..a8d3a79 100644
--- a/src/runtime/rt0_darwin_386.s
+++ b/src/runtime/rt0_darwin_386.s
@@ -4,72 +4,14 @@
#include "textflag.h"
-TEXT _rt0_386_darwin(SB),NOSPLIT,$8
- MOVL 8(SP), AX
- LEAL 12(SP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_darwin(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
-// With -buildmode=c-archive, this symbol is called from a global constructor.
TEXT _rt0_386_darwin_lib(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- PUSHL BX
- PUSHL SI
- PUSHL DI
-
- MOVL 8(BP), AX
- MOVL AX, _rt0_386_darwin_lib_argc<>(SB)
- MOVL 12(BP), AX
- MOVL AX, _rt0_386_darwin_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVL $runtime·libpreinit(SB), AX
- CALL AX
-
- SUBL $12, SP
-
- // Create a new thread to do the runtime initialization and return.
- MOVL _cgo_sys_thread_create(SB), AX
- TESTL AX, AX
- JZ nocgo
- MOVL $_rt0_386_darwin_lib_go(SB), BX
- MOVL BX, 0(SP)
- MOVL $0, 4(SP)
- CALL AX
- JMP restore
-
-nocgo:
- MOVL $0x800000, 0(SP) // stacksize = 8192KB
- MOVL $_rt0_386_darwin_lib_go(SB), AX
- MOVL AX, 4(SP) // fn
- MOVL $0, 8(SP) // fnarg
- MOVL $runtime·newosproc0(SB), AX
- CALL AX
-
-restore:
- ADDL $12, SP
- POPL DI
- POPL SI
- POPL BX
- POPL BP
- RET
-
-TEXT _rt0_386_darwin_lib_go(SB),NOSPLIT,$12
- MOVL _rt0_386_darwin_lib_argc<>(SB), AX
- MOVL AX, 0(SP)
- MOVL _rt0_386_darwin_lib_argv<>(SB), AX
- MOVL AX, 4(SP)
- MOVL $runtime·rt0_go(SB), AX
- CALL AX
- RET
-
-DATA _rt0_386_darwin_lib_argc<>(SB)/4, $0
-GLOBL _rt0_386_darwin_lib_argc<>(SB),NOPTR, $4
-DATA _rt0_386_darwin_lib_argv<>(SB)/4, $0
-GLOBL _rt0_386_darwin_lib_argv<>(SB),NOPTR, $4
+ JMP _rt0_386_lib(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_darwin_amd64.s b/src/runtime/rt0_darwin_amd64.s
index 655e77a..ed804d4 100644
--- a/src/runtime/rt0_darwin_amd64.s
+++ b/src/runtime/rt0_darwin_amd64.s
@@ -5,75 +5,9 @@
#include "textflag.h"
TEXT _rt0_amd64_darwin(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
// When linking with -shared, this symbol is called when the shared library
// is loaded.
-TEXT _rt0_amd64_darwin_lib(SB),NOSPLIT,$0x58
- // Align stack. We don't know whether Go is adding a frame pointer here or not.
- MOVQ SP, R8
- SUBQ $16, R8
- ANDQ $~15, R8
- XCHGQ SP, R8
-
- MOVQ R8, 0x48(SP)
- MOVQ BX, 0x18(SP)
- MOVQ BP, 0x20(SP)
- MOVQ R12, 0x28(SP)
- MOVQ R13, 0x30(SP)
- MOVQ R14, 0x38(SP)
- MOVQ R15, 0x40(SP)
-
- MOVQ DI, _rt0_amd64_darwin_lib_argc<>(SB)
- MOVQ SI, _rt0_amd64_darwin_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVQ $runtime·libpreinit(SB), AX
- CALL AX
-
- // Create a new thread to do the runtime initialization and return.
- MOVQ _cgo_sys_thread_create(SB), AX
- TESTQ AX, AX
- JZ nocgo
- MOVQ $_rt0_amd64_darwin_lib_go(SB), DI
- MOVQ $0, SI
- CALL AX
- JMP restore
-
-nocgo:
- MOVQ $8388608, 0(SP) // stacksize
- MOVQ $_rt0_amd64_darwin_lib_go(SB), AX
- MOVQ AX, 8(SP) // fn
- MOVQ $0, 16(SP) // fnarg
- MOVQ $runtime·newosproc0(SB), AX
- CALL AX
-
-restore:
- MOVQ 0x18(SP), BX
- MOVQ 0x20(SP), BP
- MOVQ 0x28(SP), R12
- MOVQ 0x30(SP), R13
- MOVQ 0x38(SP), R14
- MOVQ 0x40(SP), R15
-
- MOVQ 0x48(SP), R8
- MOVQ R8, SP
- RET
-
-TEXT _rt0_amd64_darwin_lib_go(SB),NOSPLIT,$0
- MOVQ _rt0_amd64_darwin_lib_argc<>(SB), DI
- MOVQ _rt0_amd64_darwin_lib_argv<>(SB), SI
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
-
-DATA _rt0_amd64_darwin_lib_argc<>(SB)/8, $0
-GLOBL _rt0_amd64_darwin_lib_argc<>(SB),NOPTR, $8
-DATA _rt0_amd64_darwin_lib_argv<>(SB)/8, $0
-GLOBL _rt0_amd64_darwin_lib_argv<>(SB),NOPTR, $8
-
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_darwin_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_darwin_arm.s b/src/runtime/rt0_darwin_arm.s
index 526d88f..71fbe5f 100644
--- a/src/runtime/rt0_darwin_arm.s
+++ b/src/runtime/rt0_darwin_arm.s
@@ -4,94 +4,8 @@
#include "textflag.h"
-TEXT _rt0_arm_darwin(SB),7,$-4
- // prepare arguments for main (_rt0_go)
- MOVW (R13), R0 // argc
- MOVW $4(R13), R1 // argv
- MOVW $main(SB), R4
- B (R4)
+TEXT _rt0_arm_darwin(SB),7,$0
+ B _rt0_asm(SB)
-// When linking with -buildmode=c-archive or -buildmode=c-shared,
-// this symbol is called from a global initialization function.
-//
-// Note that all currently shipping darwin/arm platforms require
-// cgo and do not support c-shared.
-TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$104
- // Preserve callee-save registers.
- MOVW R4, 12(R13)
- MOVW R5, 16(R13)
- MOVW R6, 20(R13)
- MOVW R7, 24(R13)
- MOVW R8, 28(R13)
- MOVW R11, 32(R13)
-
- MOVD F8, (32+8*1)(R13)
- MOVD F9, (32+8*2)(R13)
- MOVD F10, (32+8*3)(R13)
- MOVD F11, (32+8*4)(R13)
- MOVD F12, (32+8*5)(R13)
- MOVD F13, (32+8*6)(R13)
- MOVD F14, (32+8*7)(R13)
- MOVD F15, (32+8*8)(R13)
-
- MOVW R0, _rt0_arm_darwin_lib_argc<>(SB)
- MOVW R1, _rt0_arm_darwin_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVW $runtime·libpreinit(SB), R3
- CALL (R3)
-
- // Create a new thread to do the runtime initialization and return.
- MOVW _cgo_sys_thread_create(SB), R3
- CMP $0, R3
- B.EQ nocgo
- MOVW $_rt0_arm_darwin_lib_go(SB), R0
- MOVW $0, R1
- BL (R3)
- B rr
-nocgo:
- MOVW $0x400000, R0
- MOVW R0, (R13) // stacksize
- MOVW $_rt0_arm_darwin_lib_go(SB), R0
- MOVW R0, 4(R13) // fn
- MOVW $0, R0
- MOVW R0, 8(R13) // fnarg
- MOVW $runtime·newosproc0(SB), R3
- BL (R3)
-rr:
- // Restore callee-save registers and return.
- MOVW 12(R13), R4
- MOVW 16(R13), R5
- MOVW 20(R13), R6
- MOVW 24(R13), R7
- MOVW 28(R13), R8
- MOVW 32(R13), R11
- MOVD (32+8*1)(R13), F8
- MOVD (32+8*2)(R13), F9
- MOVD (32+8*3)(R13), F10
- MOVD (32+8*4)(R13), F11
- MOVD (32+8*5)(R13), F12
- MOVD (32+8*6)(R13), F13
- MOVD (32+8*7)(R13), F14
- MOVD (32+8*8)(R13), F15
- RET
-
-
-TEXT _rt0_arm_darwin_lib_go(SB),NOSPLIT,$0
- MOVW _rt0_arm_darwin_lib_argc<>(SB), R0
- MOVW _rt0_arm_darwin_lib_argv<>(SB), R1
- MOVW R0, (R13)
- MOVW R1, 4(R13)
- MOVW $runtime·rt0_go(SB), R4
- B (R4)
-
-DATA _rt0_arm_darwin_lib_argc<>(SB)/4, $0
-GLOBL _rt0_arm_darwin_lib_argc<>(SB),NOPTR, $4
-DATA _rt0_arm_darwin_lib_argv<>(SB)/4, $0
-GLOBL _rt0_arm_darwin_lib_argv<>(SB),NOPTR, $4
-
-TEXT main(SB),NOSPLIT,$-8
- // save argc and argv onto stack
- MOVM.DB.W [R0-R1], (R13)
- MOVW $runtime·rt0_go(SB), R4
- B (R4)
+TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$0
+ B _rt0_arm_lib(SB)
diff --git a/src/runtime/rt0_dragonfly_amd64.s b/src/runtime/rt0_dragonfly_amd64.s
index fb56618..e76f9b9 100644
--- a/src/runtime/rt0_dragonfly_amd64.s
+++ b/src/runtime/rt0_dragonfly_amd64.s
@@ -4,12 +4,11 @@
#include "textflag.h"
+// On Dragonfly argc/argv are passed in DI, not SP, so we can't use _rt0_amd64.
TEXT _rt0_amd64_dragonfly(SB),NOSPLIT,$-8
LEAQ 8(DI), SI // argv
MOVQ 0(DI), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP runtime·rt0_go(SB)
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_dragonfly_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_freebsd_386.s b/src/runtime/rt0_freebsd_386.s
index cd7a915..1808059 100644
--- a/src/runtime/rt0_freebsd_386.s
+++ b/src/runtime/rt0_freebsd_386.s
@@ -4,13 +4,14 @@
#include "textflag.h"
-TEXT _rt0_386_freebsd(SB),NOSPLIT,$8
- MOVL 8(SP), AX
- LEAL 12(SP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_freebsd(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
+
+TEXT _rt0_386_freebsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_386_lib(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_freebsd_amd64.s b/src/runtime/rt0_freebsd_amd64.s
index 7989f7c..ccc48f6 100644
--- a/src/runtime/rt0_freebsd_amd64.s
+++ b/src/runtime/rt0_freebsd_amd64.s
@@ -4,12 +4,11 @@
#include "textflag.h"
+// On FreeBSD argc/argv are passed in DI, not SP, so we can't use _rt0_amd64.
TEXT _rt0_amd64_freebsd(SB),NOSPLIT,$-8
LEAQ 8(DI), SI // argv
MOVQ 0(DI), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP runtime·rt0_go(SB)
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_freebsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_freebsd_arm.s b/src/runtime/rt0_freebsd_arm.s
index e1bb13d..62ecd9a 100644
--- a/src/runtime/rt0_freebsd_arm.s
+++ b/src/runtime/rt0_freebsd_arm.s
@@ -4,13 +4,8 @@
#include "textflag.h"
-TEXT _rt0_arm_freebsd(SB),NOSPLIT,$-4
- MOVW (R13), R0 // argc
- MOVW $4(R13), R1 // argv
- MOVM.DB.W [R0-R1], (R13)
- B runtime·rt0_go(SB)
+TEXT _rt0_arm_freebsd(SB),NOSPLIT,$0
+ B _rt0_arm(SB)
-TEXT main(SB),NOSPLIT,$-4
- MOVM.DB.W [R0-R1], (R13)
- MOVW $runtime·rt0_go(SB), R4
- B (R4)
+TEXT _rt0_arm_freebsd_lib(SB),NOSPLIT,$0
+ B _rt0_arm_lib(SB)
diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s
index 23bfc98..325066f 100644
--- a/src/runtime/rt0_linux_386.s
+++ b/src/runtime/rt0_linux_386.s
@@ -4,72 +4,14 @@
#include "textflag.h"
-TEXT _rt0_386_linux(SB),NOSPLIT,$8
- MOVL 8(SP), AX
- LEAL 12(SP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_linux(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
-// When building with -buildmode=c-shared, this symbol is called when the shared
-// library is loaded.
TEXT _rt0_386_linux_lib(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- PUSHL BX
- PUSHL SI
- PUSHL DI
-
- MOVL 8(BP), AX
- MOVL AX, _rt0_386_linux_lib_argc<>(SB)
- MOVL 12(BP), AX
- MOVL AX, _rt0_386_linux_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVL $runtime·libpreinit(SB), AX
- CALL AX
-
- SUBL $8, SP
-
- // Create a new thread to do the runtime initialization.
- MOVL _cgo_sys_thread_create(SB), AX
- TESTL AX, AX
- JZ nocgo
- MOVL $_rt0_386_linux_lib_go(SB), BX
- MOVL BX, 0(SP)
- MOVL $0, 4(SP)
- CALL AX
- JMP restore
-
-nocgo:
- MOVL $0x800000, 0(SP) // stacksize = 8192KB
- MOVL $_rt0_386_linux_lib_go(SB), AX
- MOVL AX, 4(SP) // fn
- MOVL $runtime·newosproc0(SB), AX
- CALL AX
-
-restore:
- ADDL $8, SP
- POPL DI
- POPL SI
- POPL BX
- POPL BP
- RET
-
-TEXT _rt0_386_linux_lib_go(SB),NOSPLIT,$12
- MOVL _rt0_386_linux_lib_argc<>(SB), AX
- MOVL AX, 0(SP)
- MOVL _rt0_386_linux_lib_argv<>(SB), AX
- MOVL AX, 4(SP)
- MOVL $runtime·rt0_go(SB), AX
- CALL AX
- RET
-
-DATA _rt0_386_linux_lib_argc<>(SB)/4, $0
-GLOBL _rt0_386_linux_lib_argc<>(SB),NOPTR, $4
-DATA _rt0_386_linux_lib_argv<>(SB)/4, $0
-GLOBL _rt0_386_linux_lib_argv<>(SB),NOPTR, $4
+ JMP _rt0_386_lib(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_linux_amd64.s b/src/runtime/rt0_linux_amd64.s
index ced471f..94ff709 100644
--- a/src/runtime/rt0_linux_amd64.s
+++ b/src/runtime/rt0_linux_amd64.s
@@ -5,70 +5,7 @@
#include "textflag.h"
TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
-// When building with -buildmode=c-shared, this symbol is called when the shared
-// library is loaded.
-// Note: This function calls external C code, which might required 16-byte stack
-// alignment after cmd/internal/obj applies its transformations.
-TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0x50
- MOVQ SP, AX
- ANDQ $-16, SP
- MOVQ BX, 0x10(SP)
- MOVQ BP, 0x18(SP)
- MOVQ R12, 0x20(SP)
- MOVQ R13, 0x28(SP)
- MOVQ R14, 0x30(SP)
- MOVQ R15, 0x38(SP)
- MOVQ AX, 0x40(SP)
-
- MOVQ DI, _rt0_amd64_linux_lib_argc<>(SB)
- MOVQ SI, _rt0_amd64_linux_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVQ $runtime·libpreinit(SB), AX
- CALL AX
-
- // Create a new thread to do the runtime initialization and return.
- MOVQ _cgo_sys_thread_create(SB), AX
- TESTQ AX, AX
- JZ nocgo
- MOVQ $_rt0_amd64_linux_lib_go(SB), DI
- MOVQ $0, SI
- CALL AX
- JMP restore
-
-nocgo:
- MOVQ $8388608, 0(SP) // stacksize
- MOVQ $_rt0_amd64_linux_lib_go(SB), AX
- MOVQ AX, 8(SP) // fn
- MOVQ $runtime·newosproc0(SB), AX
- CALL AX
-
-restore:
- MOVQ 0x10(SP), BX
- MOVQ 0x18(SP), BP
- MOVQ 0x20(SP), R12
- MOVQ 0x28(SP), R13
- MOVQ 0x30(SP), R14
- MOVQ 0x38(SP), R15
- MOVQ 0x40(SP), SP
- RET
-
-TEXT _rt0_amd64_linux_lib_go(SB),NOSPLIT,$0
- MOVQ _rt0_amd64_linux_lib_argc<>(SB), DI
- MOVQ _rt0_amd64_linux_lib_argv<>(SB), SI
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
-
-DATA _rt0_amd64_linux_lib_argc<>(SB)/8, $0
-GLOBL _rt0_amd64_linux_lib_argc<>(SB),NOPTR, $8
-DATA _rt0_amd64_linux_lib_argv<>(SB)/8, $0
-GLOBL _rt0_amd64_linux_lib_argv<>(SB),NOPTR, $8
-
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_linux_arm.s b/src/runtime/rt0_linux_arm.s
index 597e642..ba4ca2b 100644
--- a/src/runtime/rt0_linux_arm.s
+++ b/src/runtime/rt0_linux_arm.s
@@ -12,158 +12,22 @@
// When building with -buildmode=c-shared, this symbol is called when the shared
// library is loaded.
-TEXT _rt0_arm_linux_lib(SB),NOSPLIT,$104
- // Preserve callee-save registers. Raspberry Pi's dlopen(), for example,
- // actually cares that R11 is preserved.
- MOVW R4, 12(R13)
- MOVW R5, 16(R13)
- MOVW R6, 20(R13)
- MOVW R7, 24(R13)
- MOVW R8, 28(R13)
- MOVW R11, 32(R13)
-
- // Skip floating point registers on GOARM < 6.
- MOVB runtime·goarm(SB), R11
- CMP $6, R11
- BLT skipfpsave
- MOVD F8, (32+8*1)(R13)
- MOVD F9, (32+8*2)(R13)
- MOVD F10, (32+8*3)(R13)
- MOVD F11, (32+8*4)(R13)
- MOVD F12, (32+8*5)(R13)
- MOVD F13, (32+8*6)(R13)
- MOVD F14, (32+8*7)(R13)
- MOVD F15, (32+8*8)(R13)
-skipfpsave:
- // Save argc/argv.
- MOVW R0, _rt0_arm_linux_lib_argc<>(SB)
- MOVW R1, _rt0_arm_linux_lib_argv<>(SB)
-
- // Synchronous initialization.
- MOVW $runtime·libpreinit(SB), R2
- CALL (R2)
-
- // Create a new thread to do the runtime initialization.
- MOVW _cgo_sys_thread_create(SB), R2
- CMP $0, R2
- BEQ nocgo
- MOVW $_rt0_arm_linux_lib_go<>(SB), R0
- MOVW $0, R1
- BL (R2)
- B rr
-nocgo:
- MOVW $0x800000, R0 // stacksize = 8192KB
- MOVW $_rt0_arm_linux_lib_go<>(SB), R1 // fn
- MOVW R0, 4(R13)
- MOVW R1, 8(R13)
- BL runtime·newosproc0(SB)
-rr:
- // Restore callee-save registers and return.
- MOVB runtime·goarm(SB), R11
- CMP $6, R11
- BLT skipfprest
- MOVD (32+8*1)(R13), F8
- MOVD (32+8*2)(R13), F9
- MOVD (32+8*3)(R13), F10
- MOVD (32+8*4)(R13), F11
- MOVD (32+8*5)(R13), F12
- MOVD (32+8*6)(R13), F13
- MOVD (32+8*7)(R13), F14
- MOVD (32+8*8)(R13), F15
-skipfprest:
- MOVW 12(R13), R4
- MOVW 16(R13), R5
- MOVW 20(R13), R6
- MOVW 24(R13), R7
- MOVW 28(R13), R8
- MOVW 32(R13), R11
- RET
-
-TEXT _rt0_arm_linux_lib_go<>(SB),NOSPLIT,$8
- MOVW _rt0_arm_linux_lib_argc<>(SB), R0
- MOVW _rt0_arm_linux_lib_argv<>(SB), R1
- MOVW R0, 0(R13)
- MOVW R1, 4(R13)
- B runtime·rt0_go(SB)
-
-DATA _rt0_arm_linux_lib_argc<>(SB)/4,$0
-GLOBL _rt0_arm_linux_lib_argc<>(SB),NOPTR,$4
-DATA _rt0_arm_linux_lib_argv<>(SB)/4,$0
-GLOBL _rt0_arm_linux_lib_argv<>(SB),NOPTR,$4
+TEXT _rt0_arm_linux_lib(SB),NOSPLIT,$0
+ B _rt0_arm_lib(SB)
TEXT _rt0_arm_linux1(SB),NOSPLIT,$-4
// We first need to detect the kernel ABI, and warn the user
- // if the system only supports OABI
+ // if the system only supports OABI.
// The strategy here is to call some EABI syscall to see if
// SIGILL is received.
- // To catch SIGILL, we have to first setup sigaction, this is
- // a chicken-and-egg problem, because we can't do syscall if
- // we don't know the kernel ABI... Oh, not really, we can do
- // syscall in Thumb mode.
+ // If you get a SIGILL here, you have the wrong kernel.
- // Save argc and argv
+ // Save argc and argv (syscall will clobber at least R0).
MOVM.DB.W [R0-R1], (R13)
- // Thumb mode OABI check disabled because there are some
- // EABI systems that do not support Thumb execution.
- // We can run on them except for this check!
-
- // // set up sa_handler
- // MOVW $bad_abi<>(SB), R0 // sa_handler
- // MOVW $0, R1 // sa_flags
- // MOVW $0, R2 // sa_restorer
- // MOVW $0, R3 // sa_mask
- // MOVM.DB.W [R0-R3], (R13)
- // MOVW $4, R0 // SIGILL
- // MOVW R13, R1 // sa
- // SUB $16, R13
- // MOVW R13, R2 // old_sa
- // MOVW $8, R3 // c
- // MOVW $174, R7 // sys_sigaction
- // BL oabi_syscall<>(SB)
-
// do an EABI syscall
MOVW $20, R7 // sys_getpid
SWI $0 // this will trigger SIGILL on OABI systems
-
- // MOVW $4, R0 // SIGILL
- // MOVW R13, R1 // sa
- // MOVW $0, R2 // old_sa
- // MOVW $8, R3 // c
- // MOVW $174, R7 // sys_sigaction
- // SWI $0 // restore signal handler
- // ADD $32, R13
+ MOVM.IA.W (R13), [R0-R1]
B runtime·rt0_go(SB)
-
-TEXT bad_abi<>(SB),NOSPLIT,$-4
- // give diagnosis and exit
- MOVW $2, R0 // stderr
- MOVW $bad_abi_msg(SB), R1 // data
- MOVW $45, R2 // len
- MOVW $4, R7 // sys_write
- BL oabi_syscall<>(SB)
- MOVW $1, R0
- MOVW $1, R7 // sys_exit
- BL oabi_syscall<>(SB)
- B 0(PC)
-
-DATA bad_abi_msg+0x00(SB)/8, $"This pro"
-DATA bad_abi_msg+0x08(SB)/8, $"gram can"
-DATA bad_abi_msg+0x10(SB)/8, $" only be"
-DATA bad_abi_msg+0x18(SB)/8, $" run on "
-DATA bad_abi_msg+0x20(SB)/8, $"EABI ker"
-DATA bad_abi_msg+0x28(SB)/4, $"nels"
-DATA bad_abi_msg+0x2c(SB)/1, $0xa
-GLOBL bad_abi_msg(SB), RODATA, $45
-
-TEXT oabi_syscall<>(SB),NOSPLIT,$-4
- ADD $1, R15, R4 // R15 is hardware PC
- WORD $0xe12fff14 //BX (R4) // enter thumb mode
- // TODO(minux): only supports little-endian CPUs
- WORD $0x4770df01 // swi $1; bx lr
-
-TEXT main(SB),NOSPLIT,$-4
- MOVW $_rt0_arm_linux1(SB), R4
- B (R4)
-
diff --git a/src/runtime/rt0_linux_ppc64le.s b/src/runtime/rt0_linux_ppc64le.s
index 81b9913..73b9ae3 100644
--- a/src/runtime/rt0_linux_ppc64le.s
+++ b/src/runtime/rt0_linux_ppc64le.s
@@ -2,6 +2,7 @@
#include "textflag.h"
TEXT _rt0_ppc64le_linux(SB),NOSPLIT,$0
+ XOR R0, R0 // Make sure R0 is zero before _main
BR _main<>(SB)
TEXT _rt0_ppc64le_linux_lib(SB),NOSPLIT,$-8
@@ -10,7 +11,6 @@
MOVD R0, 16(R1) // Save LR in caller's frame.
MOVW CR, R0 // Save CR in caller's frame
MOVD R0, 8(R1)
- MOVD R2, 24(R1) // Save TOC in caller's frame.
MOVDU R1, -320(R1) // Allocate frame.
// Preserve callee-save registers.
@@ -121,7 +121,6 @@
FMOVD 304(R1), F31
ADD $320, R1
- MOVD 24(R1), R2
MOVD 8(R1), R0
MOVFL R0, $0xff
MOVD 16(R1), R0
diff --git a/src/runtime/rt0_linux_s390x.s b/src/runtime/rt0_linux_s390x.s
index aedd6c7..4b62c5a 100644
--- a/src/runtime/rt0_linux_s390x.s
+++ b/src/runtime/rt0_linux_s390x.s
@@ -4,17 +4,20 @@
#include "textflag.h"
-TEXT _rt0_s390x_linux(SB),NOSPLIT|NOFRAME,$0
+TEXT _rt0_s390x_linux(SB), NOSPLIT|NOFRAME, $0
// In a statically linked binary, the stack contains argc,
// argv as argc string pointers followed by a NULL, envv as a
// sequence of string pointers followed by a NULL, and auxv.
// There is no TLS base pointer.
- //
- // TODO: Support dynamic linking entry point
- MOVD 0(R15), R2 // argc
- ADD $8, R15, R3 // argv
- BR main(SB)
-TEXT main(SB),NOSPLIT|NOFRAME,$0
- MOVD $runtime·rt0_go(SB), R11
- BR R11
+ MOVD 0(R15), R2 // argc
+ ADD $8, R15, R3 // argv
+ BR main(SB)
+
+TEXT _rt0_s390x_linux_lib(SB), NOSPLIT, $0
+ MOVD $_rt0_s390x_lib(SB), R1
+ BR R1
+
+TEXT main(SB), NOSPLIT|NOFRAME, $0
+ MOVD $runtime·rt0_go(SB), R1
+ BR R1
diff --git a/src/runtime/rt0_nacl_386.s b/src/runtime/rt0_nacl_386.s
index d4ba063..4c99002 100644
--- a/src/runtime/rt0_nacl_386.s
+++ b/src/runtime/rt0_nacl_386.s
@@ -15,8 +15,10 @@
LEAL argv+16(FP), BX
MOVL AX, 0(SP)
MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+ JMP runtime·rt0_go(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_nacl_arm.s b/src/runtime/rt0_nacl_arm.s
index eadb478..2be8a07 100644
--- a/src/runtime/rt0_nacl_arm.s
+++ b/src/runtime/rt0_nacl_arm.s
@@ -13,8 +13,4 @@
TEXT _rt0_arm_nacl(SB),NOSPLIT,$-4
MOVW 8(R13), R0
MOVW $12(R13), R1
- MOVM.DB.W [R0-R1], (R13)
- B main(SB)
-
-TEXT main(SB),NOSPLIT,$0
B runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_netbsd_386.s b/src/runtime/rt0_netbsd_386.s
index 70b8532..cefc04a 100644
--- a/src/runtime/rt0_netbsd_386.s
+++ b/src/runtime/rt0_netbsd_386.s
@@ -4,13 +4,14 @@
#include "textflag.h"
-TEXT _rt0_386_netbsd(SB),NOSPLIT,$8
- MOVL 8(SP), AX
- LEAL 12(SP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_netbsd(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
+
+TEXT _rt0_386_netbsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_386_lib(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_netbsd_amd64.s b/src/runtime/rt0_netbsd_amd64.s
index fad5661..77c7187 100644
--- a/src/runtime/rt0_netbsd_amd64.s
+++ b/src/runtime/rt0_netbsd_amd64.s
@@ -5,11 +5,7 @@
#include "textflag.h"
TEXT _rt0_amd64_netbsd(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_netbsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_netbsd_arm.s b/src/runtime/rt0_netbsd_arm.s
index 2cb1182..503c32a 100644
--- a/src/runtime/rt0_netbsd_arm.s
+++ b/src/runtime/rt0_netbsd_arm.s
@@ -4,8 +4,8 @@
#include "textflag.h"
-TEXT _rt0_arm_netbsd(SB),NOSPLIT,$-4
- MOVW (R13), R0 // argc
- MOVW $4(R13), R1 // argv
- MOVM.DB.W [R0-R1], (R13)
- B runtime·rt0_go(SB)
+TEXT _rt0_arm_netbsd(SB),NOSPLIT,$0
+ B _rt0_arm(SB)
+
+TEXT _rt0_arm_netbsd_lib(SB),NOSPLIT,$0
+ B _rt0_arm_lib(SB)
diff --git a/src/runtime/rt0_openbsd_386.s b/src/runtime/rt0_openbsd_386.s
index f25d2e1..959f4d6 100644
--- a/src/runtime/rt0_openbsd_386.s
+++ b/src/runtime/rt0_openbsd_386.s
@@ -4,13 +4,14 @@
#include "textflag.h"
-TEXT _rt0_386_openbsd(SB),NOSPLIT,$8
- MOVL 8(SP), AX
- LEAL 12(SP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
+TEXT _rt0_386_openbsd(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
+
+TEXT _rt0_386_openbsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_386_lib(SB)
TEXT main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_openbsd_amd64.s b/src/runtime/rt0_openbsd_amd64.s
index 58fe666..c2f3f23 100644
--- a/src/runtime/rt0_openbsd_amd64.s
+++ b/src/runtime/rt0_openbsd_amd64.s
@@ -5,11 +5,7 @@
#include "textflag.h"
TEXT _rt0_amd64_openbsd(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_openbsd_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_openbsd_arm.s b/src/runtime/rt0_openbsd_arm.s
index 6207e55..3511c96 100644
--- a/src/runtime/rt0_openbsd_arm.s
+++ b/src/runtime/rt0_openbsd_arm.s
@@ -4,8 +4,8 @@
#include "textflag.h"
-TEXT _rt0_arm_openbsd(SB),NOSPLIT,$-4
- MOVW (R13), R0 // argc
- MOVW $4(R13), R1 // argv
- MOVM.DB.W [R0-R1], (R13)
- B runtime·rt0_go(SB)
+TEXT _rt0_arm_openbsd(SB),NOSPLIT,$0
+ B _rt0_arm(SB)
+
+TEXT _rt0_arm_openbsd_lib(SB),NOSPLIT,$0
+ B _rt0_arm_lib(SB)
diff --git a/src/runtime/rt0_plan9_386.s b/src/runtime/rt0_plan9_386.s
index cbbf245..6471615 100644
--- a/src/runtime/rt0_plan9_386.s
+++ b/src/runtime/rt0_plan9_386.s
@@ -14,7 +14,7 @@
MOVL AX, 0(SP)
LEAL inargv+0(FP), AX
MOVL AX, 4(SP)
- CALL runtime·rt0_go(SB)
+ JMP runtime·rt0_go(SB)
GLOBL _tos(SB), NOPTR, $4
GLOBL _privates(SB), NOPTR, $4
diff --git a/src/runtime/rt0_plan9_arm.s b/src/runtime/rt0_plan9_arm.s
index 2a35e4e..d6174a4 100644
--- a/src/runtime/rt0_plan9_arm.s
+++ b/src/runtime/rt0_plan9_arm.s
@@ -10,8 +10,6 @@
MOVW R0, _tos(SB)
MOVW 0(R13), R0
MOVW $4(R13), R1
- MOVW.W R1, -4(R13)
- MOVW.W R0, -4(R13)
B runtime·rt0_go(SB)
GLOBL _tos(SB), NOPTR, $4
diff --git a/src/runtime/rt0_solaris_amd64.s b/src/runtime/rt0_solaris_amd64.s
index e2d1e71..5c46ded 100644
--- a/src/runtime/rt0_solaris_amd64.s
+++ b/src/runtime/rt0_solaris_amd64.s
@@ -5,11 +5,7 @@
#include "textflag.h"
TEXT _rt0_amd64_solaris(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
+TEXT _rt0_amd64_solaris_lib(SB),NOSPLIT,$0
+ JMP _rt0_amd64_lib(SB)
diff --git a/src/runtime/rt0_windows_386.s b/src/runtime/rt0_windows_386.s
index b9407a9..fa39edd 100644
--- a/src/runtime/rt0_windows_386.s
+++ b/src/runtime/rt0_windows_386.s
@@ -4,13 +4,8 @@
#include "textflag.h"
-TEXT _rt0_386_windows(SB),NOSPLIT,$12
- MOVL 12(SP), AX
- LEAL 16(SP), BX
- MOVL AX, 4(SP)
- MOVL BX, 8(SP)
- MOVL $-1, 0(SP) // return PC for main
- JMP _main(SB)
+TEXT _rt0_386_windows(SB),NOSPLIT,$0
+ JMP _rt0_386(SB)
// When building with -buildmode=(c-shared or c-archive), this
// symbol is called. For dynamic libraries it is called when the
@@ -41,10 +36,12 @@
RET
TEXT _rt0_386_windows_lib_go(SB),NOSPLIT,$0
- MOVL $0, DI
- MOVL $0, SI
- MOVL $runtime·rt0_go(SB), AX
- JMP AX
+ PUSHL $0
+ PUSHL $0
+ JMP runtime·rt0_go(SB)
TEXT _main(SB),NOSPLIT,$0
+ // Remove the return address from the stack.
+ // rt0_go doesn't expect it to be there.
+ ADDL $4, SP
JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s
index 2f73b37..1604711 100644
--- a/src/runtime/rt0_windows_amd64.s
+++ b/src/runtime/rt0_windows_amd64.s
@@ -7,10 +7,7 @@
#include "textflag.h"
TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8
- LEAQ 8(SP), SI // argv
- MOVQ 0(SP), DI // argc
- MOVQ $main(SB), AX
- JMP AX
+ JMP _rt0_amd64(SB)
// When building with -buildmode=(c-shared or c-archive), this
// symbol is called. For dynamic libraries it is called when the
@@ -42,7 +39,3 @@
MOVQ $0, SI
MOVQ $runtime·rt0_go(SB), AX
JMP AX
-
-TEXT main(SB),NOSPLIT,$-8
- MOVQ $runtime·rt0_go(SB), AX
- JMP AX
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index 1318bab..5e05086 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -22,11 +22,15 @@
func checkGdbEnvironment(t *testing.T) {
testenv.MustHaveGoBuild(t)
- if runtime.GOOS == "darwin" {
+ switch runtime.GOOS {
+ case "darwin":
t.Skip("gdb does not work on darwin")
- }
- if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64" {
- t.Skip("skipping gdb tests on linux/ppc64; see golang.org/issue/17366")
+ case "netbsd":
+ t.Skip("gdb does not work with threads on NetBSD; see golang.org/issue/22893 and gnats.netbsd.org/52548")
+ case "linux":
+ if runtime.GOARCH == "ppc64" {
+ t.Skip("skipping gdb tests on linux/ppc64; see golang.org/issue/17366")
+ }
}
if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final {
t.Skip("gdb test can fail with GOROOT_FINAL pending")
@@ -76,7 +80,7 @@
import "runtime"
var gslice []string
func main() {
- mapvar := make(map[string]string,5)
+ mapvar := make(map[string]string, 13)
mapvar["abc"] = "def"
mapvar["ghi"] = "jkl"
strvar := "abc"
@@ -84,7 +88,7 @@
slicevar := make([]string, 0, 16)
slicevar = append(slicevar, mapvar["abc"])
fmt.Println("hi") // line 13
- _ = ptrvar
+ runtime.KeepAlive(ptrvar)
gslice = slicevar
runtime.KeepAlive(mapvar)
}
@@ -106,8 +110,8 @@
t.Skip("skipping because cgo is not enabled")
}
- t.Parallel()
checkGdbEnvironment(t)
+ t.Parallel()
checkGdbVersion(t)
checkGdbPython(t)
@@ -132,7 +136,7 @@
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
cmd.Dir = dir
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}
@@ -198,8 +202,10 @@
t.Fatalf("info goroutines failed: %s", bl)
}
- printMapvarRe := regexp.MustCompile(`\Q = map[string]string = {["abc"] = "def", ["ghi"] = "jkl"}\E$`)
- if bl := blocks["print mapvar"]; !printMapvarRe.MatchString(bl) {
+ printMapvarRe1 := regexp.MustCompile(`\Q = map[string]string = {["abc"] = "def", ["ghi"] = "jkl"}\E$`)
+ printMapvarRe2 := regexp.MustCompile(`\Q = map[string]string = {["ghi"] = "jkl", ["abc"] = "def"}\E$`)
+ if bl := blocks["print mapvar"]; !printMapvarRe1.MatchString(bl) &&
+ !printMapvarRe2.MatchString(bl) {
t.Fatalf("print mapvar failed: %s", bl)
}
@@ -212,7 +218,7 @@
// a collection of scalar vars holding the fields. In such cases
// the DWARF variable location expression should be of the
// form "var.field" and not just "field".
- infoLocalsRe := regexp.MustCompile(`^slicevar.len = `)
+ infoLocalsRe := regexp.MustCompile(`.*\sslicevar.cap = `)
if bl := blocks["info locals"]; !infoLocalsRe.MatchString(bl) {
t.Fatalf("info locals failed: %s", bl)
}
@@ -260,8 +266,8 @@
testenv.SkipFlaky(t, 15603)
}
- t.Parallel()
checkGdbEnvironment(t)
+ t.Parallel()
checkGdbVersion(t)
dir, err := ioutil.TempDir("", "go-build")
@@ -278,7 +284,7 @@
}
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
cmd.Dir = dir
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}
@@ -330,8 +336,8 @@
// TestGdbAutotmpTypes ensures that types of autotmp variables appear in .debug_info
// See bug #17830.
func TestGdbAutotmpTypes(t *testing.T) {
- t.Parallel()
checkGdbEnvironment(t)
+ t.Parallel()
checkGdbVersion(t)
dir, err := ioutil.TempDir("", "go-build")
@@ -346,9 +352,9 @@
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
- cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=-N -l", "-o", "a.exe")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe")
cmd.Dir = dir
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}
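The builds in these tests switch from -gcflags "-N -l" to -gcflags=all=-N -l: with the go1.10 tool, -gcflags applies only to the packages named on the command line unless a package pattern such as all= is given, and the debugger tests want every package, including the runtime, compiled without optimizations or inlining. A minimal standalone sketch of the same invocation from Go (the working directory and output name are assumptions for the illustration):

package main

import (
	"log"
	"os/exec"
)

func main() {
	// "all=" makes -N -l apply to every package in the build, not just
	// the one named on the command line.
	cmd := exec.Command("go", "build", "-gcflags=all=-N -l", "-o", "a.exe", ".")
	cmd.Dir = "/tmp/gdbtest" // hypothetical directory holding the test program
	out, err := cmd.CombinedOutput()
	if err != nil {
		log.Fatalf("build failed: %v\n%s", err, out)
	}
	log.Printf("built a.exe:\n%s", out)
}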
@@ -381,3 +387,62 @@
}
}
}
+
+const constsSource = `
+package main
+
+const aConstant int = 42
+const largeConstant uint64 = ^uint64(0)
+const minusOne int64 = -1
+
+func main() {
+ println("hello world")
+}
+`
+
+func TestGdbConst(t *testing.T) {
+ checkGdbEnvironment(t)
+ t.Parallel()
+ checkGdbVersion(t)
+
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ // Build the source code.
+ src := filepath.Join(dir, "main.go")
+ err = ioutil.WriteFile(src, []byte(constsSource), 0644)
+ if err != nil {
+ t.Fatalf("failed to create file: %v", err)
+ }
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe")
+ cmd.Dir = dir
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("building source %v\n%s", err, out)
+ }
+
+ // Execute gdb commands.
+ args := []string{"-nx", "-batch",
+ "-ex", "set startup-with-shell off",
+ "-ex", "break main.main",
+ "-ex", "run",
+ "-ex", "print main.aConstant",
+ "-ex", "print main.largeConstant",
+ "-ex", "print main.minusOne",
+ "-ex", "print 'runtime._MSpanInUse'",
+ "-ex", "print 'runtime._PageSize'",
+ filepath.Join(dir, "a.exe"),
+ }
+ got, _ := exec.Command("gdb", args...).CombinedOutput()
+
+ sgot := strings.Replace(string(got), "\r\n", "\n", -1)
+
+ t.Logf("output %q", sgot)
+
+ if !strings.Contains(sgot, "\n$1 = 42\n$2 = 18446744073709551615\n$3 = -1\n$4 = 1 '\\001'\n$5 = 8192") {
+ t.Fatalf("output mismatch")
+ }
+}
diff --git a/src/runtime/runtime-lldb_test.go b/src/runtime/runtime-lldb_test.go
index 98bc906..9a28705 100644
--- a/src/runtime/runtime-lldb_test.go
+++ b/src/runtime/runtime-lldb_test.go
@@ -5,11 +5,7 @@
package runtime_test
import (
- "debug/elf"
- "debug/macho"
- "encoding/binary"
"internal/testenv"
- "io"
"io/ioutil"
"os"
"os/exec"
@@ -158,7 +154,7 @@
t.Fatalf("failed to create file: %v", err)
}
- cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags", "-N -l", "-o", "a.exe")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe")
cmd.Dir = dir
out, err := cmd.CombinedOutput()
if err != nil {
@@ -182,81 +178,3 @@
t.Fatalf("Unexpected lldb output:\n%s", got)
}
}
-
-// Check that aranges are valid even when lldb isn't installed.
-func TestDwarfAranges(t *testing.T) {
- testenv.MustHaveGoBuild(t)
- dir, err := ioutil.TempDir("", "go-build")
- if err != nil {
- t.Fatalf("failed to create temp directory: %v", err)
- }
- defer os.RemoveAll(dir)
-
- src := filepath.Join(dir, "main.go")
- err = ioutil.WriteFile(src, []byte(lldbHelloSource), 0644)
- if err != nil {
- t.Fatalf("failed to create file: %v", err)
- }
-
- cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
- cmd.Dir = dir
- out, err := cmd.CombinedOutput()
- if err != nil {
- t.Fatalf("building source %v\n%s", err, out)
- }
-
- filename := filepath.Join(dir, "a.exe")
- if f, err := elf.Open(filename); err == nil {
- sect := f.Section(".debug_aranges")
- if sect == nil {
- t.Fatal("Missing aranges section")
- }
- verifyAranges(t, f.ByteOrder, sect.Open())
- } else if f, err := macho.Open(filename); err == nil {
- sect := f.Section("__debug_aranges")
- if sect == nil {
- t.Fatal("Missing aranges section")
- }
- verifyAranges(t, f.ByteOrder, sect.Open())
- } else {
- t.Skip("Not an elf or macho binary.")
- }
-}
-
-func verifyAranges(t *testing.T, byteorder binary.ByteOrder, data io.ReadSeeker) {
- var header struct {
- UnitLength uint32 // does not include the UnitLength field
- Version uint16
- Offset uint32
- AddressSize uint8
- SegmentSize uint8
- }
- for {
- offset, err := data.Seek(0, io.SeekCurrent)
- if err != nil {
- t.Fatalf("Seek error: %v", err)
- }
- if err = binary.Read(data, byteorder, &header); err == io.EOF {
- return
- } else if err != nil {
- t.Fatalf("Error reading arange header: %v", err)
- }
- tupleSize := int64(header.SegmentSize) + 2*int64(header.AddressSize)
- lastTupleOffset := offset + int64(header.UnitLength) + 4 - tupleSize
- if lastTupleOffset%tupleSize != 0 {
- t.Fatalf("Invalid arange length %d, (addr %d, seg %d)", header.UnitLength, header.AddressSize, header.SegmentSize)
- }
- if _, err = data.Seek(lastTupleOffset, io.SeekStart); err != nil {
- t.Fatalf("Seek error: %v", err)
- }
- buf := make([]byte, tupleSize)
- if n, err := data.Read(buf); err != nil || int64(n) < tupleSize {
- t.Fatalf("Read error: %v", err)
- }
- for _, val := range buf {
- if val != 0 {
- t.Fatalf("Invalid terminator")
- }
- }
- }
-}
diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go
index d8fe2f4..33ecc26 100644
--- a/src/runtime/runtime.go
+++ b/src/runtime/runtime.go
@@ -57,3 +57,9 @@
//go:linkname os_runtime_args os.runtime_args
func os_runtime_args() []string { return append([]string{}, argslice...) }
+
+//go:linkname syscall_Exit syscall.Exit
+//go:nosplit
+func syscall_Exit(code int) {
+ exit(int32(code))
+}
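The new syscall_Exit shows the //go:linkname "push" pattern: the runtime owns the body and binds it to the name syscall.Exit, which the syscall package only declares. A rough sketch of the complementary "pull" direction of the same mechanism, using runtime.nanotime as the target (illustrative only; a package containing a body-less declaration also needs an empty assembly file, e.g. empty.s, so the compiler accepts the missing body, and newer toolchains restrict //go:linkname more aggressively):

package main

import (
	_ "unsafe" // required for //go:linkname to be honored
)

// nanotime is implemented in the runtime; the directive binds this
// body-less declaration to that implementation.
//go:linkname nanotime runtime.nanotime
func nanotime() int64

func main() {
	start := nanotime()
	println("monotonic clock delta (ns):", nanotime()-start)
}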
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index c073348..0971e0c 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -99,10 +99,6 @@
func testAtomic64() {
test_z64 = 42
test_x64 = 0
- prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
- prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
- prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
- prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
if atomic.Cas64(&test_z64, test_x64, 1) {
throw("cas64 failed")
}
@@ -390,13 +386,6 @@
setTraceback(gogetenv("GOTRACEBACK"))
traceback_env = traceback_cache
-
- // For cgocheck > 1, we turn on the write barrier at all times
- // and check all pointer writes.
- if debug.cgocheck > 1 {
- writeBarrier.cgo = true
- writeBarrier.enabled = true
- }
}
//go:linkname setTraceback runtime/debug.SetTraceback
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 6871d9c..556f13d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -169,9 +169,13 @@
// a word that is completely ignored by the GC than to have one for which
// only a few updates are ignored.
//
-// Gs, Ms, and Ps are always reachable via true pointers in the
-// allgs, allm, and allp lists or (during allocation before they reach those lists)
+// Gs and Ps are always reachable via true pointers in the
+// allgs and allp lists or (during allocation before they reach those lists)
// from stack variables.
+//
+// Ms are always reachable via true pointers either from allm or
+// freem. Unlike Gs and Ps we do free Ms, so it's important that
+// nothing ever hold an muintptr across a safe point.
// A guintptr holds a goroutine pointer, but typed as a uintptr
// to bypass write barriers. It is used in the Gobuf goroutine state
@@ -221,6 +225,15 @@
//go:nosplit
func (pp *puintptr) set(p *p) { *pp = puintptr(unsafe.Pointer(p)) }
+// muintptr is a *m that is not tracked by the garbage collector.
+//
+// Because we do free Ms, there are some additional constraints on
+// muintptrs:
+//
+// 1. Never hold an muintptr locally across a safe point.
+//
+// 2. Any muintptr in the heap must be owned by the M itself so it can
+// ensure it is not in use when the last true *m is released.
type muintptr uintptr
//go:nosplit
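muintptr joins guintptr and puintptr as a pointer stored in a plain uintptr so the garbage collector neither traces nor adjusts it; the extra rules above exist because, unlike Gs and Ps, Ms really are freed. A self-contained sketch of the wrapper idiom with an invented node type (an illustration of the pattern, not runtime API; outside the runtime it is only safe while a real pointer keeps the object alive elsewhere):

package main

import "unsafe"

type node struct{ id int }

// nodeuintptr mimics the guintptr/muintptr idiom: the address is stored as a
// uintptr, invisible to the GC, with helpers to convert to and from *node.
type nodeuintptr uintptr

func (np nodeuintptr) ptr() *node   { return (*node)(unsafe.Pointer(np)) }
func (np *nodeuintptr) set(n *node) { *np = nodeuintptr(unsafe.Pointer(n)) }

func main() {
	n := &node{id: 7} // the true *node that keeps the object reachable
	var np nodeuintptr
	np.set(n)
	println(np.ptr().id)
}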
@@ -241,17 +254,19 @@
// The offsets of sp, pc, and g are known to (hard-coded in) libmach.
//
// ctxt is unusual with respect to GC: it may be a
- // heap-allocated funcval so write require a write barrier,
- // but gobuf needs to be cleared from assembly. We take
- // advantage of the fact that the only path that uses a
- // non-nil ctxt is morestack. As a result, gogo is the only
- // place where it may not already be nil, so gogo uses an
- // explicit write barrier. Everywhere else that resets the
- // gobuf asserts that ctxt is already nil.
+ // heap-allocated funcval, so GC needs to track it, but it
+ // needs to be set and cleared from assembly, where it's
+ // difficult to have write barriers. However, ctxt is really a
+ // saved, live register, and we only ever exchange it between
+ // the real register and the gobuf. Hence, we treat it as a
+ // root during stack scanning, which means assembly that saves
+ // and restores it doesn't need write barriers. It's still
+ // typed as a pointer so that any other writes from Go get
+ // write barriers.
sp uintptr
pc uintptr
g guintptr
- ctxt unsafe.Pointer // this has to be a pointer so that gc scans it
+ ctxt unsafe.Pointer
ret sys.Uintreg
lr uintptr
bp uintptr // for GOEXPERIMENT=framepointer
@@ -272,11 +287,14 @@
// channel this sudog is blocking on. shrinkstack depends on
// this for sudogs involved in channel ops.
- g *g
- selectdone *uint32 // CAS to 1 to win select race (may point to stack)
- next *sudog
- prev *sudog
- elem unsafe.Pointer // data element (may point to stack)
+ g *g
+
+ // isSelect indicates g is participating in a select, so
+ // g.selectDone must be CAS'd to win the wake-up race.
+ isSelect bool
+ next *sudog
+ prev *sudog
+ elem unsafe.Pointer // data element (may point to stack)
// The following fields are never accessed concurrently.
// For channels, waitlink is only accessed by g.
@@ -354,7 +372,7 @@
sysexitticks int64 // cputicks when syscall has returned (for tracing)
traceseq uint64 // trace event sequencer
tracelastp puintptr // last P emitted an event for this goroutine
- lockedm *m
+ lockedm muintptr
sig uint32
writebuf []byte
sigcode0 uintptr
@@ -367,6 +385,7 @@
cgoCtxt []uintptr // cgo traceback context
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
+ selectDone uint32 // are we participating in a select and did someone win the race?
// Per-G GC state
@@ -386,16 +405,17 @@
divmod uint32 // div/mod denominator for arm - known to liblink
// Fields not known to debuggers.
- procid uint64 // for debuggers, but offset not hard-coded
- gsignal *g // signal-handling g
- sigmask sigset // storage for saved signal mask
- tls [6]uintptr // thread-local storage (for x86 extern register)
+ procid uint64 // for debuggers, but offset not hard-coded
+ gsignal *g // signal-handling g
+ goSigStack gsignalStack // Go-allocated signal handling stack
+ sigmask sigset // storage for saved signal mask
+ tls [6]uintptr // thread-local storage (for x86 extern register)
mstartfn func()
curg *g // current running goroutine
caughtsig guintptr // goroutine running during fatal signal
p puintptr // attached p for executing go code (nil if not executing go code)
nextp puintptr
- id int32
+ id int64
mallocing int32
throwing int32
preemptoff string // if != "", keep curg running on this m
@@ -409,8 +429,11 @@
inwb bool // m is executing a write barrier
newSigstack bool // minit on C thread called sigaltstack
printlock int8
- incgo bool // m is executing a cgo call
- fastrand uint32
+ incgo bool // m is executing a cgo call
+ freeWait uint32 // if == 0, safe to free g0 and delete m (atomic)
+ fastrand [2]uint32
+ needextram bool
+ traceback uint8
ncgocall uint64 // number of cgo calls in total
ncgo int32 // number of cgo calls currently in progress
cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily
@@ -419,15 +442,14 @@
alllink *m // on allm
schedlink muintptr
mcache *mcache
- lockedg *g
- createstack [32]uintptr // stack that created this thread.
- freglo [16]uint32 // d[i] lsb and f[i]
- freghi [16]uint32 // d[i] msb and f[i+16]
- fflag uint32 // floating point compare flags
- locked uint32 // tracking for lockosthread
- nextwaitm uintptr // next m waiting for lock
- needextram bool
- traceback uint8
+ lockedg guintptr
+ createstack [32]uintptr // stack that created this thread.
+ freglo [16]uint32 // d[i] lsb and f[i]
+ freghi [16]uint32 // d[i] msb and f[i+16]
+ fflag uint32 // floating point compare flags
+ lockedExt uint32 // tracking for external LockOSThread
+ lockedInt uint32 // tracking for internal lockOSThread
+ nextwaitm muintptr // next m waiting for lock
waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
waitlock unsafe.Pointer
waittraceev byte
@@ -435,6 +457,7 @@
startingtrace bool
syscalltick uint32
thread uintptr // thread handle
+ freelink *m // on sched.freem
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
@@ -502,26 +525,30 @@
palloc persistentAlloc // per-P to avoid mutex
// Per-P GC state
- gcAssistTime int64 // Nanoseconds in assistAlloc
- gcBgMarkWorker guintptr
- gcMarkWorkerMode gcMarkWorkerMode
+ gcAssistTime int64 // Nanoseconds in assistAlloc
+ gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker
+ gcBgMarkWorker guintptr
+ gcMarkWorkerMode gcMarkWorkerMode
+
+ // gcMarkWorkerStartTime is the nanotime() at which this mark
+ // worker started.
+ gcMarkWorkerStartTime int64
// gcw is this P's GC work buffer cache. The work buffer is
// filled by write barriers, drained by mutator assists, and
// disposed on certain GC state transitions.
gcw gcWork
+ // wbBuf is this P's GC write barrier buffer.
+ //
+ // TODO: Consider caching this in the running G.
+ wbBuf wbBuf
+
runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
pad [sys.CacheLineSize]byte
}
-const (
- // The max value of GOMAXPROCS.
- // There are no fundamental restrictions on the value.
- _MaxGomaxprocs = 1 << 10
-)
-
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
goidgen uint64
@@ -529,11 +556,16 @@
lock mutex
+ // When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
+ // sure to call checkdead().
+
midle muintptr // idle m's waiting for work
nmidle int32 // number of idle m's waiting for work
nmidlelocked int32 // number of locked m's waiting for work
- mcount int32 // number of m's that have been created
+ mnext int64 // number of m's that have been created and next M ID
maxmcount int32 // maximum number of m's allowed (or die)
+ nmsys int32 // number of system m's not counted for deadlock
+ nmfreed int64 // cumulative number of freed m's
ngsys uint32 // number of system goroutines; updated atomically
@@ -560,6 +592,10 @@
deferlock mutex
deferpool [5]*_defer
+ // freem is the list of m's waiting to be freed when their
+ // m.exited is set. Linked through m.freelink.
+ freem *m
+
gcwaiting uint32 // gc is waiting to run
stopwait int32
stopnote note
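sched.freem is an intrusive singly linked list: each M carries its own freelink field, so queuing an M for freeing allocates nothing. A minimal sketch of the idiom with an invented element type (the real list additionally synchronizes on m.freeWait before the M is reclaimed):

package main

type worker struct {
	id       int
	freelink *worker // next entry on the free list; nil terminates it
}

var freeWorkers *worker // head of the intrusive free list

func pushFree(w *worker) {
	w.freelink = freeWorkers
	freeWorkers = w
}

func popFree() *worker {
	w := freeWorkers
	if w != nil {
		freeWorkers = w.freelink
		w.freelink = nil
	}
	return w
}

func main() {
	pushFree(&worker{id: 1})
	pushFree(&worker{id: 2})
	for w := popFree(); w != nil; w = popFree() {
		println("freed worker", w.id)
	}
}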
@@ -578,18 +614,7 @@
totaltime int64 // ∫gomaxprocs dt up to procresizetime
}
-// The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
-// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
-// External locks are not recursive; a second lock is silently ignored.
-// The upper bits of m.locked record the nesting depth of calls to lockOSThread
-// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
-// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
-// goroutine is holding the lock during the initialization phase.
-const (
- _LockExternal = 1
- _LockInternal = 2
-)
-
+// Values for the flags field of a sigTabT.
const (
_SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel
_SigKill // if signal.Notify doesn't take it, exit quietly
@@ -598,7 +623,8 @@
_SigDefault // if the signal isn't explicitly requested, don't monitor it
_SigGoExit // cause all runtime procs to exit (only used on Plan 9).
_SigSetStack // add SA_ONSTACK to libc handler
- _SigUnblock // unblocked in minit
+ _SigUnblock // always unblock; see blockableSig
+ _SigIgn // _SIG_DFL action is to ignore the signal
)
// Layout of in-memory per-function information prepared by linker
@@ -624,14 +650,11 @@
// Needs to be in sync with
// ../cmd/compile/internal/gc/reflect.go:/^func.dumptypestructs.
type itab struct {
- inter *interfacetype
- _type *_type
- link *itab
- hash uint32 // copy of _type.hash. Used for type switches.
- bad bool // type does not implement interface
- inhash bool // has this itab been added to hash?
- unused [2]byte
- fun [1]uintptr // variable sized
+ inter *interfacetype
+ _type *_type
+ hash uint32 // copy of _type.hash. Used for type switches.
+ _ [4]byte
+ fun [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter.
}
// Lock-free stack node.
@@ -672,7 +695,8 @@
}
}
-// deferred subroutine calls
+// A _defer holds an entry on the list of deferred calls.
+// If you add a field here, add code to clear it in freedefer.
type _defer struct {
siz int32
started bool
@@ -716,15 +740,15 @@
const _TracebackMaxFrames = 100
var (
- emptystring string
- allglen uintptr
- allm *m
- allp [_MaxGomaxprocs + 1]*p
- gomaxprocs int32
- ncpu int32
- forcegc forcegcstate
- sched schedt
- newprocs int32
+ allglen uintptr
+ allm *m
+ allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable
+ allpLock mutex // Protects P-less reads of allp and all writes
+ gomaxprocs int32
+ ncpu int32
+ forcegc forcegcstate
+ sched schedt
+ newprocs int32
// Information about what cpu features are available.
// Set on startup in asm_{386,amd64,amd64p32}.s.
diff --git a/src/runtime/runtime_linux_test.go b/src/runtime/runtime_linux_test.go
index 2b6daec..6123972 100644
--- a/src/runtime/runtime_linux_test.go
+++ b/src/runtime/runtime_linux_test.go
@@ -8,6 +8,7 @@
. "runtime"
"syscall"
"testing"
+ "time"
"unsafe"
)
@@ -21,6 +22,17 @@
// for how it is used in init (must be on main thread).
pid, tid = syscall.Getpid(), syscall.Gettid()
LockOSThread()
+
+ sysNanosleep = func(d time.Duration) {
+ // Invoke a blocking syscall directly; calling time.Sleep()
+ // would deschedule the goroutine instead.
+ ts := syscall.NsecToTimespec(d.Nanoseconds())
+ for {
+ if err := syscall.Nanosleep(&ts, &ts); err != syscall.EINTR {
+ return
+ }
+ }
+ }
}
func TestLockOSThread(t *testing.T) {
diff --git a/src/runtime/runtime_mmap_test.go b/src/runtime/runtime_mmap_test.go
index 2eca6b9..57c38bc 100644
--- a/src/runtime/runtime_mmap_test.go
+++ b/src/runtime/runtime_mmap_test.go
@@ -16,16 +16,10 @@
// what the code in mem_bsd.go, mem_darwin.go, and mem_linux.go expects.
// See the uses of ENOMEM in sysMap in those files.
func TestMmapErrorSign(t *testing.T) {
- p := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
+ p, err := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
- // The runtime.mmap function is nosplit, but t.Errorf is not.
- // Reset the pointer so that we don't get an "invalid stack
- // pointer" error from t.Errorf if we call it.
- v := uintptr(p)
- p = nil
-
- if v != runtime.ENOMEM {
- t.Errorf("mmap = %v, want %v", v, runtime.ENOMEM)
+ if p != nil || err != runtime.ENOMEM {
+ t.Errorf("mmap = %v, %v, want nil, %v", p, err, runtime.ENOMEM)
}
}
@@ -35,20 +29,20 @@
ps := runtime.GetPhysPageSize()
// Get a region of memory to play with. This should be page-aligned.
- b := uintptr(runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0))
- if b < 4096 {
- t.Fatalf("Mmap: %v", b)
+ b, err := runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
+ if err != 0 {
+ t.Fatalf("Mmap: %v", err)
}
// Mmap should fail at a half page into the buffer.
- err := uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0))
- if err >= 4096 {
+ _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)
+ if err == 0 {
t.Errorf("Mmap should have failed with half-page alignment %d, but succeeded: %v", ps/2, err)
}
// Mmap should succeed at a full page into the buffer.
- err = uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0))
- if err < 4096 {
+ _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)
+ if err != 0 {
t.Errorf("Mmap at full-page alignment %d failed: %v", ps, err)
}
}
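The runtime.Mmap test hook now returns an (address, errno) pair instead of smuggling the error through the pointer value, which is why the assertions compare err against ENOMEM directly. Outside the runtime the analogous two-value contract is syscall.Mmap; a small Unix-only sketch (flag names assume a Unix build of the syscall package):

package main

import (
	"log"
	"os"
	"syscall"
)

func main() {
	// Map one anonymous, private page; errors come back as a separate
	// value rather than being encoded in the returned address.
	b, err := syscall.Mmap(-1, 0, os.Getpagesize(),
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		log.Fatalf("mmap: %v", err)
	}
	defer syscall.Munmap(b)
	b[0] = 42
	log.Printf("mapped %d bytes, first byte = %d", len(b), b[0])
}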
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index e9bc256..d5b6b3a 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -5,6 +5,7 @@
package runtime_test
import (
+ "flag"
"io"
. "runtime"
"runtime/debug"
@@ -13,6 +14,8 @@
"unsafe"
)
+var flagQuick = flag.Bool("quick", false, "skip slow tests, for second run in all.bash")
+
func init() {
// We're testing the runtime, so make tracebacks show things
// in the runtime. This only raises the level, so it won't
@@ -196,9 +199,9 @@
}
func TestEqString(t *testing.T) {
- // This isn't really an exhaustive test of eqstring, it's
+ // This isn't really an exhaustive test of == on strings, it's
// just a convenient way of documenting (via eqstring_generic)
- // what eqstring does.
+ // what == does.
s := []string{
"",
"a",
@@ -213,7 +216,7 @@
x := s1 == s2
y := eqstring_generic(s1, s2)
if x != y {
- t.Errorf(`eqstring("%s","%s") = %t, want %t`, s1, s2, x, y)
+ t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
}
}
}
diff --git a/src/runtime/rwmutex.go b/src/runtime/rwmutex.go
index 7eeb559..a6da4c9 100644
--- a/src/runtime/rwmutex.go
+++ b/src/runtime/rwmutex.go
@@ -10,7 +10,7 @@
// This is a copy of sync/rwmutex.go rewritten to work in the runtime.
-// An rwmutex is a reader/writer mutual exclusion lock.
+// A rwmutex is a reader/writer mutual exclusion lock.
// The lock can be held by an arbitrary number of readers or a single writer.
// This is a variant of sync.RWMutex, for the runtime package.
// Like mutex, rwmutex blocks the calling M.
diff --git a/src/runtime/rwmutex_test.go b/src/runtime/rwmutex_test.go
index a69eca1..872b3b0 100644
--- a/src/runtime/rwmutex_test.go
+++ b/src/runtime/rwmutex_test.go
@@ -12,6 +12,7 @@
import (
"fmt"
. "runtime"
+ "runtime/debug"
"sync/atomic"
"testing"
)
@@ -47,6 +48,10 @@
func TestParallelRWMutexReaders(t *testing.T) {
defer GOMAXPROCS(GOMAXPROCS(-1))
+ // If runtime triggers a forced GC during this test then it will deadlock,
+ // since the goroutines can't be stopped/preempted.
+ // Disable GC for this test (see issue #10958).
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
doTestParallelReaders(1)
doTestParallelReaders(3)
doTestParallelReaders(4)
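The added defer packs a save-and-restore into one statement: debug.SetGCPercent returns the previous setting, so the inner call disables the collector for the rest of the test and the deferred outer call restores the old percentage on the way out. Written out long-hand, the equivalent is:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	old := debug.SetGCPercent(-1) // disable GC, remember the previous GC percent
	defer debug.SetGCPercent(old) // put it back when we return

	fmt.Println("GC disabled; previous GC percent was", old)
	// ... GC-sensitive work runs here ...
}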
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 715cee8..b59c096 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -73,7 +73,7 @@
}
func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectsend: too many cases")
@@ -94,7 +94,7 @@
}
func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectrecv: too many cases")
@@ -116,7 +116,7 @@
}
func selectdefault(sel *hselect) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectdefault: too many cases")
@@ -286,7 +286,6 @@
var (
gp *g
- done uint32
sg *sudog
c *hchan
k *scase
@@ -353,7 +352,6 @@
// pass 2 - enqueue on all chans
gp = getg()
- done = 0
if gp.waiting != nil {
throw("gp.waiting != nil")
}
@@ -367,8 +365,7 @@
c = cas.c
sg := acquireSudog()
sg.g = gp
- // Note: selectdone is adjusted for stack copies in stack1.go:adjustsudogs
- sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done)))
+ sg.isSelect = true
// No stack splits between assigning elem and enqueuing
// sg on gp.waiting where copystack can find it.
sg.elem = cas.elem
@@ -394,62 +391,9 @@
gp.param = nil
gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1)
- // While we were asleep, some goroutine came along and completed
- // one of the cases in the select and woke us up (called ready).
- // As part of that process, the goroutine did a cas on done above
- // (aka *sg.selectdone for all queued sg) to win the right to
- // complete the select. Now done = 1.
- //
- // If we copy (grow) our own stack, we will update the
- // selectdone pointers inside the gp.waiting sudog list to point
- // at the new stack. Another goroutine attempting to
- // complete one of our (still linked in) select cases might
- // see the new selectdone pointer (pointing at the new stack)
- // before the new stack has real data; if the new stack has done = 0
- // (before the old values are copied over), the goroutine might
- // do a cas via sg.selectdone and incorrectly believe that it has
- // won the right to complete the select, executing a second
- // communication and attempting to wake us (call ready) again.
- //
- // Then things break.
- //
- // The best break is that the goroutine doing ready sees the
- // _Gcopystack status and throws, as in #17007.
- // A worse break would be for us to continue on, start running real code,
- // block in a semaphore acquisition (sema.go), and have the other
- // goroutine wake us up without having really acquired the semaphore.
- // That would result in the goroutine spuriously running and then
- // queue up another spurious wakeup when the semaphore really is ready.
- // In general the situation can cascade until something notices the
- // problem and causes a crash.
- //
- // A stack shrink does not have this problem, because it locks
- // all the channels that are involved first, blocking out the
- // possibility of a cas on selectdone.
- //
- // A stack growth before gopark above does not have this
- // problem, because we hold those channel locks (released by
- // selparkcommit).
- //
- // A stack growth after sellock below does not have this
- // problem, because again we hold those channel locks.
- //
- // The only problem is a stack growth during sellock.
- // To keep that from happening, run sellock on the system stack.
- //
- // It might be that we could avoid this if copystack copied the
- // stack before calling adjustsudogs. In that case,
- // syncadjustsudogs would need to recopy the tiny part that
- // it copies today, resulting in a little bit of extra copying.
- //
- // An even better fix, not for the week before a release candidate,
- // would be to put space in every sudog and make selectdone
- // point at (say) the space in the first sudog.
+ sellock(scases, lockorder)
- systemstack(func() {
- sellock(scases, lockorder)
- })
-
+ gp.selectDone = 0
sg = (*sudog)(gp.param)
gp.param = nil
@@ -462,7 +406,7 @@
sglist = gp.waiting
// Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
- sg1.selectdone = nil
+ sg1.isSelect = false
sg1.elem = nil
sg1.c = nil
}
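The per-select stack word and the sudog.selectdone pointer are replaced by an isSelect bool on each sudog plus a selectDone word on the G itself, which removes the hazard described in the deleted comment: other goroutines no longer chase a pointer into a stack that may be in the middle of being copied. The wake-up race is still decided by a compare-and-swap in which the first channel operation to flip the word from 0 to 1 wins. A detached sketch of that pattern with sync/atomic (illustration only, not the runtime's code):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var selectDone uint32 // 0 = still waiting, 1 = a waker already won
	var wg sync.WaitGroup
	winners := make(chan int, 3)

	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			// Only the goroutine that wins the CAS delivers the wake-up.
			if atomic.CompareAndSwapUint32(&selectDone, 0, 1) {
				winners <- id
			}
		}(i)
	}
	wg.Wait()
	close(winners)
	for id := range winners {
		fmt.Println("woken by goroutine", id) // printed exactly once
	}
}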
@@ -513,10 +457,8 @@
print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n")
}
- if cas.kind == caseRecv {
- if cas.receivedp != nil {
- *cas.receivedp = true
- }
+ if cas.kind == caseRecv && cas.receivedp != nil {
+ *cas.receivedp = true
}
if raceenabled {
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 8715e07..d5ea14d 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -275,7 +275,10 @@
// on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
// https://en.wikipedia.org/wiki/Treap
// http://faculty.washington.edu/aragon/pubs/rst89.pdf
- s.ticket = fastrand()
+ //
+ // s.ticket is compared with zero in a couple of places, therefore set the lowest bit.
+ // It will not affect the treap's quality noticeably.
+ s.ticket = fastrand() | 1
s.parent = last
*pt = s
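Because the treap code uses a zero ticket as a sentinel, the priority drawn from fastrand is forced odd by OR-ing in the low bit; giving up one bit of randomness does not measurably hurt the treap's balance. The same trick in isolation, with math/rand standing in for the runtime's fastrand:

package main

import (
	"fmt"
	"math/rand"
)

// nonZeroTicket returns a pseudo-random priority that can never be zero,
// leaving zero free to mean "no ticket assigned".
func nonZeroTicket() uint32 {
	return rand.Uint32() | 1
}

func main() {
	for i := 0; i < 5; i++ {
		fmt.Println(nonZeroTicket())
	}
}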
diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go
index 0c5481a..8090fb2 100644
--- a/src/runtime/signal_darwin.go
+++ b/src/runtime/signal_darwin.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -26,20 +21,20 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
+ /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+ /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
}
diff --git a/src/runtime/signal_darwin_arm.go b/src/runtime/signal_darwin_arm.go
index c88b90c..9a5d3ac 100644
--- a/src/runtime/signal_darwin_arm.go
+++ b/src/runtime/signal_darwin_arm.go
@@ -36,7 +36,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().pc }
func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr }
-func (c *sigctxt) fault() uint32 { return c.info.si_addr }
+func (c *sigctxt) fault() uintptr { return uintptr(c.info.si_addr) }
func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
func (c *sigctxt) trap() uint32 { return 0 }
func (c *sigctxt) error() uint32 { return 0 }
diff --git a/src/runtime/signal_darwin_arm64.go b/src/runtime/signal_darwin_arm64.go
index b14b9f1..41b8fca 100644
--- a/src/runtime/signal_darwin_arm64.go
+++ b/src/runtime/signal_darwin_arm64.go
@@ -52,7 +52,7 @@
//go:nowritebarrierrec
func (c *sigctxt) pc() uint64 { return c.regs().pc }
-func (c *sigctxt) fault() uint64 { return uint64(uintptr(unsafe.Pointer(c.info.si_addr))) }
+func (c *sigctxt) fault() uintptr { return uintptr(unsafe.Pointer(c.info.si_addr)) }
func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
func (c *sigctxt) sigaddr() uint64 { return uint64(uintptr(unsafe.Pointer(c.info.si_addr))) }
diff --git a/src/runtime/signal_dragonfly.go b/src/runtime/signal_dragonfly.go
index 8e9ce17..f2b26e7 100644
--- a/src/runtime/signal_dragonfly.go
+++ b/src/runtime/signal_dragonfly.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -26,20 +21,20 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
+ /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+ /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
/* 32 */ {_SigNotify, "SIGTHR: reserved"},
diff --git a/src/runtime/signal_freebsd.go b/src/runtime/signal_freebsd.go
index 7ce7217..2812c69 100644
--- a/src/runtime/signal_freebsd.go
+++ b/src/runtime/signal_freebsd.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -26,20 +21,20 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
+ /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+ /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
/* 32 */ {_SigNotify, "SIGTHR: reserved"},
diff --git a/src/runtime/signal_freebsd_arm.go b/src/runtime/signal_freebsd_arm.go
index 9601370..2135c1e 100644
--- a/src/runtime/signal_freebsd_arm.go
+++ b/src/runtime/signal_freebsd_arm.go
@@ -36,7 +36,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().__gregs[15] }
func (c *sigctxt) cpsr() uint32 { return c.regs().__gregs[16] }
-func (c *sigctxt) fault() uint32 { return uint32(c.info.si_addr) }
+func (c *sigctxt) fault() uintptr { return uintptr(c.info.si_addr) }
func (c *sigctxt) trap() uint32 { return 0 }
func (c *sigctxt) error() uint32 { return 0 }
func (c *sigctxt) oldmask() uint32 { return 0 }
diff --git a/src/runtime/signal_linux_arm.go b/src/runtime/signal_linux_arm.go
index 06a57b8..876b505 100644
--- a/src/runtime/signal_linux_arm.go
+++ b/src/runtime/signal_linux_arm.go
@@ -39,7 +39,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().pc }
func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr }
-func (c *sigctxt) fault() uint32 { return c.regs().fault_address }
+func (c *sigctxt) fault() uintptr { return uintptr(c.regs().fault_address) }
func (c *sigctxt) trap() uint32 { return c.regs().trap_no }
func (c *sigctxt) error() uint32 { return c.regs().error_code }
func (c *sigctxt) oldmask() uint32 { return c.regs().oldmask }
diff --git a/src/runtime/signal_linux_arm64.go b/src/runtime/signal_linux_arm64.go
index f3d4d38..2075f25 100644
--- a/src/runtime/signal_linux_arm64.go
+++ b/src/runtime/signal_linux_arm64.go
@@ -56,7 +56,7 @@
func (c *sigctxt) pc() uint64 { return c.regs().pc }
func (c *sigctxt) pstate() uint64 { return c.regs().pstate }
-func (c *sigctxt) fault() uint64 { return c.regs().fault_address }
+func (c *sigctxt) fault() uintptr { return uintptr(c.regs().fault_address) }
func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
diff --git a/src/runtime/signal_linux_mips64x.go b/src/runtime/signal_linux_mips64x.go
index 9e0cf42..b608197 100644
--- a/src/runtime/signal_linux_mips64x.go
+++ b/src/runtime/signal_linux_mips64x.go
@@ -66,6 +66,7 @@
func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
+func (c *sigctxt) set_r28(x uint64) { c.regs().sc_regs[28] = x }
func (c *sigctxt) set_r30(x uint64) { c.regs().sc_regs[30] = x }
func (c *sigctxt) set_pc(x uint64) { c.regs().sc_pc = x }
func (c *sigctxt) set_sp(x uint64) { c.regs().sc_regs[29] = x }
diff --git a/src/runtime/signal_linux_ppc64x.go b/src/runtime/signal_linux_ppc64x.go
index b6831bc..97cb26d 100644
--- a/src/runtime/signal_linux_ppc64x.go
+++ b/src/runtime/signal_linux_ppc64x.go
@@ -67,7 +67,7 @@
func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
-func (c *sigctxt) fault() uint64 { return c.regs().dar }
+func (c *sigctxt) fault() uintptr { return uintptr(c.regs().dar) }
func (c *sigctxt) set_r0(x uint64) { c.regs().gpr[0] = x }
func (c *sigctxt) set_r12(x uint64) { c.regs().gpr[12] = x }
diff --git a/src/runtime/signal_mips64x.go b/src/runtime/signal_mips64x.go
index 9546a5a..35b356c 100644
--- a/src/runtime/signal_mips64x.go
+++ b/src/runtime/signal_mips64x.go
@@ -89,6 +89,8 @@
}
// In case we are panicking from external C code
+ sigpanicPC := uint64(funcPC(sigpanic))
+ c.set_r28(sigpanicPC >> 32 << 32) // RSB register
c.set_r30(uint64(uintptr(unsafe.Pointer(gp))))
- c.set_pc(uint64(funcPC(sigpanic)))
+ c.set_pc(sigpanicPC)
}
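The >>32<<32 pair clears the low 32 bits of sigpanic's PC; that value is loaded into R28 (the RSB register) so the injected sigpanic frame sees a sensible register base on mips64. The bit manipulation in isolation:

package main

import "fmt"

// upper32 keeps only the high 32 bits of a 64-bit address, the same mask
// computed by sigpanicPC >> 32 << 32 above.
func upper32(addr uint64) uint64 {
	return addr >> 32 << 32
}

func main() {
	pc := uint64(0x0000012345678abc)
	fmt.Printf("%#x -> %#x\n", pc, upper32(pc))
}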
diff --git a/src/runtime/signal_nacl.go b/src/runtime/signal_nacl.go
index 4793075..ad321d8 100644
--- a/src/runtime/signal_nacl.go
+++ b/src/runtime/signal_nacl.go
@@ -26,13 +26,13 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
diff --git a/src/runtime/signal_nacl_arm.go b/src/runtime/signal_nacl_arm.go
index 959dbfb..b831232 100644
--- a/src/runtime/signal_nacl_arm.go
+++ b/src/runtime/signal_nacl_arm.go
@@ -36,7 +36,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().pc }
func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr }
-func (c *sigctxt) fault() uint32 { return ^uint32(0) }
+func (c *sigctxt) fault() uintptr { return ^uintptr(0) }
func (c *sigctxt) trap() uint32 { return ^uint32(0) }
func (c *sigctxt) error() uint32 { return ^uint32(0) }
func (c *sigctxt) oldmask() uint32 { return ^uint32(0) }
diff --git a/src/runtime/signal_netbsd.go b/src/runtime/signal_netbsd.go
index 30a3b8e..ca51084 100644
--- a/src/runtime/signal_netbsd.go
+++ b/src/runtime/signal_netbsd.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -26,20 +21,20 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
+ /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+ /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
/* 32 */ {_SigNotify, "SIGTHR: reserved"},
diff --git a/src/runtime/signal_netbsd_arm.go b/src/runtime/signal_netbsd_arm.go
index 64cfffa..fdb3078 100644
--- a/src/runtime/signal_netbsd_arm.go
+++ b/src/runtime/signal_netbsd_arm.go
@@ -36,7 +36,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().__gregs[_REG_R15] }
func (c *sigctxt) cpsr() uint32 { return c.regs().__gregs[_REG_CPSR] }
-func (c *sigctxt) fault() uint32 { return uint32(c.info._reason) }
+func (c *sigctxt) fault() uintptr { return uintptr(c.info._reason) }
func (c *sigctxt) trap() uint32 { return 0 }
func (c *sigctxt) error() uint32 { return 0 }
func (c *sigctxt) oldmask() uint32 { return 0 }
diff --git a/src/runtime/signal_openbsd.go b/src/runtime/signal_openbsd.go
index 30a3b8e..99c601c 100644
--- a/src/runtime/signal_openbsd.go
+++ b/src/runtime/signal_openbsd.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -26,13 +21,13 @@
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
+ /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
diff --git a/src/runtime/signal_openbsd_arm.go b/src/runtime/signal_openbsd_arm.go
index 66aea93..97bb13b 100644
--- a/src/runtime/signal_openbsd_arm.go
+++ b/src/runtime/signal_openbsd_arm.go
@@ -38,7 +38,7 @@
func (c *sigctxt) pc() uint32 { return c.regs().sc_pc }
func (c *sigctxt) cpsr() uint32 { return c.regs().sc_spsr }
-func (c *sigctxt) fault() uint32 { return c.sigaddr() }
+func (c *sigctxt) fault() uintptr { return uintptr(c.sigaddr()) }
func (c *sigctxt) trap() uint32 { return 0 }
func (c *sigctxt) error() uint32 { return 0 }
func (c *sigctxt) oldmask() uint32 { return 0 }
diff --git a/src/runtime/signal_sighandler.go b/src/runtime/signal_sighandler.go
index b2e15a6..bf2237c 100644
--- a/src/runtime/signal_sighandler.go
+++ b/src/runtime/signal_sighandler.go
@@ -38,6 +38,11 @@
if sig < uint32(len(sigtable)) {
flags = sigtable[sig].flags
}
+ if flags&_SigPanic != 0 && gp.throwsplit {
+ // We can't safely sigpanic because it may grow the
+ // stack. Abort in the signal handler instead.
+ flags = (flags &^ _SigPanic) | _SigThrow
+ }
if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
// The signal is going to cause a panic.
// Arrange the stack so that it looks like the point
@@ -88,9 +93,9 @@
}
print("PC=", hex(c.sigpc()), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
print("signal arrived during cgo execution\n")
- gp = _g_.m.lockedg
+ gp = _g_.m.lockedg.ptr()
}
print("\n")
@@ -111,7 +116,7 @@
if docrash {
crashing++
- if crashing < sched.mcount-int32(extraMCount) {
+ if crashing < mcount()-int32(extraMCount) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go
index c931c22..a8eeeee 100644
--- a/src/runtime/signal_solaris.go
+++ b/src/runtime/signal_solaris.go
@@ -4,11 +4,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: hangup"},
@@ -28,16 +23,16 @@
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: software termination signal from kill"},
/* 16 */ {_SigNotify, "SIGUSR1: user defined signal 1"},
/* 17 */ {_SigNotify, "SIGUSR2: user defined signal 2"},
- /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status change alias (POSIX)"},
+ /* 18 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status change alias (POSIX)"},
/* 19 */ {_SigNotify, "SIGPWR: power-fail restart"},
- /* 20 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 21 */ {_SigNotify, "SIGURG: urgent socket condition"},
+ /* 20 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 21 */ {_SigNotify + _SigIgn, "SIGURG: urgent socket condition"},
/* 22 */ {_SigNotify, "SIGPOLL: pollable event occurred"},
/* 23 */ {0, "SIGSTOP: stop (cannot be caught or ignored)"},
- /* 24 */ {_SigNotify + _SigDefault, "SIGTSTP: user stop requested from tty"},
- /* 25 */ {_SigNotify + _SigDefault, "SIGCONT: stopped process has been continued"},
- /* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background tty read attempted"},
- /* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background tty write attempted"},
+ /* 24 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: user stop requested from tty"},
+ /* 25 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: stopped process has been continued"},
+ /* 26 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background tty read attempted"},
+ /* 27 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background tty write attempted"},
/* 28 */ {_SigNotify, "SIGVTALRM: virtual timer expired"},
/* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling timer expired"},
/* 30 */ {_SigNotify, "SIGXCPU: exceeded cpu limit"},
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index 539b165..78649c5 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -8,10 +8,19 @@
import (
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
+// sigTabT is the type of an entry in the global sigtable array.
+// sigtable is inherently system dependent, and appears in OS-specific files,
+// but sigTabT is the same for all Unixy systems.
+// The sigtable array is indexed by a system signal number to get the flags
+// and printable name of each signal.
+type sigTabT struct {
+ flags int32
+ name string
+}
+
//go:linkname os_sigpipe os.sigpipe
func os_sigpipe() {
systemstack(sigpipe)
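With sigTabT defined once here, the per-OS files only supply table contents, and the new _SigIgn bit records which signals the default action ignores so the forwarding code can distinguish "ignore" from "terminate". A toy lookup over a table of the same shape (flag values and entries are invented for the illustration):

package main

import "fmt"

const (
	sigNotify = 1 << iota
	sigIgn
)

type sigTab struct {
	flags int32
	name  string
}

var table = [...]sigTab{
	0:  {0, "SIGNONE: no trap"},
	1:  {sigNotify, "SIGHUP: terminal line hangup"},
	28: {sigNotify | sigIgn, "SIGWINCH: window size change"},
}

func ignoredByDefault(sig int) bool {
	return sig >= 0 && sig < len(table) && table[sig].flags&sigIgn != 0
}

func main() {
	fmt.Println(table[28].name, "- ignored by default:", ignoredByDefault(28))
}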
@@ -266,6 +275,12 @@
// sigtrampgo is called from the signal handler function, sigtramp,
// written in assembly code.
// This is called by the signal handler, and the world may be stopped.
+//
+// It must be nosplit because getg() is still the G that was running
+// (if any) when the signal was delivered, but it's (usually) called
+// on the gsignal stack. Until this switches the G to gsignal, the
+// stack bounds check won't work.
+//
//go:nosplit
//go:nowritebarrierrec
func sigtrampgo(sig uint32, info *siginfo, ctx unsafe.Pointer) {
@@ -345,6 +360,12 @@
// the signal handler. The effect is that the program will act as
// though the function that got the signal simply called sigpanic
// instead.
+//
+// This must NOT be nosplit because the linker doesn't know where
+// sigpanic calls can be injected.
+//
+// The signal handler must not inject a call to sigpanic if
+// getg().throwsplit, since sigpanic may need to grow the stack.
func sigpanic() {
g := getg()
if !canpanic(g) {
@@ -395,8 +416,9 @@
//go:nosplit
//go:nowritebarrierrec
func dieFromSignal(sig uint32) {
- setsig(sig, _SIG_DFL)
unblocksig(sig)
+ // Mark the signal as unhandled to ensure it is forwarded.
+ atomic.Store(&handlingSig[sig], 0)
raise(sig)
// That should have killed us. On some systems, though, raise
@@ -408,6 +430,22 @@
osyield()
osyield()
+ // If that didn't work, try _SIG_DFL.
+ setsig(sig, _SIG_DFL)
+ raise(sig)
+
+ osyield()
+ osyield()
+ osyield()
+
+ // On Darwin we may still fail to die, because raise sends the
+ // signal to the whole process rather than just the current thread,
+ // and osyield just sleeps briefly rather than letting all other
+ // threads run. See issue 20315. Sleep longer.
+ if GOOS == "darwin" {
+ usleep(100)
+ }
+
// If we are still somehow running, just exit with the wrong status.
exit(2)
}
@@ -474,7 +512,7 @@
// this means the OS X core file will be >128 GB and even on a zippy
// workstation can take OS X well over an hour to write (uninterruptible).
// Save users from making that mistake.
- if sys.PtrSize == 8 {
+ if GOARCH == "amd64" {
return
}
}
@@ -502,7 +540,7 @@
// mask accordingly.
sigBlocked := sigset_all
for i := range sigtable {
- if sigtable[i].flags&_SigUnblock != 0 {
+ if !blockableSig(uint32(i)) {
sigdelset(&sigBlocked, i)
}
}
@@ -514,7 +552,7 @@
sigdelset(&sigBlocked, int(sig))
}
case sig := <-disableSigChan:
- if sig > 0 {
+ if sig > 0 && blockableSig(sig) {
sigaddset(&sigBlocked, int(sig))
}
}
@@ -578,17 +616,23 @@
return false
}
fwdFn := atomic.Loaduintptr(&fwdSig[sig])
+ flags := sigtable[sig].flags
- if !signalsOK {
- // The only way we can get here is if we are in a
- // library or archive, we installed a signal handler
- // at program startup, but the Go runtime has not yet
- // been initialized.
- if fwdFn == _SIG_DFL {
- dieFromSignal(sig)
- } else {
- sigfwd(fwdFn, sig, info, ctx)
+ // If we aren't handling the signal, forward it.
+ if atomic.Load(&handlingSig[sig]) == 0 || !signalsOK {
+ // If the signal is ignored, doing nothing is the same as forwarding.
+ if fwdFn == _SIG_IGN || (fwdFn == _SIG_DFL && flags&_SigIgn != 0) {
+ return true
}
+ // We are not handling the signal and there is no other handler to forward to.
+ // Crash with the default behavior.
+ if fwdFn == _SIG_DFL {
+ setsig(sig, _SIG_DFL)
+ dieFromSignal(sig)
+ return false
+ }
+
+ sigfwd(fwdFn, sig, info, ctx)
return true
}
@@ -597,18 +641,6 @@
return false
}
- // If we aren't handling the signal, forward it.
- // Really if we aren't handling the signal, we shouldn't get here,
- // but on Darwin setsigstack can lead us here because it sets
- // the sa_tramp field. The sa_tramp field is not returned by
- // sigaction, so the fix for that is non-obvious.
- if atomic.Load(&handlingSig[sig]) == 0 {
- sigfwd(fwdFn, sig, info, ctx)
- return true
- }
-
- flags := sigtable[sig].flags
-
c := &sigctxt{info, ctx}
// Only forward synchronous signals and SIGPIPE.
// Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code
@@ -702,7 +734,7 @@
signalstack(&_g_.m.gsignal.stack)
_g_.m.newSigstack = true
} else {
- setGsignalStack(&st, nil)
+ setGsignalStack(&st, &_g_.m.goSigStack)
_g_.m.newSigstack = false
}
}
@@ -718,7 +750,7 @@
func minitSignalMask() {
nmask := getg().m.sigmask
for i := range sigtable {
- if sigtable[i].flags&_SigUnblock != 0 {
+ if !blockableSig(uint32(i)) {
sigdelset(&nmask, i)
}
}
@@ -732,9 +764,36 @@
if getg().m.newSigstack {
st := stackt{ss_flags: _SS_DISABLE}
sigaltstack(&st, nil)
+ } else {
+ // We got the signal stack from someone else. Restore
+ // the Go-allocated stack in case this M gets reused
+ // for another thread (e.g., it's an extram). Also, on
+ // Android, libc allocates a signal stack for all
+ // threads, so it's important to restore the Go stack
+ // even on Go-created threads so we can free it.
+ restoreGsignalStack(&getg().m.goSigStack)
}
}
+// blockableSig returns whether sig may be blocked by the signal mask.
+// We never want to block the signals marked _SigUnblock;
+// these are the synchronous signals that turn into a Go panic.
+// In a Go program--not a c-archive/c-shared--we never want to block
+// the signals marked _SigKill or _SigThrow, as otherwise it's possible
+// for all running threads to block them and delay their delivery until
+// we start a new thread. When linked into a C program we let the C code
+// decide on the disposition of those signals.
+func blockableSig(sig uint32) bool {
+ flags := sigtable[sig].flags
+ if flags&_SigUnblock != 0 {
+ return false
+ }
+ if isarchive || islibrary {
+ return true
+ }
+ return flags&(_SigKill|_SigThrow) == 0
+}
+
// gsignalStack saves the fields of the gsignal stack changed by
// setGsignalStack.
type gsignalStack struct {
diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go
index 73bd5b5..518aac3 100644
--- a/src/runtime/signal_windows.go
+++ b/src/runtime/signal_windows.go
@@ -71,6 +71,12 @@
return _EXCEPTION_CONTINUE_SEARCH
}
+ if gp.throwsplit {
+ // We can't safely sigpanic because it may grow the
+ // stack. Let it fall through.
+ return _EXCEPTION_CONTINUE_SEARCH
+ }
+
// Make it look like a call to the signal func.
// Have to pass arguments out of band since
// augmenting the stack frame would break
@@ -126,11 +132,11 @@
print("Exception ", hex(info.exceptioncode), " ", hex(info.exceptioninformation[0]), " ", hex(info.exceptioninformation[1]), " ", hex(r.ip()), "\n")
print("PC=", hex(r.ip()), "\n")
- if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
if iscgo {
print("signal arrived during external code execution\n")
}
- gp = _g_.m.lockedg
+ gp = _g_.m.lockedg.ptr()
}
print("\n")
@@ -223,3 +229,6 @@
// It's okay to leave this empty for now: if crash returns
// the ordinary exit-after-panic happens.
}
+
+// gsignalStack is unused on Windows.
+type gsignalStack struct{}
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index 236bb29..9833162 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -45,13 +45,14 @@
// as there is no connection between handling a signal and receiving one,
// but atomic instructions should minimize it.
var sig struct {
- note note
- mask [(_NSIG + 31) / 32]uint32
- wanted [(_NSIG + 31) / 32]uint32
- ignored [(_NSIG + 31) / 32]uint32
- recv [(_NSIG + 31) / 32]uint32
- state uint32
- inuse bool
+ note note
+ mask [(_NSIG + 31) / 32]uint32
+ wanted [(_NSIG + 31) / 32]uint32
+ ignored [(_NSIG + 31) / 32]uint32
+ recv [(_NSIG + 31) / 32]uint32
+ state uint32
+ delivering uint32
+ inuse bool
}
const (
@@ -60,15 +61,20 @@
sigSending
)
-// Called from sighandler to send a signal back out of the signal handling thread.
-// Reports whether the signal was sent. If not, the caller typically crashes the program.
+// sigsend delivers a signal from sighandler to the internal signal delivery queue.
+// It reports whether the signal was sent. If not, the caller typically crashes the program.
+// It runs from the signal handler, so it's limited in what it can do.
func sigsend(s uint32) bool {
bit := uint32(1) << uint(s&31)
if !sig.inuse || s >= uint32(32*len(sig.wanted)) {
return false
}
+ atomic.Xadd(&sig.delivering, 1)
+ // We are running in the signal handler; defer is not available.
+
if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 {
+ atomic.Xadd(&sig.delivering, -1)
return false
}
@@ -76,6 +82,7 @@
for {
mask := sig.mask[s/32]
if mask&bit != 0 {
+ atomic.Xadd(&sig.delivering, -1)
return true // signal already in queue
}
if atomic.Cas(&sig.mask[s/32], mask, mask|bit) {
@@ -104,6 +111,7 @@
}
}
+ atomic.Xadd(&sig.delivering, -1)
return true
}
@@ -155,6 +163,15 @@
// by the os/signal package.
//go:linkname signalWaitUntilIdle os/signal.signalWaitUntilIdle
func signalWaitUntilIdle() {
+ // Although the signals we care about have been removed from
+ // sig.wanted, it is possible that another thread has received
+ // a signal, has read from sig.wanted, is now updating sig.mask,
+ // and has not yet woken up the processor thread. We need to wait
+ // until all current signal deliveries have completed.
+ for atomic.Load(&sig.delivering) != 0 {
+ Gosched()
+ }
+
// Although WaitUntilIdle seems like the right name for this
// function, the state we are looking for is sigReceiving, not
// sigIdle. The sigIdle state is really more like sigProcessing.
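
The sig.delivering counter added above closes a race: a signal handler may have passed the sig.wanted check but not yet queued the signal when os/signal asks to go idle. The pattern is a plain in-flight counter drained with Gosched. A standalone sketch of the same idea (inFlight, deliver, and waitUntilIdle are illustrative names, not runtime identifiers):

	package sigdemo

	import (
		"runtime"
		"sync/atomic"
	)

	var inFlight uint32 // deliveries that have started but not yet finished

	// deliver mirrors the shape of sigsend: bump the counter before checking
	// whether the event is wanted, and always decrement before returning.
	func deliver(wanted *uint32) bool {
		atomic.AddUint32(&inFlight, 1)
		if atomic.LoadUint32(wanted) == 0 {
			atomic.AddUint32(&inFlight, ^uint32(0)) // decrement
			return false
		}
		// ... queue the event for the receiver ...
		atomic.AddUint32(&inFlight, ^uint32(0))
		return true
	}

	// waitUntilIdle mirrors signalWaitUntilIdle: yield until no delivery is
	// in progress, so disabling an event cannot race with one in flight.
	func waitUntilIdle() {
		for atomic.LoadUint32(&inFlight) != 0 {
			runtime.Gosched()
		}
	}
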
diff --git a/src/runtime/sigtab_linux_generic.go b/src/runtime/sigtab_linux_generic.go
index 874148e..b26040b 100644
--- a/src/runtime/sigtab_linux_generic.go
+++ b/src/runtime/sigtab_linux_generic.go
@@ -10,11 +10,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -33,18 +28,18 @@
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
/* 16 */ {_SigThrow + _SigUnblock, "SIGSTKFLT: stack fault"},
- /* 17 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
- /* 18 */ {_SigNotify + _SigDefault, "SIGCONT: continue"},
+ /* 17 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
+ /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue"},
/* 19 */ {0, "SIGSTOP: stop, unblockable"},
- /* 20 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 20 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
+ /* 23 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+ /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
/* 29 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 30 */ {_SigNotify, "SIGPWR: power failure restart"},
/* 31 */ {_SigThrow, "SIGSYS: bad system call"},
diff --git a/src/runtime/sigtab_linux_mipsx.go b/src/runtime/sigtab_linux_mipsx.go
index 8d9fb06..81dd231 100644
--- a/src/runtime/sigtab_linux_mipsx.go
+++ b/src/runtime/sigtab_linux_mipsx.go
@@ -7,11 +7,6 @@
package runtime
-type sigTabT struct {
- flags int32
- name string
-}
-
var sigtable = [...]sigTabT{
/* 0 */ {0, "SIGNONE: no trap"},
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
@@ -31,16 +26,16 @@
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
/* 16 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
/* 17 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
- /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
+ /* 18 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"},
/* 19 */ {_SigNotify, "SIGPWR: power failure restart"},
- /* 20 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 21 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+ /* 20 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"},
+ /* 21 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
/* 22 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 23 */ {0, "SIGSTOP: stop, unblockable"},
- /* 24 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
- /* 25 */ {_SigNotify + _SigDefault, "SIGCONT: continue"},
- /* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
- /* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+ /* 24 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
+ /* 25 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue"},
+ /* 26 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
+ /* 27 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
/* 28 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
/* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
/* 30 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
diff --git a/src/runtime/sizeclasses.go b/src/runtime/sizeclasses.go
index 5366564..9e17b00 100644
--- a/src/runtime/sizeclasses.go
+++ b/src/runtime/sizeclasses.go
@@ -3,73 +3,73 @@
package runtime
-// class bytes/obj bytes/span objects waste bytes
-// 1 8 8192 1024 0
-// 2 16 8192 512 0
-// 3 32 8192 256 0
-// 4 48 8192 170 32
-// 5 64 8192 128 0
-// 6 80 8192 102 32
-// 7 96 8192 85 32
-// 8 112 8192 73 16
-// 9 128 8192 64 0
-// 10 144 8192 56 128
-// 11 160 8192 51 32
-// 12 176 8192 46 96
-// 13 192 8192 42 128
-// 14 208 8192 39 80
-// 15 224 8192 36 128
-// 16 240 8192 34 32
-// 17 256 8192 32 0
-// 18 288 8192 28 128
-// 19 320 8192 25 192
-// 20 352 8192 23 96
-// 21 384 8192 21 128
-// 22 416 8192 19 288
-// 23 448 8192 18 128
-// 24 480 8192 17 32
-// 25 512 8192 16 0
-// 26 576 8192 14 128
-// 27 640 8192 12 512
-// 28 704 8192 11 448
-// 29 768 8192 10 512
-// 30 896 8192 9 128
-// 31 1024 8192 8 0
-// 32 1152 8192 7 128
-// 33 1280 8192 6 512
-// 34 1408 16384 11 896
-// 35 1536 8192 5 512
-// 36 1792 16384 9 256
-// 37 2048 8192 4 0
-// 38 2304 16384 7 256
-// 39 2688 8192 3 128
-// 40 3072 24576 8 0
-// 41 3200 16384 5 384
-// 42 3456 24576 7 384
-// 43 4096 8192 2 0
-// 44 4864 24576 5 256
-// 45 5376 16384 3 256
-// 46 6144 24576 4 0
-// 47 6528 32768 5 128
-// 48 6784 40960 6 256
-// 49 6912 49152 7 768
-// 50 8192 8192 1 0
-// 51 9472 57344 6 512
-// 52 9728 49152 5 512
-// 53 10240 40960 4 0
-// 54 10880 32768 3 128
-// 55 12288 24576 2 0
-// 56 13568 40960 3 256
-// 57 14336 57344 4 0
-// 58 16384 16384 1 0
-// 59 18432 73728 4 0
-// 60 19072 57344 3 128
-// 61 20480 40960 2 0
-// 62 21760 65536 3 256
-// 63 24576 24576 1 0
-// 64 27264 81920 3 128
-// 65 28672 57344 2 0
-// 66 32768 32768 1 0
+// class bytes/obj bytes/span objects tail waste max waste
+// 1 8 8192 1024 0 87.50%
+// 2 16 8192 512 0 43.75%
+// 3 32 8192 256 0 46.88%
+// 4 48 8192 170 32 31.52%
+// 5 64 8192 128 0 23.44%
+// 6 80 8192 102 32 19.07%
+// 7 96 8192 85 32 15.95%
+// 8 112 8192 73 16 13.56%
+// 9 128 8192 64 0 11.72%
+// 10 144 8192 56 128 11.82%
+// 11 160 8192 51 32 9.73%
+// 12 176 8192 46 96 9.59%
+// 13 192 8192 42 128 9.25%
+// 14 208 8192 39 80 8.12%
+// 15 224 8192 36 128 8.15%
+// 16 240 8192 34 32 6.62%
+// 17 256 8192 32 0 5.86%
+// 18 288 8192 28 128 12.16%
+// 19 320 8192 25 192 11.80%
+// 20 352 8192 23 96 9.88%
+// 21 384 8192 21 128 9.51%
+// 22 416 8192 19 288 10.71%
+// 23 448 8192 18 128 8.37%
+// 24 480 8192 17 32 6.82%
+// 25 512 8192 16 0 6.05%
+// 26 576 8192 14 128 12.33%
+// 27 640 8192 12 512 15.48%
+// 28 704 8192 11 448 13.93%
+// 29 768 8192 10 512 13.94%
+// 30 896 8192 9 128 15.52%
+// 31 1024 8192 8 0 12.40%
+// 32 1152 8192 7 128 12.41%
+// 33 1280 8192 6 512 15.55%
+// 34 1408 16384 11 896 14.00%
+// 35 1536 8192 5 512 14.00%
+// 36 1792 16384 9 256 15.57%
+// 37 2048 8192 4 0 12.45%
+// 38 2304 16384 7 256 12.46%
+// 39 2688 8192 3 128 15.59%
+// 40 3072 24576 8 0 12.47%
+// 41 3200 16384 5 384 6.22%
+// 42 3456 24576 7 384 8.83%
+// 43 4096 8192 2 0 15.60%
+// 44 4864 24576 5 256 16.65%
+// 45 5376 16384 3 256 10.92%
+// 46 6144 24576 4 0 12.48%
+// 47 6528 32768 5 128 6.23%
+// 48 6784 40960 6 256 4.36%
+// 49 6912 49152 7 768 3.37%
+// 50 8192 8192 1 0 15.61%
+// 51 9472 57344 6 512 14.28%
+// 52 9728 49152 5 512 3.64%
+// 53 10240 40960 4 0 4.99%
+// 54 10880 32768 3 128 6.24%
+// 55 12288 24576 2 0 11.45%
+// 56 13568 40960 3 256 9.99%
+// 57 14336 57344 4 0 5.35%
+// 58 16384 16384 1 0 12.49%
+// 59 18432 73728 4 0 11.11%
+// 60 19072 57344 3 128 3.57%
+// 61 20480 40960 2 0 6.87%
+// 62 21760 65536 3 256 6.25%
+// 63 24576 24576 1 0 11.45%
+// 64 27264 81920 3 128 10.00%
+// 65 28672 57344 2 0 4.91%
+// 66 32768 32768 1 0 12.50%
const (
_MaxSmallSize = 32768
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 0f49df1..351fec0 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -14,6 +14,13 @@
cap int
}
+// A notInHeapSlice is a slice backed by go:notinheap memory.
+type notInHeapSlice struct {
+ array *notInHeap
+ len int
+ cap int
+}
+
// maxElems is a lookup table containing the maximum capacity for a slice.
// The index is the size of the slice element.
var maxElems = [...]uintptr{
@@ -81,7 +88,7 @@
// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
func growslice(et *_type, old slice, cap int) slice {
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&et))
+ callerpc := getcallerpc()
racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
}
if msanenabled {
@@ -105,12 +112,20 @@
if old.len < 1024 {
newcap = doublecap
} else {
- for newcap < cap {
+ // Check 0 < newcap to detect overflow
+ // and prevent an infinite loop.
+ for 0 < newcap && newcap < cap {
newcap += newcap / 4
}
+ // Set newcap to the requested cap when
+ // the newcap calculation overflowed.
+ if newcap <= 0 {
+ newcap = cap
+ }
}
}
+ var overflow bool
var lenmem, newlenmem, capmem uintptr
const ptrSize = unsafe.Sizeof((*byte)(nil))
switch et.size {
@@ -118,20 +133,37 @@
lenmem = uintptr(old.len)
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
+ overflow = uintptr(newcap) > _MaxMem
newcap = int(capmem)
case ptrSize:
lenmem = uintptr(old.len) * ptrSize
newlenmem = uintptr(cap) * ptrSize
capmem = roundupsize(uintptr(newcap) * ptrSize)
+ overflow = uintptr(newcap) > _MaxMem/ptrSize
newcap = int(capmem / ptrSize)
default:
lenmem = uintptr(old.len) * et.size
newlenmem = uintptr(cap) * et.size
capmem = roundupsize(uintptr(newcap) * et.size)
+ overflow = uintptr(newcap) > maxSliceCap(et.size)
newcap = int(capmem / et.size)
}
- if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) {
+ // The check of overflow (uintptr(newcap) > maxSliceCap(et.size))
+ // in addition to capmem > _MaxMem is needed to prevent an overflow
+ // which can be used to trigger a segfault on 32bit architectures
+ // with this example program:
+ //
+ // type T [1<<27 + 1]int64
+ //
+ // var d T
+ // var s []T
+ //
+ // func main() {
+ // s = append(s, d, d, d, d)
+ // print(len(s), "\n")
+ // }
+ if cap < old.cap || overflow || capmem > _MaxMem {
panic(errorString("growslice: cap out of range"))
}
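
The growth policy and overflow checks above are visible from ordinary Go: append doubles the capacity while the slice is short, grows by roughly 1.25x once the length passes 1024 elements, and panics with "growslice: cap out of range" if the requested capacity cannot fit in the address space. A small sketch that prints the capacity steps (exact values also depend on roundupsize and the size classes, so they vary by element type):

	package main

	import "fmt"

	func main() {
		// Watch append's capacity growth: doubling for small slices,
		// then ~1.25x growth after the length passes 1024 elements.
		s := make([]int, 0, 1)
		prev := cap(s)
		for i := 0; i < 5000; i++ {
			s = append(s, i)
			if c := cap(s); c != prev {
				fmt.Printf("len=%d: cap %d -> %d\n", len(s), prev, c)
				prev = c
			}
		}
	}
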
@@ -172,7 +204,7 @@
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&to))
+ callerpc := getcallerpc()
pc := funcPC(slicecopy)
racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
@@ -203,7 +235,7 @@
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&to))
+ callerpc := getcallerpc()
pc := funcPC(slicestringcopy)
racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
}
diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go
index 1678e8f..8fde0fe 100644
--- a/src/runtime/softfloat64.go
+++ b/src/runtime/softfloat64.go
@@ -483,3 +483,115 @@
return q1*b + q0, (un21*b + un0 - q0*v) >> s
}
+
+func fadd32(x, y uint32) uint32 {
+ return f64to32(fadd64(f32to64(x), f32to64(y)))
+}
+
+func fmul32(x, y uint32) uint32 {
+ return f64to32(fmul64(f32to64(x), f32to64(y)))
+}
+
+func fdiv32(x, y uint32) uint32 {
+ return f64to32(fdiv64(f32to64(x), f32to64(y)))
+}
+
+func feq32(x, y uint32) bool {
+ cmp, nan := fcmp64(f32to64(x), f32to64(y))
+ return cmp == 0 && !nan
+}
+
+func fgt32(x, y uint32) bool {
+ cmp, nan := fcmp64(f32to64(x), f32to64(y))
+ return cmp >= 1 && !nan
+}
+
+func fge32(x, y uint32) bool {
+ cmp, nan := fcmp64(f32to64(x), f32to64(y))
+ return cmp >= 0 && !nan
+}
+
+func feq64(x, y uint64) bool {
+ cmp, nan := fcmp64(x, y)
+ return cmp == 0 && !nan
+}
+
+func fgt64(x, y uint64) bool {
+ cmp, nan := fcmp64(x, y)
+ return cmp >= 1 && !nan
+}
+
+func fge64(x, y uint64) bool {
+ cmp, nan := fcmp64(x, y)
+ return cmp >= 0 && !nan
+}
+
+func fint32to32(x int32) uint32 {
+ return f64to32(fintto64(int64(x)))
+}
+
+func fint32to64(x int32) uint64 {
+ return fintto64(int64(x))
+}
+
+func fint64to32(x int64) uint32 {
+ return f64to32(fintto64(x))
+}
+
+func fint64to64(x int64) uint64 {
+ return fintto64(x)
+}
+
+func f32toint32(x uint32) int32 {
+ val, _ := f64toint(f32to64(x))
+ return int32(val)
+}
+
+func f32toint64(x uint32) int64 {
+ val, _ := f64toint(f32to64(x))
+ return val
+}
+
+func f64toint32(x uint64) int32 {
+ val, _ := f64toint(x)
+ return int32(val)
+}
+
+func f64toint64(x uint64) int64 {
+ val, _ := f64toint(x)
+ return val
+}
+
+func f64touint64(x float64) uint64 {
+ if x < float64(1<<63) {
+ return uint64(int64(x))
+ }
+ y := x - float64(1<<63)
+ z := uint64(int64(y))
+ return z | (1 << 63)
+}
+
+func f32touint64(x float32) uint64 {
+ if x < float32(1<<63) {
+ return uint64(int64(x))
+ }
+ y := x - float32(1<<63)
+ z := uint64(int64(y))
+ return z | (1 << 63)
+}
+
+func fuint64to64(x uint64) float64 {
+ if int64(x) >= 0 {
+ return float64(int64(x))
+ }
+ // See ../cmd/compile/internal/gc/ssa.go:uint64Tofloat
+ y := x & 1
+ z := x >> 1
+ z = z | y
+ r := float64(int64(z))
+ return r + r
+}
+
+func fuint64to32(x uint64) float32 {
+ return float32(fuint64to64(x))
+}
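
The unsigned conversions above avoid relying on a direct uint64<->float64 conversion: for float-to-uint, values with the top bit set are rebased around 2^63; for uint-to-float, they are halved with the low bit folded in and then doubled. The same tricks work with hardware floats, which makes them easy to check outside the soft-float code; a sketch (toUint64/toFloat64 are illustrative stand-ins, not the runtime helpers):

	package main

	import "fmt"

	// toUint64 mirrors the f64touint64 trick: values below 2^63 convert
	// directly; larger values are shifted down by 2^63, converted, and the
	// top bit is restored afterwards.
	func toUint64(x float64) uint64 {
		if x < float64(1<<63) {
			return uint64(int64(x))
		}
		y := x - float64(1<<63)
		return uint64(int64(y)) | 1<<63
	}

	// toFloat64 mirrors fuint64to64: values with the top bit set are halved
	// (keeping the low bit so rounding stays correct), converted, and doubled.
	func toFloat64(x uint64) float64 {
		if int64(x) >= 0 {
			return float64(int64(x))
		}
		z := x>>1 | x&1
		r := float64(int64(z))
		return r + r
	}

	func main() {
		for _, v := range []uint64{0, 1, 1 << 62, 1 << 63, 1<<63 + 4096} {
			f := toFloat64(v)
			fmt.Println(v, f, toUint64(f))
		}
	}
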
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 525d0b1..6149838 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -578,29 +578,30 @@
if stackDebug >= 4 {
print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n")
}
- if ptrbit(&bv, i) == 1 {
- pp := (*uintptr)(add(scanp, i*sys.PtrSize))
- retry:
- p := *pp
- if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
- // Looks like a junk value in a pointer slot.
- // Live analysis wrong?
- getg().m.traceback = 2
- print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
- throw("invalid pointer found on stack")
+ if ptrbit(&bv, i) != 1 {
+ continue
+ }
+ pp := (*uintptr)(add(scanp, i*sys.PtrSize))
+ retry:
+ p := *pp
+ if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
+ // Looks like a junk value in a pointer slot.
+ // Live analysis wrong?
+ getg().m.traceback = 2
+ print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
+ throw("invalid pointer found on stack")
+ }
+ if minp <= p && p < maxp {
+ if stackDebug >= 3 {
+ print("adjust ptr ", hex(p), " ", funcname(f), "\n")
}
- if minp <= p && p < maxp {
- if stackDebug >= 3 {
- print("adjust ptr ", hex(p), " ", funcname(f), "\n")
+ if useCAS {
+ ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
+ if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
+ goto retry
}
- if useCAS {
- ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
- if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
- goto retry
- }
- } else {
- *pp = p + delta
- }
+ } else {
+ *pp = p + delta
}
}
}
@@ -751,7 +752,6 @@
// might be in the stack.
for s := gp.waiting; s != nil; s = s.waitlink {
adjustpointer(adjinfo, unsafe.Pointer(&s.elem))
- adjustpointer(adjinfo, unsafe.Pointer(&s.selectdone))
}
}
@@ -768,10 +768,6 @@
if stk.lo <= p && p < stk.hi && p > sghi {
sghi = p
}
- p = uintptr(unsafe.Pointer(sg.selectdone)) + unsafe.Sizeof(sg.selectdone)
- if stk.lo <= p && p < stk.hi && p > sghi {
- sghi = p
- }
}
return sghi
}
@@ -917,9 +913,12 @@
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the GC is trying to stop this g then it will set preemptscan to true.
//
-// ctxt is the value of the context register on morestack. newstack
-// will write it to g.sched.ctxt.
-func newstack(ctxt unsafe.Pointer) {
+// This must be nowritebarrierrec because it can be called as part of
+// stack growth from other nowritebarrierrec functions, but the
+// compiler doesn't check this.
+//
+//go:nowritebarrierrec
+func newstack() {
thisg := getg()
// TODO: double check all gp. shouldn't be getg().
if thisg.m.morebuf.g.ptr().stackguard0 == stackFork {
@@ -933,19 +932,24 @@
}
gp := thisg.m.curg
- // Write ctxt to gp.sched. We do this here instead of in
- // morestack so it has the necessary write barrier.
- gp.sched.ctxt = ctxt
if thisg.m.curg.throwsplit {
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg.m.morebuf
gp.syscallsp = morebuf.sp
gp.syscallpc = morebuf.pc
- print("runtime: newstack sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
+ pcname, pcoff := "(unknown)", uintptr(0)
+ f := findfunc(gp.sched.pc)
+ if f.valid() {
+ pcname = funcname(f)
+ pcoff = gp.sched.pc - f.entry
+ }
+ print("runtime: newstack at ", pcname, "+", hex(pcoff),
+ " sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
+ thisg.m.traceback = 2 // Include runtime frames
traceback(morebuf.pc, morebuf.sp, morebuf.lr, gp)
throw("runtime: stack split at bad time")
}
diff --git a/src/runtime/stack_test.go b/src/runtime/stack_test.go
index 25e8f77..0fed241 100644
--- a/src/runtime/stack_test.go
+++ b/src/runtime/stack_test.go
@@ -5,6 +5,9 @@
package runtime_test
import (
+ "bytes"
+ "fmt"
+ "reflect"
. "runtime"
"strings"
"sync"
@@ -78,10 +81,13 @@
var wg sync.WaitGroup
// in a normal goroutine
+ var growDuration time.Duration // For debugging failures
wg.Add(1)
go func() {
defer wg.Done()
- growStack()
+ start := time.Now()
+ growStack(nil)
+ growDuration = time.Since(start)
}()
wg.Wait()
@@ -90,7 +96,7 @@
go func() {
defer wg.Done()
LockOSThread()
- growStack()
+ growStack(nil)
UnlockOSThread()
}()
wg.Wait()
@@ -100,12 +106,14 @@
go func() {
defer wg.Done()
done := make(chan bool)
- var started uint32
+ var startTime time.Time
+ var started, progress uint32
go func() {
s := new(string)
SetFinalizer(s, func(ss *string) {
+ startTime = time.Now()
atomic.StoreUint32(&started, 1)
- growStack()
+ growStack(&progress)
done <- true
})
s = nil
@@ -118,7 +126,10 @@
case <-time.After(20 * time.Second):
if atomic.LoadUint32(&started) == 0 {
t.Log("finalizer did not start")
+ } else {
+ t.Logf("finalizer started %s ago and finished %d iterations", time.Since(startTime), atomic.LoadUint32(&progress))
}
+ t.Log("first growStack took", growDuration)
t.Error("finalizer did not run")
return
}
@@ -131,7 +142,7 @@
// growStack()
//}
-func growStack() {
+func growStack(progress *uint32) {
n := 1 << 10
if testing.Short() {
n = 1 << 8
@@ -142,6 +153,9 @@
if x != i+1 {
panic("stack is corrupted")
}
+ if progress != nil {
+ atomic.StoreUint32(progress, uint32(i))
+ }
}
GC()
}
@@ -231,7 +245,7 @@
}
}()
defer set(&y, 42)
- growStack()
+ growStack(nil)
}
type bigBuf [4 * 1024]byte
@@ -627,3 +641,169 @@
}
return 1 + count1(n-1)
}
+
+type structWithMethod struct{}
+
+func (s structWithMethod) caller() string {
+ _, file, line, ok := Caller(1)
+ if !ok {
+ panic("Caller failed")
+ }
+ return fmt.Sprintf("%s:%d", file, line)
+}
+
+func (s structWithMethod) callers() []uintptr {
+ pc := make([]uintptr, 16)
+ return pc[:Callers(0, pc)]
+}
+
+func (s structWithMethod) stack() string {
+ buf := make([]byte, 4<<10)
+ return string(buf[:Stack(buf, false)])
+}
+
+func (s structWithMethod) nop() {}
+
+func TestStackWrapperCaller(t *testing.T) {
+ var d structWithMethod
+ // Force the compiler to construct a wrapper method.
+ wrapper := (*structWithMethod).caller
+ // Check that the wrapper doesn't affect the stack trace.
+ if dc, ic := d.caller(), wrapper(&d); dc != ic {
+ t.Fatalf("direct caller %q != indirect caller %q", dc, ic)
+ }
+}
+
+func TestStackWrapperCallers(t *testing.T) {
+ var d structWithMethod
+ wrapper := (*structWithMethod).callers
+ // Check that <autogenerated> doesn't appear in the stack trace.
+ pcs := wrapper(&d)
+ frames := CallersFrames(pcs)
+ for {
+ fr, more := frames.Next()
+ if fr.File == "<autogenerated>" {
+ t.Fatalf("<autogenerated> appears in stack trace: %+v", fr)
+ }
+ if !more {
+ break
+ }
+ }
+}
+
+func TestStackWrapperStack(t *testing.T) {
+ var d structWithMethod
+ wrapper := (*structWithMethod).stack
+ // Check that <autogenerated> doesn't appear in the stack trace.
+ stk := wrapper(&d)
+ if strings.Contains(stk, "<autogenerated>") {
+ t.Fatalf("<autogenerated> appears in stack trace:\n%s", stk)
+ }
+}
+
+type I interface {
+ M()
+}
+
+func TestStackWrapperStackPanic(t *testing.T) {
+ t.Run("sigpanic", func(t *testing.T) {
+ // nil calls to interface methods cause a sigpanic.
+ testStackWrapperPanic(t, func() { I.M(nil) }, "runtime_test.I.M")
+ })
+ t.Run("panicwrap", func(t *testing.T) {
+ // Nil calls to value method wrappers call panicwrap.
+ wrapper := (*structWithMethod).nop
+ testStackWrapperPanic(t, func() { wrapper(nil) }, "runtime_test.(*structWithMethod).nop")
+ })
+}
+
+func testStackWrapperPanic(t *testing.T, cb func(), expect string) {
+ // Test that the stack trace from a panicking wrapper includes
+ // the wrapper, even though we elide these when they don't panic.
+ t.Run("CallersFrames", func(t *testing.T) {
+ defer func() {
+ err := recover()
+ if err == nil {
+ t.Fatalf("expected panic")
+ }
+ pcs := make([]uintptr, 10)
+ n := Callers(0, pcs)
+ frames := CallersFrames(pcs[:n])
+ for {
+ frame, more := frames.Next()
+ t.Log(frame.Function)
+ if frame.Function == expect {
+ return
+ }
+ if !more {
+ break
+ }
+ }
+ t.Fatalf("panicking wrapper %s missing from stack trace", expect)
+ }()
+ cb()
+ })
+ t.Run("Stack", func(t *testing.T) {
+ defer func() {
+ err := recover()
+ if err == nil {
+ t.Fatalf("expected panic")
+ }
+ buf := make([]byte, 4<<10)
+ stk := string(buf[:Stack(buf, false)])
+ if !strings.Contains(stk, "\n"+expect) {
+ t.Fatalf("panicking wrapper %s missing from stack trace:\n%s", expect, stk)
+ }
+ }()
+ cb()
+ })
+}
+
+func TestCallersFromWrapper(t *testing.T) {
+ // Test that invoking CallersFrames on a stack where the first
+ // PC is an autogenerated wrapper keeps the wrapper in the
+ // trace. Normally we elide these, assuming that the wrapper
+ // calls the thing you actually wanted to see, but in this
+ // case we need to keep it.
+ pc := reflect.ValueOf(I.M).Pointer()
+ frames := CallersFrames([]uintptr{pc})
+ frame, more := frames.Next()
+ if frame.Function != "runtime_test.I.M" {
+ t.Fatalf("want function %s, got %s", "runtime_test.I.M", frame.Function)
+ }
+ if more {
+ t.Fatalf("want 1 frame, got > 1")
+ }
+}
+
+func TestTracebackSystemstack(t *testing.T) {
+ if GOARCH == "ppc64" || GOARCH == "ppc64le" {
+ t.Skip("systemstack tail call not implemented on ppc64x")
+ }
+
+ // Test that profiles correctly jump over systemstack,
+ // including nested systemstack calls.
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:TracebackSystemstack(pcs, 5)]
+ // Check that runtime.TracebackSystemstack appears five times
+ // and that we see TestTracebackSystemstack.
+ countIn, countOut := 0, 0
+ frames := CallersFrames(pcs)
+ var tb bytes.Buffer
+ for {
+ frame, more := frames.Next()
+ fmt.Fprintf(&tb, "\n%s+0x%x %s:%d", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line)
+ switch frame.Function {
+ case "runtime.TracebackSystemstack":
+ countIn++
+ case "runtime_test.TestTracebackSystemstack":
+ countOut++
+ }
+ if !more {
+ break
+ }
+ }
+ if countIn != 5 || countOut != 1 {
+ t.Fatalf("expected 5 calls to TracebackSystemstack and 1 call to TestTracebackSystemstack, got:%s", tb.String())
+ }
+}
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 0ccc81e..22be091 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -80,7 +80,7 @@
if raceenabled {
racereadrangepc(unsafe.Pointer(&b[0]),
uintptr(l),
- getcallerpc(unsafe.Pointer(&buf)),
+ getcallerpc(),
funcPC(slicebytetostring))
}
if msanenabled {
@@ -134,7 +134,7 @@
if raceenabled && len(b) > 0 {
racereadrangepc(unsafe.Pointer(&b[0]),
uintptr(len(b)),
- getcallerpc(unsafe.Pointer(&b)),
+ getcallerpc(),
funcPC(slicebytetostringtmp))
}
if msanenabled && len(b) > 0 {
@@ -183,7 +183,7 @@
if raceenabled && len(a) > 0 {
racereadrangepc(unsafe.Pointer(&a[0]),
uintptr(len(a))*unsafe.Sizeof(a[0]),
- getcallerpc(unsafe.Pointer(&buf)),
+ getcallerpc(),
funcPC(slicerunetostring))
}
if msanenabled && len(a) > 0 {
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index c4f32a8..e830641 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -4,10 +4,7 @@
package runtime
-import (
- "runtime/internal/sys"
- "unsafe"
-)
+import "unsafe"
// Should be a built-in for unsafe.Pointer?
//go:nosplit
@@ -91,16 +88,21 @@
}
// exported value for testing
-var hashLoad = loadFactor
+var hashLoad = float32(loadFactorNum) / float32(loadFactorDen)
//go:nosplit
func fastrand() uint32 {
mp := getg().m
- fr := mp.fastrand
- mx := uint32(int32(fr)>>31) & 0xa8888eef
- fr = fr<<1 ^ mx
- mp.fastrand = fr
- return fr
+ // Implement xorshift64+: 2 32-bit xorshift sequences added together.
+ // Shift triplet [17,7,16] was calculated as indicated in Marsaglia's
+ // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
+ // This generator passes the SmallCrush suite, part of TestU01 framework:
+ // http://simul.iro.umontreal.ca/testu01/tu01.html
+ s1, s0 := mp.fastrand[0], mp.fastrand[1]
+ s1 ^= s1 << 17
+ s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
+ mp.fastrand[0], mp.fastrand[1] = s0, s1
+ return s0 + s1
}
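
The new fastrand is xorshift64+ expressed as two 32-bit words with the [17,7,16] shift triplet. A standalone sketch of the same generator (the seed here is arbitrary; the runtime seeds each M's state separately when the M is created):

	package main

	import "fmt"

	// xorshift64+ over two 32-bit halves, using the same [17,7,16] shift
	// triplet as the new fastrand.
	type xorshift struct{ s0, s1 uint32 }

	func (x *xorshift) next() uint32 {
		s1, s0 := x.s0, x.s1
		s1 ^= s1 << 17
		s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
		x.s0, x.s1 = s0, s1
		return s0 + s1
	}

	func main() {
		r := xorshift{s0: 0x9e3779b9, s1: 0x243f6a88} // arbitrary nonzero seed
		for i := 0; i < 5; i++ {
			fmt.Printf("%#08x\n", r.next())
		}
	}
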
//go:nosplit
@@ -131,11 +133,9 @@
func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
func gogo(buf *gobuf)
func gosave(buf *gobuf)
-func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
//go:noescape
func jmpdefer(fv *funcval, argp uintptr)
-func exit1(code int32)
func asminit()
func setg(gg *g)
func breakpoint()
@@ -194,14 +194,16 @@
// getcallerpc returns the program counter (PC) of its caller's caller.
// getcallersp returns the stack pointer (SP) of its caller's caller.
-// For both, the argp must be a pointer to the caller's first function argument.
+// argp must be a pointer to the caller's first function argument.
// The implementation may or may not use argp, depending on
-// the architecture.
+// the architecture. The implementation may be a compiler
+// intrinsic; there is not necessarily code implementing this
+// on every platform.
//
// For example:
//
// func f(arg1, arg2, arg3 int) {
-// pc := getcallerpc(unsafe.Pointer(&arg1))
+// pc := getcallerpc()
// sp := getcallersp(unsafe.Pointer(&arg1))
// }
//
@@ -221,12 +223,26 @@
// immediately and can only be passed to nosplit functions.
//go:noescape
-func getcallerpc(argp unsafe.Pointer) uintptr
+func getcallerpc() uintptr
-//go:nosplit
-func getcallersp(argp unsafe.Pointer) uintptr {
- return uintptr(argp) - sys.MinFrameSize
-}
+//go:noescape
+func getcallersp(argp unsafe.Pointer) uintptr // implemented as an intrinsic on all platforms
+
+// getclosureptr returns the pointer to the current closure.
+// getclosureptr can only be used in an assignment statement
+// at the entry of a function. Moreover, go:nosplit directive
+// must be specified at the declaration of caller function,
+// so that the function prolog does not clobber the closure register.
+// for example:
+//
+// //go:nosplit
+// func f(arg1, arg2, arg3 int) {
+// dx := getclosureptr()
+// }
+//
+// The compiler rewrites calls to this function into instructions that fetch the
+// pointer from a well-known register (DX on x86 architecture, etc.) directly.
+func getclosureptr() uintptr
//go:noescape
func asmcgocall(fn, arg unsafe.Pointer) int32
@@ -276,11 +292,6 @@
func systemstack_switch()
-func prefetcht0(addr uintptr)
-func prefetcht1(addr uintptr)
-func prefetcht2(addr uintptr)
-func prefetchnta(addr uintptr)
-
// round n up to a multiple of a. a must be a power of 2.
func round(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
@@ -290,7 +301,6 @@
func checkASM() bool
func memequal_varlen(a, b unsafe.Pointer) bool
-func eqstring(s1, s2 string) bool
// bool2int returns 0 if x is false or 1 if x is true.
func bool2int(x bool) int {
diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go
index 8390d8f..ae5ccd3 100644
--- a/src/runtime/stubs2.go
+++ b/src/runtime/stubs2.go
@@ -25,3 +25,9 @@
func open(name *byte, mode, perm int32) int32
func madvise(addr unsafe.Pointer, n uintptr, flags int32)
+
+// exitThread terminates the current thread, writing *wait = 0 when
+// the stack is safe to reclaim.
+//
+//go:noescape
+func exitThread(wait *uint32)
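
Callers of exitThread hand it a flag and later spin on that flag before reusing or freeing the dead thread's stack, since the store of 0 is the thread's last touch of memory. A hedged sketch of that handshake with a goroutine standing in for the exiting thread (the names here are illustrative; the real wait loop lives in the scheduler's thread-exit path):

	package main

	import (
		"fmt"
		"sync/atomic"
		"time"
	)

	func main() {
		var wait uint32 = 1 // cleared once the "thread" is done with its stack

		go func() {
			// ... thread work ...
			// Equivalent of exitThread(&wait): the final store, made when
			// the stack is no longer in use.
			atomic.StoreUint32(&wait, 0)
		}()

		// The owner spins until the flag clears, then may reclaim the stack.
		for atomic.LoadUint32(&wait) != 0 {
			time.Sleep(time.Microsecond)
		}
		fmt.Println("stack is safe to reclaim")
	}
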
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index e1b41ca..bdf98b9 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -19,6 +19,11 @@
// stackExpander expands callers into a sequence of Frames,
// tracking the necessary state across PCs.
stackExpander stackExpander
+
+ // elideWrapper indicates that, if the next frame is an
+ // autogenerated wrapper function, it should be elided from
+ // the stack.
+ elideWrapper bool
}
// Frame is the information returned by Frames for each call frame.
@@ -112,12 +117,14 @@
// Next returns frame information for the next caller.
// If more is false, there are no more callers (the Frame value is valid).
func (ci *Frames) Next() (frame Frame, more bool) {
- ci.callers, frame, more = ci.stackExpander.next(ci.callers)
+ ci.callers, frame, more = ci.stackExpander.next(ci.callers, ci.elideWrapper)
+ ci.elideWrapper = elideWrapperCalling(frame.Function)
return
}
-func (se *stackExpander) next(callers []uintptr) (ncallers []uintptr, frame Frame, more bool) {
+func (se *stackExpander) next(callers []uintptr, elideWrapper bool) (ncallers []uintptr, frame Frame, more bool) {
ncallers = callers
+again:
if !se.pcExpander.more {
// Expand the next PC.
if len(ncallers) == 0 {
@@ -144,6 +151,13 @@
}
frame = se.pcExpander.next()
+ if elideWrapper && frame.File == "<autogenerated>" {
+ // Ignore autogenerated functions such as pointer
+ // method forwarding functions. These are an
+ // implementation detail that doesn't reflect the
+ // source code.
+ goto again
+ }
return ncallers, frame, se.pcExpander.more || len(ncallers) > 0
}
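
The wrapper elision above is visible through the public runtime.CallersFrames API: an autogenerated pointer-method wrapper no longer appears between its caller and the method it forwards to. A small example (output depends on inlining and build settings):

	package main

	import (
		"fmt"
		"runtime"
	)

	type T struct{}

	func (T) M() {
		pcs := make([]uintptr, 16)
		n := runtime.Callers(0, pcs)
		frames := runtime.CallersFrames(pcs[:n])
		for {
			f, more := frames.Next()
			fmt.Println(f.Function) // no <autogenerated> wrapper frames
			if !more {
				break
			}
		}
	}

	func main() {
		m := (*T).M // method expression forces a pointer-method wrapper
		m(&T{})
	}
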
@@ -338,8 +352,8 @@
// moduledata records information about the layout of the executable
// image. It is written by the linker. Any changes here must be
// matched changes to the code in cmd/internal/ld/symtab.go:symtab.
-// moduledata is stored in read-only memory; none of the pointers here
-// are visible to the garbage collector.
+// moduledata is stored in statically allocated non-pointer memory;
+// none of the pointers here are visible to the garbage collector.
type moduledata struct {
pclntable []byte
ftab []functab
@@ -367,10 +381,14 @@
modulename string
modulehashes []modulehash
+ hasmain uint8 // 1 if module contains the main function, 0 otherwise
+
gcdatamask, gcbssmask bitvector
typemap map[typeOff]*_type // offset to *_rtype in previous module
+ bad bool // module failed to load and should be ignored
+
next *moduledata
}
@@ -403,7 +421,7 @@
var firstmoduledata moduledata // linker symbol
var lastmoduledatap *moduledata // linker symbol
-var modulesSlice unsafe.Pointer // see activeModules
+var modulesSlice *[]*moduledata // see activeModules
// activeModules returns a slice of active modules.
//
@@ -443,6 +461,9 @@
func modulesinit() {
modules := new([]*moduledata)
for md := &firstmoduledata; md != nil; md = md.next {
+ if md.bad {
+ continue
+ }
*modules = append(*modules, md)
if md.gcdatamask == (bitvector{}) {
md.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(md.gcdata)), md.edata-md.data)
@@ -459,9 +480,8 @@
// contains the main function.
//
// See Issue #18729.
- mainText := funcPC(main_main)
for i, md := range *modules {
- if md.text <= mainText && mainText <= md.etext {
+ if md.hasmain != 0 {
(*modules)[0] = md
(*modules)[i] = &firstmoduledata
break
@@ -521,7 +541,6 @@
// ftab is lookup table for function by program counter.
nftab := len(datap.ftab) - 1
- var pcCache pcvalueCache
for i := 0; i < nftab; i++ {
// NOTE: ftab[nftab].entry is legal; it is the address beyond the final function.
if datap.ftab[i].entry > datap.ftab[i+1].entry {
@@ -537,30 +556,6 @@
}
throw("invalid runtime symbol table")
}
-
- if debugPcln || nftab-i < 5 {
- // Check a PC near but not at the very end.
- // The very end might be just padding that is not covered by the tables.
- // No architecture rounds function entries to more than 16 bytes,
- // but if one came along we'd need to subtract more here.
- // But don't use the next PC if it corresponds to a foreign object chunk
- // (no pcln table, f2.pcln == 0). That chunk might have an alignment
- // more than 16 bytes.
- f := funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[datap.ftab[i].funcoff])), datap}
- end := f.entry
- if i+1 < nftab {
- f2 := funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[datap.ftab[i+1].funcoff])), datap}
- if f2.pcln != 0 {
- end = f2.entry - 16
- if end < f.entry {
- end = f.entry
- }
- }
- }
- pcvalue(f, f.pcfile, end, &pcCache, true)
- pcvalue(f, f.pcln, end, &pcCache, true)
- pcvalue(f, f.pcsp, end, &pcCache, true)
- }
}
if datap.minpc != datap.ftab[0].entry ||
diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s
index 5c62bfd..ccd901a 100644
--- a/src/runtime/sys_darwin_386.s
+++ b/src/runtime/sys_darwin_386.s
@@ -19,13 +19,39 @@
// Exit this OS thread (like pthread_exit, which eventually
// calls __bsdthread_terminate).
-TEXT runtime·exit1(SB),NOSPLIT,$0
+TEXT exit1<>(SB),NOSPLIT,$16-0
+ // __bsdthread_terminate takes 4 word-size arguments.
+ // Set them all to 0. (None are an exit status.)
+ MOVL $0, 0(SP)
+ MOVL $0, 4(SP)
+ MOVL $0, 8(SP)
+ MOVL $0, 12(SP)
MOVL $361, AX
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
+GLOBL exitStack<>(SB),RODATA,$(4*4)
+DATA exitStack<>+0x00(SB)/4, $0
+DATA exitStack<>+0x04(SB)/4, $0
+DATA exitStack<>+0x08(SB)/4, $0
+DATA exitStack<>+0x0c(SB)/4, $0
+
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ // __bsdthread_terminate takes 4 arguments, which it expects
+ // on the stack. They should all be 0, so switch over to a
+ // fake stack of 0s. It won't write to the stack.
+ MOVL $exitStack<>(SB), SP
+ MOVL $361, AX // __bsdthread_terminate
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ JMP 0(PC)
+
TEXT runtime·open(SB),NOSPLIT,$0
MOVL $5, AX
INT $0x80
@@ -77,7 +103,13 @@
TEXT runtime·mmap(SB),NOSPLIT,$0
MOVL $197, AX
INT $0x80
- MOVL AX, ret+24(FP)
+ JAE ok
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
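
Throughout these assembly files mmap now fills two results, the mapped pointer and a separate errno, instead of encoding failure in a single return word. The runtime's internal stub is not exported, but syscall.Mmap shows the same two-value shape from user code (a sketch, not the runtime's own caller):

	package main

	import (
		"fmt"
		"syscall"
	)

	func main() {
		b, err := syscall.Mmap(-1, 0, 1<<16,
			syscall.PROT_READ|syscall.PROT_WRITE,
			syscall.MAP_ANON|syscall.MAP_PRIVATE)
		if err != nil {
			fmt.Println("mmap failed:", err) // errno reported separately
			return
		}
		defer syscall.Munmap(b)
		b[0] = 42
		fmt.Println("mapped", len(b), "bytes; first byte =", b[0])
	}
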
TEXT runtime·madvise(SB),NOSPLIT,$0
@@ -394,7 +426,7 @@
MOVL BX, m_procid(DX) // m->procid = thread port (for debuggers)
CALL runtime·stackcheck(SB) // smashes AX
CALL CX // fn()
- CALL runtime·exit1(SB)
+ CALL exit1<>(SB)
RET
// func bsdthread_register() int32
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index a8dc700..ab57843 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -25,13 +25,26 @@
// Exit this OS thread (like pthread_exit, which eventually
// calls __bsdthread_terminate).
-TEXT runtime·exit1(SB),NOSPLIT,$0
- MOVL code+0(FP), DI // arg 1 exit status
+TEXT exit1<>(SB),NOSPLIT,$0
+ // Because of exitThread below, this must not use the stack.
+ // __bsdthread_terminate takes 4 word-size arguments.
+ // Set them all to 0. (None are an exit status.)
+ MOVL $0, DI
+ MOVL $0, SI
+ MOVL $0, DX
+ MOVL $0, R10
MOVL $(0x2000000+361), AX // syscall entry
SYSCALL
MOVL $0xf1, 0xf1 // crash
RET
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ JMP exit1<>(SB)
+
TEXT runtime·open(SB),NOSPLIT,$0
MOVQ name+0(FP), DI // arg 1 pathname
MOVL mode+8(FP), SI // arg 2 flags
@@ -107,15 +120,22 @@
RET
// OS X comm page time offsets
-// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h
+// https://opensource.apple.com/source/xnu/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
+
#define nt_tsc_base 0x50
#define nt_scale 0x58
#define nt_shift 0x5c
#define nt_ns_base 0x60
#define nt_generation 0x68
-#define gtod_generation 0x6c
-#define gtod_ns_base 0x70
-#define gtod_sec_base 0x78
+#define gtod_generation 0x6c // obsolete since Darwin v17 (High Sierra)
+#define gtod_ns_base 0x70 // obsolete since Darwin v17 (High Sierra)
+#define gtod_sec_base 0x78 // obsolete since Darwin v17 (High Sierra)
+
+#define v17_gtod_ns_base 0xd0
+#define v17_gtod_sec_ofs 0xd8
+#define v17_gtod_frac_ofs 0xe0
+#define v17_gtod_scale 0xe8
+#define v17_gtod_tkspersec 0xf0
TEXT runtime·nanotime(SB),NOSPLIT,$0-8
MOVQ $0x7fffffe00000, BP /* comm page base */
@@ -151,6 +171,75 @@
// are used in the systime fallback, as the timeval address
// filled in by the system call.
MOVQ $0x7fffffe00000, BP /* comm page base */
+ CMPQ runtime·darwinVersion(SB), $17
+ JB legacy /* sierra and older */
+
+ // This is the new code, for macOS High Sierra (Darwin v17) and newer.
+v17:
+ // Loop trying to take a consistent snapshot
+ // of the time parameters.
+timeloop17:
+ MOVQ v17_gtod_ns_base(BP), R12
+
+ MOVL nt_generation(BP), CX
+ TESTL CX, CX
+ JZ timeloop17
+ RDTSC
+ MOVQ nt_tsc_base(BP), SI
+ MOVL nt_scale(BP), DI
+ MOVQ nt_ns_base(BP), BX
+ CMPL nt_generation(BP), CX
+ JNE timeloop17
+
+ MOVQ v17_gtod_sec_ofs(BP), R8
+ MOVQ v17_gtod_frac_ofs(BP), R9
+ MOVQ v17_gtod_scale(BP), R10
+ MOVQ v17_gtod_tkspersec(BP), R11
+ CMPQ v17_gtod_ns_base(BP), R12
+ JNE timeloop17
+
+ // Compute monotonic time
+ // mono = ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base
+ // The multiply and shift extracts the top 64 bits of the 96-bit product.
+ SHLQ $32, DX
+ ADDQ DX, AX
+ SUBQ SI, AX
+ MULQ DI
+ SHRQ $32, AX:DX
+ ADDQ BX, AX
+
+ // Subtract startNano base to return the monotonic runtime timer
+ // which is an offset from process boot.
+ MOVQ AX, BX
+ MOVQ runtime·startNano(SB), CX
+ SUBQ CX, BX
+ MOVQ BX, monotonic+16(FP)
+
+ // Now compute the 128-bit wall time:
+ // wall = ((mono - gtod_ns_base) * gtod_scale) + gtod_offs
+ // The parameters are updated every second, so if we found them
+ // outdated (that is, more than one second has passed since the ns base),
+ // fall back to the syscall.
+ TESTQ R12, R12
+ JZ systime
+ SUBQ R12, AX
+ CMPQ R11, AX
+ JB systime
+ MULQ R10
+ ADDQ R9, AX
+ ADCQ R8, DX
+
+ // Convert the 128-bit wall time into (sec,nsec).
+ // The high part (seconds) is already good to go, while the low part
+ // (a fraction of a second) must be converted to nanoseconds.
+ MOVQ DX, sec+0(FP)
+ MOVQ $1000000000, CX
+ MULQ CX
+ MOVQ DX, nsec+8(FP)
+ RET
+
+ // This is the legacy code needed for macOS Sierra (Darwin v16) and older.
+legacy:
// Loop trying to take a consistent snapshot
// of the time parameters.
timeloop:
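
The v17 path above is fixed-point arithmetic: a 96-bit product for the monotonic clock and a 128-bit product for wall time, where the high word of the wall result is whole seconds and the low word is a binary fraction of a second. The same computation can be written with math/bits; a sketch with made-up parameter values (the real ones come from the Darwin comm page and are not reproduced here):

	package main

	import (
		"fmt"
		"math/bits"
	)

	// monoNS computes ((tsc - tscBase) * scale) >> 32 + nsBase, keeping the
	// top 64 bits of the 96-bit product, as the MULQ/SHRQ sequence does.
	func monoNS(tsc, tscBase uint64, scale uint32, nsBase uint64) uint64 {
		hi, lo := bits.Mul64(tsc-tscBase, uint64(scale))
		return (hi<<32 | lo>>32) + nsBase
	}

	// wallClock adds the 128-bit product (mono-nsBase)*scale to the 128-bit
	// offset sec<<64|frac, then converts the fractional word to nanoseconds
	// with one more multiply, mirroring the MULQ/ADDQ/ADCQ/MULQ sequence.
	func wallClock(mono, nsBase, scale, sec, frac uint64) (wallSec, wallNsec uint64) {
		hi, lo := bits.Mul64(mono-nsBase, scale)
		var carry uint64
		lo, carry = bits.Add64(lo, frac, 0)
		hi, _ = bits.Add64(hi, sec, carry)
		nsec, _ := bits.Mul64(lo, 1000000000) // fraction of a second -> ns
		return hi, nsec
	}

	func main() {
		// Illustrative numbers only; real values come from the comm page.
		mono := monoNS(2000000000, 1000000000, 1<<31, 0)
		sec, nsec := wallClock(mono, 0, 1<<32, 1500000000, 0)
		fmt.Println(mono, sec, nsec)
	}
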
@@ -283,7 +372,13 @@
MOVL off+28(FP), R9 // arg 6 offset
MOVL $(0x2000000+197), AX // syscall entry
SYSCALL
- MOVQ AX, ret+32(FP)
+ JCC ok
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -375,7 +470,7 @@
MOVQ CX, g_m(AX)
CALL runtime·stackcheck(SB) // smashes AX, CX
CALL DX // fn
- CALL runtime·exit1(SB)
+ CALL exit1<>(SB)
RET
// func bsdthread_register() int32
diff --git a/src/runtime/sys_darwin_arm.s b/src/runtime/sys_darwin_arm.s
index ea559b5..1ad904f 100644
--- a/src/runtime/sys_darwin_arm.s
+++ b/src/runtime/sys_darwin_arm.s
@@ -19,7 +19,6 @@
#define SYS_mmap 197
#define SYS_munmap 73
#define SYS_madvise 75
-#define SYS_mincore 78
#define SYS_gettimeofday 116
#define SYS_kill 37
#define SYS_getpid 20
@@ -90,13 +89,32 @@
// Exit this OS thread (like pthread_exit, which eventually
// calls __bsdthread_terminate).
-TEXT runtime·exit1(SB),NOSPLIT,$0
+TEXT exit1<>(SB),NOSPLIT,$0
+ // Because of exitThread below, this must not use the stack.
+ // __bsdthread_terminate takes 4 word-size arguments.
+ // Set them all to 0. (None are an exit status.)
+ MOVW $0, R0
+ MOVW $0, R1
+ MOVW $0, R2
+ MOVW $0, R3
MOVW $SYS_bsdthread_terminate, R12
SWI $0x80
MOVW $1234, R0
MOVW $1003, R1
MOVW R0, (R1) // fail hard
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVW wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R2
+storeloop:
+ LDREX (R0), R4 // loads R4
+ STREX R2, (R0), R1 // stores R2
+ CMP $0, R1
+ BNE storeloop
+ JMP exit1<>(SB)
+
TEXT runtime·raise(SB),NOSPLIT,$0
// Ideally we'd send the signal to the current thread,
// not the whole process, but that's too hard on OS X.
@@ -122,7 +140,14 @@
MOVW $0, R6 // off_t is uint64_t
MOVW $SYS_mmap, R12
SWI $0x80
- MOVW R0, ret+24(FP)
+ MOVW $0, R1
+ BCC ok
+ MOVW R1, p+24(FP)
+ MOVW R0, err+28(FP)
+ RET
+ok:
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -150,15 +175,6 @@
SWI $0x80
RET
-TEXT runtime·mincore(SB),NOSPLIT,$0
- MOVW addr+0(FP), R0
- MOVW n+4(FP), R1
- MOVW dst+8(FP), R2
- MOVW $SYS_mincore, R12
- SWI $0x80
- MOVW R0, ret+12(FP)
- RET
-
TEXT runtime·walltime(SB), 7, $32
MOVW $8(R13), R0 // timeval
MOVW $0, R1 // zone
@@ -380,7 +396,7 @@
EOR R12, R12
WORD $0xeee1ca10 // fmxr fpscr, ip
BL (R2) // fn
- BL runtime·exit1(SB)
+ BL exit1<>(SB)
RET
// int32 bsdthread_register(void)
diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s
index 0e91d5b..5663af5 100644
--- a/src/runtime/sys_darwin_arm64.s
+++ b/src/runtime/sys_darwin_arm64.s
@@ -19,7 +19,6 @@
#define SYS_mmap 197
#define SYS_munmap 73
#define SYS_madvise 75
-#define SYS_mincore 78
#define SYS_gettimeofday 116
#define SYS_kill 37
#define SYS_getpid 20
@@ -90,13 +89,28 @@
// Exit this OS thread (like pthread_exit, which eventually
// calls __bsdthread_terminate).
-TEXT runtime·exit1(SB),NOSPLIT,$0
+TEXT exit1<>(SB),NOSPLIT,$0
+ // Because of exitThread below, this must not use the stack.
+ // __bsdthread_terminate takes 4 word-size arguments.
+ // Set them all to 0. (None are an exit status.)
+ MOVW $0, R0
+ MOVW $0, R1
+ MOVW $0, R2
+ MOVW $0, R3
MOVW $SYS_bsdthread_terminate, R16
SVC $0x80
MOVD $1234, R0
MOVD $1003, R1
MOVD R0, (R1) // fail hard
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVD wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R1
+ STLRW R1, (R0)
+ JMP exit1<>(SB)
+
TEXT runtime·raise(SB),NOSPLIT,$0
// Ideally we'd send the signal to the current thread,
// not the whole process, but that's too hard on OS X.
@@ -121,7 +135,13 @@
MOVW off+28(FP), R5
MOVW $SYS_mmap, R16
SVC $0x80
- MOVD R0, ret+32(FP)
+ BCC ok
+ MOVD $0, p+32(FP)
+ MOVD R0, err+40(FP)
+ RET
+ok:
+ MOVD R0, p+32(FP)
+ MOVD $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
index f355268..813f1f4 100644
--- a/src/runtime/sys_dragonfly_amd64.s
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -64,12 +64,18 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8
- MOVL code+0(FP), DI // arg 1 exit status
- MOVL $431, AX
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ MOVL $0x10000, DI // arg 1 how - EXTEXIT_LWP
+ MOVL $0, SI // arg 2 status
+ MOVL $0, DX // arg 3 addr
+ MOVL $494, AX // extexit
SYSCALL
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8
MOVQ name+0(FP), DI // arg 1 pathname
@@ -236,8 +242,15 @@
MOVQ $0, R9 // arg 6 - pad
MOVL $197, AX
SYSCALL
+ JCC ok
ADDQ $16, SP
- MOVQ AX, ret+32(FP)
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ ADDQ $16, SP
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
index 0f5df21..bef8e32 100644
--- a/src/runtime/sys_freebsd_386.s
+++ b/src/runtime/sys_freebsd_386.s
@@ -52,12 +52,23 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-4
- MOVL $431, AX
+GLOBL exitStack<>(SB),RODATA,$8
+DATA exitStack<>+0x00(SB)/4, $0
+DATA exitStack<>+0x04(SB)/4, $0
+
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ // thr_exit takes a single pointer argument, which it expects
+ // on the stack. We want to pass 0, so switch over to a fake
+ // stack of 0s. It won't write to the stack.
+ MOVL $exitStack<>(SB), SP
+ MOVL $431, AX // thr_exit
INT $0x80
- JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-4
MOVL $5, AX
@@ -138,7 +149,13 @@
STOSL
MOVL $477, AX
INT $0x80
- MOVL AX, ret+24(FP)
+ JAE ok
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$-4
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
index 5d072a9..7499931 100644
--- a/src/runtime/sys_freebsd_amd64.s
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -54,12 +54,16 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8
- MOVL code+0(FP), DI // arg 1 exit status
- MOVL $431, AX
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ MOVL $0, DI // arg 1 long *state
+ MOVL $431, AX // thr_exit
SYSCALL
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8
MOVQ name+0(FP), DI // arg 1 pathname
@@ -229,7 +233,13 @@
MOVL off+28(FP), R9 // arg 6 offset
MOVL $477, AX
SYSCALL
- MOVQ AX, ret+32(FP)
+ JCC ok
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s
index 2851587..3f52864 100644
--- a/src/runtime/sys_freebsd_arm.s
+++ b/src/runtime/sys_freebsd_arm.s
@@ -82,13 +82,22 @@
MOVW.CS R8, (R8)
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8
- MOVW code+0(FP), R0 // arg 1 exit status
- MOVW $SYS_thr_exit, R7
- SWI $0
- MOVW.CS $0, R8 // crash on syscall failure
- MOVW.CS R8, (R8)
- RET
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVW wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R2
+storeloop:
+ LDREX (R0), R4 // loads R4
+ STREX R2, (R0), R1 // stores R2
+ CMP $0, R1
+ BNE storeloop
+ MOVW $0, R0 // arg 1 long *state
+ MOVW $SYS_thr_exit, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8
MOVW name+0(FP), R0 // arg 1 name
@@ -249,8 +258,11 @@
MOVW $SYS_mmap, R7
SWI $0
SUB $4, R13
- // TODO(dfc) error checking ?
- MOVW R0, ret+24(FP)
+ MOVW $0, R1
+ MOVW.CS R0, R1 // if failed, put in R1
+ MOVW.CS $0, R0
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index a3baeba..bc3b8db 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -24,22 +24,70 @@
//#define INVOKE_SYSCALL CALL 0x10(GS) // non-portable
#define INVOKE_SYSCALL INT $0x80
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_getpid 20
+#define SYS_access 33
+#define SYS_kill 37
+#define SYS_brk 45
+#define SYS_fcntl 55
+#define SYS_munmap 91
+#define SYS_socketcall 102
+#define SYS_setittimer 104
+#define SYS_clone 120
+#define SYS_sched_yield 158
+#define SYS_rt_sigreturn 173
+#define SYS_rt_sigaction 174
+#define SYS_rt_sigprocmask 175
+#define SYS_sigaltstack 186
+#define SYS_ugetrlimit 191
+#define SYS_mmap2 192
+#define SYS_mincore 218
+#define SYS_madvise 219
+#define SYS_gettid 224
+#define SYS_tkill 238
+#define SYS_futex 240
+#define SYS_sched_getaffinity 242
+#define SYS_set_thread_area 243
+#define SYS_exit_group 252
+#define SYS_epoll_create 254
+#define SYS_epoll_ctl 255
+#define SYS_epoll_wait 256
+#define SYS_clock_gettime 265
+#define SYS_pselect6 308
+#define SYS_epoll_create1 329
+
TEXT runtime·exit(SB),NOSPLIT,$0
- MOVL $252, AX // syscall number
+ MOVL $SYS_exit_group, AX
MOVL code+0(FP), BX
INVOKE_SYSCALL
INT $3 // not reached
RET
-TEXT runtime·exit1(SB),NOSPLIT,$0
- MOVL $1, AX // exit - exit the current os thread
+TEXT exit1<>(SB),NOSPLIT,$0
+ MOVL $SYS_exit, AX
MOVL code+0(FP), BX
INVOKE_SYSCALL
INT $3 // not reached
RET
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ MOVL $1, AX // exit (just this thread)
+ MOVL $0, BX // exit code
+ INT $0x80 // no stack; must not use CALL
+ // We may not even have a stack any more.
+ INT $3
+ JMP 0(PC)
+
TEXT runtime·open(SB),NOSPLIT,$0
- MOVL $5, AX // syscall - open
+ MOVL $SYS_open, AX
MOVL name+0(FP), BX
MOVL mode+4(FP), CX
MOVL perm+8(FP), DX
@@ -51,7 +99,7 @@
RET
TEXT runtime·closefd(SB),NOSPLIT,$0
- MOVL $6, AX // syscall - close
+ MOVL $SYS_close, AX
MOVL fd+0(FP), BX
INVOKE_SYSCALL
CMPL AX, $0xfffff001
@@ -61,7 +109,7 @@
RET
TEXT runtime·write(SB),NOSPLIT,$0
- MOVL $4, AX // syscall - write
+ MOVL $SYS_write, AX
MOVL fd+0(FP), BX
MOVL p+4(FP), CX
MOVL n+8(FP), DX
@@ -73,7 +121,7 @@
RET
TEXT runtime·read(SB),NOSPLIT,$0
- MOVL $3, AX // syscall - read
+ MOVL $SYS_read, AX
MOVL fd+0(FP), BX
MOVL p+4(FP), CX
MOVL n+8(FP), DX
@@ -85,7 +133,7 @@
RET
TEXT runtime·getrlimit(SB),NOSPLIT,$0
- MOVL $191, AX // syscall - ugetrlimit
+ MOVL $SYS_ugetrlimit, AX
MOVL kind+0(FP), BX
MOVL limit+4(FP), CX
INVOKE_SYSCALL
@@ -103,7 +151,7 @@
MOVL AX, 4(SP)
// pselect6(0, 0, 0, 0, &ts, 0)
- MOVL $308, AX
+ MOVL $SYS_pselect6, AX
MOVL $0, BX
MOVL $0, CX
MOVL $0, DX
@@ -114,31 +162,31 @@
RET
TEXT runtime·gettid(SB),NOSPLIT,$0-4
- MOVL $224, AX // syscall - gettid
+ MOVL $SYS_gettid, AX
INVOKE_SYSCALL
MOVL AX, ret+0(FP)
RET
TEXT runtime·raise(SB),NOSPLIT,$12
- MOVL $224, AX // syscall - gettid
+ MOVL $SYS_gettid, AX
INVOKE_SYSCALL
MOVL AX, BX // arg 1 tid
MOVL sig+0(FP), CX // arg 2 signal
- MOVL $238, AX // syscall - tkill
+ MOVL $SYS_tkill, AX
INVOKE_SYSCALL
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$12
- MOVL $20, AX // syscall - getpid
+ MOVL $SYS_getpid, AX
INVOKE_SYSCALL
MOVL AX, BX // arg 1 pid
MOVL sig+0(FP), CX // arg 2 signal
- MOVL $37, AX // syscall - kill
+ MOVL $SYS_kill, AX
INVOKE_SYSCALL
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0-12
- MOVL $104, AX // syscall - setitimer
+ MOVL $SYS_setittimer, AX
MOVL mode+0(FP), BX
MOVL new+4(FP), CX
MOVL old+8(FP), DX
@@ -146,7 +194,7 @@
RET
TEXT runtime·mincore(SB),NOSPLIT,$0-16
- MOVL $218, AX // syscall - mincore
+ MOVL $SYS_mincore, AX
MOVL addr+0(FP), BX
MOVL n+4(FP), CX
MOVL dst+8(FP), DX
@@ -155,15 +203,56 @@
RET
// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
- MOVL $265, AX // syscall - clock_gettime
+TEXT runtime·walltime(SB), NOSPLIT, $0-12
+ // We don't know how much stack space the VDSO code will need,
+ // so switch to g0.
+
+ MOVL SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), CX
+ MOVL m_curg(CX), DX
+
+ CMPL AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVL m_g0(CX), DX
+ MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBL $16, SP // Space for results
+ ANDL $~15, SP // Align for C code
+
+ // Stack layout, depending on call path:
+ // x(SP) vDSO INVOKE_SYSCALL
+ // 12 ts.tv_nsec ts.tv_nsec
+ // 8 ts.tv_sec ts.tv_sec
+ // 4 &ts -
+ // 0 CLOCK_<id> -
+
+ MOVL runtime·__vdso_clock_gettime_sym(SB), AX
+ CMPL AX, $0
+ JEQ fallback
+
+ LEAL 8(SP), BX // &ts (struct timespec)
+ MOVL BX, 4(SP)
+ MOVL $0, 0(SP) // CLOCK_REALTIME
+ CALL AX
+ JMP finish
+
+fallback:
+ MOVL $SYS_clock_gettime, AX
MOVL $0, BX // CLOCK_REALTIME
LEAL 8(SP), CX
- MOVL $0, DX
INVOKE_SYSCALL
+
+finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
+ MOVL BP, SP // Restore real SP
+
// sec is in AX, nsec in BX
MOVL AX, sec_lo+0(FP)
MOVL $0, sec_hi+4(FP)
@@ -172,15 +261,48 @@
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB), NOSPLIT, $32
- MOVL $265, AX // syscall - clock_gettime
+TEXT runtime·nanotime(SB), NOSPLIT, $0-8
+ // Switch to g0 stack. See comment above in runtime·walltime.
+
+ MOVL SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), CX
+ MOVL m_curg(CX), DX
+
+ CMPL AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVL m_g0(CX), DX
+ MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBL $16, SP // Space for results
+ ANDL $~15, SP // Align for C code
+
+ MOVL runtime·__vdso_clock_gettime_sym(SB), AX
+ CMPL AX, $0
+ JEQ fallback
+
+ LEAL 8(SP), BX // &ts (struct timespec)
+ MOVL BX, 4(SP)
+ MOVL $1, 0(SP) // CLOCK_MONOTONIC
+ CALL AX
+ JMP finish
+
+fallback:
+ MOVL $SYS_clock_gettime, AX
MOVL $1, BX // CLOCK_MONOTONIC
LEAL 8(SP), CX
- MOVL $0, DX
INVOKE_SYSCALL
+
+finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
+ MOVL BP, SP // Restore real SP
+
// sec is in AX, nsec in BX
// convert to DX:AX nsec
MOVL $1000000000, CX
@@ -193,7 +315,7 @@
RET
TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
- MOVL $175, AX // syscall entry
+ MOVL $SYS_rt_sigprocmask, AX
MOVL how+0(FP), BX
MOVL new+4(FP), CX
MOVL old+8(FP), DX
@@ -205,7 +327,7 @@
RET
TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
- MOVL $174, AX // syscall - rt_sigaction
+ MOVL $SYS_rt_sigaction, AX
MOVL sig+0(FP), BX
MOVL new+4(FP), CX
MOVL old+8(FP), DX
@@ -258,7 +380,7 @@
JMP runtime·sigtramp(SB)
TEXT runtime·sigreturn(SB),NOSPLIT,$0
- MOVL $173, AX // rt_sigreturn
+ MOVL $SYS_rt_sigreturn, AX
// Sigreturn expects same SP as signal handler,
// so cannot CALL 0x10(GS) here.
INT $0x80
@@ -266,7 +388,7 @@
RET
TEXT runtime·mmap(SB),NOSPLIT,$0
- MOVL $192, AX // mmap2
+ MOVL $SYS_mmap2, AX
MOVL addr+0(FP), BX
MOVL n+4(FP), CX
MOVL prot+8(FP), DX
@@ -276,14 +398,19 @@
SHRL $12, BP
INVOKE_SYSCALL
CMPL AX, $0xfffff001
- JLS 3(PC)
+ JLS ok
NOTL AX
INCL AX
- MOVL AX, ret+24(FP)
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
- MOVL $91, AX // munmap
+ MOVL $SYS_munmap, AX
MOVL addr+0(FP), BX
MOVL n+4(FP), CX
INVOKE_SYSCALL
@@ -293,7 +420,7 @@
RET
TEXT runtime·madvise(SB),NOSPLIT,$0
- MOVL $219, AX // madvise
+ MOVL $SYS_madvise, AX
MOVL addr+0(FP), BX
MOVL n+4(FP), CX
MOVL flags+8(FP), DX
@@ -304,7 +431,7 @@
// int32 futex(int32 *uaddr, int32 op, int32 val,
// struct timespec *timeout, int32 *uaddr2, int32 val2);
TEXT runtime·futex(SB),NOSPLIT,$0
- MOVL $240, AX // futex
+ MOVL $SYS_futex, AX
MOVL addr+0(FP), BX
MOVL op+4(FP), CX
MOVL val+8(FP), DX
@@ -317,7 +444,7 @@
// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
TEXT runtime·clone(SB),NOSPLIT,$0
- MOVL $120, AX // clone
+ MOVL $SYS_clone, AX
MOVL flags+0(FP), BX
MOVL stk+4(FP), CX
MOVL $0, DX // parent tid ptr
@@ -351,7 +478,7 @@
INT $3
// Initialize AX to Linux tid
- MOVL $224, AX
+ MOVL $SYS_gettid, AX
INVOKE_SYSCALL
MOVL 0(SP), BX // m
@@ -396,11 +523,11 @@
nog:
CALL SI // fn()
- CALL runtime·exit1(SB)
+ CALL exit1<>(SB)
MOVL $0x1234, 0x1005
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
- MOVL $186, AX // sigaltstack
+ MOVL $SYS_sigaltstack, AX
MOVL new+0(FP), BX
MOVL old+4(FP), CX
INVOKE_SYSCALL
@@ -483,7 +610,7 @@
// call set_thread_area
MOVL AX, BX // user_desc
- MOVL $243, AX // syscall - set_thread_area
+ MOVL $SYS_set_thread_area, AX
// We can't call this via 0x10(GS) because this is called from setldt0 to set that up.
INT $0x80
@@ -509,12 +636,12 @@
RET
TEXT runtime·osyield(SB),NOSPLIT,$0
- MOVL $158, AX
+ MOVL $SYS_sched_yield, AX
INVOKE_SYSCALL
RET
TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
- MOVL $242, AX // syscall - sched_getaffinity
+ MOVL $SYS_sched_getaffinity, AX
MOVL pid+0(FP), BX
MOVL len+4(FP), CX
MOVL buf+8(FP), DX
@@ -524,7 +651,7 @@
// int32 runtime·epollcreate(int32 size);
TEXT runtime·epollcreate(SB),NOSPLIT,$0
- MOVL $254, AX
+ MOVL $SYS_epoll_create, AX
MOVL size+0(FP), BX
INVOKE_SYSCALL
MOVL AX, ret+4(FP)
@@ -532,7 +659,7 @@
// int32 runtime·epollcreate1(int32 flags);
TEXT runtime·epollcreate1(SB),NOSPLIT,$0
- MOVL $329, AX
+ MOVL $SYS_epoll_create1, AX
MOVL flags+0(FP), BX
INVOKE_SYSCALL
MOVL AX, ret+4(FP)
@@ -540,7 +667,7 @@
// func epollctl(epfd, op, fd int32, ev *epollEvent) int
TEXT runtime·epollctl(SB),NOSPLIT,$0
- MOVL $255, AX
+ MOVL $SYS_epoll_ctl, AX
MOVL epfd+0(FP), BX
MOVL op+4(FP), CX
MOVL fd+8(FP), DX
@@ -551,7 +678,7 @@
// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
TEXT runtime·epollwait(SB),NOSPLIT,$0
- MOVL $256, AX
+ MOVL $SYS_epoll_wait, AX
MOVL epfd+0(FP), BX
MOVL ev+4(FP), CX
MOVL nev+8(FP), DX
@@ -562,7 +689,7 @@
// void runtime·closeonexec(int32 fd);
TEXT runtime·closeonexec(SB),NOSPLIT,$0
- MOVL $55, AX // fcntl
+ MOVL $SYS_fcntl, AX
MOVL fd+0(FP), BX // fd
MOVL $2, CX // F_SETFD
MOVL $1, DX // FD_CLOEXEC
@@ -571,7 +698,7 @@
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0
- MOVL $33, AX // syscall - access
+ MOVL $SYS_access, AX
MOVL name+0(FP), BX
MOVL mode+4(FP), CX
INVOKE_SYSCALL
@@ -582,7 +709,7 @@
TEXT runtime·connect(SB),NOSPLIT,$0-16
// connect is implemented as socketcall(NR_socket, 3, *(rest of args))
// stack already should have fd, addr, addrlen.
- MOVL $102, AX // syscall - socketcall
+ MOVL $SYS_socketcall, AX
MOVL $3, BX // connect
LEAL fd+0(FP), CX
INVOKE_SYSCALL
@@ -593,7 +720,7 @@
TEXT runtime·socket(SB),NOSPLIT,$0-16
// socket is implemented as socketcall(NR_socket, 1, *(rest of args))
// stack already should have domain, type, protocol.
- MOVL $102, AX // syscall - socketcall
+ MOVL $SYS_socketcall, AX
MOVL $1, BX // socket
LEAL domain+0(FP), CX
INVOKE_SYSCALL
@@ -603,7 +730,7 @@
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT,$0-4
// Implemented as brk(NULL).
- MOVL $45, AX // syscall - brk
+ MOVL $SYS_brk, AX
MOVL $0, BX // NULL
INVOKE_SYSCALL
MOVL AX, ret+0(FP)
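
Note on the walltime/nanotime rewrite above: the stub now switches to the g0 stack (because the vDSO's stack needs are unknown), prefers __vdso_clock_gettime when the symbol is present, and only falls back to the raw clock_gettime syscall otherwise. A minimal user-level sketch of just the fallback path, for orientation only — this is not the runtime's code, it uses the syscall package's constants, and it skips the vDSO fast path entirely:

package main

import (
	"fmt"
	"syscall"
	"unsafe"
)

// fallbackNanotime mirrors the "fallback:" branch above at user level:
// a raw clock_gettime(CLOCK_MONOTONIC) call filling a timespec.
func fallbackNanotime() int64 {
	const clockMonotonic = 1
	var ts syscall.Timespec
	_, _, errno := syscall.Syscall(syscall.SYS_CLOCK_GETTIME,
		clockMonotonic, uintptr(unsafe.Pointer(&ts)), 0)
	if errno != 0 {
		return 0
	}
	return int64(ts.Sec)*1e9 + int64(ts.Nsec)
}

func main() {
	fmt.Println(fallbackNanotime())
}
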
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index e0dc3e1..5a94bda 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -10,23 +10,65 @@
#include "go_tls.h"
#include "textflag.h"
+#define SYS_read 0
+#define SYS_write 1
+#define SYS_open 2
+#define SYS_close 3
+#define SYS_mmap 9
+#define SYS_munmap 11
+#define SYS_brk 12
+#define SYS_rt_sigaction 13
+#define SYS_rt_sigprocmask 14
+#define SYS_rt_sigreturn 15
+#define SYS_access 21
+#define SYS_sched_yield 24
+#define SYS_mincore 27
+#define SYS_madvise 28
+#define SYS_setittimer 38
+#define SYS_getpid 39
+#define SYS_socket 41
+#define SYS_connect 42
+#define SYS_clone 56
+#define SYS_exit 60
+#define SYS_kill 62
+#define SYS_fcntl 72
+#define SYS_getrlimit 97
+#define SYS_sigaltstack 131
+#define SYS_arch_prctl 158
+#define SYS_gettid 186
+#define SYS_tkill 200
+#define SYS_futex 202
+#define SYS_sched_getaffinity 204
+#define SYS_epoll_create 213
+#define SYS_exit_group 231
+#define SYS_epoll_wait 232
+#define SYS_epoll_ctl 233
+#define SYS_pselect6 270
+#define SYS_epoll_create1 291
+
TEXT runtime·exit(SB),NOSPLIT,$0-4
MOVL code+0(FP), DI
- MOVL $231, AX // exitgroup - force all os threads to exit
+ MOVL $SYS_exit_group, AX
SYSCALL
RET
-TEXT runtime·exit1(SB),NOSPLIT,$0-4
- MOVL code+0(FP), DI
- MOVL $60, AX // exit - exit the current os thread
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ MOVL $0, DI // exit code
+ MOVL $SYS_exit, AX
SYSCALL
- RET
+ // We may not even have a stack any more.
+ INT $3
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$0-20
MOVQ name+0(FP), DI
MOVL mode+8(FP), SI
MOVL perm+12(FP), DX
- MOVL $2, AX // syscall entry
+ MOVL $SYS_open, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -36,7 +78,7 @@
TEXT runtime·closefd(SB),NOSPLIT,$0-12
MOVL fd+0(FP), DI
- MOVL $3, AX // syscall entry
+ MOVL $SYS_close, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -48,7 +90,7 @@
MOVQ fd+0(FP), DI
MOVQ p+8(FP), SI
MOVL n+16(FP), DX
- MOVL $1, AX // syscall entry
+ MOVL $SYS_write, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -60,7 +102,7 @@
MOVL fd+0(FP), DI
MOVQ p+8(FP), SI
MOVL n+16(FP), DX
- MOVL $0, AX // syscall entry
+ MOVL $SYS_read, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -71,7 +113,7 @@
TEXT runtime·getrlimit(SB),NOSPLIT,$0-20
MOVL kind+0(FP), DI
MOVQ limit+8(FP), SI
- MOVL $97, AX // syscall entry
+ MOVL $SYS_getrlimit, AX
SYSCALL
MOVL AX, ret+16(FP)
RET
@@ -93,31 +135,31 @@
MOVL $0, R10
MOVQ SP, R8
MOVL $0, R9
- MOVL $270, AX
+ MOVL $SYS_pselect6, AX
SYSCALL
RET
TEXT runtime·gettid(SB),NOSPLIT,$0-4
- MOVL $186, AX // syscall - gettid
+ MOVL $SYS_gettid, AX
SYSCALL
MOVL AX, ret+0(FP)
RET
TEXT runtime·raise(SB),NOSPLIT,$0
- MOVL $186, AX // syscall - gettid
+ MOVL $SYS_gettid, AX
SYSCALL
MOVL AX, DI // arg 1 tid
MOVL sig+0(FP), SI // arg 2
- MOVL $200, AX // syscall - tkill
+ MOVL $SYS_tkill, AX
SYSCALL
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$0
- MOVL $39, AX // syscall - getpid
+ MOVL $SYS_getpid, AX
SYSCALL
MOVL AX, DI // arg 1 pid
MOVL sig+0(FP), SI // arg 2
- MOVL $62, AX // syscall - kill
+ MOVL $SYS_kill, AX
SYSCALL
RET
@@ -125,7 +167,7 @@
MOVL mode+0(FP), DI
MOVQ new+8(FP), SI
MOVQ old+16(FP), DX
- MOVL $38, AX // syscall entry
+ MOVL $SYS_setittimer, AX
SYSCALL
RET
@@ -133,17 +175,37 @@
MOVQ addr+0(FP), DI
MOVQ n+8(FP), SI
MOVQ dst+16(FP), DX
- MOVL $27, AX // syscall entry
+ MOVL $SYS_mincore, AX
SYSCALL
MOVL AX, ret+24(FP)
RET
// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$16
- // Be careful. We're calling a function with gcc calling convention here.
- // We're guaranteed 128 bytes on entry, and we've taken 16, and the
- // call uses another 8.
- // That leaves 104 for the gettime code to use. Hope that's enough!
+TEXT runtime·walltime(SB),NOSPLIT,$0-12
+ // We don't know how much stack space the VDSO code will need,
+ // so switch to g0.
+ // In particular, a kernel configured with CONFIG_OPTIMIZE_INLINING=n
+ // and hardening can use a full page of stack space in gettime_sym
+ // due to stack probes inserted to avoid stack/heap collisions.
+ // See issue #20427.
+
+ MOVQ SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), CX
+ MOVQ m_curg(CX), DX
+
+ CMPQ AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVQ m_g0(CX), DX
+ MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBQ $16, SP // Space for results
+ ANDQ $~15, SP // Align for C code
+
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback
@@ -152,6 +214,7 @@
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
+ MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
@@ -163,13 +226,31 @@
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
IMULQ $1000, DX
+ MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$16
- // Duplicate time.now here to avoid using up precious stack space.
- // See comment above in time.now.
+TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+ // Switch to g0 stack. See comment above in runtime·walltime.
+
+ MOVQ SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), CX
+ MOVQ m_curg(CX), DX
+
+ CMPQ AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVQ m_g0(CX), DX
+ MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBQ $16, SP // Space for results
+ ANDQ $~15, SP // Align for C code
+
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback
@@ -178,6 +259,7 @@
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
+ MOVQ BP, SP // Restore real SP
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
@@ -191,6 +273,7 @@
CALL AX
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
+ MOVQ BP, SP // Restore real SP
IMULQ $1000, DX
// sec is in AX, nsec in DX
// return nsec in AX
@@ -204,7 +287,7 @@
MOVQ new+8(FP), SI
MOVQ old+16(FP), DX
MOVL size+24(FP), R10
- MOVL $14, AX // syscall entry
+ MOVL $SYS_rt_sigprocmask, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -216,7 +299,7 @@
MOVQ new+8(FP), SI
MOVQ old+16(FP), DX
MOVQ size+24(FP), R10
- MOVL $13, AX // syscall entry
+ MOVL $SYS_rt_sigaction, AX
SYSCALL
MOVL AX, ret+32(FP)
RET
@@ -354,7 +437,7 @@
// The code that cares about the precise instructions used is:
// https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup
TEXT runtime·sigreturn(SB),NOSPLIT,$0
- MOVQ $15, AX // rt_sigreturn
+ MOVQ $SYS_rt_sigreturn, AX
SYSCALL
INT $3 // not reached
@@ -366,13 +449,18 @@
MOVL fd+24(FP), R8
MOVL off+28(FP), R9
- MOVL $9, AX // mmap
+ MOVL $SYS_mmap, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
- JLS 3(PC)
+ JLS ok
NOTQ AX
INCQ AX
- MOVQ AX, ret+32(FP)
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
// Call the function stored in _cgo_mmap using the GCC calling convention.
@@ -396,7 +484,7 @@
TEXT runtime·sysMunmap(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI
MOVQ n+8(FP), SI
- MOVQ $11, AX // munmap
+ MOVQ $SYS_munmap, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -420,7 +508,7 @@
MOVQ addr+0(FP), DI
MOVQ n+8(FP), SI
MOVL flags+16(FP), DX
- MOVQ $28, AX // madvise
+ MOVQ $SYS_madvise, AX
SYSCALL
// ignore failure - maybe pages are locked
RET
@@ -434,7 +522,7 @@
MOVQ ts+16(FP), R10
MOVQ addr2+24(FP), R8
MOVL val3+32(FP), R9
- MOVL $202, AX
+ MOVL $SYS_futex, AX
SYSCALL
MOVL AX, ret+40(FP)
RET
@@ -452,7 +540,7 @@
MOVQ gp+24(FP), R9
MOVQ fn+32(FP), R12
- MOVL $56, AX
+ MOVL $SYS_clone, AX
SYSCALL
// In parent, return.
@@ -471,7 +559,7 @@
JEQ nog
// Initialize m->procid to Linux tid
- MOVL $186, AX // gettid
+ MOVL $SYS_gettid, AX
SYSCALL
MOVQ AX, m_procid(R8)
@@ -491,14 +579,14 @@
// It shouldn't return. If it does, exit that thread.
MOVL $111, DI
- MOVL $60, AX
+ MOVL $SYS_exit, AX
SYSCALL
JMP -3(PC) // keep exiting
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI
- MOVQ $131, AX
+ MOVQ $SYS_sigaltstack, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -517,7 +605,7 @@
#endif
MOVQ DI, SI
MOVQ $0x1002, DI // ARCH_SET_FS
- MOVQ $158, AX // arch_prctl
+ MOVQ $SYS_arch_prctl, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
@@ -525,7 +613,7 @@
RET
TEXT runtime·osyield(SB),NOSPLIT,$0
- MOVL $24, AX
+ MOVL $SYS_sched_yield, AX
SYSCALL
RET
@@ -533,7 +621,7 @@
MOVQ pid+0(FP), DI
MOVQ len+8(FP), SI
MOVQ buf+16(FP), DX
- MOVL $204, AX // syscall entry
+ MOVL $SYS_sched_getaffinity, AX
SYSCALL
MOVL AX, ret+24(FP)
RET
@@ -541,7 +629,7 @@
// int32 runtime·epollcreate(int32 size);
TEXT runtime·epollcreate(SB),NOSPLIT,$0
MOVL size+0(FP), DI
- MOVL $213, AX // syscall entry
+ MOVL $SYS_epoll_create, AX
SYSCALL
MOVL AX, ret+8(FP)
RET
@@ -549,7 +637,7 @@
// int32 runtime·epollcreate1(int32 flags);
TEXT runtime·epollcreate1(SB),NOSPLIT,$0
MOVL flags+0(FP), DI
- MOVL $291, AX // syscall entry
+ MOVL $SYS_epoll_create1, AX
SYSCALL
MOVL AX, ret+8(FP)
RET
@@ -560,7 +648,7 @@
MOVL op+4(FP), SI
MOVL fd+8(FP), DX
MOVQ ev+16(FP), R10
- MOVL $233, AX // syscall entry
+ MOVL $SYS_epoll_ctl, AX
SYSCALL
MOVL AX, ret+24(FP)
RET
@@ -571,7 +659,7 @@
MOVQ ev+8(FP), SI
MOVL nev+16(FP), DX
MOVL timeout+20(FP), R10
- MOVL $232, AX // syscall entry
+ MOVL $SYS_epoll_wait, AX
SYSCALL
MOVL AX, ret+24(FP)
RET
@@ -581,7 +669,7 @@
MOVL fd+0(FP), DI // fd
MOVQ $2, SI // F_SETFD
MOVQ $1, DX // FD_CLOEXEC
- MOVL $72, AX // fcntl
+ MOVL $SYS_fcntl, AX
SYSCALL
RET
@@ -590,7 +678,7 @@
TEXT runtime·access(SB),NOSPLIT,$0
MOVQ name+0(FP), DI
MOVL mode+8(FP), SI
- MOVL $21, AX // syscall entry
+ MOVL $SYS_access, AX
SYSCALL
MOVL AX, ret+16(FP)
RET
@@ -600,7 +688,7 @@
MOVL fd+0(FP), DI
MOVQ addr+8(FP), SI
MOVL len+16(FP), DX
- MOVL $42, AX // syscall entry
+ MOVL $SYS_connect, AX
SYSCALL
MOVL AX, ret+24(FP)
RET
@@ -610,7 +698,7 @@
MOVL domain+0(FP), DI
MOVL typ+4(FP), SI
MOVL prot+8(FP), DX
- MOVL $41, AX // syscall entry
+ MOVL $SYS_socket, AX
SYSCALL
MOVL AX, ret+16(FP)
RET
@@ -619,7 +707,7 @@
TEXT runtime·sbrk0(SB),NOSPLIT,$0-8
// Implemented as brk(NULL).
MOVQ $0, DI
- MOVL $12, AX // syscall entry
+ MOVL $SYS_brk, AX
SYSCALL
MOVQ AX, ret+0(FP)
RET
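
Note on the mmap stubs in these files: mmap now writes two results to the frame, a pointer and a separate errno, instead of folding the error into one return value (the ok:/error split above). A user-level sketch of the same convention on linux/amd64, using the raw syscall — illustrative only; mmapAnon is a made-up name, not a runtime function:

package main

import (
	"fmt"
	"syscall"
	"unsafe"
)

// mmapAnon returns the mapping and the errno separately, mirroring the
// (p, err) result pair the assembly stubs above now produce.
func mmapAnon(n uintptr) (unsafe.Pointer, syscall.Errno) {
	p, _, errno := syscall.Syscall6(syscall.SYS_MMAP,
		0,           // addr: let the kernel choose
		n,           // length
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_PRIVATE|syscall.MAP_ANON,
		^uintptr(0), // fd = -1 for an anonymous mapping
		0)           // offset
	if errno != 0 {
		return nil, errno
	}
	return unsafe.Pointer(p), 0
}

func main() {
	p, errno := mmapAnon(1 << 20)
	fmt.Println(p, errno)
	if errno == 0 {
		syscall.Syscall(syscall.SYS_MUNMAP, uintptr(p), 1<<20, 0)
	}
}
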
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index 64beed8..794f9b3 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -114,7 +114,7 @@
MOVW $1002, R1
MOVW R0, (R1) // fail hard
-TEXT runtime·exit1(SB),NOSPLIT,$-4
+TEXT exit1<>(SB),NOSPLIT,$-4
MOVW code+0(FP), R0
MOVW $SYS_exit, R7
SWI $0
@@ -122,6 +122,22 @@
MOVW $1003, R1
MOVW R0, (R1) // fail hard
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$-4-4
+ MOVW wait+0(FP), R0
+ // We're done using the stack.
+ // Alas, there's no reliable way to make this write atomic
+ // without potentially using the stack. So it goes.
+ MOVW $0, R1
+ MOVW R1, (R0)
+ MOVW $0, R0 // exit code
+ MOVW $SYS_exit, R7
+ SWI $0
+ MOVW $1234, R0
+ MOVW $1004, R1
+ MOVW R0, (R1) // fail hard
+ JMP 0(PC)
+
TEXT runtime·gettid(SB),NOSPLIT,$0-4
MOVW $SYS_gettid, R7
SWI $0
@@ -157,8 +173,12 @@
SWI $0
MOVW $0xfffff001, R6
CMP R6, R0
+ MOVW $0, R1
RSB.HI $0, R0
- MOVW R0, ret+24(FP)
+ MOVW.HI R0, R1 // if error, put in R1
+ MOVW.HI $0, R0
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -317,7 +337,7 @@
SUB $16, R13 // restore the stack pointer to avoid memory corruption
MOVW $0, R0
MOVW R0, 4(R13)
- BL runtime·exit1(SB)
+ BL exit1<>(SB)
MOVW $1234, R0
MOVW $1005, R1
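
Note on exitThread across these ports: the contract is the same everywhere — the exiting thread stores 0 to *wait as its very last use of the stack, and the owner of that stack spins on the word before freeing or reusing it. A self-contained sketch of the handshake with user-level primitives (names are illustrative; the runtime uses its own atomics and osyield, not Gosched):

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

func main() {
	// 1 means "the exiting side still needs its stack".
	var wait uint32 = 1

	go func() {
		// ... thread body runs here ...
		// Last action before the stack may be reclaimed:
		atomic.StoreUint32(&wait, 0)
	}()

	// Owner side: do not touch the stack until the flag clears.
	for atomic.LoadUint32(&wait) != 0 {
		runtime.Gosched()
	}
	fmt.Println("safe to free the stack now")
}
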
diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index e921f99..758e685 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -54,11 +54,16 @@
SVC
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8-4
- MOVW code+0(FP), R0
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$-8-8
+ MOVD wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R1
+ STLRW R1, (R0)
+ MOVW $0, R0 // exit code
MOVD $SYS_exit, R8
SVC
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8-20
MOVD $AT_FDCWD, R0
@@ -273,9 +278,14 @@
MOVD $SYS_mmap, R8
SVC
CMN $4095, R0
- BCC 2(PC)
+ BCC ok
NEG R0,R0
- MOVD R0, ret+32(FP)
+ MOVD $0, p+32(FP)
+ MOVD R0, err+40(FP)
+ RET
+ok:
+ MOVD R0, p+32(FP)
+ MOVD $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$-8
diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s
index 27de7b0..7402ae2 100644
--- a/src/runtime/sys_linux_mips64x.s
+++ b/src/runtime/sys_linux_mips64x.s
@@ -53,11 +53,18 @@
SYSCALL
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8-4
- MOVW code+0(FP), R4
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$-8-8
+ MOVV wait+0(FP), R1
+ // We're done using the stack.
+ MOVW $0, R2
+ SYNC
+ MOVW R2, (R1)
+ SYNC
+ MOVW $0, R4 // exit code
MOVV $SYS_exit, R2
SYSCALL
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8-20
MOVV name+0(FP), R4
@@ -262,7 +269,13 @@
MOVV $SYS_mmap, R2
SYSCALL
- MOVV R2, ret+32(FP)
+ BEQ R7, ok
+ MOVV $0, p+32(FP)
+ MOVV R2, err+40(FP)
+ RET
+ok:
+ MOVV R2, p+32(FP)
+ MOVV $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$-8
diff --git a/src/runtime/sys_linux_mipsx.s b/src/runtime/sys_linux_mipsx.s
index 39bd731..6bd0267 100644
--- a/src/runtime/sys_linux_mipsx.s
+++ b/src/runtime/sys_linux_mipsx.s
@@ -54,12 +54,19 @@
UNDEF
RET
-TEXT runtime·exit1(SB),NOSPLIT,$0-4
- MOVW code+0(FP), R4
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVW wait+0(FP), R1
+ // We're done using the stack.
+ MOVW $0, R2
+ SYNC
+ MOVW R2, (R1)
+ SYNC
+ MOVW $0, R4 // exit code
MOVW $SYS_exit, R2
SYSCALL
UNDEF
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$0-16
MOVW name+0(FP), R4
@@ -272,7 +279,7 @@
TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
JMP runtime·sigtramp(SB)
-TEXT runtime·mmap(SB),NOSPLIT,$20-28
+TEXT runtime·mmap(SB),NOSPLIT,$20-32
MOVW addr+0(FP), R4
MOVW n+4(FP), R5
MOVW prot+8(FP), R6
@@ -284,7 +291,13 @@
MOVW $SYS_mmap, R2
SYSCALL
- MOVW R2, ret+24(FP)
+ BEQ R7, ok
+ MOVW $0, p+24(FP)
+ MOVW R2, err+28(FP)
+ RET
+ok:
+ MOVW R2, p+24(FP)
+ MOVW $0, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0-8
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index 2b2aa61..9b45f94 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -54,10 +54,16 @@
SYSCALL $SYS_exit_group
RET
-TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4
- MOVW code+0(FP), R3
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD wait+0(FP), R1
+ // We're done using the stack.
+ MOVW $0, R2
+ SYNC
+ MOVW R2, (R1)
+ MOVW $0, R3 // exit code
SYSCALL $SYS_exit
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
MOVD name+0(FP), R3
@@ -244,7 +250,96 @@
#ifdef GOARCH_ppc64le
// ppc64le doesn't need function descriptors
-TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0
+ // The stack unwinder, presumably written in C, may not be able to
+ // handle Go frames correctly. So, this function is NOFRAME, and we
+ // save/restore LR manually.
+ MOVD LR, R10
+
+ // We're coming from C code, initialize essential registers.
+ CALL runtime·reginit(SB)
+
+ // If no traceback function, do usual sigtramp.
+ MOVD runtime·cgoTraceback(SB), R6
+ CMP $0, R6
+ BEQ sigtramp
+
+ // If no traceback support function, which means that
+ // runtime/cgo was not linked in, do usual sigtramp.
+ MOVD _cgo_callers(SB), R6
+ CMP $0, R6
+ BEQ sigtramp
+
+ // Set up g register.
+ CALL runtime·load_g(SB)
+
+ // Figure out if we are currently in a cgo call.
+ // If not, just do usual sigtramp.
+ CMP $0, g
+ BEQ sigtrampnog // g == nil
+ MOVD g_m(g), R6
+ CMP $0, R6
+ BEQ sigtramp // g.m == nil
+ MOVW m_ncgo(R6), R7
+ CMPW $0, R7
+ BEQ sigtramp // g.m.ncgo = 0
+ MOVD m_curg(R6), R7
+ CMP $0, R7
+ BEQ sigtramp // g.m.curg == nil
+ MOVD g_syscallsp(R7), R7
+ CMP $0, R7
+ BEQ sigtramp // g.m.curg.syscallsp == 0
+ MOVD m_cgoCallers(R6), R7 // R7 is the fifth arg in C calling convention.
+ CMP $0, R7
+ BEQ sigtramp // g.m.cgoCallers == nil
+ MOVW m_cgoCallersUse(R6), R8
+ CMPW $0, R8
+ BNE sigtramp // g.m.cgoCallersUse != 0
+
+ // Jump to a function in runtime/cgo.
+ // That function, written in C, will call the user's traceback
+ // function with proper unwind info, and will then call back here.
+ // The first three arguments, and the fifth, are already in registers.
+ // Set the two remaining arguments now.
+ MOVD runtime·cgoTraceback(SB), R6
+ MOVD $runtime·sigtramp(SB), R8
+ MOVD _cgo_callers(SB), R12
+ MOVD R12, CTR
+ MOVD R10, LR // restore LR
+ JMP (CTR)
+
+sigtramp:
+ MOVD R10, LR // restore LR
+ JMP runtime·sigtramp(SB)
+
+sigtrampnog:
+ // Signal arrived on a non-Go thread. If this is SIGPROF, get a
+ // stack trace.
+ CMPW R3, $27 // 27 == SIGPROF
+ BNE sigtramp
+
+ // Lock sigprofCallersUse (cas from 0 to 1).
+ MOVW $1, R7
+ MOVD $runtime·sigprofCallersUse(SB), R8
+ SYNC
+ LWAR (R8), R6
+ CMPW $0, R6
+ BNE sigtramp
+ STWCCC R7, (R8)
+ BNE -4(PC)
+ ISYNC
+
+ // Jump to the traceback function in runtime/cgo.
+ // It will call back to sigprofNonGo, which will ignore the
+ // arguments passed in registers.
+ // First three arguments to traceback function are in registers already.
+ MOVD runtime·cgoTraceback(SB), R6
+ MOVD $runtime·sigprofCallers(SB), R7
+ MOVD $runtime·sigprofNonGoWrapper<>(SB), R8
+ MOVD _cgo_callers(SB), R12
+ MOVD R12, CTR
+ MOVD R10, LR // restore LR
+ JMP (CTR)
#else
// function descriptor for the real sigtramp
TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0
@@ -252,10 +347,14 @@
DWORD $0
DWORD $0
TEXT runtime·_cgoSigtramp(SB),NOSPLIT,$0
+ JMP runtime·sigtramp(SB)
#endif
- MOVD $runtime·sigtramp(SB), R12
- MOVD R12, CTR
- JMP (CTR)
+
+TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT,$0
+ // We're coming from C code, set up essential registers, then call sigprofNonGo.
+ CALL runtime·reginit(SB)
+ CALL runtime·sigprofNonGo(SB)
+ RET
TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
MOVD addr+0(FP), R3
@@ -266,7 +365,13 @@
MOVW off+28(FP), R8
SYSCALL $SYS_mmap
- MOVD R3, ret+32(FP)
+ BVC ok
+ MOVD $0, p+32(FP)
+ MOVD R3, err+40(FP)
+ RET
+ok:
+ MOVD R3, p+32(FP)
+ MOVD $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s
index b8099e2..72b0244 100644
--- a/src/runtime/sys_linux_s390x.s
+++ b/src/runtime/sys_linux_s390x.s
@@ -49,11 +49,16 @@
SYSCALL
RET
-TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4
- MOVW code+0(FP), R2
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD wait+0(FP), R1
+ // We're done using the stack.
+ MOVW $0, R2
+ MOVW R2, (R1)
+ MOVW $0, R2 // exit code
MOVW $SYS_exit, R1
SYSCALL
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
MOVD name+0(FP), R2
@@ -246,7 +251,7 @@
BR runtime·sigtramp(SB)
// func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
-TEXT runtime·mmap(SB),NOSPLIT,$48-40
+TEXT runtime·mmap(SB),NOSPLIT,$48-48
MOVD addr+0(FP), R2
MOVD n+8(FP), R3
MOVW prot+16(FP), R4
@@ -267,9 +272,14 @@
MOVW $SYS_mmap, R1
SYSCALL
MOVD $-4095, R3
- CMPUBLT R2, R3, 2(PC)
+ CMPUBLT R2, R3, ok
NEG R2
- MOVD R2, ret+32(FP)
+ MOVD $0, p+32(FP)
+ MOVD R2, err+40(FP)
+ RET
+ok:
+ MOVD R2, p+32(FP)
+ MOVD $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s
index d945453..cdc8ff1 100644
--- a/src/runtime/sys_nacl_386.s
+++ b/src/runtime/sys_nacl_386.s
@@ -16,11 +16,13 @@
NACL_SYSCALL(SYS_exit)
JMP 0(PC)
-TEXT runtime·exit1(SB),NOSPLIT,$4
- MOVL code+0(FP), AX
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$4-4
+ MOVL wait+0(FP), AX
+ // SYS_thread_exit will clear *wait when the stack is free.
MOVL AX, 0(SP)
NACL_SYSCALL(SYS_thread_exit)
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$12
MOVL name+0(FP), AX
@@ -228,9 +230,14 @@
MOVL AX, 20(SP)
NACL_SYSCALL(SYS_mmap)
CMPL AX, $-4095
- JNA 2(PC)
+ JNA ok
NEGL AX
- MOVL AX, ret+24(FP)
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·walltime(SB),NOSPLIT,$20
diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s
index 2a39983..ff4c2e7 100644
--- a/src/runtime/sys_nacl_amd64p32.s
+++ b/src/runtime/sys_nacl_amd64p32.s
@@ -19,10 +19,12 @@
NACL_SYSCALL(SYS_exit)
RET
-TEXT runtime·exit1(SB),NOSPLIT,$0
- MOVL code+0(FP), DI
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), DI
+ // SYS_thread_exit will clear *wait when the stack is free.
NACL_SYSCALL(SYS_thread_exit)
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$0
MOVL name+0(FP), DI
@@ -237,9 +239,14 @@
MOVL SP, R9
NACL_SYSCALL(SYS_mmap)
CMPL AX, $-4095
- JNA 2(PC)
+ JNA ok
NEGL AX
- MOVL AX, ret+24(FP)
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·walltime(SB),NOSPLIT,$16
diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s
index 6a6ef4e..6e01fe4 100644
--- a/src/runtime/sys_nacl_arm.s
+++ b/src/runtime/sys_nacl_arm.s
@@ -15,10 +15,12 @@
NACL_SYSCALL(SYS_exit)
RET
-TEXT runtime·exit1(SB),NOSPLIT,$0
- MOVW code+0(FP), R0
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$4-4
+ MOVW wait+0(FP), R0
+ // SYS_thread_exit will clear *wait when the stack is free.
NACL_SYSCALL(SYS_thread_exit)
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$0
MOVW name+0(FP), R0
@@ -192,8 +194,12 @@
NACL_SYSCALL(SYS_mmap)
MOVM.IA.W (R13), [R4, R5]
CMP $-4095, R0
+ MOVW $0, R1
RSB.HI $0, R0
- MOVW R0, ret+24(FP)
+ MOVW.HI R0, R1 // if error, put in R1
+ MOVW.HI $0, R0
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·walltime(SB),NOSPLIT,$16
diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s
index 742193c..4042ab4 100644
--- a/src/runtime/sys_netbsd_386.s
+++ b/src/runtime/sys_netbsd_386.s
@@ -17,12 +17,15 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-4
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
MOVL $310, AX // sys__lwp_exit
INT $0x80
- JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-4
MOVL $5, AX
@@ -113,7 +116,13 @@
STOSL
MOVL $197, AX // sys_mmap
INT $0x80
- MOVL AX, ret+24(FP)
+ JAE ok
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -155,7 +164,7 @@
// void nanotime(int64 *nsec)
TEXT runtime·nanotime(SB),NOSPLIT,$32
LEAL 12(SP), BX
- MOVL $0, 4(SP) // arg 1 - clock_id
+ MOVL $3, 4(SP) // arg 1 - clock_id CLOCK_MONOTONIC
MOVL BX, 8(SP) // arg 2 - tp
MOVL $427, AX // sys_clock_gettime
INT $0x80
@@ -298,7 +307,7 @@
// Call fn
CALL SI
- CALL runtime·exit1(SB)
+ // fn should never return
MOVL $0x1234, 0x1005
RET
@@ -337,9 +346,9 @@
RET
TEXT runtime·lwp_park(SB),NOSPLIT,$-4
- MOVL $434, AX // sys__lwp_park
+ MOVL $478, AX // sys____lwp_park60
INT $0x80
- MOVL AX, ret+16(FP)
+ MOVL AX, ret+24(FP)
RET
TEXT runtime·lwp_unpark(SB),NOSPLIT,$-4
@@ -366,10 +375,12 @@
MOVSL // arg 6 - newlen
MOVL $202, AX // sys___sysctl
INT $0x80
- JCC 3(PC)
+ JAE 4(PC)
NEGL AX
+ MOVL AX, ret+24(FP)
RET
MOVL $0, AX
+ MOVL AX, ret+24(FP)
RET
GLOBL runtime·tlsoffset(SB),NOPTR,$4
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
index c632a0b..11b9c1b 100644
--- a/src/runtime/sys_netbsd_amd64.s
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -48,13 +48,15 @@
RET
TEXT runtime·lwp_park(SB),NOSPLIT,$0
- MOVQ abstime+0(FP), DI // arg 1 - abstime
- MOVL unpark+8(FP), SI // arg 2 - unpark
- MOVQ hint+16(FP), DX // arg 3 - hint
- MOVQ unparkhint+24(FP), R10 // arg 4 - unparkhint
- MOVL $434, AX // sys__lwp_park
+ MOVL clockid+0(FP), DI // arg 1 - clockid
+ MOVL flags+4(FP), SI // arg 2 - flags
+ MOVQ ts+8(FP), DX // arg 3 - ts
+ MOVL unpark+16(FP), R10 // arg 4 - unpark
+ MOVQ hint+24(FP), R8 // arg 5 - hint
+ MOVQ unparkhint+32(FP), R9 // arg 6 - unparkhint
+ MOVL $478, AX // sys____lwp_park60
SYSCALL
- MOVL AX, ret+32(FP)
+ MOVL AX, ret+40(FP)
RET
TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
@@ -79,11 +81,15 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
MOVL $310, AX // sys__lwp_exit
SYSCALL
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8
MOVQ name+0(FP), DI // arg 1 pathname
@@ -184,7 +190,7 @@
RET
TEXT runtime·nanotime(SB),NOSPLIT,$32
- MOVQ $0, DI // arg 1 - clock_id
+ MOVQ $3, DI // arg 1 - clock_id CLOCK_MONOTONIC
LEAQ 8(SP), SI // arg 2 - tp
MOVL $427, AX // sys_clock_gettime
SYSCALL
@@ -286,8 +292,15 @@
MOVQ $0, R9 // arg 6 - pad
MOVL $197, AX // sys_mmap
SYSCALL
+ JCC ok
ADDQ $16, SP
- MOVQ AX, ret+32(FP)
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ ADDQ $16, SP
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s
index 789b12e..7d2e290 100644
--- a/src/runtime/sys_netbsd_arm.s
+++ b/src/runtime/sys_netbsd_arm.s
@@ -18,12 +18,21 @@
MOVW.CS R8, (R8)
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-4
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVW wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R2
+storeloop:
+ LDREX (R0), R4 // loads R4
+ STREX R2, (R0), R1 // stores R2
+ CMP $0, R1
+ BNE storeloop
SWI $0xa00136 // sys__lwp_exit
MOVW $1, R8 // crash
MOVW R8, (R8)
- RET
-
+ JMP 0(PC)
+
TEXT runtime·open(SB),NOSPLIT,$-8
MOVW name+0(FP), R0
MOVW mode+4(FP), R1
@@ -71,13 +80,17 @@
SWI $0xa0015e // sys_sched_yield
RET
-TEXT runtime·lwp_park(SB),NOSPLIT,$0
- MOVW abstime+0(FP), R0 // arg 1 - abstime
- MOVW unpark+4(FP), R1 // arg 2 - unpark
- MOVW hint+8(FP), R2 // arg 3 - hint
- MOVW unparkhint+12(FP), R3 // arg 4 - unparkhint
- SWI $0xa001b2 // sys__lwp_park
- MOVW R0, ret+16(FP)
+TEXT runtime·lwp_park(SB),NOSPLIT,$8
+ MOVW clockid+0(FP), R0 // arg 1 - clock_id
+ MOVW flags+4(FP), R1 // arg 2 - flags
+ MOVW ts+8(FP), R2 // arg 3 - ts
+ MOVW unpark+12(FP), R3 // arg 4 - unpark
+ MOVW hint+16(FP), R4 // arg 5 - hint
+ MOVW R4, 4(R13)
+ MOVW unparkhint+20(FP), R5 // arg 6 - unparkhint
+ MOVW R5, 8(R13)
+ SWI $0xa001de // sys____lwp_park60
+ MOVW R0, ret+24(FP)
RET
TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
@@ -155,7 +168,7 @@
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
TEXT runtime·nanotime(SB), NOSPLIT, $32
- MOVW $0, R0 // CLOCK_REALTIME
+ MOVW $3, R0 // CLOCK_MONOTONIC
MOVW $8(R13), R1
SWI $0xa001ab // clock_gettime
@@ -255,7 +268,11 @@
ADD $4, R13 // pass arg 5 and arg 6 on stack
SWI $0xa000c5 // sys_mmap
SUB $4, R13
- MOVW R0, ret+24(FP)
+ MOVW $0, R1
+ MOVW.CS R0, R1 // if error, move to R1
+ MOVW.CS $0, R0
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
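
Note on the NetBSD nanotime hunks above: the clock_id changes from 0 (CLOCK_REALTIME) to 3 (CLOCK_MONOTONIC). That matters because nanotime feeds the monotonic component of time.Time; a wall clock can be stepped backwards, a monotonic one cannot. A small user-level illustration of the property the fix preserves:

package main

import (
	"fmt"
	"time"
)

func main() {
	start := time.Now() // carries a monotonic reading alongside the wall clock
	time.Sleep(50 * time.Millisecond)
	// time.Since uses the monotonic reading, so this stays positive and
	// roughly 50ms even if ntpd or date(1) steps the wall clock meanwhile.
	fmt.Println(time.Since(start))
}
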
diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s
index fb2a688..475a937 100644
--- a/src/runtime/sys_openbsd_386.s
+++ b/src/runtime/sys_openbsd_386.s
@@ -19,14 +19,21 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$8
- MOVL $0, 0(SP)
- MOVL $0, 4(SP) // arg 1 - notdead
+GLOBL exitStack<>(SB),RODATA,$8
+DATA exitStack<>+0x00(SB)/4, $0
+DATA exitStack<>+0x04(SB)/4, $0
+
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVL wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
+ // sys___threxit takes 1 argument, which it expects on the stack.
+ MOVL $exitStack<>(SB), SP
MOVL $302, AX // sys___threxit
INT $0x80
- JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-4
MOVL $5, AX
@@ -118,7 +125,13 @@
STOSL
MOVL $197, AX // sys_mmap
INT $0x80
- MOVL AX, ret+24(FP)
+ JAE ok
+ MOVL $0, p+24(FP)
+ MOVL AX, err+28(FP)
+ RET
+ok:
+ MOVL AX, p+24(FP)
+ MOVL $0, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -308,7 +321,7 @@
// Call fn.
CALL SI
- CALL runtime·exit1(SB)
+ // fn should never return.
MOVL $0x1234, 0x1005
RET
diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s
index 9a52e5d..658f2c4 100644
--- a/src/runtime/sys_openbsd_amd64.s
+++ b/src/runtime/sys_openbsd_amd64.s
@@ -88,12 +88,16 @@
MOVL $0xf1, 0xf1 // crash
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-8
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-8
+ MOVQ wait+0(FP), AX
+ // We're done using the stack.
+ MOVL $0, (AX)
MOVQ $0, DI // arg 1 - notdead
MOVL $302, AX // sys___threxit
SYSCALL
MOVL $0xf1, 0xf1 // crash
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-8
MOVQ name+0(FP), DI // arg 1 pathname
@@ -278,8 +282,15 @@
MOVQ $0, R9 // arg 6 - pad
MOVL $197, AX
SYSCALL
+ JCC ok
ADDQ $16, SP
- MOVQ AX, ret+32(FP)
+ MOVQ $0, p+32(FP)
+ MOVQ AX, err+40(FP)
+ RET
+ok:
+ ADDQ $16, SP
+ MOVQ AX, p+32(FP)
+ MOVQ $0, err+40(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_openbsd_arm.s b/src/runtime/sys_openbsd_arm.s
index 93a5d5b..ea75386 100644
--- a/src/runtime/sys_openbsd_arm.s
+++ b/src/runtime/sys_openbsd_arm.s
@@ -22,13 +22,22 @@
MOVW.CS R8, (R8)
RET
-TEXT runtime·exit1(SB),NOSPLIT,$-4
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT,$0-4
+ MOVW wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R2
+storeloop:
+ LDREX (R0), R4 // loads R4
+ STREX R2, (R0), R1 // stores R2
+ CMP $0, R1
+ BNE storeloop
MOVW $0, R0 // arg 1 - notdead
MOVW $302, R12 // sys___threxit
SWI $0
MOVW.CS $1, R8 // crash on syscall failure
MOVW.CS R8, (R8)
- RET
+ JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-4
MOVW name+0(FP), R0 // arg 1 - path
@@ -120,7 +129,11 @@
MOVW $197, R12 // sys_mmap
SWI $0
SUB $4, R13
- MOVW R0, ret+24(FP)
+ MOVW $0, R1
+ MOVW.CS R0, R1 // if error, move to R1
+ MOVW.CS $0, R0
+ MOVW R0, p+24(FP)
+ MOVW R1, err+28(FP)
RET
TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -269,7 +282,7 @@
// Call fn.
BL (R6)
- BL runtime·exit1(SB)
+ // fn should never return.
MOVW $2, R8 // crash if reached
MOVW R8, (R8)
RET
diff --git a/src/runtime/sys_plan9_386.s b/src/runtime/sys_plan9_386.s
index 688bd23..47dcb8d 100644
--- a/src/runtime/sys_plan9_386.s
+++ b/src/runtime/sys_plan9_386.s
@@ -139,7 +139,7 @@
MOVL AX, ret+4(FP)
RET
-TEXT runtime·tstart_plan9(SB),NOSPLIT,$0
+TEXT runtime·tstart_plan9(SB),NOSPLIT,$4
MOVL newm+0(FP), CX
MOVL m_g0(CX), DX
@@ -163,8 +163,10 @@
CALL runtime·stackcheck(SB) // smashes AX, CX
CALL runtime·mstart(SB)
- MOVL $0x1234, 0x1234 // not reached
- RET
+ // Exit the thread.
+ MOVL $0, 0(SP)
+ CALL runtime·exits(SB)
+ JMP 0(PC)
// void sigtramp(void *ureg, int8 *note)
TEXT runtime·sigtramp(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_plan9_amd64.s b/src/runtime/sys_plan9_amd64.s
index d7bd92c..8077d6d 100644
--- a/src/runtime/sys_plan9_amd64.s
+++ b/src/runtime/sys_plan9_amd64.s
@@ -136,7 +136,7 @@
MOVL AX, ret+8(FP)
RET
-TEXT runtime·tstart_plan9(SB),NOSPLIT,$0
+TEXT runtime·tstart_plan9(SB),NOSPLIT,$8
MOVQ newm+0(FP), CX
MOVQ m_g0(CX), DX
@@ -160,8 +160,10 @@
CALL runtime·stackcheck(SB) // smashes AX, CX
CALL runtime·mstart(SB)
- MOVQ $0x1234, 0x1234 // not reached
- RET
+ // Exit the thread.
+ MOVQ $0, 0(SP)
+ CALL runtime·exits(SB)
+ JMP 0(PC)
// This is needed by asm_amd64.s
TEXT runtime·settls(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_plan9_arm.s b/src/runtime/sys_plan9_arm.s
index 94a6f63..efaf603 100644
--- a/src/runtime/sys_plan9_arm.s
+++ b/src/runtime/sys_plan9_arm.s
@@ -207,7 +207,7 @@
RET
//func tstart_plan9(newm *m)
-TEXT runtime·tstart_plan9(SB),NOSPLIT,$0-4
+TEXT runtime·tstart_plan9(SB),NOSPLIT,$4-4
MOVW newm+0(FP), R1
MOVW m_g0(R1), g
@@ -226,9 +226,11 @@
BL runtime·mstart(SB)
- MOVW $0x1234, R0
- MOVW R0, 0(R0) // not reached
- RET
+ // Exit the thread.
+ MOVW $0, R0
+ MOVW R0, 4(R13)
+ CALL runtime·exits(SB)
+ JMP 0(PC)
//func sigtramp(ureg, note unsafe.Pointer)
TEXT runtime·sigtramp(SB),NOSPLIT,$0-8
diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s
index aeb2e2c..2b6daba 100644
--- a/src/runtime/sys_solaris_amd64.s
+++ b/src/runtime/sys_solaris_amd64.s
@@ -183,9 +183,6 @@
JMP exit
allgood:
- // save g
- MOVQ R10, 80(SP)
-
// Save m->libcall and m->scratch. We need to do this because we
// might get interrupted by a signal in runtime·asmcgocall.
@@ -223,19 +220,11 @@
MOVL 0(R10), R10
MOVQ R10, 160(SP)
- MOVQ g(BX), R10
- // g = m->gsignal
- MOVQ m_gsignal(BP), BP
- MOVQ BP, g(BX)
-
- // TODO: If current SP is not in gsignal.stack, then adjust.
-
// prepare call
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ DX, 16(SP)
- MOVQ R10, 24(SP)
- CALL runtime·sighandler(SB)
+ CALL runtime·sigtrampgo(SB)
get_tls(BX)
MOVQ g(BX), BP
@@ -273,10 +262,6 @@
MOVQ 160(SP), R10
MOVL R10, 0(R11)
- // restore g
- MOVQ 80(SP), R10
- MOVQ R10, g(BX)
-
exit:
// restore registers
MOVQ 32(SP), BX
diff --git a/src/runtime/sys_x86.go b/src/runtime/sys_x86.go
index 7e4e273..2b4ed8b 100644
--- a/src/runtime/sys_x86.go
+++ b/src/runtime/sys_x86.go
@@ -11,7 +11,7 @@
"unsafe"
)
-// adjust Gobuf as it if executed a call to fn with context ctxt
+// adjust Gobuf as if it executed a call to fn with context ctxt
// and then did an immediate gosave.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
sp := buf.sp
diff --git a/src/runtime/syscall_solaris.go b/src/runtime/syscall_solaris.go
index 6c9dbe2..ee227ea 100644
--- a/src/runtime/syscall_solaris.go
+++ b/src/runtime/syscall_solaris.go
@@ -174,19 +174,9 @@
}
// This is syscall.RawSyscall, it exists to satisfy some build dependency,
-// but it doesn't work correctly.
-//
-// DO NOT USE!
-//
-// TODO(aram): make this panic once we stop calling fcntl(2) in net using it.
+// but it doesn't work.
func syscall_rawsyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
- call := libcall{
- fn: uintptr(unsafe.Pointer(&libc_syscall)),
- n: 4,
- args: uintptr(unsafe.Pointer(&trap)),
- }
- asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
- return call.r1, call.r2, call.err
+ panic("RawSyscall not available on Solaris")
}
//go:nosplit
diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go
index ca8ea8b..134d4db 100644
--- a/src/runtime/syscall_windows.go
+++ b/src/runtime/syscall_windows.go
@@ -93,6 +93,8 @@
//go:linkname syscall_loadsystemlibrary syscall.loadsystemlibrary
//go:nosplit
func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
if useLoadLibraryEx {
@@ -126,6 +128,8 @@
//go:linkname syscall_loadlibrary syscall.loadlibrary
//go:nosplit
func syscall_loadlibrary(filename *uint16) (handle, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = getLoadLibrary()
c.n = 1
@@ -141,6 +145,8 @@
//go:linkname syscall_getprocaddress syscall.getprocaddress
//go:nosplit
func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = getGetProcAddress()
c.n = 2
@@ -156,6 +162,8 @@
//go:linkname syscall_Syscall syscall.Syscall
//go:nosplit
func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = fn
c.n = nargs
@@ -167,6 +175,8 @@
//go:linkname syscall_Syscall6 syscall.Syscall6
//go:nosplit
func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = fn
c.n = nargs
@@ -178,6 +188,8 @@
//go:linkname syscall_Syscall9 syscall.Syscall9
//go:nosplit
func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = fn
c.n = nargs
@@ -189,6 +201,8 @@
//go:linkname syscall_Syscall12 syscall.Syscall12
//go:nosplit
func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12 uintptr) (r1, r2, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = fn
c.n = nargs
@@ -200,6 +214,8 @@
//go:linkname syscall_Syscall15 syscall.Syscall15
//go:nosplit
func syscall_Syscall15(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) {
+ lockOSThread()
+ defer unlockOSThread()
c := &getg().m.syscall
c.fn = fn
c.n = nargs
@@ -207,9 +223,3 @@
cgocall(asmstdcallAddr, unsafe.Pointer(c))
return c.r1, c.r2, c.err
}
-
-//go:linkname syscall_exit syscall.Exit
-//go:nosplit
-func syscall_exit(code int) {
- exit(int32(code))
-}
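
Note on the syscall_windows.go hunks: each wrapper now brackets its work with lockOSThread/unlockOSThread, the runtime-internal counterparts of the exported runtime.LockOSThread pair. The user-level idiom looks like this when a sequence of calls must all observe the same OS thread (a generic sketch, not tied to these Windows stubs):

package main

import (
	"fmt"
	"runtime"
)

// withThreadAffinity runs fn with the goroutine pinned to one OS thread,
// so every call fn makes sees the same thread state (TLS, last-error
// values, window ownership, and so on).
func withThreadAffinity(fn func()) {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	fn()
}

func main() {
	withThreadAffinity(func() {
		fmt.Println("all calls in here run on one OS thread")
	})
}
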
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index 3da154d..dfde12a 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -15,6 +15,7 @@
"os/exec"
"path/filepath"
"runtime"
+ "strconv"
"strings"
"syscall"
"testing"
@@ -537,6 +538,17 @@
cmd.CombinedOutput()
}
+func TestWindowsStackMemory(t *testing.T) {
+ o := runTestProg(t, "testprog", "StackMemory")
+ stackUsage, err := strconv.Atoi(o)
+ if err != nil {
+ t.Fatalf("Failed to read stack usage: %v", err)
+ }
+ if expected, got := 100<<10, stackUsage; got > expected {
+ t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got)
+ }
+}
+
var used byte
func use(buf []byte) {
@@ -1043,7 +1055,7 @@
}
exe := filepath.Join(tmpdir, "main.exe")
- cmd := exec.Command("go", "build", "-o", exe, src)
+ cmd := exec.Command(testenv.GoToolPath(b), "build", "-o", exe, src)
cmd.Dir = tmpdir
out, err := cmd.CombinedOutput()
if err != nil {
diff --git a/src/runtime/testdata/testprog/badtraceback.go b/src/runtime/testdata/testprog/badtraceback.go
new file mode 100644
index 0000000..d558adc
--- /dev/null
+++ b/src/runtime/testdata/testprog/badtraceback.go
@@ -0,0 +1,47 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "runtime"
+ "runtime/debug"
+ "unsafe"
+)
+
+func init() {
+ register("BadTraceback", BadTraceback)
+}
+
+func BadTraceback() {
+ // Disable GC to prevent traceback at unexpected time.
+ debug.SetGCPercent(-1)
+
+ // Run badLR1 on its own stack to minimize the stack size and
+ // exercise the stack bounds logic in the hex dump.
+ go badLR1()
+ select {}
+}
+
+//go:noinline
+func badLR1() {
+ // We need two frames on LR machines because we'll smash this
+ // frame's saved LR.
+ badLR2(0)
+}
+
+//go:noinline
+func badLR2(arg int) {
+ // Smash the return PC or saved LR.
+ lrOff := unsafe.Sizeof(uintptr(0))
+ if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
+ lrOff = 32 // FIXED_FRAME or sys.MinFrameSize
+ }
+ lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff))
+ *lrPtr = 0xbad
+
+ // Print a backtrace. This should include diagnostics for the
+ // bad return PC and a hex dump.
+ panic("backtrace")
+}
diff --git a/src/runtime/testdata/testprog/gettid.go b/src/runtime/testdata/testprog/gettid.go
new file mode 100644
index 0000000..1b3e29a
--- /dev/null
+++ b/src/runtime/testdata/testprog/gettid.go
@@ -0,0 +1,29 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "syscall"
+)
+
+func gettid() int {
+ return syscall.Gettid()
+}
+
+func tidExists(tid int) (exists, supported bool) {
+ stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid))
+ if os.IsNotExist(err) {
+ return false, true
+ }
+ // Check if it's a zombie thread.
+ state := bytes.Fields(stat)[2]
+ return !(len(state) == 1 && state[0] == 'Z'), true
+}
diff --git a/src/runtime/testdata/testprog/gettid_none.go b/src/runtime/testdata/testprog/gettid_none.go
new file mode 100644
index 0000000..036db87
--- /dev/null
+++ b/src/runtime/testdata/testprog/gettid_none.go
@@ -0,0 +1,15 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux
+
+package main
+
+func gettid() int {
+ return 0
+}
+
+func tidExists(tid int) (exists, supported bool) {
+ return false, false
+}
diff --git a/src/runtime/testdata/testprog/lockosthread.go b/src/runtime/testdata/testprog/lockosthread.go
new file mode 100644
index 0000000..88c0d12
--- /dev/null
+++ b/src/runtime/testdata/testprog/lockosthread.go
@@ -0,0 +1,94 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "os"
+ "runtime"
+ "time"
+)
+
+var mainTID int
+
+func init() {
+ registerInit("LockOSThreadMain", func() {
+ // init is guaranteed to run on the main thread.
+ mainTID = gettid()
+ })
+ register("LockOSThreadMain", LockOSThreadMain)
+
+ registerInit("LockOSThreadAlt", func() {
+ // Lock the OS thread now so main runs on the main thread.
+ runtime.LockOSThread()
+ })
+ register("LockOSThreadAlt", LockOSThreadAlt)
+}
+
+func LockOSThreadMain() {
+ // gettid only works on Linux, so on other platforms this just
+ // checks that the runtime doesn't do anything terrible.
+
+ // This requires GOMAXPROCS=1 from the beginning to reliably
+ // start a goroutine on the main thread.
+ if runtime.GOMAXPROCS(-1) != 1 {
+ println("requires GOMAXPROCS=1")
+ os.Exit(1)
+ }
+
+ ready := make(chan bool, 1)
+ go func() {
+ // Because GOMAXPROCS=1, this *should* be on the main
+ // thread. Stay there.
+ runtime.LockOSThread()
+ if mainTID != 0 && gettid() != mainTID {
+ println("failed to start goroutine on main thread")
+ os.Exit(1)
+ }
+ // Exit with the thread locked, which should exit the
+ // main thread.
+ ready <- true
+ }()
+ <-ready
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is still running on a different
+ // thread.
+ if mainTID != 0 && gettid() == mainTID {
+ println("goroutine migrated to locked thread")
+ os.Exit(1)
+ }
+ println("OK")
+}
+
+func LockOSThreadAlt() {
+ // This is running locked to the main OS thread.
+
+ var subTID int
+ ready := make(chan bool, 1)
+ go func() {
+ // This goroutine must be running on a new thread.
+ runtime.LockOSThread()
+ subTID = gettid()
+ ready <- true
+ // Exit with the thread locked.
+ }()
+ <-ready
+ runtime.UnlockOSThread()
+ for i := 0; i < 100; i++ {
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is running on a different thread.
+ if subTID != 0 && gettid() == subTID {
+ println("locked thread reused")
+ os.Exit(1)
+ }
+ exists, supported := tidExists(subTID)
+ if !supported || !exists {
+ goto ok
+ }
+ }
+ println("sub thread", subTID, "still running")
+ return
+ok:
+ println("OK")
+}
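
Note on this test program: both modes lean on the Go 1.10 change to runtime.LockOSThread — if a goroutine exits while still locked, its OS thread is terminated rather than returned to the scheduler's pool. A minimal standalone illustration of that behavior (it only prints; it does not verify thread identity the way the test above does):

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	done := make(chan struct{})
	go func() {
		runtime.LockOSThread()
		// Returning while still locked terminates this OS thread
		// (Go 1.10 semantics) instead of handing it back to the pool.
		close(done)
	}()
	<-done
	time.Sleep(10 * time.Millisecond)
	fmt.Println("the locked thread is gone; other goroutines run elsewhere")
}
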
diff --git a/src/runtime/testdata/testprog/syscall_windows.go b/src/runtime/testdata/testprog/syscall_windows.go
index 6e6782e..b4b6644 100644
--- a/src/runtime/testdata/testprog/syscall_windows.go
+++ b/src/runtime/testdata/testprog/syscall_windows.go
@@ -4,11 +4,18 @@
package main
-import "syscall"
+import (
+ "internal/syscall/windows"
+ "runtime"
+ "sync"
+ "syscall"
+ "unsafe"
+)
func init() {
register("RaiseException", RaiseException)
register("ZeroDivisionException", ZeroDivisionException)
+ register("StackMemory", StackMemory)
}
func RaiseException() {
@@ -25,3 +32,39 @@
z := x / y
println(z)
}
+
+func getPagefileUsage() (uintptr, error) {
+ p, err := syscall.GetCurrentProcess()
+ if err != nil {
+ return 0, err
+ }
+ var m windows.PROCESS_MEMORY_COUNTERS
+ err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m)))
+ if err != nil {
+ return 0, err
+ }
+ return m.PagefileUsage, nil
+}
+
+func StackMemory() {
+ mem1, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ const threadCount = 100
+ var wg sync.WaitGroup
+ for i := 0; i < threadCount; i++ {
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ wg.Done()
+ select {}
+ }()
+ }
+ wg.Wait()
+ mem2, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ print((mem2 - mem1) / threadCount)
+}
diff --git a/src/runtime/testdata/testprogcgo/callback.go b/src/runtime/testdata/testprogcgo/callback.go
index 7d9d68d..be0409f 100644
--- a/src/runtime/testdata/testprogcgo/callback.go
+++ b/src/runtime/testdata/testprogcgo/callback.go
@@ -29,6 +29,7 @@
import (
"fmt"
+ "os"
"runtime"
)
@@ -63,7 +64,10 @@
}
func CgoCallbackGC() {
- const P = 100
+ P := 100
+ if os.Getenv("RUNTIME_TESTING_SHORT") != "" {
+ P = 10
+ }
done := make(chan bool)
// allocate a bunch of stack frames and spray them with pointers
for i := 0; i < P; i++ {
diff --git a/src/runtime/testdata/testprogcgo/catchpanic.go b/src/runtime/testdata/testprogcgo/catchpanic.go
new file mode 100644
index 0000000..55a606d
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/catchpanic.go
@@ -0,0 +1,46 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+/*
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void abrthandler(int signum) {
+ if (signum == SIGABRT) {
+ exit(0); // success
+ }
+}
+
+void registerAbortHandler() {
+ struct sigaction act;
+ memset(&act, 0, sizeof act);
+ act.sa_handler = abrthandler;
+ sigaction(SIGABRT, &act, NULL);
+}
+
+static void __attribute__ ((constructor)) sigsetup(void) {
+ if (getenv("CGOCATCHPANIC_EARLY_HANDLER") == NULL)
+ return;
+ registerAbortHandler();
+}
+*/
+import "C"
+import "os"
+
+func init() {
+ register("CgoCatchPanic", CgoCatchPanic)
+}
+
+// Test that the SIGABRT raised by panic can be caught by an early signal handler.
+func CgoCatchPanic() {
+ if _, ok := os.LookupEnv("CGOCATCHPANIC_EARLY_HANDLER"); !ok {
+ C.registerAbortHandler()
+ }
+ panic("catch me")
+}
diff --git a/src/runtime/testdata/testprogcgo/cgo.go b/src/runtime/testdata/testprogcgo/cgo.go
index 209524a..a587db3 100644
--- a/src/runtime/testdata/testprogcgo/cgo.go
+++ b/src/runtime/testdata/testprogcgo/cgo.go
@@ -52,7 +52,11 @@
time.Sleep(time.Millisecond)
start := time.Now()
var times []time.Duration
- for i := 0; i < 64; i++ {
+ n := 64
+ if os.Getenv("RUNTIME_TEST_SHORT") != "" {
+ n = 16
+ }
+ for i := 0; i < n; i++ {
go func() {
runtime.LockOSThread()
select {}
diff --git a/src/runtime/testdata/testprogcgo/lockosthread.c b/src/runtime/testdata/testprogcgo/lockosthread.c
new file mode 100644
index 0000000..b10cc4f
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/lockosthread.c
@@ -0,0 +1,13 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+#include <stdint.h>
+
+uint32_t threadExited;
+
+void setExited(void *x) {
+ __sync_fetch_and_add(&threadExited, 1);
+}
diff --git a/src/runtime/testdata/testprogcgo/lockosthread.go b/src/runtime/testdata/testprogcgo/lockosthread.go
new file mode 100644
index 0000000..36423d9
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/lockosthread.go
@@ -0,0 +1,111 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+import (
+ "os"
+ "runtime"
+ "sync/atomic"
+ "time"
+ "unsafe"
+)
+
+/*
+#include <pthread.h>
+#include <stdint.h>
+
+extern uint32_t threadExited;
+
+void setExited(void *x);
+*/
+import "C"
+
+var mainThread C.pthread_t
+
+func init() {
+ registerInit("LockOSThreadMain", func() {
+ // init is guaranteed to run on the main thread.
+ mainThread = C.pthread_self()
+ })
+ register("LockOSThreadMain", LockOSThreadMain)
+
+ registerInit("LockOSThreadAlt", func() {
+ // Lock the OS thread now so main runs on the main thread.
+ runtime.LockOSThread()
+ })
+ register("LockOSThreadAlt", LockOSThreadAlt)
+}
+
+func LockOSThreadMain() {
+ // This requires GOMAXPROCS=1 from the beginning to reliably
+ // start a goroutine on the main thread.
+ if runtime.GOMAXPROCS(-1) != 1 {
+ println("requires GOMAXPROCS=1")
+ os.Exit(1)
+ }
+
+ ready := make(chan bool, 1)
+ go func() {
+ // Because GOMAXPROCS=1, this *should* be on the main
+ // thread. Stay there.
+ runtime.LockOSThread()
+ self := C.pthread_self()
+ if C.pthread_equal(mainThread, self) == 0 {
+ println("failed to start goroutine on main thread")
+ os.Exit(1)
+ }
+ // Exit with the thread locked, which should exit the
+ // main thread.
+ ready <- true
+ }()
+ <-ready
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is still running on a different
+ // thread.
+ self := C.pthread_self()
+ if C.pthread_equal(mainThread, self) != 0 {
+ println("goroutine migrated to locked thread")
+ os.Exit(1)
+ }
+ println("OK")
+}
+
+func LockOSThreadAlt() {
+ // This is running locked to the main OS thread.
+
+ var subThread C.pthread_t
+ ready := make(chan bool, 1)
+ C.threadExited = 0
+ go func() {
+ // This goroutine must be running on a new thread.
+ runtime.LockOSThread()
+ subThread = C.pthread_self()
+ // Register a pthread destructor so we can tell this
+ // thread has exited.
+ var key C.pthread_key_t
+ C.pthread_key_create(&key, (*[0]byte)(unsafe.Pointer(C.setExited)))
+ C.pthread_setspecific(key, unsafe.Pointer(new(int)))
+ ready <- true
+ // Exit with the thread locked.
+ }()
+ <-ready
+ for i := 0; i < 100; i++ {
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is running on a different thread.
+ self := C.pthread_self()
+ if C.pthread_equal(subThread, self) != 0 {
+ println("locked thread reused")
+ os.Exit(1)
+ }
+ if atomic.LoadUint32((*uint32)(&C.threadExited)) != 0 {
+ println("OK")
+ return
+ }
+ }
+ println("sub thread still running")
+ os.Exit(1)
+}
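Both LockOSThread tests above rely on the Go 1.10 behavior that a goroutine which exits while still locked with runtime.LockOSThread takes its OS thread down with it instead of returning the thread to the scheduler's pool. A minimal pure-Go sketch of the calling pattern being exercised (it only shows the pattern; actually observing the thread terminate requires the pthread-destructor trick used in the cgo test above):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done)
		runtime.LockOSThread()
		// Return without calling UnlockOSThread: as of Go 1.10 the
		// runtime terminates this thread rather than reusing it.
	}()
	<-done
	fmt.Println("locked goroutine exited")
}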
diff --git a/src/runtime/testdata/testprogcgo/sigpanic.go b/src/runtime/testdata/testprogcgo/sigpanic.go
new file mode 100644
index 0000000..cb46030
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/sigpanic.go
@@ -0,0 +1,28 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// This program will crash.
+// We want to test unwinding from sigpanic into C code (without a C symbolizer).
+
+/*
+#cgo CFLAGS: -O0
+
+char *pnil;
+
+static int f1(void) {
+ *pnil = 0;
+ return 0;
+}
+*/
+import "C"
+
+func init() {
+ register("TracebackSigpanic", TracebackSigpanic)
+}
+
+func TracebackSigpanic() {
+ C.f1()
+}
diff --git a/src/runtime/testdata/testprogcgo/sigstack.go b/src/runtime/testdata/testprogcgo/sigstack.go
new file mode 100644
index 0000000..492dfef
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/sigstack.go
@@ -0,0 +1,91 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+// Test handling of Go-allocated signal stacks when calling from
+// C-created threads with and without signal stacks. (See issue
+// #22930.)
+
+package main
+
+/*
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+extern void SigStackCallback();
+
+static void* WithSigStack(void* arg __attribute__((unused))) {
+ // Set up an alternate system stack.
+ void* base = mmap(0, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (base == MAP_FAILED) {
+ perror("mmap failed");
+ abort();
+ }
+ stack_t st = {}, ost = {};
+ st.ss_sp = (char*)base;
+ st.ss_flags = 0;
+ st.ss_size = SIGSTKSZ;
+ if (sigaltstack(&st, &ost) < 0) {
+ perror("sigaltstack failed");
+ abort();
+ }
+
+ // Call Go.
+ SigStackCallback();
+
+ // Disable signal stack and protect it so we can detect reuse.
+ if (ost.ss_flags & SS_DISABLE) {
+ // Darwin libsystem has a bug where it checks ss_size
+ // even if SS_DISABLE is set. (The kernel gets it right.)
+ ost.ss_size = SIGSTKSZ;
+ }
+ if (sigaltstack(&ost, NULL) < 0) {
+ perror("sigaltstack restore failed");
+ abort();
+ }
+ mprotect(base, SIGSTKSZ, PROT_NONE);
+ return NULL;
+}
+
+static void* WithoutSigStack(void* arg __attribute__((unused))) {
+ SigStackCallback();
+ return NULL;
+}
+
+static void DoThread(int sigstack) {
+ pthread_t tid;
+ if (sigstack) {
+ pthread_create(&tid, NULL, WithSigStack, NULL);
+ } else {
+ pthread_create(&tid, NULL, WithoutSigStack, NULL);
+ }
+ pthread_join(tid, NULL);
+}
+*/
+import "C"
+
+func init() {
+ register("SigStack", SigStack)
+}
+
+func SigStack() {
+ C.DoThread(0)
+ C.DoThread(1)
+ C.DoThread(0)
+ C.DoThread(1)
+ println("OK")
+}
+
+var BadPtr *int
+
+//export SigStackCallback
+func SigStackCallback() {
+ // Cause the Go signal handler to run.
+ defer func() { recover() }()
+ *BadPtr = 42
+}
diff --git a/src/runtime/testdata/testprogcgo/stack_windows.go b/src/runtime/testdata/testprogcgo/stack_windows.go
new file mode 100644
index 0000000..846297a
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/stack_windows.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "C"
+import (
+ "internal/syscall/windows"
+ "runtime"
+ "sync"
+ "syscall"
+ "unsafe"
+)
+
+func init() {
+ register("StackMemory", StackMemory)
+}
+
+func getPagefileUsage() (uintptr, error) {
+ p, err := syscall.GetCurrentProcess()
+ if err != nil {
+ return 0, err
+ }
+ var m windows.PROCESS_MEMORY_COUNTERS
+ err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m)))
+ if err != nil {
+ return 0, err
+ }
+ return m.PagefileUsage, nil
+}
+
+func StackMemory() {
+ mem1, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ const threadCount = 100
+ var wg sync.WaitGroup
+ for i := 0; i < threadCount; i++ {
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ wg.Done()
+ select {}
+ }()
+ }
+ wg.Wait()
+ mem2, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ print((mem2 - mem1) / threadCount)
+}
diff --git a/src/runtime/time.go b/src/runtime/time.go
index abf200d..3ac60f3 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -6,14 +6,18 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
// For GOOS=nacl, package syscall knows the layout of this structure.
// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
type timer struct {
- i int // heap index
+ tb *timersBucket // the bucket the timer lives in
+ i int // heap index
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(arg, now) in the timer goroutine, so f must be
@@ -25,7 +29,38 @@
seq uintptr
}
-var timers struct {
+// timersLen is the length of the timers array.
+//
+// Ideally, this would be set to GOMAXPROCS, but that would require
+// dynamic reallocation.
+//
+// The current value is a compromise between memory usage and performance
+// that should cover the majority of GOMAXPROCS values used in the wild.
+const timersLen = 64
+
+// timers contains "per-P" timer heaps.
+//
+// Timers are queued into the timersBucket associated with the current P,
+// so each P may work with its own timers independently of other P instances.
+//
+// Each timersBucket may be associated with multiple Ps
+// if GOMAXPROCS > timersLen.
+var timers [timersLen]struct {
+ timersBucket
+
+ // The padding should eliminate false sharing
+ // between timersBucket values.
+ pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte
+}
+
+func (t *timer) assignBucket() *timersBucket {
+ id := uint8(getg().m.p.ptr().id) % timersLen
+ t.tb = &timers[id].timersBucket
+ return t.tb
+}
+
+//go:notinheap
+type timersBucket struct {
lock mutex
gp *g
created bool
@@ -51,18 +86,20 @@
return
}
- t := getg().timer
+ gp := getg()
+ t := gp.timer
if t == nil {
t = new(timer)
- getg().timer = t
+ gp.timer = t
}
*t = timer{}
t.when = nanotime() + ns
t.f = goroutineReady
- t.arg = getg()
- lock(&timers.lock)
- addtimerLocked(t)
- goparkunlock(&timers.lock, "sleep", traceEvGoSleep, 2)
+ t.arg = gp
+ tb := t.assignBucket()
+ lock(&tb.lock)
+ tb.addtimerLocked(t)
+ goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2)
}
// startTimer adds t to the timer heap.
@@ -89,87 +126,95 @@
}
func addtimer(t *timer) {
- lock(&timers.lock)
- addtimerLocked(t)
- unlock(&timers.lock)
+ tb := t.assignBucket()
+ lock(&tb.lock)
+ tb.addtimerLocked(t)
+ unlock(&tb.lock)
}
// Add a timer to the heap and start or kick timerproc if the new timer is
// earlier than any of the others.
// Timers are locked.
-func addtimerLocked(t *timer) {
+func (tb *timersBucket) addtimerLocked(t *timer) {
// when must never be negative; otherwise timerproc will overflow
// during its delta calculation and never expire other runtime timers.
if t.when < 0 {
t.when = 1<<63 - 1
}
- t.i = len(timers.t)
- timers.t = append(timers.t, t)
- siftupTimer(t.i)
+ t.i = len(tb.t)
+ tb.t = append(tb.t, t)
+ siftupTimer(tb.t, t.i)
if t.i == 0 {
// siftup moved to top: new earliest deadline.
- if timers.sleeping {
- timers.sleeping = false
- notewakeup(&timers.waitnote)
+ if tb.sleeping {
+ tb.sleeping = false
+ notewakeup(&tb.waitnote)
}
- if timers.rescheduling {
- timers.rescheduling = false
- goready(timers.gp, 0)
+ if tb.rescheduling {
+ tb.rescheduling = false
+ goready(tb.gp, 0)
}
}
- if !timers.created {
- timers.created = true
- go timerproc()
+ if !tb.created {
+ tb.created = true
+ go timerproc(tb)
}
}
// Delete timer t from the heap.
// Do not need to update the timerproc: if it wakes up early, no big deal.
func deltimer(t *timer) bool {
- // Dereference t so that any panic happens before the lock is held.
- // Discard result, because t might be moving in the heap.
- _ = t.i
+ if t.tb == nil {
+ // t.tb can be nil if the user created a timer
+ // directly, without invoking startTimer, e.g.
+ // time.Ticker{C: c}.
+ // In this case, return early without any deletion.
+ // See Issue 21874.
+ return false
+ }
- lock(&timers.lock)
+ tb := t.tb
+
+ lock(&tb.lock)
// t may not be registered anymore and may have
// a bogus i (typically 0, if generated by Go).
// Verify it before proceeding.
i := t.i
- last := len(timers.t) - 1
- if i < 0 || i > last || timers.t[i] != t {
- unlock(&timers.lock)
+ last := len(tb.t) - 1
+ if i < 0 || i > last || tb.t[i] != t {
+ unlock(&tb.lock)
return false
}
if i != last {
- timers.t[i] = timers.t[last]
- timers.t[i].i = i
+ tb.t[i] = tb.t[last]
+ tb.t[i].i = i
}
- timers.t[last] = nil
- timers.t = timers.t[:last]
+ tb.t[last] = nil
+ tb.t = tb.t[:last]
if i != last {
- siftupTimer(i)
- siftdownTimer(i)
+ siftupTimer(tb.t, i)
+ siftdownTimer(tb.t, i)
}
- unlock(&timers.lock)
+ unlock(&tb.lock)
return true
}
// Timerproc runs the time-driven events.
-// It sleeps until the next event in the timers heap.
+// It sleeps until the next event in the tb heap.
// If addtimer inserts a new earlier event, it wakes timerproc early.
-func timerproc() {
- timers.gp = getg()
+func timerproc(tb *timersBucket) {
+ tb.gp = getg()
for {
- lock(&timers.lock)
- timers.sleeping = false
+ lock(&tb.lock)
+ tb.sleeping = false
now := nanotime()
delta := int64(-1)
for {
- if len(timers.t) == 0 {
+ if len(tb.t) == 0 {
delta = -1
break
}
- t := timers.t[0]
+ t := tb.t[0]
delta = t.when - now
if delta > 0 {
break
@@ -177,43 +222,43 @@
if t.period > 0 {
// leave in heap but adjust next time to fire
t.when += t.period * (1 + -delta/t.period)
- siftdownTimer(0)
+ siftdownTimer(tb.t, 0)
} else {
// remove from heap
- last := len(timers.t) - 1
+ last := len(tb.t) - 1
if last > 0 {
- timers.t[0] = timers.t[last]
- timers.t[0].i = 0
+ tb.t[0] = tb.t[last]
+ tb.t[0].i = 0
}
- timers.t[last] = nil
- timers.t = timers.t[:last]
+ tb.t[last] = nil
+ tb.t = tb.t[:last]
if last > 0 {
- siftdownTimer(0)
+ siftdownTimer(tb.t, 0)
}
t.i = -1 // mark as removed
}
f := t.f
arg := t.arg
seq := t.seq
- unlock(&timers.lock)
+ unlock(&tb.lock)
if raceenabled {
raceacquire(unsafe.Pointer(t))
}
f(arg, seq)
- lock(&timers.lock)
+ lock(&tb.lock)
}
if delta < 0 || faketime > 0 {
// No timers left - put goroutine to sleep.
- timers.rescheduling = true
- goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1)
+ tb.rescheduling = true
+ goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1)
continue
}
// At least one timer pending. Sleep until then.
- timers.sleeping = true
- timers.sleepUntil = now + delta
- noteclear(&timers.waitnote)
- unlock(&timers.lock)
- notetsleepg(&timers.waitnote, delta)
+ tb.sleeping = true
+ tb.sleepUntil = now + delta
+ noteclear(&tb.waitnote)
+ unlock(&tb.lock)
+ notetsleepg(&tb.waitnote, delta)
}
}
@@ -222,28 +267,67 @@
return nil
}
- lock(&timers.lock)
- if !timers.created || len(timers.t) == 0 {
- unlock(&timers.lock)
+ for i := range timers {
+ lock(&timers[i].lock)
+ }
+ gp := timejumpLocked()
+ for i := range timers {
+ unlock(&timers[i].lock)
+ }
+
+ return gp
+}
+
+func timejumpLocked() *g {
+ // Determine a timer bucket with minimum when.
+ var minT *timer
+ for i := range timers {
+ tb := &timers[i]
+ if !tb.created || len(tb.t) == 0 {
+ continue
+ }
+ t := tb.t[0]
+ if minT == nil || t.when < minT.when {
+ minT = t
+ }
+ }
+ if minT == nil || minT.when <= faketime {
return nil
}
- var gp *g
- if faketime < timers.t[0].when {
- faketime = timers.t[0].when
- if timers.rescheduling {
- timers.rescheduling = false
- gp = timers.gp
- }
+ faketime = minT.when
+ tb := minT.tb
+ if !tb.rescheduling {
+ return nil
}
- unlock(&timers.lock)
- return gp
+ tb.rescheduling = false
+ return tb.gp
+}
+
+func timeSleepUntil() int64 {
+ next := int64(1<<63 - 1)
+
+ // Determine minimum sleepUntil across all the timer buckets.
+ //
+ // The function cannot return a precise answer,
+ // as another timer may pop in as soon as timers have been unlocked.
+ // So lock the timers one by one instead of all at once.
+ for i := range timers {
+ tb := &timers[i]
+
+ lock(&tb.lock)
+ if tb.sleeping && tb.sleepUntil < next {
+ next = tb.sleepUntil
+ }
+ unlock(&tb.lock)
+ }
+
+ return next
}
// Heap maintenance algorithms.
-func siftupTimer(i int) {
- t := timers.t
+func siftupTimer(t []*timer, i int) {
when := t[i].when
tmp := t[i]
for i > 0 {
@@ -253,14 +337,15 @@
}
t[i] = t[p]
t[i].i = i
- t[p] = tmp
- t[p].i = p
i = p
}
+ if tmp != t[i] {
+ t[i] = tmp
+ t[i].i = i
+ }
}
-func siftdownTimer(i int) {
- t := timers.t
+func siftdownTimer(t []*timer, i int) {
n := len(t)
when := t[i].when
tmp := t[i]
@@ -291,10 +376,12 @@
}
t[i] = t[c]
t[i].i = i
- t[c] = tmp
- t[c].i = c
i = c
}
+ if tmp != t[i] {
+ t[i] = tmp
+ t[i].i = i
+ }
}
// Entry points for net, time to call nanotime.
@@ -309,4 +396,10 @@
return nanotime()
}
-var startNano int64 = nanotime()
+// Monotonic times are reported as offsets from startNano.
+// We initialize startNano to nanotime() - 1 so that on systems where
+// monotonic time resolution is fairly low (e.g. Windows 2008
+// which appears to have a default resolution of 15ms),
+// we avoid ever reporting a nanotime of 0.
+// (Callers may want to use 0 as "time not set".)
+var startNano int64 = nanotime() - 1
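The timer changes above replace the single global heap with 64 per-P buckets, padded out to cache-line multiples so neighboring buckets never share a line. A stand-alone sketch of those two ideas, with an assumed 64-byte cache line (the runtime uses sys.CacheLineSize) and a stand-in bucket type:

package main

import (
	"fmt"
	"unsafe"
)

const (
	bucketsLen    = 64 // mirrors timersLen above
	cacheLineSize = 64 // assumed; the runtime uses sys.CacheLineSize
)

// bucket stands in for timersBucket; the real struct holds a mutex,
// the timer heap, and timerproc bookkeeping.
type bucket struct {
	heap []int64
}

// padded mirrors the anonymous element type of the timers array: the pad
// rounds each element up to a multiple of the cache line size so two
// buckets never share a line (no false sharing).
type padded struct {
	bucket
	pad [cacheLineSize - unsafe.Sizeof(bucket{})%cacheLineSize]byte
}

var buckets [bucketsLen]padded

// assignBucket mirrors (*timer).assignBucket: the current P's id selects
// a bucket, so Ps normally work on disjoint heaps; if GOMAXPROCS exceeds
// bucketsLen, several Ps simply share one bucket.
func assignBucket(pid int) *bucket {
	return &buckets[uint8(pid)%bucketsLen].bucket
}

func main() {
	fmt.Println("element size:", unsafe.Sizeof(padded{})) // a multiple of cacheLineSize
	fmt.Println("P 3 and P 67 share a bucket:", assignBucket(3) == assignBucket(67))
}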
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 826dc9a..fab7976 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -28,8 +28,8 @@
traceEvProcStop = 6 // stop of P [timestamp]
traceEvGCStart = 7 // GC start [timestamp, seq, stack id]
traceEvGCDone = 8 // GC done [timestamp]
- traceEvGCScanStart = 9 // GC mark termination start [timestamp]
- traceEvGCScanDone = 10 // GC mark termination done [timestamp]
+ traceEvGCSTWStart = 9 // GC STW start [timestamp, kind]
+ traceEvGCSTWDone = 10 // GC STW done [timestamp]
traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed]
traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
@@ -235,21 +235,21 @@
trace.timeStart = nanotime()
trace.headerWritten = false
trace.footerWritten = false
- trace.strings = make(map[string]uint64)
+
+ // string to id mapping
+ // 0 : reserved for an empty string
+ // remaining: other strings registered by traceString
trace.stringSeq = 0
+ trace.strings = make(map[string]uint64)
+
trace.seqGC = 0
_g_.m.startingtrace = false
trace.enabled = true
// Register runtime goroutine labels.
_, pid, bufp := traceAcquireBuffer()
- buf := (*bufp).ptr()
- if buf == nil {
- buf = traceFlush(0).ptr()
- (*bufp).set(buf)
- }
for i, label := range gcMarkWorkerModeStrings[:] {
- trace.markWorkerLabels[i], buf = traceString(buf, label)
+ trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label)
}
traceReleaseBuffer(pid)
@@ -277,10 +277,9 @@
traceGoSched()
- for _, p := range &allp {
- if p == nil {
- break
- }
+ // Loop over all allocated Ps because dead Ps may still have
+ // trace buffers.
+ for _, p := range allp[:cap(allp)] {
buf := p.tracebuf
if buf != 0 {
traceFullQueue(buf)
@@ -320,10 +319,7 @@
// The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
lock(&trace.lock)
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp[:cap(allp)] {
if p.tracebuf != 0 {
throw("trace: non-empty trace buffer in proc")
}
@@ -382,7 +378,7 @@
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
- return []byte("go 1.9 trace\x00\x00\x00\x00")
+ return []byte("go 1.10 trace\x00\x00\x00")
}
// Wait for new data.
if trace.fullHead == 0 && !trace.shutdown {
@@ -408,9 +404,12 @@
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
- if timers.gp != nil {
- data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
- data = traceAppend(data, uint64(timers.gp.goid))
+ for i := range timers {
+ tb := &timers[i]
+ if tb.gp != nil {
+ data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
+ data = traceAppend(data, uint64(tb.gp.goid))
+ }
}
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
@@ -514,18 +513,12 @@
buf := (*bufp).ptr()
const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params
if buf == nil || len(buf.arr)-buf.pos < maxSize {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
(*bufp).set(buf)
}
ticks := uint64(cputicks()) / traceTickDiv
tickDiff := ticks - buf.lastTicks
- if buf.pos == 0 {
- buf.byte(traceEvBatch | 1<<traceArgCountShift)
- buf.varint(uint64(pid))
- buf.varint(ticks)
- tickDiff = 0
- }
buf.lastTicks = ticks
narg := byte(len(args))
if skip >= 0 {
@@ -603,7 +596,7 @@
}
// traceFlush puts buf onto stack of full buffers and returns an empty buffer.
-func traceFlush(buf traceBufPtr) traceBufPtr {
+func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
owner := trace.lockOwner
dolock := owner == nil || owner != getg().m.curg
if dolock {
@@ -624,34 +617,51 @@
bufp := buf.ptr()
bufp.link.set(nil)
bufp.pos = 0
- bufp.lastTicks = 0
+
+ // initialize the buffer for a new batch
+ ticks := uint64(cputicks()) / traceTickDiv
+ bufp.lastTicks = ticks
+ bufp.byte(traceEvBatch | 1<<traceArgCountShift)
+ bufp.varint(uint64(pid))
+ bufp.varint(ticks)
+
if dolock {
unlock(&trace.lock)
}
return buf
}
-func traceString(buf *traceBuf, s string) (uint64, *traceBuf) {
+// traceString adds a string to trace.strings and returns its id.
+func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) {
if s == "" {
- return 0, buf
+ return 0, bufp
}
if id, ok := trace.strings[s]; ok {
- return id, buf
+ return id, bufp
}
trace.stringSeq++
id := trace.stringSeq
trace.strings[s] = id
+ // The memory allocation above may trigger tracing and
+ // cause *bufp to change. The following code works with *bufp,
+ // so there must be no memory allocation or other activity
+ // that causes tracing after this point.
+
+ buf := (*bufp).ptr()
size := 1 + 2*traceBytesPerNumber + len(s)
- if len(buf.arr)-buf.pos < size {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ if buf == nil || len(buf.arr)-buf.pos < size {
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+ (*bufp).set(buf)
}
buf.byte(traceEvString)
buf.varint(id)
buf.varint(uint64(len(s)))
buf.pos += copy(buf.arr[buf.pos:], s)
- return id, buf
+
+ (*bufp).set(buf)
+ return id, bufp
}
// traceAppend appends v to buf in little-endian-base-128 encoding.
@@ -781,7 +791,7 @@
// releases all memory and resets state.
func (tab *traceStackTable) dump() {
var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
- buf := traceFlush(0).ptr()
+ bufp := traceFlush(0, 0)
for _, stk := range tab.tab {
stk := stk.ptr()
for ; stk != nil; stk = stk.link.ptr() {
@@ -791,7 +801,7 @@
tmpbuf = traceAppend(tmpbuf, uint64(len(frames)))
for _, f := range frames {
var frame traceFrame
- frame, buf = traceFrameForPC(buf, f)
+ frame, bufp = traceFrameForPC(bufp, 0, f)
tmpbuf = traceAppend(tmpbuf, uint64(f.PC))
tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
@@ -799,9 +809,10 @@
}
// Now copy to the buffer.
size := 1 + traceBytesPerNumber + len(tmpbuf)
- if len(buf.arr)-buf.pos < size {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ if buf := bufp.ptr(); len(buf.arr)-buf.pos < size {
+ bufp = traceFlush(bufp, 0)
}
+ buf := bufp.ptr()
buf.byte(traceEvStack | 3<<traceArgCountShift)
buf.varint(uint64(len(tmpbuf)))
buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
@@ -809,7 +820,7 @@
}
lock(&trace.lock)
- traceFullQueue(traceBufPtrOf(buf))
+ traceFullQueue(bufp)
unlock(&trace.lock)
tab.mem.drop()
@@ -822,7 +833,10 @@
line uint64
}
-func traceFrameForPC(buf *traceBuf, f Frame) (traceFrame, *traceBuf) {
+// traceFrameForPC records the frame information.
+// It may allocate memory.
+func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
+ bufp := &buf
var frame traceFrame
fn := f.Function
@@ -830,14 +844,14 @@
if len(fn) > maxLen {
fn = fn[len(fn)-maxLen:]
}
- frame.funcID, buf = traceString(buf, fn)
+ frame.funcID, bufp = traceString(bufp, pid, fn)
frame.line = uint64(f.Line)
file := f.File
if len(file) > maxLen {
file = file[len(file)-maxLen:]
}
- frame.fileID, buf = traceString(buf, file)
- return frame, buf
+ frame.fileID, bufp = traceString(bufp, pid, file)
+ return frame, (*bufp)
}
// traceAlloc is a non-thread-safe region allocator.
@@ -924,12 +938,12 @@
traceEvent(traceEvGCDone, -1)
}
-func traceGCScanStart() {
- traceEvent(traceEvGCScanStart, -1)
+func traceGCSTWStart(kind int) {
+ traceEvent(traceEvGCSTWStart, -1, uint64(kind))
}
-func traceGCScanDone() {
- traceEvent(traceEvGCScanDone, -1)
+func traceGCSTWDone() {
+ traceEvent(traceEvGCSTWDone, -1)
}
// traceGCSweepStart prepares to trace a sweep loop. This does not
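One theme of the trace.go changes above is the string table: traceString hands out sequential ids, with 0 reserved for the empty string, and each new string is emitted once so later events can refer to it by id. A hedged stand-alone sketch of that interning scheme, leaving out the buffer flushing and locking the runtime has to handle:

package main

import "fmt"

// stringTable mimics the trace.strings map: id 0 is reserved for "",
// every other string gets the next sequence number the first time it
// is seen, and the caller is told whether it must emit the string.
type stringTable struct {
	seq uint64
	ids map[string]uint64
}

func newStringTable() *stringTable {
	return &stringTable{ids: make(map[string]uint64)}
}

// intern returns the id for s and reports whether s is new and therefore
// needs to be written to the trace (as a traceEvString record would be).
func (t *stringTable) intern(s string) (id uint64, fresh bool) {
	if s == "" {
		return 0, false
	}
	if id, ok := t.ids[s]; ok {
		return id, false
	}
	t.seq++
	t.ids[s] = t.seq
	return t.seq, true
}

func main() {
	tab := newStringTable()
	for _, s := range []string{"GC (dedicated)", "GC (dedicated)", "", "GC (idle)"} {
		id, fresh := tab.intern(s)
		fmt.Printf("%-16q id=%d fresh=%v\n", s, id, fresh)
	}
}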
diff --git a/src/runtime/trace/example_test.go b/src/runtime/trace/example_test.go
new file mode 100644
index 0000000..ba96a82
--- /dev/null
+++ b/src/runtime/trace/example_test.go
@@ -0,0 +1,39 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "runtime/trace"
+)
+
+// Example demonstrates the use of the trace package to trace
+// the execution of a Go program. The trace output will be
+// written to the file trace.out.
+func Example() {
+ f, err := os.Create("trace.out")
+ if err != nil {
+ log.Fatalf("failed to create trace output file: %v", err)
+ }
+ defer func() {
+ if err := f.Close(); err != nil {
+ log.Fatalf("failed to close trace file: %v", err)
+ }
+ }()
+
+ if err := trace.Start(f); err != nil {
+ log.Fatalf("failed to start trace: %v", err)
+ }
+ defer trace.Stop()
+
+ // your program here
+ RunMyProgram()
+}
+
+func RunMyProgram() {
+ fmt.Printf("this function will be traced")
+}
diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go
index 7cbb8a6..439f998 100644
--- a/src/runtime/trace/trace.go
+++ b/src/runtime/trace/trace.go
@@ -2,13 +2,36 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Go execution tracer.
-// The tracer captures a wide range of execution events like goroutine
-// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
-// changes of heap size, processor start/stop, etc and writes them to an io.Writer
-// in a compact form. A precise nanosecond-precision timestamp and a stack
-// trace is captured for most events. A trace can be analyzed later with
-// 'go tool trace' command.
+// Package trace contains facilities for programs to generate traces
+// for the Go execution tracer.
+//
+// The execution trace captures a wide range of execution events such as
+// goroutine creation/blocking/unblocking, syscall enter/exit/block,
+// GC-related events, changes of heap size, processor start/stop, etc.
+// A precise nanosecond-precision timestamp and a stack trace is
+// captured for most events. The generated trace can be interpreted
+// using `go tool trace`.
+//
+// Tracing a Go program
+//
+// Support for tracing tests and benchmarks built with the standard
+// testing package is built into `go test`. For example, the following
+// command runs the test in the current directory and writes the trace
+// file (test.out).
+//
+// go test -trace=test.out
+//
+// This runtime/trace package provides APIs to add equivalent tracing
+// support to a standalone program. See the Example that demonstrates
+// how to use this API to enable tracing.
+//
+// There is also a standard HTTP interface to profiling data. Adding the
+// following line will install handlers under the /debug/pprof/trace URL
+// to download live profiles:
+//
+// import _ "net/http/pprof"
+//
+// See the net/http/pprof package for more details.
package trace
import (
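The new package documentation points at net/http/pprof for live traces; a minimal sketch of a standalone program wiring that up (the listen address is an arbitrary choice for this example):

package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers /debug/pprof/ handlers, including /debug/pprof/trace
)

func main() {
	// A live execution trace can then be downloaded with, for example:
	//   curl -o trace.out 'http://localhost:6060/debug/pprof/trace?seconds=5'
	// and viewed with `go tool trace trace.out`.
	log.Fatal(http.ListenAndServe("localhost:6060", nil))
}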
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index c5f64fc..997d486 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -7,6 +7,7 @@
import (
"bytes"
"flag"
+ "internal/race"
"internal/trace"
"io"
"io/ioutil"
@@ -14,6 +15,7 @@
"os"
"runtime"
. "runtime/trace"
+ "strconv"
"sync"
"testing"
"time"
@@ -23,6 +25,61 @@
saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
)
+// TestEventBatch tests that Flush calls that happen during Start
+// don't produce corrupted traces.
+func TestEventBatch(t *testing.T) {
+ if race.Enabled {
+ t.Skip("skipping in race mode")
+ }
+ if testing.Short() {
+ t.Skip("skipping in short mode")
+ }
+ // During Start, a bunch of records are written to reflect the current
+ // snapshot of the program, including the state of each goroutine.
+ // Some string constants are also written to the trace to aid trace
+ // parsing. This test checks that a Flush of the buffer occurring
+ // during this process doesn't produce a corrupted trace.
+ // Exactly when a Flush happens during Start is hard to predict,
+ // so we test with a range of goroutine counts, hoping that one
+ // of them triggers a Flush.
+ // The range was chosen to fill up a ~64KB buffer with traceEvGoCreate
+ // and traceEvGoWaiting events (12-13 bytes per goroutine).
+ for g := 4950; g < 5050; g++ {
+ n := g
+ t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
+ var wg sync.WaitGroup
+ wg.Add(n)
+
+ in := make(chan bool, 1000)
+ for i := 0; i < n; i++ {
+ go func() {
+ <-in
+ wg.Done()
+ }()
+ }
+ buf := new(bytes.Buffer)
+ if err := Start(buf); err != nil {
+ t.Fatalf("failed to start tracing: %v", err)
+ }
+
+ for i := 0; i < n; i++ {
+ in <- true
+ }
+ wg.Wait()
+ Stop()
+
+ _, err := trace.Parse(buf, "")
+ if err == trace.ErrTimeOrder {
+ t.Skipf("skipping trace: %v", err)
+ }
+
+ if err != nil {
+ t.Fatalf("failed to parse trace: %v", err)
+ }
+ })
+ }
+}
+
func TestTraceStartStop(t *testing.T) {
buf := new(bytes.Buffer)
if err := Start(buf); err != nil {
@@ -70,20 +127,20 @@
}
func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
- events, err := trace.Parse(r, "")
+ res, err := trace.Parse(r, "")
if err == trace.ErrTimeOrder {
t.Skipf("skipping trace: %v", err)
}
if err != nil {
t.Fatalf("failed to parse trace: %v", err)
}
- gs := trace.GoroutineStats(events)
+ gs := trace.GoroutineStats(res.Events)
for goid := range gs {
// We don't do any particular checks on the result at the moment.
// But still check that RelatedGoroutines does not crash, hang, etc.
- _ = trace.RelatedGoroutines(events, goid)
+ _ = trace.RelatedGoroutines(res.Events, goid)
}
- return events, gs
+ return res.Events, gs
}
func testBrokenTimestamps(t *testing.T, data []byte) {
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index c74d438..747176c 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -43,6 +43,7 @@
morestackPC uintptr
mstartPC uintptr
rt0_goPC uintptr
+ asmcgocallPC uintptr
sigpanicPC uintptr
runfinqPC uintptr
bgsweepPC uintptr
@@ -70,6 +71,7 @@
morestackPC = funcPC(morestack)
mstartPC = funcPC(mstart)
rt0_goPC = funcPC(rt0_go)
+ asmcgocallPC = funcPC(asmcgocall)
sigpanicPC = funcPC(sigpanic)
runfinqPC = funcPC(runfinq)
bgsweepPC = funcPC(bgsweep)
@@ -184,6 +186,7 @@
cgoCtxt := gp.cgoCtxt
printing := pcbuf == nil && callback == nil
_defer := gp._defer
+ elideWrapper := false
for _defer != nil && _defer.sp == _NoArgs {
_defer = _defer.link
@@ -203,8 +206,11 @@
f := findfunc(frame.pc)
if !f.valid() {
- if callback != nil {
+ if callback != nil || printing {
print("runtime: unknown pc ", hex(frame.pc), "\n")
+ tracebackHexdump(gp.stack, &frame, 0)
+ }
+ if callback != nil {
throw("unknown pc")
}
return 0
@@ -247,7 +253,7 @@
}
}
var flr funcInfo
- if topofstack(f) {
+ if topofstack(f, gp.m != nil && gp == gp.m.g0) {
frame.lr = 0
flr = funcInfo{}
} else if usesLR && f.entry == jmpdeferPC {
@@ -280,8 +286,19 @@
// In that context it is okay to stop early.
// But if callback is set, we're doing a garbage collection and must
// get everything, so crash loudly.
- if callback != nil {
+ doPrint := printing
+ if doPrint && gp.m.incgo {
+ // We can inject sigpanic
+ // calls directly into C code,
+ // in which case we'll see a C
+ // return PC. Don't complain.
+ doPrint = false
+ }
+ if callback != nil || doPrint {
print("runtime: unexpected return pc for ", funcname(f), " called from ", hex(frame.lr), "\n")
+ tracebackHexdump(gp.stack, &frame, lrPtr)
+ }
+ if callback != nil {
throw("unknown caller pc")
}
}
@@ -386,8 +403,15 @@
}
if printing {
- // assume skip=0 for printing
- if (flags&_TraceRuntimeFrames) != 0 || showframe(f, gp, nprint == 0) {
+ // assume skip=0 for printing.
+ //
+ // Never elide wrappers if we haven't printed
+ // any frames. And don't elide wrappers that
+ // called panic rather than the wrapped
+ // function. Otherwise, leave them out.
+ name := funcname(f)
+ nextElideWrapper := elideWrapperCalling(name)
+ if (flags&_TraceRuntimeFrames) != 0 || showframe(f, gp, nprint == 0, elideWrapper && nprint != 0) {
// Print during crash.
// main(0x1, 0x2, 0x3)
// /home/rsc/go/src/runtime/x.go:23 +0xf
@@ -411,7 +435,6 @@
ix = inltree[ix].parent
}
}
- name := funcname(f)
if name == "runtime.gopanic" {
name = "panic"
}
@@ -438,6 +461,7 @@
print("\n")
nprint++
}
+ elideWrapper = nextElideWrapper
}
n++
@@ -647,7 +671,7 @@
// Show what created goroutine, except main goroutine (goid 1).
pc := gp.gopc
f := findfunc(pc)
- if f.valid() && showframe(f, gp, false) && gp.goid != 1 {
+ if f.valid() && showframe(f, gp, false, false) && gp.goid != 1 {
print("created by ", funcname(f), "\n")
tracepc := pc // back up to CALL instruction for funcline.
if pc > f.entry {
@@ -714,7 +738,7 @@
func callers(skip int, pcbuf []uintptr) int {
sp := getcallersp(unsafe.Pointer(&skip))
- pc := getcallerpc(unsafe.Pointer(&skip))
+ pc := getcallerpc()
gp := getg()
var n int
systemstack(func() {
@@ -727,12 +751,28 @@
return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
}
-func showframe(f funcInfo, gp *g, firstFrame bool) bool {
+func showframe(f funcInfo, gp *g, firstFrame, elideWrapper bool) bool {
g := getg()
if g.m.throwing > 0 && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig.ptr()) {
return true
}
level, _, _ := gotraceback()
+ if level > 1 {
+ // Show all frames.
+ return true
+ }
+
+ if !f.valid() {
+ return false
+ }
+
+ if elideWrapper {
+ file, _ := funcline(f, f.entry)
+ if file == "<autogenerated>" {
+ return false
+ }
+ }
+
name := funcname(f)
// Special case: always show runtime.gopanic frame
@@ -744,7 +784,7 @@
return true
}
- return level > 1 || f.valid() && contains(name, ".") && (!hasprefix(name, "runtime.") || isExportedRuntime(name))
+ return contains(name, ".") && (!hasprefix(name, "runtime.") || isExportedRuntime(name))
}
// isExportedRuntime reports whether name is an exported runtime function.
@@ -754,6 +794,14 @@
return len(name) > n && name[:n] == "runtime." && 'A' <= name[n] && name[n] <= 'Z'
}
+// elideWrapperCalling returns whether a wrapper function that called
+// function "name" should be elided from stack traces.
+func elideWrapperCalling(name string) bool {
+ // If the wrapper called a panic function instead of the
+ // wrapped function, we want to include it in stacks.
+ return !(name == "runtime.gopanic" || name == "runtime.sigpanic" || name == "runtime.panicwrap")
+}
+
var gStatusStrings = [...]string{
_Gidle: "idle",
_Grunnable: "runnable",
@@ -795,7 +843,7 @@
if waitfor >= 1 {
print(", ", waitfor, " minutes")
}
- if gp.lockedm != nil {
+ if gp.lockedm != 0 {
print(", locked to thread")
}
print("]:\n")
@@ -834,15 +882,68 @@
unlock(&allglock)
}
+// tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
+// for debugging purposes. If the address bad is included in the
+// hexdumped range, it is marked as well.
+func tracebackHexdump(stk stack, frame *stkframe, bad uintptr) {
+ const expand = 32 * sys.PtrSize
+ const maxExpand = 256 * sys.PtrSize
+ // Start around frame.sp.
+ lo, hi := frame.sp, frame.sp
+ // Expand to include frame.fp.
+ if frame.fp != 0 && frame.fp < lo {
+ lo = frame.fp
+ }
+ if frame.fp != 0 && frame.fp > hi {
+ hi = frame.fp
+ }
+ // Expand a bit more.
+ lo, hi = lo-expand, hi+expand
+ // But don't go too far from frame.sp.
+ if lo < frame.sp-maxExpand {
+ lo = frame.sp - maxExpand
+ }
+ if hi > frame.sp+maxExpand {
+ hi = frame.sp + maxExpand
+ }
+ // And don't go outside the stack bounds.
+ if lo < stk.lo {
+ lo = stk.lo
+ }
+ if hi > stk.hi {
+ hi = stk.hi
+ }
+
+ // Print the hex dump.
+ print("stack: frame={sp:", hex(frame.sp), ", fp:", hex(frame.fp), "} stack=[", hex(stk.lo), ",", hex(stk.hi), ")\n")
+ hexdumpWords(lo, hi, func(p uintptr) byte {
+ switch p {
+ case frame.fp:
+ return '>'
+ case frame.sp:
+ return '<'
+ case bad:
+ return '!'
+ }
+ return 0
+ })
+}
+
// Does f mark the top of a goroutine stack?
-func topofstack(f funcInfo) bool {
+func topofstack(f funcInfo, g0 bool) bool {
pc := f.entry
return pc == goexitPC ||
pc == mstartPC ||
pc == mcallPC ||
pc == morestackPC ||
pc == rt0_goPC ||
- externalthreadhandlerp != 0 && pc == externalthreadhandlerp
+ externalthreadhandlerp != 0 && pc == externalthreadhandlerp ||
+ // asmcgocall is TOS on the system stack because it
+ // switches to the system stack, but in this case we
+ // can come back to the regular stack and still want
+ // to be able to unwind through the call that appeared
+ // on the regular stack.
+ (g0 && pc == asmcgocallPC)
}
// isSystemGoroutine reports whether the goroutine g must be omitted in
diff --git a/src/runtime/type.go b/src/runtime/type.go
index bf54d54..b3df335 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -655,15 +655,15 @@
if len(st.fields) != len(sv.fields) {
return false
}
+ if st.pkgPath.name() != sv.pkgPath.name() {
+ return false
+ }
for i := range st.fields {
tf := &st.fields[i]
vf := &sv.fields[i]
if tf.name.name() != vf.name.name() {
return false
}
- if tf.name.pkgPath() != vf.name.pkgPath() {
- return false
- }
if !typesEqual(tf.typ, vf.typ, seen) {
return false
}
diff --git a/src/runtime/vdso_linux.go b/src/runtime/vdso_linux.go
new file mode 100644
index 0000000..61872e3
--- /dev/null
+++ b/src/runtime/vdso_linux.go
@@ -0,0 +1,281 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build 386 amd64
+
+package runtime
+
+import "unsafe"
+
+// Look up symbols in the Linux vDSO.
+
+// This code was originally based on the sample Linux vDSO parser at
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/vDSO/parse_vdso.c
+
+// This implements the ELF dynamic linking spec at
+// http://sco.com/developers/gabi/latest/ch5.dynamic.html
+
+// The version section is documented at
+// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
+
+const (
+ _AT_SYSINFO_EHDR = 33
+
+ _PT_LOAD = 1 /* Loadable program segment */
+ _PT_DYNAMIC = 2 /* Dynamic linking information */
+
+ _DT_NULL = 0 /* Marks end of dynamic section */
+ _DT_HASH = 4 /* Dynamic symbol hash table */
+ _DT_STRTAB = 5 /* Address of string table */
+ _DT_SYMTAB = 6 /* Address of symbol table */
+ _DT_GNU_HASH = 0x6ffffef5 /* GNU-style dynamic symbol hash table */
+ _DT_VERSYM = 0x6ffffff0
+ _DT_VERDEF = 0x6ffffffc
+
+ _VER_FLG_BASE = 0x1 /* Version definition of file itself */
+
+ _SHN_UNDEF = 0 /* Undefined section */
+
+ _SHT_DYNSYM = 11 /* Dynamic linker symbol table */
+
+ _STT_FUNC = 2 /* Symbol is a code object */
+
+ _STB_GLOBAL = 1 /* Global symbol */
+ _STB_WEAK = 2 /* Weak symbol */
+
+ _EI_NIDENT = 16
+
+ // Maximum indices for the array types used when traversing the vDSO ELF structures.
+ // Computed from architecture-specific max provided by vdso_linux_*.go
+ vdsoSymTabSize = vdsoArrayMax / unsafe.Sizeof(elfSym{})
+ vdsoDynSize = vdsoArrayMax / unsafe.Sizeof(elfDyn{})
+ vdsoSymStringsSize = vdsoArrayMax // byte
+ vdsoVerSymSize = vdsoArrayMax / 2 // uint16
+ vdsoHashSize = vdsoArrayMax / 4 // uint32
+
+ // vdsoBloomSizeScale is a scaling factor for gnuhash tables, which are uint32-indexed
+ // but contain uintptrs.
+ vdsoBloomSizeScale = unsafe.Sizeof(uintptr(0)) / 4 // uint32
+)
+
+/* How to extract and insert information held in the st_info field. */
+func _ELF_ST_BIND(val byte) byte { return val >> 4 }
+func _ELF_ST_TYPE(val byte) byte { return val & 0xf }
+
+type symbol_key struct {
+ name string
+ sym_hash uint32
+ gnu_hash uint32
+ ptr *uintptr
+}
+
+type version_key struct {
+ version string
+ ver_hash uint32
+}
+
+type vdso_info struct {
+ valid bool
+
+ /* Load information */
+ load_addr uintptr
+ load_offset uintptr /* load_addr - recorded vaddr */
+
+ /* Symbol table */
+ symtab *[vdsoSymTabSize]elfSym
+ symstrings *[vdsoSymStringsSize]byte
+ chain []uint32
+ bucket []uint32
+ symOff uint32
+ isGNUHash bool
+
+ /* Version table */
+ versym *[vdsoVerSymSize]uint16
+ verdef *elfVerdef
+}
+
+var linux26 = version_key{"LINUX_2.6", 0x3ae75f6}
+
+// see vdso_linux_*.go for sym_keys[] and __vdso_* vars
+
+func vdso_init_from_sysinfo_ehdr(info *vdso_info, hdr *elfEhdr) {
+ info.valid = false
+ info.load_addr = uintptr(unsafe.Pointer(hdr))
+
+ pt := unsafe.Pointer(info.load_addr + uintptr(hdr.e_phoff))
+
+ // We need two things from the segment table: the load offset
+ // and the dynamic table.
+ var found_vaddr bool
+ var dyn *[vdsoDynSize]elfDyn
+ for i := uint16(0); i < hdr.e_phnum; i++ {
+ pt := (*elfPhdr)(add(pt, uintptr(i)*unsafe.Sizeof(elfPhdr{})))
+ switch pt.p_type {
+ case _PT_LOAD:
+ if !found_vaddr {
+ found_vaddr = true
+ info.load_offset = info.load_addr + uintptr(pt.p_offset-pt.p_vaddr)
+ }
+
+ case _PT_DYNAMIC:
+ dyn = (*[vdsoDynSize]elfDyn)(unsafe.Pointer(info.load_addr + uintptr(pt.p_offset)))
+ }
+ }
+
+ if !found_vaddr || dyn == nil {
+ return // Failed
+ }
+
+ // Fish out the useful bits of the dynamic table.
+
+ var hash, gnuhash *[vdsoHashSize]uint32
+ info.symstrings = nil
+ info.symtab = nil
+ info.versym = nil
+ info.verdef = nil
+ for i := 0; dyn[i].d_tag != _DT_NULL; i++ {
+ dt := &dyn[i]
+ p := info.load_offset + uintptr(dt.d_val)
+ switch dt.d_tag {
+ case _DT_STRTAB:
+ info.symstrings = (*[vdsoSymStringsSize]byte)(unsafe.Pointer(p))
+ case _DT_SYMTAB:
+ info.symtab = (*[vdsoSymTabSize]elfSym)(unsafe.Pointer(p))
+ case _DT_HASH:
+ hash = (*[vdsoHashSize]uint32)(unsafe.Pointer(p))
+ case _DT_GNU_HASH:
+ gnuhash = (*[vdsoHashSize]uint32)(unsafe.Pointer(p))
+ case _DT_VERSYM:
+ info.versym = (*[vdsoVerSymSize]uint16)(unsafe.Pointer(p))
+ case _DT_VERDEF:
+ info.verdef = (*elfVerdef)(unsafe.Pointer(p))
+ }
+ }
+
+ if info.symstrings == nil || info.symtab == nil || (hash == nil && gnuhash == nil) {
+ return // Failed
+ }
+
+ if info.verdef == nil {
+ info.versym = nil
+ }
+
+ if gnuhash != nil {
+ // Parse the GNU hash table header.
+ nbucket := gnuhash[0]
+ info.symOff = gnuhash[1]
+ bloomSize := gnuhash[2]
+ info.bucket = gnuhash[4+bloomSize*uint32(vdsoBloomSizeScale):][:nbucket]
+ info.chain = gnuhash[4+bloomSize*uint32(vdsoBloomSizeScale)+nbucket:]
+ info.isGNUHash = true
+ } else {
+ // Parse the hash table header.
+ nbucket := hash[0]
+ nchain := hash[1]
+ info.bucket = hash[2 : 2+nbucket]
+ info.chain = hash[2+nbucket : 2+nbucket+nchain]
+ }
+
+ // That's all we need.
+ info.valid = true
+}
+
+func vdso_find_version(info *vdso_info, ver *version_key) int32 {
+ if !info.valid {
+ return 0
+ }
+
+ def := info.verdef
+ for {
+ if def.vd_flags&_VER_FLG_BASE == 0 {
+ aux := (*elfVerdaux)(add(unsafe.Pointer(def), uintptr(def.vd_aux)))
+ if def.vd_hash == ver.ver_hash && ver.version == gostringnocopy(&info.symstrings[aux.vda_name]) {
+ return int32(def.vd_ndx & 0x7fff)
+ }
+ }
+
+ if def.vd_next == 0 {
+ break
+ }
+ def = (*elfVerdef)(add(unsafe.Pointer(def), uintptr(def.vd_next)))
+ }
+
+ return -1 // cannot match any version
+}
+
+func vdso_parse_symbols(info *vdso_info, version int32) {
+ if !info.valid {
+ return
+ }
+
+ apply := func(symIndex uint32, k symbol_key) bool {
+ sym := &info.symtab[symIndex]
+ typ := _ELF_ST_TYPE(sym.st_info)
+ bind := _ELF_ST_BIND(sym.st_info)
+ if typ != _STT_FUNC || bind != _STB_GLOBAL && bind != _STB_WEAK || sym.st_shndx == _SHN_UNDEF {
+ return false
+ }
+ if k.name != gostringnocopy(&info.symstrings[sym.st_name]) {
+ return false
+ }
+
+ // Check symbol version.
+ if info.versym != nil && version != 0 && int32(info.versym[symIndex]&0x7fff) != version {
+ return false
+ }
+
+ *k.ptr = info.load_offset + uintptr(sym.st_value)
+ return true
+ }
+
+ if !info.isGNUHash {
+ // Old-style DT_HASH table.
+ for _, k := range sym_keys {
+ for chain := info.bucket[k.sym_hash%uint32(len(info.bucket))]; chain != 0; chain = info.chain[chain] {
+ if apply(chain, k) {
+ break
+ }
+ }
+ }
+ return
+ }
+
+ // New-style DT_GNU_HASH table.
+ for _, k := range sym_keys {
+ symIndex := info.bucket[k.gnu_hash%uint32(len(info.bucket))]
+ if symIndex < info.symOff {
+ continue
+ }
+ for ; ; symIndex++ {
+ hash := info.chain[symIndex-info.symOff]
+ if hash|1 == k.gnu_hash|1 {
+ // Found a hash match.
+ if apply(symIndex, k) {
+ break
+ }
+ }
+ if hash&1 != 0 {
+ // End of chain.
+ break
+ }
+ }
+ }
+}
+
+func archauxv(tag, val uintptr) {
+ switch tag {
+ case _AT_SYSINFO_EHDR:
+ if val == 0 {
+ // Something went wrong
+ return
+ }
+ var info vdso_info
+ // TODO(rsc): I don't understand why the compiler thinks info escapes
+ // when passed to the three functions below.
+ info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
+ vdso_init_from_sysinfo_ehdr(info1, (*elfEhdr)(unsafe.Pointer(val)))
+ vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
+ }
+}
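Each sym_keys entry above carries two precomputed hashes: the classic SysV ELF hash used to walk DT_HASH chains and the GNU hash used with DT_GNU_HASH. A small stand-alone sketch of the two standard hash functions (not copied from the runtime); its output should reproduce the sym_hash and gnu_hash constants listed in sym_keys, e.g. 0xd35ec75 and 0x6e43a318 for __vdso_clock_gettime as given in the patch:

package main

import "fmt"

// elfHash is the classic System V ABI ELF hash used with DT_HASH tables.
func elfHash(name string) uint32 {
	var h uint32
	for i := 0; i < len(name); i++ {
		h = h<<4 + uint32(name[i])
		if g := h & 0xf0000000; g != 0 {
			h ^= g >> 24
		}
		h &^= 0xf0000000 // equivalent to h &= ~g in the classic formulation
	}
	return h
}

// gnuHash is the DT_GNU_HASH function (djb2 variant: h = h*33 + c).
func gnuHash(name string) uint32 {
	h := uint32(5381)
	for i := 0; i < len(name); i++ {
		h = h*33 + uint32(name[i])
	}
	return h
}

func main() {
	for _, name := range []string{"__vdso_clock_gettime", "__vdso_gettimeofday", "__vdso_time"} {
		fmt.Printf("%-22s sym_hash=%#x gnu_hash=%#x\n", name, elfHash(name), gnuHash(name))
	}
}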
diff --git a/src/runtime/vdso_linux_386.go b/src/runtime/vdso_linux_386.go
new file mode 100644
index 0000000..74ad953
--- /dev/null
+++ b/src/runtime/vdso_linux_386.go
@@ -0,0 +1,93 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// ELF32 structure definitions for use by the Linux vDSO loader
+
+type elfSym struct {
+ st_name uint32
+ st_value uint32
+ st_size uint32
+ st_info byte
+ st_other byte
+ st_shndx uint16
+}
+
+type elfVerdef struct {
+ vd_version uint16 /* Version revision */
+ vd_flags uint16 /* Version information */
+ vd_ndx uint16 /* Version Index */
+ vd_cnt uint16 /* Number of associated aux entries */
+ vd_hash uint32 /* Version name hash value */
+ vd_aux uint32 /* Offset in bytes to verdaux array */
+ vd_next uint32 /* Offset in bytes to next verdef entry */
+}
+
+type elfEhdr struct {
+ e_ident [_EI_NIDENT]byte /* Magic number and other info */
+ e_type uint16 /* Object file type */
+ e_machine uint16 /* Architecture */
+ e_version uint32 /* Object file version */
+ e_entry uint32 /* Entry point virtual address */
+ e_phoff uint32 /* Program header table file offset */
+ e_shoff uint32 /* Section header table file offset */
+ e_flags uint32 /* Processor-specific flags */
+ e_ehsize uint16 /* ELF header size in bytes */
+ e_phentsize uint16 /* Program header table entry size */
+ e_phnum uint16 /* Program header table entry count */
+ e_shentsize uint16 /* Section header table entry size */
+ e_shnum uint16 /* Section header table entry count */
+ e_shstrndx uint16 /* Section header string table index */
+}
+
+type elfPhdr struct {
+ p_type uint32 /* Segment type */
+ p_offset uint32 /* Segment file offset */
+ p_vaddr uint32 /* Segment virtual address */
+ p_paddr uint32 /* Segment physical address */
+ p_filesz uint32 /* Segment size in file */
+ p_memsz uint32 /* Segment size in memory */
+ p_flags uint32 /* Segment flags */
+ p_align uint32 /* Segment alignment */
+}
+
+type elfShdr struct {
+ sh_name uint32 /* Section name (string tbl index) */
+ sh_type uint32 /* Section type */
+ sh_flags uint32 /* Section flags */
+ sh_addr uint32 /* Section virtual addr at execution */
+ sh_offset uint32 /* Section file offset */
+ sh_size uint32 /* Section size in bytes */
+ sh_link uint32 /* Link to another section */
+ sh_info uint32 /* Additional section information */
+ sh_addralign uint32 /* Section alignment */
+ sh_entsize uint32 /* Entry size if section holds table */
+}
+
+type elfDyn struct {
+ d_tag int32 /* Dynamic entry type */
+ d_val uint32 /* Integer value */
+}
+
+type elfVerdaux struct {
+ vda_name uint32 /* Version or dependency names */
+ vda_next uint32 /* Offset in bytes to next verdaux entry */
+}
+
+const (
+ // vdsoArrayMax is the byte-size of a maximally sized array on this architecture.
+ // See cmd/compile/internal/x86/galign.go arch.MAXWIDTH initialization, but must also
+ // be constrained to max +ve int.
+ vdsoArrayMax = 1<<31 - 1
+)
+
+var sym_keys = []symbol_key{
+ {"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym},
+}
+
+// initialize to fall back to syscall
+var (
+ __vdso_clock_gettime_sym uintptr = 0
+)
diff --git a/src/runtime/vdso_linux_amd64.go b/src/runtime/vdso_linux_amd64.go
index 8a970df..0bbe5c2 100644
--- a/src/runtime/vdso_linux_amd64.go
+++ b/src/runtime/vdso_linux_amd64.go
@@ -4,51 +4,9 @@
package runtime
-import "unsafe"
+// ELF64 structure definitions for use by the Linux vDSO loader
-// Look up symbols in the Linux vDSO.
-
-// This code was originally based on the sample Linux vDSO parser at
-// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c
-
-// This implements the ELF dynamic linking spec at
-// http://sco.com/developers/gabi/latest/ch5.dynamic.html
-
-// The version section is documented at
-// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
-
-const (
- _AT_SYSINFO_EHDR = 33
-
- _PT_LOAD = 1 /* Loadable program segment */
- _PT_DYNAMIC = 2 /* Dynamic linking information */
-
- _DT_NULL = 0 /* Marks end of dynamic section */
- _DT_HASH = 4 /* Dynamic symbol hash table */
- _DT_STRTAB = 5 /* Address of string table */
- _DT_SYMTAB = 6 /* Address of symbol table */
- _DT_VERSYM = 0x6ffffff0
- _DT_VERDEF = 0x6ffffffc
-
- _VER_FLG_BASE = 0x1 /* Version definition of file itself */
-
- _SHN_UNDEF = 0 /* Undefined section */
-
- _SHT_DYNSYM = 11 /* Dynamic linker symbol table */
-
- _STT_FUNC = 2 /* Symbol is a code object */
-
- _STB_GLOBAL = 1 /* Global symbol */
- _STB_WEAK = 2 /* Weak symbol */
-
- _EI_NIDENT = 16
-)
-
-/* How to extract and insert information held in the st_info field. */
-func _ELF64_ST_BIND(val byte) byte { return val >> 4 }
-func _ELF64_ST_TYPE(val byte) byte { return val & 0xf }
-
-type elf64Sym struct {
+type elfSym struct {
st_name uint32
st_info byte
st_other byte
@@ -57,7 +15,7 @@
st_size uint64
}
-type elf64Verdef struct {
+type elfVerdef struct {
vd_version uint16 /* Version revision */
vd_flags uint16 /* Version information */
vd_ndx uint16 /* Version Index */
@@ -67,7 +25,7 @@
vd_next uint32 /* Offset in bytes to next verdef entry */
}
-type elf64Ehdr struct {
+type elfEhdr struct {
e_ident [_EI_NIDENT]byte /* Magic number and other info */
e_type uint16 /* Object file type */
e_machine uint16 /* Architecture */
@@ -84,7 +42,7 @@
e_shstrndx uint16 /* Section header string table index */
}
-type elf64Phdr struct {
+type elfPhdr struct {
p_type uint32 /* Segment type */
p_flags uint32 /* Segment flags */
p_offset uint64 /* Segment file offset */
@@ -95,7 +53,7 @@
p_align uint64 /* Segment alignment */
}
-type elf64Shdr struct {
+type elfShdr struct {
sh_name uint32 /* Section name (string tbl index) */
sh_type uint32 /* Section type */
sh_flags uint64 /* Section flags */
@@ -108,56 +66,26 @@
sh_entsize uint64 /* Entry size if section holds table */
}
-type elf64Dyn struct {
+type elfDyn struct {
d_tag int64 /* Dynamic entry type */
d_val uint64 /* Integer value */
}
-type elf64Verdaux struct {
+type elfVerdaux struct {
vda_name uint32 /* Version or dependency names */
vda_next uint32 /* Offset in bytes to next verdaux entry */
}
-type elf64Auxv struct {
- a_type uint64 /* Entry type */
- a_val uint64 /* Integer value */
-}
-
-type symbol_key struct {
- name string
- sym_hash uint32
- ptr *uintptr
-}
-
-type version_key struct {
- version string
- ver_hash uint32
-}
-
-type vdso_info struct {
- valid bool
-
- /* Load information */
- load_addr uintptr
- load_offset uintptr /* load_addr - recorded vaddr */
-
- /* Symbol table */
- symtab *[1 << 32]elf64Sym
- symstrings *[1 << 32]byte
- chain []uint32
- bucket []uint32
-
- /* Version table */
- versym *[1 << 32]uint16
- verdef *elf64Verdef
-}
-
-var linux26 = version_key{"LINUX_2.6", 0x3ae75f6}
+const (
+ // vdsoArrayMax is the byte-size of a maximally sized array on this architecture.
+ // See cmd/compile/internal/amd64/galign.go arch.MAXWIDTH initialization.
+ vdsoArrayMax = 1<<50 - 1
+)
var sym_keys = []symbol_key{
- {"__vdso_time", 0xa33c485, &__vdso_time_sym},
- {"__vdso_gettimeofday", 0x315ca59, &__vdso_gettimeofday_sym},
- {"__vdso_clock_gettime", 0xd35ec75, &__vdso_clock_gettime_sym},
+ {"__vdso_time", 0xa33c485, 0x821e8e0d, &__vdso_time_sym},
+ {"__vdso_gettimeofday", 0x315ca59, 0xb01bca00, &__vdso_gettimeofday_sym},
+ {"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym},
}
// initialize with vsyscall fallbacks
@@ -166,141 +94,3 @@
__vdso_gettimeofday_sym uintptr = 0xffffffffff600000
__vdso_clock_gettime_sym uintptr = 0
)
-
-func vdso_init_from_sysinfo_ehdr(info *vdso_info, hdr *elf64Ehdr) {
- info.valid = false
- info.load_addr = uintptr(unsafe.Pointer(hdr))
-
- pt := unsafe.Pointer(info.load_addr + uintptr(hdr.e_phoff))
-
- // We need two things from the segment table: the load offset
- // and the dynamic table.
- var found_vaddr bool
- var dyn *[1 << 20]elf64Dyn
- for i := uint16(0); i < hdr.e_phnum; i++ {
- pt := (*elf64Phdr)(add(pt, uintptr(i)*unsafe.Sizeof(elf64Phdr{})))
- switch pt.p_type {
- case _PT_LOAD:
- if !found_vaddr {
- found_vaddr = true
- info.load_offset = info.load_addr + uintptr(pt.p_offset-pt.p_vaddr)
- }
-
- case _PT_DYNAMIC:
- dyn = (*[1 << 20]elf64Dyn)(unsafe.Pointer(info.load_addr + uintptr(pt.p_offset)))
- }
- }
-
- if !found_vaddr || dyn == nil {
- return // Failed
- }
-
- // Fish out the useful bits of the dynamic table.
-
- var hash *[1 << 30]uint32
- hash = nil
- info.symstrings = nil
- info.symtab = nil
- info.versym = nil
- info.verdef = nil
- for i := 0; dyn[i].d_tag != _DT_NULL; i++ {
- dt := &dyn[i]
- p := info.load_offset + uintptr(dt.d_val)
- switch dt.d_tag {
- case _DT_STRTAB:
- info.symstrings = (*[1 << 32]byte)(unsafe.Pointer(p))
- case _DT_SYMTAB:
- info.symtab = (*[1 << 32]elf64Sym)(unsafe.Pointer(p))
- case _DT_HASH:
- hash = (*[1 << 30]uint32)(unsafe.Pointer(p))
- case _DT_VERSYM:
- info.versym = (*[1 << 32]uint16)(unsafe.Pointer(p))
- case _DT_VERDEF:
- info.verdef = (*elf64Verdef)(unsafe.Pointer(p))
- }
- }
-
- if info.symstrings == nil || info.symtab == nil || hash == nil {
- return // Failed
- }
-
- if info.verdef == nil {
- info.versym = nil
- }
-
- // Parse the hash table header.
- nbucket := hash[0]
- nchain := hash[1]
- info.bucket = hash[2 : 2+nbucket]
- info.chain = hash[2+nbucket : 2+nbucket+nchain]
-
- // That's all we need.
- info.valid = true
-}
-
-func vdso_find_version(info *vdso_info, ver *version_key) int32 {
- if !info.valid {
- return 0
- }
-
- def := info.verdef
- for {
- if def.vd_flags&_VER_FLG_BASE == 0 {
- aux := (*elf64Verdaux)(add(unsafe.Pointer(def), uintptr(def.vd_aux)))
- if def.vd_hash == ver.ver_hash && ver.version == gostringnocopy(&info.symstrings[aux.vda_name]) {
- return int32(def.vd_ndx & 0x7fff)
- }
- }
-
- if def.vd_next == 0 {
- break
- }
- def = (*elf64Verdef)(add(unsafe.Pointer(def), uintptr(def.vd_next)))
- }
-
- return -1 // cannot match any version
-}
-
-func vdso_parse_symbols(info *vdso_info, version int32) {
- if !info.valid {
- return
- }
-
- for _, k := range sym_keys {
- for chain := info.bucket[k.sym_hash%uint32(len(info.bucket))]; chain != 0; chain = info.chain[chain] {
- sym := &info.symtab[chain]
- typ := _ELF64_ST_TYPE(sym.st_info)
- bind := _ELF64_ST_BIND(sym.st_info)
- if typ != _STT_FUNC || bind != _STB_GLOBAL && bind != _STB_WEAK || sym.st_shndx == _SHN_UNDEF {
- continue
- }
- if k.name != gostringnocopy(&info.symstrings[sym.st_name]) {
- continue
- }
-
- // Check symbol version.
- if info.versym != nil && version != 0 && int32(info.versym[chain]&0x7fff) != version {
- continue
- }
-
- *k.ptr = info.load_offset + uintptr(sym.st_value)
- break
- }
- }
-}
-
-func archauxv(tag, val uintptr) {
- switch tag {
- case _AT_SYSINFO_EHDR:
- if val == 0 {
- // Something went wrong
- return
- }
- var info vdso_info
- // TODO(rsc): I don't understand why the compiler thinks info escapes
- // when passed to the three functions below.
- info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
- vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(val)))
- vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
- }
-}
diff --git a/src/runtime/vdso_linux_test.go b/src/runtime/vdso_linux_test.go
new file mode 100644
index 0000000..f507ee9
--- /dev/null
+++ b/src/runtime/vdso_linux_test.go
@@ -0,0 +1,63 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build 386 amd64
+
+package runtime_test
+
+import (
+ "testing"
+ "time"
+ _ "unsafe"
+)
+
+// These tests are a little risky because they overwrite the __vdso_clock_gettime_sym value.
+// It's normally initialized at startup and remains unchanged after that.
+
+//go:linkname __vdso_clock_gettime_sym runtime.__vdso_clock_gettime_sym
+var __vdso_clock_gettime_sym uintptr
+
+func TestClockVDSOAndFallbackPaths(t *testing.T) {
+ // Check that we can call walltime() and nanotime() with and without their (1st) fast-paths.
+ // This just checks that fast and fallback paths can be called, rather than testing their
+ // results.
+ //
+ // Call them indirectly via time.Now(), so we don't need auxiliary .s files to allow us to
+ // use go:linkname to refer to the functions directly.
+
+ save := __vdso_clock_gettime_sym
+ if save == 0 {
+ t.Log("__vdso_clock_gettime symbol not found; fallback path will be used by default")
+ }
+
+ // Call with fast-path enabled (if vDSO symbol found at startup)
+ time.Now()
+
+ // Call with fast-path disabled
+ __vdso_clock_gettime_sym = 0
+ time.Now()
+ __vdso_clock_gettime_sym = save
+}
+
+func BenchmarkClockVDSOAndFallbackPaths(b *testing.B) {
+ run := func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ // Call via time.Now() - see comment in test above.
+ time.Now()
+ }
+ }
+
+ save := __vdso_clock_gettime_sym
+ b.Run("vDSO", run)
+ __vdso_clock_gettime_sym = 0
+ b.Run("Fallback", run)
+ __vdso_clock_gettime_sym = save
+}
+
+func BenchmarkTimeNow(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ time.Now()
+ }
+}