Update linux Go to 1.15beta1
From https://ci.android.com/builds/submitted/6626886/linux/latest/go.zip
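This imports the Go 1.15beta1 runtime sources. The largest mechanical change below is to hashing: runtime/alg.go drops the typeAlg/algarray indirection, the per-architecture aeshash* entry points become memhash/memhash32/memhash64/strhash (each checks useAeshash and jumps to a Go *Fallback routine when AES instructions are unavailable), and a new typehash covers interface keys and reflect-built map types. The NaCl/amd64p32 support is deleted, amd64 gains retpoline thunks plus per-register gcWriteBarrier wrappers, and asm_riscv64.s brings up the linux/riscv64 port.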
Test: m blueprint_tools
Change-Id: Ib0d1176e769611b25554177aef209bc7e6456694
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
index 732d32b..0af48ab 100644
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -34,17 +34,6 @@
alg_max
)
-// typeAlg is also copied/used in reflect/type.go.
-// keep them in sync.
-type typeAlg struct {
- // function for hashing objects of this type
- // (ptr to object, seed) -> hash
- hash func(unsafe.Pointer, uintptr) uintptr
- // function for comparing objects of this type
- // (ptr to object A, ptr to object B) -> ==?
- equal func(unsafe.Pointer, unsafe.Pointer) bool
-}
-
func memhash0(p unsafe.Pointer, h uintptr) uintptr {
return h
}
@@ -68,34 +57,20 @@
return memhash(p, h, size)
}
-var algarray = [alg_max]typeAlg{
- alg_NOEQ: {nil, nil},
- alg_MEM0: {memhash0, memequal0},
- alg_MEM8: {memhash8, memequal8},
- alg_MEM16: {memhash16, memequal16},
- alg_MEM32: {memhash32, memequal32},
- alg_MEM64: {memhash64, memequal64},
- alg_MEM128: {memhash128, memequal128},
- alg_STRING: {strhash, strequal},
- alg_INTER: {interhash, interequal},
- alg_NILINTER: {nilinterhash, nilinterequal},
- alg_FLOAT32: {f32hash, f32equal},
- alg_FLOAT64: {f64hash, f64equal},
- alg_CPLX64: {c64hash, c64equal},
- alg_CPLX128: {c128hash, c128equal},
-}
-
+// runtime variable to check if the processor we're running on
+// actually supports the instructions used by the AES-based
+// hash implementation.
var useAeshash bool
// in asm_*.s
-func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
-func aeshash32(p unsafe.Pointer, h uintptr) uintptr
-func aeshash64(p unsafe.Pointer, h uintptr) uintptr
-func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
+func memhash(p unsafe.Pointer, h, s uintptr) uintptr
+func memhash32(p unsafe.Pointer, h uintptr) uintptr
+func memhash64(p unsafe.Pointer, h uintptr) uintptr
+func strhash(p unsafe.Pointer, h uintptr) uintptr
-func strhash(a unsafe.Pointer, h uintptr) uintptr {
+func strhashFallback(a unsafe.Pointer, h uintptr) uintptr {
x := (*stringStruct)(a)
- return memhash(x.str, h, uintptr(x.len))
+ return memhashFallback(x.str, h, uintptr(x.len))
}
// NOTE: Because NaN != NaN, a map can contain any
@@ -144,14 +119,17 @@
return h
}
t := tab._type
- fn := t.alg.hash
- if fn == nil {
+ if t.equal == nil {
+ // Check hashability here. We could do this check inside
+ // typehash, but we want to report the topmost type in
+ // the error text (e.g. in a struct with a field of slice type
+ // we want to report the struct, not the slice).
panic(errorString("hash of unhashable type " + t.string()))
}
if isDirectIface(t) {
- return c1 * fn(unsafe.Pointer(&a.data), h^c0)
+ return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0)
} else {
- return c1 * fn(a.data, h^c0)
+ return c1 * typehash(t, a.data, h^c0)
}
}
@@ -161,17 +139,100 @@
if t == nil {
return h
}
- fn := t.alg.hash
- if fn == nil {
+ if t.equal == nil {
+ // See comment in interhash above.
panic(errorString("hash of unhashable type " + t.string()))
}
if isDirectIface(t) {
- return c1 * fn(unsafe.Pointer(&a.data), h^c0)
+ return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0)
} else {
- return c1 * fn(a.data, h^c0)
+ return c1 * typehash(t, a.data, h^c0)
}
}
+// typehash computes the hash of the object of type t at address p.
+// h is the seed.
+// This function is seldom used. Most maps use for hashing either
+// fixed functions (e.g. f32hash) or compiler-generated functions
+// (e.g. for a type like struct { x, y string }). This implementation
+// is slower but more general and is used for hashing interface types
+// (called from interhash or nilinterhash, above) or for hashing in
+// maps generated by reflect.MapOf (reflect_typehash, below).
+// Note: this function must match the compiler generated
+// functions exactly. See issue 37716.
+func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr {
+ if t.tflag&tflagRegularMemory != 0 {
+ // Handle ptr sizes specially, see issue 37086.
+ switch t.size {
+ case 4:
+ return memhash32(p, h)
+ case 8:
+ return memhash64(p, h)
+ default:
+ return memhash(p, h, t.size)
+ }
+ }
+ switch t.kind & kindMask {
+ case kindFloat32:
+ return f32hash(p, h)
+ case kindFloat64:
+ return f64hash(p, h)
+ case kindComplex64:
+ return c64hash(p, h)
+ case kindComplex128:
+ return c128hash(p, h)
+ case kindString:
+ return strhash(p, h)
+ case kindInterface:
+ i := (*interfacetype)(unsafe.Pointer(t))
+ if len(i.mhdr) == 0 {
+ return nilinterhash(p, h)
+ }
+ return interhash(p, h)
+ case kindArray:
+ a := (*arraytype)(unsafe.Pointer(t))
+ for i := uintptr(0); i < a.len; i++ {
+ h = typehash(a.elem, add(p, i*a.elem.size), h)
+ }
+ return h
+ case kindStruct:
+ s := (*structtype)(unsafe.Pointer(t))
+ memStart := uintptr(0)
+ memEnd := uintptr(0)
+ for _, f := range s.fields {
+ if memEnd > memStart && (f.name.isBlank() || f.offset() != memEnd || f.typ.tflag&tflagRegularMemory == 0) {
+ // flush any pending regular memory hashing
+ h = memhash(add(p, memStart), h, memEnd-memStart)
+ memStart = memEnd
+ }
+ if f.name.isBlank() {
+ continue
+ }
+ if f.typ.tflag&tflagRegularMemory == 0 {
+ h = typehash(f.typ, add(p, f.offset()), h)
+ continue
+ }
+ if memStart == memEnd {
+ memStart = f.offset()
+ }
+ memEnd = f.offset() + f.typ.size
+ }
+ if memEnd > memStart {
+ h = memhash(add(p, memStart), h, memEnd-memStart)
+ }
+ return h
+ default:
+ // Should never happen, as typehash should only be called
+ // with comparable types.
+ panic(errorString("hash of unhashable type " + t.string()))
+ }
+}
+
+//go:linkname reflect_typehash reflect.typehash
+func reflect_typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr {
+ return typehash(t, p, h)
+}
+
func memequal0(p, q unsafe.Pointer) bool {
return true
}
@@ -219,7 +280,7 @@
if t == nil {
return true
}
- eq := t.alg.equal
+ eq := t.equal
if eq == nil {
panic(errorString("comparing uncomparable type " + t.string()))
}
@@ -236,7 +297,7 @@
return true
}
t := tab._type
- eq := t.alg.equal
+ eq := t.equal
if eq == nil {
panic(errorString("comparing uncomparable type " + t.string()))
}
@@ -249,7 +310,7 @@
// Testing adapters for hash quality tests (see hash_test.go)
func stringHash(s string, seed uintptr) uintptr {
- return algarray[alg_STRING].hash(noescape(unsafe.Pointer(&s)), seed)
+ return strhash(noescape(unsafe.Pointer(&s)), seed)
}
func bytesHash(b []byte, seed uintptr) uintptr {
@@ -258,21 +319,21 @@
}
func int32Hash(i uint32, seed uintptr) uintptr {
- return algarray[alg_MEM32].hash(noescape(unsafe.Pointer(&i)), seed)
+ return memhash32(noescape(unsafe.Pointer(&i)), seed)
}
func int64Hash(i uint64, seed uintptr) uintptr {
- return algarray[alg_MEM64].hash(noescape(unsafe.Pointer(&i)), seed)
+ return memhash64(noescape(unsafe.Pointer(&i)), seed)
}
func efaceHash(i interface{}, seed uintptr) uintptr {
- return algarray[alg_NILINTER].hash(noescape(unsafe.Pointer(&i)), seed)
+ return nilinterhash(noescape(unsafe.Pointer(&i)), seed)
}
func ifaceHash(i interface {
F()
}, seed uintptr) uintptr {
- return algarray[alg_INTER].hash(noescape(unsafe.Pointer(&i)), seed)
+ return interhash(noescape(unsafe.Pointer(&i)), seed)
}
const hashRandomBytes = sys.PtrSize / 4 * 64
@@ -286,7 +347,6 @@
func alginit() {
// Install AES hash algorithms if the instructions needed are present.
if (GOARCH == "386" || GOARCH == "amd64") &&
- GOOS != "nacl" &&
cpu.X86.HasAES && // AESENC
cpu.X86.HasSSSE3 && // PSHUFB
cpu.X86.HasSSE41 { // PINSR{D,Q}
@@ -305,19 +365,12 @@
}
func initAlgAES() {
- if GOOS == "aix" {
- // runtime.algarray is immutable on AIX: see cmd/link/internal/ld/xcoff.go
- return
- }
useAeshash = true
- algarray[alg_MEM32].hash = aeshash32
- algarray[alg_MEM64].hash = aeshash64
- algarray[alg_STRING].hash = aeshashstr
// Initialize with random data so hash collisions will be hard to engineer.
getRandomData(aeskeysched[:])
}
-// Note: These routines perform the read with an native endianness.
+// Note: These routines perform the read with a native endianness.
func readUnaligned32(p unsafe.Pointer) uint32 {
q := (*[4]byte)(p)
if sys.BigEndian {
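The t.equal == nil checks in interhash and nilinterhash above are what produce the familiar "hash of unhashable type" panic for interface map keys, and per the new comment the panic names the topmost type rather than the offending field; the new typehash then handles the general case, coalescing runs of plain-memory struct fields into single memhash calls. A minimal program exercising that panic path (illustrative only, not part of this change):

package main

import "fmt"

func main() {
	m := map[interface{}]int{}
	defer func() {
		// Prints: recovered: runtime error: hash of unhashable type []int
		fmt.Println("recovered:", recover())
	}()
	key := []int{1, 2, 3} // slice types have no equal function, so t.equal == nil
	m[key] = 1            // nilinterhash panics while hashing the key
}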
diff --git a/src/runtime/asm.s b/src/runtime/asm.s
index 6b209b2..95a3424 100644
--- a/src/runtime/asm.s
+++ b/src/runtime/asm.s
@@ -12,9 +12,6 @@
DATA runtime·no_pointers_stackmap+0x04(SB)/4, $0
GLOBL runtime·no_pointers_stackmap(SB),RODATA, $8
-GLOBL runtime·mheap_(SB), NOPTR, $0
-GLOBL runtime·memstats(SB), NOPTR, $0
-
// NaCl requires that these skips be verifiable machine code.
#ifdef GOARCH_amd64
#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90
@@ -22,9 +19,6 @@
#ifdef GOARCH_386
#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90
#endif
-#ifdef GOARCH_amd64p32
-#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90
-#endif
#ifdef GOARCH_wasm
#define SKIP4 UNDEF; UNDEF; UNDEF; UNDEF
#endif
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index a01841d..11863fb 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -109,9 +109,6 @@
MOVL SP, (g_stack+stack_hi)(BP)
// find out information about the processor we're on
-#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
- JMP has_cpuid
-#else
// first see if CPUID instruction is supported.
PUSHFL
PUSHFL
@@ -123,7 +120,6 @@
POPFL // restore EFLAGS
TESTL $(1<<21), AX
JNE has_cpuid
-#endif
bad_proc: // show that the program requires MMX.
MOVL $2, 0(SP)
@@ -203,10 +199,6 @@
// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
JMP ok
#endif
-#ifdef GOOS_darwin
- // skip runtime·ldt0setup(SB) on Darwin
- JMP ok
-#endif
// set up %gs
CALL ldt0setup<>(SB)
@@ -911,18 +903,26 @@
RET
// hash function using AES hardware instructions
-TEXT runtime·aeshash(SB),NOSPLIT,$0-16
+TEXT runtime·memhash(SB),NOSPLIT,$0-16
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVL p+0(FP), AX // ptr to data
MOVL s+8(FP), BX // size
LEAL ret+12(FP), DX
JMP aeshashbody<>(SB)
+noaes:
+ JMP runtime·memhashFallback(SB)
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
+TEXT runtime·strhash(SB),NOSPLIT,$0-12
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVL p+0(FP), AX // ptr to string object
MOVL 4(AX), BX // length of string
MOVL (AX), AX // string data
LEAL ret+8(FP), DX
JMP aeshashbody<>(SB)
+noaes:
+ JMP runtime·strhashFallback(SB)
// AX: data
// BX: length
@@ -1108,7 +1108,9 @@
MOVL X4, (DX)
RET
-TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
+TEXT runtime·memhash32(SB),NOSPLIT,$0-12
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVL p+0(FP), AX // ptr to data
MOVL h+4(FP), X0 // seed
PINSRD $1, (AX), X0 // data
@@ -1117,8 +1119,12 @@
AESENC runtime·aeskeysched+32(SB), X0
MOVL X0, ret+8(FP)
RET
+noaes:
+ JMP runtime·memhash32Fallback(SB)
-TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
+TEXT runtime·memhash64(SB),NOSPLIT,$0-12
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVL p+0(FP), AX // ptr to data
MOVQ (AX), X0 // data
PINSRD $2, h+4(FP), X0 // seed
@@ -1127,6 +1133,8 @@
AESENC runtime·aeskeysched+32(SB), X0
MOVL X0, ret+8(FP)
RET
+noaes:
+ JMP runtime·memhash64Fallback(SB)
// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/4, $0x00000000
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index fd3a9c3..fa25c55 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -510,7 +510,8 @@
/* call function */ \
MOVQ f+8(FP), DX; \
PCDATA $PCDATA_StackMapIndex, $0; \
- CALL (DX); \
+ MOVQ (DX), AX; \
+ CALL AX; \
/* copy return values back */ \
MOVQ argtype+0(FP), DX; \
MOVQ argptr+16(FP), DI; \
@@ -885,21 +886,29 @@
MOVQ AX, ret+0(FP)
RET
-// func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
+// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
-TEXT runtime·aeshash(SB),NOSPLIT,$0-32
+TEXT runtime·memhash(SB),NOSPLIT,$0-32
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVQ p+0(FP), AX // ptr to data
MOVQ s+16(FP), CX // size
LEAQ ret+24(FP), DX
JMP aeshashbody<>(SB)
+noaes:
+ JMP runtime·memhashFallback(SB)
-// func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
+// func strhash(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·strhash(SB),NOSPLIT,$0-24
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVQ p+0(FP), AX // ptr to string struct
MOVQ 8(AX), CX // length of string
MOVQ (AX), AX // string data
LEAQ ret+16(FP), DX
JMP aeshashbody<>(SB)
+noaes:
+ JMP runtime·strhashFallback(SB)
// AX: data
// CX: length
@@ -1232,8 +1241,10 @@
MOVQ X8, (DX)
RET
-// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
+// func memhash32(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash32(SB),NOSPLIT,$0-24
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVQ p+0(FP), AX // ptr to data
MOVQ h+8(FP), X0 // seed
PINSRD $2, (AX), X0 // data
@@ -1242,9 +1253,13 @@
AESENC runtime·aeskeysched+32(SB), X0
MOVQ X0, ret+16(FP)
RET
+noaes:
+ JMP runtime·memhash32Fallback(SB)
-// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
+// func memhash64(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash64(SB),NOSPLIT,$0-24
+ CMPB runtime·useAeshash(SB), $0
+ JEQ noaes
MOVQ p+0(FP), AX // ptr to data
MOVQ h+8(FP), X0 // seed
PINSRQ $1, (AX), X0 // data
@@ -1253,6 +1268,8 @@
AESENC runtime·aeskeysched+32(SB), X0
MOVQ X0, ret+16(FP)
RET
+noaes:
+ JMP runtime·memhash64Fallback(SB)
// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
@@ -1458,6 +1475,55 @@
MOVQ 96(SP), R15
JMP ret
+// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
+TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0
+ XCHGQ CX, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ CX, AX
+ RET
+
+// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
+TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0
+ XCHGQ DX, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ DX, AX
+ RET
+
+// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
+TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0
+ XCHGQ BX, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ BX, AX
+ RET
+
+// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
+TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0
+ XCHGQ BP, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ BP, AX
+ RET
+
+// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
+TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0
+ XCHGQ SI, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ SI, AX
+ RET
+
+// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
+TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0
+ XCHGQ R8, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ R8, AX
+ RET
+
+// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
+TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0
+ XCHGQ R9, AX
+ CALL runtime·gcWriteBarrier(SB)
+ XCHGQ R9, AX
+ RET
+
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
@@ -1494,10 +1560,8 @@
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
- // Save all registers that may contain pointers in GC register
- // map order (see ssa.registersAMD64). This makes it possible
- // to copy the stack while updating pointers currently held in
- // registers, and for the GC to find roots in registers.
+ // Save all registers that may contain pointers so they can be
+ // conservatively scanned.
//
// We can't do anything that might clobber any of these
// registers before this.
@@ -1727,3 +1791,34 @@
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
+
+// The compiler and assembler's -spectre=ret mode rewrites
+// all indirect CALL AX / JMP AX instructions to be
+// CALL retpolineAX / JMP retpolineAX.
+// See https://support.google.com/faqs/answer/7625886.
+#define RETPOLINE(reg) \
+ /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
+ /* nospec: */ \
+ /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
+ /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
+ /* setup: */ \
+ /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
+ BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
+ /* RET */ BYTE $0xC3
+
+TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
+TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
+TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
+TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
+/* SP is 4, can't happen / magic encodings */
+TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
+TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
+TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
+TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
+TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
+TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
+TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
+TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
+TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
+TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
+TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)
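Two related amd64 details: the CALLFN macro's indirect call changes from CALL (DX) to MOVQ (DX), AX; CALL AX, keeping indirect calls register-based so the -spectre=ret rewrite described in the retpoline comment can reach them, and the gcWriteBarrier{CX,DX,BX,BP,SI,R8,R9} wrappers simply swap the value register with AX around a call to runtime·gcWriteBarrier so the compiler no longer has to stage the written value in AX. The retpoline thunks are inert unless Spectre mitigations are requested at build time via the new -spectre flags, e.g.:

    go build -gcflags=all=-spectre=ret -asmflags=all=-spectre=ret ./...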
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
deleted file mode 100644
index 48f3711..0000000
--- a/src/runtime/asm_amd64p32.s
+++ /dev/null
@@ -1,763 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "funcdata.h"
-#include "textflag.h"
-
-TEXT runtime·rt0_go(SB),NOSPLIT,$0
- // copy arguments forward on an even stack
- MOVL SP, CX
- MOVL 8(CX), AX // argc
- MOVL 12(CX), BX // argv
- SUBL $128, CX // plenty of scratch
- ANDL $~15, CX
- MOVL CX, SP
-
- MOVL AX, 16(SP)
- MOVL BX, 24(SP)
-
- // create istack out of the given (operating system) stack.
- MOVL $runtime·g0(SB), DI
- LEAL (-64*1024+104)(SP), BX
- MOVL BX, g_stackguard0(DI)
- MOVL BX, g_stackguard1(DI)
- MOVL BX, (g_stack+stack_lo)(DI)
- MOVL SP, (g_stack+stack_hi)(DI)
-
- // find out information about the processor we're on
- MOVL $0, AX
- CPUID
- CMPL AX, $0
- JE nocpuinfo
-
- CMPL BX, $0x756E6547 // "Genu"
- JNE notintel
- CMPL DX, $0x49656E69 // "ineI"
- JNE notintel
- CMPL CX, $0x6C65746E // "ntel"
- JNE notintel
- MOVB $1, runtime·isIntel(SB)
-notintel:
-
- // Load EAX=1 cpuid flags
- MOVL $1, AX
- CPUID
- MOVL AX, runtime·processorVersionInfo(SB)
-
-nocpuinfo:
- LEAL runtime·m0+m_tls(SB), DI
- CALL runtime·settls(SB)
-
- // store through it, to make sure it works
- get_tls(BX)
- MOVQ $0x123, g(BX)
- MOVQ runtime·m0+m_tls(SB), AX
- CMPQ AX, $0x123
- JEQ 2(PC)
- CALL runtime·abort(SB)
-ok:
- // set the per-goroutine and per-mach "registers"
- get_tls(BX)
- LEAL runtime·g0(SB), CX
- MOVL CX, g(BX)
- LEAL runtime·m0(SB), AX
-
- // save m->g0 = g0
- MOVL CX, m_g0(AX)
- // save m0 to g0->m
- MOVL AX, g_m(CX)
-
- CLD // convention is D is always left cleared
- CALL runtime·check(SB)
-
- MOVL 16(SP), AX // copy argc
- MOVL AX, 0(SP)
- MOVL 24(SP), AX // copy argv
- MOVL AX, 4(SP)
- CALL runtime·args(SB)
- CALL runtime·osinit(SB)
- CALL runtime·schedinit(SB)
-
- // create a new goroutine to start program
- MOVL $runtime·mainPC(SB), AX // entry
- MOVL $0, 0(SP)
- MOVL AX, 4(SP)
- CALL runtime·newproc(SB)
-
- // start this M
- CALL runtime·mstart(SB)
-
- MOVL $0xf1, 0xf1 // crash
- RET
-
-DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
-GLOBL runtime·mainPC(SB),RODATA,$4
-
-TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
- INT $3
- RET
-
-TEXT runtime·asminit(SB),NOSPLIT,$0-0
- // No per-thread init.
- RET
-
-/*
- * go-routine
- */
-
-// void gosave(Gobuf*)
-// save state in Gobuf; setjmp
-TEXT runtime·gosave(SB), NOSPLIT, $0-4
- MOVL buf+0(FP), AX // gobuf
- LEAL buf+0(FP), BX // caller's SP
- MOVL BX, gobuf_sp(AX)
- MOVL 0(SP), BX // caller's PC
- MOVL BX, gobuf_pc(AX)
- MOVQ $0, gobuf_ret(AX)
- // Assert ctxt is zero. See func save.
- MOVL gobuf_ctxt(AX), BX
- TESTL BX, BX
- JZ 2(PC)
- CALL runtime·badctxt(SB)
- get_tls(CX)
- MOVL g(CX), BX
- MOVL BX, gobuf_g(AX)
- RET
-
-// void gogo(Gobuf*)
-// restore state from Gobuf; longjmp
-TEXT runtime·gogo(SB), NOSPLIT, $8-4
- MOVL buf+0(FP), BX // gobuf
- MOVL gobuf_g(BX), DX
- MOVL 0(DX), CX // make sure g != nil
- get_tls(CX)
- MOVL DX, g(CX)
- MOVL gobuf_sp(BX), SP // restore SP
- MOVL gobuf_ctxt(BX), DX
- MOVQ gobuf_ret(BX), AX
- MOVL $0, gobuf_sp(BX) // clear to help garbage collector
- MOVQ $0, gobuf_ret(BX)
- MOVL $0, gobuf_ctxt(BX)
- MOVL gobuf_pc(BX), BX
- JMP BX
-
-// func mcall(fn func(*g))
-// Switch to m->g0's stack, call fn(g).
-// Fn must never return. It should gogo(&g->sched)
-// to keep running g.
-TEXT runtime·mcall(SB), NOSPLIT, $0-4
- MOVL fn+0(FP), DI
-
- get_tls(CX)
- MOVL g(CX), AX // save state in g->sched
- MOVL 0(SP), BX // caller's PC
- MOVL BX, (g_sched+gobuf_pc)(AX)
- LEAL fn+0(FP), BX // caller's SP
- MOVL BX, (g_sched+gobuf_sp)(AX)
- MOVL AX, (g_sched+gobuf_g)(AX)
-
- // switch to m->g0 & its stack, call fn
- MOVL g(CX), BX
- MOVL g_m(BX), BX
- MOVL m_g0(BX), SI
- CMPL SI, AX // if g == m->g0 call badmcall
- JNE 3(PC)
- MOVL $runtime·badmcall(SB), AX
- JMP AX
- MOVL SI, g(CX) // g = m->g0
- MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
- PUSHQ AX
- MOVL DI, DX
- MOVL 0(DI), DI
- CALL DI
- POPQ AX
- MOVL $runtime·badmcall2(SB), AX
- JMP AX
- RET
-
-// systemstack_switch is a dummy routine that systemstack leaves at the bottom
-// of the G stack. We need to distinguish the routine that
-// lives at the bottom of the G stack from the one that lives
-// at the top of the system stack because the one at the top of
-// the system stack terminates the stack walk (see topofstack()).
-TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
- RET
-
-// func systemstack(fn func())
-TEXT runtime·systemstack(SB), NOSPLIT, $0-4
- MOVL fn+0(FP), DI // DI = fn
- get_tls(CX)
- MOVL g(CX), AX // AX = g
- MOVL g_m(AX), BX // BX = m
-
- CMPL AX, m_gsignal(BX)
- JEQ noswitch
-
- MOVL m_g0(BX), DX // DX = g0
- CMPL AX, DX
- JEQ noswitch
-
- CMPL AX, m_curg(BX)
- JNE bad
-
- // switch stacks
- // save our state in g->sched. Pretend to
- // be systemstack_switch if the G stack is scanned.
- MOVL $runtime·systemstack_switch(SB), SI
- MOVL SI, (g_sched+gobuf_pc)(AX)
- MOVL SP, (g_sched+gobuf_sp)(AX)
- MOVL AX, (g_sched+gobuf_g)(AX)
-
- // switch to g0
- MOVL DX, g(CX)
- MOVL (g_sched+gobuf_sp)(DX), SP
-
- // call target function
- MOVL DI, DX
- MOVL 0(DI), DI
- CALL DI
-
- // switch back to g
- get_tls(CX)
- MOVL g(CX), AX
- MOVL g_m(AX), BX
- MOVL m_curg(BX), AX
- MOVL AX, g(CX)
- MOVL (g_sched+gobuf_sp)(AX), SP
- MOVL $0, (g_sched+gobuf_sp)(AX)
- RET
-
-noswitch:
- // already on m stack, just call directly
- // Using a tail call here cleans up tracebacks since we won't stop
- // at an intermediate systemstack.
- MOVL DI, DX
- MOVL 0(DI), DI
- JMP DI
-
-bad:
- // Not g0, not curg. Must be gsignal, but that's not allowed.
- // Hide call from linker nosplit analysis.
- MOVL $runtime·badsystemstack(SB), AX
- CALL AX
- INT $3
-
-/*
- * support for morestack
- */
-
-// Called during function prolog when more stack is needed.
-//
-// The traceback routines see morestack on a g0 as being
-// the top of a stack (for example, morestack calling newstack
-// calling the scheduler calling newm calling gc), so we must
-// record an argument size. For that purpose, it has no arguments.
-TEXT runtime·morestack(SB),NOSPLIT,$0-0
- get_tls(CX)
- MOVL g(CX), BX
- MOVL g_m(BX), BX
-
- // Cannot grow scheduler stack (m->g0).
- MOVL m_g0(BX), SI
- CMPL g(CX), SI
- JNE 3(PC)
- CALL runtime·badmorestackg0(SB)
- MOVL 0, AX
-
- // Cannot grow signal stack (m->gsignal).
- MOVL m_gsignal(BX), SI
- CMPL g(CX), SI
- JNE 3(PC)
- CALL runtime·badmorestackgsignal(SB)
- MOVL 0, AX
-
- // Called from f.
- // Set m->morebuf to f's caller.
- NOP SP // tell vet SP changed - stop checking offsets
- MOVL 8(SP), AX // f's caller's PC
- MOVL AX, (m_morebuf+gobuf_pc)(BX)
- LEAL 16(SP), AX // f's caller's SP
- MOVL AX, (m_morebuf+gobuf_sp)(BX)
- get_tls(CX)
- MOVL g(CX), SI
- MOVL SI, (m_morebuf+gobuf_g)(BX)
-
- // Set g->sched to context in f.
- MOVL 0(SP), AX // f's PC
- MOVL AX, (g_sched+gobuf_pc)(SI)
- MOVL SI, (g_sched+gobuf_g)(SI)
- LEAL 8(SP), AX // f's SP
- MOVL AX, (g_sched+gobuf_sp)(SI)
- MOVL DX, (g_sched+gobuf_ctxt)(SI)
-
- // Call newstack on m->g0's stack.
- MOVL m_g0(BX), BX
- MOVL BX, g(CX)
- MOVL (g_sched+gobuf_sp)(BX), SP
- CALL runtime·newstack(SB)
- MOVL $0, 0x1003 // crash if newstack returns
- RET
-
-// morestack trampolines
-TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
- MOVL $0, DX
- JMP runtime·morestack(SB)
-
-// reflectcall: call a function with the given argument list
-// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
-// we don't have variable-sized frames, so we use a small number
-// of constant-sized-frame functions to encode a few bits of size in the pc.
-// Caution: ugly multiline assembly macros in your future!
-
-#define DISPATCH(NAME,MAXSIZE) \
- CMPL CX, $MAXSIZE; \
- JA 3(PC); \
- MOVL $NAME(SB), AX; \
- JMP AX
-// Note: can't just "JMP NAME(SB)" - bad inlining results.
-
-TEXT ·reflectcall(SB), NOSPLIT, $0-20
- MOVLQZX argsize+12(FP), CX
- DISPATCH(runtime·call16, 16)
- DISPATCH(runtime·call32, 32)
- DISPATCH(runtime·call64, 64)
- DISPATCH(runtime·call128, 128)
- DISPATCH(runtime·call256, 256)
- DISPATCH(runtime·call512, 512)
- DISPATCH(runtime·call1024, 1024)
- DISPATCH(runtime·call2048, 2048)
- DISPATCH(runtime·call4096, 4096)
- DISPATCH(runtime·call8192, 8192)
- DISPATCH(runtime·call16384, 16384)
- DISPATCH(runtime·call32768, 32768)
- DISPATCH(runtime·call65536, 65536)
- DISPATCH(runtime·call131072, 131072)
- DISPATCH(runtime·call262144, 262144)
- DISPATCH(runtime·call524288, 524288)
- DISPATCH(runtime·call1048576, 1048576)
- DISPATCH(runtime·call2097152, 2097152)
- DISPATCH(runtime·call4194304, 4194304)
- DISPATCH(runtime·call8388608, 8388608)
- DISPATCH(runtime·call16777216, 16777216)
- DISPATCH(runtime·call33554432, 33554432)
- DISPATCH(runtime·call67108864, 67108864)
- DISPATCH(runtime·call134217728, 134217728)
- DISPATCH(runtime·call268435456, 268435456)
- DISPATCH(runtime·call536870912, 536870912)
- DISPATCH(runtime·call1073741824, 1073741824)
- MOVL $runtime·badreflectcall(SB), AX
- JMP AX
-
-#define CALLFN(NAME,MAXSIZE) \
-TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
- NO_LOCAL_POINTERS; \
- /* copy arguments to stack */ \
- MOVL argptr+8(FP), SI; \
- MOVL argsize+12(FP), CX; \
- MOVL SP, DI; \
- REP;MOVSB; \
- /* call function */ \
- MOVL f+4(FP), DX; \
- MOVL (DX), AX; \
- CALL AX; \
- /* copy return values back */ \
- MOVL argtype+0(FP), DX; \
- MOVL argptr+8(FP), DI; \
- MOVL argsize+12(FP), CX; \
- MOVL retoffset+16(FP), BX; \
- MOVL SP, SI; \
- ADDL BX, DI; \
- ADDL BX, SI; \
- SUBL BX, CX; \
- CALL callRet<>(SB); \
- RET
-
-// callRet copies return values back at the end of call*. This is a
-// separate function so it can allocate stack space for the arguments
-// to reflectcallmove. It does not follow the Go ABI; it expects its
-// arguments in registers.
-TEXT callRet<>(SB), NOSPLIT, $16-0
- MOVL DX, 0(SP)
- MOVL DI, 4(SP)
- MOVL SI, 8(SP)
- MOVL CX, 12(SP)
- CALL runtime·reflectcallmove(SB)
- RET
-
-CALLFN(·call16, 16)
-CALLFN(·call32, 32)
-CALLFN(·call64, 64)
-CALLFN(·call128, 128)
-CALLFN(·call256, 256)
-CALLFN(·call512, 512)
-CALLFN(·call1024, 1024)
-CALLFN(·call2048, 2048)
-CALLFN(·call4096, 4096)
-CALLFN(·call8192, 8192)
-CALLFN(·call16384, 16384)
-CALLFN(·call32768, 32768)
-CALLFN(·call65536, 65536)
-CALLFN(·call131072, 131072)
-CALLFN(·call262144, 262144)
-CALLFN(·call524288, 524288)
-CALLFN(·call1048576, 1048576)
-CALLFN(·call2097152, 2097152)
-CALLFN(·call4194304, 4194304)
-CALLFN(·call8388608, 8388608)
-CALLFN(·call16777216, 16777216)
-CALLFN(·call33554432, 33554432)
-CALLFN(·call67108864, 67108864)
-CALLFN(·call134217728, 134217728)
-CALLFN(·call268435456, 268435456)
-CALLFN(·call536870912, 536870912)
-CALLFN(·call1073741824, 1073741824)
-
-TEXT runtime·procyield(SB),NOSPLIT,$0-0
- MOVL cycles+0(FP), AX
-again:
- PAUSE
- SUBL $1, AX
- JNZ again
- RET
-
-TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
- // Stores are already ordered on x86, so this is just a
- // compile barrier.
- RET
-
-// void jmpdefer(fn, sp);
-// called from deferreturn.
-// 1. pop the caller
-// 2. sub 5 bytes from the callers return
-// 3. jmp to the argument
-TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
- MOVL fv+0(FP), DX
- MOVL argp+4(FP), BX
- LEAL -8(BX), SP // caller sp after CALL
- SUBL $5, (SP) // return to CALL again
- MOVL 0(DX), BX
- JMP BX // but first run the deferred function
-
-// func asmcgocall(fn, arg unsafe.Pointer) int32
-// Not implemented.
-TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
- MOVL 0, AX // crash
- MOVL $0, ret+8(FP) // for vet
- RET
-
-// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
-// Not implemented.
-TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
- MOVL 0, AX
- RET
-
-// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
-// Not implemented.
-TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
- MOVL 0, AX
- RET
-
-// void setg(G*); set g. for use by needm.
-// Not implemented.
-TEXT runtime·setg(SB), NOSPLIT, $0-4
- MOVL 0, AX
- RET
-
-TEXT runtime·abort(SB),NOSPLIT,$0-0
- INT $3
-loop:
- JMP loop
-
-// check that SP is in range [g->stack.lo, g->stack.hi)
-TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
- get_tls(CX)
- MOVL g(CX), AX
- CMPL (g_stack+stack_hi)(AX), SP
- JHI 2(PC)
- MOVL 0, AX
- CMPL SP, (g_stack+stack_lo)(AX)
- JHI 2(PC)
- MOVL 0, AX
- RET
-
-// int64 runtime·cputicks(void)
-TEXT runtime·cputicks(SB),NOSPLIT,$0-0
- RDTSC
- SHLQ $32, DX
- ADDQ DX, AX
- MOVQ AX, ret+0(FP)
- RET
-
-// hash function using AES hardware instructions
-// For now, our one amd64p32 system (NaCl) does not
-// support using AES instructions, so have not bothered to
-// write the implementations. Can copy and adjust the ones
-// in asm_amd64.s when the time comes.
-
-TEXT runtime·aeshash(SB),NOSPLIT,$0-20
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·return0(SB), NOSPLIT, $0
- MOVL $0, AX
- RET
-
-// The top-most function running on a goroutine
-// returns to goexit+PCQuantum.
-TEXT runtime·goexit(SB),NOSPLIT,$0-0
- BYTE $0x90 // NOP
- CALL runtime·goexit1(SB) // does not return
- // traceback from goexit1 must hit code range of goexit
- BYTE $0x90 // NOP
-
-TEXT ·checkASM(SB),NOSPLIT,$0-1
- MOVB $1, ret+0(FP)
- RET
-
-// gcWriteBarrier performs a heap pointer write and informs the GC.
-//
-// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
-// - DI is the destination of the write
-// - AX is the value being written at DI
-// It clobbers FLAGS and SI. It does not clobber any other general-purpose registers,
-// but may clobber others (e.g., SSE registers).
-TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$88
- // Save the registers clobbered by the fast path. This is slightly
- // faster than having the caller spill these.
- MOVQ R14, 72(SP)
- MOVQ R13, 80(SP)
- // TODO: Consider passing g.m.p in as an argument so they can be shared
- // across a sequence of write barriers.
- get_tls(R13)
- MOVL g(R13), R13
- MOVL g_m(R13), R13
- MOVL m_p(R13), R13
- MOVL (p_wbBuf+wbBuf_next)(R13), R14
- // Increment wbBuf.next position.
- LEAL 8(R14), R14
- MOVL R14, (p_wbBuf+wbBuf_next)(R13)
- CMPL R14, (p_wbBuf+wbBuf_end)(R13)
- // Record the write.
- MOVL AX, -8(R14) // Record value
- MOVL (DI), R13 // TODO: This turns bad writes into bad reads.
- MOVL R13, -4(R14) // Record *slot
- // Is the buffer full? (flags set in CMPL above)
- JEQ flush
-ret:
- MOVQ 72(SP), R14
- MOVQ 80(SP), R13
- // Do the write.
- MOVL AX, (DI)
- RET // Clobbers SI on NaCl
-
-flush:
- // Save all general purpose registers since these could be
- // clobbered by wbBufFlush and were not saved by the caller.
- // It is possible for wbBufFlush to clobber other registers
- // (e.g., SSE registers), but the compiler takes care of saving
- // those in the caller if necessary. This strikes a balance
- // with registers that are likely to be used.
- //
- // We don't have type information for these, but all code under
- // here is NOSPLIT, so nothing will observe these.
- //
- // TODO: We could strike a different balance; e.g., saving X0
- // and not saving GP registers that are less likely to be used.
- MOVL DI, 0(SP) // Also first argument to wbBufFlush
- MOVL AX, 4(SP) // Also second argument to wbBufFlush
- MOVQ BX, 8(SP)
- MOVQ CX, 16(SP)
- MOVQ DX, 24(SP)
- // DI already saved
- // SI is always clobbered on nacl
- // BP is reserved on nacl
- MOVQ R8, 32(SP)
- MOVQ R9, 40(SP)
- MOVQ R10, 48(SP)
- MOVQ R11, 56(SP)
- MOVQ R12, 64(SP)
- // R13 already saved
- // R14 already saved
- // R15 is reserved on nacl
-
- // This takes arguments DI and AX
- CALL runtime·wbBufFlush(SB)
-
- MOVL 0(SP), DI
- MOVL 4(SP), AX
- MOVQ 8(SP), BX
- MOVQ 16(SP), CX
- MOVQ 24(SP), DX
- MOVQ 32(SP), R8
- MOVQ 40(SP), R9
- MOVQ 48(SP), R10
- MOVQ 56(SP), R11
- MOVQ 64(SP), R12
- JMP ret
-
-// Note: these functions use a special calling convention to save generated code space.
-// Arguments are passed in registers, but the space for those arguments are allocated
-// in the caller's stack frame. These stubs write the args into that stack space and
-// then tail call to the corresponding runtime handler.
-// The tail call makes these stubs disappear in backtraces.
-TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicIndex(SB)
-TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicIndexU(SB)
-TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSliceAlen(SB)
-TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSliceAlenU(SB)
-TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSliceAcap(SB)
-TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSliceAcapU(SB)
-TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicSliceB(SB)
-TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicSliceBU(SB)
-TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
- MOVL DX, x+0(FP)
- MOVL BX, y+4(FP)
- JMP runtime·goPanicSlice3Alen(SB)
-TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
- MOVL DX, x+0(FP)
- MOVL BX, y+4(FP)
- JMP runtime·goPanicSlice3AlenU(SB)
-TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
- MOVL DX, x+0(FP)
- MOVL BX, y+4(FP)
- JMP runtime·goPanicSlice3Acap(SB)
-TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
- MOVL DX, x+0(FP)
- MOVL BX, y+4(FP)
- JMP runtime·goPanicSlice3AcapU(SB)
-TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSlice3B(SB)
-TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
- MOVL CX, x+0(FP)
- MOVL DX, y+4(FP)
- JMP runtime·goPanicSlice3BU(SB)
-TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicSlice3C(SB)
-TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
- MOVL AX, x+0(FP)
- MOVL CX, y+4(FP)
- JMP runtime·goPanicSlice3CU(SB)
-
-// Extended versions for 64-bit indexes.
-TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendIndex(SB)
-TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendIndexU(SB)
-TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSliceAlen(SB)
-TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSliceAlenU(SB)
-TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSliceAcap(SB)
-TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSliceAcapU(SB)
-TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendSliceB(SB)
-TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendSliceBU(SB)
-TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL DX, lo+4(FP)
- MOVL BX, y+8(FP)
- JMP runtime·goPanicExtendSlice3Alen(SB)
-TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL DX, lo+4(FP)
- MOVL BX, y+8(FP)
- JMP runtime·goPanicExtendSlice3AlenU(SB)
-TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL DX, lo+4(FP)
- MOVL BX, y+8(FP)
- JMP runtime·goPanicExtendSlice3Acap(SB)
-TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL DX, lo+4(FP)
- MOVL BX, y+8(FP)
- JMP runtime·goPanicExtendSlice3AcapU(SB)
-TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSlice3B(SB)
-TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL CX, lo+4(FP)
- MOVL DX, y+8(FP)
- JMP runtime·goPanicExtendSlice3BU(SB)
-TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendSlice3C(SB)
-TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
- MOVL SI, hi+0(FP)
- MOVL AX, lo+4(FP)
- MOVL CX, y+8(FP)
- JMP runtime·goPanicExtendSlice3CU(SB)
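asm_amd64p32.s is deleted outright: GOARCH=amd64p32 existed only for the Native Client port, and the GOOS_nacl special cases removed from asm.s, asm_386.s and asm_arm.s go with it. On a 1.15 toolchain the targets simply no longer exist:

    go tool dist list | grep -E 'nacl|amd64p32'    # prints nothing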
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index 5a7e3b7..51a50c6 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -185,15 +185,11 @@
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
// gdb won't skip this breakpoint instruction automatically,
// so you must manually "set $pc+=4" to skip it and continue.
-#ifdef GOOS_nacl
- WORD $0xe125be7f // BKPT 0x5bef, NACL_INSTR_ARM_BREAKPOINT
-#else
#ifdef GOOS_plan9
WORD $0xD1200070 // undefined instruction used as armv5 breakpoint in Plan 9
#else
WORD $0xe7f001f0 // undefined instruction that gdb understands is a software breakpoint
#endif
-#endif
RET
TEXT runtime·asminit(SB),NOSPLIT,$0-0
@@ -327,9 +323,6 @@
// save our state in g->sched. Pretend to
// be systemstack_switch if the G stack is scanned.
MOVW $runtime·systemstack_switch(SB), R3
-#ifdef GOOS_nacl
- ADD $4, R3, R3 // get past nacl-insert bic instruction
-#endif
ADD $4, R3, R3 // get past push {lr}
MOVW R3, (g_sched+gobuf_pc)(g)
MOVW R13, (g_sched+gobuf_sp)(g)
@@ -817,18 +810,14 @@
RET
// AES hashing not implemented for ARM
-TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
- MOVW $0, R0
- MOVW (R0), R1
-TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
- MOVW $0, R0
- MOVW (R0), R1
-TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
- MOVW $0, R0
- MOVW (R0), R1
-TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
- MOVW $0, R0
- MOVW (R0), R1
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·memhash64Fallback(SB)
TEXT runtime·return0(SB),NOSPLIT,$0
MOVW $0, R0
@@ -891,7 +880,6 @@
SUB R1, R3, R1
RET
-#ifndef GOOS_nacl
// This is called from .init_array and follows the platform, not Go, ABI.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
MOVW R9, saver9-4(SP) // The access to global variables below implicitly uses R9, which is callee-save
@@ -902,7 +890,6 @@
MOVW saver11-8(SP), R11
MOVW saver9-4(SP), R9
RET
-#endif
TEXT ·checkASM(SB),NOSPLIT,$0-1
MOVW $1, R3
@@ -939,8 +926,6 @@
MOVM.IA.W (R13), [R0,R1]
// Do the write.
MOVW R3, (R2)
- // Normally RET on nacl clobbers R12, but because this
- // function has no frame it doesn't have to usual epilogue.
RET
flush:
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 6e3b1b1..6b3d1e7 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -27,8 +27,7 @@
// if there is a _cgo_init, call it using the gcc ABI.
MOVD _cgo_init(SB), R12
- CMP $0, R12
- BEQ nocgo
+ CBZ R12, nocgo
MRS_TPIDR_R0 // load TLS base pointer
MOVD R0, R3 // arg 3: TLS base pointer
@@ -114,8 +113,7 @@
MOVD ZR, gobuf_ret(R3)
// Assert ctxt is zero. See func save.
MOVD gobuf_ctxt(R3), R0
- CMP $0, R0
- BEQ 2(PC)
+ CBZ R0, 2(PC)
CALL runtime·badctxt(SB)
RET
@@ -445,8 +443,10 @@
CALLFN(·call536870912, 536870920 )
CALLFN(·call1073741824, 1073741832 )
-// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-24
+// func memhash32(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R0
+ CBZ R0, noaes
MOVD p+0(FP), R0
MOVD h+8(FP), R1
MOVD $ret+16(FP), R2
@@ -465,9 +465,13 @@
VST1 [V0.D1], (R2)
RET
+noaes:
+ B runtime·memhash32Fallback(SB)
-// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-24
+// func memhash64(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R0
+ CBZ R0, noaes
MOVD p+0(FP), R0
MOVD h+8(FP), R1
MOVD $ret+16(FP), R2
@@ -486,31 +490,41 @@
VST1 [V0.D1], (R2)
RET
+noaes:
+ B runtime·memhash64Fallback(SB)
-// func aeshash(p unsafe.Pointer, h, size uintptr) uintptr
-TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-32
+// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ MOVB runtime·useAeshash(SB), R0
+ CBZ R0, noaes
MOVD p+0(FP), R0
MOVD s+16(FP), R1
- MOVWU h+8(FP), R3
+ MOVD h+8(FP), R3
MOVD $ret+24(FP), R2
B aeshashbody<>(SB)
+noaes:
+ B runtime·memhashFallback(SB)
-// func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-24
+// func strhash(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R0
+ CBZ R0, noaes
MOVD p+0(FP), R10 // string pointer
LDP (R10), (R0, R1) //string data/ length
- MOVWU h+8(FP), R3
+ MOVD h+8(FP), R3
MOVD $ret+16(FP), R2 // return adddress
B aeshashbody<>(SB)
+noaes:
+ B runtime·strhashFallback(SB)
// R0: data
-// R1: length (maximum 32 bits)
+// R1: length
// R2: address to put return value
// R3: seed data
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
VEOR V30.B16, V30.B16, V30.B16
- VMOV R3, V30.S[0]
- VMOV R1, V30.S[1] // load length into seed
+ VMOV R3, V30.D[0]
+ VMOV R1, V30.D[1] // load length into seed
MOVD $runtime·aeskeysched+0(SB), R4
VLD1.P 16(R4), [V0.B16]
@@ -528,8 +542,7 @@
B aes129plus
aes0to15:
- CMP $0, R1
- BEQ aes0
+ CBZ R1, aes0
VEOR V2.B16, V2.B16, V2.B16
TBZ $3, R1, less_than_8
VLD1.P 8(R0), V2.D[0]
@@ -859,8 +872,7 @@
MOVD $0, (g_sched+gobuf_ret)(g)
// Assert ctxt is zero. See func save.
MOVD (g_sched+gobuf_ctxt)(g), R0
- CMP $0, R0
- BEQ 2(PC)
+ CBZ R0, 2(PC)
CALL runtime·badctxt(SB)
RET
@@ -873,8 +885,7 @@
MOVD arg+8(FP), R0
MOVD RSP, R2 // save original stack pointer
- CMP $0, g
- BEQ nosave
+ CBZ g, nosave
MOVD g, R4
// Figure out if we need to switch to m->g0 stack.
@@ -970,8 +981,7 @@
// Load g from thread-local storage.
MOVB runtime·iscgo(SB), R3
- CMP $0, R3
- BEQ nocgo
+ CBZ R3, nocgo
BL runtime·load_g(SB)
nocgo:
@@ -980,8 +990,7 @@
// In this case, we're running on the thread stack, so there's
// lots of space, but the linker doesn't know. Hide the call from
// the linker analysis by using an indirect call.
- CMP $0, g
- BEQ needm
+ CBZ g, needm
MOVD g_m(g), R8
MOVD R8, savedm-8(SP)
@@ -1072,8 +1081,7 @@
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
MOVD savedm-8(SP), R6
- CMP $0, R6
- BNE droppedm
+ CBNZ R6, droppedm
MOVD $runtime·dropm(SB), R0
BL (R0)
droppedm:
diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s
index 8e59140..7330f40 100644
--- a/src/runtime/asm_mips64x.s
+++ b/src/runtime/asm_mips64x.s
@@ -610,14 +610,14 @@
UNDEF
// AES hashing not implemented for mips64
-TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash64Fallback(SB)
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R1
diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s
index 971dc37..aca0510 100644
--- a/src/runtime/asm_mipsx.s
+++ b/src/runtime/asm_mipsx.s
@@ -611,21 +611,15 @@
TEXT runtime·abort(SB),NOSPLIT,$0-0
UNDEF
-// Not implemented.
-TEXT runtime·aeshash(SB),NOSPLIT,$0
- UNDEF
-
-// Not implemented.
-TEXT runtime·aeshash32(SB),NOSPLIT,$0
- UNDEF
-
-// Not implemented.
-TEXT runtime·aeshash64(SB),NOSPLIT,$0
- UNDEF
-
-// Not implemented.
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0
- UNDEF
+// AES hashing not implemented for mips
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
+ JMP runtime·memhash64Fallback(SB)
TEXT runtime·return0(SB),NOSPLIT,$0
MOVW $0, R1
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 441042c..11d2f2f 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -833,14 +833,14 @@
RET
// AES hashing not implemented for ppc64
-TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
-TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R1
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash64Fallback(SB)
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R3
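Everything from here on is new code: asm_riscv64.s is the assembly half of the linux/riscv64 port. cgo is not wired up yet on this architecture (asmcgocall and cgocallback_gofunc deliberately crash, with TODOs referencing issue #36641), but pure-Go cross builds work, e.g.:

    GOOS=linux GOARCH=riscv64 go build ./...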
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
new file mode 100644
index 0000000..d7c45a1
--- /dev/null
+++ b/src/runtime/asm_riscv64.s
@@ -0,0 +1,669 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// func rt0_go()
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+ // X2 = stack; A0 = argc; A1 = argv
+
+ ADD $-24, X2
+ MOV A0, 8(X2) // argc
+ MOV A1, 16(X2) // argv
+
+ // create istack out of the given (operating system) stack.
+ // _cgo_init may update stackguard.
+ MOV $runtime·g0(SB), g
+ MOV $(-64*1024), T0
+ ADD T0, X2, T1
+ MOV T1, g_stackguard0(g)
+ MOV T1, g_stackguard1(g)
+ MOV T1, (g_stack+stack_lo)(g)
+ MOV X2, (g_stack+stack_hi)(g)
+
+ // if there is a _cgo_init, call it using the gcc ABI.
+ MOV _cgo_init(SB), T0
+ BEQ T0, ZERO, nocgo
+
+ MOV ZERO, A3 // arg 3: not used
+ MOV ZERO, A2 // arg 2: not used
+ MOV $setg_gcc<>(SB), A1 // arg 1: setg
+ MOV g, A0 // arg 0: G
+ JALR RA, T0
+
+nocgo:
+ // update stackguard after _cgo_init
+ MOV (g_stack+stack_lo)(g), T0
+ ADD $const__StackGuard, T0
+ MOV T0, g_stackguard0(g)
+ MOV T0, g_stackguard1(g)
+
+ // set the per-goroutine and per-mach "registers"
+ MOV $runtime·m0(SB), T0
+
+ // save m->g0 = g0
+ MOV g, m_g0(T0)
+ // save m0 to g0->m
+ MOV T0, g_m(g)
+
+ CALL runtime·check(SB)
+
+ // args are already prepared
+ CALL runtime·args(SB)
+ CALL runtime·osinit(SB)
+ CALL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOV $runtime·mainPC(SB), T0 // entry
+ ADD $-24, X2
+ MOV T0, 16(X2)
+ MOV ZERO, 8(X2)
+ MOV ZERO, 0(X2)
+ CALL runtime·newproc(SB)
+ ADD $24, X2
+
+ // start this M
+ CALL runtime·mstart(SB)
+
+ WORD $0 // crash if reached
+ RET
+
+// void setg_gcc(G*); set g called from gcc with g in A0
+TEXT setg_gcc<>(SB),NOSPLIT,$0-0
+ MOV A0, g
+ CALL runtime·save_g(SB)
+ RET
+
+// func cputicks() int64
+TEXT runtime·cputicks(SB),NOSPLIT,$0-8
+ WORD $0xc0102573 // rdtime a0
+ MOV A0, ret+0(FP)
+ RET
+
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
+ UNDEF
+ JALR RA, ZERO // make sure this function is not leaf
+ RET
+
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-8
+ MOV fn+0(FP), CTXT // CTXT = fn
+ MOV g_m(g), T0 // T0 = m
+
+ MOV m_gsignal(T0), T1 // T1 = gsignal
+ BEQ g, T1, noswitch
+
+ MOV m_g0(T0), T1 // T1 = g0
+ BEQ g, T1, noswitch
+
+ MOV m_curg(T0), T2
+ BEQ g, T2, switch
+
+ // Bad: g is not gsignal, not g0, not curg. What is it?
+ // Hide call from linker nosplit analysis.
+ MOV $runtime·badsystemstack(SB), T1
+ JALR RA, T1
+
+switch:
+ // save our state in g->sched. Pretend to
+ // be systemstack_switch if the G stack is scanned.
+ MOV $runtime·systemstack_switch(SB), T2
+ ADD $8, T2 // get past prologue
+ MOV T2, (g_sched+gobuf_pc)(g)
+ MOV X2, (g_sched+gobuf_sp)(g)
+ MOV ZERO, (g_sched+gobuf_lr)(g)
+ MOV g, (g_sched+gobuf_g)(g)
+
+ // switch to g0
+ MOV T1, g
+ CALL runtime·save_g(SB)
+ MOV (g_sched+gobuf_sp)(g), T0
+ // make it look like mstart called systemstack on g0, to stop traceback
+ ADD $-8, T0
+ MOV $runtime·mstart(SB), T1
+ MOV T1, 0(T0)
+ MOV T0, X2
+
+ // call target function
+ MOV 0(CTXT), T1 // code pointer
+ JALR RA, T1
+
+ // switch back to g
+ MOV g_m(g), T0
+ MOV m_curg(T0), g
+ CALL runtime·save_g(SB)
+ MOV (g_sched+gobuf_sp)(g), X2
+ MOV ZERO, (g_sched+gobuf_sp)(g)
+ RET
+
+noswitch:
+ // already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
+ MOV 0(CTXT), T1 // code pointer
+ ADD $8, X2
+ JMP (T1)
+
+TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
+ MOV 0(X2), T0 // LR saved by caller
+ MOV T0, ret+0(FP)
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already loaded:
+// R1: framesize, R2: argsize, R3: LR
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+
+// func morestack()
+TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
+ // Cannot grow scheduler stack (m->g0).
+ MOV g_m(g), A0
+ MOV m_g0(A0), A1
+ BNE g, A1, 3(PC)
+ CALL runtime·badmorestackg0(SB)
+ CALL runtime·abort(SB)
+
+ // Cannot grow signal stack (m->gsignal).
+ MOV m_gsignal(A0), A1
+ BNE g, A1, 3(PC)
+ CALL runtime·badmorestackgsignal(SB)
+ CALL runtime·abort(SB)
+
+ // Called from f.
+ // Set g->sched to context in f.
+ MOV X2, (g_sched+gobuf_sp)(g)
+ MOV T0, (g_sched+gobuf_pc)(g)
+ MOV RA, (g_sched+gobuf_lr)(g)
+ MOV CTXT, (g_sched+gobuf_ctxt)(g)
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOV RA, (m_morebuf+gobuf_pc)(A0) // f's caller's PC
+ MOV X2, (m_morebuf+gobuf_sp)(A0) // f's caller's SP
+ MOV g, (m_morebuf+gobuf_g)(A0)
+
+ // Call newstack on m->g0's stack.
+ MOV m_g0(A0), g
+ CALL runtime·save_g(SB)
+ MOV (g_sched+gobuf_sp)(g), X2
+ // Create a stack frame on g0 to call newstack.
+ MOV ZERO, -8(X2) // Zero saved LR in frame
+ ADD $-8, X2
+ CALL runtime·newstack(SB)
+
+ // Not reached, but make sure the return PC from the call to newstack
+ // is still in this function, and not the beginning of the next.
+ UNDEF
+
+// func morestack_noctxt()
+TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ MOV ZERO, CTXT
+ JMP runtime·morestack(SB)
+
+// AES hashing not implemented for riscv64
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash64Fallback(SB)
+
+// func return0()
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOV $0, A0
+ RET
+
+// restore state from Gobuf; longjmp
+
+// func gogo(buf *gobuf)
+TEXT runtime·gogo(SB), NOSPLIT, $16-8
+ MOV buf+0(FP), T0
+ MOV gobuf_g(T0), g // make sure g is not nil
+ CALL runtime·save_g(SB)
+
+ MOV (g), ZERO // make sure g is not nil
+ MOV gobuf_sp(T0), X2
+ MOV gobuf_lr(T0), RA
+ MOV gobuf_ret(T0), A0
+ MOV gobuf_ctxt(T0), CTXT
+ MOV ZERO, gobuf_sp(T0)
+ MOV ZERO, gobuf_ret(T0)
+ MOV ZERO, gobuf_lr(T0)
+ MOV ZERO, gobuf_ctxt(T0)
+ MOV gobuf_pc(T0), T0
+ JALR ZERO, T0
+
+// func jmpdefer(fv *funcval, argp uintptr)
+// called from deferreturn
+// 1. grab stored return address from the caller's frame
+// 2. sub 8 bytes to get back to JAL deferreturn
+// 3. JMP to fn
+TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
+ MOV 0(X2), RA
+ ADD $-8, RA
+
+ MOV fv+0(FP), CTXT
+ MOV argp+8(FP), X2
+ ADD $-8, X2
+ MOV 0(CTXT), T0
+ JALR ZERO, T0
+
+// func procyield(cycles uint32)
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ RET
+
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+
+// func mcall(fn func(*g))
+TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
+ // Save caller state in g->sched
+ MOV X2, (g_sched+gobuf_sp)(g)
+ MOV RA, (g_sched+gobuf_pc)(g)
+ MOV ZERO, (g_sched+gobuf_lr)(g)
+ MOV g, (g_sched+gobuf_g)(g)
+
+ // Switch to m->g0 & its stack, call fn.
+ MOV g, T0
+ MOV g_m(g), T1
+ MOV m_g0(T1), g
+ CALL runtime·save_g(SB)
+ BNE g, T0, 2(PC)
+ JMP runtime·badmcall(SB)
+ MOV fn+0(FP), CTXT // context
+ MOV 0(CTXT), T1 // code pointer
+ MOV (g_sched+gobuf_sp)(g), X2 // sp = m->g0->sched.sp
+ ADD $-16, X2
+ MOV T0, 8(X2)
+ MOV ZERO, 0(X2)
+ JALR RA, T1
+ JMP runtime·badmcall2(SB)
+
+// func gosave(buf *gobuf)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
+ MOV buf+0(FP), T1
+ MOV X2, gobuf_sp(T1)
+ MOV RA, gobuf_pc(T1)
+ MOV g, gobuf_g(T1)
+ MOV ZERO, gobuf_lr(T1)
+ MOV ZERO, gobuf_ret(T1)
+ // Assert ctxt is zero. See func save.
+ MOV gobuf_ctxt(T1), T1
+ BEQ T1, ZERO, 2(PC)
+ CALL runtime·badctxt(SB)
+ RET
+
+// func asmcgocall(fn, arg unsafe.Pointer) int32
+TEXT ·asmcgocall(SB),NOSPLIT,$0-20
+ // TODO(jsing): Add support for cgo - issue #36641.
+ WORD $0 // crash
+
+// func asminit()
+TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
+ RET
+
+// reflectcall: call a function with the given argument list
+// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ MOV $MAXSIZE, T1 \
+ BLTU T1, T0, 3(PC) \
+ MOV $NAME(SB), T2; \
+ JALR ZERO, T2
+// Note: can't just "BR NAME(SB)" - bad inlining results.
+
+// func call(argtype *rtype, fn, arg unsafe.Pointer, n uint32, retoffset uint32)
+TEXT reflect·call(SB), NOSPLIT, $0-0
+ JMP ·reflectcall(SB)
+
+// func reflectcall(argtype *_type, fn, arg unsafe.Pointer, argsize uint32, retoffset uint32)
+TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
+ MOVWU argsize+24(FP), T0
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOV $runtime·badreflectcall(SB), T2
+ JALR ZERO, T2
+
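Read at the Go level, the DISPATCH chain simply picks the smallest fixed-frame call* routine whose MAXSIZE can hold argsize and falls through to badreflectcall when even 1<<30 bytes is not enough. A rough, self-contained model of that selection (illustrative only, not runtime code):

    package main

    import "fmt"

    // dispatch mirrors the DISPATCH chain above: frame sizes double from
    // 32 bytes up to 1<<30, and the first bucket that fits argsize wins.
    func dispatch(argsize uint32) string {
        for size := uint32(32); size <= 1<<30; size *= 2 {
            if argsize <= size {
                return fmt.Sprintf("runtime.call%d", size)
            }
        }
        return "runtime.badreflectcall"
    }

    func main() {
        fmt.Println(dispatch(100))     // runtime.call128
        fmt.Println(dispatch(1 << 20)) // runtime.call1048576
        fmt.Println(dispatch(2 << 30)) // runtime.badreflectcall
    }
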
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
+ NO_LOCAL_POINTERS; \
+ /* copy arguments to stack */ \
+ MOV arg+16(FP), A1; \
+ MOVWU argsize+24(FP), A2; \
+ MOV X2, A3; \
+ ADD $8, A3; \
+ ADD A3, A2; \
+ BEQ A3, A2, 6(PC); \
+ MOVBU (A1), A4; \
+ ADD $1, A1; \
+ MOVB A4, (A3); \
+ ADD $1, A3; \
+ JMP -5(PC); \
+ /* call function */ \
+ MOV f+8(FP), CTXT; \
+ MOV (CTXT), A4; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ JALR RA, A4; \
+ /* copy return values back */ \
+ MOV argtype+0(FP), A5; \
+ MOV arg+16(FP), A1; \
+ MOVWU n+24(FP), A2; \
+ MOVWU retoffset+28(FP), A4; \
+ ADD $8, X2, A3; \
+ ADD A4, A3; \
+ ADD A4, A1; \
+ SUB A4, A2; \
+ CALL callRet<>(SB); \
+ RET
+
+// callRet copies return values back at the end of call*. This is a
+// separate function so it can allocate stack space for the arguments
+// to reflectcallmove. It does not follow the Go ABI; it expects its
+// arguments in registers.
+TEXT callRet<>(SB), NOSPLIT, $32-0
+ MOV A5, 8(X2)
+ MOV A1, 16(X2)
+ MOV A3, 24(X2)
+ MOV A2, 32(X2)
+ CALL runtime·reflectcallmove(SB)
+ RET
+
+CALLFN(·call16, 16)
+CALLFN(·call32, 32)
+CALLFN(·call64, 64)
+CALLFN(·call128, 128)
+CALLFN(·call256, 256)
+CALLFN(·call512, 512)
+CALLFN(·call1024, 1024)
+CALLFN(·call2048, 2048)
+CALLFN(·call4096, 4096)
+CALLFN(·call8192, 8192)
+CALLFN(·call16384, 16384)
+CALLFN(·call32768, 32768)
+CALLFN(·call65536, 65536)
+CALLFN(·call131072, 131072)
+CALLFN(·call262144, 262144)
+CALLFN(·call524288, 524288)
+CALLFN(·call1048576, 1048576)
+CALLFN(·call2097152, 2097152)
+CALLFN(·call4194304, 4194304)
+CALLFN(·call8388608, 8388608)
+CALLFN(·call16777216, 16777216)
+CALLFN(·call33554432, 33554432)
+CALLFN(·call67108864, 67108864)
+CALLFN(·call134217728, 134217728)
+CALLFN(·call268435456, 268435456)
+CALLFN(·call536870912, 536870912)
+CALLFN(·call1073741824, 1073741824)
+
+// func goexit(neverCallThisFunction)
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
+ MOV ZERO, ZERO // NOP
+ JMP runtime·goexit1(SB) // does not return
+ // traceback from goexit1 must hit code range of goexit
+ MOV ZERO, ZERO // NOP
+
+// func cgocallback_gofunc(fv uintptr, frame uintptr, framesize, ctxt uintptr)
+TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32
+ // TODO(jsing): Add support for cgo - issue #36641.
+ WORD $0 // crash
+
+TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
+ EBREAK
+ RET
+
+TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
+ EBREAK
+ RET
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-8
+ MOV gg+0(FP), g
+ // This only happens if iscgo, so jump straight to save_g
+ CALL runtime·save_g(SB)
+ RET
+
+TEXT ·checkASM(SB),NOSPLIT,$0-1
+ MOV $1, T0
+ MOV T0, ret+0(FP)
+ RET
+
+// gcWriteBarrier performs a heap pointer write and informs the GC.
+//
+// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
+// - T0 is the destination of the write
+// - T1 is the value being written at T0.
+// It clobbers R30 (the linker temp register - REG_TMP).
+// The act of CALLing gcWriteBarrier will clobber RA (LR).
+// It does not clobber any other general-purpose registers,
+// but may clobber others (e.g., floating point registers).
+TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$296
+ // Save the registers clobbered by the fast path.
+ MOV A0, 280(X2)
+ MOV A1, 288(X2)
+ MOV g_m(g), A0
+ MOV m_p(A0), A0
+ MOV (p_wbBuf+wbBuf_next)(A0), A1
+ // Increment wbBuf.next position.
+ ADD $16, A1
+ MOV A1, (p_wbBuf+wbBuf_next)(A0)
+ MOV (p_wbBuf+wbBuf_end)(A0), A0
+ MOV A0, T6 // T6 is linker temp register (REG_TMP)
+ // Record the write.
+ MOV T1, -16(A1) // Record value
+ MOV (T0), A0 // TODO: This turns bad writes into bad reads.
+ MOV A0, -8(A1) // Record *slot
+ // Is the buffer full?
+ BEQ A1, T6, flush
+ret:
+ MOV 280(X2), A0
+ MOV 288(X2), A1
+ // Do the write.
+ MOV T1, (T0)
+ RET
+
+flush:
+ // Save all general purpose registers since these could be
+ // clobbered by wbBufFlush and were not saved by the caller.
+ MOV T0, 8(X2) // Also first argument to wbBufFlush
+ MOV T1, 16(X2) // Also second argument to wbBufFlush
+
+ // TODO: Optimise
+ // R3 is g.
+ // R4 already saved (T0)
+ // R5 already saved (T1)
+ // R9 already saved (A0)
+ // R10 already saved (A1)
+ // R30 is tmp register.
+ MOV X0, 24(X2)
+ MOV X1, 32(X2)
+ MOV X2, 40(X2)
+ MOV X3, 48(X2)
+ MOV X4, 56(X2)
+ MOV X5, 64(X2)
+ MOV X6, 72(X2)
+ MOV X7, 80(X2)
+ MOV X8, 88(X2)
+ MOV X9, 96(X2)
+ MOV X10, 104(X2)
+ MOV X11, 112(X2)
+ MOV X12, 120(X2)
+ MOV X13, 128(X2)
+ MOV X14, 136(X2)
+ MOV X15, 144(X2)
+ MOV X16, 152(X2)
+ MOV X17, 160(X2)
+ MOV X18, 168(X2)
+ MOV X19, 176(X2)
+ MOV X20, 184(X2)
+ MOV X21, 192(X2)
+ MOV X22, 200(X2)
+ MOV X23, 208(X2)
+ MOV X24, 216(X2)
+ MOV X25, 224(X2)
+ MOV X26, 232(X2)
+ MOV X27, 240(X2)
+ MOV X28, 248(X2)
+ MOV X29, 256(X2)
+ MOV X30, 264(X2)
+ MOV X31, 272(X2)
+
+ // This takes arguments T0 and T1.
+ CALL runtime·wbBufFlush(SB)
+
+ MOV 24(X2), X0
+ MOV 32(X2), X1
+ MOV 40(X2), X2
+ MOV 48(X2), X3
+ MOV 56(X2), X4
+ MOV 64(X2), X5
+ MOV 72(X2), X6
+ MOV 80(X2), X7
+ MOV 88(X2), X8
+ MOV 96(X2), X9
+ MOV 104(X2), X10
+ MOV 112(X2), X11
+ MOV 120(X2), X12
+ MOV 128(X2), X13
+ MOV 136(X2), X14
+ MOV 144(X2), X15
+ MOV 152(X2), X16
+ MOV 160(X2), X17
+ MOV 168(X2), X18
+ MOV 176(X2), X19
+ MOV 184(X2), X20
+ MOV 192(X2), X21
+ MOV 200(X2), X22
+ MOV 208(X2), X23
+ MOV 216(X2), X24
+ MOV 224(X2), X25
+ MOV 232(X2), X26
+ MOV 240(X2), X27
+ MOV 248(X2), X28
+ MOV 256(X2), X29
+ MOV 264(X2), X30
+ MOV 272(X2), X31
+
+ JMP ret
+
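The fast path above is easier to follow against a Go-level model of the per-P buffer it fills (the real structure is p.wbBuf in mwbbuf.go, drained by runtime.wbBufFlush; this sketch mirrors only the logic of the assembly, not the real layout):

    package sketch

    // wbBuf is an illustrative stand-in for the per-P write-barrier buffer.
    type wbBuf struct {
        next int
        buf  [256][2]uintptr // {value being written, value being overwritten}
    }

    // record is the fast path: log the new value and the old contents of the
    // slot, advance next, and report whether the buffer must be flushed before
    // the actual store ("MOV T1, (T0)" above) happens.
    func (b *wbBuf) record(slot *uintptr, val uintptr) (needFlush bool) {
        b.buf[b.next] = [2]uintptr{val, *slot}
        b.next++
        return b.next == len(b.buf)
    }
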
+// Note: these functions use a special calling convention to save generated code space.
+// Arguments are passed in registers, but the space for those arguments is allocated
+// in the caller's stack frame. These stubs write the args into that stack space and
+// then tail call to the corresponding runtime handler.
+// The tail call makes these stubs disappear in backtraces.
+TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicIndex(SB)
+TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicIndexU(SB)
+TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSliceAlen(SB)
+TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSliceAlenU(SB)
+TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSliceAcap(SB)
+TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSliceAcapU(SB)
+TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicSliceB(SB)
+TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicSliceBU(SB)
+TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
+ MOV T2, x+0(FP)
+ MOV T3, y+8(FP)
+ JMP runtime·goPanicSlice3Alen(SB)
+TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
+ MOV T2, x+0(FP)
+ MOV T3, y+8(FP)
+ JMP runtime·goPanicSlice3AlenU(SB)
+TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
+ MOV T2, x+0(FP)
+ MOV T3, y+8(FP)
+ JMP runtime·goPanicSlice3Acap(SB)
+TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
+ MOV T2, x+0(FP)
+ MOV T3, y+8(FP)
+ JMP runtime·goPanicSlice3AcapU(SB)
+TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSlice3B(SB)
+TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
+ MOV T1, x+0(FP)
+ MOV T2, y+8(FP)
+ JMP runtime·goPanicSlice3BU(SB)
+TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicSlice3C(SB)
+TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
+ MOV T0, x+0(FP)
+ MOV T1, y+8(FP)
+ JMP runtime·goPanicSlice3CU(SB)
+
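For context, these stubs are what an ordinary out-of-range access ends up calling: the compiler's bounds check loads the index and length into the registers listed above and tail-calls the matching stub, which forwards to the Go-level goPanic* handler. An illustrative program (any out-of-range index/length pair behaves the same way):

    package main

    func main() {
        a := make([]int, 3)
        i := 5
        // The bounds check tail-calls runtime.panicIndex, which forwards to
        // runtime.goPanicIndex and prints:
        //   panic: runtime error: index out of range [5] with length 3
        _ = a[i]
    }
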
+DATA runtime·mainPC+0(SB)/8,$runtime·main(SB)
+GLOBL runtime·mainPC(SB),RODATA,$8
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
index ff3caf7..cb39451 100644
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@@ -740,14 +740,14 @@
RET
// AES hashing not implemented for s390x
-TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R15
-TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R15
-TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R15
-TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
- MOVW (R0), R15
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash64Fallback(SB)
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R3
diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s
index 8f3964f..7d88beb 100644
--- a/src/runtime/asm_wasm.s
+++ b/src/runtime/asm_wasm.s
@@ -176,6 +176,16 @@
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
RET
+// AES hashing not implemented for wasm
+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
+ JMP runtime·memhashFallback(SB)
+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·strhashFallback(SB)
+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash32Fallback(SB)
+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
+ JMP runtime·memhash64Fallback(SB)
+
TEXT runtime·return0(SB), NOSPLIT, $0-0
MOVD $0, RET0
RET
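These fallbacks matter beyond map internals: hash/maphash is built on the same runtime hash functions, so on architectures without the AES-based implementation its hashes also come from the *Fallback paths. A small usage example (standard library API; the printed value varies per process because the seed is random):

    package main

    import (
        "fmt"
        "hash/maphash"
    )

    func main() {
        var h maphash.Hash // the zero value is ready to use, with a random seed
        h.WriteString("hello")
        fmt.Println(h.Sum64())
    }
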
diff --git a/src/runtime/atomic_riscv64.s b/src/runtime/atomic_riscv64.s
new file mode 100644
index 0000000..544a7c5
--- /dev/null
+++ b/src/runtime/atomic_riscv64.s
@@ -0,0 +1,10 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func publicationBarrier()
+TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
+ FENCE
+ RET
diff --git a/src/runtime/callers_test.go b/src/runtime/callers_test.go
index ad83f99..3cf3fbe 100644
--- a/src/runtime/callers_test.go
+++ b/src/runtime/callers_test.go
@@ -5,25 +5,26 @@
package runtime_test
import (
+ "reflect"
"runtime"
"strings"
"testing"
)
func f1(pan bool) []uintptr {
- return f2(pan) // line 14
+ return f2(pan) // line 15
}
func f2(pan bool) []uintptr {
- return f3(pan) // line 18
+ return f3(pan) // line 19
}
func f3(pan bool) []uintptr {
if pan {
- panic("f3") // line 23
+ panic("f3") // line 24
}
ret := make([]uintptr, 20)
- return ret[:runtime.Callers(0, ret)] // line 26
+ return ret[:runtime.Callers(0, ret)] // line 27
}
func testCallers(t *testing.T, pcs []uintptr, pan bool) {
@@ -47,16 +48,16 @@
var f3Line int
if pan {
- f3Line = 23
+ f3Line = 24
} else {
- f3Line = 26
+ f3Line = 27
}
want := []struct {
name string
line int
}{
- {"f1", 14},
- {"f2", 18},
+ {"f1", 15},
+ {"f2", 19},
{"f3", f3Line},
}
for _, w := range want {
@@ -66,11 +67,35 @@
}
}
+func testCallersEqual(t *testing.T, pcs []uintptr, want []string) {
+ t.Helper()
+
+ got := make([]string, 0, len(want))
+
+ frames := runtime.CallersFrames(pcs)
+ for {
+ frame, more := frames.Next()
+ if !more || len(got) >= len(want) {
+ break
+ }
+ got = append(got, frame.Function)
+ }
+ if !reflect.DeepEqual(want, got) {
+ t.Fatalf("wanted %v, got %v", want, got)
+ }
+}
+
func TestCallers(t *testing.T) {
testCallers(t, f1(false), false)
}
func TestCallersPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersPanic.func1",
+ "runtime.gopanic", "runtime_test.f3", "runtime_test.f2", "runtime_test.f1",
+ "runtime_test.TestCallersPanic"}
+
defer func() {
if r := recover(); r == nil {
t.Fatal("did not panic")
@@ -78,6 +103,209 @@
pcs := make([]uintptr, 20)
pcs = pcs[:runtime.Callers(0, pcs)]
testCallers(t, pcs, true)
+ testCallersEqual(t, pcs, want)
}()
f1(true)
}
+
+func TestCallersDoublePanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDoublePanic.func1.1",
+ "runtime.gopanic", "runtime_test.TestCallersDoublePanic.func1", "runtime.gopanic", "runtime_test.TestCallersDoublePanic"}
+
+ defer func() {
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ if recover() == nil {
+ t.Fatal("did not panic")
+ }
+ testCallersEqual(t, pcs, want)
+ }()
+ if recover() == nil {
+ t.Fatal("did not panic")
+ }
+ panic(2)
+ }()
+ panic(1)
+}
+
+// Test that a defer after a successful recovery looks like it is called directly
+// from the function with the defers.
+func TestCallersAfterRecovery(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAfterRecovery.func1", "runtime_test.TestCallersAfterRecovery"}
+
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ defer func() {
+ if recover() == nil {
+ t.Fatal("did not recover from panic")
+ }
+ }()
+ panic(1)
+}
+
+func TestCallersAbortedPanic(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic.func2", "runtime_test.TestCallersAbortedPanic"}
+
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatalf("should be no panic remaining to recover")
+ }
+ }()
+
+ defer func() {
+ // panic1 was aborted/replaced by panic2, so when panic2 was
+ // recovered, there is no remaining panic on the stack.
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatalf("got %v, wanted %v", r, "panic2")
+ }
+ }()
+ defer func() {
+ // panic2 aborts/replaces panic1, because it is a recursive panic
+ // that is not recovered within the defer function called by
+ // panic1's panicking sequence
+ panic("panic2")
+ }()
+ panic("panic1")
+}
+
+func TestCallersAbortedPanic2(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic2.func2", "runtime_test.TestCallersAbortedPanic2"}
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatalf("should be no panic remaining to recover")
+ }
+ }()
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ func() {
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatalf("got %v, wanted %v", r, "panic2")
+ }
+ }()
+ func() {
+ defer func() {
+ // Again, panic2 aborts/replaces panic1
+ panic("panic2")
+ }()
+ panic("panic1")
+ }()
+ }()
+}
+
+func TestCallersNilPointerPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersNilPointerPanic.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic",
+ "runtime_test.TestCallersNilPointerPanic"}
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ var p *int
+ if *p == 3 {
+ t.Fatal("did not see nil pointer panic")
+ }
+}
+
+func TestCallersDivZeroPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDivZeroPanic.func1",
+ "runtime.gopanic", "runtime.panicdivide",
+ "runtime_test.TestCallersDivZeroPanic"}
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ var n int
+ if 5/n == 1 {
+ t.Fatal("did not see divide-by-sizer panic")
+ }
+}
+
+func TestCallersDeferNilFuncPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack. We cut off the check
+ // at runtime.sigpanic, because non-open-coded defers (which may be used in
+ // non-opt or race checker mode) include an extra 'deferreturn' frame (which is
+ // where the nil pointer deref happens).
+ state := 1
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanic.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic"}
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ if state == 1 {
+ t.Fatal("nil defer func panicked at defer time rather than function exit time")
+ }
+
+ }()
+ var f func()
+ defer f()
+ // Use the value of 'state' to make sure nil defer func f causes panic at
+ // function exit, rather than at the defer statement.
+ state = 2
+}
+
+// Same test, but forcing non-open-coded defer by putting the defer in a loop. See
+// issue #36050
+func TestCallersDeferNilFuncPanicWithLoop(t *testing.T) {
+ state := 1
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanicWithLoop.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic", "runtime.deferreturn", "runtime_test.TestCallersDeferNilFuncPanicWithLoop"}
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ if state == 1 {
+ t.Fatal("nil defer func panicked at defer time rather than function exit time")
+ }
+
+ }()
+
+ for i := 0; i < 1; i++ {
+ var f func()
+ defer f()
+ }
+ // Use the value of 'state' to make sure nil defer func f causes panic at
+ // function exit, rather than at the defer statement.
+ state = 2
+}
diff --git a/src/runtime/cgo/asm_nacl_amd64p32.s b/src/runtime/cgo/asm_nacl_amd64p32.s
deleted file mode 100644
index 82aaecd..0000000
--- a/src/runtime/cgo/asm_nacl_amd64p32.s
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-/*
- * void crosscall2(void (*fn)(void*, int32), void*, int32)
- * Save registers and call fn with two arguments.
- */
-TEXT crosscall2(SB),NOSPLIT,$0
- INT $3
- RET
diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go
index 5a2b5e4..c02b837 100644
--- a/src/runtime/cgo/cgo.go
+++ b/src/runtime/cgo/cgo.go
@@ -11,8 +11,7 @@
/*
-#cgo darwin,!arm,!arm64 LDFLAGS: -lpthread
-#cgo darwin,arm LDFLAGS: -framework CoreFoundation
+#cgo darwin,!arm64 LDFLAGS: -lpthread
#cgo darwin,arm64 LDFLAGS: -framework CoreFoundation
#cgo dragonfly LDFLAGS: -lpthread
#cgo freebsd LDFLAGS: -lpthread
@@ -23,6 +22,9 @@
#cgo aix LDFLAGS: -Wl,-berok
#cgo solaris LDFLAGS: -lxnet
+// Issue 35247.
+#cgo darwin CFLAGS: -Wno-nullability-completeness
+
#cgo CFLAGS: -Wall -Werror
#cgo solaris CPPFLAGS: -D_POSIX_PTHREAD_SEMANTICS
diff --git a/src/runtime/cgo/gcc_android.c b/src/runtime/cgo/gcc_android.c
index 321a515..7ea2135 100644
--- a/src/runtime/cgo/gcc_android.c
+++ b/src/runtime/cgo/gcc_android.c
@@ -35,7 +35,7 @@
// Truncated to a different magic value on 32-bit; that's ok.
#define magic1 (0x23581321345589ULL)
-// From https://android.googlesource.com/platform/bionic/+/refs/heads/master/libc/private/bionic_asm_tls.h#69.
+// From https://android.googlesource.com/platform/bionic/+/refs/heads/android10-tests-release/libc/private/bionic_asm_tls.h#69.
#define TLS_SLOT_APP 2
// inittls allocates a thread-local storage slot for g.
diff --git a/src/runtime/cgo/gcc_arm64.S b/src/runtime/cgo/gcc_arm64.S
index 59dce08..9154d2a 100644
--- a/src/runtime/cgo/gcc_arm64.S
+++ b/src/runtime/cgo/gcc_arm64.S
@@ -24,13 +24,28 @@
*/
.globl EXT(crosscall1)
EXT(crosscall1):
- stp x19, x20, [sp, #-16]!
- stp x21, x22, [sp, #-16]!
- stp x23, x24, [sp, #-16]!
- stp x25, x26, [sp, #-16]!
- stp x27, x28, [sp, #-16]!
- stp x29, x30, [sp, #-16]!
+ .cfi_startproc
+ stp x29, x30, [sp, #-96]!
+ .cfi_def_cfa_offset 96
+ .cfi_offset 29, -96
+ .cfi_offset 30, -88
mov x29, sp
+ .cfi_def_cfa_register 29
+ stp x19, x20, [sp, #80]
+ .cfi_offset 19, -16
+ .cfi_offset 20, -8
+ stp x21, x22, [sp, #64]
+ .cfi_offset 21, -32
+ .cfi_offset 22, -24
+ stp x23, x24, [sp, #48]
+ .cfi_offset 23, -48
+ .cfi_offset 24, -40
+ stp x25, x26, [sp, #32]
+ .cfi_offset 25, -64
+ .cfi_offset 26, -56
+ stp x27, x28, [sp, #16]
+ .cfi_offset 27, -80
+ .cfi_offset 28, -72
mov x19, x0
mov x20, x1
@@ -39,13 +54,27 @@
blr x20
blr x19
- ldp x29, x30, [sp], #16
- ldp x27, x28, [sp], #16
- ldp x25, x26, [sp], #16
- ldp x23, x24, [sp], #16
- ldp x21, x22, [sp], #16
- ldp x19, x20, [sp], #16
+ ldp x27, x28, [sp, #16]
+ .cfi_restore 27
+ .cfi_restore 28
+ ldp x25, x26, [sp, #32]
+ .cfi_restore 25
+ .cfi_restore 26
+ ldp x23, x24, [sp, #48]
+ .cfi_restore 23
+ .cfi_restore 24
+ ldp x21, x22, [sp, #64]
+ .cfi_restore 21
+ .cfi_restore 22
+ ldp x19, x20, [sp, #80]
+ .cfi_restore 19
+ .cfi_restore 20
+ ldp x29, x30, [sp], #96
+ .cfi_restore 29
+ .cfi_restore 30
+ .cfi_def_cfa 31, 0
ret
+ .cfi_endproc
#ifdef __ELF__
diff --git a/src/runtime/cgo/gcc_darwin_arm.c b/src/runtime/cgo/gcc_darwin_arm.c
deleted file mode 100644
index 205977c..0000000
--- a/src/runtime/cgo/gcc_darwin_arm.c
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <limits.h>
-#include <pthread.h>
-#include <signal.h>
-#include <string.h> /* for strerror */
-#include <sys/param.h>
-#include <unistd.h>
-
-#include <CoreFoundation/CFBundle.h>
-#include <CoreFoundation/CFString.h>
-
-#include "libcgo.h"
-#include "libcgo_unix.h"
-
-#define magic (0xe696c4f4U)
-
-// inittls allocates a thread-local storage slot for g.
-//
-// It finds the first available slot using pthread_key_create and uses
-// it as the offset value for runtime.tlsg.
-static void
-inittls(void **tlsg, void **tlsbase)
-{
- pthread_key_t k;
- int i, err;
-
- err = pthread_key_create(&k, nil);
- if(err != 0) {
- fprintf(stderr, "runtime/cgo: pthread_key_create failed: %d\n", err);
- abort();
- }
- //fprintf(stderr, "runtime/cgo: k = %d, tlsbase = %p\n", (int)k, tlsbase); // debug
- pthread_setspecific(k, (void*)magic);
- // The first key should be at 258.
- for (i=0; i<PTHREAD_KEYS_MAX; i++) {
- if (*(tlsbase+i) == (void*)magic) {
- *tlsg = (void*)(i*sizeof(void *));
- pthread_setspecific(k, 0);
- return;
- }
- }
- fprintf(stderr, "runtime/cgo: could not find pthread key.\n");
- abort();
-}
-
-static void *threadentry(void*);
-static void (*setg_gcc)(void*);
-
-void
-_cgo_sys_thread_start(ThreadStart *ts)
-{
- pthread_attr_t attr;
- sigset_t ign, oset;
- pthread_t p;
- size_t size;
- int err;
-
- sigfillset(&ign);
- pthread_sigmask(SIG_SETMASK, &ign, &oset);
-
- pthread_attr_init(&attr);
- size = 0;
- pthread_attr_getstacksize(&attr, &size);
- // Leave stacklo=0 and set stackhi=size; mstart will do the rest.
- ts->g->stackhi = size;
- err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);
-
- pthread_sigmask(SIG_SETMASK, &oset, nil);
-
- if (err != 0) {
- fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
- abort();
- }
-}
-
-extern void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
-static void*
-threadentry(void *v)
-{
- ThreadStart ts;
-
- ts = *(ThreadStart*)v;
- free(v);
-
- darwin_arm_init_thread_exception_port();
-
- crosscall_arm1(ts.fn, setg_gcc, (void*)ts.g);
- return nil;
-}
-
-// init_working_dir sets the current working directory to the app root.
-// By default darwin/arm processes start in "/".
-static void
-init_working_dir()
-{
- CFBundleRef bundle = CFBundleGetMainBundle();
- if (bundle == NULL) {
- fprintf(stderr, "runtime/cgo: no main bundle\n");
- return;
- }
- CFURLRef url_ref = CFBundleCopyResourceURL(bundle, CFSTR("Info"), CFSTR("plist"), NULL);
- if (url_ref == NULL) {
- // No Info.plist found. It can happen on Corellium virtual devices.
- return;
- }
- CFStringRef url_str_ref = CFURLGetString(url_ref);
- char buf[MAXPATHLEN];
- Boolean res = CFStringGetCString(url_str_ref, buf, sizeof(buf), kCFStringEncodingUTF8);
- CFRelease(url_ref);
- if (!res) {
- fprintf(stderr, "runtime/cgo: cannot get URL string\n");
- return;
- }
-
- // url is of the form "file:///path/to/Info.plist".
- // strip it down to the working directory "/path/to".
- int url_len = strlen(buf);
- if (url_len < sizeof("file://")+sizeof("/Info.plist")) {
- fprintf(stderr, "runtime/cgo: bad URL: %s\n", buf);
- return;
- }
- buf[url_len-sizeof("/Info.plist")+1] = 0;
- char *dir = &buf[0] + sizeof("file://")-1;
-
- if (chdir(dir) != 0) {
- fprintf(stderr, "runtime/cgo: chdir(%s) failed\n", dir);
- }
-
- // The test harness in go_darwin_arm_exec passes the relative working directory
- // in the GoExecWrapperWorkingDirectory property of the app bundle.
- CFStringRef wd_ref = CFBundleGetValueForInfoDictionaryKey(bundle, CFSTR("GoExecWrapperWorkingDirectory"));
- if (wd_ref != NULL) {
- if (!CFStringGetCString(wd_ref, buf, sizeof(buf), kCFStringEncodingUTF8)) {
- fprintf(stderr, "runtime/cgo: cannot get GoExecWrapperWorkingDirectory string\n");
- return;
- }
- if (chdir(buf) != 0) {
- fprintf(stderr, "runtime/cgo: chdir(%s) failed\n", buf);
- }
- }
-}
-
-void
-x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
-{
- pthread_attr_t attr;
- size_t size;
-
- setg_gcc = setg;
- pthread_attr_init(&attr);
- pthread_attr_getstacksize(&attr, &size);
- g->stacklo = (uintptr)&attr - size + 4096;
- pthread_attr_destroy(&attr);
-
- // yes, tlsbase from mrc might not be correctly aligned.
- inittls(tlsg, (void**)((uintptr)tlsbase & ~3));
-
- darwin_arm_init_mach_exception_handler();
- darwin_arm_init_thread_exception_port();
- init_working_dir();
-}
diff --git a/src/runtime/cgo/gcc_darwin_arm64.c b/src/runtime/cgo/gcc_darwin_arm64.c
index e6e3057..fd7d408 100644
--- a/src/runtime/cgo/gcc_darwin_arm64.c
+++ b/src/runtime/cgo/gcc_darwin_arm64.c
@@ -94,7 +94,7 @@
}
// init_working_dir sets the current working directory to the app root.
-// By default darwin/arm processes start in "/".
+// By default darwin/arm64 processes start in "/".
static void
init_working_dir()
{
diff --git a/src/runtime/cgo/gcc_darwin_386.c b/src/runtime/cgo/gcc_freebsd_arm64.c
similarity index 74%
rename from src/runtime/cgo/gcc_darwin_386.c
rename to src/runtime/cgo/gcc_freebsd_arm64.c
index 501457a..dd8f888 100644
--- a/src/runtime/cgo/gcc_darwin_386.c
+++ b/src/runtime/cgo/gcc_freebsd_arm64.c
@@ -1,28 +1,32 @@
-// Copyright 2009 The Go Authors. All rights reserved.
+// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include <string.h> /* for strerror */
+#include <sys/types.h>
+#include <errno.h>
+#include <sys/signalvar.h>
#include <pthread.h>
#include <signal.h>
+#include <string.h>
#include "libcgo.h"
#include "libcgo_unix.h"
static void* threadentry(void*);
+static void (*setg_gcc)(void*);
void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setg)(void*))
{
pthread_attr_t attr;
size_t size;
+ setg_gcc = setg;
pthread_attr_init(&attr);
pthread_attr_getstacksize(&attr, &size);
g->stacklo = (uintptr)&attr - size + 4096;
pthread_attr_destroy(&attr);
}
-
void
_cgo_sys_thread_start(ThreadStart *ts)
{
@@ -32,7 +36,7 @@
size_t size;
int err;
- sigfillset(&ign);
+ SIGFILLSET(ign);
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
@@ -49,6 +53,8 @@
}
}
+extern void crosscall1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+
static void*
threadentry(void *v)
{
@@ -57,10 +63,6 @@
ts = *(ThreadStart*)v;
free(v);
- // Move the g pointer into the slot reserved in thread local storage.
- // Constant must match the one in cmd/link/internal/ld/sym.go.
- asm volatile("movl %0, %%gs:0x18" :: "r"(ts.g));
-
- crosscall_386(ts.fn);
+ crosscall1(ts.fn, setg_gcc, (void*)ts.g);
return nil;
}
diff --git a/src/runtime/cgo/gcc_setenv.c b/src/runtime/cgo/gcc_setenv.c
index 88e92bf..d4f7983 100644
--- a/src/runtime/cgo/gcc_setenv.c
+++ b/src/runtime/cgo/gcc_setenv.c
@@ -20,9 +20,9 @@
/* Stub for calling unsetenv */
void
-x_cgo_unsetenv(char *arg)
+x_cgo_unsetenv(char **arg)
{
_cgo_tsan_acquire();
- unsetenv(arg);
+ unsetenv(arg[0]);
_cgo_tsan_release();
}
diff --git a/src/runtime/cgo/gcc_signal2_darwin_armx.c b/src/runtime/cgo/gcc_signal2_darwin_arm64.c
similarity index 79%
rename from src/runtime/cgo/gcc_signal2_darwin_armx.c
rename to src/runtime/cgo/gcc_signal2_darwin_arm64.c
index 54b7e32..5b8a18f 100644
--- a/src/runtime/cgo/gcc_signal2_darwin_armx.c
+++ b/src/runtime/cgo/gcc_signal2_darwin_arm64.c
@@ -3,10 +3,8 @@
// license that can be found in the LICENSE file.
// +build lldb
-// +build darwin
-// +build arm arm64
-// Used by gcc_signal_darwin_armx.c when doing the test build during cgo.
+// Used by gcc_signal_darwin_arm64.c when doing the test build during cgo.
// We hope that for real binaries the definition provided by Go will take precedence
// and the linker will drop this .o file altogether, which is why this definition
// is all by itself in its own file.
diff --git a/src/runtime/cgo/gcc_signal_darwin_armx.c b/src/runtime/cgo/gcc_signal_darwin_arm64.c
similarity index 98%
rename from src/runtime/cgo/gcc_signal_darwin_armx.c
rename to src/runtime/cgo/gcc_signal_darwin_arm64.c
index 3ab1d8b..6519edd 100644
--- a/src/runtime/cgo/gcc_signal_darwin_armx.c
+++ b/src/runtime/cgo/gcc_signal_darwin_arm64.c
@@ -18,8 +18,6 @@
// The dist tool enables this by build flag when testing.
// +build lldb
-// +build darwin
-// +build arm arm64
#include <limits.h>
#include <pthread.h>
diff --git a/src/runtime/cgo/gcc_signal_darwin_lldb.c b/src/runtime/cgo/gcc_signal_darwin_lldb.c
index 54d91f6..0ccdae3 100644
--- a/src/runtime/cgo/gcc_signal_darwin_lldb.c
+++ b/src/runtime/cgo/gcc_signal_darwin_lldb.c
@@ -4,7 +4,7 @@
// +build !lldb
// +build darwin
-// +build arm arm64
+// +build arm64
#include <stdint.h>
diff --git a/src/runtime/cgo/signal_darwin_arm.s b/src/runtime/cgo/signal_darwin_arm.s
deleted file mode 100644
index 0be10c0..0000000
--- a/src/runtime/cgo/signal_darwin_arm.s
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a
-// mach thread port as EXC_BAD_ACCESS. As the segfault may have happened
-// in C code, we first need to load_g then call xx_cgo_panicmem.
-//
-// R1 - LR at moment of fault
-// R2 - PC at moment of fault
-TEXT xx_cgo_panicmem(SB),NOSPLIT|NOFRAME,$0
- // If in external C code, we need to load the g register.
- BL runtime·load_g(SB)
- CMP $0, g
- BNE ongothread
-
- // On a foreign thread. We call badsignal, which will, if all
- // goes according to plan, not return.
- SUB $4, R13
- MOVW $11, R1
- MOVW $11, R2
- MOVM.DB.W [R1,R2], (R13)
- // TODO: badsignal should not return, but it does. Issue #10139.
- //BL runtime·badsignal(SB)
- MOVW $139, R1
- MOVW R1, 4(R13)
- B runtime·exit(SB)
-
-ongothread:
- // Trigger a SIGSEGV panic.
- //
- // The goal is to arrange the stack so it looks like the runtime
- // function sigpanic was called from the PC that faulted. It has
- // to be sigpanic, as the stack unwinding code in traceback.go
- // looks explicitly for it.
- //
- // To do this we call into runtime·setsigsegv, which sets the
- // appropriate state inside the g object. We give it the faulting
- // PC on the stack, then put it in the LR before calling sigpanic.
- MOVM.DB.W [R1,R2], (R13)
- BL runtime·setsigsegv(SB)
- MOVM.IA.W (R13), [R1,R2]
-
- SUB $4, R13
- MOVW R1, 0(R13)
- MOVW R2, R14
- B runtime·sigpanic(SB)
diff --git a/src/runtime/cgo/signal_darwin_armx.go b/src/runtime/cgo/signal_darwin_arm64.go
similarity index 87%
rename from src/runtime/cgo/signal_darwin_armx.go
rename to src/runtime/cgo/signal_darwin_arm64.go
index 9f4b462..3425c44 100644
--- a/src/runtime/cgo/signal_darwin_armx.go
+++ b/src/runtime/cgo/signal_darwin_arm64.go
@@ -2,9 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin
-// +build arm arm64
-
package cgo
import _ "unsafe"
diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go
index 048621f..d5e0cc1 100644
--- a/src/runtime/cgo_mmap.go
+++ b/src/runtime/cgo_mmap.go
@@ -20,6 +20,11 @@
//go:linkname _cgo_munmap _cgo_munmap
var _cgo_munmap unsafe.Pointer
+// mmap is used to route the mmap system call through C code when using cgo, to
+// support sanitizer interceptors. Don't allow stack splits, since this function
+// (used by sysAlloc) is called in a lot of low-level parts of the runtime and
+// callers often assume it won't acquire any locks.
+//go:nosplit
func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) {
if _cgo_mmap != nil {
// Make ret a uintptr so that writing to it in the
diff --git a/src/runtime/cgo_sigaction.go b/src/runtime/cgo_sigaction.go
index bc5e078..de634dc 100644
--- a/src/runtime/cgo_sigaction.go
+++ b/src/runtime/cgo_sigaction.go
@@ -18,12 +18,12 @@
//go:nosplit
//go:nowritebarrierrec
func sigaction(sig uint32, new, old *sigactiont) {
- // The runtime package is explicitly blacklisted from sanitizer
- // instrumentation in racewalk.go, but we might be calling into instrumented C
- // functions here — so we need the pointer parameters to be properly marked.
+ // racewalk.go avoids adding sanitizing instrumentation to package runtime,
+ // but we might be calling into instrumented C functions here,
+ // so we need the pointer parameters to be properly marked.
//
- // Mark the input as having been written before the call and the output as
- // read after.
+ // Mark the input as having been written before the call
+ // and the output as read after.
if msanenabled && new != nil {
msanwrite(unsafe.Pointer(new), unsafe.Sizeof(*new))
}
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index a881ae1..a4e64b0 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -90,6 +90,11 @@
type cgoCallers [32]uintptr
// Call from Go to C.
+//
+// This must be nosplit because it's used for syscalls on some
+// platforms. Syscalls may have untyped arguments on the stack, so
+// it's not safe to grow or scan the stack.
+//
//go:nosplit
func cgocall(fn, arg unsafe.Pointer) int32 {
if !iscgo && GOOS != "solaris" && GOOS != "illumos" && GOOS != "windows" {
@@ -127,6 +132,13 @@
// saved by entersyscall here.
entersyscall()
+ // Tell asynchronous preemption that we're entering external
+ // code. We do this after entersyscall because this may block
+ // and cause an async preemption to fail, but at this point a
+ // sync preemption will succeed (though this is not a matter
+ // of correctness).
+ osPreemptExtEnter(mp)
+
mp.incgo = true
errno := asmcgocall(fn, arg)
@@ -135,6 +147,8 @@
mp.incgo = false
mp.ncgo--
+ osPreemptExtExit(mp)
+
exitsyscall()
// Note that raceacquire must be called only after exitsyscall has
@@ -188,12 +202,16 @@
exitsyscall() // coming out of cgo call
gp.m.incgo = false
+ osPreemptExtExit(gp.m)
+
cgocallbackg1(ctxt)
// At this point unlockOSThread has been called.
// The following code must not change to a different m.
// This is enforced by checking incgo in the schedule function.
+ osPreemptExtEnter(gp.m)
+
gp.m.incgo = true
// going back to cgo call
reentersyscall(savedpc, uintptr(savedsp))
@@ -352,6 +370,7 @@
if mp.ncgo > 0 {
mp.incgo = false
mp.ncgo--
+ osPreemptExtExit(mp)
}
releasem(mp)
@@ -406,24 +425,24 @@
// cgoCheckPointer checks if the argument contains a Go pointer that
// points to a Go pointer, and panics if it does.
-func cgoCheckPointer(ptr interface{}, args ...interface{}) {
+func cgoCheckPointer(ptr interface{}, arg interface{}) {
if debug.cgocheck == 0 {
return
}
- ep := (*eface)(unsafe.Pointer(&ptr))
+ ep := efaceOf(&ptr)
t := ep._type
top := true
- if len(args) > 0 && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
+ if arg != nil && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
p := ep.data
if t.kind&kindDirectIface == 0 {
p = *(*unsafe.Pointer)(p)
}
- if !cgoIsGoPointer(p) {
+ if p == nil || !cgoIsGoPointer(p) {
return
}
- aep := (*eface)(unsafe.Pointer(&args[0]))
+ aep := efaceOf(&arg)
switch aep._type.kind & kindMask {
case kindBool:
if t.kind&kindMask == kindUnsafePointer {
@@ -460,7 +479,7 @@
// depending on indir. The top parameter is whether we are at the top
// level, where Go pointers are allowed.
func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
- if t.ptrdata == 0 {
+ if t.ptrdata == 0 || p == nil {
// If the type has no pointers there is nothing to do.
return
}
@@ -517,7 +536,7 @@
st := (*slicetype)(unsafe.Pointer(t))
s := (*slice)(p)
p = s.array
- if !cgoIsGoPointer(p) {
+ if p == nil || !cgoIsGoPointer(p) {
return
}
if !top {
@@ -548,11 +567,17 @@
return
}
for _, f := range st.fields {
+ if f.typ.ptrdata == 0 {
+ continue
+ }
cgoCheckArg(f.typ, add(p, f.offset()), true, top, msg)
}
case kindPtr, kindUnsafePointer:
if indir {
p = *(*unsafe.Pointer)(p)
+ if p == nil {
+ return
+ }
}
if !cgoIsGoPointer(p) {
@@ -644,7 +669,7 @@
return
}
- ep := (*eface)(unsafe.Pointer(&val))
+ ep := efaceOf(&val)
t := ep._type
cgoCheckArg(t, ep.data, t.kind&kindDirectIface == 0, false, cgoResultFail)
}
diff --git a/src/runtime/cgocheck.go b/src/runtime/cgocheck.go
index ed854e5..516045c 100644
--- a/src/runtime/cgocheck.go
+++ b/src/runtime/cgocheck.go
@@ -76,23 +76,24 @@
cgoCheckTypedBlock(typ, src, off, size)
}
-// cgoCheckSliceCopy is called when copying n elements of a slice from
-// src to dst. typ is the element type of the slice.
+// cgoCheckSliceCopy is called when copying n elements of a slice.
+// src and dst are pointers to the first element of the slice.
+// typ is the element type of the slice.
// It throws if the program is copying slice elements that contain Go pointers
// into non-Go memory.
//go:nosplit
//go:nowritebarrier
-func cgoCheckSliceCopy(typ *_type, dst, src slice, n int) {
+func cgoCheckSliceCopy(typ *_type, dst, src unsafe.Pointer, n int) {
if typ.ptrdata == 0 {
return
}
- if !cgoIsGoPointer(src.array) {
+ if !cgoIsGoPointer(src) {
return
}
- if cgoIsGoPointer(dst.array) {
+ if cgoIsGoPointer(dst) {
return
}
- p := src.array
+ p := src
for i := 0; i < n; i++ {
cgoCheckTypedBlock(typ, p, 0, typ.size)
p = add(p, typ.size)
@@ -133,7 +134,7 @@
}
s := spanOfUnchecked(uintptr(src))
- if s.state == mSpanManual {
+ if s.state.get() == mSpanManual {
// There are no heap bits for value stored on the stack.
// For a channel receive src might be on the stack of some
// other goroutine, so we can't unwind the stack even if
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 8334c1e..f6f4ffd 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -109,9 +109,10 @@
c.elemsize = uint16(elem.size)
c.elemtype = elem
c.dataqsiz = uint(size)
+ lockInit(&c.lock, lockRankHchan)
if debugChan {
- print("makechan: chan=", c, "; elemsize=", elem.size, "; elemalg=", elem.alg, "; dataqsiz=", size, "\n")
+ print("makechan: chan=", c, "; elemsize=", elem.size, "; dataqsiz=", size, "\n")
}
return c
}
@@ -121,6 +122,21 @@
return add(c.buf, uintptr(i)*uintptr(c.elemsize))
}
+// full reports whether a send on c would block (that is, the channel is full).
+// It uses a single word-sized read of mutable state, so although
+// the answer is instantaneously true, the correct answer may have changed
+// by the time the calling function receives the return value.
+func full(c *hchan) bool {
+ // c.dataqsiz is immutable (never written after the channel is created)
+ // so it is safe to read at any time during channel operation.
+ if c.dataqsiz == 0 {
+ // Assumes that a pointer read is relaxed-atomic.
+ return c.recvq.first == nil
+ }
+ // Assumes that a uint read is relaxed-atomic.
+ return c.qcount == c.dataqsiz
+}
+
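full is what keeps the non-blocking send fast path below cheap: a select with a single send case and a default clause is lowered to runtime.selectnbsend, which calls chansend with block=false, and the full(c) check lets it bail out without taking the channel lock. For example:

    package main

    import "fmt"

    func main() {
        c := make(chan int, 1)
        c <- 1 // buffer now full
        select {
        case c <- 2:
            fmt.Println("sent")
        default:
            // Taken here: chansend observes c.closed == 0 && full(c)
            // and returns false without acquiring c.lock.
            fmt.Println("channel full, not sent")
        }
    }
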
// entry point for c <- x from compiled code
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
@@ -160,7 +176,7 @@
//
// After observing that the channel is not closed, we observe that the channel is
// not ready for sending. Each of these observations is a single word-sized read
- // (first c.closed and second c.recvq.first or c.qcount depending on kind of channel).
+ // (first c.closed and second full()).
// Because a closed channel cannot transition from 'ready for sending' to
// 'not ready for sending', even if the channel is closed between the two observations,
// they imply a moment between the two when the channel was both not yet closed
@@ -169,9 +185,10 @@
//
// It is okay if the reads are reordered here: if we observe that the channel is not
// ready for sending and then observe that it is not closed, that implies that the
- // channel wasn't closed during the first observation.
- if !block && c.closed == 0 && ((c.dataqsiz == 0 && c.recvq.first == nil) ||
- (c.dataqsiz > 0 && c.qcount == c.dataqsiz)) {
+ // channel wasn't closed during the first observation. However, nothing here
+ // guarantees forward progress. We rely on the side effects of lock release in
+ // chanrecv() and closechan() to update this thread's view of c.closed and full().
+ if !block && c.closed == 0 && full(c) {
return false
}
@@ -233,7 +250,7 @@
gp.waiting = mysg
gp.param = nil
c.sendq.enqueue(mysg)
- goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3)
+ gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
// stack object, but sudogs aren't considered as roots of the
@@ -245,6 +262,7 @@
throw("G waiting list is corrupted")
}
gp.waiting = nil
+ gp.activeStackChans = false
if gp.param == nil {
if c.closed == 0 {
throw("chansend: spurious wakeup")
@@ -400,6 +418,16 @@
}
}
+// empty reports whether a read from c would block (that is, the channel is
+// empty). It uses a single atomic read of mutable state.
+func empty(c *hchan) bool {
+ // c.dataqsiz is immutable.
+ if c.dataqsiz == 0 {
+ return atomic.Loadp(unsafe.Pointer(&c.sendq.first)) == nil
+ }
+ return atomic.Loaduint(&c.qcount) == 0
+}
+
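empty plays the same role on the receive side. The fast path added to chanrecv below consults it twice, once before the closed check and once after, so a non-blocking receive from a channel that is both closed and drained can report the zero value without locking. For example:

    package main

    import "fmt"

    func main() {
        c := make(chan int, 1)
        c <- 42
        close(c)
        fmt.Println(<-c) // 42: closed but not yet drained
        select {
        case v, ok := <-c:
            // Taken here: empty(c), then closed != 0, then empty(c) again,
            // so chanrecv returns (true, false) and v is the zero value.
            fmt.Println(v, ok) // 0 false
        default:
            fmt.Println("would block")
        }
    }
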
// entry points for <- c from compiled code
//go:nosplit
func chanrecv1(c *hchan, elem unsafe.Pointer) {
@@ -435,21 +463,36 @@
}
// Fast path: check for failed non-blocking operation without acquiring the lock.
- //
- // After observing that the channel is not ready for receiving, we observe that the
- // channel is not closed. Each of these observations is a single word-sized read
- // (first c.sendq.first or c.qcount, and second c.closed).
- // Because a channel cannot be reopened, the later observation of the channel
- // being not closed implies that it was also not closed at the moment of the
- // first observation. We behave as if we observed the channel at that moment
- // and report that the receive cannot proceed.
- //
- // The order of operations is important here: reversing the operations can lead to
- // incorrect behavior when racing with a close.
- if !block && (c.dataqsiz == 0 && c.sendq.first == nil ||
- c.dataqsiz > 0 && atomic.Loaduint(&c.qcount) == 0) &&
- atomic.Load(&c.closed) == 0 {
- return
+ if !block && empty(c) {
+ // After observing that the channel is not ready for receiving, we observe whether the
+ // channel is closed.
+ //
+ // Reordering of these checks could lead to incorrect behavior when racing with a close.
+ // For example, if the channel was open and not empty, was closed, and then drained,
+ // reordered reads could incorrectly indicate "open and empty". To prevent reordering,
+ // we use atomic loads for both checks, and rely on emptying and closing to happen in
+ // separate critical sections under the same lock. This assumption fails when closing
+ // an unbuffered channel with a blocked send, but that is an error condition anyway.
+ if atomic.Load(&c.closed) == 0 {
+ // Because a channel cannot be reopened, the later observation of the channel
+ // being not closed implies that it was also not closed at the moment of the
+ // first observation. We behave as if we observed the channel at that moment
+ // and report that the receive cannot proceed.
+ return
+ }
+ // The channel is irreversibly closed. Re-check whether the channel has any pending data
+ // to receive, which could have arrived between the empty and closed checks above.
+ // Sequential consistency is also required here, when racing with such a send.
+ if empty(c) {
+ // The channel is irreversibly closed and empty.
+ if raceenabled {
+ raceacquire(c.raceaddr())
+ }
+ if ep != nil {
+ typedmemclr(c.elemtype, ep)
+ }
+ return true, false
+ }
}
var t0 int64
@@ -521,13 +564,14 @@
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
- goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3)
+ gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
if mysg != gp.waiting {
throw("G waiting list is corrupted")
}
gp.waiting = nil
+ gp.activeStackChans = false
if mysg.releasetime > 0 {
blockevent(mysg.releasetime-t0, 2)
}
@@ -594,6 +638,14 @@
goready(gp, skip+1)
}
+func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
+ // There are unlocked sudogs that point into gp's stack. Stack
+ // copying must lock the channels of those sudogs.
+ gp.activeStackChans = true
+ unlock((*mutex)(chanLock))
+ return true
+}
+
// compiler implements
//
// select {
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
index b6188f5..039a086 100644
--- a/src/runtime/chan_test.go
+++ b/src/runtime/chan_test.go
@@ -480,11 +480,11 @@
// If the select in the goroutine is fair,
// cnt1 and cnt2 should be about the same value.
// With 10,000 trials, the expected margin of error at
- // a confidence level of five nines is 4.4172 / (2 * Sqrt(10000)).
+ // a confidence level of six nines is 4.891676 / (2 * Sqrt(10000)).
r := float64(cnt1) / trials
e := math.Abs(r - 0.5)
t.Log(cnt1, cnt2, r, e)
- if e > 4.4172/(2*math.Sqrt(trials)) {
+ if e > 4.891676/(2*math.Sqrt(trials)) {
t.Errorf("unfair select: in %d trials, results were %d, %d", trials, cnt1, cnt2)
}
close(done)
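For the record, the updated constant works out as follows: with p = 0.5 the standard error over n = 10,000 trials is sqrt(0.25/n) = 1/(2*sqrt(n)), and 4.891676 is roughly the two-sided z-score for a 99.9999% ("six nines") confidence level, so the test now tolerates a deviation of about 4.891676/200 ≈ 0.0245 from an even split. A one-line check:

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        const trials = 10000.0
        fmt.Println(4.891676 / (2 * math.Sqrt(trials))) // ≈ 0.02445838
    }
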
@@ -719,6 +719,7 @@
if after.NumGC-before.NumGC >= 2 {
goto done
}
+ runtime.Gosched()
}
t.Fatal("failed to trigger concurrent GC")
done:
@@ -1126,6 +1127,20 @@
wg.Wait()
}
+func BenchmarkChanClosed(b *testing.B) {
+ c := make(chan struct{})
+ close(c)
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ select {
+ case <-c:
+ default:
+ b.Error("Unreachable")
+ }
+ }
+ })
+}
+
var (
alwaysFalse = false
workSink = 0
diff --git a/src/runtime/checkptr.go b/src/runtime/checkptr.go
new file mode 100644
index 0000000..59891a0
--- /dev/null
+++ b/src/runtime/checkptr.go
@@ -0,0 +1,83 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func checkptrAlignment(p unsafe.Pointer, elem *_type, n uintptr) {
+ // Check that (*[n]elem)(p) is appropriately aligned.
+ // Note that we allow unaligned pointers if the types they point to contain
+ // no pointers themselves. See issue 37298.
+ // TODO(mdempsky): What about fieldAlign?
+ if elem.ptrdata != 0 && uintptr(p)&(uintptr(elem.align)-1) != 0 {
+ throw("checkptr: misaligned pointer conversion")
+ }
+
+ // Check that (*[n]elem)(p) doesn't straddle multiple heap objects.
+ if size := n * elem.size; size > 1 && checkptrBase(p) != checkptrBase(add(p, size-1)) {
+ throw("checkptr: converted pointer straddles multiple allocations")
+ }
+}
+
+func checkptrArithmetic(p unsafe.Pointer, originals []unsafe.Pointer) {
+ if 0 < uintptr(p) && uintptr(p) < minLegalPointer {
+ throw("checkptr: pointer arithmetic computed bad pointer value")
+ }
+
+ // Check that if the computed pointer p points into a heap
+ // object, then one of the original pointers must have pointed
+ // into the same object.
+ base := checkptrBase(p)
+ if base == 0 {
+ return
+ }
+
+ for _, original := range originals {
+ if base == checkptrBase(original) {
+ return
+ }
+ }
+
+ throw("checkptr: pointer arithmetic result points to invalid allocation")
+}
+
+// checkptrBase returns the base address for the allocation containing
+// the address p.
+//
+// Importantly, if p1 and p2 point into the same variable, then
+// checkptrBase(p1) == checkptrBase(p2). However, the converse/inverse
+// is not necessarily true as allocations can have trailing padding,
+// and multiple variables may be packed into a single allocation.
+func checkptrBase(p unsafe.Pointer) uintptr {
+ // stack
+ if gp := getg(); gp.stack.lo <= uintptr(p) && uintptr(p) < gp.stack.hi {
+ // TODO(mdempsky): Walk the stack to identify the
+ // specific stack frame or even stack object that p
+ // points into.
+ //
+ // In the mean time, use "1" as a pseudo-address to
+ // represent the stack. This is an invalid address on
+ // all platforms, so it's guaranteed to be distinct
+ // from any of the addresses we might return below.
+ return 1
+ }
+
+ // heap (must check after stack because of #35068)
+ if base, _, _ := findObject(uintptr(p), 0, 0); base != 0 {
+ return base
+ }
+
+ // data or bss
+ for _, datap := range activeModules() {
+ if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
+ return datap.data
+ }
+ if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
+ return datap.bss
+ }
+ }
+
+ return 0
+}
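A minimal way to watch these checks fire is to build a program with the instrumentation enabled (go run -gcflags=all=-d=checkptr=1 main.go; the race detector also turns it on by default). The program below is an illustrative sketch, not part of the runtime tests: it converts a deliberately misaligned address to a pointer type that itself contains pointers, which is exactly what checkptrAlignment rejects:

    package main

    import "unsafe"

    var sink *[]byte

    func main() {
        var x [4]uint64 // 8-byte aligned backing storage
        p := unsafe.Pointer(&x[0])
        // The +1 stays inside the allocation, so checkptrArithmetic is
        // satisfied, but *[]byte has ptrdata != 0 and the address is now
        // misaligned, so checkptrAlignment throws:
        //   fatal error: checkptr: misaligned pointer conversion
        sink = (*[]byte)(unsafe.Pointer(uintptr(p) + 1))
    }
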
diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go
new file mode 100644
index 0000000..8ab8a49
--- /dev/null
+++ b/src/runtime/checkptr_test.go
@@ -0,0 +1,53 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "internal/testenv"
+ "os/exec"
+ "strings"
+ "testing"
+)
+
+func TestCheckPtr(t *testing.T) {
+ t.Parallel()
+ testenv.MustHaveGoRun(t)
+
+ exe, err := buildTestProg(t, "testprog", "-gcflags=all=-d=checkptr=1")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ testCases := []struct {
+ cmd string
+ want string
+ }{
+ {"CheckPtrAlignmentPtr", "fatal error: checkptr: misaligned pointer conversion\n"},
+ {"CheckPtrAlignmentNoPtr", ""},
+ {"CheckPtrArithmetic", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"},
+ {"CheckPtrSize", "fatal error: checkptr: converted pointer straddles multiple allocations\n"},
+ {"CheckPtrSmall", "fatal error: checkptr: pointer arithmetic computed bad pointer value\n"},
+ }
+
+ for _, tc := range testCases {
+ tc := tc
+ t.Run(tc.cmd, func(t *testing.T) {
+ t.Parallel()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, tc.cmd)).CombinedOutput()
+ if err != nil {
+ t.Log(err)
+ }
+ if tc.want == "" {
+ if len(got) > 0 {
+ t.Errorf("output:\n%s\nwant no output", got)
+ }
+ return
+ }
+ if !strings.HasPrefix(string(got), tc.want) {
+ t.Errorf("output:\n%s\n\nwant output starting with: %s", got, tc.want)
+ }
+ })
+ }
+}
diff --git a/src/runtime/conv_wasm_test.go b/src/runtime/conv_wasm_test.go
new file mode 100644
index 0000000..5054fca
--- /dev/null
+++ b/src/runtime/conv_wasm_test.go
@@ -0,0 +1,128 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "testing"
+)
+
+var res int64
+var ures uint64
+
+func TestFloatTruncation(t *testing.T) {
+ testdata := []struct {
+ input float64
+ convInt64 int64
+ convUInt64 uint64
+ overflow bool
+ }{
+ // max +- 1
+ {
+ input: 0x7fffffffffffffff,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ // For out-of-bounds conversion, the result is implementation-dependent.
+ // This test verifies the implementation on the wasm architecture.
+ {
+ input: 0x8000000000000000,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: 0x7ffffffffffffffe,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ // neg max +- 1
+ {
+ input: -0x8000000000000000,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: -0x8000000000000001,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: -0x7fffffffffffffff,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ // trunc point +- 1
+ {
+ input: 0x7ffffffffffffdff,
+ convInt64: 0x7ffffffffffffc00,
+ convUInt64: 0x7ffffffffffffc00,
+ },
+ {
+ input: 0x7ffffffffffffe00,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: 0x7ffffffffffffdfe,
+ convInt64: 0x7ffffffffffffc00,
+ convUInt64: 0x7ffffffffffffc00,
+ },
+ // neg trunc point +- 1
+ {
+ input: -0x7ffffffffffffdff,
+ convInt64: -0x7ffffffffffffc00,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: -0x7ffffffffffffe00,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: -0x7ffffffffffffdfe,
+ convInt64: -0x7ffffffffffffc00,
+ convUInt64: 0x8000000000000000,
+ },
+ // umax +- 1
+ {
+ input: 0xffffffffffffffff,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: 0x10000000000000000,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: 0xfffffffffffffffe,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ // umax trunc +- 1
+ {
+ input: 0xfffffffffffffbff,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0xfffffffffffff800,
+ },
+ {
+ input: 0xfffffffffffffc00,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0x8000000000000000,
+ },
+ {
+ input: 0xfffffffffffffbfe,
+ convInt64: -0x8000000000000000,
+ convUInt64: 0xfffffffffffff800,
+ },
+ }
+ for _, item := range testdata {
+ if got, want := int64(item.input), item.convInt64; got != want {
+ t.Errorf("int64(%f): got %x, want %x", item.input, got, want)
+ }
+ if got, want := uint64(item.input), item.convUInt64; got != want {
+ t.Errorf("uint64(%f): got %x, want %x", item.input, got, want)
+ }
+ }
+}
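The "trunc point" constants in the table fall out of float64's 53-bit significand: adjacent values near 2^63 are 2^10 apart and near 2^64 they are 2^11 apart, so the largest representable values below those powers are 2^63-1024 = 0x7ffffffffffffc00 and 2^64-2048 = 0xfffffffffffff800. A quick, portable cross-check (both conversions below are in range, so they are defined on every architecture):

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        below63 := math.Nextafter(math.Ldexp(1, 63), 0) // largest float64 < 2^63
        below64 := math.Nextafter(math.Ldexp(1, 64), 0) // largest float64 < 2^64
        fmt.Printf("%#x\n", uint64(below63))            // 0x7ffffffffffffc00
        fmt.Printf("%#x\n", uint64(below64))            // 0xfffffffffffff800
    }
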
diff --git a/src/runtime/cpuflags.go b/src/runtime/cpuflags.go
index 1565afb..5104650 100644
--- a/src/runtime/cpuflags.go
+++ b/src/runtime/cpuflags.go
@@ -11,11 +11,14 @@
// Offsets into internal/cpu records for use in assembly.
const (
+ offsetX86HasAVX = unsafe.Offsetof(cpu.X86.HasAVX)
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
+
+ offsetMIPS64XHasMSA = unsafe.Offsetof(cpu.MIPS64X.HasMSA)
)
var (
@@ -23,6 +26,9 @@
// TODO: deprecate these; use internal/cpu directly.
x86HasPOPCNT bool
x86HasSSE41 bool
+ x86HasFMA bool
+
+ armHasVFPv4 bool
arm64HasATOMICS bool
)
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
index 56cfb08..4872189 100644
--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -275,7 +275,13 @@
t.Fatal(err)
}
- got, err := testenv.CleanCmdEnv(exec.Command(exe, runArg)).CombinedOutput()
+ // pprofCgoTraceback is called whenever CGO code is executing and a signal
+ // is received. Disable signal preemption to increase the likelihood that at
+ // least one SIGPROF signal fires to capture a sample. See issue #37201.
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, runArg))
+ cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
+
+ got, err := cmd.CombinedOutput()
if err != nil {
if testenv.Builder() == "linux-amd64-alpine" {
// See Issue 18243 and Issue 19938.
@@ -549,3 +555,48 @@
}
return nil
}
+
+func TestSegv(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no signals on %s", runtime.GOOS)
+ }
+
+ for _, test := range []string{"Segv", "SegvInCgo"} {
+ t.Run(test, func(t *testing.T) {
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", test)
+ t.Log(got)
+ if !strings.Contains(got, "SIGSEGV") {
+ t.Errorf("expected crash from signal")
+ }
+ })
+ }
+}
+
+// TestEINTR tests that we handle EINTR correctly.
+// See issue #20400 and friends.
+func TestEINTR(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no EINTR on %s", runtime.GOOS)
+ case "linux":
+ if runtime.GOARCH == "386" {
+ // On linux-386 the Go signal handler sets
+ // a restorer function that is not preserved
+ // by the C sigaction call in the test,
+ // causing the signal handler to crash when
+ // returning to the normal code. The test is not
+ // architecture-specific, so just skip on 386
+ // rather than doing a complicated workaround.
+ t.Skip("skipping on linux-386; C sigaction does not preserve Go restorer")
+ }
+ }
+
+ t.Parallel()
+ output := runTestProg(t, "testprogcgo", "EINTR")
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
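A minimal sketch of the environment trick used in the pprof hunk above, with a hypothetical helper binary name (the real tests build their own helpers):

package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	// Hypothetical helper binary; the runtime tests compile theirs on the fly.
	cmd := exec.Command("./testhelper", "CgoPprof")
	// Disable asynchronous preemption so SIGPROF is more likely to land
	// while the C code under test is running.
	cmd.Env = append(os.Environ(), "GODEBUG=asyncpreemptoff=1")
	out, err := cmd.CombinedOutput()
	fmt.Printf("err=%v\noutput:\n%s", err, out)
}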
diff --git a/src/runtime/crash_nonunix_test.go b/src/runtime/crash_nonunix_test.go
index bf349a5..06c197e 100644
--- a/src/runtime/crash_nonunix_test.go
+++ b/src/runtime/crash_nonunix_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build windows plan9 nacl js,wasm
+// +build windows plan9 js,wasm
package runtime_test
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index c54bb57..34f30c9 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -55,6 +55,16 @@
t.Fatal(err)
}
+ return runBuiltTestProg(t, exe, name, env...)
+}
+
+func runBuiltTestProg(t *testing.T, exe, name string, env ...string) string {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
+ testenv.MustHaveGoBuild(t)
+
cmd := testenv.CleanCmdEnv(exec.Command(exe, name))
cmd.Env = append(cmd.Env, env...)
if testing.Short() {
@@ -64,7 +74,7 @@
cmd.Stdout = &b
cmd.Stderr = &b
if err := cmd.Start(); err != nil {
- t.Fatalf("starting %s %s: %v", binary, name, err)
+ t.Fatalf("starting %s %s: %v", exe, name, err)
}
// If the process doesn't complete within 1 minute,
@@ -92,7 +102,7 @@
}()
if err := cmd.Wait(); err != nil {
- t.Logf("%s %s exit status: %v", binary, name, err)
+ t.Logf("%s %s exit status: %v", exe, name, err)
}
close(done)
@@ -104,8 +114,6 @@
t.Skip("-quick")
}
- checkStaleRuntime(t)
-
testprog.Lock()
defer testprog.Unlock()
if testprog.dir == "" {
@@ -143,31 +151,12 @@
return exe, nil
}
-var (
- staleRuntimeOnce sync.Once // guards init of staleRuntimeErr
- staleRuntimeErr error
-)
-
-func checkStaleRuntime(t *testing.T) {
- staleRuntimeOnce.Do(func() {
- // 'go run' uses the installed copy of runtime.a, which may be out of date.
- out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput()
- if err != nil {
- staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out))
- return
- }
- if string(out) != "false\n" {
- t.Logf("go list -f {{.Stale}} runtime:\n%s", out)
- out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
- if err != nil {
- t.Logf("go list -f {{.StaleReason}} failed: %v", err)
- }
- t.Logf("go list -f {{.StaleReason}} runtime:\n%s", out)
- staleRuntimeErr = fmt.Errorf("Stale runtime.a. Run 'go install runtime'.")
- }
- })
- if staleRuntimeErr != nil {
- t.Fatal(staleRuntimeErr)
+func TestVDSO(t *testing.T) {
+ t.Parallel()
+ output := runTestProg(t, "testprog", "SignalInVDSO")
+ want := "success\n"
+ if output != want {
+ t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
}
}
@@ -225,9 +214,23 @@
func TestStackOverflow(t *testing.T) {
output := runTestProg(t, "testprog", "StackOverflow")
- want := "runtime: goroutine stack exceeds 1474560-byte limit\nfatal error: stack overflow"
- if !strings.HasPrefix(output, want) {
- t.Fatalf("output does not start with %q:\n%s", want, output)
+ want := []string{
+ "runtime: goroutine stack exceeds 1474560-byte limit\n",
+ "fatal error: stack overflow",
+ // information about the current SP and stack bounds
+ "runtime: sp=",
+ "stack=[",
+ }
+ if !strings.HasPrefix(output, want[0]) {
+ t.Errorf("output does not start with %q", want[0])
+ }
+ for _, s := range want[1:] {
+ if !strings.Contains(output, s) {
+ t.Errorf("output does not contain %q", s)
+ }
+ }
+ if t.Failed() {
+ t.Logf("output:\n%s", output)
}
}
@@ -251,6 +254,41 @@
}
+func TestRecursivePanic2(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic2")
+ want := `first panic
+second panic
+panic: third panic
+
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
+func TestRecursivePanic3(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic3")
+ want := `panic: first panic
+
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
+func TestRecursivePanic4(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic4")
+ want := `panic: first panic [recovered]
+ panic: second panic
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
func TestGoexitCrash(t *testing.T) {
output := runTestProg(t, "testprog", "GoexitExit")
want := "no goroutines (main called runtime.Goexit) - deadlock!"
@@ -382,26 +420,32 @@
}
func TestRecoverBeforePanicAfterGoexit(t *testing.T) {
- // 1. defer a function that recovers
- // 2. defer a function that panics
- // 3. call goexit
- // Goexit should run the #2 defer. Its panic
- // should be caught by the #1 defer, and execution
- // should resume in the caller. Like the Goexit
- // never happened!
- defer func() {
- r := recover()
- if r == nil {
- panic("bad recover")
- }
- }()
- defer func() {
- panic("hello")
- }()
- runtime.Goexit()
+ t.Parallel()
+ output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit")
+ want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+func TestRecoverBeforePanicAfterGoexit2(t *testing.T) {
+ t.Parallel()
+ output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit2")
+ want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
}
func TestNetpollDeadlock(t *testing.T) {
+ if os.Getenv("GO_BUILDER_NAME") == "darwin-amd64-10_12" {
+ // A suspected kernel bug in macOS 10.12 occasionally results in
+ // an apparent deadlock when dialing localhost. The errors have not
+ // been observed on newer versions of the OS, so we don't plan to work
+ // around them. See https://golang.org/issue/22019.
+ testenv.SkipFlaky(t, 22019)
+ }
+
t.Parallel()
output := runTestProg(t, "testprognet", "NetpollDeadlock")
want := "done\n"
@@ -413,7 +457,7 @@
func TestPanicTraceback(t *testing.T) {
t.Parallel()
output := runTestProg(t, "testprog", "PanicTraceback")
- want := "panic: hello"
+ want := "panic: hello\n\tpanic: panic pt2\n\tpanic: panic pt1\n"
if !strings.HasPrefix(output, want) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go
index ce227fe..8ef52ab 100644
--- a/src/runtime/crash_unix_test.go
+++ b/src/runtime/crash_unix_test.go
@@ -16,8 +16,11 @@
"path/filepath"
"runtime"
"strings"
+ "sync"
"syscall"
"testing"
+ "time"
+ "unsafe"
)
// sigquit is the signal to send to kill a hanging testdata program.
@@ -33,6 +36,29 @@
}
}
+func TestBadOpen(t *testing.T) {
+ // make sure we get the correct error code if open fails. Same for
+ // read/write/close on the resulting -1 fd. See issue 10052.
+ nonfile := []byte("/notreallyafile")
+ fd := runtime.Open(&nonfile[0], 0, 0)
+ if fd != -1 {
+ t.Errorf("open(%q)=%d, want -1", nonfile, fd)
+ }
+ var buf [32]byte
+ r := runtime.Read(-1, unsafe.Pointer(&buf[0]), int32(len(buf)))
+ if got, want := r, -int32(syscall.EBADF); got != want {
+ t.Errorf("read()=%d, want %d", got, want)
+ }
+ w := runtime.Write(^uintptr(0), unsafe.Pointer(&buf[0]), int32(len(buf)))
+ if got, want := w, -int32(syscall.EBADF); got != want {
+ t.Errorf("write()=%d, want %d", got, want)
+ }
+ c := runtime.Close(-1)
+ if c != -1 {
+ t.Errorf("close()=%d, want -1", c)
+ }
+}
+
func TestCrashDumpsAllThreads(t *testing.T) {
if *flagQuick {
t.Skip("-quick")
@@ -53,8 +79,6 @@
testenv.MustHaveGoBuild(t)
- checkStaleRuntime(t)
-
t.Parallel()
dir, err := ioutil.TempDir("", "go-build")
@@ -76,18 +100,17 @@
cmd = exec.Command(filepath.Join(dir, "a.exe"))
cmd = testenv.CleanCmdEnv(cmd)
- cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
-
- // Set GOGC=off. Because of golang.org/issue/10958, the tight
- // loops in the test program are not preemptible. If GC kicks
- // in, it may lock up and prevent main from saying it's ready.
- newEnv := []string{}
- for _, s := range cmd.Env {
- if !strings.HasPrefix(s, "GOGC=") {
- newEnv = append(newEnv, s)
- }
- }
- cmd.Env = append(newEnv, "GOGC=off")
+ cmd.Env = append(cmd.Env,
+ "GOTRACEBACK=crash",
+ // Set GOGC=off. Because of golang.org/issue/10958, the tight
+ // loops in the test program are not preemptible. If GC kicks
+ // in, it may lock up and prevent main from saying it's ready.
+ "GOGC=off",
+ // Set GODEBUG=asyncpreemptoff=1. If a thread is preempted
+ // when it receives SIGQUIT, it won't show the expected
+ // stack trace. See issue 35356.
+ "GODEBUG=asyncpreemptoff=1",
+ )
var outbuf bytes.Buffer
cmd.Stdout = &outbuf
@@ -266,6 +289,12 @@
}
func TestSignalIgnoreSIGTRAP(t *testing.T) {
+ if runtime.GOOS == "openbsd" {
+ if bn := testenv.Builder(); strings.HasSuffix(bn, "-62") || strings.HasSuffix(bn, "-64") {
+ testenv.SkipFlaky(t, 17496)
+ }
+ }
+
output := runTestProg(t, "testprognet", "SignalIgnoreSIGTRAP")
want := "OK\n"
if output != want {
@@ -285,3 +314,47 @@
t.Fatalf("want %s, got %s\n", want, output)
}
}
+
+func TestSignalM(t *testing.T) {
+ r, w, errno := runtime.Pipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+ runtime.Closeonexec(r)
+ runtime.Closeonexec(w)
+
+ var want, got int64
+ var wg sync.WaitGroup
+ ready := make(chan *runtime.M)
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ want, got = runtime.WaitForSigusr1(r, w, func(mp *runtime.M) {
+ ready <- mp
+ })
+ runtime.UnlockOSThread()
+ wg.Done()
+ }()
+ waitingM := <-ready
+ runtime.SendSigusr1(waitingM)
+
+ timer := time.AfterFunc(time.Second, func() {
+ // Write 1 to tell WaitForSigusr1 that we timed out.
+ bw := byte(1)
+ if n := runtime.Write(uintptr(w), unsafe.Pointer(&bw), 1); n != 1 {
+ t.Errorf("pipe write failed: %d", n)
+ }
+ })
+ defer timer.Stop()
+
+ wg.Wait()
+ if got == -1 {
+ t.Fatal("signalM signal not received")
+ } else if want != got {
+ t.Fatalf("signal sent to M %d, but received on M %d", want, got)
+ }
+}
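TestSignalM above uses runtime-internal pipe helpers; the timeout idea translates to ordinary code as well: block on a pipe read and let a timer write a sentinel byte if the real event never arrives. A standalone sketch with the standard os and time packages (not part of the patch):

package main

import (
	"fmt"
	"os"
	"time"
)

func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}
	defer r.Close()
	defer w.Close()

	// If nothing shows up within a second, write a sentinel byte so the
	// blocked reader can tell that it timed out.
	timer := time.AfterFunc(time.Second, func() {
		w.Write([]byte{1})
	})
	defer timer.Stop()

	var buf [1]byte
	if _, err := r.Read(buf[:]); err != nil {
		panic(err)
	}
	if buf[0] == 1 {
		fmt.Println("timed out waiting for the real event")
	}
}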
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index af5c3a1..76eeb2e 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -26,12 +26,12 @@
return ret
}
- stopTheWorld("GOMAXPROCS")
+ stopTheWorldGC("GOMAXPROCS")
// newprocs will be processed by startTheWorld
newprocs = int32(n)
- startTheWorld()
+ startTheWorldGC()
return ret
}
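For orientation, the only exported way to reach the code above is runtime.GOMAXPROCS itself; a brief usage sketch against the documented API (not part of the patch):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// n < 1 is a pure query: it returns the current setting without
	// changing anything.
	old := runtime.GOMAXPROCS(0)
	fmt.Println("GOMAXPROCS:", old)

	// Changing the value goes through the stopTheWorldGC/startTheWorldGC
	// pair shown in the hunk above.
	runtime.GOMAXPROCS(old + 1)
	runtime.GOMAXPROCS(old) // restore
}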
diff --git a/src/runtime/debug/heapdump_test.go b/src/runtime/debug/heapdump_test.go
index c986efc..de1ec27 100644
--- a/src/runtime/debug/heapdump_test.go
+++ b/src/runtime/debug/heapdump_test.go
@@ -13,7 +13,7 @@
)
func TestWriteHeapDumpNonempty(t *testing.T) {
- if runtime.GOOS == "nacl" || runtime.GOOS == "js" {
+ if runtime.GOOS == "js" {
t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS)
}
f, err := ioutil.TempFile("", "heapdumptest")
@@ -42,7 +42,7 @@
}
func TestWriteHeapDumpFinalizers(t *testing.T) {
- if runtime.GOOS == "nacl" || runtime.GOOS == "js" {
+ if runtime.GOOS == "js" {
t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS)
}
f, err := ioutil.TempFile("", "heapdumptest")
diff --git a/src/runtime/debug/mod.go b/src/runtime/debug/mod.go
index e3b929a..0381bdc 100644
--- a/src/runtime/debug/mod.go
+++ b/src/runtime/debug/mod.go
@@ -22,7 +22,7 @@
// the running binary.
type BuildInfo struct {
Path string // The main package path
- Main Module // The main module information
+ Main Module // The module containing the main package
Deps []*Module // Module dependencies
}
@@ -47,9 +47,27 @@
repLine = "=>\t"
)
- info := &BuildInfo{}
+ readEntryFirstLine := func(elem []string) (Module, bool) {
+ if len(elem) != 2 && len(elem) != 3 {
+ return Module{}, false
+ }
+ sum := ""
+ if len(elem) == 3 {
+ sum = elem[2]
+ }
+ return Module{
+ Path: elem[0],
+ Version: elem[1],
+ Sum: sum,
+ }, true
+ }
- var line string
+ var (
+ info = &BuildInfo{}
+ last *Module
+ line string
+ ok bool
+ )
// Reverse of cmd/go/internal/modload.PackageBuildInfo
for len(data) > 0 {
i := strings.IndexByte(data, '\n')
@@ -63,42 +81,33 @@
info.Path = elem
case strings.HasPrefix(line, modLine):
elem := strings.Split(line[len(modLine):], "\t")
- if len(elem) != 3 {
+ last = &info.Main
+ *last, ok = readEntryFirstLine(elem)
+ if !ok {
return nil, false
}
- info.Main = Module{
- Path: elem[0],
- Version: elem[1],
- Sum: elem[2],
- }
case strings.HasPrefix(line, depLine):
elem := strings.Split(line[len(depLine):], "\t")
- if len(elem) != 2 && len(elem) != 3 {
+ last = new(Module)
+ info.Deps = append(info.Deps, last)
+ *last, ok = readEntryFirstLine(elem)
+ if !ok {
return nil, false
}
- sum := ""
- if len(elem) == 3 {
- sum = elem[2]
- }
- info.Deps = append(info.Deps, &Module{
- Path: elem[0],
- Version: elem[1],
- Sum: sum,
- })
case strings.HasPrefix(line, repLine):
elem := strings.Split(line[len(repLine):], "\t")
if len(elem) != 3 {
return nil, false
}
- last := len(info.Deps) - 1
- if last < 0 {
+ if last == nil {
return nil, false
}
- info.Deps[last].Replace = &Module{
+ last.Replace = &Module{
Path: elem[0],
Version: elem[1],
Sum: elem[2],
}
+ last = nil
}
}
return info, true
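The parser above is the unexported half of runtime/debug; the public entry point is debug.ReadBuildInfo. A small usage sketch showing where the Main, Deps, and Replace fields it fills end up:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		fmt.Println("binary was not built with module support")
		return
	}
	fmt.Println("main module:", info.Main.Path, info.Main.Version)
	for _, dep := range info.Deps {
		fmt.Println("dep:", dep.Path, dep.Version)
		if dep.Replace != nil {
			fmt.Println("  replaced by:", dep.Replace.Path, dep.Replace.Version)
		}
	}
}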
diff --git a/src/runtime/debug_test.go b/src/runtime/debug_test.go
index f77a373..722e811 100644
--- a/src/runtime/debug_test.go
+++ b/src/runtime/debug_test.go
@@ -126,7 +126,7 @@
return x + 1
}
args.x = 42
- if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill, false); err != nil {
t.Fatal(err)
}
if args.yRet != 43 {
@@ -155,7 +155,7 @@
args.in[i] = i
want[i] = i + 1
}
- if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill, false); err != nil {
t.Fatal(err)
}
if want != args.out {
@@ -168,7 +168,7 @@
defer after()
// Inject a call that performs a GC.
- if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill, false); err != nil {
t.Fatal(err)
}
}
@@ -179,7 +179,7 @@
// Inject a call that grows the stack. debugCallWorker checks
// for stack pointer breakage.
- if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill, false); err != nil {
t.Fatal(err)
}
}
@@ -215,7 +215,7 @@
runtime.Gosched()
}
- _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill)
+ _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill, true)
if msg := "call not at safe point"; err == nil || err.Error() != msg {
t.Fatalf("want %q, got %s", msg, err)
}
@@ -239,7 +239,7 @@
}()
g := <-ready
- p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill)
+ p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill, false)
if err != nil {
t.Fatal(err)
}
diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go
index f03d235..6c285ec 100644
--- a/src/runtime/debugcall.go
+++ b/src/runtime/debugcall.go
@@ -61,7 +61,7 @@
"debugCall16384",
"debugCall32768",
"debugCall65536":
- // These functions are whitelisted so that the debugger can initiate multiple function calls.
+ // These functions are allowed so that the debugger can initiate multiple function calls.
// See: https://golang.org/cl/161137/
return
}
@@ -76,28 +76,173 @@
return
}
- // Look up PC's register map.
- pcdata := int32(-1)
- if pc != f.entry {
- pc--
- pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
- }
- if pcdata == -1 {
- pcdata = 0 // in prologue
- }
- stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
- if pcdata == -2 || stkmap == nil {
- // Not at a safe point.
- ret = debugCallUnsafePoint
- return
+ if !go115ReduceLiveness {
+ // Look up PC's register map.
+ pcdata := int32(-1)
+ if pc != f.entry {
+ pc--
+ pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
+ }
+ if pcdata == -1 {
+ pcdata = 0 // in prologue
+ }
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
+ if pcdata == -2 || stkmap == nil {
+ // Not at a safe point.
+ ret = debugCallUnsafePoint
+ return
+ }
+ } else {
+ // Check that this isn't an unsafe-point.
+ if pc != f.entry {
+ pc--
+ }
+ up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil)
+ if up != _PCDATA_UnsafePointSafe {
+ // Not at a safe point.
+ ret = debugCallUnsafePoint
+ }
}
})
return ret
}
-// debugCallWrap pushes a defer to recover from panics in debug calls
-// and then calls the dispatching function at PC dispatch.
+// debugCallWrap starts a new goroutine to run a debug call and blocks
+// the calling goroutine. On the goroutine, it prepares to recover
+// panics from the debug call, and then calls the call dispatching
+// function at PC dispatch.
+//
+// This must be deeply nosplit because there are untyped values on the
+// stack from debugCallV1.
+//
+//go:nosplit
func debugCallWrap(dispatch uintptr) {
+ var lockedm bool
+ var lockedExt uint32
+ callerpc := getcallerpc()
+ gp := getg()
+
+ // Create a new goroutine to execute the call on. Run this on
+ // the system stack to avoid growing our stack.
+ systemstack(func() {
+ var args struct {
+ dispatch uintptr
+ callingG *g
+ }
+ args.dispatch = dispatch
+ args.callingG = gp
+ fn := debugCallWrap1
+ newg := newproc1(*(**funcval)(unsafe.Pointer(&fn)), unsafe.Pointer(&args), int32(unsafe.Sizeof(args)), gp, callerpc)
+
+ // If the current G is locked, then transfer that
+ // locked-ness to the new goroutine.
+ if gp.lockedm != 0 {
+ // Save lock state to restore later.
+ mp := gp.m
+ if mp != gp.lockedm.ptr() {
+ throw("inconsistent lockedm")
+ }
+
+ lockedm = true
+ lockedExt = mp.lockedExt
+
+ // Transfer external lock count to internal so
+ // it can't be unlocked from the debug call.
+ mp.lockedInt++
+ mp.lockedExt = 0
+
+ mp.lockedg.set(newg)
+ newg.lockedm.set(mp)
+ gp.lockedm = 0
+ }
+
+ // Mark the calling goroutine as being at an async
+ // safe-point, since it has a few conservative frames
+ // at the bottom of the stack. This also prevents
+ // stack shrinks.
+ gp.asyncSafePoint = true
+
+ // Stash newg away so we can execute it below (mcall's
+ // closure can't capture anything).
+ gp.schedlink.set(newg)
+ })
+
+ // Switch to the new goroutine.
+ mcall(func(gp *g) {
+ // Get newg.
+ newg := gp.schedlink.ptr()
+ gp.schedlink = 0
+
+ // Park the calling goroutine.
+ gp.waitreason = waitReasonDebugCall
+ if trace.enabled {
+ traceGoPark(traceEvGoBlock, 1)
+ }
+ casgstatus(gp, _Grunning, _Gwaiting)
+ dropg()
+
+ // Directly execute the new goroutine. The debug
+ // protocol will continue on the new goroutine, so
+ // it's important we not just let the scheduler do
+ // this or it may resume a different goroutine.
+ execute(newg, true)
+ })
+
+ // We'll resume here when the call returns.
+
+ // Restore locked state.
+ if lockedm {
+ mp := gp.m
+ mp.lockedExt = lockedExt
+ mp.lockedInt--
+ mp.lockedg.set(gp)
+ gp.lockedm.set(mp)
+ }
+
+ gp.asyncSafePoint = false
+}
+
+// debugCallWrap1 is the continuation of debugCallWrap on the callee
+// goroutine.
+func debugCallWrap1(dispatch uintptr, callingG *g) {
+ // Dispatch call and trap panics.
+ debugCallWrap2(dispatch)
+
+ // Resume the caller goroutine.
+ getg().schedlink.set(callingG)
+ mcall(func(gp *g) {
+ callingG := gp.schedlink.ptr()
+ gp.schedlink = 0
+
+ // Unlock this goroutine from the M if necessary. The
+ // calling G will relock.
+ if gp.lockedm != 0 {
+ gp.lockedm = 0
+ gp.m.lockedg = 0
+ }
+
+ // Switch back to the calling goroutine. At some point
+ // the scheduler will schedule us again and we'll
+ // finish exiting.
+ if trace.enabled {
+ traceGoSched()
+ }
+ casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
+ lock(&sched.lock)
+ globrunqput(gp)
+ unlock(&sched.lock)
+
+ if trace.enabled {
+ traceGoUnpark(callingG, 0)
+ }
+ casgstatus(callingG, _Gwaiting, _Grunnable)
+ execute(callingG, true)
+ })
+}
+
+func debugCallWrap2(dispatch uintptr) {
+ // Call the dispatch function and trap panics.
var dispatchF func()
dispatchFV := funcval{dispatch}
*(*unsafe.Pointer)(unsafe.Pointer(&dispatchF)) = noescape(unsafe.Pointer(&dispatchFV))
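debugCallWrap above hands the debugger-injected call to a brand-new goroutine and parks the caller until it finishes, using mcall and execute directly. As a rough user-space analogue only (channels instead of scheduler internals; this is not how the runtime itself does it):

package main

import "fmt"

// runOnFreshGoroutine runs fn on a new goroutine and blocks the caller until
// it completes, so fn gets a clean stack of its own.
func runOnFreshGoroutine(fn func()) {
	done := make(chan struct{})
	go func() {
		defer close(done)
		fn()
	}()
	<-done // the caller "parks" here, loosely like the mcall/casgstatus dance above
}

func main() {
	runOnFreshGoroutine(func() { fmt.Println("ran on a new goroutine") })
}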
diff --git a/src/runtime/debuglog.go b/src/runtime/debuglog.go
index 100f2d3..3ce3273 100644
--- a/src/runtime/debuglog.go
+++ b/src/runtime/debuglog.go
@@ -665,13 +665,17 @@
print("..(", r.uvarint(), " more bytes)..")
case debugLogPC:
- printDebugLogPC(uintptr(r.uvarint()))
+ printDebugLogPC(uintptr(r.uvarint()), false)
case debugLogTraceback:
n := int(r.uvarint())
for i := 0; i < n; i++ {
print("\n\t")
- printDebugLogPC(uintptr(r.uvarint()))
+ // gentraceback PCs are always return PCs.
+ // Convert them to call PCs.
+ //
+ // TODO(austin): Expand inlined frames.
+ printDebugLogPC(uintptr(r.uvarint()), true)
}
}
@@ -794,9 +798,17 @@
printunlock()
}
-func printDebugLogPC(pc uintptr) {
- print(hex(pc))
+// printDebugLogPC prints a single symbolized PC. If returnPC is true,
+// pc is a return PC that must first be converted to a call PC.
+func printDebugLogPC(pc uintptr, returnPC bool) {
fn := findfunc(pc)
+ if returnPC && (!fn.valid() || pc > fn.entry) {
+ // TODO(austin): Don't back up if the previous frame
+ // was a sigpanic.
+ pc--
+ }
+
+ print(hex(pc))
if !fn.valid() {
print(" [unknown PC]")
} else {
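The hunk above backs a return PC up by one before symbolizing it so the reported line points at the call rather than after it. The same concern exists in user code: runtime.Callers reports return addresses, and runtime.CallersFrames performs the equivalent adjustment. A small sketch using only the standard library:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	pcs := make([]uintptr, 8)
	n := runtime.Callers(1, pcs) // skip the runtime.Callers frame itself
	frames := runtime.CallersFrames(pcs[:n])
	for {
		f, more := frames.Next()
		fmt.Printf("%s:%d %s\n", f.File, f.Line, f.Function)
		if !more {
			break
		}
	}
}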
diff --git a/src/runtime/defer_test.go b/src/runtime/defer_test.go
new file mode 100644
index 0000000..5ac0814
--- /dev/null
+++ b/src/runtime/defer_test.go
@@ -0,0 +1,412 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "reflect"
+ "runtime"
+ "testing"
+)
+
+// Make sure open-coded defer exit code is not lost, even when there is an
+// unconditional panic (hence no return from the function)
+func TestUnconditionalPanic(t *testing.T) {
+ defer func() {
+ if recover() != "testUnconditional" {
+ t.Fatal("expected unconditional panic")
+ }
+ }()
+ panic("testUnconditional")
+}
+
+var glob int = 3
+
+// Test an open-coded defer and non-open-coded defer - make sure both defers run
+// and call recover()
+func TestOpenAndNonOpenDefers(t *testing.T) {
+ for {
+ // Non-open defer because in a loop
+ defer func(n int) {
+ if recover() != "testNonOpenDefer" {
+ t.Fatal("expected testNonOpen panic")
+ }
+ }(3)
+ if glob > 2 {
+ break
+ }
+ }
+ testOpen(t, 47)
+ panic("testNonOpenDefer")
+}
+
+//go:noinline
+func testOpen(t *testing.T, arg int) {
+ defer func(n int) {
+ if recover() != "testOpenDefer" {
+ t.Fatal("expected testOpen panic")
+ }
+ }(4)
+ if arg > 2 {
+ panic("testOpenDefer")
+ }
+}
+
+// Test a non-open-coded defer and an open-coded defer - make sure both defers run
+// and call recover()
+func TestNonOpenAndOpenDefers(t *testing.T) {
+ testOpen(t, 47)
+ for {
+ // Non-open defer because in a loop
+ defer func(n int) {
+ if recover() != "testNonOpenDefer" {
+ t.Fatal("expected testNonOpen panic")
+ }
+ }(3)
+ if glob > 2 {
+ break
+ }
+ }
+ panic("testNonOpenDefer")
+}
+
+var list []int
+
+// Make sure that conditional open-coded defers are activated correctly and run in
+// the correct order.
+func TestConditionalDefers(t *testing.T) {
+ list = make([]int, 0, 10)
+
+ defer func() {
+ if recover() != "testConditional" {
+ t.Fatal("expected panic")
+ }
+ want := []int{4, 2, 1}
+ if !reflect.DeepEqual(want, list) {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", want, list))
+ }
+
+ }()
+ testConditionalDefers(8)
+}
+
+func testConditionalDefers(n int) {
+ doappend := func(i int) {
+ list = append(list, i)
+ }
+
+ defer doappend(1)
+ if n > 5 {
+ defer doappend(2)
+ if n > 8 {
+ defer doappend(3)
+ } else {
+ defer doappend(4)
+ }
+ }
+ panic("testConditional")
+}
+
+// Test that there is no compile-time or run-time error if an open-coded defer
+// call is removed by constant propagation and dead-code elimination.
+func TestDisappearingDefer(t *testing.T) {
+ switch runtime.GOOS {
+ case "invalidOS":
+ defer func() {
+ t.Fatal("Defer shouldn't run")
+ }()
+ }
+}
+
+// This tests an extra recursive panic behavior that is only specified in the
+// code. Suppose a first panic P1 happens and starts processing defer calls. If a
+// second panic P2 happens while processing defer call D in frame F, then defer
+// call processing is restarted (with some potentially new defer calls created by
+// D or its callees). If the defer processing reaches the started defer call D
+// again in the defer stack, then the original panic P1 is aborted and cannot
+// continue panic processing or be recovered. If the panic P2 does a recover at
+// some point, it will naturally remove the original panic P1 from the stack
+// (since the original panic had to be in frame F or a descendant of F).
+func TestAbortedPanic(t *testing.T) {
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
+ }
+ }()
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", "panic2", r))
+ }
+ }()
+ defer func() {
+ panic("panic2")
+ }()
+ panic("panic1")
+}
+
+// This tests that recover() does not succeed unless it is called directly from a
+// defer function that is directly called by the panic. Here, we first call it
+// from a defer function that is created by the defer function called directly by
+// the panic. In this case the nested recover() should return nil.
+func TestRecoverMatching(t *testing.T) {
+ defer func() {
+ r := recover()
+ if r != "panic1" {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", "panic1", r))
+ }
+ }()
+ defer func() {
+ defer func() {
+ // Shouldn't succeed, even though it is called directly
+ // from a defer function, since this defer function was
+ // not directly called by the panic.
+ r := recover()
+ if r != nil {
+ t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
+ }
+ }()
+ }()
+ panic("panic1")
+}
+
+type nonSSAable [128]byte
+
+type bigStruct struct {
+ x, y, z, w, p, q int64
+}
+
+type containsBigStruct struct {
+ element bigStruct
+}
+
+func mknonSSAable() nonSSAable {
+ globint1++
+ return nonSSAable{0, 0, 0, 0, 5}
+}
+
+var globint1, globint2, globint3 int
+
+//go:noinline
+func sideeffect(n int64) int64 {
+ globint2++
+ return n
+}
+
+func sideeffect2(in containsBigStruct) containsBigStruct {
+ globint3++
+ return in
+}
+
+// Test that nonSSAable arguments to defer are handled correctly and only evaluated once.
+func TestNonSSAableArgs(t *testing.T) {
+ globint1 = 0
+ globint2 = 0
+ globint3 = 0
+ var save1 byte
+ var save2 int64
+ var save3 int64
+ var save4 int64
+
+ defer func() {
+ if globint1 != 1 {
+ t.Fatal(fmt.Sprintf("globint1: wanted: 1, got %v", globint1))
+ }
+ if save1 != 5 {
+ t.Fatal(fmt.Sprintf("save1: wanted: 5, got %v", save1))
+ }
+ if globint2 != 1 {
+ t.Fatal(fmt.Sprintf("globint2: wanted: 1, got %v", globint2))
+ }
+ if save2 != 2 {
+ t.Fatal(fmt.Sprintf("save2: wanted: 2, got %v", save2))
+ }
+ if save3 != 4 {
+ t.Fatal(fmt.Sprintf("save3: wanted: 4, got %v", save3))
+ }
+ if globint3 != 1 {
+ t.Fatal(fmt.Sprintf("globint3: wanted: 1, got %v", globint3))
+ }
+ if save4 != 4 {
+ t.Fatal(fmt.Sprintf("save1: wanted: 4, got %v", save4))
+ }
+ }()
+
+ // Test function returning a non-SSAable arg
+ defer func(n nonSSAable) {
+ save1 = n[4]
+ }(mknonSSAable())
+ // Test composite literal that is not SSAable
+ defer func(b bigStruct) {
+ save2 = b.y
+ }(bigStruct{1, 2, 3, 4, 5, sideeffect(6)})
+
+ // Test struct field reference that is non-SSAable
+ foo := containsBigStruct{}
+ foo.element.z = 4
+ defer func(element bigStruct) {
+ save3 = element.z
+ }(foo.element)
+ defer func(element bigStruct) {
+ save4 = element.z
+ }(sideeffect2(foo).element)
+}
+
+//go:noinline
+func doPanic() {
+ panic("Test panic")
+}
+
+func TestDeferForFuncWithNoExit(t *testing.T) {
+ cond := 1
+ defer func() {
+ if cond != 2 {
+ t.Fatal(fmt.Sprintf("cond: wanted 2, got %v", cond))
+ }
+ if recover() != "Test panic" {
+ t.Fatal("Didn't find expected panic")
+ }
+ }()
+ x := 0
+ // Force a stack copy, to make sure that the &cond pointer passed to defer
+ // function is properly updated.
+ growStackIter(&x, 1000)
+ cond = 2
+ doPanic()
+
+ // This function has no exit/return, since it ends with an infinite loop
+ for {
+ }
+}
+
+// Test case approximating issue #37664, where a recursive function (interpreter)
+// may do repeated recovers/re-panics until it reaches the frame where the panic
+// can actually be handled. The recurseFnPanicRec() function is testing that there
+// are no stale defer structs on the defer chain after the interpreter() sequence,
+// by writing a bunch of 0xffffffffs into several recursive stack frames, and then
+// doing a single panic-recover which would invoke any such stale defer structs.
+func TestDeferWithRepeatedRepanics(t *testing.T) {
+ interpreter(0, 6, 2)
+ recurseFnPanicRec(0, 10)
+ interpreter(0, 5, 1)
+ recurseFnPanicRec(0, 10)
+ interpreter(0, 6, 3)
+ recurseFnPanicRec(0, 10)
+}
+
+func interpreter(level int, maxlevel int, rec int) {
+ defer func() {
+ e := recover()
+ if e == nil {
+ return
+ }
+ if level != e.(int) {
+ //fmt.Fprintln(os.Stderr, "re-panicing, level", level)
+ panic(e)
+ }
+ //fmt.Fprintln(os.Stderr, "Recovered, level", level)
+ }()
+ if level+1 < maxlevel {
+ interpreter(level+1, maxlevel, rec)
+ } else {
+ //fmt.Fprintln(os.Stderr, "Initiating panic")
+ panic(rec)
+ }
+}
+
+func recurseFnPanicRec(level int, maxlevel int) {
+ defer func() {
+ recover()
+ }()
+ recurseFn(level, maxlevel)
+}
+
+var saveInt uint32
+
+func recurseFn(level int, maxlevel int) {
+ a := [40]uint32{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}
+ if level+1 < maxlevel {
+ // Make sure a array is referenced, so it is not optimized away
+ saveInt = a[4]
+ recurseFn(level+1, maxlevel)
+ } else {
+ panic("recurseFn panic")
+ }
+}
+
+// Try to reproduce issue #37688, where a pointer to an open-coded defer struct is
+// mistakenly held, and that struct keeps a pointer to a stack-allocated defer
+// struct, and that stack-allocated struct gets overwritten or the stack gets
+// moved, so a memory error happens on GC.
+func TestIssue37688(t *testing.T) {
+ for j := 0; j < 10; j++ {
+ g2()
+ g3()
+ }
+}
+
+type foo struct {
+}
+
+//go:noinline
+func (f *foo) method1() {
+}
+
+//go:noinline
+func (f *foo) method2() {
+}
+
+func g2() {
+ var a foo
+ ap := &a
+ // The loop forces this defer to be heap-allocated and the remaining two
+ // to be stack-allocated.
+ for i := 0; i < 1; i++ {
+ defer ap.method1()
+ }
+ defer ap.method2()
+ defer ap.method1()
+ ff1(ap, 1, 2, 3, 4, 5, 6, 7, 8, 9)
+ // Try to get the stack to be moved by growing it too large, so
+ // existing stack-allocated defer becomes invalid.
+ rec1(2000)
+}
+
+func g3() {
+ // Mix up the stack layout by adding in an extra function frame
+ g2()
+}
+
+var globstruct struct {
+ a, b, c, d, e, f, g, h, i int
+}
+
+func ff1(ap *foo, a, b, c, d, e, f, g, h, i int) {
+ defer ap.method1()
+
+ // Make a defer that has a very large set of args, hence big size for the
+ // defer record for the open-coded frame (which means it won't use the
+ // defer pool)
+ defer func(ap *foo, a, b, c, d, e, f, g, h, i int) {
+ if v := recover(); v != nil {
+ }
+ globstruct.a = a
+ globstruct.b = b
+ globstruct.c = c
+ globstruct.d = d
+ globstruct.e = e
+ globstruct.f = f
+ globstruct.g = g
+ globstruct.h = h
+ }(ap, a, b, c, d, e, f, g, h, i)
+ panic("ff1 panic")
+}
+
+func rec1(max int) {
+ if max > 0 {
+ rec1(max - 1)
+ }
+}
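A standalone sketch, not part of the patch, of the compile-time distinction these tests exercise (assuming a Go 1.14 or newer compiler):

package main

import "fmt"

// straightLine has a small, fixed set of defers and no loops, so the compiler
// can open-code them: the deferred calls are emitted inline on the function's
// exit path instead of going through runtime-managed defer records.
func straightLine() {
	defer fmt.Println("first defer")
	defer fmt.Println("second defer")
}

// inLoop defers inside a loop; the number of pending defers is unknown at
// compile time, so these go through runtime defer records instead.
func inLoop(n int) {
	for i := 0; i < n; i++ {
		defer fmt.Println("loop defer", i)
	}
}

func main() {
	straightLine()
	inLoop(2)
}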
diff --git a/src/runtime/defs1_linux.go b/src/runtime/defs1_linux.go
index e136d96..4085d6f 100644
--- a/src/runtime/defs1_linux.go
+++ b/src/runtime/defs1_linux.go
@@ -21,6 +21,7 @@
const (
O_RDONLY = C.O_RDONLY
+ O_NONBLOCK = C.O_NONBLOCK
O_CLOEXEC = C.O_CLOEXEC
SA_RESTORER = C.SA_RESTORER
)
diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go
index 3eae12e..a4548e6 100644
--- a/src/runtime/defs1_netbsd_386.go
+++ b/src/runtime/defs1_netbsd_386.go
@@ -6,6 +6,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go
index 51d55c9..4b0e79e 100644
--- a/src/runtime/defs1_netbsd_amd64.go
+++ b/src/runtime/defs1_netbsd_amd64.go
@@ -6,6 +6,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go
index fadb341..2b5d599 100644
--- a/src/runtime/defs1_netbsd_arm.go
+++ b/src/runtime/defs1_netbsd_arm.go
@@ -6,6 +6,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs1_netbsd_arm64.go b/src/runtime/defs1_netbsd_arm64.go
index 41b7aac..740dc77 100644
--- a/src/runtime/defs1_netbsd_arm64.go
+++ b/src/runtime/defs1_netbsd_arm64.go
@@ -6,6 +6,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs1_solaris_amd64.go b/src/runtime/defs1_solaris_amd64.go
index 64d51a7..19e8a25 100644
--- a/src/runtime/defs1_solaris_amd64.go
+++ b/src/runtime/defs1_solaris_amd64.go
@@ -8,9 +8,12 @@
_EBADF = 0x9
_EFAULT = 0xe
_EAGAIN = 0xb
+ _EBUSY = 0x10
+ _ETIME = 0x3e
_ETIMEDOUT = 0x91
_EWOULDBLOCK = 0xb
_EINPROGRESS = 0x96
+ _ENOSYS = 0x59
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -89,6 +92,7 @@
_MAXHOSTNAMELEN = 0x100
_O_NONBLOCK = 0x80
+ _O_CLOEXEC = 0x800000
_FD_CLOEXEC = 0x1
_F_GETFL = 0x3
_F_SETFL = 0x4
@@ -99,7 +103,9 @@
_POLLHUP = 0x10
_POLLERR = 0x8
- _PORT_SOURCE_FD = 0x4
+ _PORT_SOURCE_FD = 0x4
+ _PORT_SOURCE_ALERT = 0x5
+ _PORT_ALERT_UPDATE = 0x2
)
type semt struct {
diff --git a/src/runtime/defs2_linux.go b/src/runtime/defs2_linux.go
index b08c0da..87e19c1 100644
--- a/src/runtime/defs2_linux.go
+++ b/src/runtime/defs2_linux.go
@@ -61,7 +61,7 @@
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
- MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
+ MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE
SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK
diff --git a/src/runtime/defs_aix.go b/src/runtime/defs_aix.go
index bc5101f..23a6cac 100644
--- a/src/runtime/defs_aix.go
+++ b/src/runtime/defs_aix.go
@@ -8,7 +8,7 @@
Input to cgo -godefs
GOARCH=ppc64 go tool cgo -godefs defs_aix.go > defs_aix_ppc64_tmp.go
-This is only an helper to create defs_aix_ppc64.go
+This is only a helper to create defs_aix_ppc64.go
Go runtime functions require the "linux" name of fields (ss_sp, si_addr, etc)
However, AIX structures don't provide such names and must be modified.
@@ -123,7 +123,8 @@
_ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
_ITIMER_PROF = C.ITIMER_PROF
- _O_RDONLY = C.O_RDONLY
+ _O_RDONLY = C.O_RDONLY
+ _O_NONBLOCK = C.O_NONBLOCK
_SS_DISABLE = C.SS_DISABLE
_SI_USER = C.SI_USER
diff --git a/src/runtime/defs_aix_ppc64.go b/src/runtime/defs_aix_ppc64.go
index dccc3a5..a53fcc5 100644
--- a/src/runtime/defs_aix_ppc64.go
+++ b/src/runtime/defs_aix_ppc64.go
@@ -80,7 +80,8 @@
_ITIMER_VIRTUAL = 0x1
_ITIMER_PROF = 0x2
- _O_RDONLY = 0x0
+ _O_RDONLY = 0x0
+ _O_NONBLOCK = 0x4
_SS_DISABLE = 0x2
_SI_USER = 0x0
diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go
index 0cd133f..cc8c475 100644
--- a/src/runtime/defs_darwin.go
+++ b/src/runtime/defs_darwin.go
@@ -8,7 +8,6 @@
Input to cgo.
GOARCH=amd64 go tool cgo -cdefs defs_darwin.go >defs_darwin_amd64.h
-GOARCH=386 go tool cgo -cdefs defs_darwin.go >defs_darwin_386.h
*/
package runtime
@@ -30,6 +29,7 @@
const (
EINTR = C.EINTR
EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
ETIMEDOUT = C.ETIMEDOUT
PROT_NONE = C.PROT_NONE
diff --git a/src/runtime/defs_darwin_386.go b/src/runtime/defs_darwin_386.go
deleted file mode 100644
index 83928e7..0000000
--- a/src/runtime/defs_darwin_386.go
+++ /dev/null
@@ -1,366 +0,0 @@
-// created by cgo -cdefs and then converted to Go
-// cgo -cdefs defs_darwin.go
-
-package runtime
-
-import "unsafe"
-
-const (
- _EINTR = 0x4
- _EFAULT = 0xe
- _ETIMEDOUT = 0x3c
-
- _PROT_NONE = 0x0
- _PROT_READ = 0x1
- _PROT_WRITE = 0x2
- _PROT_EXEC = 0x4
-
- _MAP_ANON = 0x1000
- _MAP_PRIVATE = 0x2
- _MAP_FIXED = 0x10
-
- _MADV_DONTNEED = 0x4
- _MADV_FREE = 0x5
- _MADV_FREE_REUSABLE = 0x7
- _MADV_FREE_REUSE = 0x8
-
- _SA_SIGINFO = 0x40
- _SA_RESTART = 0x2
- _SA_ONSTACK = 0x1
- _SA_USERTRAMP = 0x100
- _SA_64REGSET = 0x200
-
- _SIGHUP = 0x1
- _SIGINT = 0x2
- _SIGQUIT = 0x3
- _SIGILL = 0x4
- _SIGTRAP = 0x5
- _SIGABRT = 0x6
- _SIGEMT = 0x7
- _SIGFPE = 0x8
- _SIGKILL = 0x9
- _SIGBUS = 0xa
- _SIGSEGV = 0xb
- _SIGSYS = 0xc
- _SIGPIPE = 0xd
- _SIGALRM = 0xe
- _SIGTERM = 0xf
- _SIGURG = 0x10
- _SIGSTOP = 0x11
- _SIGTSTP = 0x12
- _SIGCONT = 0x13
- _SIGCHLD = 0x14
- _SIGTTIN = 0x15
- _SIGTTOU = 0x16
- _SIGIO = 0x17
- _SIGXCPU = 0x18
- _SIGXFSZ = 0x19
- _SIGVTALRM = 0x1a
- _SIGPROF = 0x1b
- _SIGWINCH = 0x1c
- _SIGINFO = 0x1d
- _SIGUSR1 = 0x1e
- _SIGUSR2 = 0x1f
-
- _FPE_INTDIV = 0x7
- _FPE_INTOVF = 0x8
- _FPE_FLTDIV = 0x1
- _FPE_FLTOVF = 0x2
- _FPE_FLTUND = 0x3
- _FPE_FLTRES = 0x4
- _FPE_FLTINV = 0x5
- _FPE_FLTSUB = 0x6
-
- _BUS_ADRALN = 0x1
- _BUS_ADRERR = 0x2
- _BUS_OBJERR = 0x3
-
- _SEGV_MAPERR = 0x1
- _SEGV_ACCERR = 0x2
-
- _ITIMER_REAL = 0x0
- _ITIMER_VIRTUAL = 0x1
- _ITIMER_PROF = 0x2
-
- _EV_ADD = 0x1
- _EV_DELETE = 0x2
- _EV_CLEAR = 0x20
- _EV_RECEIPT = 0x40
- _EV_ERROR = 0x4000
- _EV_EOF = 0x8000
- _EVFILT_READ = -0x1
- _EVFILT_WRITE = -0x2
-
- _PTHREAD_CREATE_DETACHED = 0x2
-
- _F_SETFD = 0x2
- _F_GETFL = 0x3
- _F_SETFL = 0x4
- _FD_CLOEXEC = 0x1
-
- _O_NONBLOCK = 4
-)
-
-type stackt struct {
- ss_sp *byte
- ss_size uintptr
- ss_flags int32
-}
-
-type sigactiont struct {
- __sigaction_u [4]byte
- sa_tramp unsafe.Pointer
- sa_mask uint32
- sa_flags int32
-}
-
-type usigactiont struct {
- __sigaction_u [4]byte
- sa_mask uint32
- sa_flags int32
-}
-
-type siginfo struct {
- si_signo int32
- si_errno int32
- si_code int32
- si_pid int32
- si_uid uint32
- si_status int32
- si_addr uint32
- si_value [4]byte
- si_band int32
- __pad [7]uint32
-}
-
-type timeval struct {
- tv_sec int32
- tv_usec int32
-}
-
-func (tv *timeval) set_usec(x int32) {
- tv.tv_usec = x
-}
-
-type itimerval struct {
- it_interval timeval
- it_value timeval
-}
-
-type timespec struct {
- tv_sec int32
- tv_nsec int32
-}
-
-//go:nosplit
-func (ts *timespec) setNsec(ns int64) {
- ts.tv_sec = timediv(ns, 1e9, &ts.tv_nsec)
-}
-
-type fpcontrol struct {
- pad_cgo_0 [2]byte
-}
-
-type fpstatus struct {
- pad_cgo_0 [2]byte
-}
-
-type regmmst struct {
- mmst_reg [10]int8
- mmst_rsrv [6]int8
-}
-
-type regxmm struct {
- xmm_reg [16]int8
-}
-
-type regs64 struct {
- rax uint64
- rbx uint64
- rcx uint64
- rdx uint64
- rdi uint64
- rsi uint64
- rbp uint64
- rsp uint64
- r8 uint64
- r9 uint64
- r10 uint64
- r11 uint64
- r12 uint64
- r13 uint64
- r14 uint64
- r15 uint64
- rip uint64
- rflags uint64
- cs uint64
- fs uint64
- gs uint64
-}
-
-type floatstate64 struct {
- fpu_reserved [2]int32
- fpu_fcw fpcontrol
- fpu_fsw fpstatus
- fpu_ftw uint8
- fpu_rsrv1 uint8
- fpu_fop uint16
- fpu_ip uint32
- fpu_cs uint16
- fpu_rsrv2 uint16
- fpu_dp uint32
- fpu_ds uint16
- fpu_rsrv3 uint16
- fpu_mxcsr uint32
- fpu_mxcsrmask uint32
- fpu_stmm0 regmmst
- fpu_stmm1 regmmst
- fpu_stmm2 regmmst
- fpu_stmm3 regmmst
- fpu_stmm4 regmmst
- fpu_stmm5 regmmst
- fpu_stmm6 regmmst
- fpu_stmm7 regmmst
- fpu_xmm0 regxmm
- fpu_xmm1 regxmm
- fpu_xmm2 regxmm
- fpu_xmm3 regxmm
- fpu_xmm4 regxmm
- fpu_xmm5 regxmm
- fpu_xmm6 regxmm
- fpu_xmm7 regxmm
- fpu_xmm8 regxmm
- fpu_xmm9 regxmm
- fpu_xmm10 regxmm
- fpu_xmm11 regxmm
- fpu_xmm12 regxmm
- fpu_xmm13 regxmm
- fpu_xmm14 regxmm
- fpu_xmm15 regxmm
- fpu_rsrv4 [96]int8
- fpu_reserved1 int32
-}
-
-type exceptionstate64 struct {
- trapno uint16
- cpu uint16
- err uint32
- faultvaddr uint64
-}
-
-type mcontext64 struct {
- es exceptionstate64
- ss regs64
- fs floatstate64
-}
-
-type regs32 struct {
- eax uint32
- ebx uint32
- ecx uint32
- edx uint32
- edi uint32
- esi uint32
- ebp uint32
- esp uint32
- ss uint32
- eflags uint32
- eip uint32
- cs uint32
- ds uint32
- es uint32
- fs uint32
- gs uint32
-}
-
-type floatstate32 struct {
- fpu_reserved [2]int32
- fpu_fcw fpcontrol
- fpu_fsw fpstatus
- fpu_ftw uint8
- fpu_rsrv1 uint8
- fpu_fop uint16
- fpu_ip uint32
- fpu_cs uint16
- fpu_rsrv2 uint16
- fpu_dp uint32
- fpu_ds uint16
- fpu_rsrv3 uint16
- fpu_mxcsr uint32
- fpu_mxcsrmask uint32
- fpu_stmm0 regmmst
- fpu_stmm1 regmmst
- fpu_stmm2 regmmst
- fpu_stmm3 regmmst
- fpu_stmm4 regmmst
- fpu_stmm5 regmmst
- fpu_stmm6 regmmst
- fpu_stmm7 regmmst
- fpu_xmm0 regxmm
- fpu_xmm1 regxmm
- fpu_xmm2 regxmm
- fpu_xmm3 regxmm
- fpu_xmm4 regxmm
- fpu_xmm5 regxmm
- fpu_xmm6 regxmm
- fpu_xmm7 regxmm
- fpu_rsrv4 [224]int8
- fpu_reserved1 int32
-}
-
-type exceptionstate32 struct {
- trapno uint16
- cpu uint16
- err uint32
- faultvaddr uint32
-}
-
-type mcontext32 struct {
- es exceptionstate32
- ss regs32
- fs floatstate32
-}
-
-type ucontext struct {
- uc_onstack int32
- uc_sigmask uint32
- uc_stack stackt
- uc_link *ucontext
- uc_mcsize uint32
- uc_mcontext *mcontext32
-}
-
-type keventt struct {
- ident uint32
- filter int16
- flags uint16
- fflags uint32
- data int32
- udata *byte
-}
-
-type pthread uintptr
-type pthreadattr struct {
- X__sig int32
- X__opaque [36]int8
-}
-type pthreadmutex struct {
- X__sig int32
- X__opaque [40]int8
-}
-type pthreadmutexattr struct {
- X__sig int32
- X__opaque [8]int8
-}
-type pthreadcond struct {
- X__sig int32
- X__opaque [24]int8
-}
-type pthreadcondattr struct {
- X__sig int32
- X__opaque [4]int8
-}
-type machTimebaseInfo struct {
- numer uint32
- denom uint32
-}
diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go
index 45c34a8..cbc26bf 100644
--- a/src/runtime/defs_darwin_amd64.go
+++ b/src/runtime/defs_darwin_amd64.go
@@ -8,6 +8,7 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_PROT_NONE = 0x0
diff --git a/src/runtime/defs_darwin_arm.go b/src/runtime/defs_darwin_arm.go
deleted file mode 100644
index 5e2af97..0000000
--- a/src/runtime/defs_darwin_arm.go
+++ /dev/null
@@ -1,230 +0,0 @@
-// Note: cgo can't handle some Darwin/ARM structures, so this file can't
-// be auto generated by cgo yet.
-// Created based on output of `cgo -cdefs defs_darwin.go` and Darwin/ARM
-// specific header (mainly mcontext and ucontext related stuff)
-
-package runtime
-
-import "unsafe"
-
-const (
- _EINTR = 0x4
- _EFAULT = 0xe
- _ETIMEDOUT = 0x3c
-
- _PROT_NONE = 0x0
- _PROT_READ = 0x1
- _PROT_WRITE = 0x2
- _PROT_EXEC = 0x4
-
- _MAP_ANON = 0x1000
- _MAP_PRIVATE = 0x2
- _MAP_FIXED = 0x10
-
- _MADV_DONTNEED = 0x4
- _MADV_FREE = 0x5
- _MADV_FREE_REUSABLE = 0x7
- _MADV_FREE_REUSE = 0x8
-
- _SA_SIGINFO = 0x40
- _SA_RESTART = 0x2
- _SA_ONSTACK = 0x1
- _SA_USERTRAMP = 0x100
- _SA_64REGSET = 0x200
-
- _SIGHUP = 0x1
- _SIGINT = 0x2
- _SIGQUIT = 0x3
- _SIGILL = 0x4
- _SIGTRAP = 0x5
- _SIGABRT = 0x6
- _SIGEMT = 0x7
- _SIGFPE = 0x8
- _SIGKILL = 0x9
- _SIGBUS = 0xa
- _SIGSEGV = 0xb
- _SIGSYS = 0xc
- _SIGPIPE = 0xd
- _SIGALRM = 0xe
- _SIGTERM = 0xf
- _SIGURG = 0x10
- _SIGSTOP = 0x11
- _SIGTSTP = 0x12
- _SIGCONT = 0x13
- _SIGCHLD = 0x14
- _SIGTTIN = 0x15
- _SIGTTOU = 0x16
- _SIGIO = 0x17
- _SIGXCPU = 0x18
- _SIGXFSZ = 0x19
- _SIGVTALRM = 0x1a
- _SIGPROF = 0x1b
- _SIGWINCH = 0x1c
- _SIGINFO = 0x1d
- _SIGUSR1 = 0x1e
- _SIGUSR2 = 0x1f
-
- _FPE_INTDIV = 0x7
- _FPE_INTOVF = 0x8
- _FPE_FLTDIV = 0x1
- _FPE_FLTOVF = 0x2
- _FPE_FLTUND = 0x3
- _FPE_FLTRES = 0x4
- _FPE_FLTINV = 0x5
- _FPE_FLTSUB = 0x6
-
- _BUS_ADRALN = 0x1
- _BUS_ADRERR = 0x2
- _BUS_OBJERR = 0x3
-
- _SEGV_MAPERR = 0x1
- _SEGV_ACCERR = 0x2
-
- _ITIMER_REAL = 0x0
- _ITIMER_VIRTUAL = 0x1
- _ITIMER_PROF = 0x2
-
- _EV_ADD = 0x1
- _EV_DELETE = 0x2
- _EV_CLEAR = 0x20
- _EV_RECEIPT = 0x40
- _EV_ERROR = 0x4000
- _EV_EOF = 0x8000
- _EVFILT_READ = -0x1
- _EVFILT_WRITE = -0x2
-
- _PTHREAD_CREATE_DETACHED = 0x2
-
- _F_SETFD = 0x2
- _F_GETFL = 0x3
- _F_SETFL = 0x4
- _FD_CLOEXEC = 0x1
-
- _O_NONBLOCK = 4
-)
-
-type stackt struct {
- ss_sp *byte
- ss_size uintptr
- ss_flags int32
-}
-
-type sigactiont struct {
- __sigaction_u [4]byte
- sa_tramp unsafe.Pointer
- sa_mask uint32
- sa_flags int32
-}
-
-type usigactiont struct {
- __sigaction_u [4]byte
- sa_mask uint32
- sa_flags int32
-}
-
-type siginfo struct {
- si_signo int32
- si_errno int32
- si_code int32
- si_pid int32
- si_uid uint32
- si_status int32
- si_addr uint32
- si_value [4]byte
- si_band int32
- __pad [7]uint32
-}
-
-type timeval struct {
- tv_sec int32
- tv_usec int32
-}
-
-func (tv *timeval) set_usec(x int32) {
- tv.tv_usec = x
-}
-
-type itimerval struct {
- it_interval timeval
- it_value timeval
-}
-
-type timespec struct {
- tv_sec int32
- tv_nsec int32
-}
-
-//go:nosplit
-func (ts *timespec) setNsec(ns int64) {
- ts.tv_sec = timediv(ns, 1e9, &ts.tv_nsec)
-}
-
-type floatstate32 struct {
- r [32]uint32
- fpscr uint32
-}
-
-type regs32 struct {
- r [13]uint32 // r0 to r12
- sp uint32 // r13
- lr uint32 // r14
- pc uint32 // r15
- cpsr uint32
-}
-
-type exceptionstate32 struct {
- trapno uint32 // NOTE: on 386, the trapno field is split into trapno and cpu
- err uint32
- faultvaddr uint32
-}
-
-type mcontext32 struct {
- es exceptionstate32
- ss regs32
- fs floatstate32
-}
-
-type ucontext struct {
- uc_onstack int32
- uc_sigmask uint32
- uc_stack stackt
- uc_link *ucontext
- uc_mcsize uint32
- uc_mcontext *mcontext32
-}
-
-type keventt struct {
- ident uint32
- filter int16
- flags uint16
- fflags uint32
- data int32
- udata *byte
-}
-
-type pthread uintptr
-type pthreadattr struct {
- X__sig int32
- X__opaque [36]int8
-}
-type pthreadmutex struct {
- X__sig int32
- X__opaque [40]int8
-}
-type pthreadmutexattr struct {
- X__sig int32
- X__opaque [8]int8
-}
-type pthreadcond struct {
- X__sig int32
- X__opaque [24]int8
-}
-type pthreadcondattr struct {
- X__sig int32
- X__opaque [4]int8
-}
-
-type machTimebaseInfo struct {
- numer uint32
- denom uint32
-}
diff --git a/src/runtime/defs_darwin_arm64.go b/src/runtime/defs_darwin_arm64.go
index f673eb7..2f46604 100644
--- a/src/runtime/defs_darwin_arm64.go
+++ b/src/runtime/defs_darwin_arm64.go
@@ -8,6 +8,7 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_PROT_NONE = 0x0
diff --git a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go
index 53c1508..e196dff 100644
--- a/src/runtime/defs_freebsd.go
+++ b/src/runtime/defs_freebsd.go
@@ -47,6 +47,11 @@
const (
EINTR = C.EINTR
EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
+ ENOSYS = C.ENOSYS
+
+ O_NONBLOCK = C.O_NONBLOCK
+ O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go
index c4d5c89..7677554 100644
--- a/src/runtime/defs_freebsd_386.go
+++ b/src/runtime/defs_freebsd_386.go
@@ -15,6 +15,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -121,6 +126,8 @@
spare [3]uintptr
}
+type thread int32 // long
+
type sigset struct {
__bits [4]uint32
}
diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go
index 89d36c2..5a83342 100644
--- a/src/runtime/defs_freebsd_amd64.go
+++ b/src/runtime/defs_freebsd_amd64.go
@@ -15,6 +15,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -122,6 +127,8 @@
spare [3]uintptr
}
+type thread int64 // long
+
type sigset struct {
__bits [4]uint32
}
diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go
index cc8c924..b55dfd8 100644
--- a/src/runtime/defs_freebsd_arm.go
+++ b/src/runtime/defs_freebsd_arm.go
@@ -15,6 +15,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -121,6 +126,8 @@
spare [3]uintptr
}
+type thread int32 // long
+
type sigset struct {
__bits [4]uint32
}
diff --git a/src/runtime/defs_freebsd_arm64.go b/src/runtime/defs_freebsd_arm64.go
new file mode 100644
index 0000000..5b9d504
--- /dev/null
+++ b/src/runtime/defs_freebsd_arm64.go
@@ -0,0 +1,259 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_freebsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+ _NBBY = 0x8
+ _CTL_MAXNAME = 0x18
+ _CPU_LEVEL_WHICH = 0x3
+ _CPU_WHICH_PID = 0x2
+)
+
+const (
+ _EINTR = 0x4
+ _EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x100000
+
+ _PROT_NONE = 0x0
+ _PROT_READ = 0x1
+ _PROT_WRITE = 0x2
+ _PROT_EXEC = 0x4
+
+ _MAP_ANON = 0x1000
+ _MAP_SHARED = 0x1
+ _MAP_PRIVATE = 0x2
+ _MAP_FIXED = 0x10
+
+ _MADV_FREE = 0x5
+
+ _SA_SIGINFO = 0x40
+ _SA_RESTART = 0x2
+ _SA_ONSTACK = 0x1
+
+ _CLOCK_MONOTONIC = 0x4
+ _CLOCK_REALTIME = 0x0
+
+ _UMTX_OP_WAIT_UINT = 0xb
+ _UMTX_OP_WAIT_UINT_PRIVATE = 0xf
+ _UMTX_OP_WAKE = 0x3
+ _UMTX_OP_WAKE_PRIVATE = 0x10
+
+ _SIGHUP = 0x1
+ _SIGINT = 0x2
+ _SIGQUIT = 0x3
+ _SIGILL = 0x4
+ _SIGTRAP = 0x5
+ _SIGABRT = 0x6
+ _SIGEMT = 0x7
+ _SIGFPE = 0x8
+ _SIGKILL = 0x9
+ _SIGBUS = 0xa
+ _SIGSEGV = 0xb
+ _SIGSYS = 0xc
+ _SIGPIPE = 0xd
+ _SIGALRM = 0xe
+ _SIGTERM = 0xf
+ _SIGURG = 0x10
+ _SIGSTOP = 0x11
+ _SIGTSTP = 0x12
+ _SIGCONT = 0x13
+ _SIGCHLD = 0x14
+ _SIGTTIN = 0x15
+ _SIGTTOU = 0x16
+ _SIGIO = 0x17
+ _SIGXCPU = 0x18
+ _SIGXFSZ = 0x19
+ _SIGVTALRM = 0x1a
+ _SIGPROF = 0x1b
+ _SIGWINCH = 0x1c
+ _SIGINFO = 0x1d
+ _SIGUSR1 = 0x1e
+ _SIGUSR2 = 0x1f
+
+ _FPE_INTDIV = 0x2
+ _FPE_INTOVF = 0x1
+ _FPE_FLTDIV = 0x3
+ _FPE_FLTOVF = 0x4
+ _FPE_FLTUND = 0x5
+ _FPE_FLTRES = 0x6
+ _FPE_FLTINV = 0x7
+ _FPE_FLTSUB = 0x8
+
+ _BUS_ADRALN = 0x1
+ _BUS_ADRERR = 0x2
+ _BUS_OBJERR = 0x3
+
+ _SEGV_MAPERR = 0x1
+ _SEGV_ACCERR = 0x2
+
+ _ITIMER_REAL = 0x0
+ _ITIMER_VIRTUAL = 0x1
+ _ITIMER_PROF = 0x2
+
+ _EV_ADD = 0x1
+ _EV_DELETE = 0x2
+ _EV_CLEAR = 0x20
+ _EV_RECEIPT = 0x40
+ _EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
+ _EVFILT_READ = -0x1
+ _EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+ _type uint16
+ prio uint16
+}
+
+type thrparam struct {
+ start_func uintptr
+ arg unsafe.Pointer
+ stack_base uintptr
+ stack_size uintptr
+ tls_base unsafe.Pointer
+ tls_size uintptr
+ child_tid unsafe.Pointer // *int64
+ parent_tid *int64
+ flags int32
+ pad_cgo_0 [4]byte
+ rtp *rtprio
+ spare [3]uintptr
+}
+
+type thread int64 // long
+
+type sigset struct {
+ __bits [4]uint32
+}
+
+type stackt struct {
+ ss_sp uintptr
+ ss_size uintptr
+ ss_flags int32
+ pad_cgo_0 [4]byte
+}
+
+type siginfo struct {
+ si_signo int32
+ si_errno int32
+ si_code int32
+ si_pid int32
+ si_uid uint32
+ si_status int32
+ si_addr uint64
+ si_value [8]byte
+ _reason [40]byte
+}
+
+type gpregs struct {
+ gp_x [30]uint64
+ gp_lr uint64
+ gp_sp uint64
+ gp_elr uint64
+ gp_spsr uint32
+ gp_pad int32
+}
+
+type fpregs struct {
+ fp_q [64]uint64 // actually [32]uint128
+ fp_sr uint32
+ fp_cr uint32
+ fp_flags int32
+ fp_pad int32
+}
+
+type mcontext struct {
+ mc_gpregs gpregs
+ mc_fpregs fpregs
+ mc_flags int32
+ mc_pad int32
+ mc_spare [8]uint64
+}
+
+type ucontext struct {
+ uc_sigmask sigset
+ uc_mcontext mcontext
+ uc_link *ucontext
+ uc_stack stackt
+ uc_flags int32
+ __spare__ [4]int32
+ pad_cgo_0 [12]byte
+}
+
+type timespec struct {
+ tv_sec int64
+ tv_nsec int64
+}
+
+//go:nosplit
+func (ts *timespec) setNsec(ns int64) {
+ ts.tv_sec = ns / 1e9
+ ts.tv_nsec = ns % 1e9
+}
+
+type timeval struct {
+ tv_sec int64
+ tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+ tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+ it_interval timeval
+ it_value timeval
+}
+
+type umtx_time struct {
+ _timeout timespec
+ _flags uint32
+ _clockid uint32
+}
+
+type keventt struct {
+ ident uint64
+ filter int16
+ flags uint16
+ fflags uint32
+ data int64
+ udata *byte
+}
+
+type bintime struct {
+ sec int64
+ frac uint64
+}
+
+type vdsoTimehands struct {
+ algo uint32
+ gen uint32
+ scale uint64
+ offset_count uint32
+ counter_mask uint32
+ offset bintime
+ boottime bintime
+ physical uint32
+ res [7]uint32
+}
+
+type vdsoTimekeep struct {
+ ver uint32
+ enabled uint32
+ current uint32
+ pad_cgo_0 [4]byte
+}
+
+const (
+ _VDSO_TK_VER_CURR = 0x1
+
+ vdsoTimehandsSize = 0x58
+ vdsoTimekeepSize = 0x10
+)
diff --git a/src/runtime/defs_illumos_amd64.go b/src/runtime/defs_illumos_amd64.go
new file mode 100644
index 0000000..9c5413b
--- /dev/null
+++ b/src/runtime/defs_illumos_amd64.go
@@ -0,0 +1,14 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ _RCTL_LOCAL_DENY = 0x2
+
+ _RCTL_LOCAL_MAXIMAL = 0x80000000
+
+ _RCTL_FIRST = 0x0
+ _RCTL_NEXT = 0x1
+)
diff --git a/src/runtime/defs_linux.go b/src/runtime/defs_linux.go
index 2d81013..7b14063 100644
--- a/src/runtime/defs_linux.go
+++ b/src/runtime/defs_linux.go
@@ -37,6 +37,7 @@
EINTR = C.EINTR
EAGAIN = C.EAGAIN
ENOMEM = C.ENOMEM
+ ENOSYS = C.ENOSYS
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
@@ -50,7 +51,7 @@
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
- MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
+ MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE
SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK
diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go
index e2fcbca..f4db8cf 100644
--- a/src/runtime/defs_linux_386.go
+++ b/src/runtime/defs_linux_386.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -78,8 +79,9 @@
_ITIMER_VIRTUAL = 0x1
_ITIMER_PROF = 0x2
- _O_RDONLY = 0x0
- _O_CLOEXEC = 0x80000
+ _O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
+ _O_CLOEXEC = 0x80000
_EPOLLIN = 0x1
_EPOLLOUT = 0x4
@@ -93,7 +95,6 @@
_EPOLL_CTL_MOD = 0x3
_AF_UNIX = 0x1
- _F_SETFL = 0x4
_SOCK_DGRAM = 0x2
)
@@ -225,3 +226,14 @@
family uint16
path [108]byte
}
+
+const __NEW_UTS_LEN = 64
+
+type new_utsname struct {
+ sysname [__NEW_UTS_LEN + 1]byte
+ nodename [__NEW_UTS_LEN + 1]byte
+ release [__NEW_UTS_LEN + 1]byte
+ version [__NEW_UTS_LEN + 1]byte
+ machine [__NEW_UTS_LEN + 1]byte
+ domainname [__NEW_UTS_LEN + 1]byte
+}
diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go
index ddad7fd..8480d85 100644
--- a/src/runtime/defs_linux_amd64.go
+++ b/src/runtime/defs_linux_amd64.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -90,7 +91,6 @@
_EPOLL_CTL_MOD = 0x3
_AF_UNIX = 0x1
- _F_SETFL = 0x4
_SOCK_DGRAM = 0x2
)
@@ -143,8 +143,9 @@
// cgo -cdefs defs_linux.go defs1_linux.go
const (
- _O_RDONLY = 0x0
- _O_CLOEXEC = 0x80000
+ _O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
+ _O_CLOEXEC = 0x80000
)
type usigset struct {
@@ -261,3 +262,14 @@
family uint16
path [108]byte
}
+
+const __NEW_UTS_LEN = 64
+
+type new_utsname struct {
+ sysname [__NEW_UTS_LEN + 1]byte
+ nodename [__NEW_UTS_LEN + 1]byte
+ release [__NEW_UTS_LEN + 1]byte
+ version [__NEW_UTS_LEN + 1]byte
+ machine [__NEW_UTS_LEN + 1]byte
+ domainname [__NEW_UTS_LEN + 1]byte
+}
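The new_utsname struct above gives the runtime a private copy of the uname buffer, since the runtime cannot import syscall. Ordinary programs can read the same data through syscall.Uname; a Linux-only sketch (not part of the patch):

package main

import (
	"fmt"
	"syscall"
)

func main() {
	var uts syscall.Utsname
	if err := syscall.Uname(&uts); err != nil {
		panic(err)
	}
	// Utsname fields are NUL-terminated fixed-size arrays of int8.
	release := make([]byte, 0, len(uts.Release))
	for _, c := range uts.Release {
		if c == 0 {
			break
		}
		release = append(release, byte(c))
	}
	fmt.Println("kernel release:", string(release))
}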
diff --git a/src/runtime/defs_linux_arm.go b/src/runtime/defs_linux_arm.go
index 9d10d66..ea29fd9 100644
--- a/src/runtime/defs_linux_arm.go
+++ b/src/runtime/defs_linux_arm.go
@@ -5,6 +5,7 @@
_EINTR = 0x4
_ENOMEM = 0xc
_EAGAIN = 0xb
+ _ENOSYS = 0x26
_PROT_NONE = 0
_PROT_READ = 0x1
@@ -71,6 +72,7 @@
_ITIMER_PROF = 0x2
_ITIMER_VIRTUAL = 0x1
_O_RDONLY = 0
+ _O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_EPOLLIN = 0x1
@@ -85,7 +87,6 @@
_EPOLL_CTL_MOD = 0x3
_AF_UNIX = 0x1
- _F_SETFL = 0x4
_SOCK_DGRAM = 0x2
)
diff --git a/src/runtime/defs_linux_arm64.go b/src/runtime/defs_linux_arm64.go
index b325a22..0690cd3 100644
--- a/src/runtime/defs_linux_arm64.go
+++ b/src/runtime/defs_linux_arm64.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -90,7 +91,6 @@
_EPOLL_CTL_MOD = 0x3
_AF_UNIX = 0x1
- _F_SETFL = 0x4
_SOCK_DGRAM = 0x2
)
@@ -144,8 +144,9 @@
// ../cmd/cgo/cgo -cdefs defs_linux.go defs1_linux.go defs2_linux.go
const (
- _O_RDONLY = 0x0
- _O_CLOEXEC = 0x80000
+ _O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
+ _O_CLOEXEC = 0x80000
)
type usigset struct {
diff --git a/src/runtime/defs_linux_mips64x.go b/src/runtime/defs_linux_mips64x.go
index a52d0d4..0fb53d5 100644
--- a/src/runtime/defs_linux_mips64x.go
+++ b/src/runtime/defs_linux_mips64x.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x59
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -145,6 +146,7 @@
const (
_O_RDONLY = 0x0
+ _O_NONBLOCK = 0x80
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_linux_mipsx.go b/src/runtime/defs_linux_mipsx.go
index f3a1dd0..9315ba9 100644
--- a/src/runtime/defs_linux_mipsx.go
+++ b/src/runtime/defs_linux_mipsx.go
@@ -11,6 +11,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x59
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -143,6 +144,7 @@
const (
_O_RDONLY = 0x0
+ _O_NONBLOCK = 0x80
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_linux_ppc64.go b/src/runtime/defs_linux_ppc64.go
index f438993..90b1dc1 100644
--- a/src/runtime/defs_linux_ppc64.go
+++ b/src/runtime/defs_linux_ppc64.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -145,6 +146,7 @@
const (
_O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_linux_ppc64le.go b/src/runtime/defs_linux_ppc64le.go
index f438993..90b1dc1 100644
--- a/src/runtime/defs_linux_ppc64le.go
+++ b/src/runtime/defs_linux_ppc64le.go
@@ -7,6 +7,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -145,6 +146,7 @@
const (
_O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_linux_riscv64.go b/src/runtime/defs_linux_riscv64.go
new file mode 100644
index 0000000..60da0fa
--- /dev/null
+++ b/src/runtime/defs_linux_riscv64.go
@@ -0,0 +1,209 @@
+// Generated using cgo, then manually converted into appropriate naming and code
+// for the Go runtime.
+// go tool cgo -godefs defs_linux.go defs1_linux.go defs2_linux.go
+
+package runtime
+
+const (
+ _EINTR = 0x4
+ _EAGAIN = 0xb
+ _ENOMEM = 0xc
+ _ENOSYS = 0x26
+
+ _PROT_NONE = 0x0
+ _PROT_READ = 0x1
+ _PROT_WRITE = 0x2
+ _PROT_EXEC = 0x4
+
+ _MAP_ANON = 0x20
+ _MAP_PRIVATE = 0x2
+ _MAP_FIXED = 0x10
+
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x8
+ _MADV_HUGEPAGE = 0xe
+ _MADV_NOHUGEPAGE = 0xf
+
+ _SA_RESTART = 0x10000000
+ _SA_ONSTACK = 0x8000000
+ _SA_RESTORER = 0x0
+ _SA_SIGINFO = 0x4
+
+ _SIGHUP = 0x1
+ _SIGINT = 0x2
+ _SIGQUIT = 0x3
+ _SIGILL = 0x4
+ _SIGTRAP = 0x5
+ _SIGABRT = 0x6
+ _SIGBUS = 0x7
+ _SIGFPE = 0x8
+ _SIGKILL = 0x9
+ _SIGUSR1 = 0xa
+ _SIGSEGV = 0xb
+ _SIGUSR2 = 0xc
+ _SIGPIPE = 0xd
+ _SIGALRM = 0xe
+ _SIGSTKFLT = 0x10
+ _SIGCHLD = 0x11
+ _SIGCONT = 0x12
+ _SIGSTOP = 0x13
+ _SIGTSTP = 0x14
+ _SIGTTIN = 0x15
+ _SIGTTOU = 0x16
+ _SIGURG = 0x17
+ _SIGXCPU = 0x18
+ _SIGXFSZ = 0x19
+ _SIGVTALRM = 0x1a
+ _SIGPROF = 0x1b
+ _SIGWINCH = 0x1c
+ _SIGIO = 0x1d
+ _SIGPWR = 0x1e
+ _SIGSYS = 0x1f
+
+ _FPE_INTDIV = 0x1
+ _FPE_INTOVF = 0x2
+ _FPE_FLTDIV = 0x3
+ _FPE_FLTOVF = 0x4
+ _FPE_FLTUND = 0x5
+ _FPE_FLTRES = 0x6
+ _FPE_FLTINV = 0x7
+ _FPE_FLTSUB = 0x8
+
+ _BUS_ADRALN = 0x1
+ _BUS_ADRERR = 0x2
+ _BUS_OBJERR = 0x3
+
+ _SEGV_MAPERR = 0x1
+ _SEGV_ACCERR = 0x2
+
+ _ITIMER_REAL = 0x0
+ _ITIMER_VIRTUAL = 0x1
+ _ITIMER_PROF = 0x2
+
+ _EPOLLIN = 0x1
+ _EPOLLOUT = 0x4
+ _EPOLLERR = 0x8
+ _EPOLLHUP = 0x10
+ _EPOLLRDHUP = 0x2000
+ _EPOLLET = 0x80000000
+ _EPOLL_CLOEXEC = 0x80000
+ _EPOLL_CTL_ADD = 0x1
+ _EPOLL_CTL_DEL = 0x2
+ _EPOLL_CTL_MOD = 0x3
+)
+
+type timespec struct {
+ tv_sec int64
+ tv_nsec int64
+}
+
+//go:nosplit
+func (ts *timespec) setNsec(ns int64) {
+ ts.tv_sec = ns / 1e9
+ ts.tv_nsec = ns % 1e9
+}
+
+type timeval struct {
+ tv_sec int64
+ tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+ tv.tv_usec = int64(x)
+}
+
+type sigactiont struct {
+ sa_handler uintptr
+ sa_flags uint64
+ sa_restorer uintptr
+ sa_mask uint64
+}
+
+type siginfo struct {
+ si_signo int32
+ si_errno int32
+ si_code int32
+ // below here is a union; si_addr is the only field we use
+ si_addr uint64
+}
+
+type itimerval struct {
+ it_interval timeval
+ it_value timeval
+}
+
+type epollevent struct {
+ events uint32
+ pad_cgo_0 [4]byte
+ data [8]byte // unaligned uintptr
+}
+
+const (
+ _O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
+ _O_CLOEXEC = 0x80000
+)
+
+type user_regs_struct struct {
+ pc uint64
+ ra uint64
+ sp uint64
+ gp uint64
+ tp uint64
+ t0 uint64
+ t1 uint64
+ t2 uint64
+ s0 uint64
+ s1 uint64
+ a0 uint64
+ a1 uint64
+ a2 uint64
+ a3 uint64
+ a4 uint64
+ a5 uint64
+ a6 uint64
+ a7 uint64
+ s2 uint64
+ s3 uint64
+ s4 uint64
+ s5 uint64
+ s6 uint64
+ s7 uint64
+ s8 uint64
+ s9 uint64
+ s10 uint64
+ s11 uint64
+ t3 uint64
+ t4 uint64
+ t5 uint64
+ t6 uint64
+}
+
+type user_fpregs_struct struct {
+ f [528]byte
+}
+
+type usigset struct {
+ us_x__val [16]uint64
+}
+
+type sigcontext struct {
+ sc_regs user_regs_struct
+ sc_fpregs user_fpregs_struct
+}
+
+type stackt struct {
+ ss_sp *byte
+ ss_flags int32
+ ss_size uintptr
+}
+
+type ucontext struct {
+ uc_flags uint64
+ uc_link *ucontext
+ uc_stack stackt
+ uc_sigmask usigset
+ uc_x__unused [0]uint8
+ uc_pad_cgo_0 [8]byte
+ uc_mcontext sigcontext
+}
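
The setNsec helper in the new riscv64 defs splits a nanosecond count into whole seconds and the nanosecond remainder by integer division. A trivial stand-alone sketch of the same split, for illustration only:

    package main

    import "fmt"

    // splitNsec mirrors the quotient/remainder split that timespec.setNsec
    // performs on a 64-bit platform: whole seconds plus leftover nanoseconds.
    func splitNsec(ns int64) (sec, nsec int64) {
        return ns / 1e9, ns % 1e9
    }

    func main() {
        fmt.Println(splitNsec(1500000000)) // 1 500000000
    }
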
diff --git a/src/runtime/defs_linux_s390x.go b/src/runtime/defs_linux_s390x.go
index 19b99b5..fa289d5 100644
--- a/src/runtime/defs_linux_s390x.go
+++ b/src/runtime/defs_linux_s390x.go
@@ -8,6 +8,7 @@
_EINTR = 0x4
_EAGAIN = 0xb
_ENOMEM = 0xc
+ _ENOSYS = 0x26
_PROT_NONE = 0x0
_PROT_READ = 0x1
@@ -138,6 +139,7 @@
const (
_O_RDONLY = 0x0
+ _O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_nacl_386.go b/src/runtime/defs_nacl_386.go
deleted file mode 100644
index 70dfcf2..0000000
--- a/src/runtime/defs_nacl_386.go
+++ /dev/null
@@ -1,49 +0,0 @@
-package runtime
-
-const (
- // These values are referred to in the source code
- // but really don't matter. Even so, use the standard numbers.
- _SIGQUIT = 3
- _SIGTRAP = 5
- _SIGSEGV = 11
- _SIGPROF = 27
-)
-
-type timespec struct {
- tv_sec int64
- tv_nsec int32
-}
-
-//go:nosplit
-func (ts *timespec) setNsec(ns int64) {
- ts.tv_sec = int64(timediv(ns, 1e9, &ts.tv_nsec))
-}
-
-type excregs386 struct {
- eax uint32
- ecx uint32
- edx uint32
- ebx uint32
- esp uint32
- ebp uint32
- esi uint32
- edi uint32
- eip uint32
- eflags uint32
-}
-
-type exccontext struct {
- size uint32
- portable_context_offset uint32
- portable_context_size uint32
- arch uint32
- regs_size uint32
- reserved [11]uint32
- regs excregs386
-}
-
-type excportablecontext struct {
- pc uint32
- sp uint32
- fp uint32
-}
diff --git a/src/runtime/defs_nacl_amd64p32.go b/src/runtime/defs_nacl_amd64p32.go
deleted file mode 100644
index 3706748..0000000
--- a/src/runtime/defs_nacl_amd64p32.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package runtime
-
-const (
- // These values are referred to in the source code
- // but really don't matter. Even so, use the standard numbers.
- _SIGQUIT = 3
- _SIGTRAP = 5
- _SIGSEGV = 11
- _SIGPROF = 27
-)
-
-type timespec struct {
- tv_sec int64
- tv_nsec int32
-}
-
-//go:nosplit
-func (ts *timespec) setNsec(ns int64) {
- ts.tv_sec = ns / 1e9
- ts.tv_nsec = int32(ns % 1e9)
-}
-
-type excregs386 struct {
- eax uint32
- ecx uint32
- edx uint32
- ebx uint32
- esp uint32
- ebp uint32
- esi uint32
- edi uint32
- eip uint32
- eflags uint32
-}
-
-type excregsamd64 struct {
- rax uint64
- rcx uint64
- rdx uint64
- rbx uint64
- rsp uint64
- rbp uint64
- rsi uint64
- rdi uint64
- r8 uint64
- r9 uint64
- r10 uint64
- r11 uint64
- r12 uint64
- r13 uint64
- r14 uint64
- r15 uint64
- rip uint64
- rflags uint32
-}
-
-type exccontext struct {
- size uint32
- portable_context_offset uint32
- portable_context_size uint32
- arch uint32
- regs_size uint32
- reserved [11]uint32
- regs excregsamd64
-}
-
-type excportablecontext struct {
- pc uint32
- sp uint32
- fp uint32
-}
diff --git a/src/runtime/defs_nacl_arm.go b/src/runtime/defs_nacl_arm.go
deleted file mode 100644
index 89e539e..0000000
--- a/src/runtime/defs_nacl_arm.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package runtime
-
-const (
- // These values are referred to in the source code
- // but really don't matter. Even so, use the standard numbers.
- _SIGQUIT = 3
- _SIGTRAP = 5
- _SIGSEGV = 11
- _SIGPROF = 27
-)
-
-type timespec struct {
- tv_sec int64
- tv_nsec int32
-}
-
-//go:nosplit
-func (ts *timespec) setNsec(ns int64) {
- ts.tv_sec = int64(timediv(ns, 1e9, &ts.tv_nsec))
-}
-
-type excregsarm struct {
- r0 uint32
- r1 uint32
- r2 uint32
- r3 uint32
- r4 uint32
- r5 uint32
- r6 uint32
- r7 uint32
- r8 uint32
- r9 uint32 // the value reported here is undefined.
- r10 uint32
- r11 uint32
- r12 uint32
- sp uint32 /* r13 */
- lr uint32 /* r14 */
- pc uint32 /* r15 */
- cpsr uint32
-}
-
-type exccontext struct {
- size uint32
- portable_context_offset uint32
- portable_context_size uint32
- arch uint32
- regs_size uint32
- reserved [11]uint32
- regs excregsarm
-}
-
-type excportablecontext struct {
- pc uint32
- sp uint32
- fp uint32
-}
diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go
index 41aa07a..3f5ce5a 100644
--- a/src/runtime/defs_netbsd.go
+++ b/src/runtime/defs_netbsd.go
@@ -32,6 +32,11 @@
const (
EINTR = C.EINTR
EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
+ ENOSYS = C.ENOSYS
+
+ O_NONBLOCK = C.O_NONBLOCK
+ O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
diff --git a/src/runtime/defs_openbsd.go b/src/runtime/defs_openbsd.go
index a328d25..4774e36 100644
--- a/src/runtime/defs_openbsd.go
+++ b/src/runtime/defs_openbsd.go
@@ -28,6 +28,11 @@
const (
EINTR = C.EINTR
EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
+ ENOSYS = C.ENOSYS
+
+ O_NONBLOCK = C.O_NONBLOCK
+ O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go
index 0e59a05..35f2e53 100644
--- a/src/runtime/defs_openbsd_386.go
+++ b/src/runtime/defs_openbsd_386.go
@@ -8,6 +8,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go
index 5cefac5..c187a98 100644
--- a/src/runtime/defs_openbsd_amd64.go
+++ b/src/runtime/defs_openbsd_amd64.go
@@ -8,6 +8,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs_openbsd_arm.go b/src/runtime/defs_openbsd_arm.go
index b187e97..170bb38 100644
--- a/src/runtime/defs_openbsd_arm.go
+++ b/src/runtime/defs_openbsd_arm.go
@@ -8,6 +8,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs_openbsd_arm64.go b/src/runtime/defs_openbsd_arm64.go
index 6b9d601..8b8d5cd 100644
--- a/src/runtime/defs_openbsd_arm64.go
+++ b/src/runtime/defs_openbsd_arm64.go
@@ -5,6 +5,11 @@
const (
_EINTR = 0x4
_EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ENOSYS = 0x4e
+
+ _O_NONBLOCK = 0x4
+ _O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
_PROT_READ = 0x1
diff --git a/src/runtime/defs_solaris.go b/src/runtime/defs_solaris.go
index 0638e0b..22df590 100644
--- a/src/runtime/defs_solaris.go
+++ b/src/runtime/defs_solaris.go
@@ -38,9 +38,12 @@
EBADF = C.EBADF
EFAULT = C.EFAULT
EAGAIN = C.EAGAIN
+ EBUSY = C.EBUSY
+ ETIME = C.ETIME
ETIMEDOUT = C.ETIMEDOUT
EWOULDBLOCK = C.EWOULDBLOCK
EINPROGRESS = C.EINPROGRESS
+ ENOSYS = C.ENOSYS
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
@@ -118,6 +121,7 @@
MAXHOSTNAMELEN = C.MAXHOSTNAMELEN
O_NONBLOCK = C.O_NONBLOCK
+ O_CLOEXEC = C.O_CLOEXEC
FD_CLOEXEC = C.FD_CLOEXEC
F_GETFL = C.F_GETFL
F_SETFL = C.F_SETFL
@@ -128,7 +132,9 @@
POLLHUP = C.POLLHUP
POLLERR = C.POLLERR
- PORT_SOURCE_FD = C.PORT_SOURCE_FD
+ PORT_SOURCE_FD = C.PORT_SOURCE_FD
+ PORT_SOURCE_ALERT = C.PORT_SOURCE_ALERT
+ PORT_ALERT_UPDATE = C.PORT_ALERT_UPDATE
)
type SemT C.sem_t
diff --git a/src/runtime/defs_windows.go b/src/runtime/defs_windows.go
index 9bd9107..43f358d 100644
--- a/src/runtime/defs_windows.go
+++ b/src/runtime/defs_windows.go
@@ -41,9 +41,13 @@
DUPLICATE_SAME_ACCESS = C.DUPLICATE_SAME_ACCESS
THREAD_PRIORITY_HIGHEST = C.THREAD_PRIORITY_HIGHEST
- SIGINT = C.SIGINT
- CTRL_C_EVENT = C.CTRL_C_EVENT
- CTRL_BREAK_EVENT = C.CTRL_BREAK_EVENT
+ SIGINT = C.SIGINT
+ SIGTERM = C.SIGTERM
+ CTRL_C_EVENT = C.CTRL_C_EVENT
+ CTRL_BREAK_EVENT = C.CTRL_BREAK_EVENT
+ CTRL_CLOSE_EVENT = C.CTRL_CLOSE_EVENT
+ CTRL_LOGOFF_EVENT = C.CTRL_LOGOFF_EVENT
+ CTRL_SHUTDOWN_EVENT = C.CTRL_SHUTDOWN_EVENT
CONTEXT_CONTROL = C.CONTEXT_CONTROL
CONTEXT_FULL = C.CONTEXT_FULL
diff --git a/src/runtime/defs_windows_386.go b/src/runtime/defs_windows_386.go
index 8c0d6d8..3c5057b 100644
--- a/src/runtime/defs_windows_386.go
+++ b/src/runtime/defs_windows_386.go
@@ -15,9 +15,13 @@
_DUPLICATE_SAME_ACCESS = 0x2
_THREAD_PRIORITY_HIGHEST = 0x2
- _SIGINT = 0x2
- _CTRL_C_EVENT = 0x0
- _CTRL_BREAK_EVENT = 0x1
+ _SIGINT = 0x2
+ _SIGTERM = 0xF
+ _CTRL_C_EVENT = 0x0
+ _CTRL_BREAK_EVENT = 0x1
+ _CTRL_CLOSE_EVENT = 0x2
+ _CTRL_LOGOFF_EVENT = 0x5
+ _CTRL_SHUTDOWN_EVENT = 0x6
_CONTEXT_CONTROL = 0x10001
_CONTEXT_FULL = 0x10007
diff --git a/src/runtime/defs_windows_amd64.go b/src/runtime/defs_windows_amd64.go
index 42a446d..ebb1506 100644
--- a/src/runtime/defs_windows_amd64.go
+++ b/src/runtime/defs_windows_amd64.go
@@ -15,9 +15,13 @@
_DUPLICATE_SAME_ACCESS = 0x2
_THREAD_PRIORITY_HIGHEST = 0x2
- _SIGINT = 0x2
- _CTRL_C_EVENT = 0x0
- _CTRL_BREAK_EVENT = 0x1
+ _SIGINT = 0x2
+ _SIGTERM = 0xF
+ _CTRL_C_EVENT = 0x0
+ _CTRL_BREAK_EVENT = 0x1
+ _CTRL_CLOSE_EVENT = 0x2
+ _CTRL_LOGOFF_EVENT = 0x5
+ _CTRL_SHUTDOWN_EVENT = 0x6
_CONTEXT_CONTROL = 0x100001
_CONTEXT_FULL = 0x10000b
diff --git a/src/runtime/defs_windows_arm.go b/src/runtime/defs_windows_arm.go
index 049f5b6..b275b05 100644
--- a/src/runtime/defs_windows_arm.go
+++ b/src/runtime/defs_windows_arm.go
@@ -16,9 +16,13 @@
_DUPLICATE_SAME_ACCESS = 0x2
_THREAD_PRIORITY_HIGHEST = 0x2
- _SIGINT = 0x2
- _CTRL_C_EVENT = 0x0
- _CTRL_BREAK_EVENT = 0x1
+ _SIGINT = 0x2
+ _SIGTERM = 0xF
+ _CTRL_C_EVENT = 0x0
+ _CTRL_BREAK_EVENT = 0x1
+ _CTRL_CLOSE_EVENT = 0x2
+ _CTRL_LOGOFF_EVENT = 0x5
+ _CTRL_SHUTDOWN_EVENT = 0x6
_CONTEXT_CONTROL = 0x10001
_CONTEXT_FULL = 0x10007
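
The new _SIGTERM and _CTRL_CLOSE/_CTRL_LOGOFF/_CTRL_SHUTDOWN_EVENT constants let the runtime translate Windows console control events into signals. Assuming that translation (this hunk only adds the constants), a program can observe a console close the same way it observes Ctrl+C; a minimal hedged sketch:

    package main

    import (
        "fmt"
        "os"
        "os/signal"
        "syscall"
    )

    func main() {
        // With console close/logoff/shutdown events delivered as SIGTERM,
        // one Notify call covers both Ctrl+C and a closed console window.
        ch := make(chan os.Signal, 1)
        signal.Notify(ch, os.Interrupt, syscall.SIGTERM)
        fmt.Println("waiting; close the console window or press Ctrl+C")
        fmt.Println("got:", <-ch)
    }
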
diff --git a/src/runtime/duff_mips64x.s b/src/runtime/duff_mips64x.s
index acf0a4e..c4e04cc 100644
--- a/src/runtime/duff_mips64x.s
+++ b/src/runtime/duff_mips64x.s
@@ -265,7 +265,645 @@
ADDV $8, R1
RET
-// TODO: Implement runtime·duffcopy.
-TEXT runtime·duffcopy(SB),NOSPLIT|NOFRAME,$0-0
- MOVV R0, 2(R0)
+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
+ MOVV (R1), R23
+ ADDV $8, R1
+ MOVV R23, (R2)
+ ADDV $8, R2
+
RET
diff --git a/src/runtime/duff_ppc64x.s b/src/runtime/duff_ppc64x.s
index 0c62d0a..d6b89ba 100644
--- a/src/runtime/duff_ppc64x.s
+++ b/src/runtime/duff_ppc64x.s
@@ -137,7 +137,5 @@
MOVDU R0, 8(R3)
RET
-// TODO: Implement runtime·duffcopy.
-TEXT runtime·duffcopy(SB),NOSPLIT|NOFRAME,$0-0
- MOVD R0, 0(R0)
- RET
+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0
+ UNDEF
diff --git a/src/runtime/env_posix.go b/src/runtime/env_posix.go
index 03208c7..f95ff68 100644
--- a/src/runtime/env_posix.go
+++ b/src/runtime/env_posix.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris windows
package runtime
diff --git a/src/runtime/error.go b/src/runtime/error.go
index 0085dfc..386569b 100644
--- a/src/runtime/error.go
+++ b/src/runtime/error.go
@@ -88,7 +88,7 @@
return string(e)
}
-// An boundsError represents a an indexing or slicing operation gone wrong.
+// A boundsError represents an indexing or slicing operation gone wrong.
type boundsError struct {
x int64
y int
@@ -185,11 +185,6 @@
String() string
}
-func typestring(x interface{}) string {
- e := efaceOf(&x)
- return e._type.string()
-}
-
// printany prints an argument passed to panic.
// If panic is called with a value that has a String or Error method,
// it has already been converted into a string by preprintpanics.
@@ -232,7 +227,51 @@
case string:
print(v)
default:
- print("(", typestring(i), ") ", i)
+ printanycustomtype(i)
+ }
+}
+
+func printanycustomtype(i interface{}) {
+ eface := efaceOf(&i)
+ typestring := eface._type.string()
+
+ switch eface._type.kind {
+ case kindString:
+ print(typestring, `("`, *(*string)(eface.data), `")`)
+ case kindBool:
+ print(typestring, "(", *(*bool)(eface.data), ")")
+ case kindInt:
+ print(typestring, "(", *(*int)(eface.data), ")")
+ case kindInt8:
+ print(typestring, "(", *(*int8)(eface.data), ")")
+ case kindInt16:
+ print(typestring, "(", *(*int16)(eface.data), ")")
+ case kindInt32:
+ print(typestring, "(", *(*int32)(eface.data), ")")
+ case kindInt64:
+ print(typestring, "(", *(*int64)(eface.data), ")")
+ case kindUint:
+ print(typestring, "(", *(*uint)(eface.data), ")")
+ case kindUint8:
+ print(typestring, "(", *(*uint8)(eface.data), ")")
+ case kindUint16:
+ print(typestring, "(", *(*uint16)(eface.data), ")")
+ case kindUint32:
+ print(typestring, "(", *(*uint32)(eface.data), ")")
+ case kindUint64:
+ print(typestring, "(", *(*uint64)(eface.data), ")")
+ case kindUintptr:
+ print(typestring, "(", *(*uintptr)(eface.data), ")")
+ case kindFloat32:
+ print(typestring, "(", *(*float32)(eface.data), ")")
+ case kindFloat64:
+ print(typestring, "(", *(*float64)(eface.data), ")")
+ case kindComplex64:
+ print(typestring, *(*complex64)(eface.data))
+ case kindComplex128:
+ print(typestring, *(*complex128)(eface.data))
+ default:
+ print("(", typestring, ") ", eface.data)
}
}
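
printanycustomtype switches on the dynamic type's kind so that a panic value whose type is defined over a basic kind is printed as type(value) rather than an opaque address. A small hedged example of the effect (the type name and exact output formatting are illustrative):

    package main

    // exitCode is a defined type whose underlying kind is int; with this
    // change the runtime prints both the type name and the value on panic.
    type exitCode int

    func main() {
        // Expected shape of the output with a Go 1.15-era runtime:
        //   panic: main.exitCode(3)
        panic(exitCode(3))
    }
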
diff --git a/src/runtime/export_aix_test.go b/src/runtime/export_aix_test.go
new file mode 100644
index 0000000..162552d
--- /dev/null
+++ b/src/runtime/export_aix_test.go
@@ -0,0 +1,7 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+var Fcntl = syscall_fcntl1
diff --git a/src/runtime/export_darwin_test.go b/src/runtime/export_darwin_test.go
new file mode 100644
index 0000000..e9b6eb3
--- /dev/null
+++ b/src/runtime/export_darwin_test.go
@@ -0,0 +1,13 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func Fcntl(fd, cmd, arg uintptr) (uintptr, uintptr) {
+ r := fcntl(int32(fd), int32(cmd), int32(arg))
+ if r < 0 {
+ return ^uintptr(0), uintptr(-r)
+ }
+ return uintptr(r), 0
+}
diff --git a/src/runtime/export_debug_test.go b/src/runtime/export_debug_test.go
index e97dd52..ed4242e 100644
--- a/src/runtime/export_debug_test.go
+++ b/src/runtime/export_debug_test.go
@@ -20,7 +20,7 @@
//
// On success, InjectDebugCall returns the panic value of fn or nil.
// If fn did not panic, its results will be available in args.
-func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (interface{}, error) {
+func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error, returnOnUnsafePoint bool) (interface{}, error) {
if gp.lockedm == 0 {
return nil, plainError("goroutine not locked to thread")
}
@@ -48,6 +48,9 @@
h := new(debugCallHandler)
h.gp = gp
+ // gp may not be running right now, but we can still get the M
+ // it will run on since it's locked.
+ h.mp = gp.lockedm.ptr()
h.fv, h.argp, h.argSize = fv, argp, argSize
h.handleF = h.handle // Avoid allocating closure during signal
@@ -64,9 +67,16 @@
notetsleepg(&h.done, -1)
if h.err != "" {
switch h.err {
- case "retry _Grunnable", "executing on Go runtime stack":
+ case "call not at safe point":
+ if returnOnUnsafePoint {
+ // This is for TestDebugCallUnsafePoint.
+ return nil, h.err
+ }
+ fallthrough
+ case "retry _Grunnable", "executing on Go runtime stack", "call from within the Go runtime":
// These are transient states. Try to get out of them.
if i < 100 {
+ usleep(100)
Gosched()
continue
}
@@ -79,6 +89,7 @@
type debugCallHandler struct {
gp *g
+ mp *m
fv *funcval
argp unsafe.Pointer
argSize uintptr
@@ -95,8 +106,8 @@
func (h *debugCallHandler) inject(info *siginfo, ctxt *sigctxt, gp2 *g) bool {
switch h.gp.atomicstatus {
case _Grunning:
- if getg().m != h.gp.m {
- println("trap on wrong M", getg().m, h.gp.m)
+ if getg().m != h.mp {
+ println("trap on wrong M", getg().m, h.mp)
return false
}
// Push current PC on the stack.
@@ -128,8 +139,8 @@
func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool {
// Sanity check.
- if getg().m != h.gp.m {
- println("trap on wrong M", getg().m, h.gp.m)
+ if getg().m != h.mp {
+ println("trap on wrong M", getg().m, h.mp)
return false
}
f := findfunc(uintptr(ctxt.rip()))
diff --git a/src/runtime/export_futex_test.go b/src/runtime/export_futex_test.go
index 5e27236..a727a93 100644
--- a/src/runtime/export_futex_test.go
+++ b/src/runtime/export_futex_test.go
@@ -6,5 +6,14 @@
package runtime
-var Futexsleep = futexsleep
var Futexwakeup = futexwakeup
+
+//go:nosplit
+func Futexsleep(addr *uint32, val uint32, ns int64) {
+ // Temporarily disable preemption so that a preemption signal
+ // doesn't interrupt the system call.
+ poff := debug.asyncpreemptoff
+ debug.asyncpreemptoff = 1
+ futexsleep(addr, val, ns)
+ debug.asyncpreemptoff = poff
+}
diff --git a/src/runtime/export_linux_test.go b/src/runtime/export_linux_test.go
index c73f2f3..b7c901f 100644
--- a/src/runtime/export_linux_test.go
+++ b/src/runtime/export_linux_test.go
@@ -10,6 +10,9 @@
var NewOSProc0 = newosproc0
var Mincore = mincore
+var Add = add
+
+type EpollEvent epollevent
func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 {
return epollctl(epfd, op, fd, (*epollevent)(ev))
diff --git a/src/runtime/export_mmap_test.go b/src/runtime/export_mmap_test.go
index 6c4a446..aeaf37f 100644
--- a/src/runtime/export_mmap_test.go
+++ b/src/runtime/export_mmap_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
// Export guts for testing.
diff --git a/src/runtime/export_solaris_test.go b/src/runtime/export_solaris_test.go
new file mode 100644
index 0000000..e865c77
--- /dev/null
+++ b/src/runtime/export_solaris_test.go
@@ -0,0 +1,9 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func Fcntl(fd, cmd, arg uintptr) (uintptr, uintptr) {
+ return sysvicall3Err(&libc_fcntl, fd, cmd, arg)
+}
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 6009932..5ab03f3 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -35,9 +35,21 @@
var Atoi32 = atoi32
var Nanotime = nanotime
+var NetpollBreak = netpollBreak
+var Usleep = usleep
+var PhysPageSize = physPageSize
var PhysHugePageSize = physHugePageSize
+var NetpollGenericInit = netpollGenericInit
+
+var ParseRelease = parseRelease
+
+var Memmove = memmove
+var MemclrNoHeapPointers = memclrNoHeapPointers
+
+const PreemptMSupported = preemptMSupported
+
type LFNode struct {
Next uint64
Pushcnt uintptr
@@ -51,6 +63,12 @@
return (*LFNode)(unsafe.Pointer((*lfstack)(head).pop()))
}
+func Netpoll(delta int64) {
+ systemstack(func() {
+ netpoll(delta)
+ })
+}
+
func GCMask(x interface{}) (ret []byte) {
systemstack(func() {
ret = getgcmask(x)
@@ -246,7 +264,7 @@
pagesInUse = uintptr(mheap_.pagesInUse)
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
counted += s.npages
}
}
@@ -308,7 +326,7 @@
// Add up current allocations in spans.
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
if sizeclass := s.spanclass.sizeclass(); sizeclass == 0 {
@@ -341,9 +359,17 @@
slow.BySize[i].Frees = bySize[i].Frees
}
- for i := mheap_.free.start(0, 0); i.valid(); i = i.next() {
- slow.HeapReleased += uint64(i.span().released())
+ for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+ pg := mheap_.pages.chunkOf(i).scavenged.popcntRange(0, pallocChunkPages)
+ slow.HeapReleased += uint64(pg) * pageSize
}
+ for _, p := range allp {
+ pg := sys.OnesCount64(p.pcache.scav)
+ slow.HeapReleased += uint64(pg) * pageSize
+ }
+
+ // Unused space in the current arena also counts as released space.
+ slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
getg().m.mallocing--
})
@@ -457,6 +483,8 @@
type G = g
+type Sudog = sudog
+
func Getg() *G {
return getg()
}
@@ -518,170 +546,6 @@
}
}
-// UnscavHugePagesSlow returns the value of mheap_.freeHugePages
-// and the number of unscavenged huge pages calculated by
-// scanning the heap.
-func UnscavHugePagesSlow() (uintptr, uintptr) {
- var base, slow uintptr
- // Run on the system stack to avoid deadlock from stack growth
- // trying to acquire the heap lock.
- systemstack(func() {
- lock(&mheap_.lock)
- base = mheap_.free.unscavHugePages
- for _, s := range mheap_.allspans {
- if s.state == mSpanFree && !s.scavenged {
- slow += s.hugePages()
- }
- }
- unlock(&mheap_.lock)
- })
- return base, slow
-}
-
-// Span is a safe wrapper around an mspan, whose memory
-// is managed manually.
-type Span struct {
- *mspan
-}
-
-func AllocSpan(base, npages uintptr, scavenged bool) Span {
- var s *mspan
- systemstack(func() {
- lock(&mheap_.lock)
- s = (*mspan)(mheap_.spanalloc.alloc())
- unlock(&mheap_.lock)
- })
- s.init(base, npages)
- s.scavenged = scavenged
- return Span{s}
-}
-
-func (s *Span) Free() {
- systemstack(func() {
- lock(&mheap_.lock)
- mheap_.spanalloc.free(unsafe.Pointer(s.mspan))
- unlock(&mheap_.lock)
- })
- s.mspan = nil
-}
-
-func (s Span) Base() uintptr {
- return s.mspan.base()
-}
-
-func (s Span) Pages() uintptr {
- return s.mspan.npages
-}
-
-type TreapIterType treapIterType
-
-const (
- TreapIterScav TreapIterType = TreapIterType(treapIterScav)
- TreapIterHuge = TreapIterType(treapIterHuge)
- TreapIterBits = treapIterBits
-)
-
-type TreapIterFilter treapIterFilter
-
-func TreapFilter(mask, match TreapIterType) TreapIterFilter {
- return TreapIterFilter(treapFilter(treapIterType(mask), treapIterType(match)))
-}
-
-func (s Span) MatchesIter(mask, match TreapIterType) bool {
- return treapFilter(treapIterType(mask), treapIterType(match)).matches(s.treapFilter())
-}
-
-type TreapIter struct {
- treapIter
-}
-
-func (t TreapIter) Span() Span {
- return Span{t.span()}
-}
-
-func (t TreapIter) Valid() bool {
- return t.valid()
-}
-
-func (t TreapIter) Next() TreapIter {
- return TreapIter{t.next()}
-}
-
-func (t TreapIter) Prev() TreapIter {
- return TreapIter{t.prev()}
-}
-
-// Treap is a safe wrapper around mTreap for testing.
-//
-// It must never be heap-allocated because mTreap is
-// notinheap.
-//
-//go:notinheap
-type Treap struct {
- mTreap
-}
-
-func (t *Treap) Start(mask, match TreapIterType) TreapIter {
- return TreapIter{t.start(treapIterType(mask), treapIterType(match))}
-}
-
-func (t *Treap) End(mask, match TreapIterType) TreapIter {
- return TreapIter{t.end(treapIterType(mask), treapIterType(match))}
-}
-
-func (t *Treap) Insert(s Span) {
- // mTreap uses a fixalloc in mheap_ for treapNode
- // allocation which requires the mheap_ lock to manipulate.
- // Locking here is safe because the treap itself never allocs
- // or otherwise ends up grabbing this lock.
- systemstack(func() {
- lock(&mheap_.lock)
- t.insert(s.mspan)
- unlock(&mheap_.lock)
- })
- t.CheckInvariants()
-}
-
-func (t *Treap) Find(npages uintptr) TreapIter {
- return TreapIter{t.find(npages)}
-}
-
-func (t *Treap) Erase(i TreapIter) {
- // mTreap uses a fixalloc in mheap_ for treapNode
- // freeing which requires the mheap_ lock to manipulate.
- // Locking here is safe because the treap itself never allocs
- // or otherwise ends up grabbing this lock.
- systemstack(func() {
- lock(&mheap_.lock)
- t.erase(i.treapIter)
- unlock(&mheap_.lock)
- })
- t.CheckInvariants()
-}
-
-func (t *Treap) RemoveSpan(s Span) {
- // See Erase about locking.
- systemstack(func() {
- lock(&mheap_.lock)
- t.removeSpan(s.mspan)
- unlock(&mheap_.lock)
- })
- t.CheckInvariants()
-}
-
-func (t *Treap) Size() int {
- i := 0
- t.mTreap.treap.walkTreap(func(t *treapNode) {
- i++
- })
- return i
-}
-
-func (t *Treap) CheckInvariants() {
- t.mTreap.treap.walkTreap(checkTreapNode)
- t.mTreap.treap.validateInvariants()
-}
-
func RunGetgThreadSwitchTest() {
// Test that getg works correctly with thread switch.
// With gccgo, if we generate getg inlined, the backend
@@ -715,3 +579,413 @@
panic("g1 != g3")
}
}
+
+const (
+ PageSize = pageSize
+ PallocChunkPages = pallocChunkPages
+ PageAlloc64Bit = pageAlloc64Bit
+ PallocSumBytes = pallocSumBytes
+)
+
+// Expose pallocSum for testing.
+type PallocSum pallocSum
+
+func PackPallocSum(start, max, end uint) PallocSum { return PallocSum(packPallocSum(start, max, end)) }
+func (m PallocSum) Start() uint { return pallocSum(m).start() }
+func (m PallocSum) Max() uint { return pallocSum(m).max() }
+func (m PallocSum) End() uint { return pallocSum(m).end() }
+
+// Expose pallocBits for testing.
+type PallocBits pallocBits
+
+func (b *PallocBits) Find(npages uintptr, searchIdx uint) (uint, uint) {
+ return (*pallocBits)(b).find(npages, searchIdx)
+}
+func (b *PallocBits) AllocRange(i, n uint) { (*pallocBits)(b).allocRange(i, n) }
+func (b *PallocBits) Free(i, n uint) { (*pallocBits)(b).free(i, n) }
+func (b *PallocBits) Summarize() PallocSum { return PallocSum((*pallocBits)(b).summarize()) }
+func (b *PallocBits) PopcntRange(i, n uint) uint { return (*pageBits)(b).popcntRange(i, n) }
+
+// SummarizeSlow is a slow but more obviously correct implementation
+// of (*pallocBits).summarize. Used for testing.
+func SummarizeSlow(b *PallocBits) PallocSum {
+ var start, max, end uint
+
+ const N = uint(len(b)) * 64
+ for start < N && (*pageBits)(b).get(start) == 0 {
+ start++
+ }
+ for end < N && (*pageBits)(b).get(N-end-1) == 0 {
+ end++
+ }
+ run := uint(0)
+ for i := uint(0); i < N; i++ {
+ if (*pageBits)(b).get(i) == 0 {
+ run++
+ } else {
+ run = 0
+ }
+ if run > max {
+ max = run
+ }
+ }
+ return PackPallocSum(start, max, end)
+}
+
+// Expose non-trivial helpers for testing.
+func FindBitRange64(c uint64, n uint) uint { return findBitRange64(c, n) }
+
+// Given two PallocBits, returns a set of bit ranges where
+// they differ.
+func DiffPallocBits(a, b *PallocBits) []BitRange {
+ ba := (*pageBits)(a)
+ bb := (*pageBits)(b)
+
+ var d []BitRange
+ base, size := uint(0), uint(0)
+ for i := uint(0); i < uint(len(ba))*64; i++ {
+ if ba.get(i) != bb.get(i) {
+ if size == 0 {
+ base = i
+ }
+ size++
+ } else {
+ if size != 0 {
+ d = append(d, BitRange{base, size})
+ }
+ size = 0
+ }
+ }
+ if size != 0 {
+ d = append(d, BitRange{base, size})
+ }
+ return d
+}
+
+// StringifyPallocBits gets the bits in the bit range r from b,
+// and returns a string containing the bits as ASCII 0 and 1
+// characters.
+func StringifyPallocBits(b *PallocBits, r BitRange) string {
+ str := ""
+ for j := r.I; j < r.I+r.N; j++ {
+ if (*pageBits)(b).get(j) != 0 {
+ str += "1"
+ } else {
+ str += "0"
+ }
+ }
+ return str
+}
+
+// Expose pallocData for testing.
+type PallocData pallocData
+
+func (d *PallocData) FindScavengeCandidate(searchIdx uint, min, max uintptr) (uint, uint) {
+ return (*pallocData)(d).findScavengeCandidate(searchIdx, min, max)
+}
+func (d *PallocData) AllocRange(i, n uint) { (*pallocData)(d).allocRange(i, n) }
+func (d *PallocData) ScavengedSetRange(i, n uint) {
+ (*pallocData)(d).scavenged.setRange(i, n)
+}
+func (d *PallocData) PallocBits() *PallocBits {
+ return (*PallocBits)(&(*pallocData)(d).pallocBits)
+}
+func (d *PallocData) Scavenged() *PallocBits {
+ return (*PallocBits)(&(*pallocData)(d).scavenged)
+}
+
+// Expose fillAligned for testing.
+func FillAligned(x uint64, m uint) uint64 { return fillAligned(x, m) }
+
+// Expose pageCache for testing.
+type PageCache pageCache
+
+const PageCachePages = pageCachePages
+
+func NewPageCache(base uintptr, cache, scav uint64) PageCache {
+ return PageCache(pageCache{base: base, cache: cache, scav: scav})
+}
+func (c *PageCache) Empty() bool { return (*pageCache)(c).empty() }
+func (c *PageCache) Base() uintptr { return (*pageCache)(c).base }
+func (c *PageCache) Cache() uint64 { return (*pageCache)(c).cache }
+func (c *PageCache) Scav() uint64 { return (*pageCache)(c).scav }
+func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) {
+ return (*pageCache)(c).alloc(npages)
+}
+func (c *PageCache) Flush(s *PageAlloc) {
+ (*pageCache)(c).flush((*pageAlloc)(s))
+}
+
+// Expose chunk index type.
+type ChunkIdx chunkIdx
+
+// Expose pageAlloc for testing. Note that because pageAlloc is
+// not in the heap, neither is PageAlloc.
+type PageAlloc pageAlloc
+
+func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) {
+ return (*pageAlloc)(p).alloc(npages)
+}
+func (p *PageAlloc) AllocToCache() PageCache {
+ return PageCache((*pageAlloc)(p).allocToCache())
+}
+func (p *PageAlloc) Free(base, npages uintptr) {
+ (*pageAlloc)(p).free(base, npages)
+}
+func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
+ return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
+}
+func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) {
+ pp := (*pageAlloc)(p)
+ systemstack(func() {
+ lock(pp.mheapLock)
+ r = pp.scavenge(nbytes, mayUnlock)
+ unlock(pp.mheapLock)
+ })
+ return
+}
+func (p *PageAlloc) InUse() []AddrRange {
+ ranges := make([]AddrRange, 0, len(p.inUse.ranges))
+ for _, r := range p.inUse.ranges {
+ ranges = append(ranges, AddrRange{
+ Base: r.base.addr(),
+ Limit: r.limit.addr(),
+ })
+ }
+ return ranges
+}
+
+// Returns nil if the PallocData's L2 is missing.
+func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
+ ci := chunkIdx(i)
+ l2 := (*pageAlloc)(p).chunks[ci.l1()]
+ if l2 == nil {
+ return nil
+ }
+ return (*PallocData)(&l2[ci.l2()])
+}
+
+// AddrRange represents a range over addresses.
+// Specifically, it represents the range [Base, Limit).
+type AddrRange struct {
+ Base, Limit uintptr
+}
+
+// BitRange represents a range over a bitmap.
+type BitRange struct {
+ I, N uint // bit index and length in bits
+}
+
+// NewPageAlloc creates a new page allocator for testing and
+// initializes it with the scav and chunks maps. Each key in these maps
+// represents a chunk index and each value is a series of bit ranges to
+// set within each bitmap's chunk.
+//
+// The initialization of the pageAlloc preserves the invariant that if a
+// scavenged bit is set the alloc bit is necessarily unset, so some
+// of the bits described by scav may be cleared in the final bitmap if
+// ranges in chunks overlap with them.
+//
+// scav is optional, and if nil, the scavenged bitmap will be cleared
+// (as opposed to all 1s, which it usually is). Furthermore, every
+// chunk index in scav must appear in chunks; ones that do not are
+// ignored.
+func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
+ p := new(pageAlloc)
+
+ // We've got an entry, so initialize the pageAlloc.
+ p.init(new(mutex), nil)
+ lockInit(p.mheapLock, lockRankMheap)
+ p.test = true
+
+ for i, init := range chunks {
+ addr := chunkBase(chunkIdx(i))
+
+ // Mark the chunk's existence in the pageAlloc.
+ p.grow(addr, pallocChunkBytes)
+
+ // Initialize the bitmap and update pageAlloc metadata.
+ chunk := p.chunkOf(chunkIndex(addr))
+
+ // Clear all the scavenged bits which grow set.
+ chunk.scavenged.clearRange(0, pallocChunkPages)
+
+ // Apply scavenge state if applicable.
+ if scav != nil {
+ if scvg, ok := scav[i]; ok {
+ for _, s := range scvg {
+ // Ignore the case of s.N == 0. setRange doesn't handle
+ // it and it's a no-op anyway.
+ if s.N != 0 {
+ chunk.scavenged.setRange(s.I, s.N)
+ }
+ }
+ }
+ }
+
+ // Apply alloc state.
+ for _, s := range init {
+ // Ignore the case of s.N == 0. allocRange doesn't handle
+ // it and it's a no-op anyway.
+ if s.N != 0 {
+ chunk.allocRange(s.I, s.N)
+ }
+ }
+
+ // Update heap metadata for the allocRange calls above.
+ p.update(addr, pallocChunkPages, false, false)
+ }
+ systemstack(func() {
+ lock(p.mheapLock)
+ p.scavengeStartGen()
+ unlock(p.mheapLock)
+ })
+ return (*PageAlloc)(p)
+}
+
+// FreePageAlloc releases hard OS resources owned by the pageAlloc. Once this
+// is called the pageAlloc may no longer be used. The object itself will be
+// collected by the garbage collector once it is no longer live.
+func FreePageAlloc(pp *PageAlloc) {
+ p := (*pageAlloc)(pp)
+
+ // Free all the mapped space for the summary levels.
+ if pageAlloc64Bit != 0 {
+ for l := 0; l < summaryLevels; l++ {
+ sysFree(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes, nil)
+ }
+ } else {
+ resSize := uintptr(0)
+ for _, s := range p.summary {
+ resSize += uintptr(cap(s)) * pallocSumBytes
+ }
+ sysFree(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize), nil)
+ }
+
+ // Free the mapped space for chunks.
+ for i := range p.chunks {
+ if x := p.chunks[i]; x != nil {
+ p.chunks[i] = nil
+ // This memory comes from sysAlloc and will always be page-aligned.
+ sysFree(unsafe.Pointer(x), unsafe.Sizeof(*p.chunks[0]), nil)
+ }
+ }
+}
+
+// BaseChunkIdx is a convenient chunkIdx value which works on both
+// 64 bit and 32 bit platforms, allowing the tests to share code
+// between the two.
+//
+// This should not be higher than 0x100*pallocChunkBytes to support
+// mips and mipsle, which only have 31-bit address spaces.
+var BaseChunkIdx = ChunkIdx(chunkIndex(((0xc000*pageAlloc64Bit + 0x100*pageAlloc32Bit) * pallocChunkBytes) + arenaBaseOffset*sys.GoosAix))
+
+// PageBase returns an address given a chunk index and a page index
+// relative to that chunk.
+func PageBase(c ChunkIdx, pageIdx uint) uintptr {
+ return chunkBase(chunkIdx(c)) + uintptr(pageIdx)*pageSize
+}
+
+type BitsMismatch struct {
+ Base uintptr
+ Got, Want uint64
+}
+
+func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
+ ok = true
+
+ // Run on the system stack to avoid stack growth allocation.
+ systemstack(func() {
+ getg().m.mallocing++
+
+ // Lock so that we can safely access the bitmap.
+ lock(&mheap_.lock)
+ chunkLoop:
+ for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+ chunk := mheap_.pages.chunkOf(i)
+ for j := 0; j < pallocChunkPages/64; j++ {
+ // Run over each 64-bit bitmap section and ensure
+ // scavenged is being cleared properly on allocation.
+ // If a used bit and scavenged bit are both set, that's
+ // an error, and could indicate a larger problem, or
+ // an accounting problem.
+ want := chunk.scavenged[j] &^ chunk.pallocBits[j]
+ got := chunk.scavenged[j]
+ if want != got {
+ ok = false
+ if n >= len(mismatches) {
+ break chunkLoop
+ }
+ mismatches[n] = BitsMismatch{
+ Base: chunkBase(i) + uintptr(j)*64*pageSize,
+ Got: got,
+ Want: want,
+ }
+ n++
+ }
+ }
+ }
+ unlock(&mheap_.lock)
+
+ getg().m.mallocing--
+ })
+ return
+}
+
+func PageCachePagesLeaked() (leaked uintptr) {
+ stopTheWorld("PageCachePagesLeaked")
+
+ // Walk over destroyed Ps and look for unflushed caches.
+ deadp := allp[len(allp):cap(allp)]
+ for _, p := range deadp {
+ // Since we're going past len(allp) we may see nil Ps.
+ // Just ignore them.
+ if p != nil {
+ leaked += uintptr(sys.OnesCount64(p.pcache.cache))
+ }
+ }
+
+ startTheWorld()
+ return
+}
+
+var Semacquire = semacquire
+var Semrelease1 = semrelease1
+
+func SemNwait(addr *uint32) uint32 {
+ root := semroot(addr)
+ return atomic.Load(&root.nwait)
+}
+
+// MapHashCheck computes the hash of the key k for the map m, twice.
+// Method 1 uses the built-in hasher for the map.
+// Method 2 uses the typehash function (the one used by reflect).
+// Returns the two hash values, which should always be equal.
+func MapHashCheck(m interface{}, k interface{}) (uintptr, uintptr) {
+ // Unpack m.
+ mt := (*maptype)(unsafe.Pointer(efaceOf(&m)._type))
+ mh := (*hmap)(efaceOf(&m).data)
+
+ // Unpack k.
+ kt := efaceOf(&k)._type
+ var p unsafe.Pointer
+ if isDirectIface(kt) {
+ q := efaceOf(&k).data
+ p = unsafe.Pointer(&q)
+ } else {
+ p = efaceOf(&k).data
+ }
+
+ // Compute the hash functions.
+ x := mt.hasher(noescape(p), uintptr(mh.hash0))
+ y := typehash(kt, noescape(p), uintptr(mh.hash0))
+ return x, y
+}
+
+func MSpanCountAlloc(bits []byte) int {
+ s := mspan{
+ nelems: uintptr(len(bits) * 8),
+ gcmarkBits: (*gcBits)(unsafe.Pointer(&bits[0])),
+ }
+ return s.countAlloc()
+}
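
MapHashCheck, described just above, hashes the same key twice — once with the map's compiler-generated hasher and once with typehash — and the two results must match. A hedged sketch of how a test in package runtime_test might exercise it (the test name and keys are invented):

    package runtime_test

    import (
        "runtime"
        "testing"
    )

    // TestMapHasherConsistency is illustrative: for a handful of keys the
    // built-in map hasher and typehash must agree, since maps created via
    // reflect rely on typehash producing the same values.
    func TestMapHasherConsistency(t *testing.T) {
        m := map[string]bool{}
        for _, k := range []string{"", "a", "hash me", "κλειδί"} {
            got, want := runtime.MapHashCheck(m, k)
            if got != want {
                t.Errorf("key %q: built-in hash %#x != typehash %#x", k, got, want)
            }
        }
    }
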
diff --git a/src/runtime/export_unix_test.go b/src/runtime/export_unix_test.go
index eecdfb7..621488e 100644
--- a/src/runtime/export_unix_test.go
+++ b/src/runtime/export_unix_test.go
@@ -6,6 +6,13 @@
package runtime
+import "unsafe"
+
+var NonblockingPipe = nonblockingPipe
+var Pipe = pipe
+var SetNonblock = setNonblock
+var Closeonexec = closeonexec
+
func sigismember(mask *sigset, i int) bool {
clear := *mask
sigdelset(&clear, i)
@@ -17,3 +24,71 @@
sigprocmask(_SIG_SETMASK, nil, &sigmask)
return sigismember(&sigmask, i)
}
+
+type M = m
+
+var waitForSigusr1 struct {
+ rdpipe int32
+ wrpipe int32
+ mID int64
+}
+
+// WaitForSigusr1 blocks until a SIGUSR1 is received. It calls ready
+// when it is set up to receive SIGUSR1. The ready function should
+// cause a SIGUSR1 to be sent. The r and w arguments are a pipe that
+// the signal handler can use to report when the signal is received.
+//
+// Once SIGUSR1 is received, it returns the ID of the current M and
+// the ID of the M the SIGUSR1 was received on. If the caller writes
+// a non-zero byte to w, WaitForSigusr1 returns immediately with -1, -1.
+func WaitForSigusr1(r, w int32, ready func(mp *M)) (int64, int64) {
+ lockOSThread()
+ // Make sure we can receive SIGUSR1.
+ unblocksig(_SIGUSR1)
+
+ waitForSigusr1.rdpipe = r
+ waitForSigusr1.wrpipe = w
+
+ mp := getg().m
+ testSigusr1 = waitForSigusr1Callback
+ ready(mp)
+
+ // Wait for the signal. We use a pipe rather than a note
+ // because write is always async-signal-safe.
+ entersyscallblock()
+ var b byte
+ read(waitForSigusr1.rdpipe, noescape(unsafe.Pointer(&b)), 1)
+ exitsyscall()
+
+ gotM := waitForSigusr1.mID
+ testSigusr1 = nil
+
+ unlockOSThread()
+
+ if b != 0 {
+ // timeout signal from caller
+ return -1, -1
+ }
+ return mp.id, gotM
+}
+
+// waitForSigusr1Callback is called from the signal handler during
+// WaitForSigusr1. It must not have write barriers because there may
+// not be a P.
+//
+//go:nowritebarrierrec
+func waitForSigusr1Callback(gp *g) bool {
+ if gp == nil || gp.m == nil {
+ waitForSigusr1.mID = -1
+ } else {
+ waitForSigusr1.mID = gp.m.id
+ }
+ b := byte(0)
+ write(uintptr(waitForSigusr1.wrpipe), noescape(unsafe.Pointer(&b)), 1)
+ return true
+}
+
+// SendSigusr1 sends SIGUSR1 to mp.
+func SendSigusr1(mp *M) {
+ signalM(mp, _SIGUSR1)
+}
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 2917efe..7316503 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -78,21 +78,6 @@
If the line ends with "(forced)", this GC was forced by a
runtime.GC() call.
- Setting gctrace to any value > 0 also causes the garbage collector
- to emit a summary when memory is released back to the system.
- This process of returning memory to the system is called scavenging.
- The format of this summary is subject to change.
- Currently it is:
- scvg#: # MB released printed only if non-zero
- scvg#: inuse: # idle: # sys: # released: # consumed: # (MB)
- where the fields are as follows:
- scvg# the scavenge cycle number, incremented at each scavenge
- inuse: # MB used or partially used spans
- idle: # MB spans pending scavenging
- sys: # MB mapped from the system
- released: # MB released to the system
- consumed: # MB allocated from the system
-
madvdontneed: setting madvdontneed=1 will use MADV_DONTNEED
instead of MADV_FREE on Linux when returning memory to the
kernel. This is less efficient, but causes RSS numbers to drop
@@ -102,7 +87,7 @@
When set to 0 memory profiling is disabled. Refer to the description of
MemProfileRate for the default value.
- invalidptr: defaults to invalidptr=1, causing the garbage collector and stack
+ invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
copier to crash the program if an invalid pointer value (for example, 1)
is found in a pointer-typed location. Setting invalidptr=0 disables this check.
This should only be used as a temporary workaround to diagnose buggy code.
@@ -114,6 +99,20 @@
scavenge: scavenge=1 enables debugging mode of heap scavenger.
+ scavtrace: setting scavtrace=1 causes the runtime to emit a single line to standard
+ error, roughly once per GC cycle, summarizing the amount of work done by the
+ scavenger as well as the total amount of memory returned to the operating system
+ and an estimate of physical memory utilization. The format of this line is subject
+ to change, but currently it is:
+ scav # # KiB work, # KiB total, #% util
+ where the fields are as follows:
+ scav # the scavenge cycle number
+ # KiB work the amount of memory returned to the OS since the last line
+ # KiB total the total amount of memory returned to the OS
+ #% util the fraction of all unscavenged memory which is in-use
+ If the line ends with "(forced)", then scavenging was forced by a
+ debug.FreeOSMemory() call.
+
scheddetail: setting schedtrace=X and scheddetail=1 causes the scheduler to emit
detailed multiline info every X milliseconds, describing state of the scheduler,
processors, threads and goroutines.
@@ -127,6 +126,13 @@
IDs will refer to the ID of the goroutine at the time of creation; it's possible for this
ID to be reused for another goroutine. Setting N to 0 will report no ancestry information.
+ asyncpreemptoff: asyncpreemptoff=1 disables signal-based
+ asynchronous goroutine preemption. This makes some loops
+ non-preemptible for long periods, which may delay GC and
+ goroutine scheduling. This is useful for debugging GC issues
+ because it also disables the conservative stack scanning used
+ for asynchronously preempted goroutines.
+
The net, net/http, and crypto/tls packages also refer to debugging variables in GODEBUG.
See the documentation for those packages for details.
@@ -200,7 +206,6 @@
// directly is discouraged, as is using FuncForPC on any of the
// returned PCs, since these cannot account for inlining or return
// program counter adjustment.
-//go:noinline
func Callers(skip int, pc []uintptr) int {
// runtime.callers uses pc.array==nil as a signal
// to print a stack trace. Pick off 0-length pc here
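
The scavtrace text added above documents the output format only; to actually see a line you run a program under GODEBUG=scavtrace=1 and give the scavenger some work. A hedged sketch (the byte counts in the comment are made up):

    package main

    import "runtime/debug"

    // alloc allocates and immediately drops ~64 MiB so there is free memory
    // for the scavenger to return to the OS.
    func alloc() {
        _ = make([]byte, 64<<20)
    }

    func main() {
        alloc()
        debug.FreeOSMemory()
        // Run as:  GODEBUG=scavtrace=1 ./prog
        // Expected shape of the trace line (values are illustrative):
        //   scav 1 65536 KiB work, 65536 KiB total, 99% util (forced)
    }
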
diff --git a/src/runtime/funcdata.h b/src/runtime/funcdata.h
index d9a35c5..0fb50dd 100644
--- a/src/runtime/funcdata.h
+++ b/src/runtime/funcdata.h
@@ -17,6 +17,7 @@
#define FUNCDATA_RegPointerMaps 2
#define FUNCDATA_StackObjects 3
#define FUNCDATA_InlTree 4
+#define FUNCDATA_OpenCodedDeferInfo 5 /* info for func with open-coded defers */
// Pseudo-assembly statements.
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index d55a934..c5c8a4c 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -6,10 +6,13 @@
import (
"fmt"
+ "math/rand"
"os"
"reflect"
"runtime"
"runtime/debug"
+ "sort"
+ "strings"
"sync"
"sync/atomic"
"testing"
@@ -21,12 +24,6 @@
if os.Getenv("GOGC") == "off" {
t.Skip("skipping test; GOGC=off in environment")
}
- if runtime.GOOS == "windows" {
- t.Skip("skipping test; GOOS=windows http://golang.org/issue/27156")
- }
- if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
- t.Skip("skipping test; GOOS=linux GOARCH=arm64 https://github.com/golang/go/issues/27636")
- }
got := runTestProg(t, "testprog", "GCSys")
want := "OK\n"
if got != want {
@@ -196,6 +193,15 @@
}
}
+func TestGcZombieReporting(t *testing.T) {
+ // This test is somewhat sensitive to how the allocator works.
+ got := runTestProg(t, "testprog", "GCZombie")
+ want := "found pointer to free object"
+ if !strings.Contains(got, want) {
+ t.Fatalf("expected %q in output, but got %q", want, got)
+ }
+}
+
func BenchmarkSetTypePtr(b *testing.B) {
benchSetType(b, new(*byte))
}
@@ -470,25 +476,6 @@
}
}
-func TestUnscavHugePages(t *testing.T) {
- // Allocate 20 MiB and immediately free it a few times to increase
- // the chance that unscavHugePages isn't zero and that some kind of
- // accounting had to happen in the runtime.
- for j := 0; j < 3; j++ {
- var large [][]byte
- for i := 0; i < 5; i++ {
- large = append(large, make([]byte, runtime.PhysHugePageSize))
- }
- runtime.KeepAlive(large)
- runtime.GC()
- }
- base, slow := runtime.UnscavHugePagesSlow()
- if base != slow {
- logDiff(t, "unscavHugePages", reflect.ValueOf(base), reflect.ValueOf(slow))
- t.Fatal("unscavHugePages mismatch")
- }
-}
-
func logDiff(t *testing.T, prefix string, got, want reflect.Value) {
typ := got.Type()
switch typ.Kind() {
@@ -531,6 +518,90 @@
hugeSink = nil
}
+func BenchmarkReadMemStatsLatency(b *testing.B) {
+ // We’ll apply load to the runtime with maxProcs-1 goroutines
+ // and use one more to actually benchmark. It doesn't make sense
+ // to try to run this test with only 1 P (that's what
+ // BenchmarkReadMemStats is for).
+ maxProcs := runtime.GOMAXPROCS(-1)
+ if maxProcs == 1 {
+ b.Skip("This benchmark can only be run with GOMAXPROCS > 1")
+ }
+
+ // Code to build a big tree with lots of pointers.
+ type node struct {
+ children [16]*node
+ }
+ var buildTree func(depth int) *node
+ buildTree = func(depth int) *node {
+ tree := new(node)
+ if depth != 0 {
+ for i := range tree.children {
+ tree.children[i] = buildTree(depth - 1)
+ }
+ }
+ return tree
+ }
+
+ // Keep the GC busy by continuously generating large trees.
+ done := make(chan struct{})
+ var wg sync.WaitGroup
+ for i := 0; i < maxProcs-1; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ var hold *node
+ loop:
+ for {
+ hold = buildTree(5)
+ select {
+ case <-done:
+ break loop
+ default:
+ }
+ }
+ runtime.KeepAlive(hold)
+ }()
+ }
+
+ // Spend this much time measuring latencies.
+ latencies := make([]time.Duration, 0, 1024)
+
+ // Run for timeToBench hitting ReadMemStats continuously
+ // and measuring the latency.
+ b.ResetTimer()
+ var ms runtime.MemStats
+ for i := 0; i < b.N; i++ {
+ // Sleep for a bit, otherwise we're just going to keep
+ // stopping the world and no one will get to do anything.
+ time.Sleep(100 * time.Millisecond)
+ start := time.Now()
+ runtime.ReadMemStats(&ms)
+ latencies = append(latencies, time.Now().Sub(start))
+ }
+ close(done)
+ // Make sure to stop the timer before we wait! The goroutines above
+ // are very heavy-weight and not easy to stop, so we could end up
+ // confusing the benchmarking framework for small b.N.
+ b.StopTimer()
+ wg.Wait()
+
+ // Disable the default */op metrics.
+ // ns/op doesn't mean much here: it's an average, and the sleep in our
+ // b.N loop above skews it significantly.
+ b.ReportMetric(0, "ns/op")
+ b.ReportMetric(0, "B/op")
+ b.ReportMetric(0, "allocs/op")
+
+ // Sort latencies then report percentiles.
+ sort.Slice(latencies, func(i, j int) bool {
+ return latencies[i] < latencies[j]
+ })
+ b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
+ b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
+ b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
+}
+
func TestUserForcedGC(t *testing.T) {
// Test that runtime.GC() triggers a GC even if GOGC=off.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
@@ -691,6 +762,24 @@
close(teardown)
}
+func BenchmarkMSpanCountAlloc(b *testing.B) {
+ // n is the number of bytes to benchmark against.
+ // n must always be a multiple of 8, since gcBits is
+ // always rounded up to 8 bytes.
+ for _, n := range []int{8, 16, 32, 64, 128} {
+ b.Run(fmt.Sprintf("bits=%d", n*8), func(b *testing.B) {
+ // Initialize a new byte slice with pseudo-random data.
+ bits := make([]byte, n)
+ rand.Read(bits)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ runtime.MSpanCountAlloc(bits)
+ }
+ })
+ }
+}
+
func countpwg(n *int, ready *sync.WaitGroup, teardown chan bool) {
if *n == 0 {
ready.Done()
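The percentile reporting used in BenchmarkReadMemStatsLatency above is a reusable pattern; a rough sketch of it applied to an arbitrary operation (doWork is a placeholder, not anything in this patch):

    package bench_test

    import (
        "sort"
        "testing"
        "time"
    )

    // doWork stands in for whatever operation is being measured.
    func doWork() { time.Sleep(10 * time.Microsecond) }

    func BenchmarkOpLatency(b *testing.B) {
        latencies := make([]time.Duration, 0, b.N)
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
            start := time.Now()
            doWork()
            latencies = append(latencies, time.Since(start))
        }
        b.StopTimer()
        // An average hides tail behavior, so zero the default metric and
        // report percentiles instead.
        b.ReportMetric(0, "ns/op")
        sort.Slice(latencies, func(i, j int) bool { return latencies[i] < latencies[j] })
        b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
        b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
    }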
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index 0741f63..ec1ba90 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -179,7 +179,7 @@
typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
typePointer, typeScalar, // i string
}
- case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x", "wasm":
+ case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "riscv64", "s390x", "wasm":
return []byte{
typePointer, // q *int
typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
@@ -187,14 +187,6 @@
typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
typePointer, typeScalar, // i string
}
- case "amd64p32":
- return []byte{
- typePointer, // q *int
- typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
- typePointer, typeScalar, typeScalar, // r []byte
- typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
- typePointer, typeScalar, // i string
- }
default:
panic("unknown arch")
}
diff --git a/src/runtime/go_tls.h b/src/runtime/go_tls.h
index 61f7dbe..a47e798 100644
--- a/src/runtime/go_tls.h
+++ b/src/runtime/go_tls.h
@@ -11,11 +11,6 @@
#define g(r) 0(r)(TLS*1)
#endif
-#ifdef GOARCH_amd64p32
-#define get_tls(r) MOVL TLS, r
-#define g(r) 0(r)(TLS*1)
-#endif
-
#ifdef GOARCH_386
#define get_tls(r) MOVL TLS, r
#define g(r) 0(r)(TLS*1)
diff --git a/src/runtime/hash32.go b/src/runtime/hash32.go
index 5574923..966f70e 100644
--- a/src/runtime/hash32.go
+++ b/src/runtime/hash32.go
@@ -20,10 +20,7 @@
m4 = 2336365089
)
-func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
- if GOARCH == "386" && GOOS != "nacl" && useAeshash {
- return aeshash(p, seed, s)
- }
+func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
h := uint32(seed + s*hashkey[0])
tail:
switch {
@@ -81,7 +78,7 @@
return uintptr(h)
}
-func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
h := uint32(seed + 4*hashkey[0])
h ^= readUnaligned32(p)
h = rotl_15(h*m1) * m2
@@ -93,7 +90,7 @@
return uintptr(h)
}
-func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
h := uint32(seed + 8*hashkey[0])
h ^= readUnaligned32(p)
h = rotl_15(h*m1) * m2
diff --git a/src/runtime/hash64.go b/src/runtime/hash64.go
index c3f2b9b..d128382 100644
--- a/src/runtime/hash64.go
+++ b/src/runtime/hash64.go
@@ -6,7 +6,7 @@
// xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/
-// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x wasm
+// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm
package runtime
@@ -20,11 +20,7 @@
m4 = 15839092249703872147
)
-func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
- if (GOARCH == "amd64" || GOARCH == "arm64") &&
- GOOS != "nacl" && useAeshash {
- return aeshash(p, seed, s)
- }
+func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
h := uint64(seed + s*hashkey[0])
tail:
switch {
@@ -82,7 +78,7 @@
return uintptr(h)
}
-func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
h := uint64(seed + 4*hashkey[0])
v := uint64(readUnaligned32(p))
h ^= v
@@ -94,7 +90,7 @@
return uintptr(h)
}
-func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
h := uint64(seed + 8*hashkey[0])
h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32
h = rotl_31(h*m1) * m2
diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go
index fe25a7f..655ca18 100644
--- a/src/runtime/hash_test.go
+++ b/src/runtime/hash_test.go
@@ -8,6 +8,7 @@
"fmt"
"math"
"math/rand"
+ "reflect"
. "runtime"
"strings"
"testing"
@@ -48,6 +49,54 @@
}
}
+func TestCompilerVsRuntimeHash(t *testing.T) {
+ // Test to make sure the compiler's hash function and the runtime's hash function agree.
+ // See issue 37716.
+ for _, m := range []interface{}{
+ map[bool]int{},
+ map[int8]int{},
+ map[uint8]int{},
+ map[int16]int{},
+ map[uint16]int{},
+ map[int32]int{},
+ map[uint32]int{},
+ map[int64]int{},
+ map[uint64]int{},
+ map[int]int{},
+ map[uint]int{},
+ map[uintptr]int{},
+ map[*byte]int{},
+ map[chan int]int{},
+ map[unsafe.Pointer]int{},
+ map[float32]int{},
+ map[float64]int{},
+ map[complex64]int{},
+ map[complex128]int{},
+ map[string]int{},
+ //map[interface{}]int{},
+ //map[interface{F()}]int{},
+ map[[8]uint64]int{},
+ map[[8]string]int{},
+ map[struct{ a, b, c, d int32 }]int{}, // Note: tests AMEM128
+ map[struct{ a, b, _, d int32 }]int{},
+ map[struct {
+ a, b int32
+ c float32
+ d, e [8]byte
+ }]int{},
+ map[struct {
+ a int16
+ b int64
+ }]int{},
+ } {
+ k := reflect.New(reflect.TypeOf(m).Key()).Elem().Interface() // the zero key
+ x, y := MapHashCheck(m, k)
+ if x != y {
+ t.Errorf("hashes did not match (%x vs %x) for map %T", x, y, m)
+ }
+ }
+}
+
// Smhasher is a torture test for hash functions.
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
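The zero-key construction in TestCompilerVsRuntimeHash is a handy reflect idiom in its own right; a small standalone sketch:

    package main

    import (
        "fmt"
        "reflect"
    )

    func main() {
        // For any map value m, build the zero value of its key type; the test
        // above uses the same trick to feed one key to both hash paths.
        m := map[struct{ a, b int32 }]int{}
        k := reflect.New(reflect.TypeOf(m).Key()).Elem().Interface()
        fmt.Printf("%T %+v\n", k, k) // struct { a int32; b int32 } {a:0 b:0}
    }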
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 992df63..cfd5c25 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -371,7 +371,12 @@
dumpint(uint64(d.sp))
dumpint(uint64(d.pc))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
- dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
+ if d.fn == nil {
+ // d.fn can be nil for open-coded defers
+ dumpint(uint64(0))
+ } else {
+ dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
+ }
dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
}
for p := gp._panic; p != nil; p = p.link {
@@ -430,7 +435,7 @@
// mspan.types
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
// Finalizers
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
@@ -453,7 +458,7 @@
func dumpobjs() {
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
p := s.base()
@@ -616,7 +621,7 @@
func dumpmemprof() {
iterate_memprof(dumpmemprof_callback)
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
for sp := s.specials; sp != nil; sp = sp.next {
@@ -637,7 +642,7 @@
func mdump() {
// make sure we're done sweeping
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
s.ensureSwept()
}
}
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index bb4eccc..0504b89 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -66,6 +66,12 @@
m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys))
m.inter = inter
m._type = typ
+ // The hash is used in type switches. However, the compiler statically generates itabs
+ // for all interface/type pairs used in switches (which are added to itabTable
+ // in itabsinit). Dynamically-generated itabs never participate in type switches,
+ // and thus the hash is irrelevant.
+ // Note: m.hash is _not_ the hash used for the runtime itabTable hash table.
+ m.hash = 0
m.init()
itabAdd(m)
unlock(&itabLock)
@@ -233,11 +239,11 @@
return iname
}
m.fun[0] = uintptr(fun0)
- m.hash = typ.hash
return ""
}
func itabsinit() {
+ lockInit(&itabLock, lockRankItab)
lock(&itabLock)
for _, md := range activeModules() {
for _, i := range md.itablinks {
@@ -295,11 +301,11 @@
stringEface interface{} = stringInterfacePtr("")
sliceEface interface{} = sliceInterfacePtr(nil)
- uint16Type *_type = (*eface)(unsafe.Pointer(&uint16Eface))._type
- uint32Type *_type = (*eface)(unsafe.Pointer(&uint32Eface))._type
- uint64Type *_type = (*eface)(unsafe.Pointer(&uint64Eface))._type
- stringType *_type = (*eface)(unsafe.Pointer(&stringEface))._type
- sliceType *_type = (*eface)(unsafe.Pointer(&sliceEface))._type
+ uint16Type *_type = efaceOf(&uint16Eface)._type
+ uint32Type *_type = efaceOf(&uint32Eface)._type
+ uint64Type *_type = efaceOf(&uint64Eface)._type
+ stringType *_type = efaceOf(&stringEface)._type
+ sliceType *_type = efaceOf(&sliceEface)._type
)
// The conv and assert functions below do very similar things.
@@ -326,8 +332,11 @@
}
func convT16(val uint16) (x unsafe.Pointer) {
- if val == 0 {
- x = unsafe.Pointer(&zeroVal[0])
+ if val < uint16(len(staticuint64s)) {
+ x = unsafe.Pointer(&staticuint64s[val])
+ if sys.BigEndian {
+ x = add(x, 6)
+ }
} else {
x = mallocgc(2, uint16Type, false)
*(*uint16)(x) = val
@@ -336,8 +345,11 @@
}
func convT32(val uint32) (x unsafe.Pointer) {
- if val == 0 {
- x = unsafe.Pointer(&zeroVal[0])
+ if val < uint32(len(staticuint64s)) {
+ x = unsafe.Pointer(&staticuint64s[val])
+ if sys.BigEndian {
+ x = add(x, 4)
+ }
} else {
x = mallocgc(4, uint32Type, false)
*(*uint32)(x) = val
@@ -346,8 +358,8 @@
}
func convT64(val uint64) (x unsafe.Pointer) {
- if val == 0 {
- x = unsafe.Pointer(&zeroVal[0])
+ if val < uint64(len(staticuint64s)) {
+ x = unsafe.Pointer(&staticuint64s[val])
} else {
x = mallocgc(8, uint64Type, false)
*(*uint64)(x) = val
@@ -516,8 +528,8 @@
}
}
-// staticbytes is used to avoid convT2E for byte-sized values.
-var staticbytes = [...]byte{
+// staticuint64s is used to avoid allocating in convTx for small integer values.
+var staticuint64s = [...]uint64{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
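One observable effect of switching from staticbytes to staticuint64s is that converting small integer values (below 256) to an interface should no longer allocate; a rough check, with the caveat that the exact counts depend on the compiler and runtime in use:

    package main

    import (
        "fmt"
        "testing"
    )

    var sink interface{}

    func main() {
        small, large := uint64(7), uint64(70000)
        a := testing.AllocsPerRun(1000, func() { sink = small }) // expected 0: served from staticuint64s
        b := testing.AllocsPerRun(1000, func() { sink = large }) // expected 1: falls back to mallocgc
        fmt.Println("small:", a, "large:", b)
    }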
diff --git a/src/runtime/iface_test.go b/src/runtime/iface_test.go
index 6d8f861..4fab6c9 100644
--- a/src/runtime/iface_test.go
+++ b/src/runtime/iface_test.go
@@ -95,6 +95,19 @@
}
}
+func BenchmarkConvT2EByteSized(b *testing.B) {
+ b.Run("bool", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = yes
+ }
+ })
+ b.Run("uint8", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = eight8
+ }
+ })
+}
+
func BenchmarkConvT2ESmall(b *testing.B) {
for i := 0; i < b.N; i++ {
e = ts
@@ -310,18 +323,22 @@
var (
eight8 uint8 = 8
eight8I T8 = 8
+ yes bool = true
- zero16 uint16 = 0
- zero16I T16 = 0
- one16 uint16 = 1
+ zero16 uint16 = 0
+ zero16I T16 = 0
+ one16 uint16 = 1
+ thousand16 uint16 = 1000
- zero32 uint32 = 0
- zero32I T32 = 0
- one32 uint32 = 1
+ zero32 uint32 = 0
+ zero32I T32 = 0
+ one32 uint32 = 1
+ thousand32 uint32 = 1000
- zero64 uint64 = 0
- zero64I T64 = 0
- one64 uint64 = 1
+ zero64 uint64 = 0
+ zero64I T64 = 0
+ one64 uint64 = 1
+ thousand64 uint64 = 1000
zerostr string = ""
zerostrI Tstr = ""
@@ -369,6 +386,23 @@
})
})
b.Run("nonzero", func(b *testing.B) {
+ b.Run("str", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = nzstr
+ }
+ })
+ b.Run("slice", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = nzslice
+ }
+ })
+ b.Run("big", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = nzbig
+ }
+ })
+ })
+ b.Run("smallint", func(b *testing.B) {
b.Run("16", func(b *testing.B) {
for i := 0; i < b.N; i++ {
e = one16
@@ -384,19 +418,21 @@
e = one64
}
})
- b.Run("str", func(b *testing.B) {
+ })
+ b.Run("largeint", func(b *testing.B) {
+ b.Run("16", func(b *testing.B) {
for i := 0; i < b.N; i++ {
- e = nzstr
+ e = thousand16
}
})
- b.Run("slice", func(b *testing.B) {
+ b.Run("32", func(b *testing.B) {
for i := 0; i < b.N; i++ {
- e = nzslice
+ e = thousand32
}
})
- b.Run("big", func(b *testing.B) {
+ b.Run("64", func(b *testing.B) {
for i := 0; i < b.N; i++ {
- e = nzbig
+ e = thousand64
}
})
})
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 13289a8..9b9dc14 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -229,3 +229,9 @@
LOCK
ANDB BX, (AX)
RET
+
+TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), BX
+ MOVB val+4(FP), AX
+ XCHGB AX, 0(BX)
+ RET
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index e18aee7..90c5642 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -136,6 +136,12 @@
TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
JMP runtime∕internal∕atomic·Store(SB)
+TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9
+ MOVQ ptr+0(FP), BX
+ MOVB val+8(FP), AX
+ XCHGB AX, 0(BX)
+ RET
+
TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
MOVQ ptr+0(FP), BX
MOVQ val+8(FP), AX
diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s
deleted file mode 100644
index 35b5ef2..0000000
--- a/src/runtime/internal/atomic/asm_amd64p32.s
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// bool Cas(int32 *val, int32 old, int32 new)
-// Atomically:
-// if(*val == old){
-// *val = new;
-// return 1;
-// } else
-// return 0;
-TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-17
- MOVL ptr+0(FP), BX
- MOVL old+4(FP), AX
- MOVL new+8(FP), CX
- LOCK
- CMPXCHGL CX, 0(BX)
- SETEQ ret+16(FP)
- RET
-
-TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-17
- JMP runtime∕internal∕atomic·Cas(SB)
-
-TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-17
- JMP runtime∕internal∕atomic·Cas(SB)
-
-TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Load(SB)
-
-TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Load(SB)
-
-TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-8
- JMP runtime∕internal∕atomic·Store(SB)
-
-TEXT runtime∕internal∕atomic·Loadint64(SB), NOSPLIT, $0-16
- JMP runtime∕internal∕atomic·Load64(SB)
-
-TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-24
- JMP runtime∕internal∕atomic·Xadd64(SB)
-
-// bool runtime∕internal∕atomic·cas64(uint64 *val, uint64 old, uint64 new)
-// Atomically:
-// if(*val == *old){
-// *val = new;
-// return 1;
-// } else {
-// return 0;
-// }
-TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-25
- MOVL ptr+0(FP), BX
- MOVQ old+8(FP), AX
- MOVQ new+16(FP), CX
- LOCK
- CMPXCHGQ CX, 0(BX)
- SETEQ ret+24(FP)
- RET
-
-// bool Casp1(void **val, void *old, void *new)
-// Atomically:
-// if(*val == old){
-// *val = new;
-// return 1;
-// } else
-// return 0;
-TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-17
- MOVL ptr+0(FP), BX
- MOVL old+4(FP), AX
- MOVL new+8(FP), CX
- LOCK
- CMPXCHGL CX, 0(BX)
- SETEQ ret+16(FP)
- RET
-
-// uint32 Xadd(uint32 volatile *val, int32 delta)
-// Atomically:
-// *val += delta;
-// return *val;
-TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), BX
- MOVL delta+4(FP), AX
- MOVL AX, CX
- LOCK
- XADDL AX, 0(BX)
- ADDL CX, AX
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-24
- MOVL ptr+0(FP), BX
- MOVQ delta+8(FP), AX
- MOVQ AX, CX
- LOCK
- XADDQ AX, 0(BX)
- ADDQ CX, AX
- MOVQ AX, ret+16(FP)
- RET
-
-TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Xadd(SB)
-
-TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), BX
- MOVL new+4(FP), AX
- XCHGL AX, 0(BX)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
- MOVL ptr+0(FP), BX
- MOVQ new+8(FP), AX
- TESTL $7, BX
- JZ 2(PC)
- MOVL 0, BX // crash when unaligned
- XCHGQ AX, 0(BX)
- MOVQ AX, ret+16(FP)
- RET
-
-TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Xchg(SB)
-
-TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
- MOVL ptr+0(FP), BX
- MOVL val+4(FP), AX
- XCHGL AX, 0(BX)
- RET
-
-TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8
- MOVL ptr+0(FP), BX
- MOVL val+4(FP), AX
- XCHGL AX, 0(BX)
- RET
-
-TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8
- JMP runtime∕internal∕atomic·Store(SB)
-
-TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
- MOVL ptr+0(FP), BX
- MOVQ val+8(FP), AX
- XCHGQ AX, 0(BX)
- RET
-
-// void runtime∕internal∕atomic·Or8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-5
- MOVL ptr+0(FP), BX
- MOVB val+4(FP), AX
- LOCK
- ORB AX, 0(BX)
- RET
-
-// void runtime∕internal∕atomic·And8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-5
- MOVL ptr+0(FP), BX
- MOVB val+4(FP), AX
- LOCK
- ANDB AX, 0(BX)
- RET
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 9cb1037..3290fb7 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -166,6 +166,14 @@
SYNC
RET
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R1
+ MOVB val+8(FP), R2
+ SYNC
+ MOVB R2, 0(R1)
+ SYNC
+ RET
+
TEXT ·Store64(SB), NOSPLIT, $0-16
MOVV ptr+0(FP), R1
MOVV val+8(FP), R2
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index af6bce5..62811a6 100644
--- a/src/runtime/internal/atomic/asm_mipsx.s
+++ b/src/runtime/internal/atomic/asm_mipsx.s
@@ -32,6 +32,14 @@
SYNC
RET
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW ptr+0(FP), R1
+ MOVB val+4(FP), R2
+ SYNC
+ MOVB R2, 0(R1)
+ SYNC
+ RET
+
TEXT ·Load(SB),NOSPLIT,$0-8
MOVW ptr+0(FP), R1
SYNC
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index 052b031..06dc931 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -170,6 +170,13 @@
MOVW R4, 0(R3)
RET
+TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVB val+8(FP), R4
+ SYNC
+ MOVB R4, 0(R3)
+ RET
+
TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
MOVD ptr+0(FP), R3
MOVD val+8(FP), R4
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index 084f5b5..9a19bc0 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -12,6 +12,14 @@
SYNC
RET
+// func Store8(ptr *uint8, val uint8)
+TEXT ·Store8(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVB val+8(FP), R3
+ MOVB R3, 0(R2)
+ SYNC
+ RET
+
// func Store64(ptr *uint64, val uint64)
TEXT ·Store64(SB), NOSPLIT, $0
MOVD ptr+0(FP), R2
@@ -168,37 +176,27 @@
TEXT ·Or8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R3
MOVBZ val+8(FP), R4
- // Calculate shift.
- MOVD R3, R5
- AND $3, R5
- XOR $3, R5 // big endian - flip direction
- SLD $3, R5 // MUL $8, R5
- SLD R5, R4
- // Align ptr down to 4 bytes so we can use 32-bit load/store.
- AND $-4, R3
- MOVWZ 0(R3), R6
-again:
- OR R4, R6, R7
- CS R6, R7, 0(R3) // if R6==(R3) then (R3)=R7 else R6=(R3)
- BNE again
+ // We don't have atomic operations that work on individual bytes so we
+ // need to align addr down to a word boundary and create a mask
+ // containing v to OR with the entire word atomically.
+ MOVD $(3<<3), R5
+ RXSBG $59, $60, $3, R3, R5 // R5 = 24 - ((addr % 4) * 8) = ((addr & 3) << 3) ^ (3 << 3)
+ ANDW $~3, R3 // R3 = floor(addr, 4) = addr &^ 3
+ SLW R5, R4 // R4 = uint32(v) << R5
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
RET
// func And8(addr *uint8, v uint8)
TEXT ·And8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R3
MOVBZ val+8(FP), R4
- // Calculate shift.
- MOVD R3, R5
- AND $3, R5
- XOR $3, R5 // big endian - flip direction
- SLD $3, R5 // MUL $8, R5
- OR $-256, R4 // create 0xffffffffffffffxx
- RLLG R5, R4
- // Align ptr down to 4 bytes so we can use 32-bit load/store.
- AND $-4, R3
- MOVWZ 0(R3), R6
-again:
- AND R4, R6, R7
- CS R6, R7, 0(R3) // if R6==(R3) then (R3)=R7 else R6=(R3)
- BNE again
+ // We don't have atomic operations that work on individual bytes so we
+ // need to align addr down to a word boundary and create a mask
+ // containing v to AND with the entire word atomically.
+ ORW $~0xff, R4 // R4 = uint32(v) | 0xffffff00
+ MOVD $(3<<3), R5
+ RXSBG $59, $60, $3, R3, R5 // R5 = 24 - ((addr % 4) * 8) = ((addr & 3) << 3) ^ (3 << 3)
+ ANDW $~3, R3 // R3 = floor(addr, 4) = addr &^ 3
+ RLL R5, R4, R4 // R4 = rotl(R4, R5)
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
RET
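The masking scheme described in the comments above (align the byte's address down to a 4-byte word, shift the value into place, then do a single word-wide atomic OR or AND) can be sketched in portable Go. This version is non-atomic and assumes a little-endian layout; it only illustrates the address math, while the real s390x/riscv64 code does the final OR atomically (LAO / AMOORW):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // or8ViaWord ORs v into the byte at p by updating the containing 32-bit word.
    func or8ViaWord(p *uint8, v uint8) {
        word := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) &^ 3)) // floor(addr, 4)
        shift := (uintptr(unsafe.Pointer(p)) & 3) * 8                      // little-endian byte offset
        *word |= uint32(v) << shift
    }

    func main() {
        var word uint32 // guaranteed 4-byte-aligned backing store
        bs := (*[4]uint8)(unsafe.Pointer(&word))
        or8ViaWord(&bs[1], 0x80)
        fmt.Printf("%#x\n", word) // 0x8000 on little-endian
    }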
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index d7f82cc..8d002eb 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -75,6 +75,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_amd64x.go b/src/runtime/internal/atomic/atomic_amd64.go
similarity index 96%
rename from src/runtime/internal/atomic/atomic_amd64x.go
rename to src/runtime/internal/atomic/atomic_amd64.go
index 31c1636..14b8101 100644
--- a/src/runtime/internal/atomic/atomic_amd64x.go
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build amd64 amd64p32
-
package atomic
import "unsafe"
@@ -79,6 +77,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index c1fc1f7..95713af 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -210,4 +210,7 @@
func Load64(addr *uint64) uint64
//go:noescape
+func Store8(addr *uint8, v uint8)
+
+//go:noescape
func Store64(addr *uint64, v uint64)
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 0182f30..26ca94d 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -57,6 +57,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
// NO go:noescape annotation; see atomic_pointer.go.
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index a7e8c35..a2eb756 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -48,6 +48,12 @@
STLRW R1, (R0)
RET
+TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R0
+ MOVB val+8(FP), R1
+ STLRB R1, (R0)
+ RET
+
TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
@@ -55,9 +61,9 @@
RET
TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20
-again:
MOVD ptr+0(FP), R0
MOVW new+8(FP), R1
+again:
LDAXRW (R0), R2
STLXRW R1, (R0), R3
CBNZ R3, again
@@ -65,9 +71,9 @@
RET
TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
-again:
MOVD ptr+0(FP), R0
MOVD new+8(FP), R1
+again:
LDAXR (R0), R2
STLXR R1, (R0), R3
CBNZ R3, again
@@ -102,9 +108,9 @@
// *val += delta;
// return *val;
TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-20
-again:
MOVD ptr+0(FP), R0
MOVW delta+8(FP), R1
+again:
LDAXRW (R0), R2
ADDW R2, R1, R2
STLXRW R2, (R0), R3
@@ -113,9 +119,9 @@
RET
TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-24
-again:
MOVD ptr+0(FP), R0
MOVD delta+8(FP), R1
+again:
LDAXR (R0), R2
ADD R2, R1, R2
STLXR R2, (R0), R3
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index ce11e38..1d99778 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -59,6 +59,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
// NO go:noescape annotation; see atomic_pointer.go.
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index 6e39262..0e2d77a 100644
--- a/src/runtime/internal/atomic/atomic_mipsx.go
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -141,6 +141,9 @@
//go:noescape
func Store(ptr *uint32, val uint32)
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
// NO go:noescape annotation; see atomic_pointer.go.
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index 13805a5..a48ecf5 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -59,6 +59,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
new file mode 100644
index 0000000..d525123
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -0,0 +1,67 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
new file mode 100644
index 0000000..d005325
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -0,0 +1,232 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// RISC-V's atomic operations have two bits, aq ("acquire") and rl ("release"),
+// which may be toggled on and off. Their precise semantics are defined in
+// section 6.3 of the specification, but the basic idea is as follows:
+//
+// - If neither aq nor rl is set, the CPU may reorder the atomic arbitrarily.
+// It guarantees only that it will execute atomically.
+//
+// - If aq is set, the CPU may move the instruction backward, but not forward.
+//
+// - If rl is set, the CPU may move the instruction forward, but not backward.
+//
+// - If both are set, the CPU may not reorder the instruction at all.
+//
+// These four modes correspond to other well-known memory models on other CPUs.
+// On ARM, aq corresponds to a dmb ishst, aq+rl corresponds to a dmb ish. On
+// Intel, aq corresponds to an lfence, rl to an sfence, and aq+rl to an mfence
+// (or a lock prefix).
+//
+// Go's memory model requires that
+// - if a read happens after a write, the read must observe the write, and
+// that
+// - if a read happens concurrently with a write, the read may observe the
+// write.
+// aq is sufficient to guarantee this, so that's what we use here. (This jibes
+// with ARM, which uses dmb ishst.)
+
+#include "textflag.h"
+
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOV ptr+0(FP), A0
+ MOVW old+8(FP), A1
+ MOVW new+12(FP), A2
+cas_again:
+ LRW (A0), A3
+ BNE A3, A1, cas_fail
+ SCW A2, (A0), A4
+ BNE A4, ZERO, cas_again
+ MOV $1, A0
+ MOVB A0, ret+16(FP)
+ RET
+cas_fail:
+ MOV $0, A0
+ MOV A0, ret+16(FP)
+ RET
+
+// func Cas64(ptr *uint64, old, new uint64) bool
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOV ptr+0(FP), A0
+ MOV old+8(FP), A1
+ MOV new+16(FP), A2
+cas_again:
+ LRD (A0), A3
+ BNE A3, A1, cas_fail
+ SCD A2, (A0), A4
+ BNE A4, ZERO, cas_again
+ MOV $1, A0
+ MOVB A0, ret+24(FP)
+ RET
+cas_fail:
+ MOVB ZERO, ret+24(FP)
+ RET
+
+// func Load(ptr *uint32) uint32
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
+ MOV ptr+0(FP), A0
+ LRW (A0), A0
+ MOVW A0, ret+8(FP)
+ RET
+
+// func Load8(ptr *uint8) uint8
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
+ MOV ptr+0(FP), A0
+ FENCE
+ MOVBU (A0), A1
+ FENCE
+ MOVB A1, ret+8(FP)
+ RET
+
+// func Load64(ptr *uint64) uint64
+TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
+ MOV ptr+0(FP), A0
+ LRD (A0), A0
+ MOV A0, ret+8(FP)
+ RET
+
+// func Store(ptr *uint32, val uint32)
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOSWAPW A1, (A0), ZERO
+ RET
+
+// func Store8(ptr *uint8, val uint8)
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ FENCE
+ MOVB A1, (A0)
+ FENCE
+ RET
+
+// func Store64(ptr *uint64, val uint64)
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOV ptr+0(FP), A0
+ MOV val+8(FP), A1
+ AMOSWAPD A1, (A0), ZERO
+ RET
+
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB),NOSPLIT,$0-25
+ JMP ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-16
+ JMP ·Load64(SB)
+
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-16
+ JMP ·Store64(SB)
+
+TEXT ·Loaduint(SB),NOSPLIT,$0-16
+ JMP ·Loaduintptr(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$0-16
+ JMP ·Loaduintptr(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$0-24
+ MOV ptr+0(FP), A0
+ MOV delta+8(FP), A1
+ AMOADDD A1, (A0), A0
+ ADD A0, A1, A0
+ MOVW A0, ret+16(FP)
+ RET
+
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
+ JMP ·Load(SB)
+
+// func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+TEXT ·Loadp(SB),NOSPLIT,$0-16
+ JMP ·Load64(SB)
+
+// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+// func Xchg(ptr *uint32, new uint32) uint32
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOV ptr+0(FP), A0
+ MOVW new+8(FP), A1
+ AMOSWAPW A1, (A0), A1
+ MOVW A1, ret+16(FP)
+ RET
+
+// func Xchg64(ptr *uint64, new uint64) uint64
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOV ptr+0(FP), A0
+ MOV new+8(FP), A1
+ AMOSWAPD A1, (A0), A1
+ MOV A1, ret+16(FP)
+ RET
+
+// Atomically:
+// *val += delta;
+// return *val;
+
+// func Xadd(ptr *uint32, delta int32) uint32
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOV ptr+0(FP), A0
+ MOVW delta+8(FP), A1
+ AMOADDW A1, (A0), A2
+ ADD A2,A1,A0
+ MOVW A0, ret+16(FP)
+ RET
+
+// func Xadd64(ptr *uint64, delta int64) uint64
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOV ptr+0(FP), A0
+ MOV delta+8(FP), A1
+ AMOADDD A1, (A0), A2
+ ADD A2, A1, A0
+ MOV A0, ret+16(FP)
+ RET
+
+// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+// func And8(ptr *uint8, val uint8)
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ AND $3, A0, A2
+ AND $-4, A0
+ SLL $3, A2
+ XOR $255, A1
+ SLL A2, A1
+ XOR $-1, A1
+ AMOANDW A1, (A0), ZERO
+ RET
+
+// func Or8(ptr *uint8, val uint8)
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ AND $3, A0, A2
+ AND $-4, A0
+ SLL $3, A2
+ SLL A2, A1
+ AMOORW A1, (A0), ZERO
+ RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index 25fd890..4d73b39 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -45,6 +45,9 @@
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
// NO go:noescape annotation; see atomic_pointer.go.
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index 0ba7544..0c1125c 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -86,14 +86,8 @@
// a continual source of pain. Test that on 32-bit systems they crash
// instead of failing silently.
- switch runtime.GOARCH {
- default:
- if unsafe.Sizeof(int(0)) != 4 {
- t.Skip("test only runs on 32-bit systems")
- }
- case "amd64p32":
- // amd64p32 can handle unaligned atomics.
- t.Skipf("test not needed on %v", runtime.GOARCH)
+ if unsafe.Sizeof(int(0)) != 4 {
+ t.Skip("test only runs on 32-bit systems")
}
x := make([]uint32, 4)
@@ -109,3 +103,120 @@
shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
+
+func TestAnd8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0xff)
+ for i := uint8(0); i < 8; i++ {
+ atomic.And8(&x, ^(1 << i))
+ if r := uint8(0xff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint8(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint8, 1<<12)
+ for i := range a {
+ a[i] = 0xff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := ^uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
+
+func TestOr8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0)
+ for i := uint8(0); i < 8; i++ {
+ atomic.Or8(&x, 1<<i)
+ if r := (uint8(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint8(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint8(0xff), v)
+ }
+ }
+}
+
+func TestBitwiseContended(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ if atomic.Load8(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And8(&a[i], ^m)
+ if atomic.Load8(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
index 0731763..9037c2f 100644
--- a/src/runtime/internal/atomic/atomic_wasm.go
+++ b/src/runtime/internal/atomic/atomic_wasm.go
@@ -143,6 +143,12 @@
//go:nosplit
//go:noinline
+func Store8(ptr *uint8, val uint8) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
func Store64(ptr *uint64, val uint64) {
*ptr = val
}
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
index 083a75c..de71b0f 100644
--- a/src/runtime/internal/atomic/bench_test.go
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -43,6 +43,46 @@
}
}
+func BenchmarkAnd8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkAnd8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.And8(&x[255], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOr8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkOr8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.Or8(&x[255], i)
+ i++
+ }
+ })
+}
+
func BenchmarkXadd(b *testing.B) {
var x uint32
ptr := &x
diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s
index df62f6c..192be4b 100644
--- a/src/runtime/internal/atomic/sys_linux_arm.s
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -29,9 +29,9 @@
CMP $7, R11
BLT 2(PC)
JMP ·armcas(SB)
- JMP ·kernelcas<>(SB)
+ JMP kernelcas<>(SB)
-TEXT runtime∕internal∕atomic·kernelcas<>(SB),NOSPLIT,$0
+TEXT kernelcas<>(SB),NOSPLIT,$0
MOVW ptr+0(FP), R2
// trigger potential paging fault here,
// because we don't know how to traceback through __kuser_cmpxchg
@@ -120,3 +120,25 @@
MOVB R1, ret+4(FP)
RET
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW addr+0(FP), R1
+ MOVB v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B store
+native_barrier:
+ DMB MB_ISH
+
+store:
+ MOVB R2, (R1)
+
+ CMP $7, R8
+ BGE native_barrier2
+ BL memory_barrier<>(SB)
+ RET
+native_barrier2:
+ DMB MB_ISH
+ RET
diff --git a/src/runtime/internal/atomic/sys_nonlinux_arm.s b/src/runtime/internal/atomic/sys_nonlinux_arm.s
index 9d81334..57568b2 100644
--- a/src/runtime/internal/atomic/sys_nonlinux_arm.s
+++ b/src/runtime/internal/atomic/sys_nonlinux_arm.s
@@ -60,3 +60,20 @@
MOVB R1, ret+4(FP)
RET
+
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW addr+0(FP), R1
+ MOVB v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVB R2, (R1)
+
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+ RET
+
diff --git a/src/runtime/internal/sys/arch.go b/src/runtime/internal/sys/arch.go
index 75beb78..13c00cf 100644
--- a/src/runtime/internal/sys/arch.go
+++ b/src/runtime/internal/sys/arch.go
@@ -14,6 +14,7 @@
MIPS
MIPS64
PPC64
+ RISCV64
S390X
WASM
)
diff --git a/src/runtime/internal/sys/arch_386.go b/src/runtime/internal/sys/arch_386.go
index 3426fd1..b51f70a 100644
--- a/src/runtime/internal/sys/arch_386.go
+++ b/src/runtime/internal/sys/arch_386.go
@@ -7,7 +7,7 @@
const (
ArchFamily = I386
BigEndian = false
- DefaultPhysPageSize = GoosNacl*65536 + (1-GoosNacl)*4096 // 4k normally; 64k on NaCl
+ DefaultPhysPageSize = 4096
PCQuantum = 1
Int64Align = 4
MinFrameSize = 0
diff --git a/src/runtime/internal/sys/arch_amd64p32.go b/src/runtime/internal/sys/arch_amd64p32.go
deleted file mode 100644
index d51c8a5..0000000
--- a/src/runtime/internal/sys/arch_amd64p32.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sys
-
-const (
- ArchFamily = AMD64
- BigEndian = false
- DefaultPhysPageSize = 65536*GoosNacl + 4096*(1-GoosNacl)
- PCQuantum = 1
- Int64Align = 8
- MinFrameSize = 0
-)
-
-type Uintreg uint64
diff --git a/src/runtime/internal/sys/arch_riscv64.go b/src/runtime/internal/sys/arch_riscv64.go
new file mode 100644
index 0000000..7cdcc8f
--- /dev/null
+++ b/src/runtime/internal/sys/arch_riscv64.go
@@ -0,0 +1,18 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+const (
+ ArchFamily = RISCV64
+ BigEndian = false
+ CacheLineSize = 64
+ DefaultPhysPageSize = 4096
+ PCQuantum = 4
+ Int64Align = 8
+ HugePageSize = 1 << 21
+ MinFrameSize = 8
+)
+
+type Uintreg uint64
diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
index ad6f0c3..3c88982 100644
--- a/src/runtime/internal/sys/intrinsics.go
+++ b/src/runtime/internal/sys/intrinsics.go
@@ -4,13 +4,16 @@
// +build !386
+// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
+// TODO replace all uses of CtzXX with TrailingZerosXX; they are the same.
+
package sys
// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
-const deBruijn64 = 0x0218a392cd3d5dbf
+const deBruijn64ctz = 0x0218a392cd3d5dbf
-var deBruijnIdx64 = [64]byte{
+var deBruijnIdx64ctz = [64]byte{
0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 40,
5, 17, 26, 38, 15, 46, 29, 48,
@@ -21,9 +24,9 @@
61, 22, 43, 51, 60, 42, 59, 58,
}
-const deBruijn32 = 0x04653adf
+const deBruijn32ctz = 0x04653adf
-var deBruijnIdx32 = [32]byte{
+var deBruijnIdx32ctz = [32]byte{
0, 1, 2, 6, 3, 11, 7, 16,
4, 14, 12, 21, 8, 23, 17, 26,
31, 5, 10, 15, 13, 20, 22, 25,
@@ -33,20 +36,20 @@
// Ctz64 counts trailing (low-order) zeroes,
// and if all are zero, then 64.
func Ctz64(x uint64) int {
- x &= -x // isolate low-order bit
- y := x * deBruijn64 >> 58 // extract part of deBruijn sequence
- i := int(deBruijnIdx64[y]) // convert to bit index
- z := int((x - 1) >> 57 & 64) // adjustment if zero
+ x &= -x // isolate low-order bit
+ y := x * deBruijn64ctz >> 58 // extract part of deBruijn sequence
+ i := int(deBruijnIdx64ctz[y]) // convert to bit index
+ z := int((x - 1) >> 57 & 64) // adjustment if zero
return i + z
}
// Ctz32 counts trailing (low-order) zeroes,
// and if all are zero, then 32.
func Ctz32(x uint32) int {
- x &= -x // isolate low-order bit
- y := x * deBruijn32 >> 27 // extract part of deBruijn sequence
- i := int(deBruijnIdx32[y]) // convert to bit index
- z := int((x - 1) >> 26 & 32) // adjustment if zero
+ x &= -x // isolate low-order bit
+ y := x * deBruijn32ctz >> 27 // extract part of deBruijn sequence
+ i := int(deBruijnIdx32ctz[y]) // convert to bit index
+ z := int((x - 1) >> 26 & 32) // adjustment if zero
return i + z
}
@@ -55,25 +58,6 @@
return int(ntz8tab[x])
}
-var ntz8tab = [256]uint8{
- 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
-}
-
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
diff --git a/src/runtime/internal/sys/intrinsics_common.go b/src/runtime/internal/sys/intrinsics_common.go
new file mode 100644
index 0000000..818d75e
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_common.go
@@ -0,0 +1,143 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// Copied from math/bits to avoid dependence.
+
+var len8tab = [256]uint8{
+ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+}
+
+var ntz8tab = [256]uint8{
+ 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+}
+
+// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len64(x uint64) (n int) {
+ if x >= 1<<32 {
+ x >>= 32
+ n = 32
+ }
+ if x >= 1<<16 {
+ x >>= 16
+ n += 16
+ }
+ if x >= 1<<8 {
+ x >>= 8
+ n += 8
+ }
+ return n + int(len8tab[x])
+}
+
+// --- OnesCount ---
+
+const m0 = 0x5555555555555555 // 01010101 ...
+const m1 = 0x3333333333333333 // 00110011 ...
+const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
+
+// OnesCount64 returns the number of one bits ("population count") in x.
+func OnesCount64(x uint64) int {
+ // Implementation: Parallel summing of adjacent bits.
+ // See "Hacker's Delight", Chap. 5: Counting Bits.
+ // The following pattern shows the general approach:
+ //
+ // x = x>>1&(m0&m) + x&(m0&m)
+ // x = x>>2&(m1&m) + x&(m1&m)
+ // x = x>>4&(m2&m) + x&(m2&m)
+ // x = x>>8&(m3&m) + x&(m3&m)
+ // x = x>>16&(m4&m) + x&(m4&m)
+ // x = x>>32&(m5&m) + x&(m5&m)
+ // return int(x)
+ //
+ // Masking (& operations) can be left away when there's no
+ // danger that a field's sum will carry over into the next
+ // field: Since the result cannot be > 64, 8 bits is enough
+ // and we can ignore the masks for the shifts by 8 and up.
+ // Per "Hacker's Delight", the first line can be simplified
+ // more, but it saves at best one instruction, so we leave
+ // it alone for clarity.
+ const m = 1<<64 - 1
+ x = x>>1&(m0&m) + x&(m0&m)
+ x = x>>2&(m1&m) + x&(m1&m)
+ x = (x>>4 + x) & (m2 & m)
+ x += x >> 8
+ x += x >> 16
+ x += x >> 32
+ return int(x) & (1<<7 - 1)
+}
+
+var deBruijn64tab = [64]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+const deBruijn64 = 0x03f79d71b4ca8b09
+
+// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
+func TrailingZeros64(x uint64) int {
+ if x == 0 {
+ return 64
+ }
+ // If popcount is fast, replace code below with return popcount(^x & (x - 1)).
+ //
+ // x & -x leaves only the right-most bit set in the word. Let k be the
+ // index of that bit. Since only a single bit is set, the value is two
+ // to the power of k. Multiplying by a power of two is equivalent to
+ // left shifting, in this case by k bits. The de Bruijn (64 bit) constant
+ // is such that all six bit, consecutive substrings are distinct.
+ // Therefore, if we have a left shifted version of this constant we can
+ // find by how many bits it was shifted by looking at which six bit
+ // substring ended up at the top of the word.
+ // (Knuth, volume 4, section 7.3.1)
+ return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
+}
+
+// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
+func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
+
+// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
+func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
+
+// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func TrailingZeros8(x uint8) int {
+ return int(ntz8tab[x])
+}
+
+// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len8(x uint8) int {
+ return int(len8tab[x])
+}
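As the header comment notes, these helpers are copied from math/bits to avoid a dependency; outside the runtime the public package gives the same answers, for example:

    package main

    import (
        "fmt"
        "math/bits"
    )

    func main() {
        x := uint64(0xf0) // 0b11110000
        fmt.Println(bits.Len64(x))           // 8, matches sys.Len64
        fmt.Println(bits.TrailingZeros64(x)) // 4, matches sys.TrailingZeros64
        fmt.Println(bits.OnesCount64(x))     // 4, matches sys.OnesCount64
    }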
diff --git a/src/runtime/internal/sys/zgoarch_amd64p32.go b/src/runtime/internal/sys/zgoarch_amd64p32.go
deleted file mode 100644
index 13dc2e7..0000000
--- a/src/runtime/internal/sys/zgoarch_amd64p32.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
-
-// +build amd64p32
-
-package sys
-
-const GOARCH = `amd64p32`
-
-const Goarch386 = 0
-const GoarchAmd64 = 0
-const GoarchAmd64p32 = 1
-const GoarchArm = 0
-const GoarchArmbe = 0
-const GoarchArm64 = 0
-const GoarchArm64be = 0
-const GoarchPpc64 = 0
-const GoarchPpc64le = 0
-const GoarchMips = 0
-const GoarchMipsle = 0
-const GoarchMips64 = 0
-const GoarchMips64le = 0
-const GoarchMips64p32 = 0
-const GoarchMips64p32le = 0
-const GoarchPpc = 0
-const GoarchRiscv = 0
-const GoarchRiscv64 = 0
-const GoarchS390 = 0
-const GoarchS390x = 0
-const GoarchSparc = 0
-const GoarchSparc64 = 0
-const GoarchWasm = 0
diff --git a/src/runtime/internal/sys/zgoos_nacl.go b/src/runtime/internal/sys/zgoos_nacl.go
deleted file mode 100644
index 9e65b6f..0000000
--- a/src/runtime/internal/sys/zgoos_nacl.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.
-
-// +build nacl
-
-package sys
-
-const GOOS = `nacl`
-
-const GoosAix = 0
-const GoosAndroid = 0
-const GoosDarwin = 0
-const GoosDragonfly = 0
-const GoosFreebsd = 0
-const GoosHurd = 0
-const GoosIllumos = 0
-const GoosJs = 0
-const GoosLinux = 0
-const GoosNacl = 1
-const GoosNetbsd = 0
-const GoosOpenbsd = 0
-const GoosPlan9 = 0
-const GoosSolaris = 0
-const GoosWindows = 0
-const GoosZos = 0
diff --git a/src/runtime/internal/sys/zversion.go b/src/runtime/internal/sys/zversion.go
index db1e69b..ffe1ac1 100644
--- a/src/runtime/internal/sys/zversion.go
+++ b/src/runtime/internal/sys/zversion.go
@@ -2,6 +2,6 @@
package sys
-const TheVersion = `go1.13`
+const TheVersion = `go1.15beta1`
const Goexperiment = ``
const StackGuardMultiplierDefault = 1
diff --git a/src/runtime/lfstack_32bit.go b/src/runtime/lfstack_32bit.go
index d36ca50..f07ff1c 100644
--- a/src/runtime/lfstack_32bit.go
+++ b/src/runtime/lfstack_32bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build 386 arm nacl mips mipsle
+// +build 386 arm mips mipsle
package runtime
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
index ea3455a..9d821b9 100644
--- a/src/runtime/lfstack_64bit.go
+++ b/src/runtime/lfstack_64bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x wasm
+// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm
package runtime
diff --git a/src/runtime/libfuzzer.go b/src/runtime/libfuzzer.go
new file mode 100644
index 0000000..0161955
--- /dev/null
+++ b/src/runtime/libfuzzer.go
@@ -0,0 +1,75 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build libfuzzer
+
+package runtime
+
+import _ "unsafe" // for go:linkname
+
+func libfuzzerCall(fn *byte, arg0, arg1 uintptr)
+
+func libfuzzerTraceCmp1(arg0, arg1 uint8) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp1, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp2(arg0, arg1 uint16) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp2, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp4(arg0, arg1 uint32) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp4, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp8(arg0, arg1 uint64) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp8, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp1(arg0, arg1 uint8) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp1, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp2(arg0, arg1 uint16) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp2, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp4(arg0, arg1 uint32) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp4, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp8(arg0, arg1 uint64) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp8, uintptr(arg0), uintptr(arg1))
+}
+
+//go:linkname __sanitizer_cov_trace_cmp1 __sanitizer_cov_trace_cmp1
+//go:cgo_import_static __sanitizer_cov_trace_cmp1
+var __sanitizer_cov_trace_cmp1 byte
+
+//go:linkname __sanitizer_cov_trace_cmp2 __sanitizer_cov_trace_cmp2
+//go:cgo_import_static __sanitizer_cov_trace_cmp2
+var __sanitizer_cov_trace_cmp2 byte
+
+//go:linkname __sanitizer_cov_trace_cmp4 __sanitizer_cov_trace_cmp4
+//go:cgo_import_static __sanitizer_cov_trace_cmp4
+var __sanitizer_cov_trace_cmp4 byte
+
+//go:linkname __sanitizer_cov_trace_cmp8 __sanitizer_cov_trace_cmp8
+//go:cgo_import_static __sanitizer_cov_trace_cmp8
+var __sanitizer_cov_trace_cmp8 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp1 __sanitizer_cov_trace_const_cmp1
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp1
+var __sanitizer_cov_trace_const_cmp1 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp2 __sanitizer_cov_trace_const_cmp2
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp2
+var __sanitizer_cov_trace_const_cmp2 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp4 __sanitizer_cov_trace_const_cmp4
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp4
+var __sanitizer_cov_trace_const_cmp4 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp8 __sanitizer_cov_trace_const_cmp8
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp8
+var __sanitizer_cov_trace_const_cmp8 byte
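
The hooks above forward both operands of Go comparisons to libFuzzer's __sanitizer_cov_trace_cmp* and __sanitizer_cov_trace_const_cmp* callbacks, which lets the fuzzer steer inputs toward making comparisons succeed. As a rough, hypothetical sketch of the shape of instrumented code (the real calls are inserted by the compiler in libfuzzer builds; traceCmp4 below is only a stand-in for the runtime's libfuzzerTraceCmp4):

package main

import "fmt"

// traceCmp4 stands in for runtime.libfuzzerTraceCmp4, which would forward the
// operands to __sanitizer_cov_trace_cmp4 in an instrumented build.
func traceCmp4(a, b uint32) { fmt.Printf("cmp4 %d %d\n", a, b) }

// instrumentedEqual sketches how a compiler-instrumented `x == y` could report
// its operands before branching (illustrative shape only, not the real codegen).
func instrumentedEqual(x, y uint32) bool {
	traceCmp4(x, y)
	return x == y
}

func main() {
	fmt.Println(instrumentedEqual(7, 42)) // prints "cmp4 7 42" then "false"
}
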
diff --git a/src/runtime/libfuzzer_amd64.s b/src/runtime/libfuzzer_amd64.s
new file mode 100644
index 0000000..890fde3
--- /dev/null
+++ b/src/runtime/libfuzzer_amd64.s
@@ -0,0 +1,42 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build libfuzzer
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+// Based on race_amd64.s; see commentary there.
+
+#ifdef GOOS_windows
+#define RARG0 CX
+#define RARG1 DX
+#else
+#define RARG0 DI
+#define RARG1 SI
+#endif
+
+// void runtime·libfuzzerCall(fn, arg0, arg1 uintptr)
+// Calls C function fn from libFuzzer and passes 2 arguments to it.
+TEXT runtime·libfuzzerCall(SB), NOSPLIT, $0-24
+ MOVQ fn+0(FP), AX
+ MOVQ arg0+8(FP), RARG0
+ MOVQ arg1+16(FP), RARG1
+
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_m(R14), R13
+
+ // Switch to g0 stack.
+ MOVQ SP, R12 // callee-saved, preserved across the CALL
+ MOVQ m_g0(R13), R10
+ CMPQ R10, R14
+ JE call // already on g0
+ MOVQ (g_sched+gobuf_sp)(R10), SP
+call:
+ ANDQ $~15, SP // alignment for gcc ABI
+ CALL AX
+ MOVQ R12, SP
+ RET
diff --git a/src/runtime/libfuzzer_arm64.s b/src/runtime/libfuzzer_arm64.s
new file mode 100644
index 0000000..121673e
--- /dev/null
+++ b/src/runtime/libfuzzer_arm64.s
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build libfuzzer
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// Based on race_arm64.s; see commentary there.
+
+// func runtime·libfuzzerCall(fn, arg0, arg1 uintptr)
+// Calls C function fn from libFuzzer and passes 2 arguments to it.
+TEXT runtime·libfuzzerCall(SB), NOSPLIT, $0-24
+ MOVD fn+0(FP), R9
+ MOVD arg0+8(FP), R0
+ MOVD arg1+16(FP), R1
+
+ MOVD g_m(g), R10
+
+ // Switch to g0 stack.
+ MOVD RSP, R19 // callee-saved, preserved across the CALL
+ MOVD m_g0(R10), R11
+ CMP R11, g
+ BEQ call // already on g0
+ MOVD (g_sched+gobuf_sp)(R11), R12
+ MOVD R12, RSP
+call:
+ BL R9
+ MOVD R19, RSP
+ RET
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index d2828b1..91467fd 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -44,6 +44,10 @@
}
func lock(l *mutex) {
+ lockWithRank(l, getLockRank(l))
+}
+
+func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
@@ -104,6 +108,10 @@
}
func unlock(l *mutex) {
+ unlockWithRank(l)
+}
+
+func unlock2(l *mutex) {
v := atomic.Xchg(key32(&l.key), mutex_unlocked)
if v == mutex_unlocked {
throw("unlock of unlocked lock")
@@ -230,8 +238,8 @@
return ok
}
-func beforeIdle() bool {
- return false
+func beforeIdle(int64) (*g, bool) {
+ return nil, false
}
func checkTimeouts() {}
diff --git a/src/runtime/lock_js.go b/src/runtime/lock_js.go
index c038499..14bdc76 100644
--- a/src/runtime/lock_js.go
+++ b/src/runtime/lock_js.go
@@ -26,6 +26,10 @@
)
func lock(l *mutex) {
+ lockWithRank(l, getLockRank(l))
+}
+
+func lock2(l *mutex) {
if l.key == mutex_locked {
// js/wasm is single-threaded so we should never
// observe this.
@@ -40,6 +44,10 @@
}
func unlock(l *mutex) {
+ unlockWithRank(l)
+}
+
+func unlock2(l *mutex) {
if l.key == mutex_unlocked {
throw("unlock of unlocked lock")
}
@@ -111,6 +119,8 @@
gopark(nil, nil, waitReasonSleep, traceEvNone, 1)
clearTimeoutEvent(id) // note might have woken early, clear timeout
+ clearIdleID()
+
mp = acquirem()
delete(notes, n)
delete(notesWithTimeout, n)
@@ -144,31 +154,64 @@
}
}
-var returnedEventHandler *g
+// events is a stack of calls from JavaScript into Go.
+var events []*event
-func init() {
- // At the toplevel we need an extra goroutine that handles asynchronous events.
- initg := getg()
- go func() {
- returnedEventHandler = getg()
- goready(initg, 1)
-
- gopark(nil, nil, waitReasonZero, traceEvNone, 1)
- returnedEventHandler = nil
-
- pause(getcallersp() - 16)
- }()
- gopark(nil, nil, waitReasonZero, traceEvNone, 1)
+type event struct {
+ // g was the active goroutine when the call from JavaScript occurred.
+ // It needs to be active when returning to JavaScript.
+ gp *g
+ // returned reports whether the event handler has returned.
+ // When all goroutines are idle and the event handler has returned,
+ // then g gets resumed and returns the execution to JavaScript.
+ returned bool
}
+// The timeout event started by beforeIdle.
+var idleID int32
+
// beforeIdle gets called by the scheduler if no goroutine is awake.
-// We resume the event handler (if available) which will pause the execution.
-func beforeIdle() bool {
- if returnedEventHandler != nil {
- goready(returnedEventHandler, 1)
- return true
+// If we are not already handling an event, then we pause for an async event.
+// If an event handler returned, we resume it and it will pause the execution.
+// beforeIdle either returns the specific goroutine to schedule next or
+// indicates with otherReady that some goroutine became ready.
+func beforeIdle(delay int64) (gp *g, otherReady bool) {
+ if delay > 0 {
+ clearIdleID()
+ if delay < 1e6 {
+ delay = 1
+ } else if delay < 1e15 {
+ delay = delay / 1e6
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ delay = 1e9
+ }
+ idleID = scheduleTimeoutEvent(delay)
}
- return false
+
+ if len(events) == 0 {
+ go handleAsyncEvent()
+ return nil, true
+ }
+
+ e := events[len(events)-1]
+ if e.returned {
+ return e.gp, false
+ }
+ return nil, false
+}
+
+func handleAsyncEvent() {
+ pause(getcallersp() - 16)
+}
+
+// clearIdleID clears our record of the timeout started by beforeIdle.
+func clearIdleID() {
+ if idleID != 0 {
+ clearTimeoutEvent(idleID)
+ idleID = 0
+ }
}
// pause sets SP to newsp and pauses the execution of Go's WebAssembly code until an event is triggered.
@@ -181,18 +224,29 @@
// clearTimeoutEvent clears a timeout event scheduled by scheduleTimeoutEvent.
func clearTimeoutEvent(id int32)
+// handleEvent gets invoked on a call from JavaScript into Go. It calls the event handler of the syscall/js package
+// and then parks the handler goroutine to allow other goroutines to run before giving execution back to JavaScript.
+// When no other goroutine is awake any more, beforeIdle resumes the handler goroutine. Now that the same goroutine
+// is running as was running when the call came in from JavaScript, execution can be safely passed back to JavaScript.
func handleEvent() {
- prevReturnedEventHandler := returnedEventHandler
- returnedEventHandler = nil
+ e := &event{
+ gp: getg(),
+ returned: false,
+ }
+ events = append(events, e)
- checkTimeouts()
eventHandler()
- returnedEventHandler = getg()
+ clearIdleID()
+
+ // wait until all goroutines are idle
+ e.returned = true
gopark(nil, nil, waitReasonZero, traceEvNone, 1)
- returnedEventHandler = prevReturnedEventHandler
+ events[len(events)-1] = nil
+ events = events[:len(events)-1]
+ // return execution to JavaScript
pause(getcallersp() - 16)
}
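
A small standalone model of the events stack introduced above, assuming only the behavior visible in this hunk: each call from JavaScript pushes an event, and beforeIdle resumes the topmost handler's goroutine only once that handler has returned, so control is handed back to JavaScript on the same goroutine that received the call.

package main

import "fmt"

// event mirrors the struct added above; gp is a stand-in for the *g that must
// be active when execution returns to JavaScript.
type event struct {
	gp       string
	returned bool
}

var events []*event

// decide models the branch structure of beforeIdle: no pending event means a
// new async event should be handled; a finished handler on top of the stack
// means that goroutine should be resumed.
func decide() (resume string, otherReady bool) {
	if len(events) == 0 {
		return "", true // would go handleAsyncEvent()
	}
	if top := events[len(events)-1]; top.returned {
		return top.gp, false
	}
	return "", false
}

func main() {
	fmt.Println(decide()) // "", true   (start handling a new event)
	events = append(events, &event{gp: "handler-g", returned: true})
	fmt.Println(decide()) // handler-g, false   (resume the finished handler)
}
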
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index b36c97f..671e524 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin nacl netbsd openbsd plan9 solaris windows
+// +build aix darwin netbsd openbsd plan9 solaris windows
package runtime
@@ -33,6 +33,10 @@
)
func lock(l *mutex) {
+ lockWithRank(l, getLockRank(l))
+}
+
+func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
@@ -89,9 +93,13 @@
}
}
+func unlock(l *mutex) {
+ unlockWithRank(l)
+}
+
//go:nowritebarrier
// We might not be holding a p in this code.
-func unlock(l *mutex) {
+func unlock2(l *mutex) {
gp := getg()
var mp *m
for {
@@ -289,8 +297,8 @@
return ok
}
-func beforeIdle() bool {
- return false
+func beforeIdle(int64) (*g, bool) {
+ return nil, false
}
func checkTimeouts() {}
diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go
new file mode 100644
index 0000000..0001935
--- /dev/null
+++ b/src/runtime/lockrank.go
@@ -0,0 +1,254 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file records the static ranks of the locks in the runtime. If a lock
+// is not given a rank, then it is assumed to be a leaf lock, which means no other
+// lock can be acquired while it is held. Therefore, leaf locks do not need to be
+// given an explicit rank. We list all of the architecture-independent leaf locks
+// for documentation purposes, but don't list any of the architecture-dependent
+// locks (which are all leaf locks). debugLock is ignored for ranking, since it is used
+// when printing out lock ranking errors.
+//
+// lockInit(l *mutex, rank lockRank) is used to set the rank of a lock before it is used.
+// If there is no clear place to initialize a lock, then the rank of a lock can be
+// specified during the lock call itself via lockWithRank(l *mutex, rank lockRank).
+//
+// Besides the static lock ranking (which is a total ordering of the locks), we
+// also represent and enforce the actual partial order among the locks in the
+// lockPartialOrder[] array below. That is, if it is possible that lock B can be
+// acquired when lock A is the previously acquired lock that is still held, then
+// there should be an entry for A in lockPartialOrder[B]. We will currently fail
+// not only if the total order (the lock ranking) is violated, but also if there
+// is a missing entry in the partial order.
+
+package runtime
+
+type lockRank int
+
+// Constants representing the lock rank of the architecture-independent locks in
+// the runtime. Locks with lower rank must be taken before locks with higher
+// rank.
+const (
+ lockRankDummy lockRank = iota
+
+ // Locks held above sched
+ lockRankSysmon
+ lockRankScavenge
+ lockRankForcegc
+ lockRankSweepWaiters
+ lockRankAssistQueue
+ lockRankCpuprof
+ lockRankSweep
+
+ lockRankSched
+ lockRankDeadlock
+ lockRankPanic
+ lockRankAllg
+ lockRankAllp
+ lockRankPollDesc
+
+ lockRankTimers // Multiple timers locked simultaneously in destroy()
+ lockRankItab
+ lockRankReflectOffs
+ lockRankHchan // Multiple hchans acquired in lock order in syncadjustsudogs()
+ lockRankFin
+ lockRankNotifyList
+ lockRankTraceBuf
+ lockRankTraceStrings
+ lockRankMspanSpecial
+ lockRankProf
+ lockRankGcBitsArenas
+ lockRankRoot
+ lockRankTrace
+ lockRankTraceStackTab
+ lockRankNetpollInit
+
+ lockRankRwmutexW
+ lockRankRwmutexR
+
+ lockRankMcentral // For !go115NewMCentralImpl
+ lockRankSpine // For !go115NewMCentralImpl
+ lockRankSpanSetSpine
+ lockRankGscan
+ lockRankStackpool
+ lockRankStackLarge
+ lockRankDefer
+ lockRankSudog
+
+ // Memory-related non-leaf locks
+ lockRankWbufSpans
+ lockRankMheap
+ lockRankMheapSpecial
+
+ // Memory-related leaf locks
+ lockRankGlobalAlloc
+
+ // Other leaf locks
+ lockRankGFree
+ // Generally, hchan must be acquired before gscan. But in one specific
+ // case (in syncadjustsudogs from markroot after the g has been suspended
+ // by suspendG), we allow gscan to be acquired, and then an hchan lock. To
+ // allow this case, we get this lockRankHchanLeaf rank in
+ // syncadjustsudogs(), rather than lockRankHchan. By using this special
+ // rank, we don't allow any further locks to be acquired other than more
+ // hchan locks.
+ lockRankHchanLeaf
+
+ // Leaf locks with no dependencies, so these constants are not actually used anywhere.
+ // There are other architecture-dependent leaf locks as well.
+ lockRankNewmHandoff
+ lockRankDebugPtrmask
+ lockRankFaketimeState
+ lockRankTicks
+ lockRankRaceFini
+ lockRankPollCache
+ lockRankDebug
+)
+
+// lockRankLeafRank is the rank of a lock that does not have a declared rank, and hence is
+// a leaf lock.
+const lockRankLeafRank lockRank = 1000
+
+// lockNames gives the names associated with each of the above ranks
+var lockNames = []string{
+ lockRankDummy: "",
+
+ lockRankSysmon: "sysmon",
+ lockRankScavenge: "scavenge",
+ lockRankForcegc: "forcegc",
+ lockRankSweepWaiters: "sweepWaiters",
+ lockRankAssistQueue: "assistQueue",
+ lockRankCpuprof: "cpuprof",
+ lockRankSweep: "sweep",
+
+ lockRankSched: "sched",
+ lockRankDeadlock: "deadlock",
+ lockRankPanic: "panic",
+ lockRankAllg: "allg",
+ lockRankAllp: "allp",
+ lockRankPollDesc: "pollDesc",
+
+ lockRankTimers: "timers",
+ lockRankItab: "itab",
+ lockRankReflectOffs: "reflectOffs",
+
+ lockRankHchan: "hchan",
+ lockRankFin: "fin",
+ lockRankNotifyList: "notifyList",
+ lockRankTraceBuf: "traceBuf",
+ lockRankTraceStrings: "traceStrings",
+ lockRankMspanSpecial: "mspanSpecial",
+ lockRankProf: "prof",
+ lockRankGcBitsArenas: "gcBitsArenas",
+ lockRankRoot: "root",
+ lockRankTrace: "trace",
+ lockRankTraceStackTab: "traceStackTab",
+ lockRankNetpollInit: "netpollInit",
+
+ lockRankRwmutexW: "rwmutexW",
+ lockRankRwmutexR: "rwmutexR",
+
+ lockRankMcentral: "mcentral",
+ lockRankSpine: "spine",
+ lockRankSpanSetSpine: "spanSetSpine",
+ lockRankGscan: "gscan",
+ lockRankStackpool: "stackpool",
+ lockRankStackLarge: "stackLarge",
+ lockRankDefer: "defer",
+ lockRankSudog: "sudog",
+
+ lockRankWbufSpans: "wbufSpans",
+ lockRankMheap: "mheap",
+ lockRankMheapSpecial: "mheapSpecial",
+
+ lockRankGlobalAlloc: "globalAlloc.mutex",
+
+ lockRankGFree: "gFree",
+ lockRankHchanLeaf: "hchanLeaf",
+
+ lockRankNewmHandoff: "newmHandoff.lock",
+ lockRankDebugPtrmask: "debugPtrmask.lock",
+ lockRankFaketimeState: "faketimeState.lock",
+ lockRankTicks: "ticks.lock",
+ lockRankRaceFini: "raceFiniLock",
+ lockRankPollCache: "pollCache.lock",
+ lockRankDebug: "debugLock",
+}
+
+func (rank lockRank) String() string {
+ if rank == 0 {
+ return "UNKNOWN"
+ }
+ if rank == lockRankLeafRank {
+ return "LEAF"
+ }
+ return lockNames[rank]
+}
+
+// lockPartialOrder is a partial order among the various lock types, listing the immediate
+// ordering that has actually been observed in the runtime. Each entry (which
+// corresponds to a particular lock rank) specifies the list of locks that can
+// already be held immediately "above" it.
+//
+// So, for example, the lockRankSched entry shows that all the locks preceding it in
+// rank can actually be held. The fin lock's entry shows that only the sysmon,
+// scavenge, sched, allg, timers, or hchan lock can be held immediately above it
+// when it is acquired.
+var lockPartialOrder [][]lockRank = [][]lockRank{
+ lockRankDummy: {},
+ lockRankSysmon: {},
+ lockRankScavenge: {lockRankSysmon},
+ lockRankForcegc: {lockRankSysmon},
+ lockRankSweepWaiters: {},
+ lockRankAssistQueue: {},
+ lockRankCpuprof: {},
+ lockRankSweep: {},
+ lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep},
+ lockRankDeadlock: {lockRankDeadlock},
+ lockRankPanic: {lockRankDeadlock},
+ lockRankAllg: {lockRankSysmon, lockRankSched, lockRankPanic},
+ lockRankAllp: {lockRankSysmon, lockRankSched},
+ lockRankPollDesc: {},
+ lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers},
+ lockRankItab: {},
+ lockRankReflectOffs: {lockRankItab},
+ lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan},
+ lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan},
+ lockRankNotifyList: {},
+ lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
+ lockRankTraceStrings: {lockRankTraceBuf},
+ lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankRoot: {},
+ lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankSweep},
+ lockRankTraceStackTab: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankTrace},
+ lockRankNetpollInit: {lockRankTimers},
+
+ lockRankRwmutexW: {},
+ lockRankRwmutexR: {lockRankRwmutexW},
+
+ lockRankMcentral: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankSpine: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine},
+ lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan},
+ lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan},
+ lockRankDefer: {},
+ lockRankSudog: {lockRankNotifyList, lockRankHchan},
+ lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
+ lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine},
+ lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
+ lockRankGlobalAlloc: {lockRankProf, lockRankSpine, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
+
+ lockRankGFree: {lockRankSched},
+ lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf},
+
+ lockRankNewmHandoff: {},
+ lockRankDebugPtrmask: {},
+ lockRankFaketimeState: {},
+ lockRankTicks: {},
+ lockRankRaceFini: {},
+ lockRankPollCache: {},
+ lockRankDebug: {},
+}
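
The consistency rule between the total order (the rank constants) and the partial order (lockPartialOrder) can be illustrated with a standalone check over a toy table; lockrank_on.go's init performs the same check over the real table: every rank listed in a row must be less than or equal to the row's own rank.

package main

import "fmt"

// A toy rank table in the spirit of lockPartialOrder above: each row lists the
// ranks that may already be held when a lock of that row's rank is acquired.
const (
	rankSysmon = iota
	rankScavenge
	rankSched
)

var rankNames = []string{"sysmon", "scavenge", "sched"}

var partialOrder = [][]int{
	rankSysmon:   {},
	rankScavenge: {rankSysmon},
	rankSched:    {rankSysmon, rankScavenge},
}

func main() {
	// Same rule as the init() in lockrank_on.go: a listed predecessor must
	// not outrank the row it appears in.
	for rank, preds := range partialOrder {
		for _, p := range preds {
			if p > rank {
				fmt.Printf("inconsistent: %s listed under %s\n", rankNames[p], rankNames[rank])
				return
			}
		}
	}
	fmt.Println("partial order is consistent with the total ranking")
}
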
diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go
new file mode 100644
index 0000000..891589c
--- /dev/null
+++ b/src/runtime/lockrank_off.go
@@ -0,0 +1,32 @@
+// +build !goexperiment.staticlockranking
+
+package runtime
+
+// lockRankStruct is embedded in mutex, but is empty when the staticlockranking
+// experiment is disabled (the default).
+type lockRankStruct struct {
+}
+
+func lockInit(l *mutex, rank lockRank) {
+}
+
+func getLockRank(l *mutex) lockRank {
+ return 0
+}
+
+func lockWithRank(l *mutex, rank lockRank) {
+ lock2(l)
+}
+
+func acquireLockRank(rank lockRank) {
+}
+
+func unlockWithRank(l *mutex) {
+ unlock2(l)
+}
+
+func releaseLockRank(rank lockRank) {
+}
+
+func lockWithRankMayAcquire(l *mutex, rank lockRank) {
+}
diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go
new file mode 100644
index 0000000..cf4151f
--- /dev/null
+++ b/src/runtime/lockrank_on.go
@@ -0,0 +1,210 @@
+// +build goexperiment.staticlockranking
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+// lockRankStruct is embedded in mutex
+type lockRankStruct struct {
+ // static lock ranking of the lock
+ rank lockRank
+ // pad field to make sure lockRankStruct is a multiple of 8 bytes, even on
+ // 32-bit systems.
+ pad int
+}
+
+// init checks that the partial order in lockPartialOrder fits within the total
+// order determined by the order of the lockRank constants.
+func init() {
+ for rank, list := range lockPartialOrder {
+ for _, entry := range list {
+ if entry > lockRank(rank) {
+ println("lockPartial order row", lockRank(rank).String(), "entry", entry.String())
+ throw("lockPartialOrder table is inconsistent with total lock ranking order")
+ }
+ }
+ }
+}
+
+func lockInit(l *mutex, rank lockRank) {
+ l.rank = rank
+}
+
+func getLockRank(l *mutex) lockRank {
+ return l.rank
+}
+
+// The following functions are the entry-points to record lock
+// operations.
+// All of these are nosplit and switch to the system stack immediately
+// to avoid stack growths. Since a stack growth could itself have lock
+// operations, this prevents re-entrant calls.
+
+// lockWithRank is like lock(l), but allows the caller to specify a lock rank
+// when acquiring a non-static lock.
+//go:nosplit
+func lockWithRank(l *mutex, rank lockRank) {
+ if l == &debuglock || l == &paniclk {
+ // debuglock is only used for println/printlock(). Don't do lock
+ // rank recording for it, since print/println are used when
+ // printing out a lock ordering problem below.
+ //
+ // paniclk has an ordering problem, since it can be acquired
+ // during a panic with any other locks held (especially if the
+ // panic is because of a directed segv), and yet also allg is
+		// acquired after paniclk in tracebackothers(). This is a genuine
+ // problem, so for now we don't do lock rank recording for paniclk
+ // either.
+ lock2(l)
+ return
+ }
+ if rank == 0 {
+ rank = lockRankLeafRank
+ }
+ gp := getg()
+ // Log the new class.
+ systemstack(func() {
+ i := gp.m.locksHeldLen
+ if i >= len(gp.m.locksHeld) {
+ throw("too many locks held concurrently for rank checking")
+ }
+ gp.m.locksHeld[i].rank = rank
+ gp.m.locksHeld[i].lockAddr = uintptr(unsafe.Pointer(l))
+ gp.m.locksHeldLen++
+
+ // i is the index of the lock being acquired
+ if i > 0 {
+ checkRanks(gp, gp.m.locksHeld[i-1].rank, rank)
+ }
+ lock2(l)
+ })
+}
+
+// acquireLockRank acquires a rank which is not associated with a mutex lock
+//go:nosplit
+func acquireLockRank(rank lockRank) {
+ gp := getg()
+ // Log the new class.
+ systemstack(func() {
+ i := gp.m.locksHeldLen
+ if i >= len(gp.m.locksHeld) {
+ throw("too many locks held concurrently for rank checking")
+ }
+ gp.m.locksHeld[i].rank = rank
+ gp.m.locksHeld[i].lockAddr = 0
+ gp.m.locksHeldLen++
+
+ // i is the index of the lock being acquired
+ if i > 0 {
+ checkRanks(gp, gp.m.locksHeld[i-1].rank, rank)
+ }
+ })
+}
+
+// checkRanks checks if goroutine g, which has most recently acquired a lock
+// with rank 'prevRank', can now acquire a lock with rank 'rank'.
+func checkRanks(gp *g, prevRank, rank lockRank) {
+ rankOK := false
+ if rank < prevRank {
+ // If rank < prevRank, then we definitely have a rank error
+ rankOK = false
+ } else if rank == lockRankLeafRank {
+ // If new lock is a leaf lock, then the preceding lock can
+ // be anything except another leaf lock.
+ rankOK = prevRank < lockRankLeafRank
+ } else {
+ // We've now verified the total lock ranking, but we
+ // also enforce the partial ordering specified by
+			// lockPartialOrder. Two locks with the same rank
+ // can only be acquired at the same time if explicitly
+ // listed in the lockPartialOrder table.
+ list := lockPartialOrder[rank]
+ for _, entry := range list {
+ if entry == prevRank {
+ rankOK = true
+ break
+ }
+ }
+ }
+ if !rankOK {
+ printlock()
+ println(gp.m.procid, " ======")
+ for j, held := range gp.m.locksHeld[:gp.m.locksHeldLen] {
+ println(j, ":", held.rank.String(), held.rank, unsafe.Pointer(gp.m.locksHeld[j].lockAddr))
+ }
+ throw("lock ordering problem")
+ }
+}
+
+//go:nosplit
+func unlockWithRank(l *mutex) {
+ if l == &debuglock || l == &paniclk {
+ // See comment at beginning of lockWithRank.
+ unlock2(l)
+ return
+ }
+ gp := getg()
+ systemstack(func() {
+ found := false
+ for i := gp.m.locksHeldLen - 1; i >= 0; i-- {
+ if gp.m.locksHeld[i].lockAddr == uintptr(unsafe.Pointer(l)) {
+ found = true
+ copy(gp.m.locksHeld[i:gp.m.locksHeldLen-1], gp.m.locksHeld[i+1:gp.m.locksHeldLen])
+ gp.m.locksHeldLen--
+ break
+ }
+ }
+ if !found {
+ println(gp.m.procid, ":", l.rank.String(), l.rank, l)
+ throw("unlock without matching lock acquire")
+ }
+ unlock2(l)
+ })
+}
+
+// releaseLockRank releases a rank which is not associated with a mutex lock
+//go:nosplit
+func releaseLockRank(rank lockRank) {
+ gp := getg()
+ systemstack(func() {
+ found := false
+ for i := gp.m.locksHeldLen - 1; i >= 0; i-- {
+ if gp.m.locksHeld[i].rank == rank && gp.m.locksHeld[i].lockAddr == 0 {
+ found = true
+ copy(gp.m.locksHeld[i:gp.m.locksHeldLen-1], gp.m.locksHeld[i+1:gp.m.locksHeldLen])
+ gp.m.locksHeldLen--
+ break
+ }
+ }
+ if !found {
+ println(gp.m.procid, ":", rank.String(), rank)
+ throw("lockRank release without matching lockRank acquire")
+ }
+ })
+}
+
+//go:nosplit
+func lockWithRankMayAcquire(l *mutex, rank lockRank) {
+ gp := getg()
+ if gp.m.locksHeldLen == 0 {
+		// No possibility of lock ordering problem if no other locks held
+ return
+ }
+
+ systemstack(func() {
+ i := gp.m.locksHeldLen
+ if i >= len(gp.m.locksHeld) {
+ throw("too many locks held concurrently for rank checking")
+ }
+ // Temporarily add this lock to the locksHeld list, so
+		// checkRanks() will print out the list, including this lock, if there
+ // is a lock ordering problem.
+ gp.m.locksHeld[i].rank = rank
+ gp.m.locksHeld[i].lockAddr = uintptr(unsafe.Pointer(l))
+ gp.m.locksHeldLen++
+ checkRanks(gp, gp.m.locksHeld[i-1].rank, rank)
+ gp.m.locksHeldLen--
+ })
+}
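
These checks are gated on the goexperiment.staticlockranking build tag, which suggests they are enabled by building the toolchain with GOEXPERIMENT=staticlockranking. Below is a stripped-down model of the per-M bookkeeping, simplified for illustration: the real checkRanks also consults lockPartialOrder for equal or higher ranks, and unranked locks are treated as leaves with rank 1000.

package main

import "fmt"

const leafRank = 1000

// checker models the locksHeld stack: acquire appends a rank and rejects any
// rank lower than the most recently acquired one (or a leaf after a leaf).
type checker struct{ held []int }

func (c *checker) acquire(rank int) error {
	if n := len(c.held); n > 0 {
		prev := c.held[n-1]
		if rank < prev || (rank == leafRank && prev == leafRank) {
			return fmt.Errorf("lock ordering problem: rank %d acquired while holding rank %d", rank, prev)
		}
	}
	c.held = append(c.held, rank)
	return nil
}

func (c *checker) release() { c.held = c.held[:len(c.held)-1] }

func main() {
	var c checker
	fmt.Println(c.acquire(10)) // <nil>
	fmt.Println(c.acquire(20)) // <nil>
	fmt.Println(c.acquire(5))  // lock ordering problem: rank 5 acquired while holding rank 20
	c.release()
}
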
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index d768054..eaf8db7 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -19,7 +19,7 @@
// fixalloc: a free-list allocator for fixed-size off-heap objects,
// used to manage storage used by the allocator.
// mheap: the malloc heap, managed at page (8192-byte) granularity.
-// mspan: a run of pages managed by the mheap.
+// mspan: a run of in-use pages managed by the mheap.
// mcentral: collects all spans of a given size class.
// mcache: a per-P cache of mspans with free space.
// mstats: allocation statistics.
@@ -56,20 +56,16 @@
// it is placed on the mcentral free list for the mspan's size
// class.
//
-// 3. Otherwise, if all objects in the mspan are free, the mspan
-// is now "idle", so it is returned to the mheap and no longer
-// has a size class.
-// This may coalesce it with adjacent idle mspans.
-//
-// 4. If an mspan remains idle for long enough, return its pages
-// to the operating system.
+// 3. Otherwise, if all objects in the mspan are free, the mspan's
+// pages are returned to the mheap and the mspan is now dead.
//
// Allocating and freeing a large object uses the mheap
// directly, bypassing the mcache and mcentral.
//
-// Free object slots in an mspan are zeroed only if mspan.needzero is
-// false. If needzero is true, objects are zeroed as they are
-// allocated. There are various benefits to delaying zeroing this way:
+// If mspan.needzero is false, then free object slots in the mspan are
+// already zeroed. Otherwise if needzero is true, objects are zeroed as
+// they are allocated. There are various benefits to delaying zeroing
+// this way:
//
// 1. Stack frame allocation can avoid zeroing altogether.
//
@@ -197,17 +193,21 @@
// exceed Go's 48 bit limit, it's extremely unlikely in
// practice.
//
- // On aix/ppc64, the limits is increased to 1<<60 to accept addresses
- // returned by mmap syscall. These are in range:
- // 0x0a00000000000000 - 0x0afffffffffffff
- //
// On 32-bit platforms, we accept the full 32-bit address
// space because doing so is cheap.
// mips32 only has access to the low 2GB of virtual memory, so
// we further limit it to 31 bits.
//
+ // On darwin/arm64, although 64-bit pointers are presumably
+ // available, pointers are truncated to 33 bits. Furthermore,
+ // only the top 4 GiB of the address space are actually available
+ // to the application, but we allow the whole 33 bits anyway for
+ // simplicity.
+ // TODO(mknyszek): Consider limiting it to 32 bits and using
+ // arenaBaseOffset to offset into the top 4 GiB.
+ //
// WebAssembly currently has a limit of 4GB linear memory.
- heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosAix))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 60*sys.GoosAix
+ heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosDarwin*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosDarwin*sys.GoarchArm64
// maxAlloc is the maximum size of an allocation. On 64-bit,
// it's theoretically possible to allocate 1<<heapAddrBits bytes. On
@@ -226,7 +226,6 @@
// Platform Addr bits Arena size L1 entries L2 entries
// -------------- --------- ---------- ---------- -----------
// */64-bit 48 64MB 1 4M (32MB)
- // aix/64-bit 60 256MB 4096 4M (32MB)
// windows/64-bit 48 4MB 64 1M (8MB)
// */32-bit 32 4MB 1 1024 (4KB)
// */mips(le) 31 4MB 1 512 (2KB)
@@ -248,7 +247,7 @@
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
- logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoosAix)*(1-sys.GoarchWasm)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (8+20)*sys.GoosAix + (2+20)*sys.GoarchWasm
+ logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoarchWasm)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (2+20)*sys.GoarchWasm
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
@@ -268,10 +267,7 @@
// We use the L1 map on 64-bit Windows because the arena size
// is small, but the address space is still 48 bits, and
// there's a high cost to having a large L2.
- //
- // We use the L1 map on aix/ppc64 to keep the same L2 value
- // as on Linux.
- arenaL1Bits = 6*(_64bit*sys.GoosWindows) + 12*sys.GoosAix
+ arenaL1Bits = 6 * (_64bit * sys.GoosWindows)
// arenaL2Bits is the number of bits of the arena number
// covered by the second level arena index.
@@ -298,9 +294,15 @@
// bits. This offset lets us handle "negative" addresses (or
// high addresses if viewed as unsigned).
//
+	// On aix/ppc64, this offset allows us to keep heapAddrBits at 48.
+	// Otherwise, it would have to be 60 in order to handle mmap addresses
+	// (in the range 0x0a00000000000000 - 0x0afffffffffffff), but in that
+	// case the memory reserved in (s *pageAlloc).init for chunks causes
+	// significant slowdowns.
+ //
// On other platforms, the user address space is contiguous
// and starts at 0, so no offset is necessary.
- arenaBaseOffset uintptr = sys.GoarchAmd64 * (1 << 47)
+ arenaBaseOffset = 0xffff800000000000*sys.GoarchAmd64 + 0x0a00000000000000*sys.GoosAix
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
@@ -433,6 +435,10 @@
// The OS init code failed to fetch the physical page size.
throw("failed to get system page size")
}
+ if physPageSize > maxPhysPageSize {
+ print("system page size (", physPageSize, ") is larger than maximum page size (", maxPhysPageSize, ")\n")
+ throw("bad system page size")
+ }
if physPageSize < minPhysPageSize {
print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n")
throw("bad system page size")
@@ -445,6 +451,13 @@
print("system huge page size (", physHugePageSize, ") must be a power of 2\n")
throw("bad system huge page size")
}
+ if physHugePageSize > maxPhysHugePageSize {
+ // physHugePageSize is greater than the maximum supported huge page size.
+ // Don't throw here, like in the other cases, since a system configured
+		// in this way isn't wrong; we just don't have the code to support them.
+ // Instead, silently set the huge page size to zero.
+ physHugePageSize = 0
+ }
if physHugePageSize != 0 {
// Since physHugePageSize is a power of 2, it suffices to increase
// physHugePageShift until 1<<physHugePageShift == physHugePageSize.
@@ -452,11 +465,21 @@
physHugePageShift++
}
}
+ if pagesPerArena%pagesPerSpanRoot != 0 {
+ print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerSpanRoot (", pagesPerSpanRoot, ")\n")
+ throw("bad pagesPerSpanRoot")
+ }
+ if pagesPerArena%pagesPerReclaimerChunk != 0 {
+ print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
+ throw("bad pagesPerReclaimerChunk")
+ }
// Initialize the heap.
mheap_.init()
- _g_ := getg()
- _g_.m.mcache = allocmcache()
+ mcache0 = allocmcache()
+ lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas)
+ lockInit(&proflock, lockRankProf)
+ lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)
// Create initial arena growth hints.
if sys.PtrSize == 8 {
@@ -490,6 +513,7 @@
// allocation at 0x40 << 32 because when using 4k pages with 3-level
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
+ //
// On AIX, mmaps starts at 0x0A00000000000000 for 64-bit.
// processes.
for i := 0x7f; i >= 0; i-- {
@@ -568,7 +592,7 @@
if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
p = mheap_.heapArenaAlloc.end
}
- p = round(p+(256<<10), heapArenaBytes)
+ p = alignUp(p+(256<<10), heapArenaBytes)
// Because we're worried about fragmentation on
// 32-bit, we try to make a large initial reservation.
arenaSizes := []uintptr{
@@ -580,7 +604,7 @@
a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes)
if a != nil {
mheap_.arena.init(uintptr(a), size)
- p = uintptr(a) + size // For hint below
+ p = mheap_.arena.end // For hint below
break
}
}
@@ -601,7 +625,7 @@
//
// h must be locked.
func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
- n = round(n, heapArenaBytes)
+ n = alignUp(n, heapArenaBytes)
// First, try the arena pre-reservation.
v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
@@ -784,7 +808,7 @@
// re-reserve the aligned sub-region. This may race,
// so we may have to try again.
sysFree(unsafe.Pointer(p), size+align, nil)
- p = round(p, align)
+ p = alignUp(p, align)
p2 := sysReserve(unsafe.Pointer(p), size)
if p != uintptr(p2) {
// Must have raced. Try again.
@@ -798,7 +822,7 @@
return p2, size
default:
// Trim off the unaligned parts.
- pAligned := round(p, align)
+ pAligned := alignUp(p, align)
sysFree(unsafe.Pointer(p), pAligned-p, nil)
end := pAligned + size
endLen := (p + size + align) - end
@@ -939,7 +963,20 @@
shouldhelpgc := false
dataSize := size
- c := gomcache()
+ var c *mcache
+ if mp.p != 0 {
+ c = mp.p.ptr().mcache
+ } else {
+ // We will be called without a P while bootstrapping,
+ // in which case we use mcache0, which is set in mallocinit.
+ // mcache0 is cleared when bootstrapping is complete,
+ // by procresize.
+ c = mcache0
+ if c == nil {
+ throw("malloc called with no P")
+ }
+ }
+ var span *mspan
var x unsafe.Pointer
noscan := typ == nil || typ.ptrdata == 0
if size <= maxSmallSize {
@@ -976,11 +1013,11 @@
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
- off = round(off, 8)
+ off = alignUp(off, 8)
} else if size&3 == 0 {
- off = round(off, 4)
+ off = alignUp(off, 4)
} else if size&1 == 0 {
- off = round(off, 2)
+ off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
@@ -992,10 +1029,10 @@
return x
}
// Allocate a new maxTinySize block.
- span := c.alloc[tinySpanClass]
+ span = c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
- v, _, shouldhelpgc = c.nextFree(tinySpanClass)
+ v, span, shouldhelpgc = c.nextFree(tinySpanClass)
}
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
@@ -1010,13 +1047,13 @@
} else {
var sizeclass uint8
if size <= smallSizeMax-8 {
- sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]
+ sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
- sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
+ sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, noscan)
- span := c.alloc[spc]
+ span = c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(spc)
@@ -1027,15 +1064,14 @@
}
}
} else {
- var s *mspan
shouldhelpgc = true
systemstack(func() {
- s = largeAlloc(size, needzero, noscan)
+ span = largeAlloc(size, needzero, noscan)
})
- s.freeindex = 1
- s.allocCount = 1
- x = unsafe.Pointer(s.base())
- size = s.elemsize
+ span.freeindex = 1
+ span.allocCount = 1
+ x = unsafe.Pointer(span.base())
+ size = span.elemsize
}
var scanSize uintptr
@@ -1076,7 +1112,7 @@
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if gcphase != _GCoff {
- gcmarknewobject(uintptr(x), size, scanSize)
+ gcmarknewobject(span, uintptr(x), size, scanSize)
}
if raceenabled {
@@ -1135,10 +1171,16 @@
// pays the debt down to npage pages.
deductSweepCredit(npages*_PageSize, npages)
- s := mheap_.alloc(npages, makeSpanClass(0, noscan), true, needzero)
+ spc := makeSpanClass(0, noscan)
+ s := mheap_.alloc(npages, spc, needzero)
if s == nil {
throw("out of memory")
}
+ if go115NewMCentralImpl {
+ // Put the large span in the mcentral swept list so that it's
+ // visible to the background sweeper.
+ mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
+ }
s.limit = s.base() + size
heapBitsForAddr(s.base()).initSpan(s)
return s
@@ -1179,7 +1221,16 @@
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
- mp.mcache.next_sample = nextSample()
+ var c *mcache
+ if mp.p != 0 {
+ c = mp.p.ptr().mcache
+ } else {
+ c = mcache0
+ if c == nil {
+ throw("profilealloc called with no P")
+ }
+ }
+ c.next_sample = nextSample()
mProf_Malloc(x, size)
}
@@ -1313,7 +1364,7 @@
lock(&globalAlloc.mutex)
persistent = &globalAlloc.persistentAlloc
}
- persistent.off = round(persistent.off, align)
+ persistent.off = alignUp(persistent.off, align)
if persistent.off+size > persistentChunkSize || persistent.base == nil {
persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys))
if persistent.base == nil {
@@ -1331,7 +1382,7 @@
break
}
}
- persistent.off = round(sys.PtrSize, align)
+ persistent.off = alignUp(sys.PtrSize, align)
}
p := persistent.base.add(persistent.off)
persistent.off += size
@@ -1372,17 +1423,24 @@
}
func (l *linearAlloc) init(base, size uintptr) {
+ if base+size < base {
+ // Chop off the last byte. The runtime isn't prepared
+ // to deal with situations where the bounds could overflow.
+ // Leave that memory reserved, though, so we don't map it
+ // later.
+ size -= 1
+ }
l.next, l.mapped = base, base
l.end = base + size
}
func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
- p := round(l.next, align)
+ p := alignUp(l.next, align)
if p+size > l.end {
return nil
}
l.next = p + size
- if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped {
+ if pEnd := alignUp(l.next-1, physPageSize); pEnd > l.mapped {
// Transition from Reserved to Prepared to Ready.
sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat)
sysUsed(unsafe.Pointer(l.mapped), pEnd-l.mapped)
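
The round() calls above are replaced with alignUp and divRoundUp, which are defined elsewhere in the runtime; their conventional definitions (power-of-two alignment and ceiling division) are sketched below for reference. For the heapAddrBits change, plugging the GOOS/GOARCH constants into the new expression gives 48 on linux/amd64 (only the first term is nonzero) and 33 on darwin/arm64 (only the final 33*GoosDarwin*GoarchArm64 term is nonzero).

package main

import "fmt"

// alignUp rounds n up to a multiple of a, where a must be a power of two;
// this matches the behavior of the alignUp calls that replace round() above.
func alignUp(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

// divRoundUp returns ceil(n / b); it replaces open-coded expressions such as
// (size+smallSizeDiv-1)/smallSizeDiv in the size-class lookup.
func divRoundUp(n, b uintptr) uintptr { return (n + b - 1) / b }

func main() {
	fmt.Println(alignUp(13, 8))     // 16
	fmt.Println(alignUp(4096, 512)) // 4096
	fmt.Println(divRoundUp(13, 8))  // 2
}
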
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index a2d5864..5c97f54 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -168,6 +168,14 @@
}
}
+func TestPageCacheLeak(t *testing.T) {
+ defer GOMAXPROCS(GOMAXPROCS(1))
+ leaked := PageCachePagesLeaked()
+ if leaked != 0 {
+ t.Fatalf("found %d leaked pages in page caches", leaked)
+ }
+}
+
func TestPhysicalMemoryUtilization(t *testing.T) {
got := runTestProg(t, "testprog", "GCPhys")
want := "OK\n"
@@ -176,6 +184,19 @@
}
}
+func TestScavengedBitsCleared(t *testing.T) {
+ var mismatches [128]BitsMismatch
+ if n, ok := CheckScavengedBitsCleared(mismatches[:]); !ok {
+ t.Errorf("uncleared scavenged bits")
+ for _, m := range mismatches[:n] {
+ t.Logf("\t@ address 0x%x", m.Base)
+ t.Logf("\t| got: %064b", m.Got)
+ t.Logf("\t| want: %064b", m.Want)
+ }
+ t.FailNow()
+ }
+}
+
type acLink struct {
x [1 << 20]byte
}
@@ -183,14 +204,6 @@
var arenaCollisionSink []*acLink
func TestArenaCollision(t *testing.T) {
- if GOOS == "darwin" && race.Enabled {
- // Skip this test on Darwin in race mode because Darwin 10.10 has
- // issues following arena hints and runs out of them in race mode, so
- // MAP_FIXED is used to ensure we keep the heap in the memory region the
- // race detector expects.
- // TODO(mknyszek): Delete this when Darwin 10.10 is no longer supported.
- t.Skip("disabled on Darwin with race mode since MAP_FIXED is used")
- }
testenv.MustHaveExec(t)
// Test that mheap.sysAlloc handles collisions with other
diff --git a/src/runtime/map.go b/src/runtime/map.go
index 386f965..399c1b0 100644
--- a/src/runtime/map.go
+++ b/src/runtime/map.go
@@ -66,7 +66,7 @@
bucketCnt = 1 << bucketCntBits
// Maximum average load of a bucket that triggers growth is 6.5.
- // Represent as loadFactorNum/loadFactDen, to allow integer math.
+ // Represent as loadFactorNum/loadFactorDen, to allow integer math.
loadFactorNum = 13
loadFactorDen = 2
@@ -403,15 +403,14 @@
}
if h == nil || h.count == 0 {
if t.hashMightPanic() {
- t.key.alg.hash(key, 0) // see issue 23734
+ t.hasher(key, 0) // see issue 23734
}
return unsafe.Pointer(&zeroVal[0])
}
if h.flags&hashWriting != 0 {
throw("concurrent map read and map write")
}
- alg := t.key.alg
- hash := alg.hash(key, uintptr(h.hash0))
+ hash := t.hasher(key, uintptr(h.hash0))
m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -438,7 +437,7 @@
if t.indirectkey() {
k = *((*unsafe.Pointer)(k))
}
- if alg.equal(key, k) {
+ if t.key.equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
if t.indirectelem() {
e = *((*unsafe.Pointer)(e))
@@ -462,15 +461,14 @@
}
if h == nil || h.count == 0 {
if t.hashMightPanic() {
- t.key.alg.hash(key, 0) // see issue 23734
+ t.hasher(key, 0) // see issue 23734
}
return unsafe.Pointer(&zeroVal[0]), false
}
if h.flags&hashWriting != 0 {
throw("concurrent map read and map write")
}
- alg := t.key.alg
- hash := alg.hash(key, uintptr(h.hash0))
+ hash := t.hasher(key, uintptr(h.hash0))
m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -497,7 +495,7 @@
if t.indirectkey() {
k = *((*unsafe.Pointer)(k))
}
- if alg.equal(key, k) {
+ if t.key.equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
if t.indirectelem() {
e = *((*unsafe.Pointer)(e))
@@ -514,8 +512,7 @@
if h == nil || h.count == 0 {
return nil, nil
}
- alg := t.key.alg
- hash := alg.hash(key, uintptr(h.hash0))
+ hash := t.hasher(key, uintptr(h.hash0))
m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -542,7 +539,7 @@
if t.indirectkey() {
k = *((*unsafe.Pointer)(k))
}
- if alg.equal(key, k) {
+ if t.key.equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
if t.indirectelem() {
e = *((*unsafe.Pointer)(e))
@@ -587,10 +584,9 @@
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- alg := t.key.alg
- hash := alg.hash(key, uintptr(h.hash0))
+ hash := t.hasher(key, uintptr(h.hash0))
- // Set hashWriting after calling alg.hash, since alg.hash may panic,
+ // Set hashWriting after calling t.hasher, since t.hasher may panic,
// in which case we have not actually done a write.
h.flags ^= hashWriting
@@ -627,7 +623,7 @@
if t.indirectkey() {
k = *((*unsafe.Pointer)(k))
}
- if !alg.equal(key, k) {
+ if !t.key.equal(key, k) {
continue
}
// already have a mapping for key. Update it.
@@ -698,7 +694,7 @@
}
if h == nil || h.count == 0 {
if t.hashMightPanic() {
- t.key.alg.hash(key, 0) // see issue 23734
+ t.hasher(key, 0) // see issue 23734
}
return
}
@@ -706,10 +702,9 @@
throw("concurrent map writes")
}
- alg := t.key.alg
- hash := alg.hash(key, uintptr(h.hash0))
+ hash := t.hasher(key, uintptr(h.hash0))
- // Set hashWriting after calling alg.hash, since alg.hash may panic,
+ // Set hashWriting after calling t.hasher, since t.hasher may panic,
// in which case we have not actually done a write (delete).
h.flags ^= hashWriting
@@ -734,7 +729,7 @@
if t.indirectkey() {
k2 = *((*unsafe.Pointer)(k2))
}
- if !alg.equal(key, k2) {
+ if !t.key.equal(key, k2) {
continue
}
// Only clear key if there are pointers in it.
@@ -862,7 +857,6 @@
b := it.bptr
i := it.i
checkBucket := it.checkBucket
- alg := t.key.alg
next:
if b == nil {
@@ -916,10 +910,10 @@
// through the oldbucket, skipping any keys that will go
// to the other new bucket (each oldbucket expands to two
// buckets during a grow).
- if t.reflexivekey() || alg.equal(k, k) {
+ if t.reflexivekey() || t.key.equal(k, k) {
// If the item in the oldbucket is not destined for
// the current new bucket in the iteration, skip it.
- hash := alg.hash(k, uintptr(h.hash0))
+ hash := t.hasher(k, uintptr(h.hash0))
if hash&bucketMask(it.B) != checkBucket {
continue
}
@@ -937,7 +931,7 @@
}
}
if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
- !(t.reflexivekey() || alg.equal(k, k)) {
+ !(t.reflexivekey() || t.key.equal(k, k)) {
// This is the golden data, we can return it.
// OR
// key!=key, so the entry can't be deleted or updated, so we can just return it.
@@ -1174,8 +1168,8 @@
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/elem to bucket x or bucket y).
- hash := t.key.alg.hash(k2, uintptr(h.hash0))
- if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.alg.equal(k2, k2) {
+ hash := t.hasher(k2, uintptr(h.hash0))
+ if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equal(k2, k2) {
// If key != key (NaNs), then the hash could be (and probably
// will be) entirely different from the old hash. Moreover,
// it isn't reproducible. Reproducibility is required in the
@@ -1269,16 +1263,12 @@
}
}
-func ismapkey(t *_type) bool {
- return t.alg.hash != nil
-}
-
// Reflect stubs. Called from ../reflect/asm_*.s
//go:linkname reflect_makemap reflect.makemap
func reflect_makemap(t *maptype, cap int) *hmap {
// Check invariants and reflects math.
- if !ismapkey(t.key) {
+ if t.key.equal == nil {
throw("runtime.reflect_makemap: unsupported map key type")
}
if t.key.size > maxKeySize && (!t.indirectkey() || t.keysize != uint8(sys.PtrSize)) ||
@@ -1381,10 +1371,5 @@
return h.count
}
-//go:linkname reflect_ismapkey reflect.ismapkey
-func reflect_ismapkey(t *_type) bool {
- return ismapkey(t)
-}
-
-const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go
+const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go:zeroValSize
var zeroVal [maxZero]byte
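
The NaN handling described in the iterator and evacuation comments above (keys for which key != key hash non-reproducibly) is observable from ordinary Go code: such keys can be inserted and iterated over, but never looked up, updated, or deleted. A small demonstration:

package main

import (
	"fmt"
	"math"
)

func main() {
	m := map[float64]int{}
	for i := 0; i < 3; i++ {
		// NaN never compares equal to any key (including itself), so each
		// assignment creates a new entry rather than updating an old one.
		m[math.NaN()] = i
	}
	fmt.Println(len(m)) // 3

	_, ok := m[math.NaN()] // lookups can never match a NaN key
	fmt.Println(ok)        // false

	delete(m, math.NaN()) // deletes nothing, for the same reason
	for k, v := range m { // iteration still visits the stranded entries
		fmt.Println(k, v)
	}
}
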
diff --git a/src/runtime/map_benchmark_test.go b/src/runtime/map_benchmark_test.go
index d37dadc..893cb6c 100644
--- a/src/runtime/map_benchmark_test.go
+++ b/src/runtime/map_benchmark_test.go
@@ -251,7 +251,7 @@
}
func BenchmarkMapCycle(b *testing.B) {
- // Arrange map entries to be a permuation, so that
+ // Arrange map entries to be a permutation, so that
// we hit all entries, and one lookup is data dependent
// on the previous lookup.
const N = 3127
@@ -483,3 +483,52 @@
})
}
}
+
+var BoolSink bool
+
+func BenchmarkMapInterfaceString(b *testing.B) {
+ m := map[interface{}]bool{}
+
+ for i := 0; i < 100; i++ {
+ m[fmt.Sprintf("%d", i)] = true
+ }
+
+ key := (interface{})("A")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ BoolSink = m[key]
+ }
+}
+func BenchmarkMapInterfacePtr(b *testing.B) {
+ m := map[interface{}]bool{}
+
+ for i := 0; i < 100; i++ {
+ i := i
+ m[&i] = true
+ }
+
+ key := new(int)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ BoolSink = m[key]
+ }
+}
+
+var (
+ hintLessThan8 = 7
+ hintGreaterThan8 = 32
+)
+
+func BenchmarkNewEmptyMapHintLessThan8(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ _ = make(map[int]int, hintLessThan8)
+ }
+}
+
+func BenchmarkNewEmptyMapHintGreaterThan8(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ _ = make(map[int]int, hintGreaterThan8)
+ }
+}
diff --git a/src/runtime/map_fast32.go b/src/runtime/map_fast32.go
index 0ab75ca..534454f 100644
--- a/src/runtime/map_fast32.go
+++ b/src/runtime/map_fast32.go
@@ -25,7 +25,7 @@
// One-bucket table. No need to hash.
b = (*bmap)(h.buckets)
} else {
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -65,7 +65,7 @@
// One-bucket table. No need to hash.
b = (*bmap)(h.buckets)
} else {
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -100,9 +100,9 @@
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapassign.
+ // Set hashWriting after calling t.hasher for consistency with mapassign.
h.flags ^= hashWriting
if h.buckets == nil {
@@ -190,9 +190,9 @@
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapassign.
+ // Set hashWriting after calling t.hasher for consistency with mapassign.
h.flags ^= hashWriting
if h.buckets == nil {
@@ -281,9 +281,9 @@
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapdelete
+ // Set hashWriting after calling t.hasher for consistency with mapdelete
h.flags ^= hashWriting
bucket := hash & bucketMask(h.B)
@@ -400,7 +400,7 @@
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/elem to bucket x or bucket y).
- hash := t.key.alg.hash(k, uintptr(h.hash0))
+ hash := t.hasher(k, uintptr(h.hash0))
if hash&newbit != 0 {
useY = 1
}
diff --git a/src/runtime/map_fast64.go b/src/runtime/map_fast64.go
index 4d420e7..1669c7c 100644
--- a/src/runtime/map_fast64.go
+++ b/src/runtime/map_fast64.go
@@ -25,7 +25,7 @@
// One-bucket table. No need to hash.
b = (*bmap)(h.buckets)
} else {
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -65,7 +65,7 @@
// One-bucket table. No need to hash.
b = (*bmap)(h.buckets)
} else {
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -100,9 +100,9 @@
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapassign.
+ // Set hashWriting after calling t.hasher for consistency with mapassign.
h.flags ^= hashWriting
if h.buckets == nil {
@@ -190,9 +190,9 @@
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapassign.
+ // Set hashWriting after calling t.hasher for consistency with mapassign.
h.flags ^= hashWriting
if h.buckets == nil {
@@ -281,9 +281,9 @@
throw("concurrent map writes")
}
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapdelete
+ // Set hashWriting after calling t.hasher for consistency with mapdelete
h.flags ^= hashWriting
bucket := hash & bucketMask(h.B)
@@ -400,7 +400,7 @@
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/elem to bucket x or bucket y).
- hash := t.key.alg.hash(k, uintptr(h.hash0))
+ hash := t.hasher(k, uintptr(h.hash0))
if hash&newbit != 0 {
useY = 1
}
diff --git a/src/runtime/map_faststr.go b/src/runtime/map_faststr.go
index 069994f..069cda6 100644
--- a/src/runtime/map_faststr.go
+++ b/src/runtime/map_faststr.go
@@ -76,7 +76,7 @@
return unsafe.Pointer(&zeroVal[0])
}
dohash:
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -171,7 +171,7 @@
return unsafe.Pointer(&zeroVal[0]), false
}
dohash:
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
@@ -211,9 +211,9 @@
throw("concurrent map writes")
}
key := stringStructOf(&s)
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapassign.
+ // Set hashWriting after calling t.hasher for consistency with mapassign.
h.flags ^= hashWriting
if h.buckets == nil {
@@ -307,9 +307,9 @@
}
key := stringStructOf(&ky)
- hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
+ hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
- // Set hashWriting after calling alg.hash for consistency with mapdelete
+ // Set hashWriting after calling t.hasher for consistency with mapdelete
h.flags ^= hashWriting
bucket := hash & bucketMask(h.B)
@@ -429,7 +429,7 @@
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/elem to bucket x or bucket y).
- hash := t.key.alg.hash(k, uintptr(h.hash0))
+ hash := t.hasher(k, uintptr(h.hash0))
if hash&newbit != 0 {
useY = 1
}
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index ee9468d..1b7ccad 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -1156,3 +1156,64 @@
}
runtime.MapTombstoneCheck(m)
}
+
+type canString int
+
+func (c canString) String() string {
+ return fmt.Sprintf("%d", int(c))
+}
+
+func TestMapInterfaceKey(t *testing.T) {
+ // Test all the special cases in runtime.typehash.
+ type GrabBag struct {
+ f32 float32
+ f64 float64
+ c64 complex64
+ c128 complex128
+ s string
+ i0 interface{}
+ i1 interface {
+ String() string
+ }
+ a [4]string
+ }
+
+ m := map[interface{}]bool{}
+ // Put a bunch of data in m, so that a bad hash is likely to
+ // lead to a bad bucket, which will lead to a missed lookup.
+ for i := 0; i < 1000; i++ {
+ m[i] = true
+ }
+ m[GrabBag{f32: 1.0}] = true
+ if !m[GrabBag{f32: 1.0}] {
+ panic("f32 not found")
+ }
+ m[GrabBag{f64: 1.0}] = true
+ if !m[GrabBag{f64: 1.0}] {
+ panic("f64 not found")
+ }
+ m[GrabBag{c64: 1.0i}] = true
+ if !m[GrabBag{c64: 1.0i}] {
+ panic("c64 not found")
+ }
+ m[GrabBag{c128: 1.0i}] = true
+ if !m[GrabBag{c128: 1.0i}] {
+ panic("c128 not found")
+ }
+ m[GrabBag{s: "foo"}] = true
+ if !m[GrabBag{s: "foo"}] {
+ panic("string not found")
+ }
+ m[GrabBag{i0: "foo"}] = true
+ if !m[GrabBag{i0: "foo"}] {
+ panic("interface{} not found")
+ }
+ m[GrabBag{i1: canString(5)}] = true
+ if !m[GrabBag{i1: canString(5)}] {
+ panic("interface{String() string} not found")
+ }
+ m[GrabBag{a: [4]string{"foo", "bar", "baz", "bop"}}] = true
+ if !m[GrabBag{a: [4]string{"foo", "bar", "baz", "bop"}}] {
+ panic("array not found")
+ }
+}
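
Aside: the test drives runtime.typehash through interface-keyed maps because an interface key is hashed by its dynamic type. A tiny standalone illustration of that behavior (not part of this CL):

package main

import "fmt"

func main() {
	m := map[interface{}]bool{}
	m[1] = true                   // int key
	m["1"] = true                 // string key: different dynamic type, separate entry
	m[[2]string{"a", "b"}] = true // comparable array, hashed element by element
	fmt.Println(len(m))           // 3
}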
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index df3ab6f..f7875d3 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -157,8 +157,8 @@
if dst == src {
return
}
- if typ.ptrdata != 0 {
- bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size)
+ if writeBarrier.needed && typ.ptrdata != 0 {
+ bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.ptrdata)
}
// There's a race here: if some other goroutine can write to
// src, it may change some pointer in src after we've
@@ -193,17 +193,18 @@
// typedmemmovepartial is like typedmemmove but assumes that
// dst and src point off bytes into the value and only copies size bytes.
+// off must be a multiple of sys.PtrSize.
//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
- if writeBarrier.needed && typ.ptrdata != 0 && size >= sys.PtrSize {
- // Pointer-align start address for bulk barrier.
- adst, asrc, asize := dst, src, size
- if frag := -off & (sys.PtrSize - 1); frag != 0 {
- adst = add(dst, frag)
- asrc = add(src, frag)
- asize -= frag
+ if writeBarrier.needed && typ.ptrdata > off && size >= sys.PtrSize {
+ if off&(sys.PtrSize-1) != 0 {
+ panic("reflect: internal error: misaligned offset")
}
- bulkBarrierPreWrite(uintptr(adst), uintptr(asrc), asize&^(sys.PtrSize-1))
+ pwsize := alignDown(size, sys.PtrSize)
+ if poff := typ.ptrdata - off; pwsize > poff {
+ pwsize = poff
+ }
+ bulkBarrierPreWrite(uintptr(dst), uintptr(src), pwsize)
}
memmove(dst, src, size)
@@ -230,16 +231,14 @@
}
//go:nosplit
-func typedslicecopy(typ *_type, dst, src slice) int {
- n := dst.len
- if n > src.len {
- n = src.len
+func typedslicecopy(typ *_type, dstPtr unsafe.Pointer, dstLen int, srcPtr unsafe.Pointer, srcLen int) int {
+ n := dstLen
+ if n > srcLen {
+ n = srcLen
}
if n == 0 {
return 0
}
- dstp := dst.array
- srcp := src.array
// The compiler emits calls to typedslicecopy before
// instrumentation runs, so unlike the other copying and
@@ -248,19 +247,19 @@
if raceenabled {
callerpc := getcallerpc()
pc := funcPC(slicecopy)
- racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc)
- racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
+ racewriterangepc(dstPtr, uintptr(n)*typ.size, callerpc, pc)
+ racereadrangepc(srcPtr, uintptr(n)*typ.size, callerpc, pc)
}
if msanenabled {
- msanwrite(dstp, uintptr(n)*typ.size)
- msanread(srcp, uintptr(n)*typ.size)
+ msanwrite(dstPtr, uintptr(n)*typ.size)
+ msanread(srcPtr, uintptr(n)*typ.size)
}
if writeBarrier.cgo {
- cgoCheckSliceCopy(typ, dst, src, n)
+ cgoCheckSliceCopy(typ, dstPtr, srcPtr, n)
}
- if dstp == srcp {
+ if dstPtr == srcPtr {
return n
}
@@ -270,11 +269,12 @@
// before calling typedslicecopy.
size := uintptr(n) * typ.size
if writeBarrier.needed {
- bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size)
+ pwsize := size - typ.size + typ.ptrdata
+ bulkBarrierPreWrite(uintptr(dstPtr), uintptr(srcPtr), pwsize)
}
// See typedmemmove for a discussion of the race between the
// barrier and memmove.
- memmove(dstp, srcp, size)
+ memmove(dstPtr, srcPtr, size)
return n
}
@@ -304,7 +304,7 @@
memmove(dst.array, src.array, size)
return n
}
- return typedslicecopy(elemType, dst, src)
+ return typedslicecopy(elemType, dst.array, dst.len, src.array, src.len)
}
// typedmemclr clears the typed memory at ptr with type typ. The
@@ -317,8 +317,8 @@
//
//go:nosplit
func typedmemclr(typ *_type, ptr unsafe.Pointer) {
- if typ.ptrdata != 0 {
- bulkBarrierPreWrite(uintptr(ptr), 0, typ.size)
+ if writeBarrier.needed && typ.ptrdata != 0 {
+ bulkBarrierPreWrite(uintptr(ptr), 0, typ.ptrdata)
}
memclrNoHeapPointers(ptr, typ.size)
}
@@ -330,7 +330,7 @@
//go:linkname reflect_typedmemclrpartial reflect.typedmemclrpartial
func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintptr) {
- if typ.ptrdata != 0 {
+ if writeBarrier.needed && typ.ptrdata != 0 {
bulkBarrierPreWrite(uintptr(ptr), 0, size)
}
memclrNoHeapPointers(ptr, size)
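
Aside on the barrier sizes above: typ.ptrdata is the length of the value's prefix that can contain pointers, so a single value only needs a barrier over ptrdata bytes, and a slice of n elements only needs (n-1)*size + ptrdata bytes — which is the size - typ.size + typ.ptrdata expression in typedslicecopy. A small worked example with assumed numbers:

package main

import "fmt"

func main() {
	// Hypothetical element type: struct { p *int; pad [24]byte }
	// elemSize = 32, ptrdata = 8 (pointers only in the first 8 bytes).
	const elemSize, ptrdata, n = 32, 8, 4
	size := n * elemSize                // 128 bytes moved in total
	pwsize := size - elemSize + ptrdata // 104 bytes can hold pointers
	fmt.Println(size, pwsize)           // 128 104
}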
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 30ec5f1..35332c9 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -243,6 +243,10 @@
}
// isFree reports whether the index'th object in s is unallocated.
+//
+// The caller must ensure s.state is mSpanInUse, and there must have
+// been no preemption points since ensuring this (which could allow a
+// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
if index < s.freeindex {
return false
@@ -349,6 +353,33 @@
return
}
+// badPointer throws bad pointer in heap panic.
+func badPointer(s *mspan, p, refBase, refOff uintptr) {
+ // Typically this indicates an incorrect use
+ // of unsafe or cgo to store a bad pointer in
+ // the Go heap. It may also indicate a runtime
+ // bug.
+ //
+ // TODO(austin): We could be more aggressive
+ // and detect pointers to unallocated objects
+ // in allocated spans.
+ printlock()
+ print("runtime: pointer ", hex(p))
+ state := s.state.get()
+ if state != mSpanInUse {
+ print(" to unallocated span")
+ } else {
+ print(" to unused region of span")
+ }
+ print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state, "\n")
+ if refBase != 0 {
+ print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
+ gcDumpObject("object", refBase, refOff)
+ }
+ getg().m.traceback = 2
+ throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
+}
+
// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
@@ -359,42 +390,30 @@
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
+//
+// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
+// Since p is a uintptr, it would not be adjusted if the stack were to move.
+//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
s = spanOf(p)
+ // If s is nil, the virtual address has never been part of the heap.
+ // This pointer may be to some mmap'd region, so we allow it.
+ if s == nil {
+ return
+ }
// If p is a bad pointer, it may not be in s's bounds.
- if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
- if s == nil || s.state == mSpanManual {
- // If s is nil, the virtual address has never been part of the heap.
- // This pointer may be to some mmap'd region, so we allow it.
- // Pointers into stacks are also ok, the runtime manages these explicitly.
+ //
+ // Check s.state to synchronize with span initialization
+ // before checking other fields. See also spanOfHeap.
+ if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
+ // Pointers into stacks are also ok, the runtime manages these explicitly.
+ if state == mSpanManual {
return
}
-
// The following ensures that we are rigorous about what data
// structures hold valid pointers.
if debug.invalidptr != 0 {
- // Typically this indicates an incorrect use
- // of unsafe or cgo to store a bad pointer in
- // the Go heap. It may also indicate a runtime
- // bug.
- //
- // TODO(austin): We could be more aggressive
- // and detect pointers to unallocated objects
- // in allocated spans.
- printlock()
- print("runtime: pointer ", hex(p))
- if s.state != mSpanInUse {
- print(" to unallocated span")
- } else {
- print(" to unused region of span")
- }
- print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
- if refBase != 0 {
- print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
- gcDumpObject("object", refBase, refOff)
- }
- getg().m.traceback = 2
- throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
+ badPointer(s, p, refBase, refOff)
}
return
}
@@ -609,7 +628,7 @@
}
}
return
- } else if s.state != mSpanInUse || dst < s.base() || s.limit <= dst {
+ } else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
// dst was heap memory at some point, but isn't now.
// It can't be a global. It must be either our stack,
// or in the case of direct channel sends, it could be
@@ -781,29 +800,19 @@
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
- size, n, total := s.layout()
-
- // Init the markbit structures
- s.freeindex = 0
- s.allocCache = ^uint64(0) // all 1s indicating all free.
- s.nelems = n
- s.allocBits = nil
- s.gcmarkBits = nil
- s.gcmarkBits = newMarkBits(s.nelems)
- s.allocBits = newAllocBits(s.nelems)
-
// Clear bits corresponding to objects.
- nw := total / sys.PtrSize
+ nw := (s.npages << _PageShift) / sys.PtrSize
if nw%wordsPerBitmapByte != 0 {
throw("initSpan: unaligned length")
}
if h.shift != 0 {
throw("initSpan: unaligned base")
}
+ isPtrs := sys.PtrSize == 8 && s.elemsize == sys.PtrSize
for nw > 0 {
hNext, anw := h.forwardOrBoundary(nw)
nbyte := anw / wordsPerBitmapByte
- if sys.PtrSize == 8 && size == sys.PtrSize {
+ if isPtrs {
bitp := h.bitp
for i := uintptr(0); i < nbyte; i++ {
*bitp = bitPointerAll | bitScanAll
@@ -856,58 +865,22 @@
}
}
-// oneBitCount is indexed by byte and produces the
-// number of 1 bits in that byte. For example 128 has 1 bit set
-// and oneBitCount[128] will holds 1.
-var oneBitCount = [256]uint8{
- 0, 1, 1, 2, 1, 2, 2, 3,
- 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7,
- 5, 6, 6, 7, 6, 7, 7, 8}
-
// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
-// TODO:(rlh) Use popcount intrinsic.
func (s *mspan) countAlloc() int {
count := 0
- maxIndex := s.nelems / 8
- for i := uintptr(0); i < maxIndex; i++ {
- mrkBits := *s.gcmarkBits.bytep(i)
- count += int(oneBitCount[mrkBits])
- }
- if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
- mrkBits := *s.gcmarkBits.bytep(maxIndex)
- mask := uint8((1 << bitsInLastByte) - 1)
- bits := mrkBits & mask
- count += int(oneBitCount[bits])
+ bytes := divRoundUp(s.nelems, 8)
+ // Iterate over each 8-byte chunk and count allocations
+ // with an intrinsic. Note that newMarkBits guarantees that
+ // gcmarkBits will be 8-byte aligned, so we don't have to
+ // worry about edge cases, irrelevant bits will simply be zero.
+ for i := uintptr(0); i < bytes; i += 8 {
+ // Extract 64 bits from the byte pointer and get a OnesCount.
+ // Note that the unsafe cast here doesn't preserve endianness,
+ // but that's OK. We only care about how many bits are 1, not
+ // about the order we discover them in.
+ mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
+ count += sys.OnesCount64(mrkBits)
}
return count
}
@@ -1912,7 +1885,11 @@
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
- s := mheap_.allocManual((ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize, &memstats.gc_sys)
+ // Each word of ptrdata needs one bit in the bitmap.
+ bitmapBytes := divRoundUp(ptrdata, 8*sys.PtrSize)
+ // Compute the number of pages needed for bitmapBytes.
+ pages := divRoundUp(bitmapBytes, pageSize)
+ s := mheap_.allocManual(pages, &memstats.gc_sys)
runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
return s
}
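
Aside: countAlloc now counts mark bits with a population-count intrinsic over 8-byte chunks instead of the deleted 256-entry table. A minimal sketch of the same idea using math/bits (byte order does not matter because only the number of set bits is used):

package main

import (
	"fmt"
	"math/bits"
)

// countSetBits assumes len(b) is a multiple of 8 and that any padding
// bits beyond the logical end are zero, mirroring the gcmarkBits layout.
func countSetBits(b []byte) int {
	count := 0
	for i := 0; i < len(b); i += 8 {
		var w uint64
		for j := 0; j < 8; j++ {
			w |= uint64(b[i+j]) << (8 * j)
		}
		count += bits.OnesCount64(w)
	}
	return count
}

func main() {
	fmt.Println(countSetBits([]byte{0xFF, 0x01, 0, 0, 0, 0, 0, 0})) // 9
}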
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index d4fa9a0..5bceb51 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -131,7 +131,11 @@
if s.sweepgen != mheap_.sweepgen+3 {
throw("bad sweepgen in refill")
}
- atomic.Store(&s.sweepgen, mheap_.sweepgen)
+ if go115NewMCentralImpl {
+ mheap_.central[spc].mcentral.uncacheSpan(s)
+ } else {
+ atomic.Store(&s.sweepgen, mheap_.sweepgen)
+ }
}
// Get a new cached span from the central lists.
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index cd59010..ed49d86 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -20,8 +20,31 @@
type mcentral struct {
lock mutex
spanclass spanClass
- nonempty mSpanList // list of spans with a free object, ie a nonempty free list
- empty mSpanList // list of spans with no free objects (or cached in an mcache)
+
+ // For !go115NewMCentralImpl.
+ nonempty mSpanList // list of spans with a free object, ie a nonempty free list
+ empty mSpanList // list of spans with no free objects (or cached in an mcache)
+
+ // partial and full contain two mspan sets: one of swept in-use
+ // spans, and one of unswept in-use spans. These two trade
+ // roles on each GC cycle. The unswept set is drained either by
+ // allocation or by the background sweeper in every GC cycle,
+ // so only two roles are necessary.
+ //
+ // sweepgen is increased by 2 on each GC cycle, so the swept
+ // spans are in partial[sweepgen/2%2] and the unswept spans are in
+ // partial[1-sweepgen/2%2]. Sweeping pops spans from the
+ // unswept set and pushes spans that are still in-use on the
+ // swept set. Likewise, allocating an in-use span pushes it
+ // on the swept set.
+ //
+ // Some parts of the sweeper can sweep arbitrary spans, and hence
+ // can't remove them from the unswept set, but will add the span
+ // to the appropriate swept list. As a result, the parts of the
+ // sweeper and mcentral that do consume from the unswept list may
+ // encounter swept spans, and these should be ignored.
+ partial [2]spanSet // list of spans with a free object
+ full [2]spanSet // list of spans with no free objects
// nmalloc is the cumulative count of objects allocated from
// this mcentral, assuming all spans in mcaches are
@@ -32,12 +55,168 @@
// Initialize a single central free list.
func (c *mcentral) init(spc spanClass) {
c.spanclass = spc
- c.nonempty.init()
- c.empty.init()
+ if go115NewMCentralImpl {
+ lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine)
+ lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine)
+ lockInit(&c.full[0].spineLock, lockRankSpanSetSpine)
+ lockInit(&c.full[1].spineLock, lockRankSpanSetSpine)
+ } else {
+ c.nonempty.init()
+ c.empty.init()
+ lockInit(&c.lock, lockRankMcentral)
+ }
+}
+
+// partialUnswept returns the spanSet which holds partially-filled
+// unswept spans for this sweepgen.
+func (c *mcentral) partialUnswept(sweepgen uint32) *spanSet {
+ return &c.partial[1-sweepgen/2%2]
+}
+
+// partialSwept returns the spanSet which holds partially-filled
+// swept spans for this sweepgen.
+func (c *mcentral) partialSwept(sweepgen uint32) *spanSet {
+ return &c.partial[sweepgen/2%2]
+}
+
+// fullUnswept returns the spanSet which holds unswept spans without any
+// free slots for this sweepgen.
+func (c *mcentral) fullUnswept(sweepgen uint32) *spanSet {
+ return &c.full[1-sweepgen/2%2]
+}
+
+// fullSwept returns the spanSet which holds swept spans without any
+// free slots for this sweepgen.
+func (c *mcentral) fullSwept(sweepgen uint32) *spanSet {
+ return &c.full[sweepgen/2%2]
}
// Allocate a span to use in an mcache.
func (c *mcentral) cacheSpan() *mspan {
+ if !go115NewMCentralImpl {
+ return c.oldCacheSpan()
+ }
+ // Deduct credit for this span allocation and sweep if necessary.
+ spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
+ deductSweepCredit(spanBytes, 0)
+
+ sg := mheap_.sweepgen
+
+ traceDone := false
+ if trace.enabled {
+ traceGCSweepStart()
+ }
+
+ // If we sweep spanBudget spans without finding any free
+ // space, just allocate a fresh span. This limits the amount
+ // of time we can spend trying to find free space and
+ // amortizes the cost of small object sweeping over the
+ // benefit of having a full free span to allocate from. By
+ // setting this to 100, we limit the space overhead to 1%.
+ //
+ // TODO(austin,mknyszek): This still has bad worst-case
+ // throughput. For example, this could find just one free slot
+ // on the 100th swept span. That limits allocation latency, but
+ // still has very poor throughput. We could instead keep a
+ // running free-to-used budget and switch to fresh span
+ // allocation if the budget runs low.
+ spanBudget := 100
+
+ var s *mspan
+
+ // Try partial swept spans first.
+ if s = c.partialSwept(sg).pop(); s != nil {
+ goto havespan
+ }
+
+ // Now try partial unswept spans.
+ for ; spanBudget >= 0; spanBudget-- {
+ s = c.partialUnswept(sg).pop()
+ if s == nil {
+ break
+ }
+ if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
+ // We got ownership of the span, so let's sweep it and use it.
+ s.sweep(true)
+ goto havespan
+ }
+ // We failed to get ownership of the span, which means it's being or
+ // has been swept by an asynchronous sweeper that just couldn't remove it
+ // from the unswept list. That sweeper took ownership of the span and
+ // responsibility for either freeing it to the heap or putting it on the
+ // right swept list. Either way, we should just ignore it (and it's unsafe
+ // for us to do anything else).
+ }
+ // Now try full unswept spans, sweeping them and putting them into the
+ // right list if we fail to get a span.
+ for ; spanBudget >= 0; spanBudget-- {
+ s = c.fullUnswept(sg).pop()
+ if s == nil {
+ break
+ }
+ if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
+ // We got ownership of the span, so let's sweep it.
+ s.sweep(true)
+ // Check if there's any free space.
+ freeIndex := s.nextFreeIndex()
+ if freeIndex != s.nelems {
+ s.freeindex = freeIndex
+ goto havespan
+ }
+ // Add it to the swept list, because sweeping didn't give us any free space.
+ c.fullSwept(sg).push(s)
+ }
+ // See comment for partial unswept spans.
+ }
+ if trace.enabled {
+ traceGCSweepDone()
+ traceDone = true
+ }
+
+ // We failed to get a span from the mcentral so get one from mheap.
+ s = c.grow()
+ if s == nil {
+ return nil
+ }
+
+ // At this point s is a span that should have free slots.
+havespan:
+ if trace.enabled && !traceDone {
+ traceGCSweepDone()
+ }
+ n := int(s.nelems) - int(s.allocCount)
+ if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
+ throw("span has no free objects")
+ }
+ // Assume all objects from this span will be allocated in the
+ // mcache. If it gets uncached, we'll adjust this.
+ atomic.Xadd64(&c.nmalloc, int64(n))
+ usedBytes := uintptr(s.allocCount) * s.elemsize
+ atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
+ if trace.enabled {
+ // heap_live changed.
+ traceHeapAlloc()
+ }
+ if gcBlackenEnabled != 0 {
+ // heap_live changed.
+ gcController.revise()
+ }
+ freeByteBase := s.freeindex &^ (64 - 1)
+ whichByte := freeByteBase / 8
+ // Init alloc bits cache.
+ s.refillAllocCache(whichByte)
+
+ // Adjust the allocCache so that s.freeindex corresponds to the low bit in
+ // s.allocCache.
+ s.allocCache >>= s.freeindex % 64
+
+ return s
+}
+
+// Allocate a span to use in an mcache.
+//
+// For !go115NewMCentralImpl.
+func (c *mcentral) oldCacheSpan() *mspan {
// Deduct credit for this span allocation and sweep if necessary.
spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
deductSweepCredit(spanBytes, 0)
@@ -147,7 +326,77 @@
}
// Return span from an mcache.
+//
+// s must have a span class corresponding to this
+// mcentral and it must not be empty.
func (c *mcentral) uncacheSpan(s *mspan) {
+ if !go115NewMCentralImpl {
+ c.oldUncacheSpan(s)
+ return
+ }
+ if s.allocCount == 0 {
+ throw("uncaching span but s.allocCount == 0")
+ }
+
+ sg := mheap_.sweepgen
+ stale := s.sweepgen == sg+1
+
+ // Fix up sweepgen.
+ if stale {
+ // Span was cached before sweep began. It's our
+ // responsibility to sweep it.
+ //
+ // Set sweepgen to indicate it's not cached but needs
+ // sweeping and can't be allocated from. sweep will
+ // set s.sweepgen to indicate s is swept.
+ atomic.Store(&s.sweepgen, sg-1)
+ } else {
+ // Indicate that s is no longer cached.
+ atomic.Store(&s.sweepgen, sg)
+ }
+ n := int(s.nelems) - int(s.allocCount)
+
+ // Fix up statistics.
+ if n > 0 {
+ // cacheSpan updated alloc assuming all objects on s
+ // were going to be allocated. Adjust for any that
+ // weren't. We must do this before potentially
+ // sweeping the span.
+ atomic.Xadd64(&c.nmalloc, -int64(n))
+
+ if !stale {
+ // (*mcentral).cacheSpan conservatively counted
+ // unallocated slots in heap_live. Undo this.
+ //
+ // If this span was cached before sweep, then
+ // heap_live was totally recomputed since
+ // caching this span, so we don't do this for
+ // stale spans.
+ atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
+ }
+ }
+
+ // Put the span in the appropriate place.
+ if stale {
+ // It's stale, so just sweep it. Sweeping will put it on
+ // the right list.
+ s.sweep(false)
+ } else {
+ if n > 0 {
+ // Put it back on the partial swept list.
+ c.partialSwept(sg).push(s)
+ } else {
+ // There's no free space and it's not stale, so put it on the
+ // full swept list.
+ c.fullSwept(sg).push(s)
+ }
+ }
+}
+
+// Return span from an mcache.
+//
+// For !go115NewMCentralImpl.
+func (c *mcentral) oldUncacheSpan(s *mspan) {
if s.allocCount == 0 {
throw("uncaching span but s.allocCount == 0")
}
@@ -206,6 +455,8 @@
// freeSpan reports whether s was returned to the heap.
// If preserve=true, it does not move s (the caller
// must take care of it).
+//
+// For !go115NewMCentralImpl.
func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 {
throw("freeSpan given cached span")
@@ -243,7 +494,7 @@
c.nonempty.remove(s)
unlock(&c.lock)
- mheap_.freeSpan(s, false)
+ mheap_.freeSpan(s)
return true
}
@@ -252,7 +503,7 @@
npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
size := uintptr(class_to_size[c.spanclass.sizeclass()])
- s := mheap_.alloc(npages, c.spanclass, false, true)
+ s := mheap_.alloc(npages, c.spanclass, true)
if s == nil {
return nil
}
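
Aside: the index arithmetic in the new partial/full comment can be checked directly — sweepgen advances by 2 per cycle, so sweepgen/2%2 alternates between 0 and 1 and the swept/unswept roles of the two spanSets swap every GC cycle. A quick sketch:

package main

import "fmt"

func sweptIdx(sweepgen uint32) uint32   { return sweepgen / 2 % 2 }
func unsweptIdx(sweepgen uint32) uint32 { return 1 - sweepgen/2%2 }

func main() {
	for _, sg := range []uint32{4, 6, 8} {
		fmt.Printf("sweepgen=%d swept=%d unswept=%d\n", sg, sweptIdx(sg), unsweptIdx(sg))
	}
	// sweepgen=4 swept=0 unswept=1
	// sweepgen=6 swept=1 unswept=0
	// sweepgen=8 swept=0 unswept=1
}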
diff --git a/src/runtime/mem_aix.go b/src/runtime/mem_aix.go
index eeebfa7..7e145b0 100644
--- a/src/runtime/mem_aix.go
+++ b/src/runtime/mem_aix.go
@@ -63,14 +63,15 @@
mSysStatInc(sysStat, n)
// AIX does not allow mapping a range that is already mapped.
- // So always unmap first even if it is already unmapped.
- munmap(v, n)
- p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
-
+ // So, call mprotect to change permissions.
+ // Note that sysMap is always called with a non-nil pointer
+ // since it transitions a Reserved memory region to Prepared,
+ // so mprotect is always possible.
+ _, err := mprotect(v, n, _PROT_READ|_PROT_WRITE)
if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v || err != 0 {
+ if err != 0 {
throw("runtime: cannot map pages in arena address space")
}
}
diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go
index 08a2391..4d860e7 100644
--- a/src/runtime/mem_bsd.go
+++ b/src/runtime/mem_bsd.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build dragonfly freebsd nacl netbsd openbsd solaris
+// +build dragonfly freebsd netbsd openbsd solaris
package runtime
@@ -44,8 +44,18 @@
mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}
+// Indicates not to reserve swap space for the mapping.
+const _sunosMAP_NORESERVE = 0x40
+
func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
- p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ flags := int32(_MAP_ANON | _MAP_PRIVATE)
+ if GOOS == "solaris" || GOOS == "illumos" {
+ // Be explicit that we don't want to reserve swap space
+ // for PROT_NONE anonymous mappings. This avoids an issue
+ // wherein large mappings can cause fork to fail.
+ flags |= _sunosMAP_NORESERVE
+ }
+ p, err := mmap(v, n, _PROT_NONE, flags, -1, 0)
if err != 0 {
return nil
}
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
index 86d9fca..3b5d565 100644
--- a/src/runtime/mem_darwin.go
+++ b/src/runtime/mem_darwin.go
@@ -49,19 +49,7 @@
}
func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
- flags := int32(_MAP_ANON | _MAP_PRIVATE)
- if raceenabled {
- // Currently the race detector expects memory to live within a certain
- // range, and on Darwin 10.10 mmap is prone to ignoring hints, moreso
- // than later versions and other BSDs (#26475). So, even though it's
- // potentially dangerous to MAP_FIXED, we do it in the race detection
- // case because it'll help maintain the race detector's invariants.
- //
- // TODO(mknyszek): Drop this once support for Darwin 10.10 is dropped,
- // and reconsider this when #24133 is addressed.
- flags |= _MAP_FIXED
- }
- p, err := mmap(v, n, _PROT_NONE, flags, -1, 0)
+ p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if err != 0 {
return nil
}
diff --git a/src/runtime/mem_js.go b/src/runtime/mem_js.go
index de90f53..092b3d4 100644
--- a/src/runtime/mem_js.go
+++ b/src/runtime/mem_js.go
@@ -7,7 +7,6 @@
package runtime
import (
- "runtime/internal/sys"
"unsafe"
)
@@ -52,18 +51,23 @@
return nil
}
- if reserveEnd < lastmoduledatap.end {
- reserveEnd = lastmoduledatap.end
+ // Round up the initial reserveEnd to 64 KiB so that
+ // reservations are always aligned to the page size.
+ initReserveEnd := alignUp(lastmoduledatap.end, physPageSize)
+ if reserveEnd < initReserveEnd {
+ reserveEnd = initReserveEnd
}
v = unsafe.Pointer(reserveEnd)
- reserveEnd += n
+ reserveEnd += alignUp(n, physPageSize)
current := currentMemory()
- needed := int32(reserveEnd/sys.DefaultPhysPageSize + 1)
+ // reserveEnd is always at a page boundary.
+ needed := int32(reserveEnd / physPageSize)
if current < needed {
if growMemory(needed-current) == -1 {
return nil
}
+ resetMemoryDataView()
}
return v
@@ -72,6 +76,10 @@
func currentMemory() int32
func growMemory(pages int32) int32
+// resetMemoryDataView signals the JS front-end that WebAssembly's memory.grow instruction has been used.
+// This allows the front-end to replace the old DataView object with a new one.
+func resetMemoryDataView()
+
func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
mSysStatInc(sysStat, n)
}
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index 524915f..59b0bca 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -70,11 +70,11 @@
var head, tail uintptr
if uintptr(v)&(physHugePageSize-1) != 0 {
// Compute huge page containing v.
- head = uintptr(v) &^ (physHugePageSize - 1)
+ head = alignDown(uintptr(v), physHugePageSize)
}
if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
// Compute huge page containing v+n-1.
- tail = (uintptr(v) + n - 1) &^ (physHugePageSize - 1)
+ tail = alignDown(uintptr(v)+n-1, physHugePageSize)
}
// Note that madvise will return EINVAL if the flag is
@@ -131,9 +131,9 @@
func sysHugePage(v unsafe.Pointer, n uintptr) {
if physHugePageSize != 0 {
// Round v up to a huge page boundary.
- beg := (uintptr(v) + (physHugePageSize - 1)) &^ (physHugePageSize - 1)
+ beg := alignUp(uintptr(v), physHugePageSize)
// Round v+n down to a huge page boundary.
- end := (uintptr(v) + n) &^ (physHugePageSize - 1)
+ end := alignDown(uintptr(v)+n, physHugePageSize)
if beg < end {
madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
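
Aside: several hunks in this CL replace open-coded mask arithmetic with alignUp, alignDown, and divRoundUp. Their likely shapes (assuming a power-of-two alignment for the first two) and a quick check:

package main

import "fmt"

func alignUp(n, a uintptr) uintptr    { return (n + a - 1) &^ (a - 1) }
func alignDown(n, a uintptr) uintptr  { return n &^ (a - 1) }
func divRoundUp(n, a uintptr) uintptr { return (n + a - 1) / a }

func main() {
	const page = 4096
	fmt.Println(alignUp(5000, page), alignDown(5000, page), divRoundUp(5000, page)) // 8192 4096 2
}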
diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go
index 688cdd3..4fea851 100644
--- a/src/runtime/mem_plan9.go
+++ b/src/runtime/mem_plan9.go
@@ -193,7 +193,7 @@
// so try to extend the address space.
p = sbrk(n)
}
- if p == nil {
+ if p == nil && v == nil {
p = memAlloc(n)
memCheck()
}
diff --git a/src/runtime/memclr_amd64p32.s b/src/runtime/memclr_amd64p32.s
deleted file mode 100644
index 71040f3..0000000
--- a/src/runtime/memclr_amd64p32.s
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
-TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
- MOVL ptr+0(FP), DI
- MOVL n+4(FP), CX
- MOVQ CX, BX
- ANDQ $3, BX
- SHRQ $2, CX
- MOVQ $0, AX
- CLD
- REP
- STOSL
- MOVQ BX, CX
- REP
- STOSB
- // Note: we zero only 4 bytes at a time so that the tail is at most
- // 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
- // See issue 13160.
- RET
diff --git a/src/runtime/memclr_arm.s b/src/runtime/memclr_arm.s
index ea3c67a..7326b8b 100644
--- a/src/runtime/memclr_arm.s
+++ b/src/runtime/memclr_arm.s
@@ -1,5 +1,5 @@
// Inferno's libkern/memset-arm.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memset-arm.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memset-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
diff --git a/src/runtime/memclr_mips64x.s b/src/runtime/memclr_mips64x.s
index 111983b..4c2292e 100644
--- a/src/runtime/memclr_mips64x.s
+++ b/src/runtime/memclr_mips64x.s
@@ -4,6 +4,7 @@
// +build mips64 mips64le
+#include "go_asm.h"
#include "textflag.h"
// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
@@ -12,6 +13,60 @@
MOVV n+8(FP), R2
ADDV R1, R2, R4
+ // if less than 16 bytes or no MSA, do words check
+ SGTU $16, R2, R3
+ BNE R3, no_msa
+ MOVBU internal∕cpu·MIPS64X+const_offsetMIPS64XHasMSA(SB), R3
+ BEQ R3, R0, no_msa
+
+ VMOVB $0, W0
+
+ SGTU $128, R2, R3
+ BEQ R3, msa_large
+
+ AND $15, R2, R5
+ XOR R2, R5, R6
+ ADDVU R1, R6
+
+msa_small:
+ VMOVB W0, (R1)
+ ADDVU $16, R1
+ SGTU R6, R1, R3
+ BNE R3, R0, msa_small
+ BEQ R5, R0, done
+ VMOVB W0, -16(R4)
+ JMP done
+
+msa_large:
+ AND $127, R2, R5
+ XOR R2, R5, R6
+ ADDVU R1, R6
+
+msa_large_loop:
+ VMOVB W0, (R1)
+ VMOVB W0, 16(R1)
+ VMOVB W0, 32(R1)
+ VMOVB W0, 48(R1)
+ VMOVB W0, 64(R1)
+ VMOVB W0, 80(R1)
+ VMOVB W0, 96(R1)
+ VMOVB W0, 112(R1)
+
+ ADDVU $128, R1
+ SGTU R6, R1, R3
+ BNE R3, R0, msa_large_loop
+ BEQ R5, R0, done
+ VMOVB W0, -128(R4)
+ VMOVB W0, -112(R4)
+ VMOVB W0, -96(R4)
+ VMOVB W0, -80(R4)
+ VMOVB W0, -64(R4)
+ VMOVB W0, -48(R4)
+ VMOVB W0, -32(R4)
+ VMOVB W0, -16(R4)
+ JMP done
+
+no_msa:
// if less than 8 bytes, do one byte at a time
SGTU $8, R2, R3
BNE R3, out
diff --git a/src/runtime/memclr_riscv64.s b/src/runtime/memclr_riscv64.s
new file mode 100644
index 0000000..ba7704e
--- /dev/null
+++ b/src/runtime/memclr_riscv64.s
@@ -0,0 +1,44 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// void runtime·memclrNoHeapPointers(void*, uintptr)
+TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
+ MOV ptr+0(FP), T1
+ MOV n+8(FP), T2
+ ADD T1, T2, T4
+
+ // If less than eight bytes, do one byte at a time.
+ SLTU $8, T2, T3
+ BNE T3, ZERO, outcheck
+
+ // Do one byte at a time until eight-aligned.
+ JMP aligncheck
+align:
+ MOVB ZERO, (T1)
+ ADD $1, T1
+aligncheck:
+ AND $7, T1, T3
+ BNE T3, ZERO, align
+
+ // Do eight bytes at a time as long as there is room.
+ ADD $-7, T4, T5
+ JMP wordscheck
+words:
+ MOV ZERO, (T1)
+ ADD $8, T1
+wordscheck:
+ SLTU T5, T1, T3
+ BNE T3, ZERO, words
+
+ JMP outcheck
+out:
+ MOVB ZERO, (T1)
+ ADD $1, T1
+outcheck:
+ BNE T1, T4, out
+
+done:
+ RET
diff --git a/src/runtime/memmove_386.s b/src/runtime/memmove_386.s
index 7b54070..d99546c 100644
--- a/src/runtime/memmove_386.s
+++ b/src/runtime/memmove_386.s
@@ -1,5 +1,5 @@
// Inferno's libkern/memmove-386.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -28,6 +28,8 @@
#include "go_asm.h"
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $0-12
MOVL to+0(FP), DI
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s
index b4243a8..d91641a 100644
--- a/src/runtime/memmove_amd64.s
+++ b/src/runtime/memmove_amd64.s
@@ -1,5 +1,5 @@
// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -28,6 +28,8 @@
#include "go_asm.h"
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $0-24
diff --git a/src/runtime/memmove_amd64p32.s b/src/runtime/memmove_amd64p32.s
deleted file mode 100644
index 1140773..0000000
--- a/src/runtime/memmove_amd64p32.s
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// This could use MOVSQ, but we use MOVSL so that if an object ends in
-// a 4 byte pointer, we copy it as a unit instead of byte by byte.
-
-// func memmove(to, from unsafe.Pointer, n uintptr)
-TEXT runtime·memmove(SB), NOSPLIT, $0-12
- MOVL to+0(FP), DI
- MOVL from+4(FP), SI
- MOVL n+8(FP), BX
-
- CMPL SI, DI
- JLS back
-
-forward:
- MOVL BX, CX
- SHRL $2, CX
- ANDL $3, BX
- REP; MOVSL
- MOVL BX, CX
- REP; MOVSB
- RET
-
-back:
- MOVL SI, CX
- ADDL BX, CX
- CMPL CX, DI
- JLS forward
-
- ADDL BX, DI
- ADDL BX, SI
- STD
-
- MOVL BX, CX
- SHRL $2, CX
- ANDL $3, BX
- SUBL $4, DI
- SUBL $4, SI
- REP; MOVSL
- ADDL $3, DI
- ADDL $3, SI
- MOVL BX, CX
- REP; MOVSB
- CLD
-
- // Note: we copy only 4 bytes at a time so that the tail is at most
- // 3 bytes. That guarantees that we aren't copying pointers with MOVSB.
- // See issue 13160.
- RET
diff --git a/src/runtime/memmove_arm.s b/src/runtime/memmove_arm.s
index 8352fb7..43d53fa 100644
--- a/src/runtime/memmove_arm.s
+++ b/src/runtime/memmove_arm.s
@@ -1,5 +1,5 @@
// Inferno's libkern/memmove-arm.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -58,6 +58,8 @@
#define FW3 R4
#define FR3 R8 /* shared with TE */
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $4-12
_memmove:
diff --git a/src/runtime/memmove_arm64.s b/src/runtime/memmove_arm64.s
index ac29f94..dbb7e9a 100644
--- a/src/runtime/memmove_arm64.s
+++ b/src/runtime/memmove_arm64.s
@@ -4,6 +4,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
MOVD to+0(FP), R3
@@ -22,7 +24,7 @@
CMP R3, R4
BLT backward
- // Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+ // Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes.
// R3 and R4 are advanced as we copy.
// (There may be implementations of armv8 where copying by bytes until
@@ -30,11 +32,12 @@
// optimization, but the on the one tested so far (xgene) it did not
// make a significance difference.)
- CBZ R7, noforwardlarge // Do we need to do any doubleword-by-doubleword copying?
+ CBZ R7, noforwardlarge // Do we need to do any quadword copying?
ADD R3, R7, R9 // R9 points just past where we copy by word
forwardlargeloop:
+ // Copy 32 bytes at a time.
LDP.P 32(R4), (R8, R10)
STP.P (R8, R10), 32(R3)
LDP -16(R4), (R11, R12)
@@ -43,10 +46,26 @@
CBNZ R7, forwardlargeloop
noforwardlarge:
- CBNZ R6, forwardtail // Do we need to do any byte-by-byte copying?
+ CBNZ R6, forwardtail // Do we need to copy any tail bytes?
RET
forwardtail:
+ // There are R6 <= 31 bytes remaining to copy.
+ // This is large enough to still contain pointers,
+ // which must be copied atomically.
+ // Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
+ TBZ $4, R6, 3(PC) // write 16 bytes if R6&16 != 0
+ LDP.P 16(R4), (R8, R10)
+ STP.P (R8, R10), 16(R3)
+
+ TBZ $3, R6, 3(PC) // write 8 bytes if R6&8 != 0
+ MOVD.P 8(R4), R8
+ MOVD.P R8, 8(R3)
+
+ AND $7, R6
+ CBNZ R6, 2(PC)
+ RET
+
ADD R3, R6, R9 // R9 points just past the destination memory
forwardtailloop:
@@ -90,7 +109,7 @@
RET
backward:
- // Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
+ // Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords.
// R3 and R4 are advanced to the end of the destination/source buffers
// respectively and moved back as we copy.
@@ -99,13 +118,28 @@
CBZ R6, nobackwardtail // Do we need to do any byte-by-byte copying?
- SUB R6, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
+ AND $7, R6, R12
+ CBZ R12, backwardtaillarge
+
+ SUB R12, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
backwardtailloop:
+ // Copy sub-pointer-size tail.
MOVBU.W -1(R4), R8
MOVBU.W R8, -1(R3)
CMP R9, R3
BNE backwardtailloop
+backwardtaillarge:
+ // Do 8/16-byte write if possible.
+ // See comment at forwardtail.
+ TBZ $3, R6, 3(PC)
+ MOVD.W -8(R4), R8
+ MOVD.W R8, -8(R3)
+
+ TBZ $4, R6, 3(PC)
+ LDP.W -16(R4), (R8, R10)
+ STP.W (R8, R10), -16(R3)
+
nobackwardtail:
CBNZ R7, backwardlarge // Do we need to do any doubleword-by-doubleword copying?
RET
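
Aside: the new arm64 tail handling copies the <= 31 remaining bytes with 16- and 8-byte moves so that any pointers in the tail are written by single word-sized instructions, which the concurrent GC can observe safely. The chunking order, sketched in Go purely to show the structure (the atomicity comes from the word-sized loads and stores in the assembly, not from this sketch):

package main

import "fmt"

// copyTail copies len(src) < 32 bytes in descending power-of-two chunks:
// 16, then 8, then the remaining sub-word bytes.
func copyTail(dst, src []byte) {
	for _, chunk := range []int{16, 8, 4, 2, 1} {
		if len(src) >= chunk {
			copy(dst[:chunk], src[:chunk])
			dst, src = dst[chunk:], src[chunk:]
		}
	}
}

func main() {
	src := []byte("abcdefghijklmnopqrstuvwxyz01234") // 31 bytes
	dst := make([]byte, len(src))
	copyTail(dst, src)
	fmt.Println(string(dst) == string(src)) // true
}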
diff --git a/src/runtime/memmove_mips64x.s b/src/runtime/memmove_mips64x.s
index a4cb7dc..8a1b88a 100644
--- a/src/runtime/memmove_mips64x.s
+++ b/src/runtime/memmove_mips64x.s
@@ -6,6 +6,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
MOVV to+0(FP), R1
diff --git a/src/runtime/memmove_mipsx.s b/src/runtime/memmove_mipsx.s
index 13544a3..6c86558 100644
--- a/src/runtime/memmove_mipsx.s
+++ b/src/runtime/memmove_mipsx.s
@@ -14,6 +14,8 @@
#define MOVWLO MOVWL
#endif
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB),NOSPLIT,$-0-12
MOVW n+8(FP), R3
diff --git a/src/runtime/memmove_plan9_386.s b/src/runtime/memmove_plan9_386.s
index 65dec93..cfce0e9 100644
--- a/src/runtime/memmove_plan9_386.s
+++ b/src/runtime/memmove_plan9_386.s
@@ -1,5 +1,5 @@
// Inferno's libkern/memmove-386.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -25,6 +25,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $0-12
MOVL to+0(FP), DI
diff --git a/src/runtime/memmove_plan9_amd64.s b/src/runtime/memmove_plan9_amd64.s
index b729c7c..217aa60 100644
--- a/src/runtime/memmove_plan9_amd64.s
+++ b/src/runtime/memmove_plan9_amd64.s
@@ -1,5 +1,5 @@
// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -25,6 +25,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $0-24
diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s
index 60cbcc4..edc6452 100644
--- a/src/runtime/memmove_ppc64x.s
+++ b/src/runtime/memmove_ppc64x.s
@@ -6,79 +6,115 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
+
+// target address
+#define TGT R3
+// source address
+#define SRC R4
+// length to move
+#define LEN R5
+// number of doublewords
+#define DWORDS R6
+// number of bytes < 8
+#define BYTES R7
+// const 16 used as index
+#define IDX16 R8
+// temp used for copies, etc.
+#define TMP R9
+// number of 32 byte chunks
+#define QWORDS R10
+
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
- MOVD to+0(FP), R3
- MOVD from+8(FP), R4
- MOVD n+16(FP), R5
+ MOVD to+0(FP), TGT
+ MOVD from+8(FP), SRC
+ MOVD n+16(FP), LEN
// Determine if there are doublewords to
// copy so a more efficient move can be done
check:
- ANDCC $7, R5, R7 // R7: bytes to copy
- SRD $3, R5, R6 // R6: double words to copy
- CMP R6, $0, CR1 // CR1[EQ] set if no double words to copy
+ ANDCC $7, LEN, BYTES // R7: bytes to copy
+ SRD $3, LEN, DWORDS // R6: double words to copy
+ MOVFL CR0, CR3 // save CR from ANDCC
+ CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy
// Determine overlap by subtracting dest - src and comparing against the
- // length. The catches the cases where src and dest are in different types
+ // length. This catches the cases where src and dest are in different types
// of storage such as stack and static to avoid doing backward move when not
// necessary.
- SUB R4, R3, R8 // dest - src
- CMPU R8, R5, CR2 // < len?
+ SUB SRC, TGT, TMP // dest - src
+ CMPU TMP, LEN, CR2 // < len?
BC 12, 8, backward // BLT CR2 backward
// Copying forward if no overlap.
- BC 12, 6, noforwardlarge // "BEQ CR1, noforwardlarge"
- SRDCC $2,R6,R8 // 32 byte chunks?
- BNE forward32setup //
- MOVD R6,CTR // R6 = number of double words
-
- // Move double words
-
-forward8:
- MOVD 0(R4), R8 // double word
- ADD $8,R4
- MOVD R8, 0(R3) //
- ADD $8,R3
- BC 16, 0, forward8
- BR noforwardlarge // handle remainder
+ BC 12, 6, checkbytes // BEQ CR1, checkbytes
+ SRDCC $2, DWORDS, QWORDS // 32 byte chunks?
+ BEQ lt32gt8 // < 32 bytes
// Prepare for moves of 32 bytes at a time.
forward32setup:
- DCBTST (R3) // prepare data cache
- DCBT (R4)
- MOVD R8, CTR // double work count
- MOVD $16, R8
+ DCBTST (TGT) // prepare data cache
+ DCBT (SRC)
+ MOVD QWORDS, CTR // Number of 32 byte chunks
+ MOVD $16, IDX16 // 16 for index
forward32:
- LXVD2X (R4+R0), VS32 // load 16 bytes
- LXVD2X (R4+R8), VS33
- ADD $32, R4
- STXVD2X VS32, (R3+R0) // store 16 bytes
- STXVD2X VS33, (R3+R8)
- ADD $32,R3 // bump up for next set
+ LXVD2X (R0)(SRC), VS32 // load 16 bytes
+ LXVD2X (IDX16)(SRC), VS33 // load 16 bytes
+ ADD $32, SRC
+ STXVD2X VS32, (R0)(TGT) // store 16 bytes
+ STXVD2X VS33, (IDX16)(TGT)
+ ADD $32,TGT // bump up for next set
BC 16, 0, forward32 // continue
- RLDCLCC $61,R5,$3,R6 // remaining doublewords
- BEQ noforwardlarge
- MOVD R6,CTR // set up the CTR
- BR forward8
+ ANDCC $3, DWORDS // remaining doublewords
+ BEQ checkbytes // only bytes remain
-noforwardlarge:
- CMP R7,$0 // any remaining bytes
- BC 4, 1, LR // ble lr
+lt32gt8:
+ // At this point >= 8 and < 32
+ // Move 16 bytes if possible
+ CMP DWORDS, $2
+ BLT lt16
+ LXVD2X (R0)(SRC), VS32
+ ADD $-2, DWORDS
+ STXVD2X VS32, (R0)(TGT)
+ ADD $16, SRC
+ ADD $16, TGT
-forwardtail:
- MOVD R7, CTR // move tail bytes
-
-forwardtailloop:
- MOVBZ 0(R4), R8 // move single bytes
- ADD $1,R4
- MOVBZ R8, 0(R3)
- ADD $1,R3
- BC 16, 0, forwardtailloop
+lt16: // Move 8 bytes if possible
+ CMP DWORDS, $1
+ BLT checkbytes
+ MOVD 0(SRC), TMP
+ ADD $8, SRC
+ MOVD TMP, 0(TGT)
+ ADD $8, TGT
+checkbytes:
+ BC 12, 14, LR // BEQ lr
+lt8: // Move word if possible
+ CMP BYTES, $4
+ BLT lt4
+ MOVWZ 0(SRC), TMP
+ ADD $-4, BYTES
+ MOVW TMP, 0(TGT)
+ ADD $4, SRC
+ ADD $4, TGT
+lt4: // Move halfword if possible
+ CMP BYTES, $2
+ BLT lt2
+ MOVHZ 0(SRC), TMP
+ ADD $-2, BYTES
+ MOVH TMP, 0(TGT)
+ ADD $2, SRC
+ ADD $2, TGT
+lt2: // Move last byte if 1 left
+ CMP BYTES, $1
+ BC 12, 0, LR // ble lr
+ MOVBZ 0(SRC), TMP
+ MOVBZ TMP, 0(TGT)
RET
backward:
@@ -86,51 +122,51 @@
// R3 and R4 are advanced to the end of the destination/source buffers
// respectively and moved back as we copy.
- ADD R5, R4, R4 // end of source
- ADD R3, R5, R3 // end of dest
+ ADD LEN, SRC, SRC // end of source
+ ADD TGT, LEN, TGT // end of dest
BEQ nobackwardtail // earlier condition
- MOVD R7, CTR // bytes to move
+ MOVD BYTES, CTR // bytes to move
backwardtailloop:
- MOVBZ -1(R4), R8 // point to last byte
- SUB $1,R4
- MOVBZ R8, -1(R3)
- SUB $1,R3
+ MOVBZ -1(SRC), TMP // point to last byte
+ SUB $1,SRC
+ MOVBZ TMP, -1(TGT)
+ SUB $1,TGT
BC 16, 0, backwardtailloop // bndz
nobackwardtail:
BC 4, 5, LR // ble CR1 lr
backwardlarge:
- MOVD R6, CTR
- SUB R3, R4, R9 // Use vsx if moving
- CMP R9, $32 // at least 32 byte chunks
+ MOVD DWORDS, CTR
+ SUB TGT, SRC, TMP // Use vsx if moving
+ CMP TMP, $32 // at least 32 byte chunks
BLT backwardlargeloop // and distance >= 32
- SRDCC $2,R6,R8 // 32 byte chunks
+ SRDCC $2,DWORDS,QWORDS // 32 byte chunks
BNE backward32setup
backwardlargeloop:
- MOVD -8(R4), R8
- SUB $8,R4
- MOVD R8, -8(R3)
- SUB $8,R3
+ MOVD -8(SRC), TMP
+ SUB $8,SRC
+ MOVD TMP, -8(TGT)
+ SUB $8,TGT
BC 16, 0, backwardlargeloop // bndz
RET
backward32setup:
- MOVD R8, CTR // set up loop ctr
- MOVD $16, R8 // 32 bytes at at time
+ MOVD QWORDS, CTR // set up loop ctr
+ MOVD $16, IDX16 // 32 bytes at a time
backward32loop:
- SUB $32, R4
- SUB $32, R3
- LXVD2X (R4+R0), VS32 // load 16 bytes
- LXVD2X (R4+R8), VS33
- STXVD2X VS32, (R3+R0) // store 16 bytes
- STXVD2X VS33, (R3+R8)
+ SUB $32, TGT
+ SUB $32, SRC
+ LXVD2X (R0)(TGT), VS32 // load 16 bytes
+ LXVD2X (IDX16)(TGT), VS33
+ STXVD2X VS32, (R0)(SRC) // store 16 bytes
+ STXVD2X VS33, (IDX16)(SRC)
BC 16, 0, backward32loop // bndz
BC 4, 5, LR // ble CR1 lr
- MOVD R6, CTR
+ MOVD DWORDS, CTR
BR backwardlargeloop
diff --git a/src/runtime/memmove_riscv64.s b/src/runtime/memmove_riscv64.s
new file mode 100644
index 0000000..5dec8d0
--- /dev/null
+++ b/src/runtime/memmove_riscv64.s
@@ -0,0 +1,98 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// See memmove Go doc for important implementation constraints.
+
+// void runtime·memmove(void*, void*, uintptr)
+TEXT runtime·memmove(SB),NOSPLIT,$-0-24
+ MOV to+0(FP), T0
+ MOV from+8(FP), T1
+ MOV n+16(FP), T2
+ ADD T1, T2, T5
+
+ // If the destination is ahead of the source, start at the end of the
+ // buffer and go backward.
+ BLTU T1, T0, b
+
+ // If less than eight bytes, do one byte at a time.
+ SLTU $8, T2, T3
+ BNE T3, ZERO, f_outcheck
+
+ // Do one byte at a time until from is eight-aligned.
+ JMP f_aligncheck
+f_align:
+ MOVB (T1), T3
+ MOVB T3, (T0)
+ ADD $1, T0
+ ADD $1, T1
+f_aligncheck:
+ AND $7, T1, T3
+ BNE T3, ZERO, f_align
+
+ // Do eight bytes at a time as long as there is room.
+ ADD $-7, T5, T6
+ JMP f_wordscheck
+f_words:
+ MOV (T1), T3
+ MOV T3, (T0)
+ ADD $8, T0
+ ADD $8, T1
+f_wordscheck:
+ SLTU T6, T1, T3
+ BNE T3, ZERO, f_words
+
+ // Finish off the remaining partial word.
+ JMP f_outcheck
+f_out:
+ MOVB (T1), T3
+ MOVB T3, (T0)
+ ADD $1, T0
+ ADD $1, T1
+f_outcheck:
+ BNE T1, T5, f_out
+
+ RET
+
+b:
+ ADD T0, T2, T4
+ // If less than eight bytes, do one byte at a time.
+ SLTU $8, T2, T3
+ BNE T3, ZERO, b_outcheck
+
+ // Do one byte at a time until from+n is eight-aligned.
+ JMP b_aligncheck
+b_align:
+ ADD $-1, T4
+ ADD $-1, T5
+ MOVB (T5), T3
+ MOVB T3, (T4)
+b_aligncheck:
+ AND $7, T5, T3
+ BNE T3, ZERO, b_align
+
+ // Do eight bytes at a time as long as there is room.
+ ADD $7, T1, T6
+ JMP b_wordscheck
+b_words:
+ ADD $-8, T4
+ ADD $-8, T5
+ MOV (T5), T3
+ MOV T3, (T4)
+b_wordscheck:
+ SLTU T5, T6, T3
+ BNE T3, ZERO, b_words
+
+ // Finish off the remaining partial word.
+ JMP b_outcheck
+b_out:
+ ADD $-1, T4
+ ADD $-1, T5
+ MOVB (T5), T3
+ MOVB T3, (T4)
+b_outcheck:
+ BNE T5, T1, b_out
+
+ RET
diff --git a/src/runtime/memmove_s390x.s b/src/runtime/memmove_s390x.s
index 4ce98b0..f4c2b87 100644
--- a/src/runtime/memmove_s390x.s
+++ b/src/runtime/memmove_s390x.s
@@ -4,6 +4,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24
MOVD to+0(FP), R6
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 0b2e191..396c130 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -11,7 +11,9 @@
"internal/race"
"internal/testenv"
. "runtime"
+ "sync/atomic"
"testing"
+ "unsafe"
)
func TestMemmove(t *testing.T) {
@@ -206,6 +208,71 @@
return l
}
+// Ensure that memmove writes pointers atomically, so the GC won't
+// observe a partially updated pointer.
+func TestMemmoveAtomicity(t *testing.T) {
+ if race.Enabled {
+ t.Skip("skip under the race detector -- this test is intentionally racy")
+ }
+
+ var x int
+
+ for _, backward := range []bool{true, false} {
+ for _, n := range []int{3, 4, 5, 6, 7, 8, 9, 10, 15, 25, 49} {
+ n := n
+
+ // test copying [N]*int.
+ sz := uintptr(n * PtrSize)
+ name := fmt.Sprint(sz)
+ if backward {
+ name += "-backward"
+ } else {
+ name += "-forward"
+ }
+ t.Run(name, func(t *testing.T) {
+ // Use overlapping src and dst to force forward/backward copy.
+ var s [100]*int
+ src := s[n-1 : 2*n-1]
+ dst := s[:n]
+ if backward {
+ src, dst = dst, src
+ }
+ for i := range src {
+ src[i] = &x
+ }
+ for i := range dst {
+ dst[i] = nil
+ }
+
+ var ready uint32
+ go func() {
+ sp := unsafe.Pointer(&src[0])
+ dp := unsafe.Pointer(&dst[0])
+ atomic.StoreUint32(&ready, 1)
+ for i := 0; i < 10000; i++ {
+ Memmove(dp, sp, sz)
+ MemclrNoHeapPointers(dp, sz)
+ }
+ atomic.StoreUint32(&ready, 2)
+ }()
+
+ for atomic.LoadUint32(&ready) == 0 {
+ Gosched()
+ }
+
+ for atomic.LoadUint32(&ready) != 2 {
+ for i := range dst {
+ p := dst[i]
+ if p != nil && p != &x {
+ t.Fatalf("got partially updated pointer %p at dst[%d], want either nil or %p", p, i, &x)
+ }
+ }
+ }
+ })
+ }
+ }
+}
+
func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) {
for _, n := range sizes {
b.Run(fmt.Sprint(n), func(b *testing.B) {
diff --git a/src/runtime/memmove_wasm.s b/src/runtime/memmove_wasm.s
index d5e2016..8525fea 100644
--- a/src/runtime/memmove_wasm.s
+++ b/src/runtime/memmove_wasm.s
@@ -4,6 +4,8 @@
#include "textflag.h"
+// See memmove Go doc for important implementation constraints.
+
// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT, $0-24
MOVD to+0(FP), R0
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 37b2c38..d6c85a8 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -407,9 +407,9 @@
// compute size needed for return parameters
nret := uintptr(0)
for _, t := range ft.out() {
- nret = round(nret, uintptr(t.align)) + uintptr(t.size)
+ nret = alignUp(nret, uintptr(t.align)) + uintptr(t.size)
}
- nret = round(nret, sys.PtrSize)
+ nret = alignUp(nret, sys.PtrSize)
// make sure we have a finalizer goroutine
createfing()
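The round-to-alignUp rename above is mechanical; for reference, an alignment helper of this shape rounds up to a power-of-two boundary (a sketch under that assumption; alignUpSketch is a hypothetical name, the real helper is defined elsewhere in the patch):

// alignUpSketch rounds n up to the next multiple of align, where align
// must be a power of two: the usual (n + align - 1) &^ (align - 1) trick.
func alignUpSketch(n, align uintptr) uintptr {
    return (n + align - 1) &^ (align - 1)
}

For example, alignUpSketch(13, 8) == 16 and alignUpSketch(16, 8) == 16, which is what the return-parameter sizing above relies on.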
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 823b556..b349951 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -139,6 +139,10 @@
_ConcurrentSweep = true
_FinBlockSize = 4 * 1024
+ // debugScanConservative enables debug logging for stack
+ // frames that are scanned conservatively.
+ debugScanConservative = false
+
// sweepMinHeapDistance is a lower bound on the heap distance
// (in bytes) reserved for concurrent sweeping between GC
// cycles.
@@ -187,6 +191,9 @@
work.startSema = 1
work.markDoneSema = 1
+ lockInit(&work.sweepWaiters.lock, lockRankSweepWaiters)
+ lockInit(&work.assistQueue.lock, lockRankAssistQueue)
+ lockInit(&work.wbufSpans.lock, lockRankWbufSpans)
}
func readgogc() int32 {
@@ -488,25 +495,25 @@
}
live := atomic.Load64(&memstats.heap_live)
- var heapGoal, scanWorkExpected int64
- if live <= memstats.next_gc {
- // We're under the soft goal. Pace GC to complete at
- // next_gc assuming the heap is in steady-state.
- heapGoal = int64(memstats.next_gc)
+ // Assume we're under the soft goal. Pace GC to complete at
+ // next_gc assuming the heap is in steady-state.
+ heapGoal := int64(memstats.next_gc)
- // Compute the expected scan work remaining.
- //
- // This is estimated based on the expected
- // steady-state scannable heap. For example, with
- // GOGC=100, only half of the scannable heap is
- // expected to be live, so that's what we target.
- //
- // (This is a float calculation to avoid overflowing on
- // 100*heap_scan.)
- scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
- } else {
- // We're past the soft goal. Pace GC so that in the
- // worst case it will complete by the hard goal.
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heap_scan.)
+ scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+
+ if live > memstats.next_gc || c.scanWork > scanWorkExpected {
+ // We're past the soft goal, or we've already done more scan
+ // work than we expected. Pace GC so that in the worst case it
+ // will complete by the hard goal.
const maxOvershoot = 1.1
heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
@@ -518,7 +525,7 @@
//
// Note that we currently count allocations during GC as both
// scannable heap (heap_scan) and scan work completed
- // (scanWork), so allocation will change this difference will
+ // (scanWork), so allocation will change this difference
// slowly in the soft regime and not at all in the hard
// regime.
scanWorkRemaining := scanWorkExpected - c.scanWork
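The restructured pacing above always computes the steady-state scan-work estimate first, then switches to the hard goal once the live heap passes next_gc or the estimate has already been exceeded. A hedged sketch of that decision, with plain parameters standing in for memstats and the controller state (pacingGoalSketch is a hypothetical name):

// pacingGoalSketch returns the heap goal the assist pacing aims at: the
// soft goal (nextGC) in steady state, or the hard goal (10% overshoot)
// once live > nextGC or scanWork already exceeds the expectation.
// The runtime also rescales the expected scan work in the hard case,
// which is elided here.
func pacingGoalSketch(live, nextGC, heapScan uint64, scanWork int64, gogc int) int64 {
    heapGoal := int64(nextGC)
    scanWorkExpected := int64(float64(heapScan) * 100 / float64(100+gogc))
    if live > nextGC || scanWork > scanWorkExpected {
        const maxOvershoot = 1.1
        heapGoal = int64(float64(nextGC) * maxOvershoot)
    }
    return heapGoal
}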
@@ -764,17 +771,39 @@
}
// Set the trigger ratio, capped to reasonable bounds.
- if triggerRatio < 0 {
- // This can happen if the mutator is allocating very
- // quickly or the GC is scanning very slowly.
- triggerRatio = 0
- } else if gcpercent >= 0 {
+ if gcpercent >= 0 {
+ scalingFactor := float64(gcpercent) / 100
// Ensure there's always a little margin so that the
// mutator assist ratio isn't infinity.
- maxTriggerRatio := 0.95 * float64(gcpercent) / 100
+ maxTriggerRatio := 0.95 * scalingFactor
if triggerRatio > maxTriggerRatio {
triggerRatio = maxTriggerRatio
}
+
+ // If we let triggerRatio go too low, then if the application
+ // is allocating very rapidly we might end up in a situation
+ // where we're allocating black during a nearly always-on GC.
+ // The result of this is a growing heap and ultimately an
+ // increase in RSS. By capping us at a point >0, we're essentially
+ // saying that we're OK using more CPU during the GC to prevent
+ // this growth in RSS.
+ //
+ // The current constant was chosen empirically: given a sufficiently
+ // fast/scalable allocator with 48 Ps that could drive the trigger ratio
+ // to <0.05, this constant causes applications to retain the same peak
+ // RSS compared to not having this allocator.
+ minTriggerRatio := 0.6 * scalingFactor
+ if triggerRatio < minTriggerRatio {
+ triggerRatio = minTriggerRatio
+ }
+ } else if triggerRatio < 0 {
+ // gcpercent < 0, so just make sure we're not getting a negative
+ // triggerRatio. This case isn't expected to happen in practice,
+ // and doesn't really matter because if gcpercent < 0 then we won't
+ // ever consume triggerRatio further on in this function, but let's
+ // just be defensive here; the triggerRatio being negative is almost
+ // certainly undesirable.
+ triggerRatio = 0
}
memstats.triggerRatio = triggerRatio
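The new bounds above keep the trigger ratio inside [0.6, 0.95] of gcpercent/100 whenever gcpercent is non-negative. A small sketch of just the clamping, using the same constants (clampTriggerRatioSketch is a hypothetical name):

// clampTriggerRatioSketch mirrors the capping logic above: for gcpercent >= 0
// the ratio is kept within [0.6, 0.95] * gcpercent/100; otherwise it is only
// prevented from going negative.
func clampTriggerRatioSketch(triggerRatio float64, gcpercent int32) float64 {
    if gcpercent >= 0 {
        scalingFactor := float64(gcpercent) / 100
        if hi := 0.95 * scalingFactor; triggerRatio > hi {
            triggerRatio = hi
        }
        if lo := 0.6 * scalingFactor; triggerRatio < lo {
            triggerRatio = lo
        }
    } else if triggerRatio < 0 {
        triggerRatio = 0
    }
    return triggerRatio
}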
@@ -845,7 +874,8 @@
heapDistance = _PageSize
}
pagesSwept := atomic.Load64(&mheap_.pagesSwept)
- sweepDistancePages := int64(mheap_.pagesInUse) - int64(pagesSwept)
+ pagesInUse := atomic.Load64(&mheap_.pagesInUse)
+ sweepDistancePages := int64(pagesInUse) - int64(pagesSwept)
if sweepDistancePages <= 0 {
mheap_.sweepPagesPerByte = 0
} else {
@@ -1248,6 +1278,7 @@
}
// Ok, we're doing it! Stop everybody else
+ semacquire(&gcsema)
semacquire(&worldsema)
if trace.enabled {
@@ -1287,6 +1318,7 @@
systemstack(func() {
finishsweep_m()
})
+
// clearpools before we start the GC. If we wait they memory will not be
// reclaimed until the next GC cycle.
clearpools()
@@ -1340,15 +1372,26 @@
// the world.
gcController.markStartTime = now
+ // In STW mode, we could block the instant systemstack
+ // returns, so make sure we're not preemptible.
+ mp = acquirem()
+
// Concurrent mark.
systemstack(func() {
now = startTheWorldWithSema(trace.enabled)
work.pauseNS += now - work.pauseStart
work.tMark = now
})
- // In STW mode, we could block the instant systemstack
- // returns, so don't do anything important here. Make sure we
- // block rather than returning to user code.
+
+ // Release the world sema before Gosched() in STW mode
+ // because we will need to reacquire it later but before
+ // this goroutine becomes runnable again, and we could
+ // self-deadlock otherwise.
+ semrelease(&worldsema)
+ releasem(mp)
+
+ // Make sure we block instead of returning to user code
+ // in STW mode.
if mode != gcBackgroundMode {
Gosched()
}
@@ -1415,6 +1458,10 @@
return
}
+ // forEachP needs worldsema to execute, and we'll need it to
+ // stop the world later, so acquire worldsema now.
+ semacquire(&worldsema)
+
// Flush all local buffers and collect flushedWork flags.
gcMarkDoneFlushed = 0
systemstack(func() {
@@ -1475,6 +1522,7 @@
// work to do. Keep going. It's possible the
// transition condition became true again during the
// ragged barrier, so re-check it.
+ semrelease(&worldsema)
goto top
}
@@ -1551,6 +1599,7 @@
now := startTheWorldWithSema(true)
work.pauseNS += now - work.pauseStart
})
+ semrelease(&worldsema)
goto top
}
}
@@ -1649,6 +1698,10 @@
throw("gc done but gcphase != _GCoff")
}
+ // Record next_gc and heap_inuse for scavenger.
+ memstats.last_next_gc = memstats.next_gc
+ memstats.last_heap_inuse = memstats.heap_inuse
+
// Update GC trigger and pacing for the next cycle.
gcSetTriggerRatio(nextTriggerRatio)
@@ -1761,6 +1814,7 @@
}
semrelease(&worldsema)
+ semrelease(&gcsema)
// Careful: another GC cycle may start now.
releasem(mp)
@@ -2083,6 +2137,9 @@
// gcSweep must be called on the system stack because it acquires the heap
// lock. See mheap for details.
+//
+// The world must be stopped.
+//
//go:systemstack
func gcSweep(mode gcMode) {
if gcphase != _GCoff {
@@ -2092,7 +2149,7 @@
lock(&mheap_.lock)
mheap_.sweepgen += 2
mheap_.sweepdone = 0
- if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
+ if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
// We should have drained this list during the last
// sweep phase. We certainly need to start this phase
// with an empty swept list.
@@ -2104,6 +2161,10 @@
mheap_.reclaimCredit = 0
unlock(&mheap_.lock)
+ if go115NewMCentralImpl {
+ sweep.centralIndex.clear()
+ }
+
if !_ConcurrentSweep || mode == gcForceBlockMode {
// Special case synchronous sweep.
// Record that no proportional sweeping has to happen.
@@ -2150,8 +2211,7 @@
// allgs doesn't change.
lock(&allglock)
for _, gp := range allgs {
- gp.gcscandone = false // set to true in gcphasework
- gp.gcscanvalid = false // stack has not been scanned
+ gp.gcscandone = false // set to true in gcphasework
gp.gcAssistBytes = 0
}
unlock(&allglock)
diff --git a/src/runtime/mgclarge.go b/src/runtime/mgclarge.go
deleted file mode 100644
index 414db10..0000000
--- a/src/runtime/mgclarge.go
+++ /dev/null
@@ -1,657 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Page heap.
-//
-// See malloc.go for the general overview.
-//
-// Allocation policy is the subject of this file. All free spans live in
-// a treap for most of their time being free. See
-// https://en.wikipedia.org/wiki/Treap or
-// https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview.
-// sema.go also holds an implementation of a treap.
-//
-// Each treapNode holds a single span. The treap is sorted by base address
-// and each span necessarily has a unique base address.
-// Spans are returned based on a first-fit algorithm, acquiring the span
-// with the lowest base address which still satisfies the request.
-//
-// The first-fit algorithm is possible due to an augmentation of each
-// treapNode to maintain the size of the largest span in the subtree rooted
-// at that treapNode. Below we refer to this invariant as the maxPages
-// invariant.
-//
-// The primary routines are
-// insert: adds a span to the treap
-// remove: removes the span from that treap that best fits the required size
-// removeSpan: which removes a specific span from the treap
-//
-// Whenever a pointer to a span which is owned by the treap is acquired, that
-// span must not be mutated. To mutate a span in the treap, remove it first.
-//
-// mheap_.lock must be held when manipulating this data structure.
-
-package runtime
-
-import (
- "unsafe"
-)
-
-//go:notinheap
-type mTreap struct {
- treap *treapNode
- unscavHugePages uintptr // number of unscavenged huge pages in the treap
-}
-
-//go:notinheap
-type treapNode struct {
- right *treapNode // all treapNodes > this treap node
- left *treapNode // all treapNodes < this treap node
- parent *treapNode // direct parent of this node, nil if root
- key uintptr // base address of the span, used as primary sort key
- span *mspan // span at base address key
- maxPages uintptr // the maximum size of any span in this subtree, including the root
- priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced
- types treapIterFilter // the types of spans available in this subtree
-}
-
-// updateInvariants is a helper method which has a node recompute its own
-// maxPages and types values by looking at its own span as well as the
-// values of its direct children.
-//
-// Returns true if anything changed.
-func (t *treapNode) updateInvariants() bool {
- m, i := t.maxPages, t.types
- t.maxPages = t.span.npages
- t.types = t.span.treapFilter()
- if t.left != nil {
- t.types |= t.left.types
- if t.maxPages < t.left.maxPages {
- t.maxPages = t.left.maxPages
- }
- }
- if t.right != nil {
- t.types |= t.right.types
- if t.maxPages < t.right.maxPages {
- t.maxPages = t.right.maxPages
- }
- }
- return m != t.maxPages || i != t.types
-}
-
-// findMinimal finds the minimal (lowest base addressed) node in the treap
-// which matches the criteria set out by the filter f and returns nil if
-// none exists.
-//
-// This algorithm is functionally the same as (*mTreap).find, so see that
-// method for more details.
-func (t *treapNode) findMinimal(f treapIterFilter) *treapNode {
- if t == nil || !f.matches(t.types) {
- return nil
- }
- for t != nil {
- if t.left != nil && f.matches(t.left.types) {
- t = t.left
- } else if f.matches(t.span.treapFilter()) {
- break
- } else if t.right != nil && f.matches(t.right.types) {
- t = t.right
- } else {
- println("runtime: f=", f)
- throw("failed to find minimal node matching filter")
- }
- }
- return t
-}
-
-// findMaximal finds the maximal (highest base addressed) node in the treap
-// which matches the criteria set out by the filter f and returns nil if
-// none exists.
-//
-// This algorithm is the logical inversion of findMinimal and just changes
-// the order of the left and right tests.
-func (t *treapNode) findMaximal(f treapIterFilter) *treapNode {
- if t == nil || !f.matches(t.types) {
- return nil
- }
- for t != nil {
- if t.right != nil && f.matches(t.right.types) {
- t = t.right
- } else if f.matches(t.span.treapFilter()) {
- break
- } else if t.left != nil && f.matches(t.left.types) {
- t = t.left
- } else {
- println("runtime: f=", f)
- throw("failed to find minimal node matching filter")
- }
- }
- return t
-}
-
-// pred returns the predecessor of t in the treap subject to the criteria
-// specified by the filter f. Returns nil if no such predecessor exists.
-func (t *treapNode) pred(f treapIterFilter) *treapNode {
- if t.left != nil && f.matches(t.left.types) {
- // The node has a left subtree which contains at least one matching
- // node, find the maximal matching node in that subtree.
- return t.left.findMaximal(f)
- }
- // Lacking a left subtree, look to the parents.
- p := t // previous node
- t = t.parent
- for t != nil {
- // Walk up the tree until we find a node that has a left subtree
- // that we haven't already visited.
- if t.right == p {
- if f.matches(t.span.treapFilter()) {
- // If this node matches, then it's guaranteed to be the
- // predecessor since everything to its left is strictly
- // greater.
- return t
- } else if t.left != nil && f.matches(t.left.types) {
- // Failing the root of this subtree, if its left subtree has
- // something, that's where we'll find our predecessor.
- return t.left.findMaximal(f)
- }
- }
- p = t
- t = t.parent
- }
- // If the parent is nil, then we've hit the root without finding
- // a suitable left subtree containing the node (and the predecessor
- // wasn't on the path). Thus, there's no predecessor, so just return
- // nil.
- return nil
-}
-
-// succ returns the successor of t in the treap subject to the criteria
-// specified by the filter f. Returns nil if no such successor exists.
-func (t *treapNode) succ(f treapIterFilter) *treapNode {
- // See pred. This method is just the logical inversion of it.
- if t.right != nil && f.matches(t.right.types) {
- return t.right.findMinimal(f)
- }
- p := t
- t = t.parent
- for t != nil {
- if t.left == p {
- if f.matches(t.span.treapFilter()) {
- return t
- } else if t.right != nil && f.matches(t.right.types) {
- return t.right.findMinimal(f)
- }
- }
- p = t
- t = t.parent
- }
- return nil
-}
-
-// isSpanInTreap is handy for debugging. One should hold the heap lock, usually
-// mheap_.lock().
-func (t *treapNode) isSpanInTreap(s *mspan) bool {
- if t == nil {
- return false
- }
- return t.span == s || t.left.isSpanInTreap(s) || t.right.isSpanInTreap(s)
-}
-
-// walkTreap is handy for debugging and testing.
-// Starting at some treapnode t, for example the root, do a depth first preorder walk of
-// the tree executing fn at each treap node. One should hold the heap lock, usually
-// mheap_.lock().
-func (t *treapNode) walkTreap(fn func(tn *treapNode)) {
- if t == nil {
- return
- }
- fn(t)
- t.left.walkTreap(fn)
- t.right.walkTreap(fn)
-}
-
-// checkTreapNode when used in conjunction with walkTreap can usually detect a
-// poorly formed treap.
-func checkTreapNode(t *treapNode) {
- if t == nil {
- return
- }
- if t.span.next != nil || t.span.prev != nil || t.span.list != nil {
- throw("span may be on an mSpanList while simultaneously in the treap")
- }
- if t.span.base() != t.key {
- println("runtime: checkTreapNode treapNode t=", t, " t.key=", t.key,
- "t.span.base()=", t.span.base())
- throw("why does span.base() and treap.key do not match?")
- }
- if t.left != nil && t.key < t.left.key {
- throw("found out-of-order spans in treap (left child has greater base address)")
- }
- if t.right != nil && t.key > t.right.key {
- throw("found out-of-order spans in treap (right child has lesser base address)")
- }
-}
-
-// validateInvariants is handy for debugging and testing.
-// It ensures that the various invariants on each treap node are
-// appropriately maintained throughout the treap by walking the
-// treap in a post-order manner.
-func (t *treapNode) validateInvariants() (uintptr, treapIterFilter) {
- if t == nil {
- return 0, 0
- }
- leftMax, leftTypes := t.left.validateInvariants()
- rightMax, rightTypes := t.right.validateInvariants()
- max := t.span.npages
- if leftMax > max {
- max = leftMax
- }
- if rightMax > max {
- max = rightMax
- }
- if max != t.maxPages {
- println("runtime: t.maxPages=", t.maxPages, "want=", max)
- throw("maxPages invariant violated in treap")
- }
- typ := t.span.treapFilter() | leftTypes | rightTypes
- if typ != t.types {
- println("runtime: t.types=", t.types, "want=", typ)
- throw("types invariant violated in treap")
- }
- return max, typ
-}
-
-// treapIterType represents the type of iteration to perform
-// over the treap. Each different flag is represented by a bit
-// in the type, and types may be combined together by a bitwise
-// or operation.
-//
-// Note that only 5 bits are available for treapIterType, do not
-// use the 3 higher-order bits. This constraint is to allow for
-// expansion into a treapIterFilter, which is a uint32.
-type treapIterType uint8
-
-const (
- treapIterScav treapIterType = 1 << iota // scavenged spans
- treapIterHuge // spans containing at least one huge page
- treapIterBits = iota
-)
-
-// treapIterFilter is a bitwise filter of different spans by binary
-// properties. Each bit of a treapIterFilter represents a unique
-// combination of bits set in a treapIterType, in other words, it
-// represents the power set of a treapIterType.
-//
-// The purpose of this representation is to allow the existence of
-// a specific span type to bubble up in the treap (see the types
-// field on treapNode).
-//
-// More specifically, any treapIterType may be transformed into a
-// treapIterFilter for a specific combination of flags via the
-// following operation: 1 << (0x1f&treapIterType).
-type treapIterFilter uint32
-
-// treapFilterAll represents the filter which allows all spans.
-const treapFilterAll = ^treapIterFilter(0)
-
-// treapFilter creates a new treapIterFilter from two treapIterTypes.
-// mask represents a bitmask for which flags we should check against
-// and match for the expected result after applying the mask.
-func treapFilter(mask, match treapIterType) treapIterFilter {
- allow := treapIterFilter(0)
- for i := treapIterType(0); i < 1<<treapIterBits; i++ {
- if mask&i == match {
- allow |= 1 << i
- }
- }
- return allow
-}
-
-// matches returns true if m and f intersect.
-func (f treapIterFilter) matches(m treapIterFilter) bool {
- return f&m != 0
-}
-
-// treapFilter returns the treapIterFilter exactly matching this span,
-// i.e. popcount(result) == 1.
-func (s *mspan) treapFilter() treapIterFilter {
- have := treapIterType(0)
- if s.scavenged {
- have |= treapIterScav
- }
- if s.hugePages() > 0 {
- have |= treapIterHuge
- }
- return treapIterFilter(uint32(1) << (0x1f & have))
-}
-
-// treapIter is a bidirectional iterator type which may be used to iterate over a
-// an mTreap in-order forwards (increasing order) or backwards (decreasing order).
-// Its purpose is to hide details about the treap from users when trying to iterate
-// over it.
-//
-// To create iterators over the treap, call start or end on an mTreap.
-type treapIter struct {
- f treapIterFilter
- t *treapNode
-}
-
-// span returns the span at the current position in the treap.
-// If the treap is not valid, span will panic.
-func (i *treapIter) span() *mspan {
- return i.t.span
-}
-
-// valid returns whether the iterator represents a valid position
-// in the mTreap.
-func (i *treapIter) valid() bool {
- return i.t != nil
-}
-
-// next moves the iterator forward by one. Once the iterator
-// ceases to be valid, calling next will panic.
-func (i treapIter) next() treapIter {
- i.t = i.t.succ(i.f)
- return i
-}
-
-// prev moves the iterator backwards by one. Once the iterator
-// ceases to be valid, calling prev will panic.
-func (i treapIter) prev() treapIter {
- i.t = i.t.pred(i.f)
- return i
-}
-
-// start returns an iterator which points to the start of the treap (the
-// left-most node in the treap) subject to mask and match constraints.
-func (root *mTreap) start(mask, match treapIterType) treapIter {
- f := treapFilter(mask, match)
- return treapIter{f, root.treap.findMinimal(f)}
-}
-
-// end returns an iterator which points to the end of the treap (the
-// right-most node in the treap) subject to mask and match constraints.
-func (root *mTreap) end(mask, match treapIterType) treapIter {
- f := treapFilter(mask, match)
- return treapIter{f, root.treap.findMaximal(f)}
-}
-
-// mutate allows one to mutate the span without removing it from the treap via a
-// callback. The span's base and size are allowed to change as long as the span
-// remains in the same order relative to its predecessor and successor.
-//
-// Note however that any operation that causes a treap rebalancing inside of fn
-// is strictly forbidden, as that may cause treap node metadata to go
-// out-of-sync.
-func (root *mTreap) mutate(i treapIter, fn func(span *mspan)) {
- s := i.span()
- // Save some state about the span for later inspection.
- hpages := s.hugePages()
- scavenged := s.scavenged
- // Call the mutator.
- fn(s)
- // Update unscavHugePages appropriately.
- if !scavenged {
- mheap_.free.unscavHugePages -= hpages
- }
- if !s.scavenged {
- mheap_.free.unscavHugePages += s.hugePages()
- }
- // Update the key in case the base changed.
- i.t.key = s.base()
- // Updating invariants up the tree needs to happen if
- // anything changed at all, so just go ahead and do it
- // unconditionally.
- //
- // If it turns out nothing changed, it'll exit quickly.
- t := i.t
- for t != nil && t.updateInvariants() {
- t = t.parent
- }
-}
-
-// insert adds span to the large span treap.
-func (root *mTreap) insert(span *mspan) {
- if !span.scavenged {
- root.unscavHugePages += span.hugePages()
- }
- base := span.base()
- var last *treapNode
- pt := &root.treap
- for t := *pt; t != nil; t = *pt {
- last = t
- if t.key < base {
- pt = &t.right
- } else if t.key > base {
- pt = &t.left
- } else {
- throw("inserting span already in treap")
- }
- }
-
- // Add t as new leaf in tree of span size and unique addrs.
- // The balanced tree is a treap using priority as the random heap priority.
- // That is, it is a binary tree ordered according to the key,
- // but then among the space of possible binary trees respecting those
- // keys, it is kept balanced on average by maintaining a heap ordering
- // on the priority: s.priority <= both s.right.priority and s.right.priority.
- // https://en.wikipedia.org/wiki/Treap
- // https://faculty.washington.edu/aragon/pubs/rst89.pdf
-
- t := (*treapNode)(mheap_.treapalloc.alloc())
- t.key = span.base()
- t.priority = fastrand()
- t.span = span
- t.maxPages = span.npages
- t.types = span.treapFilter()
- t.parent = last
- *pt = t // t now at a leaf.
-
- // Update the tree to maintain the various invariants.
- i := t
- for i.parent != nil && i.parent.updateInvariants() {
- i = i.parent
- }
-
- // Rotate up into tree according to priority.
- for t.parent != nil && t.parent.priority > t.priority {
- if t != nil && t.span.base() != t.key {
- println("runtime: insert t=", t, "t.key=", t.key)
- println("runtime: t.span=", t.span, "t.span.base()=", t.span.base())
- throw("span and treap node base addresses do not match")
- }
- if t.parent.left == t {
- root.rotateRight(t.parent)
- } else {
- if t.parent.right != t {
- throw("treap insert finds a broken treap")
- }
- root.rotateLeft(t.parent)
- }
- }
-}
-
-func (root *mTreap) removeNode(t *treapNode) {
- if !t.span.scavenged {
- root.unscavHugePages -= t.span.hugePages()
- }
- if t.span.base() != t.key {
- throw("span and treap node base addresses do not match")
- }
- // Rotate t down to be leaf of tree for removal, respecting priorities.
- for t.right != nil || t.left != nil {
- if t.right == nil || t.left != nil && t.left.priority < t.right.priority {
- root.rotateRight(t)
- } else {
- root.rotateLeft(t)
- }
- }
- // Remove t, now a leaf.
- if t.parent != nil {
- p := t.parent
- if p.left == t {
- p.left = nil
- } else {
- p.right = nil
- }
- // Walk up the tree updating invariants until no updates occur.
- for p != nil && p.updateInvariants() {
- p = p.parent
- }
- } else {
- root.treap = nil
- }
- // Return the found treapNode's span after freeing the treapNode.
- mheap_.treapalloc.free(unsafe.Pointer(t))
-}
-
-// find searches for, finds, and returns the treap iterator over all spans
-// representing the position of the span with the smallest base address which is
-// at least npages in size. If no span has at least npages it returns an invalid
-// iterator.
-//
-// This algorithm is as follows:
-// * If there's a left child and its subtree can satisfy this allocation,
-// continue down that subtree.
-// * If there's no such left child, check if the root of this subtree can
-// satisfy the allocation. If so, we're done.
-// * If the root cannot satisfy the allocation either, continue down the
-// right subtree if able.
-// * Else, break and report that we cannot satisfy the allocation.
-//
-// The preference for left, then current, then right, results in us getting
-// the left-most node which will contain the span with the lowest base
-// address.
-//
-// Note that if a request cannot be satisfied the fourth case will be
-// reached immediately at the root, since neither the left subtree nor
-// the right subtree will have a sufficient maxPages, whilst the root
-// node is also unable to satisfy it.
-func (root *mTreap) find(npages uintptr) treapIter {
- t := root.treap
- for t != nil {
- if t.span == nil {
- throw("treap node with nil span found")
- }
- // Iterate over the treap trying to go as far left
- // as possible while simultaneously ensuring that the
- // subtrees we choose always have a span which can
- // satisfy the allocation.
- if t.left != nil && t.left.maxPages >= npages {
- t = t.left
- } else if t.span.npages >= npages {
- // Before going right, if this span can satisfy the
- // request, stop here.
- break
- } else if t.right != nil && t.right.maxPages >= npages {
- t = t.right
- } else {
- t = nil
- }
- }
- return treapIter{treapFilterAll, t}
-}
-
-// removeSpan searches for, finds, deletes span along with
-// the associated treap node. If the span is not in the treap
-// then t will eventually be set to nil and the t.span
-// will throw.
-func (root *mTreap) removeSpan(span *mspan) {
- base := span.base()
- t := root.treap
- for t.span != span {
- if t.key < base {
- t = t.right
- } else if t.key > base {
- t = t.left
- }
- }
- root.removeNode(t)
-}
-
-// erase removes the element referred to by the current position of the
-// iterator. This operation consumes the given iterator, so it should no
-// longer be used. It is up to the caller to get the next or previous
-// iterator before calling erase, if need be.
-func (root *mTreap) erase(i treapIter) {
- root.removeNode(i.t)
-}
-
-// rotateLeft rotates the tree rooted at node x.
-// turning (x a (y b c)) into (y (x a b) c).
-func (root *mTreap) rotateLeft(x *treapNode) {
- // p -> (x a (y b c))
- p := x.parent
- a, y := x.left, x.right
- b, c := y.left, y.right
-
- y.left = x
- x.parent = y
- y.right = c
- if c != nil {
- c.parent = y
- }
- x.left = a
- if a != nil {
- a.parent = x
- }
- x.right = b
- if b != nil {
- b.parent = x
- }
-
- y.parent = p
- if p == nil {
- root.treap = y
- } else if p.left == x {
- p.left = y
- } else {
- if p.right != x {
- throw("large span treap rotateLeft")
- }
- p.right = y
- }
-
- x.updateInvariants()
- y.updateInvariants()
-}
-
-// rotateRight rotates the tree rooted at node y.
-// turning (y (x a b) c) into (x a (y b c)).
-func (root *mTreap) rotateRight(y *treapNode) {
- // p -> (y (x a b) c)
- p := y.parent
- x, c := y.left, y.right
- a, b := x.left, x.right
-
- x.left = a
- if a != nil {
- a.parent = x
- }
- x.right = y
- y.parent = x
- y.left = b
- if b != nil {
- b.parent = y
- }
- y.right = c
- if c != nil {
- c.parent = y
- }
-
- x.parent = p
- if p == nil {
- root.treap = x
- } else if p.left == y {
- p.left = x
- } else {
- if p.right != y {
- throw("large span treap rotateRight")
- }
- p.right = x
- }
-
- y.updateInvariants()
- x.updateInvariants()
-}
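Before the deleted treap code above is left behind entirely, the treapIterFilter encoding it used deserves one worked example: each combination of treapIterType bits owns one bit of a uint32, so a subtree can advertise every kind of span it contains with a plain OR, and a query is a single AND. A self-contained sketch with invented names (the real types were removed above):

package main

import "fmt"

type iterType uint8

const (
    iterScav iterType = 1 << iota // scavenged spans
    iterHuge                      // spans containing a huge page
    iterBits = iota               // number of flag bits
)

type iterFilter uint32

// filterFor returns the filter bit matching exactly this flag combination,
// the 1 << (0x1f & t) transformation described in the removed comments.
func filterFor(t iterType) iterFilter { return 1 << (0x1f & t) }

// filterMatching accepts every combination c with c&mask == match,
// mirroring the removed treapFilter helper.
func filterMatching(mask, match iterType) iterFilter {
    var allow iterFilter
    for i := iterType(0); i < 1<<iterBits; i++ {
        if i&mask == match {
            allow |= 1 << i
        }
    }
    return allow
}

func main() {
    // A subtree holding one scavenged span and one huge, unscavenged span:
    types := filterFor(iterScav) | filterFor(iterHuge)
    // Query for unscavenged spans (mask iterScav, match 0): the huge span matches.
    fmt.Println(types&filterMatching(iterScav, 0) != 0) // true
}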
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 2c63724..fe988c4 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -21,10 +21,6 @@
// BSS root.
rootBlockBytes = 256 << 10
- // rootBlockSpans is the number of spans to scan per span
- // root.
- rootBlockSpans = 8 * 1024 // 64MB worth of spans
-
// maxObletBytes is the maximum bytes of an object to scan at
// once. Larger objects will be split up into "oblets" of at
// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
@@ -41,22 +37,32 @@
// a syscall, so its overhead is nontrivial). Higher values
// make the system less responsive to incoming work.
drainCheckThreshold = 100000
+
+ // pagesPerSpanRoot indicates how many pages to scan from a span root
+ // at a time. Used by special root marking.
+ //
+ // Higher values improve throughput by increasing locality, but
+ // increase the minimum latency of a marking operation.
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
+ pagesPerSpanRoot = 512
+
+ // go115NewMarkrootSpans is a feature flag that indicates whether
+ // to use the new bitmap-based markrootSpans implementation.
+ go115NewMarkrootSpans = true
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
-// The caller must have call gcCopySpans().
-//
// The world must be stopped.
-//
-//go:nowritebarrier
func gcMarkRootPrepare() {
work.nFlushCacheRoots = 0
// Compute how many data and BSS root blocks there are.
nBlocks := func(bytes uintptr) int {
- return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
+ return int(divRoundUp(bytes, rootBlockBytes))
}
work.nDataRoots = 0
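divRoundUp replaces the open-coded (bytes + rootBlockBytes - 1) / rootBlockBytes expression. Assuming the helper has the obvious shape (its definition lives elsewhere in the patch), it amounts to:

// divRoundUpSketch returns ceil(n / a) for a > 0; with a == rootBlockBytes
// it gives the number of root blocks needed to cover n bytes of data or BSS.
func divRoundUpSketch(n, a uintptr) uintptr {
    return (n + a - 1) / a
}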
@@ -81,13 +87,24 @@
//
// We depend on addfinalizer to mark objects that get
// finalizers after root marking.
- //
- // We're only interested in scanning the in-use spans,
- // which will all be swept at this point. More spans
- // may be added to this list during concurrent GC, but
- // we only care about spans that were allocated before
- // this mark phase.
- work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ if go115NewMarkrootSpans {
+ // We're going to scan the whole heap (that was available at the time the
+ // mark phase started, i.e. markArenas) for in-use spans which have specials.
+ //
+ // Break up the work into arenas, and further into chunks.
+ //
+ // Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+ // is append-only.
+ mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+ work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
+ } else {
+ // We're only interested in scanning the in-use spans,
+ // which will all be swept at this point. More spans
+ // may be added to this list during concurrent GC, but
+ // we only care about spans that were allocated before
+ // this mark phase.
+ work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ }
// Scan stacks.
//
@@ -125,8 +142,7 @@
fail:
println("gp", gp, "goid", gp.goid,
"status", readgstatus(gp),
- "gcscandone", gp.gcscandone,
- "gcscanvalid", gp.gcscanvalid)
+ "gcscandone", gp.gcscandone)
unlock(&allglock) // Avoid self-deadlock with traceback.
throw("scan missed a g")
}
@@ -197,7 +213,7 @@
gp.waitsince = work.tstart
}
- // scang must be done on the system stack in case
+ // scanstack must be done on the system stack in case
// we're trying to scan our own stack.
systemstack(func() {
// If this is a self-scan, put the user G in
@@ -211,14 +227,24 @@
userG.waitreason = waitReasonGarbageCollectionScan
}
- // TODO: scang blocks until gp's stack has
- // been scanned, which may take a while for
+ // TODO: suspendG blocks (and spins) until gp
+ // stops, which may take a while for
// running goroutines. Consider doing this in
// two phases where the first is non-blocking:
// we scan the stacks we can and ask running
// goroutines to scan themselves; and the
// second blocks.
- scang(gp, gcw)
+ stopped := suspendG(gp)
+ if stopped.dead {
+ gp.gcscandone = true
+ return
+ }
+ if gp.gcscandone {
+ throw("g already scanned")
+ }
+ scanstack(gp, gcw)
+ gp.gcscandone = true
+ resumeG(stopped)
if selfScan {
casgstatus(userG, _Gwaiting, _Grunning)
@@ -237,14 +263,18 @@
throw("rootBlockBytes must be a multiple of 8*ptrSize")
}
- b := b0 + uintptr(shard)*rootBlockBytes
- if b >= b0+n0 {
+ // Note that if b0 is toward the end of the address space,
+ // then b0 + rootBlockBytes might wrap around.
+ // These tests are written to avoid any possible overflow.
+ off := uintptr(shard) * rootBlockBytes
+ if off >= n0 {
return
}
+ b := b0 + off
ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
n := uintptr(rootBlockBytes)
- if b+n > b0+n0 {
- n = b0 + n0 - b
+ if off+n > n0 {
+ n = n0 - off
}
// Scan this shard.
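The rewritten bounds check above computes the offset first and compares it against n0 instead of comparing b0+off against b0+n0, because the latter sum can wrap around when b0 sits near the top of the address space. The same overflow-safe pattern in isolation (boundsSketch is a hypothetical name):

// boundsSketch clamps a shard of blockBytes-sized blocks to the range
// [b0, b0+n0) without ever forming a sum that could overflow.
func boundsSketch(b0, n0 uintptr, shard int, blockBytes uintptr) (b, n uintptr, ok bool) {
    off := uintptr(shard) * blockBytes
    if off >= n0 {
        return 0, 0, false // shard is past the end
    }
    b = b0 + off
    n = blockBytes
    if off+n > n0 {
        n = n0 - off
    }
    return b, n, true
}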
@@ -282,10 +312,96 @@
unlock(&sched.gFree.lock)
}
-// markrootSpans marks roots for one shard of work.spans.
+// markrootSpans marks roots for one shard of markArenas.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
+ if !go115NewMarkrootSpans {
+ oldMarkrootSpans(gcw, shard)
+ return
+ }
+ // Objects with finalizers have two GC-related invariants:
+ //
+ // 1) Everything reachable from the object must be marked.
+ // This ensures that when we pass the object to its finalizer,
+ // everything the finalizer can reach will be retained.
+ //
+ // 2) Finalizer specials (which are not in the garbage
+ // collected heap) are roots. In practice, this means the fn
+ // field must be scanned.
+ sg := mheap_.sweepgen
+
+ // Find the arena and page index into that arena for this shard.
+ ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
+
+ // Construct slice of bitmap which we'll iterate over.
+ specialsbits := ha.pageSpecials[arenaPage/8:]
+ specialsbits = specialsbits[:pagesPerSpanRoot/8]
+ for i := range specialsbits {
+ // Find set bits, which correspond to spans with specials.
+ specials := atomic.Load8(&specialsbits[i])
+ if specials == 0 {
+ continue
+ }
+ for j := uint(0); j < 8; j++ {
+ if specials&(1<<j) == 0 {
+ continue
+ }
+ // Find the span for this bit.
+ //
+ // This value is guaranteed to be non-nil because having
+ // specials implies that the span is in-use, and since we're
+ // currently marking we can be sure that we don't have to worry
+ // about the span being freed and re-used.
+ s := ha.spans[arenaPage+uint(i)*8+j]
+
+ // The state must be mSpanInUse if the specials bit is set, so
+ // sanity check that.
+ if state := s.state.get(); state != mSpanInUse {
+ print("s.state = ", state, "\n")
+ throw("non in-use span found with specials bit set")
+ }
+ // Check that this span was swept (it may be cached or uncached).
+ if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
+ // sweepgen was updated (+2) during non-checkmark GC pass
+ print("sweep ", s.sweepgen, " ", sg, "\n")
+ throw("gc: unswept span")
+ }
+
+ // Lock the specials to prevent a special from being
+ // removed from the list while we're traversing it.
+ lock(&s.speciallock)
+ for sp := s.specials; sp != nil; sp = sp.next {
+ if sp.kind != _KindSpecialFinalizer {
+ continue
+ }
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf := (*specialfinalizer)(unsafe.Pointer(sp))
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+
+ // Mark everything that can be reached from
+ // the object (but *not* the object itself or
+ // we'll never collect it).
+ scanobject(p, gcw)
+
+ // The special itself is a root.
+ scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
+ }
+ unlock(&s.speciallock)
+ }
+ }
+}
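The new markrootSpans turns a shard number into an arena index plus a 512-page window inside that arena. A sketch of just that index arithmetic; 8192 pages per arena is the linux/amd64 value (64 MiB arenas of 8 KiB pages) and is an assumption here, as is the helper's name:

const (
    pagesPerArenaSketch  = 8192 // 64 MiB arenas of 8 KiB pages (assumed, linux/amd64)
    pagesPerSpanRootHere = 512  // matches the constant added above
)

// shardToArenaPageSketch mirrors the index math at the top of the new
// markrootSpans: each shard covers one pagesPerSpanRoot-sized window of
// pages inside one arena of mheap_.markArenas.
func shardToArenaPageSketch(shard int) (arenaIdx int, firstPage uint) {
    arenaIdx = shard / (pagesPerArenaSketch / pagesPerSpanRootHere)
    firstPage = uint(uintptr(shard) * pagesPerSpanRootHere % pagesPerArenaSketch)
    return
}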
+
+// oldMarkrootSpans marks roots for one shard of work.spans.
+//
+// For go115NewMarkrootSpans = false.
+//
+//go:nowritebarrier
+func oldMarkrootSpans(gcw *gcWork, shard int) {
// Objects with finalizers have two GC-related invariants:
//
// 1) Everything reachable from the object must be marked.
@@ -302,13 +418,21 @@
sg := mheap_.sweepgen
spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
// Note that work.spans may not include spans that were
- // allocated between entering the scan phase and now. This is
- // okay because any objects with finalizers in those spans
- // must have been allocated and given finalizers after we
- // entered the scan phase, so addfinalizer will have ensured
- // the above invariants for them.
- for _, s := range spans {
- if s.state != mSpanInUse {
+ // allocated between entering the scan phase and now. We may
+ // also race with spans being added into sweepSpans when they're
+ // just created, and as a result we may see nil pointers in the
+ // spans slice. This is okay because any objects with finalizers
+ // in those spans must have been allocated and given finalizers
+ // after we entered the scan phase, so addfinalizer will have
+ // ensured the above invariants for them.
+ for i := 0; i < len(spans); i++ {
+ // sweepBuf.block requires that we read pointers from the block atomically.
+ // It also requires that we ignore nil pointers.
+ s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i])))
+
+ // This is racing with spans being initialized, so
+ // check the state carefully.
+ if s == nil || s.state.get() != mSpanInUse {
continue
}
// Check that this span was swept (it may be cached or uncached).
@@ -654,16 +778,16 @@
// scanstack scans gp's stack, greying all pointers found on the stack.
//
+// scanstack will also shrink the stack if it is safe to do so. If it
+// is not, it schedules a stack shrink for the next synchronous safe
+// point.
+//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:nowritebarrier
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
- if gp.gcscanvalid {
- return
- }
-
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
@@ -686,8 +810,13 @@
throw("can't scan our own stack")
}
- // Shrink the stack if not much of it is being used.
- shrinkstack(gp)
+ if isShrinkStackSafe(gp) {
+ // Shrink the stack if not much of it is being used.
+ shrinkstack(gp)
+ } else {
+ // Otherwise, shrink the stack at the next sync safe point.
+ gp.preemptShrink = true
+ }
var state stackScanState
state.stack = gp.stack
@@ -696,6 +825,10 @@
println("stack trace goroutine", gp.goid)
}
+ if debugScanConservative && gp.asyncSafePoint {
+ print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
+ }
+
// Scan the saved context register. This is effectively a live
// register that gets moved back and forth between the
// register and sched.ctxt without a write barrier.
@@ -737,13 +870,17 @@
}
if gp._panic != nil {
// Panics are always stack allocated.
- state.putPtr(uintptr(unsafe.Pointer(gp._panic)))
+ state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
}
// Find and scan all reachable stack objects.
+ //
+ // The state's pointer queue prioritizes precise pointers over
+ // conservative pointers so that we'll prefer scanning stack
+ // objects precisely.
state.buildIndex()
for {
- p := state.getPtr()
+ p, conservative := state.getPtr()
if p == 0 {
break
}
@@ -758,7 +895,13 @@
}
obj.setType(nil) // Don't scan it again.
if stackTraceDebug {
- println(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
+ printlock()
+ print(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
+ if conservative {
+ print(" (conservative)")
+ }
+ println()
+ printunlock()
}
gcdata := t.gcdata
var s *mspan
@@ -776,7 +919,12 @@
gcdata = (*byte)(unsafe.Pointer(s.startAddr))
}
- scanblock(state.stack.lo+uintptr(obj.off), t.ptrdata, gcdata, gcw, &state)
+ b := state.stack.lo + uintptr(obj.off)
+ if conservative {
+ scanConservative(b, t.ptrdata, gcdata, gcw, &state)
+ } else {
+ scanblock(b, t.ptrdata, gcdata, gcw, &state)
+ }
if s != nil {
dematerializeGCProg(s)
@@ -800,11 +948,9 @@
x.nobj = 0
putempty((*workbuf)(unsafe.Pointer(x)))
}
- if state.buf != nil || state.freeBuf != nil {
+ if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}
-
- gp.gcscanvalid = true
}
// Scan a stack frame: local variables and function arguments/results.
@@ -814,6 +960,50 @@
print("scanframe ", funcname(frame.fn), "\n")
}
+ isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
+ isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV1
+ if state.conservative || isAsyncPreempt || isDebugCall {
+ if debugScanConservative {
+ println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
+ }
+
+ // Conservatively scan the frame. Unlike the precise
+ // case, this includes the outgoing argument space
+ // since we may have stopped while this function was
+ // setting up a call.
+ //
+ // TODO: We could narrow this down if the compiler
+ // produced a single map per function of stack slots
+ // and registers that ever contain a pointer.
+ if frame.varp != 0 {
+ size := frame.varp - frame.sp
+ if size > 0 {
+ scanConservative(frame.sp, size, nil, gcw, state)
+ }
+ }
+
+ // Scan arguments to this frame.
+ if frame.arglen != 0 {
+ // TODO: We could pass the entry argument map
+ // to narrow this down further.
+ scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+ }
+
+ if isAsyncPreempt || isDebugCall {
+ // This function's frame contained the
+ // registers for the asynchronously stopped
+ // parent frame. Scan the parent
+ // conservatively.
+ state.conservative = true
+ } else {
+ // We only wanted to scan those two frames
+ // conservatively. Clear the flag for future
+ // frames.
+ state.conservative = false
+ }
+ return
+ }
+
locals, args, objs := getStackMap(frame, &state.cache, false)
// Scan local variables if stack frame has been allocated.
@@ -879,6 +1069,8 @@
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
+// gcDrain will always return if there is a pending STW.
+//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
if !writeBarrier.needed {
@@ -907,7 +1099,8 @@
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
- for !(preemptible && gp.preempt) {
+ // Stop if we're preemptible or if someone wants to STW.
+ for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job >= work.markrootJobs {
break
@@ -920,7 +1113,8 @@
}
// Drain heap marking jobs.
- for !(preemptible && gp.preempt) {
+ // Stop if we're preemptible or if someone wants to STW.
+ for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
@@ -1086,7 +1280,7 @@
if obj, span, objIndex := findObject(p, b, i); obj != 0 {
greyobject(obj, b, i, span, gcw, objIndex)
} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
- stk.putPtr(p)
+ stk.putPtr(p, false)
}
}
}
@@ -1196,6 +1390,101 @@
gcw.scanWork += int64(i)
}
+// scanConservative scans block [b, b+n) conservatively, treating any
+// pointer-like value in the block as a pointer.
+//
+// If ptrmask != nil, only words that are marked in ptrmask are
+// considered as potential pointers.
+//
+// If state != nil, it's assumed that [b, b+n) is a block in the stack
+// and may contain pointers to stack objects.
+func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
+ if debugScanConservative {
+ printlock()
+ print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
+ hexdumpWords(b, b+n, func(p uintptr) byte {
+ if ptrmask != nil {
+ word := (p - b) / sys.PtrSize
+ bits := *addb(ptrmask, word/8)
+ if (bits>>(word%8))&1 == 0 {
+ return '$'
+ }
+ }
+
+ val := *(*uintptr)(unsafe.Pointer(p))
+ if state != nil && state.stack.lo <= val && val < state.stack.hi {
+ return '@'
+ }
+
+ span := spanOfHeap(val)
+ if span == nil {
+ return ' '
+ }
+ idx := span.objIndex(val)
+ if span.isFree(idx) {
+ return ' '
+ }
+ return '*'
+ })
+ printunlock()
+ }
+
+ for i := uintptr(0); i < n; i += sys.PtrSize {
+ if ptrmask != nil {
+ word := i / sys.PtrSize
+ bits := *addb(ptrmask, word/8)
+ if bits == 0 {
+ // Skip 8 words (the loop increment will do the 8th)
+ //
+ // This must be the first time we've
+ // seen this word of ptrmask, so i
+ // must be 8-word-aligned, but check
+ // our reasoning just in case.
+ if i%(sys.PtrSize*8) != 0 {
+ throw("misaligned mask")
+ }
+ i += sys.PtrSize*8 - sys.PtrSize
+ continue
+ }
+ if (bits>>(word%8))&1 == 0 {
+ continue
+ }
+ }
+
+ val := *(*uintptr)(unsafe.Pointer(b + i))
+
+ // Check if val points into the stack.
+ if state != nil && state.stack.lo <= val && val < state.stack.hi {
+ // val may point to a stack object. This
+ // object may be dead from last cycle and
+ // hence may contain pointers to unallocated
+ // objects, but unlike heap objects we can't
+ // tell if it's already dead. Hence, if all
+ // pointers to this object are from
+ // conservative scanning, we have to scan it
+ // defensively, too.
+ state.putPtr(val, true)
+ continue
+ }
+
+ // Check if val points to a heap span.
+ span := spanOfHeap(val)
+ if span == nil {
+ continue
+ }
+
+ // Check if val points to an allocated object.
+ idx := span.objIndex(val)
+ if span.isFree(idx) {
+ continue
+ }
+
+ // val points to an allocated object. Mark it.
+ obj := span.base() + idx*span.elemsize
+ greyobject(obj, b, i, span, gcw, idx)
+ }
+}
+
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
@@ -1294,15 +1583,15 @@
return
}
print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
- if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
- print(mSpanStateNames[s.state], "\n")
+ if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
+ print(mSpanStateNames[state], "\n")
} else {
- print("unknown(", s.state, ")\n")
+ print("unknown(", state, ")\n")
}
skipped := false
size := s.elemsize
- if s.state == mSpanManual && size == 0 {
+ if s.state.get() == mSpanManual && size == 0 {
// We're printing something from a stack frame. We
// don't know how big it is, so just show up to an
// including off.
@@ -1338,11 +1627,21 @@
//
//go:nowritebarrier
//go:nosplit
-func gcmarknewobject(obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
- markBitsForAddr(obj).setMarked()
+
+ // Mark object.
+ objIndex := span.objIndex(obj)
+ span.markBitsForIndex(objIndex).setMarked()
+
+ // Mark span.
+ arena, pageIdx, pageMask := pageIndexOf(span.base())
+ if arena.pageMarks[pageIdx]&pageMask == 0 {
+ atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
+ }
+
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
gcw.scanWork += int64(scanSize)
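gcmarknewobject now also sets a per-page mark bit in the owning arena. The byte/bit arithmetic behind a one-bit-per-page bitmap looks like this (a generic sketch, not the runtime's pageIndexOf helper):

// pageBitSketch maps a page index within an arena to the byte and mask
// used for a one-bit-per-page bitmap such as pageMarks above.
func pageBitSketch(pageInArena uint) (byteIdx uint, mask uint8) {
    return pageInArena / 8, 1 << (pageInArena % 8)
}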
@@ -1390,7 +1689,7 @@
func initCheckmarks() {
useCheckmark = true
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout())
}
}
@@ -1399,7 +1698,7 @@
func clearCheckmarks() {
useCheckmark = false
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout())
}
}
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index 45a9eb2..b74da10 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -17,7 +17,29 @@
// scavenger's primary goal is to bring the estimated heap RSS of the
// application down to a goal.
//
-// That goal is defined as (retainExtraPercent+100) / 100 * next_gc.
+// That goal is defined as:
+// (retainExtraPercent+100) / 100 * (next_gc / last_next_gc) * last_heap_inuse
+//
+// Essentially, we wish to have the application's RSS track the heap goal, but
+// the heap goal is defined in terms of bytes of objects, rather than pages like
+// RSS. As a result, we need to take into account for fragmentation internal to
+// spans. next_gc / last_next_gc defines the ratio between the current heap goal
+// and the last heap goal, which tells us by how much the heap is growing and
+// shrinking. We estimate what the heap will grow to in terms of pages by taking
+// this ratio and multiplying it by heap_inuse at the end of the last GC, which
+// allows us to account for this additional fragmentation. Note that this
+// procedure makes the assumption that the degree of fragmentation won't change
+// dramatically over the next GC cycle. Overestimating the amount of
+// fragmentation simply results in higher memory use, which will be accounted
+// for by the next pacing update. Underestimating the fragmentation, however,
+// could lead to performance degradation. Handling this case is not within the
+// scope of the scavenger. Situations where the amount of fragmentation balloons
+// over the course of a single GC cycle should be considered pathologies,
+// flagged as bugs, and fixed appropriately.
+//
+// An additional factor of retainExtraPercent is added as a buffer to help ensure
+// that there's more unscavenged memory to allocate out of, since each allocation
+// out of scavenged memory incurs a potentially expensive page fault.
//
// The goal is updated after each GC and the scavenger's pacing parameters
// (which live in mheap_) are updated to match. The pacing parameters work much
@@ -33,25 +55,18 @@
package runtime
+import (
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
+ "unsafe"
+)
+
const (
// The background scavenger is paced according to these parameters.
//
// scavengePercent represents the portion of mutator time we're willing
// to spend on scavenging in percent.
- //
- // scavengePageLatency is a worst-case estimate (order-of-magnitude) of
- // the time it takes to scavenge one (regular-sized) page of memory.
- // scavengeHugePageLatency is the same but for huge pages.
- //
- // scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
- // and represents the average time between scavenging one page that we're
- // aiming for. scavengeHugePagePeriod is the same but for huge pages.
- // These constants are core to the scavenge pacing algorithm.
- scavengePercent = 1 // 1%
- scavengePageLatency = 10e3 // 10µs
- scavengeHugePageLatency = 10e3 // 10µs
- scavengePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
- scavengeHugePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
+ scavengePercent = 1 // 1%
// retainExtraPercent represents the amount of memory over the heap goal
// that the scavenger should keep as a buffer space for the allocator.
@@ -61,34 +76,62 @@
// incurs an additional cost), to account for heap fragmentation and
// the ever-changing layout of the heap.
retainExtraPercent = 10
+
+ // maxPagesPerPhysPage is the maximum number of supported runtime pages per
+ // physical page, based on maxPhysPageSize.
+ maxPagesPerPhysPage = maxPhysPageSize / pageSize
+
+ // scavengeCostRatio is the approximate ratio between the costs of using previously
+ // scavenged memory and scavenging memory.
+ //
+ // For most systems the cost of scavenging greatly outweighs the costs
+ // associated with using scavenged memory, making this constant 0. On other systems
+ // (especially ones where "sysUsed" is not just a no-op) this cost is non-trivial.
+ //
+ // This ratio is used as part of multiplicative factor to help the scavenger account
+ // for the additional costs of using scavenged memory in its pacing.
+ scavengeCostRatio = 0.7 * sys.GoosDarwin
+
+ // scavengeReservationShards determines the amount of memory the scavenger
+ // should reserve for scavenging at a time. Specifically, the amount of
+ // memory reserved is (heap size in bytes) / scavengeReservationShards.
+ scavengeReservationShards = 64
)
// heapRetained returns an estimate of the current heap RSS.
-//
-// mheap_.lock must be held or the world must be stopped.
func heapRetained() uint64 {
- return memstats.heap_sys - memstats.heap_released
+ return atomic.Load64(&memstats.heap_sys) - atomic.Load64(&memstats.heap_released)
}
// gcPaceScavenger updates the scavenger's pacing, particularly
// its rate and RSS goal.
//
// The RSS goal is based on the current heap goal with a small overhead
-// to accomodate non-determinism in the allocator.
+// to accommodate non-determinism in the allocator.
//
// The pacing is based on scavengePageRate, which applies to both regular and
// huge pages. See that constant for more information.
//
// mheap_.lock must be held or the world must be stopped.
func gcPaceScavenger() {
- // Compute our scavenging goal and align it to a physical page boundary
- // to make the following calculations more exact.
- retainedGoal := memstats.next_gc
+ // If we're called before the first GC completed, disable scavenging.
+ // We never scavenge before the 2nd GC cycle anyway (we don't have enough
+ // information about the heap yet) so this is fine, and avoids a fault
+ // or garbage data later.
+ if memstats.last_next_gc == 0 {
+ mheap_.scavengeGoal = ^uint64(0)
+ return
+ }
+ // Compute our scavenging goal.
+ goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc)
+ retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
// Add retainExtraPercent overhead to retainedGoal. This calculation
// looks strange but the purpose is to arrive at an integer division
// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
// that also avoids the overflow from a multiplication.
retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0))
+ // Align it to a physical page boundary to make the following calculations
+ // a bit more exact.
retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)
// Represents where we are now in the heap's contribution to RSS in bytes.
@@ -104,87 +147,47 @@
// physical page.
retainedNow := heapRetained()
- // If we're already below our goal, publish the goal in case it changed
- // then disable the background scavenger.
- if retainedNow <= retainedGoal {
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeBytesPerNS = 0
+ // If we're already below our goal, or within one page of our goal, then disable
+ // the background scavenger. We disable the background scavenger if there's
+ // less than one physical page of work to do because it's not worth it.
+ if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
+ mheap_.scavengeGoal = ^uint64(0)
return
}
-
- // Now we start to compute the total amount of work necessary and the total
- // amount of time we're willing to give the scavenger to complete this work.
- // This will involve calculating how much of the work consists of huge pages
- // and how much consists of regular pages since the former can let us scavenge
- // more memory in the same time.
- totalWork := retainedNow - retainedGoal
-
- // On systems without huge page support, all work is regular work.
- regularWork := totalWork
- hugeTime := uint64(0)
-
- // On systems where we have huge pages, we want to do as much of the
- // scavenging work as possible on huge pages, because the costs are the
- // same per page, but we can give back more more memory in a shorter
- // period of time.
- if physHugePageSize != 0 {
- // Start by computing the amount of free memory we have in huge pages
- // in total. Trivially, this is all the huge page work we need to do.
- hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift
-
- // ...but it could turn out that there's more huge work to do than
- // total work, so cap it at total work. This might happen for very large
- // heaps where the additional factor of retainExtraPercent can make it so
- // that there are free chunks of memory larger than a huge page that we don't want
- // to scavenge.
- if hugeWork >= totalWork {
- hugePages := totalWork >> physHugePageShift
- hugeWork = hugePages << physHugePageShift
- }
- // Everything that's not huge work is regular work. At this point we
- // know huge work so we can calculate how much time that will take
- // based on scavengePageRate (which applies to pages of any size).
- regularWork = totalWork - hugeWork
- hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
- }
- // Finally, we can compute how much time it'll take to do the regular work
- // and the total time to do all the work.
- regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
- totalTime := hugeTime + regularTime
-
- now := nanotime()
-
- lock(&scavenge.lock)
-
- // Update all the pacing parameters in mheap with scavenge.lock held,
- // so that scavenge.gen is kept in sync with the updated values.
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeRetainedBasis = retainedNow
- mheap_.scavengeTimeBasis = now
- mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
- scavenge.gen++ // increase scavenge generation
-
- // Wake up background scavenger if needed, since the pacing was just updated.
- wakeScavengerLocked()
-
- unlock(&scavenge.lock)
+ mheap_.scavengeGoal = retainedGoal
}
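
As a standalone illustration of the pacing above, the goal computation boils down to the arithmetic below. This is a sketch only: the parameters stand in for memstats.next_gc, memstats.last_next_gc, memstats.last_heap_inuse, and physPageSize, and retainExtraPercent is assumed to be the constant 10 defined earlier in this file.

// scavengeGoalSketch mirrors gcPaceScavenger's arithmetic for illustration:
// scale last cycle's in-use heap by the growth of the heap goal, add
// retainExtraPercent of slack, and round up to a physical page boundary.
func scavengeGoalSketch(nextGC, lastNextGC, lastHeapInuse, physPageSize uint64) uint64 {
	const retainExtraPercent = 10
	if lastNextGC == 0 {
		// Before the first GC completes, scavenging is effectively disabled.
		return ^uint64(0)
	}
	goalRatio := float64(nextGC) / float64(lastNextGC)
	goal := uint64(float64(lastHeapInuse) * goalRatio)
	// Add retainExtraPercent overhead; for 10% this is a division by 10.
	goal += goal / (100 / retainExtraPercent)
	// Round up to a physical page boundary.
	return (goal + physPageSize - 1) &^ (physPageSize - 1)
}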
-// State of the background scavenger.
+// Sleep/wait state of the background scavenger.
var scavenge struct {
- lock mutex
- g *g
- parked bool
- timer *timer
- gen uint32 // read with either lock or mheap_.lock, write with both
+ lock mutex
+ g *g
+ parked bool
+ timer *timer
+ sysmonWake uint32 // Set atomically.
}
-// wakeScavengerLocked unparks the scavenger if necessary. It must be called
-// after any pacing update.
+// readyForScavenger signals sysmon to wake the scavenger because
+// there may be new work to do.
//
-// scavenge.lock must be held.
-func wakeScavengerLocked() {
+// There may be a significant delay between when this function runs
+// and when the scavenger is kicked awake, but it may be safely invoked
+// in contexts where wakeScavenger is unsafe to call directly.
+func readyForScavenger() {
+ atomic.Store(&scavenge.sysmonWake, 1)
+}
+
+// wakeScavenger immediately unparks the scavenger if necessary.
+//
+// May run without a P, but it may allocate, so it must not be called
+// on any allocation path.
+//
+// mheap_.lock, scavenge.lock, and sched.lock must not be held.
+func wakeScavenger() {
+ lock(&scavenge.lock)
if scavenge.parked {
+ // Notify sysmon that it shouldn't bother waking up the scavenger.
+ atomic.Store(&scavenge.sysmonWake, 0)
+
// Try to stop the timer but we don't really care if we succeed.
// It's possible that either a timer was never started, or that
// we're racing with it.
@@ -194,45 +197,51 @@
stopTimer(scavenge.timer)
// Unpark the goroutine and tell it that there may have been a pacing
- // change.
+ // change. Note that we skip the scheduler's runnext slot because we
+ // want to avoid having the scavenger interfere with the fair
+ // scheduling of user goroutines. In effect, this schedules the
+ // scavenger at a "lower priority" but that's OK because it'll
+ // catch up on the work it missed when it does get scheduled.
scavenge.parked = false
- ready(scavenge.g, 0, true)
+
+ // Ready the goroutine by injecting it. We use injectglist instead
+ // of ready or goready in order to allow us to run this function
+ // without a P. injectglist also avoids placing the goroutine in
+ // the current P's runnext slot, which is desirable to prevent
+ // the scavenger from interfering with user goroutine scheduling
+ // too much.
+ var list gList
+ list.push(scavenge.g)
+ injectglist(&list)
}
+ unlock(&scavenge.lock)
}
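
The readyForScavenger/wakeScavenger pair above implements a small handshake: code that cannot safely wake the goroutine sets a flag for sysmon, and a direct wake clears the flag so sysmon does not do redundant work. A minimal sketch of that pattern outside the runtime; the names and the wakeFlag type are illustrative, not part of the patch.

package sketch

import "sync/atomic"

// wakeFlag sketches the sysmonWake handshake: requestWake is safe anywhere
// (it only stores a flag), directWake does the real work and cancels any
// pending request, and monitorTick is what a sysmon-like loop would poll.
type wakeFlag struct {
	pending uint32 // set atomically
	wake    func() // the actual wake-up, which may allocate
}

func (w *wakeFlag) requestWake() {
	atomic.StoreUint32(&w.pending, 1)
}

func (w *wakeFlag) directWake() {
	atomic.StoreUint32(&w.pending, 0)
	w.wake()
}

func (w *wakeFlag) monitorTick() {
	if atomic.LoadUint32(&w.pending) != 0 {
		w.directWake()
	}
}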
// scavengeSleep attempts to put the scavenger to sleep for ns.
-// It also checks to see if gen != scavenge.gen before going to sleep,
-// and aborts if true (meaning an update had occurred).
//
// Note that this function should only be called by the scavenger.
//
// The scavenger may be woken up earlier by a pacing change, and it may not go
// to sleep at all if there's a pending pacing change.
//
-// Returns false if awoken early (i.e. true means a complete sleep).
-func scavengeSleep(gen uint32, ns int64) bool {
+// Returns the amount of time actually slept.
+func scavengeSleep(ns int64) int64 {
lock(&scavenge.lock)
- // If there was an update, just abort the sleep.
- if scavenge.gen != gen {
- unlock(&scavenge.lock)
- return false
- }
-
// Set the timer.
- now := nanotime()
- scavenge.timer.when = now + ns
- startTimer(scavenge.timer)
+ //
+ // This must happen here instead of inside gopark
+ // because we can't close over any variables without
+ // failing escape analysis.
+ start := nanotime()
+ resetTimer(scavenge.timer, start+ns)
- // Park the goroutine. It's fine that we don't publish the
- // fact that the timer was set; even if the timer wakes up
- // and fire scavengeReady before we park, it'll block on
- // scavenge.lock.
+ // Mark ourself as asleep and go to sleep.
scavenge.parked = true
goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
- // Return true if we completed the full sleep.
- return (nanotime() - now) >= ns
+ // Return how long we actually slept for.
+ return nanotime() - start
}
// Background scavenger.
@@ -243,123 +252,676 @@
func bgscavenge(c chan int) {
scavenge.g = getg()
+ lockInit(&scavenge.lock, lockRankScavenge)
lock(&scavenge.lock)
scavenge.parked = true
scavenge.timer = new(timer)
scavenge.timer.f = func(_ interface{}, _ uintptr) {
- lock(&scavenge.lock)
- wakeScavengerLocked()
- unlock(&scavenge.lock)
+ wakeScavenger()
}
c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- // Parameters for sleeping.
+ // Exponentially-weighted moving average of the fraction of time this
+ // goroutine spends scavenging (that is, percent of a single CPU).
+ // It represents a measure of scheduling overheads which might extend
+ // the sleep or the critical time beyond what's expected. Assume no
+ // overhead to begin with.
//
- // If we end up doing more work than we need, we should avoid spinning
- // until we have more work to do: instead, we know exactly how much time
- // until more work will need to be done, so we sleep.
- //
- // We should avoid sleeping for less than minSleepNS because Gosched()
- // overheads among other things will work out better in that case.
- //
- // There's no reason to set a maximum on sleep time because we'll always
- // get woken up earlier if there's any kind of update that could change
- // the scavenger's pacing.
- //
- // retryDelayNS tracks how much to sleep next time we fail to do any
- // useful work.
- const minSleepNS = int64(100 * 1000) // 100 µs
-
- retryDelayNS := minSleepNS
+ // TODO(mknyszek): Consider making this based on total CPU time of the
+ // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
+ // feasible now because the scavenger acquires the heap lock over the
+ // scavenging operation, which means scavenging effectively blocks
+ // allocators and isn't scalable. However, given a scalable allocator,
+ // it makes sense to also make the scavenger scale with it; if you're
+ // allocating more frequently, then presumably you're also generating
+ // more work for the scavenger.
+ const idealFraction = scavengePercent / 100.0
+ scavengeEWMA := float64(idealFraction)
for {
released := uintptr(0)
- park := false
- ttnext := int64(0)
- gen := uint32(0)
+
+ // Time in scavenging critical section.
+ crit := float64(0)
// Run on the system stack since we grab the heap lock,
// and a stack growth with the heap lock means a deadlock.
systemstack(func() {
lock(&mheap_.lock)
- gen = scavenge.gen
-
// If background scavenging is disabled or if there's no work to do just park.
- retained := heapRetained()
- if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
+ retained, goal := heapRetained(), mheap_.scavengeGoal
+ if retained <= goal {
unlock(&mheap_.lock)
- park = true
return
}
- // Calculate how big we want the retained heap to be
- // at this point in time.
- //
- // The formula is for that of a line, y = b - mx
- // We want y (want),
- // m = scavengeBytesPerNS (> 0)
- // x = time between scavengeTimeBasis and now
- // b = scavengeRetainedBasis
- rate := mheap_.scavengeBytesPerNS
- tdist := nanotime() - mheap_.scavengeTimeBasis
- rdist := uint64(rate * float64(tdist))
- want := mheap_.scavengeRetainedBasis - rdist
+ // Scavenge one page, and measure the amount of time spent scavenging.
+ start := nanotime()
+ released = mheap_.pages.scavenge(physPageSize, true)
+ mheap_.pages.scav.released += released
+ crit = float64(nanotime() - start)
- // If we're above the line, scavenge to get below the
- // line.
- if retained > want {
- released = mheap_.scavengeLocked(uintptr(retained - want))
- }
unlock(&mheap_.lock)
-
- // If we over-scavenged a bit, calculate how much time it'll
- // take at the current rate for us to make that up. We definitely
- // won't have any work to do until at least that amount of time
- // passes.
- if released > uintptr(retained-want) {
- extra := released - uintptr(retained-want)
- ttnext = int64(float64(extra) / rate)
- }
})
- if park {
+ if released == 0 {
lock(&scavenge.lock)
scavenge.parked = true
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
continue
}
- if debug.gctrace > 0 {
- if released > 0 {
- print("scvg: ", released>>20, " MB released\n")
- }
- print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+ if released < physPageSize {
+ // If this happens, it means that we may have attempted to release part
+ // of a physical page, but the likely effect of that is that it released
+ // the whole physical page, some of which may have still been in-use.
+ // This could lead to memory corruption. Throw.
+ throw("released less than one physical page of memory")
}
- if released == 0 {
- // If we were unable to release anything this may be because there's
- // no free memory available to scavenge. Go to sleep and try again.
- if scavengeSleep(gen, retryDelayNS) {
- // If we successfully slept through the delay, back off exponentially.
- retryDelayNS *= 2
- }
- continue
- }
- retryDelayNS = minSleepNS
-
- if ttnext > 0 && ttnext > minSleepNS {
- // If there's an appreciable amount of time until the next scavenging
- // goal, just sleep. We'll get woken up if anything changes and this
- // way we avoid spinning.
- scavengeSleep(gen, ttnext)
- continue
+ // On some platforms we may see crit as zero if the time it takes to scavenge
+ // memory is less than the minimum granularity of its clock (e.g. Windows).
+ // In this case, just assume scavenging takes 10 µs per regular physical page
+ // (determined empirically), and conservatively ignore the impact of huge pages
+ // on timing.
+ //
+ // We shouldn't ever see a crit value less than zero unless there's a bug of
+ // some kind, either on our side or in the platform we're running on, but be
+ // defensive in that case as well.
+ const approxCritNSPerPhysicalPage = 10e3
+ if crit <= 0 {
+ crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
}
- // Give something else a chance to run, no locks are held.
- Gosched()
+ // Multiply the critical time by 1 + the ratio of the costs of using
+ // scavenged memory vs. scavenging memory. This forces us to pay down
+ // the cost of reusing this memory eagerly by sleeping for a longer period
+ // of time and scavenging less frequently. More concretely, we avoid situations
+ // where we end up scavenging so often that we hurt allocation performance
+ // because of the additional overheads of using scavenged memory.
+ crit *= 1 + scavengeCostRatio
+
+ // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
+ // put their machine to sleep) in the critical section, bound the time we use to
+ // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
+ const maxCrit = 10e6
+ if crit > maxCrit {
+ crit = maxCrit
+ }
+
+ // Compute the amount of time to sleep, assuming we want to use at most
+ // scavengePercent of CPU time. Take into account scheduling overheads
+ // that may extend the length of our sleep by multiplying by how far
+ // off we are from the ideal ratio. For example, if we're sleeping too
+ // much, then scavengeEWMA < idealFraction, so we'll adjust the sleep time
+ // down.
+ adjust := scavengeEWMA / idealFraction
+ sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
+
+ // Go to sleep.
+ slept := scavengeSleep(sleepTime)
+
+ // Compute the new ratio.
+ fraction := crit / (crit + float64(slept))
+
+ // Set a lower bound on the fraction.
+ // Due to OS-related anomalies we may "sleep" for an inordinate amount
+ // of time. Let's avoid letting the ratio get out of hand by bounding
+ // the sleep time we use in our EWMA.
+ const minFraction = 1.0 / 1000
+ if fraction < minFraction {
+ fraction = minFraction
+ }
+
+ // Update scavengeEWMA by merging in the new crit/slept ratio.
+ const alpha = 0.5
+ scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
}
}
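
The sleep pacing in the loop above can be written down in isolation as follows. This is only a sketch: scavengePercent is assumed to be 1 (the 1% CPU target defined elsewhere in this file), and the helper names are invented for illustration.

// scavengeSleepTime returns how long to sleep after spending critNS
// nanoseconds in the scavenging critical section, given the current EWMA
// of the scavenger's CPU fraction.
func scavengeSleepTime(critNS, ewma float64) int64 {
	const scavengePercent = 1 // assumed: target percent of one CPU
	const idealFraction = scavengePercent / 100.0
	adjust := ewma / idealFraction
	return int64(adjust * critNS / idealFraction)
}

// updateScavengeEWMA folds the observed fraction of time spent scavenging
// (critNS out of critNS+sleptNS) back into the moving average.
func updateScavengeEWMA(ewma, critNS, sleptNS float64) float64 {
	const alpha = 0.5
	fraction := critNS / (critNS + sleptNS)
	if fraction < 1.0/1000 {
		fraction = 1.0 / 1000 // same lower bound as above
	}
	return alpha*fraction + (1-alpha)*ewma
}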
+
+// scavenge scavenges nbytes worth of free pages, starting with the
+// highest address first. Successive calls continue from where it left
+// off until the heap is exhausted. Call scavengeStartGen to bring it
+// back to the top of the heap.
+//
+// Returns the amount of memory scavenged in bytes.
+//
+// s.mheapLock must be held, but may be temporarily released if
+// mayUnlock == true.
+//
+// Must run on the system stack because s.mheapLock must be held.
+//
+//go:systemstack
+func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
+ var (
+ addrs addrRange
+ gen uint32
+ )
+ released := uintptr(0)
+ for released < nbytes {
+ if addrs.size() == 0 {
+ if addrs, gen = s.scavengeReserve(); addrs.size() == 0 {
+ break
+ }
+ }
+ r, a := s.scavengeOne(addrs, nbytes-released, mayUnlock)
+ released += r
+ addrs = a
+ }
+ // Only unreserve the space which hasn't been scavenged or searched
+ // to ensure we always make progress.
+ s.scavengeUnreserve(addrs, gen)
+ return released
+}
+
+// printScavTrace prints a scavenge trace line to standard error.
+//
+// released should be the amount of memory released since the last time this
+// was called, and forced indicates whether the scavenge was forced by the
+// application.
+func printScavTrace(gen uint32, released uintptr, forced bool) {
+ printlock()
+ print("scav ", gen, " ",
+ released>>10, " KiB work, ",
+ atomic.Load64(&memstats.heap_released)>>10, " KiB total, ",
+ (atomic.Load64(&memstats.heap_inuse)*100)/heapRetained(), "% util",
+ )
+ if forced {
+ print(" (forced)")
+ }
+ println()
+ printunlock()
+}
+
+// scavengeStartGen starts a new scavenge generation, resetting
+// the scavenger's search space to the full in-use address space.
+//
+// s.mheapLock must be held.
+//
+// Must run on the system stack because s.mheapLock must be held.
+//
+//go:systemstack
+func (s *pageAlloc) scavengeStartGen() {
+ if debug.scavtrace > 0 {
+ printScavTrace(s.scav.gen, s.scav.released, false)
+ }
+ s.inUse.cloneInto(&s.scav.inUse)
+
+ // Pick the new starting address for the scavenger cycle.
+ var startAddr offAddr
+ if s.scav.scavLWM.lessThan(s.scav.freeHWM) {
+ // The "free" high watermark exceeds the "scavenged" low watermark,
+ // so there are free scavengable pages in parts of the address space
+ // that the scavenger already searched, the high watermark being the
+ // highest one. Pick that as our new starting point to ensure we
+ // see those pages.
+ startAddr = s.scav.freeHWM
+ } else {
+ // The "free" high watermark does not exceed the "scavenged" low
+ // watermark. This means the allocator didn't free any memory in
+ // the range we scavenged last cycle, so we might as well continue
+ // scavenging from where we were.
+ startAddr = s.scav.scavLWM
+ }
+ s.scav.inUse.removeGreaterEqual(startAddr.addr())
+
+ // reservationBytes may be zero if s.inUse.totalBytes is small, or if
+ // scavengeReservationShards is large. This case is fine as the scavenger
+ // will simply be turned off, but it does mean that scavengeReservationShards,
+ // in concert with pallocChunkBytes, dictates the minimum heap size at which
+ // the scavenger triggers. In practice this minimum is generally less than an
+ // arena in size, so virtually every heap has the scavenger on.
+ s.scav.reservationBytes = alignUp(s.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
+ s.scav.gen++
+ s.scav.released = 0
+ s.scav.freeHWM = minOffAddr
+ s.scav.scavLWM = maxOffAddr
+}
+
+// scavengeReserve reserves a contiguous range of the address space
+// for scavenging. The maximum amount of space it reserves is proportional
+// to the size of the heap. The ranges are reserved from the high addresses
+// first.
+//
+// Returns the reserved range and the scavenge generation number for it.
+//
+// s.mheapLock must be held.
+//
+// Must run on the system stack because s.mheapLock must be held.
+//
+//go:systemstack
+func (s *pageAlloc) scavengeReserve() (addrRange, uint32) {
+ // Start by reserving the minimum.
+ r := s.scav.inUse.removeLast(s.scav.reservationBytes)
+
+ // Return early if the size is zero; we don't want to use
+ // the bogus address below.
+ if r.size() == 0 {
+ return r, s.scav.gen
+ }
+
+ // The scavenger requires that base be aligned to a
+ // palloc chunk because that's the unit of operation for
+ // the scavenger, so align down, potentially extending
+ // the range.
+ newBase := alignDown(r.base.addr(), pallocChunkBytes)
+
+ // Remove from inUse however much extra we just pulled out.
+ s.scav.inUse.removeGreaterEqual(newBase)
+ r.base = offAddr{newBase}
+ return r, s.scav.gen
+}
+
+// scavengeUnreserve returns an unscavenged portion of a range that was
+// previously reserved with scavengeReserve.
+//
+// s.mheapLock must be held.
+//
+// Must run on the system stack because s.mheapLock must be held.
+//
+//go:systemstack
+func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
+ if r.size() == 0 || gen != s.scav.gen {
+ return
+ }
+ if r.base.addr()%pallocChunkBytes != 0 {
+ throw("unreserving unaligned region")
+ }
+ s.scav.inUse.add(r)
+}
+
+// scavengeOne walks over address range work until it finds
+// a contiguous run of pages to scavenge. It will try to scavenge
+// at most max bytes at once, but may scavenge more to avoid
+// breaking huge pages. Once it scavenges some memory it returns
+// how much it scavenged in bytes.
+//
+// Returns the number of bytes scavenged and the part of work
+// which was not yet searched.
+//
+// work's base address must be aligned to pallocChunkBytes.
+//
+// s.mheapLock must be held, but may be temporarily released if
+// mayUnlock == true.
+//
+// Must run on the system stack because s.mheapLock must be held.
+//
+//go:systemstack
+func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
+ // Defensively check if we've received an empty address range.
+ // If so, just return.
+ if work.size() == 0 {
+ // Nothing to do.
+ return 0, work
+ }
+ // Check the prerequisites of work.
+ if work.base.addr()%pallocChunkBytes != 0 {
+ throw("scavengeOne called with unaligned work region")
+ }
+ // Calculate the maximum number of pages to scavenge.
+ //
+ // This should be alignUp(max, pageSize) / pageSize but max can and will
+ // be ^uintptr(0), so we need to be very careful not to overflow here.
+ // Rather than use alignUp, calculate the number of pages rounded down
+ // first, then add back one if necessary.
+ maxPages := max / pageSize
+ if max%pageSize != 0 {
+ maxPages++
+ }
+
+ // Calculate the minimum number of pages we can scavenge.
+ //
+ // Because we can only scavenge whole physical pages, we must
+ // ensure that we scavenge at least minPages each time, aligned
+ // to minPages*pageSize.
+ minPages := physPageSize / pageSize
+ if minPages < 1 {
+ minPages = 1
+ }
+
+ // Helpers for locking and unlocking only if mayUnlock == true.
+ lockHeap := func() {
+ if mayUnlock {
+ lock(s.mheapLock)
+ }
+ }
+ unlockHeap := func() {
+ if mayUnlock {
+ unlock(s.mheapLock)
+ }
+ }
+
+ // Fast path: check the chunk containing the top-most address in work,
+ // starting at that address's page index in the chunk.
+ //
+ // Note that work.end() is exclusive, so get the chunk we care about
+ // by subtracting 1.
+ maxAddr := work.limit.addr() - 1
+ maxChunk := chunkIndex(maxAddr)
+ if s.summary[len(s.summary)-1][maxChunk].max() >= uint(minPages) {
+ // We only bother looking for a candidate if there are at least
+ // minPages free pages at all.
+ base, npages := s.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+
+ // If we found something, scavenge it and return!
+ if npages != 0 {
+ work.limit = offAddr{s.scavengeRangeLocked(maxChunk, base, npages)}
+ return uintptr(npages) * pageSize, work
+ }
+ }
+ // Update the limit to reflect the fact that we checked maxChunk already.
+ work.limit = offAddr{chunkBase(maxChunk)}
+
+ // findCandidate finds the next scavenge candidate in work optimistically.
+ //
+ // Returns the candidate chunk index and true on success, and false on failure.
+ //
+ // The heap need not be locked.
+ findCandidate := func(work addrRange) (chunkIdx, bool) {
+ // Iterate over this work's chunks.
+ for i := chunkIndex(work.limit.addr() - 1); i >= chunkIndex(work.base.addr()); i-- {
+ // If this chunk is totally in-use or has no unscavenged pages, don't bother
+ // doing a more sophisticated check.
+ //
+ // Note we're accessing the summary and the chunks without a lock, but
+ // that's fine. We're being optimistic anyway.
+
+ // Check quickly if there are enough free pages at all.
+ if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
+ continue
+ }
+
+ // Run over the chunk looking harder for a candidate. Again, we could
+ // race with a lot of different pieces of code, but we're just being
+ // optimistic. Make sure we load the l2 pointer atomically though, to
+ // avoid races with heap growth. It may or may not be possible to also
+ // see a nil pointer in this case if we do race with heap growth, but
+ // just defensively ignore the nils. This operation is optimistic anyway.
+ l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
+ if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) {
+ return i, true
+ }
+ }
+ return 0, false
+ }
+
+ // Slow path: iterate optimistically over the in-use address space
+ // looking for any free and unscavenged page. If we think we see something,
+ // lock and verify it!
+ for work.size() != 0 {
+ unlockHeap()
+
+ // Search for the candidate.
+ candidateChunkIdx, ok := findCandidate(work)
+
+ // Lock the heap. We need to do this now whether or not we found a candidate.
+ // If we did, we'll verify it. If not, we need to lock before returning
+ // anyway.
+ lockHeap()
+
+ if !ok {
+ // We didn't find a candidate, so we're done.
+ work.limit = work.base
+ break
+ }
+
+ // Find, verify, and scavenge if we can.
+ chunk := s.chunkOf(candidateChunkIdx)
+ base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
+ if npages > 0 {
+ work.limit = offAddr{s.scavengeRangeLocked(candidateChunkIdx, base, npages)}
+ return uintptr(npages) * pageSize, work
+ }
+
+ // We were fooled, so let's continue from where we left off.
+ work.limit = offAddr{chunkBase(candidateChunkIdx)}
+ }
+ return 0, work
+}
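
The slow path above follows a common optimistic pattern: search without the heap lock, then re-acquire the lock and verify the candidate before acting on it. A generic sketch of that shape, with sync.Mutex standing in for the runtime's heap lock; the caller holds mu on entry, and the search function is expected to make progress (as scavengeOne does by shrinking work) so the loop terminates.

package sketch

import "sync"

// optimisticFind searches for a candidate without holding mu, then verifies
// it under mu before returning it. If verification fails, it retries.
func optimisticFind(mu *sync.Mutex, search func() (int, bool), verify func(int) bool) (int, bool) {
	for {
		mu.Unlock()
		cand, ok := search() // optimistic: may observe stale state
		mu.Lock()
		if !ok {
			return 0, false // nothing left to find
		}
		if verify(cand) { // confirmed while holding the lock
			return cand, true
		}
		// We were fooled by a stale read; try again.
	}
}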
+
+// scavengeRangeLocked scavenges the given region of memory.
+// The region of memory is described by its chunk index (ci),
+// the starting page index of the region relative to that
+// chunk (base), and the length of the region in pages (npages).
+//
+// Returns the base address of the scavenged region.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
+ s.chunkOf(ci).scavenged.setRange(base, npages)
+
+ // Compute the full address for the start of the range.
+ addr := chunkBase(ci) + uintptr(base)*pageSize
+
+ // Update the scavenge low watermark.
+ if oAddr := (offAddr{addr}); oAddr.lessThan(s.scav.scavLWM) {
+ s.scav.scavLWM = oAddr
+ }
+
+ // Only perform the actual scavenging if we're not in a test.
+ // It's dangerous to do so otherwise.
+ if s.test {
+ return addr
+ }
+ sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+
+ // Update global accounting only when not in test, otherwise
+ // the runtime's accounting will be wrong.
+ mSysStatInc(&memstats.heap_released, uintptr(npages)*pageSize)
+ return addr
+}
+
+// fillAligned returns x but with all zeroes in m-aligned
+// groups of m bits set to 1 if any bit in the group is non-zero.
+//
+// For example, fillAligned(0x0100a3, 8) == 0xff00ff.
+//
+// Note that if m == 1, this is a no-op.
+//
+// m must be a power of 2 <= maxPagesPerPhysPage.
+func fillAligned(x uint64, m uint) uint64 {
+ apply := func(x uint64, c uint64) uint64 {
+ // The technique used here is derived from
+ // https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ // and extended for more than just bytes (like nibbles
+ // and uint16s) by using an appropriate constant.
+ //
+ // To summarize the technique, quoting from that page:
+ // "[It] works by first zeroing the high bits of the [8]
+ // bytes in the word. Subsequently, it adds a number that
+ // will result in an overflow to the high bit of a byte if
+ // any of the low bits were initially set. Next the high
+ // bits of the original word are ORed with these values;
+ // thus, the high bit of a byte is set iff any bit in the
+ // byte was set. Finally, we determine if any of these high
+ // bits are zero by ORing with ones everywhere except the
+ // high bits and inverting the result."
+ return ^((((x & c) + c) | x) | c)
+ }
+ // Transform x to contain a 1 bit at the top of each m-aligned
+ // group of m zero bits.
+ switch m {
+ case 1:
+ return x
+ case 2:
+ x = apply(x, 0x5555555555555555)
+ case 4:
+ x = apply(x, 0x7777777777777777)
+ case 8:
+ x = apply(x, 0x7f7f7f7f7f7f7f7f)
+ case 16:
+ x = apply(x, 0x7fff7fff7fff7fff)
+ case 32:
+ x = apply(x, 0x7fffffff7fffffff)
+ case 64: // == maxPagesPerPhysPage
+ x = apply(x, 0x7fffffffffffffff)
+ default:
+ throw("bad m value")
+ }
+ // Now, the top bit of each m-aligned group in x is set
+ // if that group was all zero in the original x.
+
+ // From each group of m bits subtract 1.
+ // Because we know only the top bits of each
+ // m-aligned group are set, we know this will
+ // set each group to have all the bits set except
+ // the top bit, so just OR with the original
+ // result to set all the bits.
+ return ^((x - (x >> (m - 1))) | x)
+}
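
The worked example in the comment above (fillAligned(0x0100a3, 8) == 0xff00ff) can be checked against a naive reference loop, the same shape as fillAlignedSlow in the new test file further down. A runnable sketch:

package main

import "fmt"

// fillAlignedSlow is a naive reference: any non-zero bit in an m-aligned
// group of m bits sets the whole group to 1s.
func fillAlignedSlow(x uint64, m uint) uint64 {
	if m == 1 {
		return x
	}
	out := uint64(0)
	for i := uint(0); i < 64; i += m {
		for j := uint(0); j < m; j++ {
			if x&(uint64(1)<<(i+j)) != 0 {
				out |= ((uint64(1) << m) - 1) << i
				break
			}
		}
	}
	return out
}

func main() {
	// Matches the documented example: fillAligned(0x0100a3, 8) == 0xff00ff.
	fmt.Printf("%#x\n", fillAlignedSlow(0x0100a3, 8)) // 0xff00ff
}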
+
+// hasScavengeCandidate returns true if there are any min-page-aligned groups of
+// min pages of free-and-unscavenged memory in the region represented by this
+// pallocData.
+//
+// min must be a non-zero power of 2 <= maxPagesPerPhysPage.
+func (m *pallocData) hasScavengeCandidate(min uintptr) bool {
+ if min&(min-1) != 0 || min == 0 {
+ print("runtime: min = ", min, "\n")
+ throw("min must be a non-zero power of 2")
+ } else if min > maxPagesPerPhysPage {
+ print("runtime: min = ", min, "\n")
+ throw("min too large")
+ }
+
+ // The goal of this search is to see if the chunk contains any free and unscavenged memory.
+ for i := len(m.scavenged) - 1; i >= 0; i-- {
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ //
+ // TODO(mknyszek): Consider splitting up fillAligned into two
+ // functions, since here we technically could get by with just
+ // the first half of its computation. It'll save a few instructions
+ // but adds some additional code complexity.
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+
+ // Quickly skip over chunks of non-free or scavenged pages.
+ if x != ^uint64(0) {
+ return true
+ }
+ }
+ return false
+}
+
+// findScavengeCandidate returns a start index and a size for this pallocData
+// segment which represents a contiguous region of free and unscavenged memory.
+//
+// searchIdx indicates the page index within this chunk to start the search, but
+// note that findScavengeCandidate searches backwards through the pallocData. As
+// a result, it will return the highest scavenge candidate in address order.
+//
+// min indicates a hard minimum size and alignment for runs of pages. That is,
+// findScavengeCandidate will not return a region smaller than min pages in size,
+// or that is min pages or greater in size but not aligned to min. min must be
+// a non-zero power of 2 <= maxPagesPerPhysPage.
+//
+// max is a hint for how big of a region is desired. If max >= pallocChunkPages, then
+// findScavengeCandidate effectively returns entire free and unscavenged regions.
+// If max < pallocChunkPages, it may truncate the returned region such that size is
+// max. However, findScavengeCandidate may still return a larger region if, for
+// example, it chooses to preserve huge pages, or if max is not aligned to min (it
+// will round up). That is, even if max is small, the returned size is not guaranteed
+// to be equal to max. max is allowed to be less than min, in which case it is as if
+// max == min.
+func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (uint, uint) {
+ if min&(min-1) != 0 || min == 0 {
+ print("runtime: min = ", min, "\n")
+ throw("min must be a non-zero power of 2")
+ } else if min > maxPagesPerPhysPage {
+ print("runtime: min = ", min, "\n")
+ throw("min too large")
+ }
+ // max may not be min-aligned, so we might accidentally truncate to
+ // a max value which causes us to return a non-min-aligned value.
+ // To prevent this, align max up to a multiple of min (which is always
+ // a power of 2). This also prevents max from ever being less than
+ // min, unless it's zero, so handle that explicitly.
+ if max == 0 {
+ max = min
+ } else {
+ max = alignUp(max, min)
+ }
+
+ i := int(searchIdx / 64)
+ // Start by quickly skipping over blocks of non-free or scavenged pages.
+ for ; i >= 0; i-- {
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+ if x != ^uint64(0) {
+ break
+ }
+ }
+ if i < 0 {
+ // Failed to find any free/unscavenged pages.
+ return 0, 0
+ }
+ // We have something in the 64-bit chunk at i, but it could
+ // extend further. Loop until we find the extent of it.
+
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+ z1 := uint(sys.LeadingZeros64(^x))
+ run, end := uint(0), uint(i)*64+(64-z1)
+ if x<<z1 != 0 {
+ // After shifting out z1 bits, we still have 1s,
+ // so the run ends inside this word.
+ run = uint(sys.LeadingZeros64(x << z1))
+ } else {
+ // After shifting out z1 bits, we have no more 1s.
+ // This means the run extends to the bottom of the
+ // word so it may extend into further words.
+ run = 64 - z1
+ for j := i - 1; j >= 0; j-- {
+ x := fillAligned(m.scavenged[j]|m.pallocBits[j], uint(min))
+ run += uint(sys.LeadingZeros64(x))
+ if x != 0 {
+ // The run stopped in this word.
+ break
+ }
+ }
+ }
+
+ // Split the run we found if it's larger than max but hold on to
+ // our original length, since we may need it later.
+ size := run
+ if size > uint(max) {
+ size = uint(max)
+ }
+ start := end - size
+
+ // Each huge page is guaranteed to fit in a single palloc chunk.
+ //
+ // TODO(mknyszek): Support larger huge page sizes.
+ // TODO(mknyszek): Consider taking pages-per-huge-page as a parameter
+ // so we can write tests for this.
+ if physHugePageSize > pageSize && physHugePageSize > physPageSize {
+ // We have huge pages, so let's ensure we don't break one by scavenging
+ // over a huge page boundary. If the range [start, start+size) overlaps with
+ // a free-and-unscavenged huge page, we want to grow the region we scavenge
+ // to include that huge page.
+
+ // Compute the huge page boundary above our candidate.
+ pagesPerHugePage := uintptr(physHugePageSize / pageSize)
+ hugePageAbove := uint(alignUp(uintptr(start), pagesPerHugePage))
+
+ // If that boundary is within our current candidate, then we may be breaking
+ // a huge page.
+ if hugePageAbove <= end {
+ // Compute the huge page boundary below our candidate.
+ hugePageBelow := uint(alignDown(uintptr(start), pagesPerHugePage))
+
+ if hugePageBelow >= end-run {
+ // We're in danger of breaking apart a huge page since start+size crosses
+ // a huge page boundary and rounding down start to the nearest huge
+ // page boundary is included in the full run we found. Include the entire
+ // huge page in the bound by rounding down to the huge page size.
+ size = size + (start - hugePageBelow)
+ start = hugePageBelow
+ }
+ }
+ }
+ return start, size
+}
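
The single-word core of the search above, finding the highest run of zero bits with LeadingZeros64, can be sketched on its own. This is illustrative only: it uses math/bits in place of the runtime's sys package, and the helper name is invented.

package main

import (
	"fmt"
	"math/bits"
)

// highestZeroRun finds the highest-addressed run of 0 bits in a single
// 64-bit bitmap word (1 = scavenged or in use, 0 = free and unscavenged),
// mirroring the single-word case of findScavengeCandidate above.
func highestZeroRun(x uint64) (start, size uint) {
	if x == ^uint64(0) {
		return 0, 0 // no candidate
	}
	z1 := uint(bits.LeadingZeros64(^x)) // number of 1 bits above the run
	end := 64 - z1                      // exclusive end index of the run
	if x<<z1 != 0 {
		// More 1s below: the run ends inside this word.
		size = uint(bits.LeadingZeros64(x << z1))
	} else {
		// The run reaches bit 0 of the word.
		size = 64 - z1
	}
	return end - size, size
}

func main() {
	// Bits 8..15 are the only zero bits, so the run is start=8, size=8.
	x := ^uint64(0) &^ (uint64(0xff) << 8)
	fmt.Println(highestZeroRun(x)) // 8 8
}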
diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go
new file mode 100644
index 0000000..7f619b1
--- /dev/null
+++ b/src/runtime/mgcscavenge_test.go
@@ -0,0 +1,443 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+// makePallocData produces an initialized PallocData by setting
+// the ranges described in alloc and scavenged.
+func makePallocData(alloc, scavenged []BitRange) *PallocData {
+ b := new(PallocData)
+ for _, v := range alloc {
+ if v.N == 0 {
+ // Skip N==0. It's harmless and allocRange doesn't
+ // handle this case.
+ continue
+ }
+ b.AllocRange(v.I, v.N)
+ }
+ for _, v := range scavenged {
+ if v.N == 0 {
+ // See the previous loop.
+ continue
+ }
+ b.ScavengedSetRange(v.I, v.N)
+ }
+ return b
+}
+
+func TestFillAligned(t *testing.T) {
+ fillAlignedSlow := func(x uint64, m uint) uint64 {
+ if m == 1 {
+ return x
+ }
+ out := uint64(0)
+ for i := uint(0); i < 64; i += m {
+ for j := uint(0); j < m; j++ {
+ if x&(uint64(1)<<(i+j)) != 0 {
+ out |= ((uint64(1) << m) - 1) << i
+ break
+ }
+ }
+ }
+ return out
+ }
+ check := func(x uint64, m uint) {
+ want := fillAlignedSlow(x, m)
+ if got := FillAligned(x, m); got != want {
+ t.Logf("got: %064b", got)
+ t.Logf("want: %064b", want)
+ t.Errorf("bad fillAligned(%016x, %d)", x, m)
+ }
+ }
+ for m := uint(1); m <= 64; m *= 2 {
+ tests := []uint64{
+ 0x0000000000000000,
+ 0x00000000ffffffff,
+ 0xffffffff00000000,
+ 0x8000000000000001,
+ 0xf00000000000000f,
+ 0xf00000010050000f,
+ 0xffffffffffffffff,
+ 0x0000000000000001,
+ 0x0000000000000002,
+ 0x0000000000000008,
+ uint64(1) << (m - 1),
+ uint64(1) << m,
+ // Try a few fixed arbitrary examples.
+ 0xb02b9effcf137016,
+ 0x3975a076a9fbff18,
+ 0x0f8c88ec3b81506e,
+ 0x60f14d80ef2fa0e6,
+ }
+ for _, test := range tests {
+ check(test, m)
+ }
+ for i := 0; i < 1000; i++ {
+ // Try pseudo-random numbers.
+ check(rand.Uint64(), m)
+
+ if m > 1 {
+ // For m != 1, let's construct a slightly more interesting
+ // random test. Generate a bitmap which is either 0 or
+ // randomly set bits for each m-aligned group of m bits.
+ val := uint64(0)
+ for n := uint(0); n < 64; n += m {
+ // For each group of m bits, flip a coin:
+ // * Leave them as zero.
+ // * Set them randomly.
+ if rand.Uint64()%2 == 0 {
+ val |= (rand.Uint64() & ((1 << m) - 1)) << n
+ }
+ }
+ check(val, m)
+ }
+ }
+ }
+}
+
+func TestPallocDataFindScavengeCandidate(t *testing.T) {
+ type test struct {
+ alloc, scavenged []BitRange
+ min, max uintptr
+ want BitRange
+ }
+ tests := map[string]test{
+ "MixedMin1": {
+ alloc: []BitRange{{0, 40}, {42, PallocChunkPages - 42}},
+ scavenged: []BitRange{{0, 41}, {42, PallocChunkPages - 42}},
+ min: 1,
+ max: PallocChunkPages,
+ want: BitRange{41, 1},
+ },
+ "MultiMin1": {
+ alloc: []BitRange{{0, 63}, {65, 20}, {87, PallocChunkPages - 87}},
+ scavenged: []BitRange{{86, 1}},
+ min: 1,
+ max: PallocChunkPages,
+ want: BitRange{85, 1},
+ },
+ }
+ // Try out different page minimums.
+ for m := uintptr(1); m <= 64; m *= 2 {
+ suffix := fmt.Sprintf("Min%d", m)
+ tests["AllFree"+suffix] = test{
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, PallocChunkPages},
+ }
+ tests["AllScavenged"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ tests["NoneFree"+suffix] = test{
+ alloc: []BitRange{{0, PallocChunkPages}},
+ scavenged: []BitRange{{PallocChunkPages / 2, PallocChunkPages / 2}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ tests["StartFree"+suffix] = test{
+ alloc: []BitRange{{uint(m), PallocChunkPages - uint(m)}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, uint(m)},
+ }
+ tests["StartFree"+suffix] = test{
+ alloc: []BitRange{{uint(m), PallocChunkPages - uint(m)}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, uint(m)},
+ }
+ tests["EndFree"+suffix] = test{
+ alloc: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ tests["Straddle64"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64 + uint(m), PallocChunkPages - (64 + uint(m))}},
+ min: m,
+ max: 2 * m,
+ want: BitRange{64 - uint(m), 2 * uint(m)},
+ }
+ tests["BottomEdge64WithFull"+suffix] = test{
+ alloc: []BitRange{{64, 64}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
+ scavenged: []BitRange{{1, 10}},
+ min: m,
+ max: 3 * m,
+ want: BitRange{128, 3 * uint(m)},
+ }
+ tests["BottomEdge64WithPocket"+suffix] = test{
+ alloc: []BitRange{{64, 62}, {127, 1}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
+ scavenged: []BitRange{{1, 10}},
+ min: m,
+ max: 3 * m,
+ want: BitRange{128, 3 * uint(m)},
+ }
+ tests["Max0"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: 0,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ if m <= 8 {
+ tests["OneFree"] = test{
+ alloc: []BitRange{{0, 40}, {40 + uint(m), PallocChunkPages - (40 + uint(m))}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{40, uint(m)},
+ }
+ tests["OneScavenged"] = test{
+ alloc: []BitRange{{0, 40}, {40 + uint(m), PallocChunkPages - (40 + uint(m))}},
+ scavenged: []BitRange{{40, 1}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ }
+ if m > 1 {
+ tests["MaxUnaligned"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m*2-1)}},
+ min: m,
+ max: m - 2,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ tests["SkipSmall"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64, 5}, {70, 11}, {82, PallocChunkPages - 82}},
+ min: m,
+ max: m,
+ want: BitRange{64 - uint(m), uint(m)},
+ }
+ tests["SkipMisaligned"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64, 63}, {127 + uint(m), PallocChunkPages - (127 + uint(m))}},
+ min: m,
+ max: m,
+ want: BitRange{64 - uint(m), uint(m)},
+ }
+ tests["MaxLessThan"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: 1,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ }
+ }
+ if PhysHugePageSize > uintptr(PageSize) {
+ // Check hugepage preserving behavior.
+ bits := uint(PhysHugePageSize / uintptr(PageSize))
+ tests["PreserveHugePageBottom"] = test{
+ alloc: []BitRange{{bits + 2, PallocChunkPages - (bits + 2)}},
+ min: 1,
+ max: 3, // Make it so that max would have us try to break the huge page.
+ want: BitRange{0, bits + 2},
+ }
+ if 3*bits < PallocChunkPages {
+ // We need at least 3 huge pages in a chunk for this test to make sense.
+ tests["PreserveHugePageMiddle"] = test{
+ alloc: []BitRange{{0, bits - 10}, {2*bits + 10, PallocChunkPages - (2*bits + 10)}},
+ min: 1,
+ max: 12, // Make it so that max would have us try to break the huge page.
+ want: BitRange{bits, bits + 10},
+ }
+ }
+ tests["PreserveHugePageTop"] = test{
+ alloc: []BitRange{{0, PallocChunkPages - bits}},
+ min: 1,
+ max: 1, // Even one page would break a huge page in this case.
+ want: BitRange{PallocChunkPages - bits, bits},
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocData(v.alloc, v.scavenged)
+ start, size := b.FindScavengeCandidate(PallocChunkPages-1, v.min, v.max)
+ got := BitRange{start, size}
+ if !(got.N == 0 && v.want.N == 0) && got != v.want {
+ t.Fatalf("candidate mismatch: got %v, want %v", got, v.want)
+ }
+ })
+ }
+}
+
+// Tests end-to-end scavenging on a pageAlloc.
+func TestPageAllocScavenge(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ type test struct {
+ request, expect uintptr
+ }
+ minPages := PhysPageSize / PageSize
+ if minPages < 1 {
+ minPages = 1
+ }
+ type setup struct {
+ beforeAlloc map[ChunkIdx][]BitRange
+ beforeScav map[ChunkIdx][]BitRange
+ expect []test
+ afterScav map[ChunkIdx][]BitRange
+ }
+ tests := map[string]setup{
+ "AllFreeUnscavExhaust": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ },
+ expect: []test{
+ {^uintptr(0), 3 * PallocChunkPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NoneFreeUnscavExhaust": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ expect: []test{
+ {^uintptr(0), 0},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ },
+ "ScavHighestPageFirst": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {1, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(minPages)}},
+ },
+ },
+ "ScavMultiple": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {minPages * PageSize, minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "ScavMultiple2": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {2 * minPages * PageSize, 2 * minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "ScavDiscontiguous": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0xe: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ BaseChunkIdx + 0xe: {{uint(2 * minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {2 * minPages * PageSize, 2 * minPages * PageSize},
+ {^uintptr(0), 2 * minPages * PageSize},
+ {^uintptr(0), 0},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xe: {{0, PallocChunkPages}},
+ },
+ },
+ }
+ if PageAlloc64Bit != 0 {
+ tests["ScavAllVeryDiscontiguous"] = setup{
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0x1000: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0x1000: {},
+ },
+ expect: []test{
+ {^uintptr(0), 2 * PallocChunkPages * PageSize},
+ {^uintptr(0), 0},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0x1000: {{0, PallocChunkPages}},
+ },
+ }
+ }
+ for name, v := range tests {
+ v := v
+ runTest := func(t *testing.T, mayUnlock bool) {
+ b := NewPageAlloc(v.beforeAlloc, v.beforeScav)
+ defer FreePageAlloc(b)
+
+ for iter, h := range v.expect {
+ if got := b.Scavenge(h.request, mayUnlock); got != h.expect {
+ t.Fatalf("bad scavenge #%d: want %d, got %d", iter+1, h.expect, got)
+ }
+ }
+ want := NewPageAlloc(v.beforeAlloc, v.afterScav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ }
+ t.Run(name, func(t *testing.T) {
+ runTest(t, false)
+ })
+ t.Run(name+"MayUnlock", func(t *testing.T) {
+ runTest(t, true)
+ })
+ }
+}
diff --git a/src/runtime/mgcstack.go b/src/runtime/mgcstack.go
index baeaa4f..211d882 100644
--- a/src/runtime/mgcstack.go
+++ b/src/runtime/mgcstack.go
@@ -175,12 +175,23 @@
// stack limits
stack stack
+ // conservative indicates that the next frame must be scanned conservatively.
+ // This applies only to the innermost frame at an async safe-point.
+ conservative bool
+
// buf contains the set of possible pointers to stack objects.
// Organized as a LIFO linked list of buffers.
// All buffers except possibly the head buffer are full.
buf *stackWorkBuf
freeBuf *stackWorkBuf // keep around one free buffer for allocation hysteresis
+ // cbuf contains conservative pointers to stack objects. If
+ // all pointers to a stack object are obtained via
+ // conservative scanning, then the stack object may be dead
+ // and may contain dead pointers, so it must be scanned
+ // defensively.
+ cbuf *stackWorkBuf
+
// list of stack objects
// Objects are in increasing address order.
head *stackObjectBuf
@@ -194,17 +205,21 @@
// Add p as a potential pointer to a stack object.
// p must be a stack address.
-func (s *stackScanState) putPtr(p uintptr) {
+func (s *stackScanState) putPtr(p uintptr, conservative bool) {
if p < s.stack.lo || p >= s.stack.hi {
throw("address not a stack address")
}
- buf := s.buf
+ head := &s.buf
+ if conservative {
+ head = &s.cbuf
+ }
+ buf := *head
if buf == nil {
// Initial setup.
buf = (*stackWorkBuf)(unsafe.Pointer(getempty()))
buf.nobj = 0
buf.next = nil
- s.buf = buf
+ *head = buf
} else if buf.nobj == len(buf.obj) {
if s.freeBuf != nil {
buf = s.freeBuf
@@ -213,8 +228,8 @@
buf = (*stackWorkBuf)(unsafe.Pointer(getempty()))
}
buf.nobj = 0
- buf.next = s.buf
- s.buf = buf
+ buf.next = *head
+ *head = buf
}
buf.obj[buf.nobj] = p
buf.nobj++
@@ -222,30 +237,39 @@
// Remove and return a potential pointer to a stack object.
// Returns 0 if there are no more pointers available.
-func (s *stackScanState) getPtr() uintptr {
- buf := s.buf
- if buf == nil {
- // Never had any data.
- return 0
- }
- if buf.nobj == 0 {
- if s.freeBuf != nil {
- // Free old freeBuf.
- putempty((*workbuf)(unsafe.Pointer(s.freeBuf)))
- }
- // Move buf to the freeBuf.
- s.freeBuf = buf
- buf = buf.next
- s.buf = buf
+//
+// This prefers non-conservative pointers so we scan stack objects
+// precisely if there are any non-conservative pointers to them.
+func (s *stackScanState) getPtr() (p uintptr, conservative bool) {
+ for _, head := range []**stackWorkBuf{&s.buf, &s.cbuf} {
+ buf := *head
if buf == nil {
- // No more data.
- putempty((*workbuf)(unsafe.Pointer(s.freeBuf)))
- s.freeBuf = nil
- return 0
+ // Never had any data.
+ continue
}
+ if buf.nobj == 0 {
+ if s.freeBuf != nil {
+ // Free old freeBuf.
+ putempty((*workbuf)(unsafe.Pointer(s.freeBuf)))
+ }
+ // Move buf to the freeBuf.
+ s.freeBuf = buf
+ buf = buf.next
+ *head = buf
+ if buf == nil {
+ // No more data in this list.
+ continue
+ }
+ }
+ buf.nobj--
+ return buf.obj[buf.nobj], head == &s.cbuf
}
- buf.nobj--
- return buf.obj[buf.nobj]
+ // No more data in either list.
+ if s.freeBuf != nil {
+ putempty((*workbuf)(unsafe.Pointer(s.freeBuf)))
+ s.freeBuf = nil
+ }
+ return 0, false
}
// addObject adds a stack object at addr of type typ to the set of stack objects.
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 5f1c90b..3aa3afc 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -10,7 +10,7 @@
// can free a whole span if none of the objects are marked, but that
// isn't its goal. This can be driven either synchronously by
// mcentral.cacheSpan for mcentral spans, or asynchronously by
-// sweepone from the list of all in-use spans in mheap_.sweepSpans.
+// sweepone, which looks at all the mcentral lists.
//
// * The span reclaimer looks for spans that contain no marked objects
// and frees whole spans. This is a separate algorithm because
@@ -40,6 +40,80 @@
nbgsweep uint32
npausesweep uint32
+
+ // centralIndex is the current unswept span class.
+ // It represents an index into the mcentral span
+ // sets. Accessed and updated via its load and
+ // update methods. Not protected by a lock.
+ //
+ // Reset at mark termination.
+ // Used by mheap.nextSpanForSweep.
+ centralIndex sweepClass
+}
+
+// sweepClass is a spanClass and one bit to represent whether we're currently
+// sweeping partial or full spans.
+type sweepClass uint32
+
+const (
+ numSweepClasses = numSpanClasses * 2
+ sweepClassDone sweepClass = sweepClass(^uint32(0))
+)
+
+func (s *sweepClass) load() sweepClass {
+ return sweepClass(atomic.Load((*uint32)(s)))
+}
+
+func (s *sweepClass) update(sNew sweepClass) {
+ // Only update *s if its current value is less than sNew,
+ // since *s increases monotonically.
+ sOld := s.load()
+ for sOld < sNew && !atomic.Cas((*uint32)(s), uint32(sOld), uint32(sNew)) {
+ sOld = s.load()
+ }
+ // TODO(mknyszek): This isn't the only place we have
+ // an atomic monotonically increasing counter. It would
+ // be nice to have an "atomic max" which is just implemented
+ // as the above on most architectures. Some architectures
+ // like RISC-V however have native support for an atomic max.
+}
+
+func (s *sweepClass) clear() {
+ atomic.Store((*uint32)(s), 0)
+}
+
+// split returns the underlying span class as well as
+// whether we're interested in the full or partial
+// unswept lists for that class, indicated as a boolean
+// (true means "full").
+func (s sweepClass) split() (spc spanClass, full bool) {
+ return spanClass(s >> 1), s&1 == 0
+}
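
split implies a simple packing: the low bit of a sweepClass selects the full (even) or partial (odd) unswept list and the remaining bits hold the span class, which is why a single monotonically increasing counter can walk every list in order. A small sketch of that encoding; the join helper and the local type declarations are illustrative, not part of the patch.

type spanClass uint8

type sweepClass uint32

// join packs a span class and a full/partial choice into a sweepClass,
// the inverse of split above: even values mean "full", odd mean "partial".
func join(spc spanClass, full bool) sweepClass {
	s := sweepClass(spc) << 1
	if !full {
		s |= 1
	}
	return s
}

With this encoding, sweeping proceeds in the order join(0, true), join(0, false), join(1, true), ... which is exactly the monotone order that centralIndex walks in nextSpanForSweep below.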
+
+// nextSpanForSweep finds and pops the next span for sweeping from the
+// central sweep buffers. It returns ownership of the span to the caller.
+// Returns nil if no such span exists.
+func (h *mheap) nextSpanForSweep() *mspan {
+ sg := h.sweepgen
+ for sc := sweep.centralIndex.load(); sc < numSweepClasses; sc++ {
+ spc, full := sc.split()
+ c := &h.central[spc].mcentral
+ var s *mspan
+ if full {
+ s = c.fullUnswept(sg).pop()
+ } else {
+ s = c.partialUnswept(sg).pop()
+ }
+ if s != nil {
+ // Write down that we found something so future sweepers
+ // can start from here.
+ sweep.centralIndex.update(sc)
+ return s
+ }
+ }
+ // Write down that we found nothing.
+ sweep.centralIndex.update(sweepClassDone)
+ return nil
}
// finishsweep_m ensures that all spans are swept.
@@ -58,12 +132,31 @@
sweep.npausesweep++
}
+ if go115NewMCentralImpl {
+ // Reset all the unswept buffers, which should be empty.
+ // Do this in sweep termination as opposed to mark termination
+ // so that we can catch unswept spans and reclaim blocks as
+ // soon as possible.
+ sg := mheap_.sweepgen
+ for i := range mheap_.central {
+ c := &mheap_.central[i].mcentral
+ c.partialUnswept(sg).reset()
+ c.fullUnswept(sg).reset()
+ }
+ }
+
+ // Sweeping is done, so if the scavenger isn't already awake,
+ // wake it up. There's definitely work for it to do at this
+ // point.
+ wakeScavenger()
+
nextMarkBitArenaEpoch()
}
func bgsweep(c chan int) {
sweep.g = getg()
+ lockInit(&sweep.lock, lockRankSweep)
lock(&sweep.lock)
sweep.parked = true
c <- 1
@@ -109,17 +202,21 @@
var s *mspan
sg := mheap_.sweepgen
for {
- s = mheap_.sweepSpans[1-sg/2%2].pop()
+ if go115NewMCentralImpl {
+ s = mheap_.nextSpanForSweep()
+ } else {
+ s = mheap_.sweepSpans[1-sg/2%2].pop()
+ }
if s == nil {
atomic.Store(&mheap_.sweepdone, 1)
break
}
- if s.state != mSpanInUse {
+ if state := s.state.get(); state != mSpanInUse {
// This can happen if direct sweeping already
// swept this span, but in that case the sweep
// generation should always be up-to-date.
if !(s.sweepgen == sg || s.sweepgen == sg+3) {
- print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
+ print("runtime: bad span s.state=", state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
throw("non in-use span in unswept list")
}
continue
@@ -149,6 +246,27 @@
// Decrement the number of active sweepers and if this is the
// last one print trace information.
if atomic.Xadd(&mheap_.sweepers, -1) == 0 && atomic.Load(&mheap_.sweepdone) != 0 {
+ // Since the sweeper is done, move the scavenge gen forward (signalling
+ // that there's new work to do) and wake the scavenger.
+ //
+ // The scavenger is signaled by the last sweeper because once
+ // sweeping is done, we will definitely have useful work for
+ // the scavenger to do, since the scavenger only runs over the
+ // heap once per GC cycle. This update is not done during sweep
+ // termination because in some cases there may be a long delay
+ // between sweep done and sweep termination (e.g. not enough
+ // allocations to trigger a GC) which would be nice to fill in
+ // with scavenging work.
+ systemstack(func() {
+ lock(&mheap_.lock)
+ mheap_.pages.scavengeStartGen()
+ unlock(&mheap_.lock)
+ })
+ // Since we might sweep in an allocation path, it's not possible
+ // for us to wake the scavenger directly via wakeScavenger, since
+ // it could allocate. Ask sysmon to do it for us instead.
+ readyForScavenger()
+
if debug.gcpacertrace > 0 {
print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", (memstats.heap_live-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept, " pages at ", sweepRatio, " pages/byte\n")
}
@@ -204,6 +322,9 @@
// If preserve=true, don't return it to heap nor relink in mcentral lists;
// caller takes care of it.
func (s *mspan) sweep(preserve bool) bool {
+ if !go115NewMCentralImpl {
+ return s.oldSweep(preserve)
+ }
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
_g_ := getg()
@@ -211,8 +332,8 @@
throw("mspan.sweep: m is not locked")
}
sweepgen := mheap_.sweepgen
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("mspan.sweep: bad span state")
}
@@ -224,10 +345,8 @@
spc := s.spanclass
size := s.elemsize
- res := false
- c := _g_.m.mcache
- freeToHeap := false
+ c := _g_.m.p.ptr().mcache
// The allocBits indicate which unmarked objects don't need to be
// processed since they were free at the end of the last GC cycle
@@ -245,6 +364,7 @@
// 2. A tiny object can have several finalizers setup for different offsets.
// If such object is not marked, we need to queue all finalizers at once.
// Both 1 and 2 are possible at the same time.
+ hadSpecials := s.specials != nil
specialp := &s.specials
special := *specialp
for special != nil {
@@ -289,6 +409,262 @@
special = *specialp
}
}
+ if hadSpecials && s.specials == nil {
+ spanHasNoSpecials(s)
+ }
+
+ if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
+ // Find all newly freed objects. This doesn't have to
+ // be efficient; allocfreetrace has massive overhead.
+ mbits := s.markBitsForBase()
+ abits := s.allocBitsForIndex(0)
+ for i := uintptr(0); i < s.nelems; i++ {
+ if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
+ x := s.base() + i*s.elemsize
+ if debug.allocfreetrace != 0 {
+ tracefree(unsafe.Pointer(x), size)
+ }
+ if debug.clobberfree != 0 {
+ clobberfree(unsafe.Pointer(x), size)
+ }
+ if raceenabled {
+ racefree(unsafe.Pointer(x), size)
+ }
+ if msanenabled {
+ msanfree(unsafe.Pointer(x), size)
+ }
+ }
+ mbits.advance()
+ abits.advance()
+ }
+ }
+
+ // Check for zombie objects.
+ if s.freeindex < s.nelems {
+ // Everything < freeindex is allocated and hence
+ // cannot be zombies.
+ //
+ // Check the first bitmap byte, where we have to be
+ // careful with freeindex.
+ obj := s.freeindex
+ if (*s.gcmarkBits.bytep(obj / 8)&^*s.allocBits.bytep(obj / 8))>>(obj%8) != 0 {
+ s.reportZombies()
+ }
+ // Check remaining bytes.
+ for i := obj/8 + 1; i < divRoundUp(s.nelems, 8); i++ {
+ if *s.gcmarkBits.bytep(i)&^*s.allocBits.bytep(i) != 0 {
+ s.reportZombies()
+ }
+ }
+ }
+
+ // Count the number of free objects in this span.
+ nalloc := uint16(s.countAlloc())
+ nfreed := s.allocCount - nalloc
+ if nalloc > s.allocCount {
+ // The zombie check above should have caught this in
+ // more detail.
+ print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
+ throw("sweep increased allocation count")
+ }
+
+ s.allocCount = nalloc
+ s.freeindex = 0 // reset allocation index to start of span.
+ if trace.enabled {
+ getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize
+ }
+
+ // gcmarkBits becomes the allocBits.
+ // get a fresh cleared gcmarkBits in preparation for next GC
+ s.allocBits = s.gcmarkBits
+ s.gcmarkBits = newMarkBits(s.nelems)
+
+ // Initialize alloc bits cache.
+ s.refillAllocCache(0)
+
+ // The span must be in our exclusive ownership until we update sweepgen,
+ // check for potential races.
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ throw("mspan.sweep: bad span state after sweep")
+ }
+ if s.sweepgen == sweepgen+1 || s.sweepgen == sweepgen+3 {
+ throw("swept cached span")
+ }
+
+ // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
+ // because of the potential for a concurrent free/SetFinalizer.
+ //
+ // But we need to set it before we make the span available for allocation
+ // (return it to heap or mcentral), because allocation code assumes that a
+ // span is already swept if available for allocation.
+ //
+ // Serialization point.
+ // At this point the mark bits are cleared and allocation ready
+ // to go so release the span.
+ atomic.Store(&s.sweepgen, sweepgen)
+
+ if spc.sizeclass() != 0 {
+ // Handle spans for small objects.
+ if nfreed > 0 {
+ // Only mark the span as needing zeroing if we've freed any
+ // objects, because a fresh span that had been allocated into,
+ // wasn't totally filled, but then swept, still has all of its
+ // free slots zeroed.
+ s.needzero = 1
+ c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed)
+ }
+ if !preserve {
+ // The caller may not have removed this span from whatever
+ // unswept set it's on but has taken ownership of the span for
+ // sweeping by updating sweepgen. If this span still is in
+ // an unswept set, then the mcentral will pop it off the
+ // set, check its sweepgen, and ignore it.
+ if nalloc == 0 {
+ // Free totally free span directly back to the heap.
+ mheap_.freeSpan(s)
+ return true
+ }
+ // Return span back to the right mcentral list.
+ if uintptr(nalloc) == s.nelems {
+ mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s)
+ } else {
+ mheap_.central[spc].mcentral.partialSwept(sweepgen).push(s)
+ }
+ }
+ } else if !preserve {
+ // Handle spans for large objects.
+ if nfreed != 0 {
+ // Free large object span to heap.
+
+ // NOTE(rsc,dvyukov): The original implementation of efence
+ // in CL 22060046 used sysFree instead of sysFault, so that
+ // the operating system would eventually give the memory
+ // back to us again, so that an efence program could run
+ // longer without running out of memory. Unfortunately,
+ // calling sysFree here without any kind of adjustment of the
+ // heap data structures means that when the memory does
+ // come back to us, we have the wrong metadata for it, either in
+ // the mspan structures or in the garbage collection bitmap.
+ // Using sysFault here means that the program will run out of
+ // memory fairly quickly in efence mode, but at least it won't
+ // have mysterious crashes due to confused memory reuse.
+ // It should be possible to switch back to sysFree if we also
+ // implement and then call some kind of mheap.deleteSpan.
+ if debug.efence > 0 {
+ s.limit = 0 // prevent mlookup from finding this span
+ sysFault(unsafe.Pointer(s.base()), size)
+ } else {
+ mheap_.freeSpan(s)
+ }
+ c.local_nlargefree++
+ c.local_largefree += size
+ return true
+ }
+
+ // Add a large span directly onto the full+swept list.
+ mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s)
+ }
+ return false
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
+// Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in mcentral lists;
+// caller takes care of it.
+//
+// For !go115NewMCentralImpl.
+func (s *mspan) oldSweep(preserve bool) bool {
+ // It's critical that we enter this function with preemption disabled,
+ // GC must not start while we are in the middle of this function.
+ _g_ := getg()
+ if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+ throw("mspan.sweep: m is not locked")
+ }
+ sweepgen := mheap_.sweepgen
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ throw("mspan.sweep: bad span state")
+ }
+
+ if trace.enabled {
+ traceGCSweepSpan(s.npages * _PageSize)
+ }
+
+ atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))
+
+ spc := s.spanclass
+ size := s.elemsize
+ res := false
+
+ c := _g_.m.p.ptr().mcache
+ freeToHeap := false
+
+ // The allocBits indicate which unmarked objects don't need to be
+ // processed since they were free at the end of the last GC cycle
+ // and were not allocated since then.
+ // If the allocBits index is >= s.freeindex and the bit
+ // is not marked then the object remains unallocated
+ // since the last GC.
+ // This situation is analogous to being on a freelist.
+
+ // Unlink & free special records for any objects we're about to free.
+ // Two complications here:
+ // 1. An object can have both finalizer and profile special records.
+ // In such case we need to queue finalizer for execution,
+ // mark the object as live and preserve the profile special.
+ // 2. A tiny object can have several finalizers setup for different offsets.
+ // If such object is not marked, we need to queue all finalizers at once.
+ // Both 1 and 2 are possible at the same time.
+ hadSpecials := s.specials != nil
+ specialp := &s.specials
+ special := *specialp
+ for special != nil {
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ objIndex := uintptr(special.offset) / size
+ p := s.base() + objIndex*size
+ mbits := s.markBitsForIndex(objIndex)
+ if !mbits.isMarked() {
+ // This object is not marked and has at least one special record.
+ // Pass 1: see if it has at least one finalizer.
+ hasFin := false
+ endOffset := p - s.base() + size
+ for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
+ if tmp.kind == _KindSpecialFinalizer {
+ // Stop freeing of object if it has a finalizer.
+ mbits.setMarkedNonAtomic()
+ hasFin = true
+ break
+ }
+ }
+ // Pass 2: queue all finalizers _or_ handle profile record.
+ for special != nil && uintptr(special.offset) < endOffset {
+ // Find the exact byte for which the special was setup
+ // (as opposed to object beginning).
+ p := s.base() + uintptr(special.offset)
+ if special.kind == _KindSpecialFinalizer || !hasFin {
+ // Splice out special record.
+ y := special
+ special = special.next
+ *specialp = special
+ freespecial(y, unsafe.Pointer(p), size)
+ } else {
+ // This is a profile record, but the object has finalizers (so it is kept alive).
+ // Keep special record.
+ specialp = &special.next
+ special = *specialp
+ }
+ }
+ } else {
+ // object is still live: keep special record
+ specialp = &special.next
+ special = *specialp
+ }
+ }
+ if go115NewMarkrootSpans && hadSpecials && s.specials == nil {
+ spanHasNoSpecials(s)
+ }
if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
// Find all newly freed objects. This doesn't have to
@@ -351,8 +727,8 @@
if freeToHeap || nfreed == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("mspan.sweep: bad span state after sweep")
}
// Serialization point.
@@ -386,7 +762,7 @@
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(s.base()), size)
} else {
- mheap_.freeSpan(s, true)
+ mheap_.freeSpan(s)
}
c.local_nlargefree++
c.local_largefree += size
@@ -400,6 +776,57 @@
return res
}
+// reportZombies reports any marked but free objects in s and throws.
+//
+// This generally means one of the following:
+//
+// 1. User code converted a pointer to a uintptr and then back
+// unsafely, and a GC ran while the uintptr was the only reference to
+// an object.
+//
+// 2. User code (or a compiler bug) constructed a bad pointer that
+// points to a free slot, often a past-the-end pointer.
+//
+// 3. The GC two cycles ago missed a pointer and freed a live object,
+// but it was still live in the last cycle, so this GC cycle found a
+// pointer to that object and marked it.
+func (s *mspan) reportZombies() {
+ printlock()
+ print("runtime: marked free object in span ", s, ", elemsize=", s.elemsize, " freeindex=", s.freeindex, " (bad use of unsafe.Pointer? try -d=checkptr)\n")
+ mbits := s.markBitsForBase()
+ abits := s.allocBitsForIndex(0)
+ for i := uintptr(0); i < s.nelems; i++ {
+ addr := s.base() + i*s.elemsize
+ print(hex(addr))
+ alloc := i < s.freeindex || abits.isMarked()
+ if alloc {
+ print(" alloc")
+ } else {
+ print(" free ")
+ }
+ if mbits.isMarked() {
+ print(" marked ")
+ } else {
+ print(" unmarked")
+ }
+ zombie := mbits.isMarked() && !alloc
+ if zombie {
+ print(" zombie")
+ }
+ print("\n")
+ if zombie {
+ length := s.elemsize
+ if length > 1024 {
+ length = 1024
+ }
+ hexdumpWords(addr, addr+length, nil)
+ }
+ mbits.advance()
+ abits.advance()
+ }
+ throw("found pointer to free object")
+}
+
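The zombie check in the new sweep path reduces to one bitwise test: an object is a zombie if its mark bit is set while its alloc bit is clear (freeindex handling omitted here). A minimal, stand-alone sketch of that test over plain byte slices, using hypothetical bitmap values rather than the runtime's gcBits type:

package main

import "fmt"

// hasZombies reports whether any bit set in mark is clear in alloc,
// mirroring the gcmarkBits &^ allocBits test in mspan.sweep.
func hasZombies(mark, alloc []byte) bool {
	for i := range mark {
		if mark[i]&^alloc[i] != 0 {
			return true
		}
	}
	return false
}

func main() {
	alloc := []byte{0b00001111} // objects 0-3 allocated
	mark := []byte{0b00010011}  // object 4 marked but never allocated: a zombie
	fmt.Println(hasZombies(mark, alloc)) // true
}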
// deductSweepCredit deducts sweep credit for allocating a span of
// size spanBytes. This must be performed *before* the span is
// allocated to ensure the system has enough credit. If necessary, it
diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go
index 0491f7c..1f722c3 100644
--- a/src/runtime/mgcsweepbuf.go
+++ b/src/runtime/mgcsweepbuf.go
@@ -111,8 +111,9 @@
unlock(&b.spineLock)
}
- // We have a block. Insert the span.
- block.spans[bottom] = s
+ // We have a block. Insert the span atomically, since there may be
+ // concurrent readers via the block API.
+ atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
}
// pop removes and returns a span from buffer b, or nil if b is empty.
@@ -143,11 +144,13 @@
// intervening pops. Spans that are pushed after the call may also
// appear in these blocks.
func (b *gcSweepBuf) numBlocks() int {
- return int((atomic.Load(&b.index) + gcSweepBlockEntries - 1) / gcSweepBlockEntries)
+ return int(divRoundUp(uintptr(atomic.Load(&b.index)), gcSweepBlockEntries))
}
// block returns the spans in the i'th block of buffer b. block is
-// safe to call concurrently with push.
+// safe to call concurrently with push. The block may contain nil
+// pointers that must be ignored, and each entry in the block must be
+// loaded atomically.
func (b *gcSweepBuf) block(i int) []*mspan {
// Perform bounds check before loading spine address since
// push ensures the allocated length is at least spineLen.
@@ -169,11 +172,5 @@
} else {
spans = block.spans[:bottom]
}
-
- // push may have reserved a slot but not filled it yet, so
- // trim away unused entries.
- for len(spans) > 0 && spans[len(spans)-1] == nil {
- spans = spans[:len(spans)-1]
- }
return spans
}
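numBlocks now delegates the ceiling division to divRoundUp rather than open-coding (n + d - 1) / d. An illustrative stand-in for that helper, assuming a block size of 512 entries (the real constant lives in mgcsweepbuf.go):

package main

import "fmt"

// divRoundUp returns ceil(n / d), matching the expression it replaces:
// (n + d - 1) / d.
func divRoundUp(n, d uintptr) uintptr {
	return (n + d - 1) / d
}

func main() {
	const gcSweepBlockEntries = 512 // assumed block size for illustration
	fmt.Println(divRoundUp(0, gcSweepBlockEntries))    // 0
	fmt.Println(divRoundUp(1, gcSweepBlockEntries))    // 1
	fmt.Println(divRoundUp(1024, gcSweepBlockEntries)) // 2
}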
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index f2c16d7..4610165 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -126,12 +126,12 @@
if debugCachedWork {
alreadyFailed := w.putGen == w.pauseGen
w.putGen = w.pauseGen
- if m := getg().m; m.locks > 0 || m.mallocing != 0 || m.preemptoff != "" || m.p.ptr().status != _Prunning {
+ if !canPreemptM(getg().m) {
// If we were to spin, the runtime may
- // deadlock: the condition above prevents
- // preemption (see newstack), which could
- // prevent gcMarkDone from finishing the
- // ragged barrier and releasing the spin.
+ // deadlock. Since we can't be preempted, the
+ // spin could prevent gcMarkDone from
+ // finishing the ragged barrier, which is what
+ // releases us from the spin.
return
}
for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
@@ -178,6 +178,10 @@
flushed := false
wbuf := w.wbuf1
+ // Record that this may acquire the wbufSpans or heap lock to
+ // allocate a workbuf.
+ lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
+ lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if wbuf == nil {
w.init()
wbuf = w.wbuf1
@@ -423,6 +427,10 @@
b.checkempty()
}
}
+ // Record that this may acquire the wbufSpans or heap lock to
+ // allocate a workbuf.
+ lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
+ lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if b == nil {
// Allocate more workbufs.
var s *mspan
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 706603a..2c7bfd8 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -15,10 +15,45 @@
"unsafe"
)
-// minPhysPageSize is a lower-bound on the physical page size. The
-// true physical page size may be larger than this. In contrast,
-// sys.PhysPageSize is an upper-bound on the physical page size.
-const minPhysPageSize = 4096
+const (
+ // minPhysPageSize is a lower-bound on the physical page size. The
+ // true physical page size may be larger than this. In contrast,
+ // sys.PhysPageSize is an upper-bound on the physical page size.
+ minPhysPageSize = 4096
+
+ // maxPhysPageSize is the maximum page size the runtime supports.
+ maxPhysPageSize = 512 << 10
+
+ // maxPhysHugePageSize sets an upper-bound on the maximum huge page size
+ // that the runtime supports.
+ maxPhysHugePageSize = pallocChunkBytes
+
+ // pagesPerReclaimerChunk indicates how many pages to scan from the
+ // pageInUse bitmap at a time. Used by the page reclaimer.
+ //
+ // Higher values reduce contention on scanning indexes (such as
+ // h.reclaimIndex), but increase the minimum latency of the
+ // operation.
+ //
+ // The time required to scan this many pages can vary a lot depending
+ // on how many spans are actually freed. Experimentally, it can
+ // scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+ // free spans at ~32 MB/ms. Using 512 pages bounds this at
+ // roughly 100µs.
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
+ pagesPerReclaimerChunk = 512
+
+ // go115NewMCentralImpl is a feature flag for the new mcentral implementation.
+ //
+ // This flag depends on go115NewMarkrootSpans because the new mcentral
+ // implementation requires that markroot spans no longer rely on mgcsweepbufs.
+ // The definition of this flag helps ensure that if there's a problem with
+ // the new markroot spans implementation and it gets turned off, the new
+ // mcentral implementation also gets turned off so the runtime isn't broken.
+ go115NewMCentralImpl = true && go115NewMarkrootSpans
+)
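The ~100µs bound quoted for pagesPerReclaimerChunk follows directly from the rates in the comment: 512 runtime pages of 8 KiB each is 4 MiB, and freeing at ~32 MB/ms takes on the order of 100µs. A throwaway calculation using those quoted figures (not fresh measurements):

package main

import "fmt"

func main() {
	const (
		pagesPerChunk = 512
		pageSize      = 8 << 10  // 8 KiB runtime page
		freeRate      = 32 << 20 // ~32 MB per millisecond, per the comment above
	)
	bytes := pagesPerChunk * pageSize
	ms := float64(bytes) / float64(freeRate)
	fmt.Printf("worst-case reclaim chunk: %d KiB, ~%.0f µs\n", bytes>>10, ms*1000)
}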
// Main malloc heap.
// The heap itself is the "free" and "scav" treaps,
@@ -32,10 +67,10 @@
// lock must only be acquired on the system stack, otherwise a g
// could self-deadlock if its stack grows with the lock held.
lock mutex
- free mTreap // free spans
- sweepgen uint32 // sweep generation, see comment in mspan
- sweepdone uint32 // all spans are swept
- sweepers uint32 // number of active sweepone calls
+ pages pageAlloc // page allocation data structure
+ sweepgen uint32 // sweep generation, see comment in mspan; written during STW
+ sweepdone uint32 // all spans are swept
+ sweepers uint32 // number of active sweepone calls
// allspans is a slice of all mspans ever created. Each mspan
// appears exactly once.
@@ -59,6 +94,8 @@
// unswept stack and pushes spans that are still in-use on the
// swept stack. Likewise, allocating an in-use span pushes it
// on the swept stack.
+ //
+ // For !go115NewMCentralImpl.
sweepSpans [2]gcSweepBuf
_ uint32 // align uint64 fields on 32-bit for atomics
@@ -81,7 +118,7 @@
// accounting for current progress. If we could only adjust
// the slope, it would create a discontinuity in debt if any
// progress has already been made.
- pagesInUse uint64 // pages of spans in stats mSpanInUse; R/W with mheap.lock
+ pagesInUse uint64 // pages of spans in stats mSpanInUse; updated atomically
pagesSwept uint64 // pages swept this cycle; updated atomically
pagesSweptBasis uint64 // pagesSwept to use as the origin of the sweep ratio; updated atomically
sweepHeapLiveBasis uint64 // value of heap_live to use as the origin of sweep ratio; written with lock, read without
@@ -89,24 +126,10 @@
// TODO(austin): pagesInUse should be a uintptr, but the 386
// compiler can't 8-byte align fields.
- // Scavenger pacing parameters
- //
- // The two basis parameters and the scavenge ratio parallel the proportional
- // sweeping implementation, the primary differences being that:
- // * Scavenging concerns itself with RSS, estimated as heapRetained()
- // * Rather than pacing the scavenger to the GC, it is paced to a
- // time-based rate computed in gcPaceScavenger.
- //
- // scavengeRetainedGoal represents our goal RSS.
- //
- // All fields must be accessed with lock.
- //
- // TODO(mknyszek): Consider abstracting the basis fields and the scavenge ratio
- // into its own type so that this logic may be shared with proportional sweeping.
- scavengeTimeBasis int64
- scavengeRetainedBasis uint64
- scavengeBytesPerNS float64
- scavengeRetainedGoal uint64
+ // scavengeGoal is the amount of total retained heap memory (measured by
+ // heapRetained) that the runtime will try to maintain by returning memory
+ // to the OS.
+ scavengeGoal uint64
// Page reclaimer state
@@ -185,7 +208,19 @@
// simply blocking GC (by disabling preemption).
sweepArenas []arenaIdx
- _ uint32 // ensure 64-bit alignment of central
+ // markArenas is a snapshot of allArenas taken at the beginning
+ // of the mark cycle. Because allArenas is append-only, neither
+ // this slice nor its contents will change during the mark, so
+ // it can be read safely.
+ markArenas []arenaIdx
+
+ // curArena is the arena that the heap is currently growing
+ // into. This should always be physPageSize-aligned.
+ curArena struct {
+ base, end uintptr
+ }
+
+ // _ uint32 // ensure 64-bit alignment of central
// central free lists for small size classes.
// the padding makes sure that the mcentrals are
@@ -199,7 +234,6 @@
spanalloc fixalloc // allocator for span*
cachealloc fixalloc // allocator for mcache*
- treapalloc fixalloc // allocator for treapNodes*
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.
@@ -213,10 +247,6 @@
// A heapArena stores metadata for a heap arena. heapArenas are stored
// outside of the Go heap and accessed via the mheap_.arenas index.
//
-// This gets allocated directly from the OS, so ideally it should be a
-// multiple of the system page size. For example, avoid adding small
-// fields.
-//
//go:notinheap
type heapArena struct {
// bitmap stores the pointer/scalar bitmap for the words in
@@ -242,7 +272,7 @@
// but only the bit corresponding to the first page in each
// span is used.
//
- // Writes are protected by mheap_.lock.
+ // Reads and writes are atomic.
pageInUse [pagesPerArena / 8]uint8
// pageMarks is a bitmap that indicates which spans have any
@@ -259,6 +289,28 @@
// faster scanning, but we don't have 64-bit atomic bit
// operations.
pageMarks [pagesPerArena / 8]uint8
+
+ // pageSpecials is a bitmap that indicates which spans have
+ // specials (finalizers or other). Like pageInUse, only the bit
+ // corresponding to the first page in each span is used.
+ //
+ // Writes are done atomically whenever a special is added to
+ // a span and whenever the last special is removed from a span.
+ // Reads are done atomically to find spans containing specials
+ // during marking.
+ pageSpecials [pagesPerArena / 8]uint8
+
+ // zeroedBase marks the first byte of the first page in this
+ // arena which hasn't been used yet and is therefore already
+ // zero. zeroedBase is relative to the arena base.
+ // Increases monotonically until it hits heapArenaBytes.
+ //
+ // This field is sufficient to determine if an allocation
+ // needs to be zeroed because the page allocator follows an
+ // address-ordered first-fit policy.
+ //
+ // Read atomically and written with an atomic CAS.
+ zeroedBase uintptr
}
// arenaHint is a hint for where to grow the heap arenas. See
@@ -298,13 +350,20 @@
// * During GC (gcphase != _GCoff), a span *must not* transition from
// manual or in-use to free. Because concurrent GC may read a pointer
// and then look up its span, the span state must be monotonic.
+//
+// Setting mspan.state to mSpanInUse or mSpanManual must be done
+// atomically and only after all other span fields are valid.
+// Likewise, if inspecting a span is contingent on it being
+// mSpanInUse, the state should be loaded atomically and checked
+// before depending on other fields. This allows the garbage collector
+// to safely deal with potentially invalid pointers, since resolving
+// such pointers may race with a span being allocated.
type mSpanState uint8
const (
mSpanDead mSpanState = iota
mSpanInUse // allocated for garbage collected heap
mSpanManual // allocated for manual management (e.g., stack allocator)
- mSpanFree
)
// mSpanStateNames are the names of the span states, indexed by
@@ -316,6 +375,21 @@
"mSpanFree",
}
+// mSpanStateBox holds an mSpanState and provides atomic operations on
+// it. This is a separate type to disallow accidental comparison or
+// assignment with mSpanState.
+type mSpanStateBox struct {
+ s mSpanState
+}
+
+func (b *mSpanStateBox) set(s mSpanState) {
+ atomic.Store8((*uint8)(&b.s), uint8(s))
+}
+
+func (b *mSpanStateBox) get() mSpanState {
+ return mSpanState(atomic.Load8((*uint8)(&b.s)))
+}
+
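mSpanStateBox exists so the span state can only be read and written through atomic accessors and can't be compared or assigned as a bare mSpanState. The runtime uses its internal 8-bit atomics; a rough user-level analogue of the same pattern with sync/atomic over a uint32 (names and widths here are illustrative only):

package main

import (
	"fmt"
	"sync/atomic"
)

type spanState uint32

const (
	stateDead spanState = iota
	stateInUse
	stateManual
)

// stateBox only exposes atomic accessors, preventing plain reads/writes
// or accidental comparison of the box itself.
type stateBox struct {
	s uint32
}

func (b *stateBox) set(s spanState) { atomic.StoreUint32(&b.s, uint32(s)) }
func (b *stateBox) get() spanState  { return spanState(atomic.LoadUint32(&b.s)) }

func main() {
	var b stateBox
	b.set(stateInUse)
	fmt.Println(b.get() == stateInUse) // true
}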
// mSpanList heads a linked list of spans.
//
//go:notinheap
@@ -397,19 +471,18 @@
// h->sweepgen is incremented by 2 after every GC
sweepgen uint32
- divMul uint16 // for divide by elemsize - divMagic.mul
- baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base
- allocCount uint16 // number of allocated objects
- spanclass spanClass // size class and noscan (uint8)
- state mSpanState // mspaninuse etc
- needzero uint8 // needs to be zeroed before allocation
- divShift uint8 // for divide by elemsize - divMagic.shift
- divShift2 uint8 // for divide by elemsize - divMagic.shift2
- scavenged bool // whether this span has had its pages released to the OS
- elemsize uintptr // computed from sizeclass or from npages
- limit uintptr // end of data in span
- speciallock mutex // guards specials list
- specials *special // linked list of special records sorted by offset.
+ divMul uint16 // for divide by elemsize - divMagic.mul
+ baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base
+ allocCount uint16 // number of allocated objects
+ spanclass spanClass // size class and noscan (uint8)
+ state mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
+ needzero uint8 // needs to be zeroed before allocation
+ divShift uint8 // for divide by elemsize - divMagic.shift
+ divShift2 uint8 // for divide by elemsize - divMagic.shift2
+ elemsize uintptr // computed from sizeclass or from npages
+ limit uintptr // end of data in span
+ speciallock mutex // guards specials list
+ specials *special // linked list of special records sorted by offset.
}
func (s *mspan) base() uintptr {
@@ -425,181 +498,6 @@
return
}
-// physPageBounds returns the start and end of the span
-// rounded in to the physical page size.
-func (s *mspan) physPageBounds() (uintptr, uintptr) {
- start := s.base()
- end := start + s.npages<<_PageShift
- if physPageSize > _PageSize {
- // Round start and end in.
- start = (start + physPageSize - 1) &^ (physPageSize - 1)
- end &^= physPageSize - 1
- }
- return start, end
-}
-
-func (h *mheap) coalesce(s *mspan) {
- // merge is a helper which merges other into s, deletes references to other
- // in heap metadata, and then discards it. other must be adjacent to s.
- merge := func(a, b, other *mspan) {
- // Caller must ensure a.startAddr < b.startAddr and that either a or
- // b is s. a and b must be adjacent. other is whichever of the two is
- // not s.
-
- if pageSize < physPageSize && a.scavenged && b.scavenged {
- // If we're merging two scavenged spans on systems where
- // pageSize < physPageSize, then their boundary should always be on
- // a physical page boundary, due to the realignment that happens
- // during coalescing. Throw if this case is no longer true, which
- // means the implementation should probably be changed to scavenge
- // along the boundary.
- _, start := a.physPageBounds()
- end, _ := b.physPageBounds()
- if start != end {
- println("runtime: a.base=", hex(a.base()), "a.npages=", a.npages)
- println("runtime: b.base=", hex(b.base()), "b.npages=", b.npages)
- println("runtime: physPageSize=", physPageSize, "pageSize=", pageSize)
- throw("neighboring scavenged spans boundary is not a physical page boundary")
- }
- }
-
- // Adjust s via base and npages and also in heap metadata.
- s.npages += other.npages
- s.needzero |= other.needzero
- if a == s {
- h.setSpan(s.base()+s.npages*pageSize-1, s)
- } else {
- s.startAddr = other.startAddr
- h.setSpan(s.base(), s)
- }
-
- // The size is potentially changing so the treap needs to delete adjacent nodes and
- // insert back as a combined node.
- h.free.removeSpan(other)
- other.state = mSpanDead
- h.spanalloc.free(unsafe.Pointer(other))
- }
-
- // realign is a helper which shrinks other and grows s such that their
- // boundary is on a physical page boundary.
- realign := func(a, b, other *mspan) {
- // Caller must ensure a.startAddr < b.startAddr and that either a or
- // b is s. a and b must be adjacent. other is whichever of the two is
- // not s.
-
- // If pageSize >= physPageSize then spans are always aligned
- // to physical page boundaries, so just exit.
- if pageSize >= physPageSize {
- return
- }
- // Since we're resizing other, we must remove it from the treap.
- h.free.removeSpan(other)
-
- // Round boundary to the nearest physical page size, toward the
- // scavenged span.
- boundary := b.startAddr
- if a.scavenged {
- boundary &^= (physPageSize - 1)
- } else {
- boundary = (boundary + physPageSize - 1) &^ (physPageSize - 1)
- }
- a.npages = (boundary - a.startAddr) / pageSize
- b.npages = (b.startAddr + b.npages*pageSize - boundary) / pageSize
- b.startAddr = boundary
-
- h.setSpan(boundary-1, a)
- h.setSpan(boundary, b)
-
- // Re-insert other now that it has a new size.
- h.free.insert(other)
- }
-
- hpMiddle := s.hugePages()
-
- // Coalesce with earlier, later spans.
- var hpBefore uintptr
- if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree {
- if s.scavenged == before.scavenged {
- hpBefore = before.hugePages()
- merge(before, s, before)
- } else {
- realign(before, s, before)
- }
- }
-
- // Now check to see if next (greater addresses) span is free and can be coalesced.
- var hpAfter uintptr
- if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree {
- if s.scavenged == after.scavenged {
- hpAfter = after.hugePages()
- merge(s, after, after)
- } else {
- realign(s, after, after)
- }
- }
- if !s.scavenged && s.hugePages() > hpBefore+hpMiddle+hpAfter {
- // If s has grown such that it now may contain more huge pages than it
- // and its now-coalesced neighbors did before, then mark the whole region
- // as huge-page-backable.
- //
- // Otherwise, on systems where we break up huge pages (like Linux)
- // s may not be backed by huge pages because it could be made up of
- // pieces which are broken up in the underlying VMA. The primary issue
- // with this is that it can lead to a poor estimate of the amount of
- // free memory backed by huge pages for determining the scavenging rate.
- //
- // TODO(mknyszek): Measure the performance characteristics of sysHugePage
- // and determine whether it makes sense to only sysHugePage on the pages
- // that matter, or if it's better to just mark the whole region.
- sysHugePage(unsafe.Pointer(s.base()), s.npages*pageSize)
- }
-}
-
-// hugePages returns the number of aligned physical huge pages in the memory
-// regioned owned by this mspan.
-func (s *mspan) hugePages() uintptr {
- if physHugePageSize == 0 || s.npages < physHugePageSize/pageSize {
- return 0
- }
- start := s.base()
- end := start + s.npages*pageSize
- if physHugePageSize > pageSize {
- // Round start and end in.
- start = (start + physHugePageSize - 1) &^ (physHugePageSize - 1)
- end &^= physHugePageSize - 1
- }
- if start < end {
- return (end - start) >> physHugePageShift
- }
- return 0
-}
-
-func (s *mspan) scavenge() uintptr {
- // start and end must be rounded in, otherwise madvise
- // will round them *out* and release more memory
- // than we want.
- start, end := s.physPageBounds()
- if end <= start {
- // start and end don't span a whole physical page.
- return 0
- }
- released := end - start
- memstats.heap_released += uint64(released)
- s.scavenged = true
- sysUnused(unsafe.Pointer(start), released)
- return released
-}
-
-// released returns the number of bytes in this span
-// which were returned back to the OS.
-func (s *mspan) released() uintptr {
- if !s.scavenged {
- return 0
- }
- start, end := s.physPageBounds()
- return end - start
-}
-
// recordspan adds a newly allocated span to h.allspans.
//
// This only happens the first time a span is allocated from
@@ -678,13 +576,13 @@
//
//go:nosplit
func arenaIndex(p uintptr) arenaIdx {
- return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
+ return arenaIdx((p - arenaBaseOffset) / heapArenaBytes)
}
// arenaBase returns the low address of the region covered by heap
// arena i.
func arenaBase(i arenaIdx) uintptr {
- return uintptr(i)*heapArenaBytes - arenaBaseOffset
+ return uintptr(i)*heapArenaBytes + arenaBaseOffset
}
type arenaIdx uint
@@ -726,7 +624,7 @@
if s == nil || b < s.base() {
return false
}
- switch s.state {
+ switch s.state.get() {
case mSpanInUse, mSpanManual:
return b < s.limit
default:
@@ -793,9 +691,12 @@
//go:nosplit
func spanOfHeap(p uintptr) *mspan {
s := spanOf(p)
- // If p is not allocated, it may point to a stale span, so we
- // have to check the span's bounds and state.
- if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
+ // s is nil if it's never been allocated. Otherwise, we check
+ // its state first because we don't trust this pointer, so we
+ // have to synchronize with span initialization. Then, it's
+ // still possible we picked up a stale span pointer, so we
+ // have to check the span's bounds.
+ if s == nil || s.state.get() != mSpanInUse || p < s.base() || p >= s.limit {
return nil
}
return s
@@ -813,7 +714,11 @@
// Initialize the heap.
func (h *mheap) init() {
- h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
+ lockInit(&h.lock, lockRankMheap)
+ lockInit(&h.sweepSpans[0].spineLock, lockRankSpine)
+ lockInit(&h.sweepSpans[1].spineLock, lockRankSpine)
+ lockInit(&h.speciallock, lockRankMheapSpecial)
+
h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
@@ -834,6 +739,8 @@
for i := range h.central {
h.central[i].mcentral.init(spanClass(i))
}
+
+ h.pages.init(&h.lock, &memstats.gc_sys)
}
// reclaim sweeps and reclaims at least npage pages into the heap.
@@ -843,23 +750,10 @@
//
// h must NOT be locked.
func (h *mheap) reclaim(npage uintptr) {
- // This scans pagesPerChunk at a time. Higher values reduce
- // contention on h.reclaimPos, but increase the minimum
- // latency of performing a reclaim.
- //
- // Must be a multiple of the pageInUse bitmap element size.
- //
- // The time required by this can vary a lot depending on how
- // many spans are actually freed. Experimentally, it can scan
- // for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
- // free spans at ~32 MB/ms. Using 512 pages bounds this at
- // roughly 100µs.
- //
// TODO(austin): Half of the time spent freeing spans is in
// locking/unlocking the heap (even with low contention). We
// could make the slow path here several times faster by
// batching heap frees.
- const pagesPerChunk = 512
// Bail early if there's no more reclaim work.
if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
@@ -892,7 +786,7 @@
}
// Claim a chunk of work.
- idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+ idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
if idx/pagesPerArena >= uintptr(len(arenas)) {
// Page reclaiming is done.
atomic.Store64(&h.reclaimIndex, 1<<63)
@@ -906,7 +800,7 @@
}
// Scan this chunk.
- nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+ nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
if nfound <= npage {
npage -= nfound
} else {
@@ -928,7 +822,9 @@
// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
// It returns the number of pages returned to the heap.
//
-// h.lock must be held and the caller must be non-preemptible.
+// h.lock must be held and the caller must be non-preemptible. Note: h.lock may be
+// temporarily unlocked and re-locked in order to do sweeping or if tracing is
+// enabled.
func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
// The heap lock must be held because this accesses the
// heapArena.spans arrays using potentially non-live pointers.
@@ -954,7 +850,7 @@
// Scan this bitmap chunk for spans that are in-use
// but have no marked objects on them.
for i := range inUse {
- inUseUnmarked := inUse[i] &^ marked[i]
+ inUseUnmarked := atomic.Load8(&inUse[i]) &^ marked[i]
if inUseUnmarked == 0 {
continue
}
@@ -973,7 +869,7 @@
// spans were freed when we dropped the
// lock and we don't want to get stale
// pointers from the spans array.
- inUseUnmarked = inUse[i] &^ marked[i]
+ inUseUnmarked = atomic.Load8(&inUse[i]) &^ marked[i]
}
}
}
@@ -984,106 +880,31 @@
n -= uintptr(len(inUse) * 8)
}
if trace.enabled {
+ unlock(&h.lock)
// Account for pages scanned but not reclaimed.
traceGCSweepSpan((n0 - nFreed) * pageSize)
+ lock(&h.lock)
}
return nFreed
}
-// alloc_m is the internal implementation of mheap.alloc.
-//
-// alloc_m must run on the system stack because it locks the heap, so
-// any stack growth during alloc_m would self-deadlock.
-//
-//go:systemstack
-func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
- _g_ := getg()
-
- // To prevent excessive heap growth, before allocating n pages
- // we need to sweep and reclaim at least n pages.
- if h.sweepdone == 0 {
- h.reclaim(npage)
- }
-
- lock(&h.lock)
- // transfer stats from cache to global
- memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
- _g_.m.mcache.local_scan = 0
- memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
- _g_.m.mcache.local_tinyallocs = 0
-
- s := h.allocSpanLocked(npage, &memstats.heap_inuse)
- if s != nil {
- // Record span info, because gc needs to be
- // able to map interior pointer to containing span.
- atomic.Store(&s.sweepgen, h.sweepgen)
- h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
- s.state = mSpanInUse
- s.allocCount = 0
- s.spanclass = spanclass
- if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
- s.elemsize = s.npages << _PageShift
- s.divShift = 0
- s.divMul = 0
- s.divShift2 = 0
- s.baseMask = 0
- } else {
- s.elemsize = uintptr(class_to_size[sizeclass])
- m := &class_to_divmagic[sizeclass]
- s.divShift = m.shift
- s.divMul = m.mul
- s.divShift2 = m.shift2
- s.baseMask = m.baseMask
- }
-
- // Mark in-use span in arena page bitmap.
- arena, pageIdx, pageMask := pageIndexOf(s.base())
- arena.pageInUse[pageIdx] |= pageMask
-
- // update stats, sweep lists
- h.pagesInUse += uint64(npage)
- if large {
- memstats.heap_objects++
- mheap_.largealloc += uint64(s.elemsize)
- mheap_.nlargealloc++
- atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
- }
- }
- // heap_scan and heap_live were updated.
- if gcBlackenEnabled != 0 {
- gcController.revise()
- }
-
- if trace.enabled {
- traceHeapAlloc()
- }
-
- // h.spans is accessed concurrently without synchronization
- // from other threads. Hence, there must be a store/store
- // barrier here to ensure the writes to h.spans above happen
- // before the caller can publish a pointer p to an object
- // allocated from s. As soon as this happens, the garbage
- // collector running on another processor could read p and
- // look up s in h.spans. The unlock acts as the barrier to
- // order these writes. On the read side, the data dependency
- // between p and the index in h.spans orders the reads.
- unlock(&h.lock)
- return s
-}
-
// alloc allocates a new span of npage pages from the GC'd heap.
//
-// Either large must be true or spanclass must indicates the span's
-// size class and scannability.
+// spanclass indicates the span's size class and scannability.
//
// If needzero is true, the memory for the returned span will be zeroed.
-func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
+func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
var s *mspan
systemstack(func() {
- s = h.alloc_m(npage, spanclass, large)
+ // To prevent excessive heap growth, before allocating n pages
+ // we need to sweep and reclaim at least n pages.
+ if h.sweepdone == 0 {
+ h.reclaim(npages)
+ }
+ s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
})
if s != nil {
@@ -1105,35 +926,12 @@
// The memory backing the returned span may not be zeroed if
// span.needzero is set.
//
-// allocManual must be called on the system stack because it acquires
-// the heap lock. See mheap for details.
+// allocManual must be called on the system stack because it may
+// acquire the heap lock via allocSpan. See mheap for details.
//
//go:systemstack
-func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
- lock(&h.lock)
- s := h.allocSpanLocked(npage, stat)
- if s != nil {
- s.state = mSpanManual
- s.manualFreeList = 0
- s.allocCount = 0
- s.spanclass = 0
- s.nelems = 0
- s.elemsize = 0
- s.limit = s.base() + s.npages<<_PageShift
- // Manually managed memory doesn't count toward heap_sys.
- memstats.heap_sys -= uint64(s.npages << _PageShift)
- }
-
- // This unlock acts as a release barrier. See mheap.alloc_m.
- unlock(&h.lock)
-
- return s
-}
-
-// setSpan modifies the span map so spanOf(base) is s.
-func (h *mheap) setSpan(base uintptr, s *mspan) {
- ai := arenaIndex(base)
- h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s
+func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
+ return h.allocSpan(npages, true, 0, stat)
}
// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
@@ -1152,94 +950,377 @@
}
}
-// Allocates a span of the given size. h must be locked.
-// The returned span has been removed from the
-// free structures, but its state is still mSpanFree.
-func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
- t := h.free.find(npage)
- if t.valid() {
- goto HaveSpan
+// allocNeedsZero checks if the region of address space [base, base+npage*pageSize),
+// assumed to be allocated, needs to be zeroed, updating heap arena metadata for
+// future allocations.
+//
+// This must be called each time pages are allocated from the heap, even if the page
+// allocator can otherwise prove the memory it's allocating is already zero because
+// they're fresh from the operating system. It updates heapArena metadata that is
+// critical for future page allocations.
+//
+// There are no locking constraints on this method.
+func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
+ for npage > 0 {
+ ai := arenaIndex(base)
+ ha := h.arenas[ai.l1()][ai.l2()]
+
+ zeroedBase := atomic.Loaduintptr(&ha.zeroedBase)
+ arenaBase := base % heapArenaBytes
+ if arenaBase < zeroedBase {
+ // We extended into the non-zeroed part of the
+ // arena, so this region needs to be zeroed before use.
+ //
+ // zeroedBase is monotonically increasing, so if we see this now then
+ // we can be sure we need to zero this memory region.
+ //
+ // We still need to update zeroedBase for this arena, and
+ // potentially more arenas.
+ needZero = true
+ }
+ // We may observe arenaBase > zeroedBase if we're racing with one or more
+ // allocations which are acquiring memory directly before us in the address
+ // space. But, because we know no one else is acquiring *this* memory, it's
+ // still safe to not zero.
+
+ // Compute how far we extend into the arena, capped
+ // at heapArenaBytes.
+ arenaLimit := arenaBase + npage*pageSize
+ if arenaLimit > heapArenaBytes {
+ arenaLimit = heapArenaBytes
+ }
+ // Increase ha.zeroedBase so it's >= arenaLimit.
+ // We may be racing with other updates.
+ for arenaLimit > zeroedBase {
+ if atomic.Casuintptr(&ha.zeroedBase, zeroedBase, arenaLimit) {
+ break
+ }
+ zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
+ // Sanity check zeroedBase.
+ if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
+ // The zeroedBase moved into the space we were trying to
+ // claim. That's very bad, and indicates someone allocated
+ // the same region we did.
+ throw("potentially overlapping in-use allocations detected")
+ }
+ }
+
+ // Move base forward and subtract from npage to move into
+ // the next arena, or finish.
+ base += arenaLimit - arenaBase
+ npage -= (arenaLimit - arenaBase) / pageSize
}
- if !h.grow(npage) {
+ return
+}
+
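allocNeedsZero advances ha.zeroedBase with a load/CAS loop so the watermark only ever moves forward, even when several allocations race. A stand-alone sketch of that monotonic-advance idiom (the overlap sanity check from the real code is omitted):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// advance raises *watermark to at least limit, tolerating concurrent updates.
func advance(watermark *uintptr, limit uintptr) {
	for {
		cur := atomic.LoadUintptr(watermark)
		if cur >= limit {
			return // someone else already covered this range
		}
		if atomic.CompareAndSwapUintptr(watermark, cur, limit) {
			return
		}
	}
}

func main() {
	var zeroedBase uintptr
	var wg sync.WaitGroup
	for _, limit := range []uintptr{4096, 8192, 2048} {
		wg.Add(1)
		go func(l uintptr) {
			defer wg.Done()
			advance(&zeroedBase, l)
		}(limit)
	}
	wg.Wait()
	fmt.Println(zeroedBase) // 8192: the watermark never moves backwards
}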
+// tryAllocMSpan attempts to allocate an mspan object from
+// the P-local cache, but may fail.
+//
+// h need not be locked.
+//
+// This caller must ensure that its P won't change underneath
+// it during this function. Currently, to ensure that, we enforce
+// that the function is run on the system stack, because that's
+// the only place it is used now. In the future, this requirement
+// may be relaxed if its use is necessary elsewhere.
+//
+//go:systemstack
+func (h *mheap) tryAllocMSpan() *mspan {
+ pp := getg().m.p.ptr()
+ // If we don't have a p or the cache is empty, we can't do
+ // anything here.
+ if pp == nil || pp.mspancache.len == 0 {
return nil
}
- t = h.free.find(npage)
- if t.valid() {
- goto HaveSpan
+ // Pull off the last entry in the cache.
+ s := pp.mspancache.buf[pp.mspancache.len-1]
+ pp.mspancache.len--
+ return s
+}
+
+// allocMSpanLocked allocates an mspan object.
+//
+// h must be locked.
+//
+// allocMSpanLocked must be called on the system stack because
+// its caller holds the heap lock. See mheap for details.
+// Running on the system stack also ensures that we won't
+// switch Ps during this function. See tryAllocMSpan for details.
+//
+//go:systemstack
+func (h *mheap) allocMSpanLocked() *mspan {
+ pp := getg().m.p.ptr()
+ if pp == nil {
+ // We don't have a p so just do the normal thing.
+ return (*mspan)(h.spanalloc.alloc())
}
- throw("grew heap, but no adequate free span found")
+ // Refill the cache if necessary.
+ if pp.mspancache.len == 0 {
+ const refillCount = len(pp.mspancache.buf) / 2
+ for i := 0; i < refillCount; i++ {
+ pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc())
+ }
+ pp.mspancache.len = refillCount
+ }
+ // Pull off the last entry in the cache.
+ s := pp.mspancache.buf[pp.mspancache.len-1]
+ pp.mspancache.len--
+ return s
+}
+
+// freeMSpanLocked frees an mspan object.
+//
+// h must be locked.
+//
+// freeMSpanLocked must be called on the system stack because
+// its caller holds the heap lock. See mheap for details.
+// Running on the system stack also ensures that we won't
+// switch Ps during this function. See tryAllocMSpan for details.
+//
+//go:systemstack
+func (h *mheap) freeMSpanLocked(s *mspan) {
+ pp := getg().m.p.ptr()
+ // First try to free the mspan directly to the cache.
+ if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
+ pp.mspancache.buf[pp.mspancache.len] = s
+ pp.mspancache.len++
+ return
+ }
+ // Failing that (or if we don't have a p), just free it to
+ // the heap.
+ h.spanalloc.free(unsafe.Pointer(s))
+}
+
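tryAllocMSpan, allocMSpanLocked, and freeMSpanLocked together form a small fixed-size per-P free list in front of the locked spanalloc fixalloc. A simplified sketch of that caching shape (buffer length, refill policy, and element type are illustrative; the real code must also run on the system stack):

package main

import "fmt"

const cacheSize = 128

// spanCache is a LIFO cache of recycled objects, sitting in front of a
// slower, locked allocator that the caller falls back to.
type spanCache struct {
	buf [cacheSize]*int // stand-in for *mspan
	len int
}

// tryGet pops a cached object, or returns nil if the cache is empty.
func (c *spanCache) tryGet() *int {
	if c.len == 0 {
		return nil
	}
	c.len--
	return c.buf[c.len]
}

// put stores an object back in the cache; it reports false when the
// cache is full and the caller must free to the shared allocator.
func (c *spanCache) put(p *int) bool {
	if c.len == len(c.buf) {
		return false
	}
	c.buf[c.len] = p
	c.len++
	return true
}

func main() {
	var c spanCache
	x := new(int)
	fmt.Println(c.put(x), c.tryGet() == x, c.tryGet() == nil) // true true true
}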
+// allocSpan allocates an mspan which owns npages worth of memory.
+//
+// If manual == false, allocSpan allocates a heap span of class spanclass
+// and updates heap accounting. If manual == true, allocSpan allocates a
+// manually-managed span (spanclass is ignored), and the caller is
+// responsible for any accounting related to its use of the span. Either
+// way, allocSpan will atomically add the bytes in the newly allocated
+// span to *sysStat.
+//
+// The returned span is fully initialized.
+//
+// h must not be locked.
+//
+// allocSpan must be called on the system stack both because it acquires
+// the heap lock and because it must block GC transitions.
+//
+//go:systemstack
+func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
+ // Function-global state.
+ gp := getg()
+ base, scav := uintptr(0), uintptr(0)
+
+ // If the allocation is small enough, try the page cache!
+ pp := gp.m.p.ptr()
+ if pp != nil && npages < pageCachePages/4 {
+ c := &pp.pcache
+
+ // If the cache is empty, refill it.
+ if c.empty() {
+ lock(&h.lock)
+ *c = h.pages.allocToCache()
+ unlock(&h.lock)
+ }
+
+ // Try to allocate from the cache.
+ base, scav = c.alloc(npages)
+ if base != 0 {
+ s = h.tryAllocMSpan()
+
+ if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
+ goto HaveSpan
+ }
+ // We're either running during GC, failed to acquire an mspan,
+ // or the allocation is for a large object. This means we
+ // have to lock the heap and do a bunch of extra work,
+ // so go down the HaveBaseLocked path.
+ //
+ // We must do this during GC to avoid skew with heap_scan
+ // since we flush mcache stats whenever we lock.
+ //
+ // TODO(mknyszek): It would be nice to not have to
+ // lock the heap if it's a large allocation, but
+ // it's fine for now. The critical section here is
+ // short and large object allocations are relatively
+ // infrequent.
+ }
+ }
+
+ // For one reason or another, we couldn't get the
+ // whole job done without the heap lock.
+ lock(&h.lock)
+
+ if base == 0 {
+ // Try to acquire a base address.
+ base, scav = h.pages.alloc(npages)
+ if base == 0 {
+ if !h.grow(npages) {
+ unlock(&h.lock)
+ return nil
+ }
+ base, scav = h.pages.alloc(npages)
+ if base == 0 {
+ throw("grew heap, but no adequate free space found")
+ }
+ }
+ }
+ if s == nil {
+ // We failed to get an mspan earlier, so grab
+ // one now that we have the heap lock.
+ s = h.allocMSpanLocked()
+ }
+ if !manual {
+ // This is a heap span, so we should do some additional accounting
+ // which may only be done with the heap locked.
+
+ // Transfer stats from mcache to global.
+ var c *mcache
+ if gp.m.p != 0 {
+ c = gp.m.p.ptr().mcache
+ } else {
+ // This case occurs while bootstrapping.
+ // See the similar code in mallocgc.
+ c = mcache0
+ if c == nil {
+ throw("mheap.allocSpan called with no P")
+ }
+ }
+ memstats.heap_scan += uint64(c.local_scan)
+ c.local_scan = 0
+ memstats.tinyallocs += uint64(c.local_tinyallocs)
+ c.local_tinyallocs = 0
+
+ // Do some additional accounting if it's a large allocation.
+ if spanclass.sizeclass() == 0 {
+ mheap_.largealloc += uint64(npages * pageSize)
+ mheap_.nlargealloc++
+ atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
+ }
+
+ // Either heap_live or heap_scan could have been updated.
+ if gcBlackenEnabled != 0 {
+ gcController.revise()
+ }
+ }
+ unlock(&h.lock)
HaveSpan:
- s := t.span()
- if s.state != mSpanFree {
- throw("candidate mspan for allocation is not free")
+ // At this point, both s != nil and base != 0, and the heap
+ // lock is no longer held. Initialize the span.
+ s.init(base, npages)
+ if h.allocNeedsZero(base, npages) {
+ s.needzero = 1
}
-
- // First, subtract any memory that was released back to
- // the OS from s. We will add back what's left if necessary.
- memstats.heap_released -= uint64(s.released())
-
- if s.npages == npage {
- h.free.erase(t)
- } else if s.npages > npage {
- // Trim off the lower bits and make that our new span.
- // Do this in-place since this operation does not
- // affect the original span's location in the treap.
- n := (*mspan)(h.spanalloc.alloc())
- h.free.mutate(t, func(s *mspan) {
- n.init(s.base(), npage)
- s.npages -= npage
- s.startAddr = s.base() + npage*pageSize
- h.setSpan(s.base()-1, n)
- h.setSpan(s.base(), s)
- h.setSpan(n.base(), n)
- n.needzero = s.needzero
- // n may not be big enough to actually be scavenged, but that's fine.
- // We still want it to appear to be scavenged so that we can do the
- // right bookkeeping later on in this function (i.e. sysUsed).
- n.scavenged = s.scavenged
- // Check if s is still scavenged.
- if s.scavenged {
- start, end := s.physPageBounds()
- if start < end {
- memstats.heap_released += uint64(end - start)
- } else {
- s.scavenged = false
- }
- }
- })
- s = n
+ nbytes := npages * pageSize
+ if manual {
+ s.manualFreeList = 0
+ s.nelems = 0
+ s.limit = s.base() + s.npages*pageSize
+ // Manually managed memory doesn't count toward heap_sys.
+ mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
+ s.state.set(mSpanManual)
} else {
- throw("candidate mspan for allocation is too small")
+ // We must set span properties before the span is published anywhere
+ // since we're not holding the heap lock.
+ s.spanclass = spanclass
+ if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
+ s.elemsize = nbytes
+ s.nelems = 1
+
+ s.divShift = 0
+ s.divMul = 0
+ s.divShift2 = 0
+ s.baseMask = 0
+ } else {
+ s.elemsize = uintptr(class_to_size[sizeclass])
+ s.nelems = nbytes / s.elemsize
+
+ m := &class_to_divmagic[sizeclass]
+ s.divShift = m.shift
+ s.divMul = m.mul
+ s.divShift2 = m.shift2
+ s.baseMask = m.baseMask
+ }
+
+ // Initialize mark and allocation structures.
+ s.freeindex = 0
+ s.allocCache = ^uint64(0) // all 1s indicating all free.
+ s.gcmarkBits = newMarkBits(s.nelems)
+ s.allocBits = newAllocBits(s.nelems)
+
+ // It's safe to access h.sweepgen without the heap lock because it's
+ // only ever updated with the world stopped and we run on the
+ // systemstack which blocks a STW transition.
+ atomic.Store(&s.sweepgen, h.sweepgen)
+
+ // Now that the span is filled in, set its state. This
+ // is a publication barrier for the other fields in
+ // the span. While valid pointers into this span
+ // should never be visible until the span is returned,
+ // if the garbage collector finds an invalid pointer,
+ // access to the span may race with initialization of
+ // the span. We resolve this race by atomically
+ // setting the state after the span is fully
+ // initialized, and atomically checking the state in
+ // any situation where a pointer is suspect.
+ s.state.set(mSpanInUse)
}
- // "Unscavenge" s only AFTER splitting so that
- // we only sysUsed whatever we actually need.
- if s.scavenged {
+
+ // Commit and account for any scavenged memory that the span now owns.
+ if scav != 0 {
// sysUsed all the pages that are actually available
- // in the span. Note that we don't need to decrement
- // heap_released since we already did so earlier.
- sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
- s.scavenged = false
+ // in the span since some of them might be scavenged.
+ sysUsed(unsafe.Pointer(base), nbytes)
+ mSysStatDec(&memstats.heap_released, scav)
+ }
+ // Update stats.
+ mSysStatInc(sysStat, nbytes)
+ mSysStatDec(&memstats.heap_idle, nbytes)
- // Since we allocated out of a scavenged span, we just
- // grew the RSS. Mitigate this by scavenging enough free
- // space to make up for it but only if we need to.
+ // Publish the span in various locations.
+
+ // This is safe to call without the lock held because the slots
+ // related to this span will only ever be read or modified by
+ // this thread until pointers into the span are published (and
+ // we execute a publication barrier at the end of this function
+ // before that happens) or pageInUse is updated.
+ h.setSpans(s.base(), npages, s)
+
+ if !manual {
+ if !go115NewMCentralImpl {
+ // Add to swept in-use list.
+ //
+ // This publishes the span to root marking.
+ //
+ // h.sweepgen is guaranteed to only change during STW,
+ // and preemption is disabled in the page allocator.
+ h.sweepSpans[h.sweepgen/2%2].push(s)
+ }
+
+ // Mark in-use span in arena page bitmap.
//
- // scavengeLocked may cause coalescing, so prevent
- // coalescing with s by temporarily changing its state.
- s.state = mSpanManual
- h.scavengeIfNeededLocked(s.npages * pageSize)
- s.state = mSpanFree
+ // This publishes the span to the page sweeper, so
+ // it's imperative that the span be completely initialized
+ // prior to this line.
+ arena, pageIdx, pageMask := pageIndexOf(s.base())
+ atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
+
+ // Update related page sweeper stats.
+ atomic.Xadd64(&h.pagesInUse, int64(npages))
+
+ if trace.enabled {
+ // Trace that a heap alloc occurred.
+ traceHeapAlloc()
+ }
}
- h.setSpans(s.base(), npage, s)
+ // Make sure the newly allocated span will be observed
+ // by the GC before pointers into the span are published.
+ publicationBarrier()
- *stat += uint64(npage << _PageShift)
- memstats.heap_idle -= uint64(npage << _PageShift)
-
- if s.inList() {
- throw("still in list")
- }
return s
}
@@ -1248,54 +1329,92 @@
//
// h must be locked.
func (h *mheap) grow(npage uintptr) bool {
- ask := npage << _PageShift
- v, size := h.sysAlloc(ask)
- if v == nil {
- print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
- return false
+ // We must grow the heap in whole palloc chunks.
+ ask := alignUp(npage, pallocChunkPages) * pageSize
+
+ totalGrowth := uintptr(0)
+ // This may overflow because ask could be very large
+ // and is otherwise unrelated to h.curArena.base.
+ end := h.curArena.base + ask
+ nBase := alignUp(end, physPageSize)
+ if nBase > h.curArena.end || /* overflow */ end < h.curArena.base {
+ // Not enough room in the current arena. Allocate more
+ // arena space. This may not be contiguous with the
+ // current arena, so we have to request the full ask.
+ av, asize := h.sysAlloc(ask)
+ if av == nil {
+ print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+ return false
+ }
+
+ if uintptr(av) == h.curArena.end {
+ // The new space is contiguous with the old
+ // space, so just extend the current space.
+ h.curArena.end = uintptr(av) + asize
+ } else {
+ // The new space is discontiguous. Track what
+ // remains of the current space and switch to
+ // the new space. This should be rare.
+ if size := h.curArena.end - h.curArena.base; size != 0 {
+ h.pages.grow(h.curArena.base, size)
+ totalGrowth += size
+ }
+ // Switch to the new space.
+ h.curArena.base = uintptr(av)
+ h.curArena.end = uintptr(av) + asize
+ }
+
+ // The memory just allocated counts as both released
+ // and idle, even though it's not yet backed by spans.
+ //
+ // The allocation is always aligned to the heap arena
+ // size which is always > physPageSize, so it's safe to
+ // just add directly to heap_released.
+ mSysStatInc(&memstats.heap_released, asize)
+ mSysStatInc(&memstats.heap_idle, asize)
+
+ // Recalculate nBase.
+ // We know this won't overflow, because sysAlloc returned
+ // a valid region starting at h.curArena.base which is at
+ // least ask bytes in size.
+ nBase = alignUp(h.curArena.base+ask, physPageSize)
}
- // Create a fake "in use" span and free it, so that the
- // right accounting and coalescing happens.
- s := (*mspan)(h.spanalloc.alloc())
- s.init(uintptr(v), size/pageSize)
- h.setSpans(s.base(), s.npages, s)
- s.state = mSpanFree
- memstats.heap_idle += uint64(size)
- // (*mheap).sysAlloc returns untouched/uncommitted memory.
- s.scavenged = true
- // s is always aligned to the heap arena size which is always > physPageSize,
- // so its totally safe to just add directly to heap_released. Coalescing,
- // if possible, will also always be correct in terms of accounting, because
- // s.base() must be a physical page boundary.
- memstats.heap_released += uint64(size)
- h.coalesce(s)
- h.free.insert(s)
+ // Grow into the current arena.
+ v := h.curArena.base
+ h.curArena.base = nBase
+ h.pages.grow(v, nBase-v)
+ totalGrowth += nBase - v
+
+ // We just caused a heap growth, so scavenge down what will soon be used.
+ // By scavenging inline we deal with the failure to allocate out of
+ // memory fragments by scavenging the memory fragments that are least
+ // likely to be re-used.
+ if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
+ todo := totalGrowth
+ if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
+ todo = overage
+ }
+ h.pages.scavenge(todo, false)
+ }
return true
}
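
The chunk-aligned growth request and the explicit overflow check in grow above can be seen in isolation in the following standalone sketch. The constants and the alignUp helper are illustrative stand-ins for the runtime's own (8 KiB pages, 512-page palloc chunks); this is a minimal sketch, not part of the patch.

package main

import "fmt"

const (
	pageSize         = 8192 // illustrative: the runtime's 8 KiB pages
	pallocChunkPages = 512  // pages per palloc chunk (4 MiB chunks)
)

// alignUp rounds n up to a multiple of a, which must be a power of two.
func alignUp(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}

func main() {
	// A request for 3 pages is rounded up to a whole chunk's worth of bytes.
	npage := uintptr(3)
	ask := alignUp(npage, pallocChunkPages) * pageSize
	fmt.Println("ask =", ask, "bytes =", ask/pageSize, "pages") // 4194304 bytes = 512 pages

	// The overflow check: base+ask can wrap around the top of the address
	// space, so grow tests end < base explicitly.
	base := ^uintptr(0) - pageSize // hypothetical arena base near the top of memory
	end := base + ask
	fmt.Println("overflow detected:", end < base) // true
}
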
// Free the span back into the heap.
-//
-// large must match the value of large passed to mheap.alloc. This is
-// used for accounting.
-func (h *mheap) freeSpan(s *mspan, large bool) {
+func (h *mheap) freeSpan(s *mspan) {
systemstack(func() {
- mp := getg().m
+ c := getg().m.p.ptr().mcache
lock(&h.lock)
- memstats.heap_scan += uint64(mp.mcache.local_scan)
- mp.mcache.local_scan = 0
- memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
- mp.mcache.local_tinyallocs = 0
+ memstats.heap_scan += uint64(c.local_scan)
+ c.local_scan = 0
+ memstats.tinyallocs += uint64(c.local_tinyallocs)
+ c.local_tinyallocs = 0
if msanenabled {
// Tell msan that this entire span is no longer in use.
base := unsafe.Pointer(s.base())
bytes := s.npages << _PageShift
msanfree(base, bytes)
}
- if large {
- // Match accounting done in mheap.alloc.
- memstats.heap_objects--
- }
if gcBlackenEnabled != 0 {
// heap_scan changed.
gcController.revise()
@@ -1319,14 +1438,14 @@
func (h *mheap) freeManual(s *mspan, stat *uint64) {
s.needzero = 1
lock(&h.lock)
- *stat -= uint64(s.npages << _PageShift)
- memstats.heap_sys += uint64(s.npages << _PageShift)
+ mSysStatDec(stat, s.npages*pageSize)
+ mSysStatInc(&memstats.heap_sys, s.npages*pageSize)
h.freeSpanLocked(s, false, true)
unlock(&h.lock)
}
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
- switch s.state {
+ switch s.state.get() {
case mSpanManual:
if s.allocCount != 0 {
throw("mheap.freeSpanLocked - invalid stack free")
@@ -1336,161 +1455,50 @@
print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
throw("mheap.freeSpanLocked - invalid free")
}
- h.pagesInUse -= uint64(s.npages)
+ atomic.Xadd64(&h.pagesInUse, -int64(s.npages))
// Clear in-use bit in arena page bitmap.
arena, pageIdx, pageMask := pageIndexOf(s.base())
- arena.pageInUse[pageIdx] &^= pageMask
+ atomic.And8(&arena.pageInUse[pageIdx], ^pageMask)
default:
throw("mheap.freeSpanLocked - invalid span state")
}
if acctinuse {
- memstats.heap_inuse -= uint64(s.npages << _PageShift)
+ mSysStatDec(&memstats.heap_inuse, s.npages*pageSize)
}
if acctidle {
- memstats.heap_idle += uint64(s.npages << _PageShift)
+ mSysStatInc(&memstats.heap_idle, s.npages*pageSize)
}
- s.state = mSpanFree
- // Coalesce span with neighbors.
- h.coalesce(s)
+ // Mark the space as free.
+ h.pages.free(s.base(), s.npages)
- // Insert s into the treap.
- h.free.insert(s)
+ // Free the span structure. We no longer have a use for it.
+ s.state.set(mSpanDead)
+ h.freeMSpanLocked(s)
}
-// scavengeSplit takes t.span() and attempts to split off a span containing size
-// (in bytes) worth of physical pages from the back.
-//
-// The split point is only approximately defined by size since the split point
-// is aligned to physPageSize and pageSize every time. If physHugePageSize is
-// non-zero and the split point would break apart a huge page in the span, then
-// the split point is also aligned to physHugePageSize.
-//
-// If the desired split point ends up at the base of s, or if size is obviously
-// much larger than s, then a split is not possible and this method returns nil.
-// Otherwise if a split occurred it returns the newly-created span.
-func (h *mheap) scavengeSplit(t treapIter, size uintptr) *mspan {
- s := t.span()
- start, end := s.physPageBounds()
- if end <= start || end-start <= size {
- // Size covers the whole span.
- return nil
- }
- // The span is bigger than what we need, so compute the base for the new
- // span if we decide to split.
- base := end - size
- // Round down to the next physical or logical page, whichever is bigger.
- base &^= (physPageSize - 1) | (pageSize - 1)
- if base <= start {
- return nil
- }
- if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start {
- // We're in danger of breaking apart a huge page, so include the entire
- // huge page in the bound by rounding down to the huge page size.
- // base should still be aligned to pageSize.
- base &^= physHugePageSize - 1
- }
- if base == start {
- // After all that we rounded base down to s.base(), so no need to split.
- return nil
- }
- if base < start {
- print("runtime: base=", base, ", s.npages=", s.npages, ", s.base()=", s.base(), ", size=", size, "\n")
- print("runtime: physPageSize=", physPageSize, ", physHugePageSize=", physHugePageSize, "\n")
- throw("bad span split base")
- }
-
- // Split s in-place, removing from the back.
- n := (*mspan)(h.spanalloc.alloc())
- nbytes := s.base() + s.npages*pageSize - base
- h.free.mutate(t, func(s *mspan) {
- n.init(base, nbytes/pageSize)
- s.npages -= nbytes / pageSize
- h.setSpan(n.base()-1, s)
- h.setSpan(n.base(), n)
- h.setSpan(n.base()+nbytes-1, n)
- n.needzero = s.needzero
- n.state = s.state
- })
- return n
-}
-
-// scavengeLocked scavenges nbytes worth of spans in the free treap by
-// starting from the span with the highest base address and working down.
-// It then takes those spans and places them in scav.
-//
-// Returns the amount of memory scavenged in bytes. h must be locked.
-func (h *mheap) scavengeLocked(nbytes uintptr) uintptr {
- released := uintptr(0)
- // Iterate over spans with huge pages first, then spans without.
- const mask = treapIterScav | treapIterHuge
- for _, match := range []treapIterType{treapIterHuge, 0} {
- // Iterate over the treap backwards (from highest address to lowest address)
- // scavenging spans until we've reached our quota of nbytes.
- for t := h.free.end(mask, match); released < nbytes && t.valid(); {
- s := t.span()
- start, end := s.physPageBounds()
- if start >= end {
- // This span doesn't cover at least one physical page, so skip it.
- t = t.prev()
- continue
- }
- n := t.prev()
- if span := h.scavengeSplit(t, nbytes-released); span != nil {
- s = span
- } else {
- h.free.erase(t)
- }
- released += s.scavenge()
- // Now that s is scavenged, we must eagerly coalesce it
- // with its neighbors to prevent having two spans with
- // the same scavenged state adjacent to each other.
- h.coalesce(s)
- t = n
- h.free.insert(s)
- }
- }
- return released
-}
-
-// scavengeIfNeededLocked calls scavengeLocked if we're currently above the
-// scavenge goal in order to prevent the mutator from out-running the
-// the scavenger.
-//
-// h must be locked.
-func (h *mheap) scavengeIfNeededLocked(size uintptr) {
- if r := heapRetained(); r+uint64(size) > h.scavengeRetainedGoal {
- todo := uint64(size)
- // If we're only going to go a little bit over, just request what
- // we actually need done.
- if overage := r + uint64(size) - h.scavengeRetainedGoal; overage < todo {
- todo = overage
- }
- h.scavengeLocked(uintptr(todo))
- }
-}
-
-// scavengeAll visits each node in the free treap and scavenges the
-// treapNode's span. It then removes the scavenged span from
-// unscav and adds it into scav before continuing.
+// scavengeAll acquires the heap lock (blocking any additional
+// manipulation of the page allocator) and iterates over the whole
+// heap, scavenging every free page available.
func (h *mheap) scavengeAll() {
// Disallow malloc or panic while holding the heap lock. We do
- // this here because this is an non-mallocgc entry-point to
+ // this here because this is a non-mallocgc entry-point to
// the mheap API.
gp := getg()
gp.m.mallocing++
lock(&h.lock)
- released := h.scavengeLocked(^uintptr(0))
+ // Start a new scavenge generation so we have a chance to walk
+ // over the whole heap.
+ h.pages.scavengeStartGen()
+ released := h.pages.scavenge(^uintptr(0), false)
+ gen := h.pages.scav.gen
unlock(&h.lock)
gp.m.mallocing--
- if debug.gctrace > 0 {
- if released > 0 {
- print("forced scvg: ", released>>20, " MB released\n")
- }
- print("forced scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+ if debug.scavtrace > 0 {
+ printScavTrace(gen, released, true)
}
}
@@ -1511,14 +1519,14 @@
span.allocCount = 0
span.spanclass = 0
span.elemsize = 0
- span.state = mSpanDead
- span.scavenged = false
span.speciallock.key = 0
span.specials = nil
span.needzero = 0
span.freeindex = 0
span.allocBits = nil
span.gcmarkBits = nil
+ span.state.set(mSpanDead)
+ lockInit(&span.speciallock, lockRankMspanSpecial)
}
func (span *mspan) inList() bool {
@@ -1632,6 +1640,22 @@
kind byte // kind of special
}
+// spanHasSpecials marks a span as having specials in the arena bitmap.
+func spanHasSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
+}
+
+// spanHasNoSpecials marks a span as having no specials in the arena bitmap.
+func spanHasNoSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
+}
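
Below is a minimal sketch of the byte/bit arithmetic these two helpers perform on the per-arena pageSpecials bitmap. The constants are illustrative (8 KiB pages, 8192 pages per arena) and the bit operations are non-atomic here; the runtime uses atomic.Or8/And8 because the bitmap is read concurrently.

package main

import "fmt"

const (
	pageSize      = 8192 // illustrative 8 KiB pages
	pagesPerArena = 8192 // illustrative: a 64 MiB arena holds 8192 pages
)

// pageSpecials mirrors the per-arena bitmap: one bit per page.
var pageSpecials [pagesPerArena / 8]uint8

// markSpecials sets the bit for the page containing base. The runtime performs
// the same update with atomic.Or8 because the bitmap is read concurrently.
func markSpecials(base uintptr) {
	arenaPage := (base / pageSize) % pagesPerArena
	pageSpecials[arenaPage/8] |= uint8(1) << (arenaPage % 8)
}

// clearSpecials clears the same bit (atomic.And8 with the inverted mask in the runtime).
func clearSpecials(base uintptr) {
	arenaPage := (base / pageSize) % pagesPerArena
	pageSpecials[arenaPage/8] &^= uint8(1) << (arenaPage % 8)
}

func main() {
	base := uintptr(10 * pageSize) // a span starting at the arena's 10th page
	markSpecials(base)
	fmt.Printf("byte 1 = %08b\n", pageSpecials[1]) // bit 2 set: 00000100
	clearSpecials(base)
	fmt.Printf("byte 1 = %08b\n", pageSpecials[1]) // 00000000
}
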
+
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
@@ -1677,6 +1701,9 @@
s.offset = uint16(offset)
s.next = *t
*t = s
+ if go115NewMarkrootSpans {
+ spanHasSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
@@ -1700,6 +1727,7 @@
offset := uintptr(p) - span.base()
+ var result *special
lock(&span.speciallock)
t := &span.specials
for {
@@ -1711,15 +1739,17 @@
// "interior" specials (p must be exactly equal to s->offset).
if offset == uintptr(s.offset) && kind == s.kind {
*t = s.next
- unlock(&span.speciallock)
- releasem(mp)
- return s
+ result = s
+ break
}
t = &s.next
}
+ if go115NewMarkrootSpans && span.specials == nil {
+ spanHasNoSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
- return nil
+ return result
}
// The described object has a finalizer set for it.
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index b6fe701..6c7a4cf 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -194,7 +194,9 @@
}
func copyPPC64x(w io.Writer) {
- fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
+ // duffcopy is not used on PPC64.
+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+ fmt.Fprintln(w, "\tUNDEF")
}
func tagsMIPS64x(w io.Writer) {
@@ -216,5 +218,13 @@
}
func copyMIPS64x(w io.Writer) {
- fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOVV\t(R1), R23")
+ fmt.Fprintln(w, "\tADDV\t$8, R1")
+ fmt.Fprintln(w, "\tMOVV\tR23, (R2)")
+ fmt.Fprintln(w, "\tADDV\t$8, R2")
+ fmt.Fprintln(w)
+ }
+ fmt.Fprintln(w, "\tRET")
}
diff --git a/src/runtime/mknacl.sh b/src/runtime/mknacl.sh
deleted file mode 100644
index 306ae3d..0000000
--- a/src/runtime/mknacl.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2013 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-cat /Users/rsc/pub/native_client/src/trusted/service_runtime/include/bits/nacl_syscalls.h |
- awk '
- BEGIN {
- printf("// Code generated by mknacl.sh; DO NOT EDIT.\n")
- }
- NF==3 && $1=="#define" && $2~/^NACL_sys_/ {
- name=$2
- sub(/^NACL_sys_/, "SYS_", name)
- printf("#define %s %s\n", name, $3)
- }' >syscall_nacl.h
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
new file mode 100644
index 0000000..1fe7766
--- /dev/null
+++ b/src/runtime/mkpreempt.go
@@ -0,0 +1,575 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// mkpreempt generates the asyncPreempt functions for each
+// architecture.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strings"
+)
+
+// Copied from cmd/compile/internal/ssa/gen/*Ops.go
+
+var regNames386 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+}
+
+var regNamesAMD64 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "R8",
+ "R9",
+ "R10",
+ "R11",
+ "R12",
+ "R13",
+ "R14",
+ "R15",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+ "X8",
+ "X9",
+ "X10",
+ "X11",
+ "X12",
+ "X13",
+ "X14",
+ "X15",
+}
+
+var out io.Writer
+
+var arches = map[string]func(){
+ "386": gen386,
+ "amd64": genAMD64,
+ "arm": genARM,
+ "arm64": genARM64,
+ "mips64x": func() { genMIPS(true) },
+ "mipsx": func() { genMIPS(false) },
+ "ppc64x": genPPC64,
+ "riscv64": genRISCV64,
+ "s390x": genS390X,
+ "wasm": genWasm,
+}
+var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
+
+func main() {
+ flag.Parse()
+ if flag.NArg() > 0 {
+ out = os.Stdout
+ for _, arch := range flag.Args() {
+ gen, ok := arches[arch]
+ if !ok {
+ log.Fatalf("unknown arch %s", arch)
+ }
+ header(arch)
+ gen()
+ }
+ return
+ }
+
+ for arch, gen := range arches {
+ f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
+ if err != nil {
+ log.Fatal(err)
+ }
+ out = f
+ header(arch)
+ gen()
+ if err := f.Close(); err != nil {
+ log.Fatal(err)
+ }
+ }
+}
+
+func header(arch string) {
+ fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
+ if beLe[arch] {
+ base := arch[:len(arch)-1]
+ fmt.Fprintf(out, "// +build %s %sle\n\n", base, base)
+ }
+ fmt.Fprintf(out, "#include \"go_asm.h\"\n")
+ fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
+ fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
+}
+
+func p(f string, args ...interface{}) {
+ fmted := fmt.Sprintf(f, args...)
+ fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1))
+}
+
+func label(l string) {
+ fmt.Fprintf(out, "%s\n", l)
+}
+
+type layout struct {
+ stack int
+ regs []regPos
+ sp string // stack pointer register
+}
+
+type regPos struct {
+ pos int
+
+ op string
+ reg string
+
+ // If this register requires special save and restore, these
+ // give those operations with a %d placeholder for the stack
+ // offset.
+ save, restore string
+}
+
+func (l *layout) add(op, reg string, size int) {
+ l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) addSpecial(save, restore string, size int) {
+ l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) save() {
+ for _, reg := range l.regs {
+ if reg.save != "" {
+ p(reg.save, reg.pos)
+ } else {
+ p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp)
+ }
+ }
+}
+
+func (l *layout) restore() {
+ for i := len(l.regs) - 1; i >= 0; i-- {
+ reg := l.regs[i]
+ if reg.restore != "" {
+ p(reg.restore, reg.pos)
+ } else {
+ p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg)
+ }
+ }
+}
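
The save/restore pairing above is easier to see in a stripped-down, self-contained sketch: registers are assigned increasing frame offsets by add, then emitted in order for the save path and in reverse order for the restore path. Register names, operand sizes, and the output format below are illustrative only.

package main

import "fmt"

type regPos struct {
	op, reg string
	pos     int
}

type layout struct {
	sp    string
	stack int
	regs  []regPos
}

func (l *layout) add(op, reg string, size int) {
	l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack})
	l.stack += size
}

func main() {
	l := layout{sp: "SP"}
	for _, r := range []string{"AX", "CX", "DX"} {
		l.add("MOVQ", r, 8)
	}
	// Saves are emitted in order, restores in reverse order, mirroring
	// (*layout).save and (*layout).restore above.
	for _, r := range l.regs {
		fmt.Printf("\t%s %s, %d(%s)\n", r.op, r.reg, r.pos, l.sp)
	}
	for i := len(l.regs) - 1; i >= 0; i-- {
		r := l.regs[i]
		fmt.Printf("\t%s %d(%s), %s\n", r.op, r.pos, l.sp, r.reg)
	}
	fmt.Printf("\t// frame size: %d bytes\n", l.stack)
}
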
+
+func gen386() {
+ p("PUSHFL")
+
+ // Save general purpose registers.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNames386 {
+ if reg == "SP" || strings.HasPrefix(reg, "X") {
+ continue
+ }
+ l.add("MOVL", reg, 4)
+ }
+
+ // Save the 387 state.
+ l.addSpecial(
+ "FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)",
+ "FRSTOR %d(SP)",
+ 108)
+
+ // Save SSE state only if supported.
+ lSSE := layout{stack: l.stack, sp: "SP"}
+ for i := 0; i < 8; i++ {
+ lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
+ }
+
+ p("ADJSP $%d", lSSE.stack)
+ p("NOP SP")
+ l.save()
+ p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
+ lSSE.save()
+ label("nosse:")
+ p("CALL ·asyncPreempt2(SB)")
+ p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
+ lSSE.restore()
+ label("nosse2:")
+ l.restore()
+ p("ADJSP $%d", -lSSE.stack)
+
+ p("POPFL")
+ p("RET")
+}
+
+func genAMD64() {
+ // Assign stack offsets.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNamesAMD64 {
+ if reg == "SP" || reg == "BP" {
+ continue
+ }
+ if strings.HasPrefix(reg, "X") {
+ l.add("MOVUPS", reg, 16)
+ } else {
+ l.add("MOVQ", reg, 8)
+ }
+ }
+
+ // TODO: MXCSR register?
+
+ p("PUSHQ BP")
+ p("MOVQ SP, BP")
+ p("// Save flags before clobbering them")
+ p("PUSHFQ")
+ p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
+ p("ADJSP $%d", l.stack)
+ p("// But vet doesn't know ADJSP, so suppress vet stack checking")
+ p("NOP SP")
+
+ // Apparently, the signal handling code path in darwin kernel leaves
+ // the upper bits of Y registers in a dirty state, which causes
+ // many SSE operations (128-bit and narrower) to become much slower.
+ // Clear the upper bits to get to a clean state. See issue #37174.
+ // It is safe here as Go code doesn't use the upper bits of Y registers.
+ p("#ifdef GOOS_darwin")
+ p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
+ p("JE 2(PC)")
+ p("VZEROUPPER")
+ p("#endif")
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+ p("ADJSP $%d", -l.stack)
+ p("POPFQ")
+ p("POPQ BP")
+ p("RET")
+}
+
+func genARM() {
+ // Add integer registers R0-R12.
+ // R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
+ var l = layout{sp: "R13", stack: 4} // add LR slot
+ for i := 0; i <= 12; i++ {
+ reg := fmt.Sprintf("R%d", i)
+ if i == 10 {
+ continue // R10 is g register, no need to save/restore
+ }
+ l.add("MOVW", reg, 4)
+ }
+ // Add flag register.
+ l.addSpecial(
+ "MOVW CPSR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, CPSR",
+ 4)
+
+ // Add floating point registers F0-F15 and flag register.
+ var lfp = layout{stack: l.stack, sp: "R13"}
+ lfp.addSpecial(
+ "MOVW FPCR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, FPCR",
+ 4)
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ lfp.add("MOVD", reg, 8)
+ }
+
+ p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
+ l.save()
+ p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
+ lfp.save()
+ label("nofp:")
+ p("CALL ·asyncPreempt2(SB)")
+ p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
+ lfp.restore()
+ label("nofp2:")
+ l.restore()
+
+ p("MOVW %d(R13), R14", lfp.stack) // sigctxt.pushCall pushes LR on stack, restore it
+ p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
+ p("UNDEF") // shouldn't get here
+}
+
+func genARM64() {
+ // Add integer registers R0-R26
+ // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
+ // and not saved here.
+ var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
+ for i := 0; i <= 26; i++ {
+ if i == 18 {
+ continue // R18 is not used, skip
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add("MOVD", reg, 8)
+ }
+ // Add flag registers.
+ l.addSpecial(
+ "MOVD NZCV, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, NZCV",
+ 8)
+ l.addSpecial(
+ "MOVD FPSR, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, FPSR",
+ 8)
+ // TODO: FPCR? I don't think we'll change it, so no need to save.
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+ if l.stack%16 != 0 {
+ l.stack += 8 // SP needs 16-byte alignment
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p("MOVD R30, %d(RSP)", -l.stack)
+ p("SUB $%d, RSP", l.stack)
+ p("#ifdef GOOS_linux")
+ p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
+ p("SUB $8, RSP, R29") // set up new frame pointer
+ p("#endif")
+ // On darwin, save the LR again after decrementing SP. We run the
+ // signal handler on the G stack (as it doesn't support SA_ONSTACK),
+ // so any writes below SP may be clobbered.
+ p("#ifdef GOOS_darwin")
+ p("MOVD R30, (RSP)")
+ p("#endif")
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("#ifdef GOOS_linux")
+ p("MOVD -8(RSP), R29") // restore frame pointer
+ p("#endif")
+ p("MOVD (RSP), R27") // load PC to REGTMP
+ p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R27)")
+}
+
+func genMIPS(_64bit bool) {
+ mov := "MOVW"
+ movf := "MOVF"
+ add := "ADD"
+ sub := "SUB"
+ r28 := "R28"
+ regsize := 4
+ softfloat := "GOMIPS_softfloat"
+ if _64bit {
+ mov = "MOVV"
+ movf = "MOVD"
+ add = "ADDV"
+ sub = "SUBV"
+ r28 = "RSB"
+ regsize = 8
+ softfloat = "GOMIPS64_softfloat"
+ }
+
+ // Add integer registers R1-R22, R24-R25, R28
+ // R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
+ // and not saved here. R26 and R27 are reserved by the kernel and not used.
+ var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
+ for i := 1; i <= 25; i++ {
+ if i == 23 {
+ continue // R23 is REGTMP
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add(mov, reg, regsize)
+ }
+ l.add(mov, r28, regsize)
+ l.addSpecial(
+ mov+" HI, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, HI",
+ regsize)
+ l.addSpecial(
+ mov+" LO, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, LO",
+ regsize)
+
+ // Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
+ var lfp = layout{sp: "R29", stack: l.stack}
+ lfp.addSpecial(
+ mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, FCR31",
+ regsize)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ lfp.add(movf, reg, regsize)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p(mov+" R31, -%d(R29)", lfp.stack)
+ p(sub+" $%d, R29", lfp.stack)
+
+ l.save()
+ p("#ifndef %s", softfloat)
+ lfp.save()
+ p("#endif")
+ p("CALL ·asyncPreempt2(SB)")
+ p("#ifndef %s", softfloat)
+ lfp.restore()
+ p("#endif")
+ l.restore()
+
+ p(mov+" %d(R29), R31", lfp.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p(mov + " (R29), R23") // load PC to REGTMP
+ p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R23)")
+}
+
+func genPPC64() {
+ // Add integer registers R3-R29
+ // R0 (zero), R1 (SP), R30 (g) are special and not saved here.
+ // R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
+ // R31 (REGTMP) will be saved manually.
+ var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
+ for i := 3; i <= 29; i++ {
+ if i == 12 || i == 13 {
+ // R12 has been saved in sigctxt.pushCall.
+ // R13 is the TLS pointer, not used by Go code. We must NOT
+ // restore it, otherwise if we parked and resumed on a
+ // different thread we'll mess up TLS addresses.
+ continue
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add("MOVD", reg, 8)
+ }
+ l.addSpecial(
+ "MOVW CR, R31\nMOVW R31, %d(R1)",
+ "MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
+ 8) // CR is 4-byte wide, but just keep the alignment
+ l.addSpecial(
+ "MOVD XER, R31\nMOVD R31, %d(R1)",
+ "MOVD %d(R1), R31\nMOVD R31, XER",
+ 8)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+ // Add floating point control/status register FPSCR.
+ l.addSpecial(
+ "MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
+ "FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
+ 8)
+
+ p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
+ p("MOVD LR, R31")
+ p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
+ p("MOVD R31, LR")
+ p("MOVD %d(R1), R2", l.stack+8)
+ p("MOVD %d(R1), R12", l.stack+16)
+ p("MOVD (R1), R31") // load PC to CTR
+ p("MOVD R31, CTR")
+ p("MOVD 32(R1), R31") // restore R31
+ p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (CTR)")
+}
+
+func genRISCV64() {
+ // X0 (zero), X1 (LR), X2 (SP), X4 (g), X31 (TMP) are special.
+ var l = layout{sp: "X2", stack: 8}
+
+ // Add integer registers (X3, X5-X30).
+ for i := 3; i < 31; i++ {
+ if i == 4 {
+ continue
+ }
+ reg := fmt.Sprintf("X%d", i)
+ l.add("MOV", reg, 8)
+ }
+
+ // Add floating point registers (F0-F31).
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("MOVD", reg, 8)
+ }
+
+ p("MOV X1, -%d(X2)", l.stack)
+ p("ADD $-%d, X2", l.stack)
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+ p("MOV %d(X2), X1", l.stack)
+ p("MOV (X2), X31")
+ p("ADD $%d, X2", l.stack+8)
+ p("JMP (X31)")
+}
+
+func genS390X() {
+ // Add integer registers R0-R12
+ // R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
+ // Saving R10 (REGTMP) is not necessary, but it is saved anyway.
+ var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
+ l.addSpecial(
+ "STMG R0, R12, %d(R15)",
+ "LMG %d(R15), R0, R12",
+ 13*8)
+ // Add floating point registers F0-F15.
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
+ p("IPM R10") // save flags upfront, as ADD will clobber flags
+ p("MOVD R14, -%d(R15)", l.stack)
+ p("ADD $-%d, R15", l.stack)
+ p("MOVW R10, 8(R15)") // save flags
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R15), R14", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("ADD $%d, R15", l.stack+8) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("MOVWZ -%d(R15), R10", l.stack) // load flags to REGTMP
+ p("TMLH R10, $(3<<12)") // restore flags
+ p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
+ p("JMP (R10)")
+}
+
+func genWasm() {
+ p("// No async preemption on wasm")
+ p("UNDEF")
+}
+
+func notImplemented() {
+ p("// Not implemented yet")
+ p("JMP ·abort(SB)")
+}
diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go
index 2868f3f..9fe31cb 100644
--- a/src/runtime/mmap.go
+++ b/src/runtime/mmap.go
@@ -5,7 +5,6 @@
// +build !plan9
// +build !solaris
// +build !windows
-// +build !nacl
// +build !linux !amd64
// +build !linux !arm64
// +build !js
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
new file mode 100644
index 0000000..60f7f9f
--- /dev/null
+++ b/src/runtime/mpagealloc.go
@@ -0,0 +1,951 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Page allocator.
+//
+// The page allocator manages mapped pages (defined by pageSize, NOT
+// physPageSize) for allocation and re-use. It is embedded into mheap.
+//
+// Pages are managed using a bitmap that is sharded into chunks.
+// In the bitmap, 1 means in-use, and 0 means free. The bitmap spans the
+// process's address space. Chunks are managed in a sparse-array-style structure
+// similar to mheap.arenas, since the bitmap may be large on some systems.
+//
+// The bitmap is efficiently searched by using a radix tree in combination
+// with fast bit-wise intrinsics. Allocation is performed using an address-ordered
+// first-fit approach.
+//
+// Each entry in the radix tree is a summary that describes three properties of
+// a particular region of the address space: the number of contiguous free pages
+// at the start and end of the region it represents, and the maximum number of
+// contiguous free pages found anywhere in that region.
+//
+// Each level of the radix tree is stored as one contiguous array, which represents
+// a different granularity of subdivision of the process's address space. Thus, this
+// radix tree is actually implicit in these large arrays, as opposed to having explicit
+// dynamically-allocated pointer-based node structures. Naturally, these arrays may be
+// quite large for systems with large address spaces, so in these cases they are mapped
+// into memory as needed. The leaf summaries of the tree correspond to a bitmap chunk.
+//
+// The root level (referred to as L0 and index 0 in pageAlloc.summary) has each
+// summary represent the largest section of address space (16 GiB on 64-bit systems),
+// with each subsequent level representing successively smaller subsections until we
+// reach the finest granularity at the leaves, a chunk.
+//
+// More specifically, each summary in each level (except for leaf summaries)
+// represents some number of entries in the following level. For example, each
+// summary in the root level may represent a 16 GiB region of address space,
+// and in the next level there could be 8 corresponding entries which represent 2
+// GiB subsections of that 16 GiB region, each of which could correspond to 8
+// entries in the next level which each represent 256 MiB regions, and so on.
+//
+// Thus, this design only scales up to heaps of a certain size, but it can always be extended to
+// larger heaps by simply adding levels to the radix tree, which mostly costs
+// additional virtual address space. The choice of managing large arrays also means
+// that a large amount of virtual address space may be reserved by the runtime.
+
+package runtime
+
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
+
+const (
+ // The size of a bitmap chunk, i.e. the number of bits (that is, pages) to consider
+ // in the bitmap at once.
+ pallocChunkPages = 1 << logPallocChunkPages
+ pallocChunkBytes = pallocChunkPages * pageSize
+ logPallocChunkPages = 9
+ logPallocChunkBytes = logPallocChunkPages + pageShift
+
+ // The number of radix bits for each level.
+ //
+ // The value of 3 is chosen such that the block of summaries we need to scan at
+ // each level fits in 64 bytes (2^3 summaries * 8 bytes per summary), which is
+ // close to the L1 cache line width on many systems. Also, a value of 3 fits 4 tree
+ // levels perfectly into the 21-bit pallocBits summary field at the root level.
+ //
+ // The following equation explains how each of the constants relate:
+ // summaryL0Bits + (summaryLevels-1)*summaryLevelBits + logPallocChunkBytes = heapAddrBits
+ //
+ // summaryLevels is an architecture-dependent value defined in mpagealloc_*.go.
+ summaryLevelBits = 3
+ summaryL0Bits = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits
+
+ // pallocChunksL2Bits is the number of bits of the chunk index number
+ // covered by the second level of the chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this change.
+ pallocChunksL2Bits = heapAddrBits - logPallocChunkBytes - pallocChunksL1Bits
+ pallocChunksL1Shift = pallocChunksL2Bits
+)
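
Plugging in values for a typical 64-bit configuration makes the equation above concrete. heapAddrBits, pageShift, and summaryLevels are defined elsewhere in the runtime; the values below (48, 13, and 5) are assumptions used only for this worked example.

package main

import "fmt"

// Assumed values for a typical 64-bit configuration; heapAddrBits, pageShift,
// and summaryLevels are defined elsewhere in the runtime.
const (
	heapAddrBits  = 48
	pageShift     = 13 // 8 KiB pages
	summaryLevels = 5

	logPallocChunkPages = 9
	logPallocChunkBytes = logPallocChunkPages + pageShift // 22, i.e. 4 MiB chunks
	summaryLevelBits    = 3
	summaryL0Bits       = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits // 14
)

func main() {
	fmt.Println("chunk size (bytes):  ", 1<<logPallocChunkBytes)                  // 4194304 (4 MiB)
	fmt.Println("root level entries:  ", 1<<summaryL0Bits)                        // 16384
	fmt.Println("bytes per root entry:", uint64(1)<<(heapAddrBits-summaryL0Bits)) // 17179869184 (16 GiB)
}
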
+
+// Maximum searchAddr value, which indicates that the heap has no free space.
+//
+// We alias maxOffAddr just to make it clear that this is the maximum address
+// for the page allocator's search space. See maxOffAddr for details.
+var maxSearchAddr = maxOffAddr
+
+// Global chunk index.
+//
+// Represents an index into the leaf level of the radix tree.
+// Similar to arenaIndex, except instead of arenas, it divides the address
+// space into chunks.
+type chunkIdx uint
+
+// chunkIndex returns the global index of the palloc chunk containing the
+// pointer p.
+func chunkIndex(p uintptr) chunkIdx {
+ return chunkIdx((p - arenaBaseOffset) / pallocChunkBytes)
+}
+
+// chunkBase returns the base address of the palloc chunk at index ci.
+func chunkBase(ci chunkIdx) uintptr {
+ return uintptr(ci)*pallocChunkBytes + arenaBaseOffset
+}
+
+// chunkPageIndex computes the index of the page that contains p,
+// relative to the chunk which contains p.
+func chunkPageIndex(p uintptr) uint {
+ return uint(p % pallocChunkBytes / pageSize)
+}
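
The following worked example applies the three helpers above to a concrete address. The constants are illustrative (8 KiB pages, 4 MiB chunks) and arenaBaseOffset is taken as zero purely to keep the arithmetic readable; its real value is platform-dependent.

package main

import "fmt"

// Illustrative constants: 8 KiB pages and 512-page (4 MiB) chunks.
// arenaBaseOffset is assumed to be zero here; its real value is platform-dependent.
const (
	pageSize         = 8192
	pallocChunkBytes = 512 * pageSize
	arenaBaseOffset  = 0
)

func main() {
	p := uintptr(0xc000124000) // a hypothetical heap address

	ci := (p - arenaBaseOffset) / pallocChunkBytes // chunkIndex(p)
	base := ci*pallocChunkBytes + arenaBaseOffset  // chunkBase(ci)
	pi := p % pallocChunkBytes / pageSize          // chunkPageIndex(p)

	fmt.Printf("chunkIndex    = %d\n", ci)    // 196608
	fmt.Printf("chunkBase     = %#x\n", base) // 0xc000000000
	fmt.Printf("page in chunk = %d\n", pi)    // 146
}
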
+
+// l1 returns the index into the first level of (*pageAlloc).chunks.
+func (i chunkIdx) l1() uint {
+ if pallocChunksL1Bits == 0 {
+ // Let the compiler optimize this away if there's no
+ // L1 map.
+ return 0
+ } else {
+ return uint(i) >> pallocChunksL1Shift
+ }
+}
+
+// l2 returns the index into the second level of (*pageAlloc).chunks.
+func (i chunkIdx) l2() uint {
+ if pallocChunksL1Bits == 0 {
+ return uint(i)
+ } else {
+ return uint(i) & (1<<pallocChunksL2Bits - 1)
+ }
+}
+
+// offAddrToLevelIndex converts an address in the offset address space
+// to the index into summary[level] containing addr.
+func offAddrToLevelIndex(level int, addr offAddr) int {
+ return int((addr.a - arenaBaseOffset) >> levelShift[level])
+}
+
+// levelIndexToOffAddr converts an index into summary[level] into
+// the corresponding address in the offset address space.
+func levelIndexToOffAddr(level, idx int) offAddr {
+ return offAddr{(uintptr(idx) << levelShift[level]) + arenaBaseOffset}
+}
+
+// addrsToSummaryRange converts base and limit pointers into a range
+// of entries for the given summary level.
+//
+// The returned range is inclusive on the lower bound and exclusive on
+// the upper bound.
+func addrsToSummaryRange(level int, base, limit uintptr) (lo int, hi int) {
+ // This is slightly more nuanced than just a shift for the exclusive
+ // upper-bound. Note that the exclusive upper bound may be within a
+ // summary at this level, meaning if we just do the obvious computation
+ // hi will end up being an inclusive upper bound. Unfortunately, just
+ // adding 1 to that is too broad since we might be on the very edge of
+ // a summary's max page count boundary for this level
+ // (1 << levelLogPages[level]). So, make limit an inclusive upper bound
+ // then shift, then add 1, so we get an exclusive upper bound at the end.
+ lo = int((base - arenaBaseOffset) >> levelShift[level])
+ hi = int(((limit-1)-arenaBaseOffset)>>levelShift[level]) + 1
+ return
+}
+
+// blockAlignSummaryRange aligns indices into the given level to that
+// level's block width (1 << levelBits[level]). It assumes lo is inclusive
+// and hi is exclusive, and so aligns them down and up respectively.
+func blockAlignSummaryRange(level int, lo, hi int) (int, int) {
+ e := uintptr(1) << levelBits[level]
+ return int(alignDown(uintptr(lo), e)), int(alignUp(uintptr(hi), e))
+}
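
The limit-1 / +1 step in addrsToSummaryRange is the subtle part; the sketch below shows why it yields a tight exclusive upper bound whether or not limit falls exactly on an entry boundary. The 4 MiB entry size and zero arenaBaseOffset are illustrative assumptions.

package main

import "fmt"

// summaryRange mirrors addrsToSummaryRange for a single level, assuming a zero
// arenaBaseOffset and an illustrative 4 MiB (1<<22) entry size for the level.
func summaryRange(base, limit uintptr) (lo, hi int) {
	const levelShift = 22
	lo = int(base >> levelShift)
	hi = int((limit-1)>>levelShift) + 1
	return
}

func main() {
	const entry = 1 << 22

	// limit exactly on an entry boundary: shifting limit directly would
	// include one entry too many; limit-1 keeps the exclusive bound tight.
	fmt.Println(summaryRange(0, 2*entry)) // 0 2
	// limit strictly inside the third entry: that entry must be included.
	fmt.Println(summaryRange(0, 2*entry+1)) // 0 3
}
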
+
+type pageAlloc struct {
+ // Radix tree of summaries.
+ //
+ // Each slice's cap represents the whole memory reservation.
+ // Each slice's len reflects the allocator's maximum known
+ // mapped heap address for that level.
+ //
+ // The backing store of each summary level is reserved in init
+ // and may or may not be committed in grow (small address spaces
+ // may commit all the memory in init).
+ //
+ // The purpose of keeping len <= cap is to enforce bounds checks
+ // on the top end of the slice so that instead of an unknown
+ // runtime segmentation fault, we get a much friendlier out-of-bounds
+ // error.
+ //
+ // To iterate over a summary level, use inUse to determine which ranges
+ // are currently available. Otherwise one might try to access
+ // memory which is only Reserved which may result in a hard fault.
+ //
+ // We may still get segmentation faults < len since some of that
+ // memory may not be committed yet.
+ summary [summaryLevels][]pallocSum
+
+ // chunks is a slice of bitmap chunks.
+ //
+ // The total size of chunks is quite large on most 64-bit platforms
+ // (O(GiB) or more) if flattened, so rather than making one large mapping
+ // (which has problems on some platforms, even when PROT_NONE) we use a
+ // two-level sparse array approach similar to the arena index in mheap.
+ //
+ // To find the chunk containing a memory address `a`, do:
+ // chunkOf(chunkIndex(a))
+ //
+ // Below is a table describing the configuration for chunks for various
+ // heapAddrBits supported by the runtime.
+ //
+ // heapAddrBits | L1 Bits | L2 Bits | L2 Entry Size
+ // ------------------------------------------------
+ // 32 | 0 | 10 | 128 KiB
+ // 33 (iOS) | 0 | 11 | 256 KiB
+ // 48 | 13 | 13 | 1 MiB
+ //
+ // There's no reason to use the L1 part of chunks on 32-bit, the
+ // address space is small so the L2 is small. For platforms with a
+ // 48-bit address space, we pick the L1 such that the L2 is 1 MiB
+ // in size, which is a good balance between low granularity without
+ // making the impact on BSS too high (note the L1 is stored directly
+ // in pageAlloc).
+ //
+ // To iterate over the bitmap, use inUse to determine which ranges
+ // are currently available. Otherwise one might iterate over unused
+ // ranges.
+ //
+ // TODO(mknyszek): Consider changing the definition of the bitmap
+ // such that 1 means free and 0 means in-use so that summaries and
+ // the bitmaps align better on zero-values.
+ chunks [1 << pallocChunksL1Bits]*[1 << pallocChunksL2Bits]pallocData
+
+ // The address to start an allocation search with. It must never
+ // point to any memory that is not contained in inUse, i.e.
+ // inUse.contains(searchAddr) must always be true.
+ //
+ // When added with arenaBaseOffset, we guarantee that
+ // all valid heap addresses (when also added with
+ // arenaBaseOffset) below this value are allocated and
+ // not worth searching.
+ //
+ // Note that adding in arenaBaseOffset transforms addresses
+ // to a new address space with a linear view of the full address
+ // space on architectures with segmented address spaces.
+ searchAddr offAddr
+
+ // start and end represent the chunk indices
+ // which pageAlloc knows about. It assumes
+ // chunks in the range [start, end) are
+ // currently ready to use.
+ start, end chunkIdx
+
+ // inUse is a slice of ranges of address space which are
+ // known by the page allocator to be currently in-use (passed
+ // to grow).
+ //
+ // This field is currently unused on 32-bit architectures but
+ // is harmless to track. We care much more about having a
+ // contiguous heap in these cases and take additional measures
+ // to ensure that, so in nearly all cases this should have just
+ // 1 element.
+ //
+ // All access is protected by the mheapLock.
+ inUse addrRanges
+
+ // scav stores the scavenger state.
+ //
+ // All fields are protected by mheapLock.
+ scav struct {
+ // inUse is a slice of ranges of address space which have not
+ // yet been looked at by the scavenger.
+ inUse addrRanges
+
+ // gen is the scavenge generation number.
+ gen uint32
+
+ // reservationBytes is how large of a reservation should be made
+ // in bytes of address space for each scavenge iteration.
+ reservationBytes uintptr
+
+ // released is the amount of memory released this generation.
+ released uintptr
+
+ // scavLWM is the lowest (offset) address that the scavenger reached this
+ // scavenge generation.
+ scavLWM offAddr
+
+ // freeHWM is the highest (offset) address of a page that was freed to
+ // the page allocator this scavenge generation.
+ freeHWM offAddr
+ }
+
+ // mheap_.lock. This level of indirection makes it possible
+ // to test pageAlloc independently of the runtime allocator.
+ mheapLock *mutex
+
+ // sysStat is the runtime memstat to update when new system
+ // memory is committed by the pageAlloc for allocation metadata.
+ sysStat *uint64
+
+ // Whether or not this struct is being used in tests.
+ test bool
+}
+
+func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
+ if levelLogPages[0] > logMaxPackedValue {
+ // We can't represent 1<<levelLogPages[0] pages, the maximum number
+ // of pages we need to represent at the root level, in a summary, which
+ // is a big problem. Throw.
+ print("runtime: root level max pages = ", 1<<levelLogPages[0], "\n")
+ print("runtime: summary max pages = ", maxPackedValue, "\n")
+ throw("root level max pages doesn't fit in summary")
+ }
+ s.sysStat = sysStat
+
+ // Initialize s.inUse.
+ s.inUse.init(sysStat)
+
+ // System-dependent initialization.
+ s.sysInit()
+
+ // Start with the searchAddr in a state indicating there's no free memory.
+ s.searchAddr = maxSearchAddr
+
+ // Set the mheapLock.
+ s.mheapLock = mheapLock
+
+ // Initialize scavenge tracking state.
+ s.scav.scavLWM = maxSearchAddr
+}
+
+// chunkOf returns the chunk at the given chunk index.
+func (s *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
+ return &s.chunks[ci.l1()][ci.l2()]
+}
+
+// grow sets up the metadata for the address range [base, base+size).
+// It may allocate metadata, in which case *s.sysStat will be updated.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) grow(base, size uintptr) {
+ // Round up to chunks, since we can't deal with increments smaller
+ // than chunks. Also, sysGrow expects aligned values.
+ limit := alignUp(base+size, pallocChunkBytes)
+ base = alignDown(base, pallocChunkBytes)
+
+ // Grow the summary levels in a system-dependent manner.
+ // We just update a bunch of additional metadata here.
+ s.sysGrow(base, limit)
+
+ // Update s.start and s.end.
+ // If no growth happened yet, start == 0. This is generally
+ // safe since the zero page is unmapped.
+ firstGrowth := s.start == 0
+ start, end := chunkIndex(base), chunkIndex(limit)
+ if firstGrowth || start < s.start {
+ s.start = start
+ }
+ if end > s.end {
+ s.end = end
+ }
+ // Note that [base, limit) will never overlap with any existing
+ // range inUse because grow only ever adds never-used memory
+ // regions to the page allocator.
+ s.inUse.add(makeAddrRange(base, limit))
+
+ // A grow operation is a lot like a free operation, so if our
+ // chunk ends up below s.searchAddr, update s.searchAddr to the
+ // new address, just like in free.
+ if b := (offAddr{base}); b.lessThan(s.searchAddr) {
+ s.searchAddr = b
+ }
+
+ // Add entries into chunks, which is sparse, if needed. Then,
+ // initialize the bitmap.
+ //
+ // Newly-grown memory is always considered scavenged.
+ // Set all the bits in the scavenged bitmaps high.
+ for c := chunkIndex(base); c < chunkIndex(limit); c++ {
+ if s.chunks[c.l1()] == nil {
+ // Create the necessary l2 entry.
+ //
+ // Store it atomically to avoid races with readers which
+ // don't acquire the heap lock.
+ r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat)
+ atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r)
+ }
+ s.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
+ }
+
+ // Update summaries accordingly. The grow acts like a free, so
+ // we need to ensure this newly-free memory is visible in the
+ // summaries.
+ s.update(base, size/pageSize, true, false)
+}
+
+// update updates heap metadata. It must be called each time the bitmap
+// is updated.
+//
+// If contig is true, update does some optimizations assuming that there was
+// a contiguous allocation or free between addr and addr+npages. alloc indicates
+// whether the operation performed was an allocation or a free.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
+ // base, limit, start, and end are inclusive.
+ limit := base + npages*pageSize - 1
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+
+ // Handle updating the lowest level first.
+ if sc == ec {
+ // Fast path: the allocation doesn't span more than one chunk,
+ // so update this one and if the summary didn't change, return.
+ x := s.summary[len(s.summary)-1][sc]
+ y := s.chunkOf(sc).summarize()
+ if x == y {
+ return
+ }
+ s.summary[len(s.summary)-1][sc] = y
+ } else if contig {
+ // Slow contiguous path: the allocation spans more than one chunk
+ // and at least one summary is guaranteed to change.
+ summary := s.summary[len(s.summary)-1]
+
+ // Update the summary for chunk sc.
+ summary[sc] = s.chunkOf(sc).summarize()
+
+ // Update the summaries for chunks in between, which are
+ // either totally allocated or freed.
+ whole := s.summary[len(s.summary)-1][sc+1 : ec]
+ if alloc {
+ // Should optimize into a memclr.
+ for i := range whole {
+ whole[i] = 0
+ }
+ } else {
+ for i := range whole {
+ whole[i] = freeChunkSum
+ }
+ }
+
+ // Update the summary for chunk ec.
+ summary[ec] = s.chunkOf(ec).summarize()
+ } else {
+ // Slow general path: the allocation spans more than one chunk
+ // and at least one summary is guaranteed to change.
+ //
+ // We can't assume a contiguous allocation happened, so walk over
+ // every chunk in the range and manually recompute the summary.
+ summary := s.summary[len(s.summary)-1]
+ for c := sc; c <= ec; c++ {
+ summary[c] = s.chunkOf(c).summarize()
+ }
+ }
+
+ // Walk up the radix tree and update the summaries appropriately.
+ changed := true
+ for l := len(s.summary) - 2; l >= 0 && changed; l-- {
+ // Update summaries at level l from summaries at level l+1.
+ changed = false
+
+ // "Constants" for the previous level which we
+ // need to compute the summary from that level.
+ logEntriesPerBlock := levelBits[l+1]
+ logMaxPages := levelLogPages[l+1]
+
+ // lo and hi describe all the parts of the level we need to look at.
+ lo, hi := addrsToSummaryRange(l, base, limit+1)
+
+ // Iterate over each block, updating the corresponding summary in the less-granular level.
+ for i := lo; i < hi; i++ {
+ children := s.summary[l+1][i<<logEntriesPerBlock : (i+1)<<logEntriesPerBlock]
+ sum := mergeSummaries(children, logMaxPages)
+ old := s.summary[l][i]
+ if old != sum {
+ changed = true
+ s.summary[l][i] = sum
+ }
+ }
+ }
+}
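
The bottom-up propagation in the loop above can be sketched with unpacked summaries. The runtime packs (start, max, end) into a single pallocSum word and merges blocks of them with mergeSummaries; the version below keeps the three fields separate and is only meant to show how a parent's start, end, and max are derived from its children, including free runs that cross child boundaries.

package main

import "fmt"

// sum is an unpacked (start, max, end) summary; the runtime packs these three
// fields into one pallocSum word. childPages is the page capacity of each child.
type sum struct{ start, max, end uint }

// merge recomputes a parent summary from its children; an illustrative sketch
// of the same bottom-up step the loop above performs with mergeSummaries.
func merge(children []sum, childPages uint) sum {
	var out sum
	// Free pages at the very start of the parent region.
	for _, c := range children {
		out.start += c.start
		if c.start != childPages {
			break
		}
	}
	// Free pages at the very end of the parent region.
	for i := len(children) - 1; i >= 0; i-- {
		out.end += children[i].end
		if children[i].end != childPages {
			break
		}
	}
	// Longest free run anywhere, including runs that cross child boundaries.
	run := uint(0)
	for _, c := range children {
		if c.max > out.max {
			out.max = c.max
		}
		if run+c.start > out.max {
			out.max = run + c.start
		}
		if c.start == childPages {
			run += childPages // child is completely free; the run keeps going
		} else {
			run = c.end
		}
		if run > out.max {
			out.max = run
		}
	}
	return out
}

func main() {
	children := []sum{{4, 4, 4}, {1, 2, 0}} // first child fully free (4 pages each)
	fmt.Println(merge(children, 4))         // {5 5 0}
}
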
+
+// allocRange marks the range of memory [base, base+npages*pageSize) as
+// allocated. It also updates the summaries to reflect the newly-updated
+// bitmap.
+//
+// Returns the amount of scavenged memory in bytes present in the
+// allocated range.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) allocRange(base, npages uintptr) uintptr {
+ limit := base + npages*pageSize - 1
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit)
+
+ scav := uint(0)
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ chunk := s.chunkOf(sc)
+ scav += chunk.scavenged.popcntRange(si, ei+1-si)
+ chunk.allocRange(si, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ chunk := s.chunkOf(sc)
+ scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si)
+ chunk.allocRange(si, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ chunk := s.chunkOf(c)
+ scav += chunk.scavenged.popcntRange(0, pallocChunkPages)
+ chunk.allocAll()
+ }
+ chunk = s.chunkOf(ec)
+ scav += chunk.scavenged.popcntRange(0, ei+1)
+ chunk.allocRange(0, ei+1)
+ }
+ s.update(base, npages, true, true)
+ return uintptr(scav) * pageSize
+}
+
+// find searches for the first (address-ordered) contiguous free region of
+// npages in size and returns a base address for that region.
+//
+// It uses s.searchAddr to prune its search and assumes that no palloc chunks
+// below chunkIndex(s.searchAddr) contain any free memory at all.
+//
+// find also computes and returns a candidate s.searchAddr, which may or
+// may not prune more of the address space than s.searchAddr already does.
+//
+// find represents the slow path and the full radix tree search.
+//
+// Returns a base address of 0 on failure, in which case the candidate
+// searchAddr returned is invalid and must be ignored.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) find(npages uintptr) (uintptr, offAddr) {
+ // Search algorithm.
+ //
+ // This algorithm walks each level l of the radix tree from the root level
+ // to the leaf level. It iterates over at most 1 << levelBits[l] of entries
+ // in a given level in the radix tree, and uses the summary information to
+ // find either:
+ // 1) That a given subtree contains a large enough contiguous region, at
+ // which point it continues iterating on the next level, or
+ // 2) That there are enough contiguous boundary-crossing bits to satisfy
+ // the allocation, at which point it knows exactly where to start
+ // allocating from.
+ //
+ // i tracks the index into the current level l's structure for the
+ // contiguous 1 << levelBits[l] entries we're actually interested in.
+ //
+ // NOTE: Technically this search could allocate a region which crosses
+ // the arenaBaseOffset boundary, which when arenaBaseOffset != 0, is
+ // a discontinuity. However, the only way this could happen is if the
+ // page at the zero address is mapped, and this is impossible on
+ // every system we support where arenaBaseOffset != 0. So, the
+ // discontinuity is already encoded in the fact that the OS will never
+ // map the zero page for us, and this function doesn't try to handle
+ // this case in any way.
+
+ // i is the beginning of the block of entries we're searching at the
+ // current level.
+ i := 0
+
+ // firstFree is the region of address space in which we are certain to
+ // find the first free page of the heap. base and bound are the inclusive
+ // bounds of this window, and both are addresses in the linearized, contiguous
+ // view of the address space (with arenaBaseOffset pre-added). At each level,
+ // this window is narrowed as we find the memory region containing the
+ // first free page of memory. To begin with, the range reflects the
+ // full process address space.
+ //
+ // firstFree is updated by calling foundFree each time free space in the
+ // heap is discovered.
+ //
+ // At the end of the search, base.addr() is the best new
+ // searchAddr we could deduce in this search.
+ firstFree := struct {
+ base, bound offAddr
+ }{
+ base: minOffAddr,
+ bound: maxOffAddr,
+ }
+ // foundFree takes the given address range [addr, addr+size) and
+ // updates firstFree if it is a narrower range. The input range must
+ // either be fully contained within firstFree or not overlap with it
+ // at all.
+ //
+ // This way, we'll record the first summary we find with any free
+ // pages on the root level and narrow that down if we descend into
+ // that summary. But as soon as we need to iterate beyond that summary
+ // in a level to find a large enough range, we'll stop narrowing.
+ foundFree := func(addr offAddr, size uintptr) {
+ if firstFree.base.lessEqual(addr) && addr.add(size-1).lessEqual(firstFree.bound) {
+ // This range fits within the current firstFree window, so narrow
+ // down the firstFree window to the base and bound of this range.
+ firstFree.base = addr
+ firstFree.bound = addr.add(size - 1)
+ } else if !(addr.add(size-1).lessThan(firstFree.base) || firstFree.bound.lessThan(addr)) {
+ // This range only partially overlaps with the firstFree range,
+ // so throw.
+ print("runtime: addr = ", hex(addr.addr()), ", size = ", size, "\n")
+ print("runtime: base = ", hex(firstFree.base.addr()), ", bound = ", hex(firstFree.bound.addr()), "\n")
+ throw("range partially overlaps")
+ }
+ }
+
+ // lastSum is the summary which we saw on the previous level that made us
+ // move on to the next level. Used to print additional information in the
+ // case of a catastrophic failure.
+ // lastSumIdx is that summary's index in the previous level.
+ lastSum := packPallocSum(0, 0, 0)
+ lastSumIdx := -1
+
+nextLevel:
+ for l := 0; l < len(s.summary); l++ {
+ // For the root level, entriesPerBlock is the whole level.
+ entriesPerBlock := 1 << levelBits[l]
+ logMaxPages := levelLogPages[l]
+
+ // We've moved into a new level, so let's update i to our new
+ // starting index. This is a no-op for level 0.
+ i <<= levelBits[l]
+
+ // Slice out the block of entries we care about.
+ entries := s.summary[l][i : i+entriesPerBlock]
+
+ // Determine j0, the first index we should start iterating from.
+ // The searchAddr may help us eliminate iterations if we followed the
+ // searchAddr on the previous level or we're on the root level, in which
+ // case the searchAddr should be the same as i after levelShift.
+ j0 := 0
+ if searchIdx := offAddrToLevelIndex(l, s.searchAddr); searchIdx&^(entriesPerBlock-1) == i {
+ j0 = searchIdx & (entriesPerBlock - 1)
+ }
+
+ // Run over the level entries looking for
+ // a contiguous run of at least npages either
+ // within an entry or across entries.
+ //
+ // base contains the page index (relative to
+ // the first entry's first page) of the currently
+ // considered run of consecutive pages.
+ //
+ // size contains the size of the currently considered
+ // run of consecutive pages.
+ var base, size uint
+ for j := j0; j < len(entries); j++ {
+ sum := entries[j]
+ if sum == 0 {
+ // A full entry means we broke any streak and
+ // that we should skip it altogether.
+ size = 0
+ continue
+ }
+
+ // We've encountered a non-zero summary which means
+ // free memory, so update firstFree.
+ foundFree(levelIndexToOffAddr(l, i+j), (uintptr(1)<<logMaxPages)*pageSize)
+
+ s := sum.start()
+ if size+s >= uint(npages) {
+ // If size == 0 we don't have a run yet,
+ // which means base isn't valid. So, set
+ // base to the first page in this block.
+ if size == 0 {
+ base = uint(j) << logMaxPages
+ }
+ // We hit npages; we're done!
+ size += s
+ break
+ }
+ if sum.max() >= uint(npages) {
+ // The entry itself contains npages contiguous
+ // free pages, so continue on the next level
+ // to find that run.
+ i += j
+ lastSumIdx = i
+ lastSum = sum
+ continue nextLevel
+ }
+ if size == 0 || s < 1<<logMaxPages {
+ // We either don't have a current run started, or this entry
+ // isn't totally free (meaning we can't continue the current
+ // one), so try to begin a new run by setting size and base
+ // based on sum.end.
+ size = sum.end()
+ base = uint(j+1)<<logMaxPages - size
+ continue
+ }
+ // The entry is completely free, so continue the run.
+ size += 1 << logMaxPages
+ }
+ if size >= uint(npages) {
+ // We found a sufficiently large run of free pages straddling
+ // some boundary, so compute the address and return it.
+ addr := levelIndexToOffAddr(l, i).add(uintptr(base) * pageSize).addr()
+ return addr, firstFree.base
+ }
+ if l == 0 {
+ // We're at level zero, so that means we've exhausted our search.
+ return 0, maxSearchAddr
+ }
+
+ // We're not at level zero, and we exhausted the level we were looking in.
+ // This means that either our calculations were wrong or the level above
+ // lied to us. In either case, dump some useful state and throw.
+ print("runtime: summary[", l-1, "][", lastSumIdx, "] = ", lastSum.start(), ", ", lastSum.max(), ", ", lastSum.end(), "\n")
+ print("runtime: level = ", l, ", npages = ", npages, ", j0 = ", j0, "\n")
+ print("runtime: s.searchAddr = ", hex(s.searchAddr.addr()), ", i = ", i, "\n")
+ print("runtime: levelShift[level] = ", levelShift[l], ", levelBits[level] = ", levelBits[l], "\n")
+ for j := 0; j < len(entries); j++ {
+ sum := entries[j]
+ print("runtime: summary[", l, "][", i+j, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
+ }
+ throw("bad summary data")
+ }
+
+ // Since we've gotten to this point, we haven't found a sufficiently-sized
+ // free region straddling some boundary (chunk or larger). That means the
+ // last summary we inspected must have had a large enough "max" value, so
+ // look inside the chunk to find a suitable run.
+ //
+ // After iterating over all levels, i must contain a chunk index which
+ // is what the final level represents.
+ ci := chunkIdx(i)
+ j, searchIdx := s.chunkOf(ci).find(npages, 0)
+ if j == ^uint(0) {
+ // We couldn't find any space in this chunk despite the summaries telling
+ // us it should be there. There's likely a bug, so dump some state and throw.
+ sum := s.summary[len(s.summary)-1][i]
+ print("runtime: summary[", len(s.summary)-1, "][", i, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
+ print("runtime: npages = ", npages, "\n")
+ throw("bad summary data")
+ }
+
+ // Compute the address at which the free space starts.
+ addr := chunkBase(ci) + uintptr(j)*pageSize
+
+ // Since we actually searched the chunk, we may have
+ // found an even narrower free window.
+ searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
+ foundFree(offAddr{searchAddr}, chunkBase(ci+1)-searchAddr)
+ return addr, firstFree.base
+}
+
+// alloc allocates npages worth of memory from the page heap, returning the base
+// address for the allocation and the amount of scavenged memory in bytes
+// contained in the region [base address, base address + npages*pageSize).
+//
+// Returns a 0 base address on failure, in which case other returned values
+// should be ignored.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
+ // If the searchAddr refers to a region which has a higher address than
+ // any known chunk, then we know we're out of memory.
+ if chunkIndex(s.searchAddr.addr()) >= s.end {
+ return 0, 0
+ }
+
+ // If npages has a chance of fitting in the chunk where the searchAddr is,
+ // search it directly.
+ searchAddr := minOffAddr
+ if pallocChunkPages-chunkPageIndex(s.searchAddr.addr()) >= uint(npages) {
+ // npages is guaranteed to be no greater than pallocChunkPages here.
+ i := chunkIndex(s.searchAddr.addr())
+ if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) {
+ j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr.addr()))
+ if j == ^uint(0) {
+ print("runtime: max = ", max, ", npages = ", npages, "\n")
+ print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr.addr()), ", s.searchAddr = ", hex(s.searchAddr.addr()), "\n")
+ throw("bad summary data")
+ }
+ addr = chunkBase(i) + uintptr(j)*pageSize
+ searchAddr = offAddr{chunkBase(i) + uintptr(searchIdx)*pageSize}
+ goto Found
+ }
+ }
+ // We failed to use a searchAddr for one reason or another, so try
+ // the slow path.
+ addr, searchAddr = s.find(npages)
+ if addr == 0 {
+ if npages == 1 {
+ // We failed to find a single free page, the smallest unit
+ // of allocation. This means we know the heap is completely
+ // exhausted. Otherwise, the heap still might have free
+ // space in it, just not enough contiguous space to
+ // accommodate npages.
+ s.searchAddr = maxSearchAddr
+ }
+ return 0, 0
+ }
+Found:
+ // Go ahead and actually mark the bits now that we have an address.
+ scav = s.allocRange(addr, npages)
+
+ // If we found a higher searchAddr, we know that all the
+ // heap memory before that searchAddr in an offset address space is
+ // allocated, so bump s.searchAddr up to the new one.
+ if s.searchAddr.lessThan(searchAddr) {
+ s.searchAddr = searchAddr
+ }
+ return addr, scav
+}
+
+// free returns npages worth of memory starting at base back to the page heap.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) free(base, npages uintptr) {
+ // If we're freeing pages below the s.searchAddr, update searchAddr.
+ if b := (offAddr{base}); b.lessThan(s.searchAddr) {
+ s.searchAddr = b
+ }
+ // Update the free high watermark for the scavenger.
+ limit := base + npages*pageSize - 1
+ if offLimit := (offAddr{limit}); s.scav.freeHWM.lessThan(offLimit) {
+ s.scav.freeHWM = offLimit
+ }
+ if npages == 1 {
+ // Fast path: we're clearing a single bit, and we know exactly
+ // where it is, so mark it directly.
+ i := chunkIndex(base)
+ s.chunkOf(i).free1(chunkPageIndex(base))
+ } else {
+ // Slow path: we're clearing more bits so we may need to iterate.
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit)
+
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ s.chunkOf(sc).free(si, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ s.chunkOf(sc).free(si, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ s.chunkOf(c).freeAll()
+ }
+ s.chunkOf(ec).free(0, ei+1)
+ }
+ }
+ s.update(base, npages, true, false)
+}
+
+const (
+ pallocSumBytes = unsafe.Sizeof(pallocSum(0))
+
+ // maxPackedValue is the maximum value that any of the three fields in
+ // the pallocSum may take on.
+ maxPackedValue = 1 << logMaxPackedValue
+ logMaxPackedValue = logPallocChunkPages + (summaryLevels-1)*summaryLevelBits
+
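+ // freeChunkSum is the pallocSum of a completely free chunk: start, max,
+ // and end are all equal to pallocChunkPages.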
+ freeChunkSum = pallocSum(uint64(pallocChunkPages) |
+ uint64(pallocChunkPages<<logMaxPackedValue) |
+ uint64(pallocChunkPages<<(2*logMaxPackedValue)))
+)
+
+// pallocSum is a packed summary type which packs three numbers: start, max,
+// and end into a single 8-byte value. Each of these values is a summary of
+// a bitmap and is thus a count; each may have a maximum value of
+// 2^21 - 1, or all three may be equal to 2^21. The latter case is represented
+// by just setting the 64th bit.
+type pallocSum uint64
+
+// packPallocSum takes a start, max, and end value and produces a pallocSum.
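+//
+// As an illustration (assuming logMaxPackedValue == 21, the 64-bit layout):
+// start occupies bits 0-20, max occupies bits 21-41, end occupies bits
+// 42-62, and bit 63 is the saturation flag, so for example
+// packPallocSum(3, 5, 2) == 0x3 | 0x5<<21 | 0x2<<42 == 0x80000a00003.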
+func packPallocSum(start, max, end uint) pallocSum {
+ if max == maxPackedValue {
+ return pallocSum(uint64(1 << 63))
+ }
+ return pallocSum((uint64(start) & (maxPackedValue - 1)) |
+ ((uint64(max) & (maxPackedValue - 1)) << logMaxPackedValue) |
+ ((uint64(end) & (maxPackedValue - 1)) << (2 * logMaxPackedValue)))
+}
+
+// start extracts the start value from a packed sum.
+func (p pallocSum) start() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint(uint64(p) & (maxPackedValue - 1))
+}
+
+// max extracts the max value from a packed sum.
+func (p pallocSum) max() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1))
+}
+
+// end extracts the end value from a packed sum.
+func (p pallocSum) end() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1))
+}
+
+// unpack unpacks all three values from the summary.
+func (p pallocSum) unpack() (uint, uint, uint) {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue, maxPackedValue, maxPackedValue
+ }
+ return uint(uint64(p) & (maxPackedValue - 1)),
+ uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1)),
+ uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1))
+}
+
+// mergeSummaries merges consecutive summaries, each of which may represent
+// at most 1 << logMaxPagesPerSum pages, into a single summary.
+func mergeSummaries(sums []pallocSum, logMaxPagesPerSum uint) pallocSum {
+ // Merge the summaries in sums into one.
+ //
+ // We do this by keeping a running summary representing the merged
+ // summaries of sums[:i] in start, max, and end.
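+ //
+ // Illustrative example (hypothetical values): with logMaxPagesPerSum == 9,
+ // merging sums[0] == (512, 512, 512) (completely free) with
+ // sums[1] == (10, 100, 20) yields start == 522 (the free prefix extends
+ // across the boundary), max == 522 (the 512+10 run straddling the
+ // boundary beats sums[1]'s max of 100), and end == 20 (taken from
+ // sums[1] because it is not completely free).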
+ start, max, end := sums[0].unpack()
+ for i := 1; i < len(sums); i++ {
+ // Merge in sums[i].
+ si, mi, ei := sums[i].unpack()
+
+ // Merge in sums[i].start only if the running summary is
+ // completely free, otherwise this summary's start
+ // plays no role in the combined sum.
+ if start == uint(i)<<logMaxPagesPerSum {
+ start += si
+ }
+
+ // Recompute the max value of the running sum by looking
+ // across the boundary between the running sum and sums[i]
+ // and at the max sums[i], taking the greatest of those two
+ // and the max of the running sum.
+ if end+si > max {
+ max = end + si
+ }
+ if mi > max {
+ max = mi
+ }
+
+ // Merge in end by checking if this new summary is totally
+ // free. If it is, then we want to extend the running sum's
+ // end by the new summary. If not, then we have some alloc'd
+ // pages in there and we just want to take the end value in
+ // sums[i].
+ if ei == 1<<logMaxPagesPerSum {
+ end += 1 << logMaxPagesPerSum
+ } else {
+ end = ei
+ }
+ }
+ return packPallocSum(start, max, end)
+}
diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go
new file mode 100644
index 0000000..6658a90
--- /dev/null
+++ b/src/runtime/mpagealloc_32bit.go
@@ -0,0 +1,116 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 arm mips mipsle wasm darwin,arm64
+
+// wasm is treated as a 32-bit architecture for the purposes of the page
+// allocator, even though it has 64-bit pointers. This is because any wasm
+// pointer always has its top 32 bits as zero, so the effective heap address
+// space is only 2^32 bytes in size (see heapAddrBits).
+
+// darwin/arm64 is treated as a 32-bit architecture for the purposes of the
+// page allocator, even though it has 64-bit pointers and a 33-bit address
+// space (see heapAddrBits). The 33 bit address space cannot be rounded up
+// to 64 bits because there are too many summary levels to fit in just 33
+// bits.
+
+package runtime
+
+import "unsafe"
+
+const (
+ // The number of levels in the radix tree.
+ summaryLevels = 4
+
+ // Constants for testing.
+ pageAlloc32Bit = 1
+ pageAlloc64Bit = 0
+
+ // Number of bits needed to represent all indices into the L1 of the
+ // chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this number change.
+ pallocChunksL1Bits = 0
+)
+
+// See comment in mpagealloc_64bit.go.
+var levelBits = [summaryLevels]uint{
+ summaryL0Bits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+}
+
+// See comment in mpagealloc_64bit.go.
+var levelShift = [summaryLevels]uint{
+ heapAddrBits - summaryL0Bits,
+ heapAddrBits - summaryL0Bits - 1*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 2*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 3*summaryLevelBits,
+}
+
+// See comment in mpagealloc_64bit.go.
+var levelLogPages = [summaryLevels]uint{
+ logPallocChunkPages + 3*summaryLevelBits,
+ logPallocChunkPages + 2*summaryLevelBits,
+ logPallocChunkPages + 1*summaryLevelBits,
+ logPallocChunkPages,
+}
+
+// See mpagealloc_64bit.go for details.
+func (s *pageAlloc) sysInit() {
+ // Calculate how much memory all our entries will take up.
+ //
+ // This should be around 12 KiB or less.
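+ //
+ // For instance, assuming heapAddrBits == 32, the level shifts work out
+ // to {31, 28, 25, 22}, giving 2+16+128+1024 == 1170 summaries, or a bit
+ // over 9 KiB before rounding up to physPageSize. The exact figure
+ // depends on the platform constants.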
+ totalSize := uintptr(0)
+ for l := 0; l < summaryLevels; l++ {
+ totalSize += (uintptr(1) << (heapAddrBits - levelShift[l])) * pallocSumBytes
+ }
+ totalSize = alignUp(totalSize, physPageSize)
+
+ // Reserve memory for all levels in one go. There shouldn't be much for 32-bit.
+ reservation := sysReserve(nil, totalSize)
+ if reservation == nil {
+ throw("failed to reserve page summary memory")
+ }
+ // There isn't much. Just map it and mark it as used immediately.
+ sysMap(reservation, totalSize, s.sysStat)
+ sysUsed(reservation, totalSize)
+
+ // Iterate over the reservation and cut it up into slices.
+ //
+ // Maintain i as the byte offset from reservation where
+ // the new slice should start.
+ for l, shift := range levelShift {
+ entries := 1 << (heapAddrBits - shift)
+
+ // Put this reservation into a slice.
+ sl := notInHeapSlice{(*notInHeap)(reservation), 0, entries}
+ s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+
+ reservation = add(reservation, uintptr(entries)*pallocSumBytes)
+ }
+}
+
+// See mpagealloc_64bit.go for details.
+func (s *pageAlloc) sysGrow(base, limit uintptr) {
+ if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+ print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+ throw("sysGrow bounds not aligned to pallocChunkBytes")
+ }
+
+ // Walk up the tree and update the summary slices.
+ for l := len(s.summary) - 1; l >= 0; l-- {
+ // Figure out what part of the summary array this new address space needs.
+ // Note that we need to align the ranges to the block width (1<<levelBits[l])
+ // at this level because the full block is needed to compute the summary for
+ // the next level.
+ lo, hi := addrsToSummaryRange(l, base, limit)
+ _, hi = blockAlignSummaryRange(l, lo, hi)
+ if hi > len(s.summary[l]) {
+ s.summary[l] = s.summary[l][:hi]
+ }
+ }
+}
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go
new file mode 100644
index 0000000..831626e
--- /dev/null
+++ b/src/runtime/mpagealloc_64bit.go
@@ -0,0 +1,180 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x
+
+// See mpagealloc_32bit.go for why darwin/arm64 is excluded here.
+
+package runtime
+
+import "unsafe"
+
+const (
+ // The number of levels in the radix tree.
+ summaryLevels = 5
+
+ // Constants for testing.
+ pageAlloc32Bit = 0
+ pageAlloc64Bit = 1
+
+ // Number of bits needed to represent all indices into the L1 of the
+ // chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this number change.
+ pallocChunksL1Bits = 13
+)
+
+// levelBits is the number of bits in the radix for a given level in the super summary
+// structure.
+//
+// The sum of all the entries of levelBits plus logPallocChunkBytes
+// should equal heapAddrBits.
+var levelBits = [summaryLevels]uint{
+ summaryL0Bits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+}
+
+// levelShift is the number of bits to shift to acquire the radix for a given level
+// in the super summary structure.
+//
+// With levelShift, one can compute the index of the summary at level l related to a
+// pointer p by doing:
+// p >> levelShift[l]
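+//
+// For example, assuming heapAddrBits == 48 and summaryL0Bits == 14 (a
+// typical 64-bit configuration), levelShift works out to
+// {34, 31, 28, 25, 22}, so the level-2 summary index for an address p is
+// p >> 28.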
+var levelShift = [summaryLevels]uint{
+ heapAddrBits - summaryL0Bits,
+ heapAddrBits - summaryL0Bits - 1*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 2*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 3*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 4*summaryLevelBits,
+}
+
+// levelLogPages is log2 the maximum number of runtime pages in the address space
+// a summary in the given level represents.
+//
+// The leaf level always represents exactly log2 of 1 chunk's worth of pages.
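+//
+// For example, with logPallocChunkPages == 9 and summaryLevelBits == 3
+// this works out to {21, 18, 15, 12, 9}, i.e. a root-level summary covers
+// up to 2^21 pages, which matches maxPackedValue.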
+var levelLogPages = [summaryLevels]uint{
+ logPallocChunkPages + 4*summaryLevelBits,
+ logPallocChunkPages + 3*summaryLevelBits,
+ logPallocChunkPages + 2*summaryLevelBits,
+ logPallocChunkPages + 1*summaryLevelBits,
+ logPallocChunkPages,
+}
+
+// sysInit performs architecture-dependent initialization of fields
+// in pageAlloc. pageAlloc should be uninitialized except for sysStat
+// if any runtime statistic should be updated.
+func (s *pageAlloc) sysInit() {
+ // Reserve memory for each level. This will get mapped in
+ // as R/W by setArenas.
+ for l, shift := range levelShift {
+ entries := 1 << (heapAddrBits - shift)
+
+ // Reserve b bytes of memory anywhere in the address space.
+ b := alignUp(uintptr(entries)*pallocSumBytes, physPageSize)
+ r := sysReserve(nil, b)
+ if r == nil {
+ throw("failed to reserve page summary memory")
+ }
+
+ // Put this reservation into a slice.
+ sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
+ s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+ }
+}
+
+// sysGrow performs architecture-dependent operations on heap
+// growth for the page allocator, such as mapping in new memory
+// for summaries. It also updates the length of the slices in
+// s.summary.
+//
+// base is the base of the newly-added heap memory and limit is
+// the first address past the end of the newly-added heap memory.
+// Both must be aligned to pallocChunkBytes.
+//
+// The caller must update s.start and s.end after calling sysGrow.
+func (s *pageAlloc) sysGrow(base, limit uintptr) {
+ if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+ print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+ throw("sysGrow bounds not aligned to pallocChunkBytes")
+ }
+
+ // addrRangeToSummaryRange converts a range of addresses into a range
+ // of summary indices which must be mapped to support those addresses
+ // in the summary range.
+ addrRangeToSummaryRange := func(level int, r addrRange) (int, int) {
+ sumIdxBase, sumIdxLimit := addrsToSummaryRange(level, r.base.addr(), r.limit.addr())
+ return blockAlignSummaryRange(level, sumIdxBase, sumIdxLimit)
+ }
+
+ // summaryRangeToSumAddrRange converts a range of indices in any
+ // level of s.summary into page-aligned addresses which cover that
+ // range of indices.
+ summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange {
+ baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize)
+ limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize)
+ base := unsafe.Pointer(&s.summary[level][0])
+ return addrRange{
+ offAddr{uintptr(add(base, baseOffset))},
+ offAddr{uintptr(add(base, limitOffset))},
+ }
+ }
+
+ // addrRangeToSumAddrRange is a convenience function that converts
+ // an address range r to the address range of the given summary level
+ // that stores the summaries for r.
+ addrRangeToSumAddrRange := func(level int, r addrRange) addrRange {
+ sumIdxBase, sumIdxLimit := addrRangeToSummaryRange(level, r)
+ return summaryRangeToSumAddrRange(level, sumIdxBase, sumIdxLimit)
+ }
+
+ // Find the first inUse index which is strictly greater than base.
+ //
+ // Because this function will never be asked to remap the same memory
+ // twice, this index is effectively the index at which we would insert
+ // this new growth, and base will never overlap/be contained within
+ // any existing range.
+ //
+ // This will be used to look at what memory in the summary array is already
+ // mapped before and after this new range.
+ inUseIndex := s.inUse.findSucc(base)
+
+ // Walk up the radix tree and map summaries in as needed.
+ for l := range s.summary {
+ // Figure out what part of the summary array this new address space needs.
+ needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, makeAddrRange(base, limit))
+
+ // Update the summary slices with a new upper-bound. This ensures
+ // we get tight bounds checks on at least the top bound.
+ //
+ // We must do this regardless of whether we map new memory.
+ if needIdxLimit > len(s.summary[l]) {
+ s.summary[l] = s.summary[l][:needIdxLimit]
+ }
+
+ // Compute the needed address range in the summary array for level l.
+ need := summaryRangeToSumAddrRange(l, needIdxBase, needIdxLimit)
+
+ // Prune need down to what needs to be newly mapped. Some parts of it may
+ // already be mapped by what inUse describes due to page alignment requirements
+ // for mapping. prune's invariants are guaranteed by the fact that this
+ // function will never be asked to remap the same memory twice.
+ if inUseIndex > 0 {
+ need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1]))
+ }
+ if inUseIndex < len(s.inUse.ranges) {
+ need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex]))
+ }
+ // It's possible that after our pruning above, there's nothing new to map.
+ if need.size() == 0 {
+ continue
+ }
+
+ // Map and commit need.
+ sysMap(unsafe.Pointer(need.base.addr()), need.size(), s.sysStat)
+ sysUsed(unsafe.Pointer(need.base.addr()), need.size())
+ }
+}
diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go
new file mode 100644
index 0000000..89a4a25
--- /dev/null
+++ b/src/runtime/mpagealloc_test.go
@@ -0,0 +1,978 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ . "runtime"
+ "testing"
+)
+
+func checkPageAlloc(t *testing.T, want, got *PageAlloc) {
+ // Ensure start and end are correct.
+ wantStart, wantEnd := want.Bounds()
+ gotStart, gotEnd := got.Bounds()
+ if gotStart != wantStart {
+ t.Fatalf("start values not equal: got %d, want %d", gotStart, wantStart)
+ }
+ if gotEnd != wantEnd {
+ t.Fatalf("end values not equal: got %d, want %d", gotEnd, wantEnd)
+ }
+
+ for i := gotStart; i < gotEnd; i++ {
+ // Check the bitmaps. Note that we may have nil data.
+ gb, wb := got.PallocData(i), want.PallocData(i)
+ if gb == nil && wb == nil {
+ continue
+ }
+ if (gb == nil && wb != nil) || (gb != nil && wb == nil) {
+ t.Errorf("chunk %d nilness mismatch", i)
+ }
+ if !checkPallocBits(t, gb.PallocBits(), wb.PallocBits()) {
+ t.Logf("in chunk %d (mallocBits)", i)
+ }
+ if !checkPallocBits(t, gb.Scavenged(), wb.Scavenged()) {
+ t.Logf("in chunk %d (scavenged)", i)
+ }
+ }
+ // TODO(mknyszek): Verify summaries too?
+}
+
+func TestPageAllocGrow(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ type test struct {
+ chunks []ChunkIdx
+ inUse []AddrRange
+ }
+ tests := map[string]test{
+ "One": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ },
+ },
+ "Contiguous2": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
+ },
+ },
+ "Contiguous5": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 3,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "Discontiguous": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "Mixed": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "WildlyDiscontiguous": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 0x10,
+ BaseChunkIdx + 0x21,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
+ {PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)},
+ {PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)},
+ },
+ },
+ "ManyDiscontiguous": {
+ // The initial cap is 16. Test 33 ranges, to exercise the growth path (twice).
+ chunks: []ChunkIdx{
+ BaseChunkIdx, BaseChunkIdx + 2, BaseChunkIdx + 4, BaseChunkIdx + 6,
+ BaseChunkIdx + 8, BaseChunkIdx + 10, BaseChunkIdx + 12, BaseChunkIdx + 14,
+ BaseChunkIdx + 16, BaseChunkIdx + 18, BaseChunkIdx + 20, BaseChunkIdx + 22,
+ BaseChunkIdx + 24, BaseChunkIdx + 26, BaseChunkIdx + 28, BaseChunkIdx + 30,
+ BaseChunkIdx + 32, BaseChunkIdx + 34, BaseChunkIdx + 36, BaseChunkIdx + 38,
+ BaseChunkIdx + 40, BaseChunkIdx + 42, BaseChunkIdx + 44, BaseChunkIdx + 46,
+ BaseChunkIdx + 48, BaseChunkIdx + 50, BaseChunkIdx + 52, BaseChunkIdx + 54,
+ BaseChunkIdx + 56, BaseChunkIdx + 58, BaseChunkIdx + 60, BaseChunkIdx + 62,
+ BaseChunkIdx + 64,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ {PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)},
+ {PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)},
+ {PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)},
+ {PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)},
+ {PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)},
+ {PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)},
+ {PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)},
+ {PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)},
+ {PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)},
+ {PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)},
+ {PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)},
+ {PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)},
+ {PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)},
+ {PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)},
+ {PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)},
+ {PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)},
+ {PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)},
+ {PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)},
+ {PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)},
+ {PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)},
+ {PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)},
+ {PageBase(BaseChunkIdx+48, 0), PageBase(BaseChunkIdx+49, 0)},
+ {PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)},
+ {PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)},
+ {PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)},
+ {PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)},
+ {PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)},
+ {PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)},
+ {PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)},
+ {PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)},
+ },
+ },
+ }
+ if PageAlloc64Bit != 0 {
+ tests["ExtremelyDiscontiguous"] = test{
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 0x100000, // constant translates to O(TiB)
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)},
+ },
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ // By creating a new pageAlloc, we will
+ // grow it for each chunk defined in x.
+ x := make(map[ChunkIdx][]BitRange)
+ for _, c := range v.chunks {
+ x[c] = []BitRange{}
+ }
+ b := NewPageAlloc(x, nil)
+ defer FreePageAlloc(b)
+
+ got := b.InUse()
+ want := v.inUse
+
+ // Check for mismatches.
+ if len(got) != len(want) {
+ t.Fail()
+ } else {
+ for i := range want {
+ if want[i] != got[i] {
+ t.Fail()
+ break
+ }
+ }
+ }
+ if t.Failed() {
+ t.Logf("found inUse mismatch")
+ t.Logf("got:")
+ for i, r := range got {
+ t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
+ }
+ t.Logf("want:")
+ for i, r := range want {
+ t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
+ }
+ }
+ })
+ }
+}
+
+func TestPageAllocAlloc(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ type hit struct {
+ npages, base, scav uintptr
+ }
+ type test struct {
+ scav map[ChunkIdx][]BitRange
+ before map[ChunkIdx][]BitRange
+ after map[ChunkIdx][]BitRange
+ hits []hit
+ }
+ tests := map[string]test{
+ "AllFree1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {2, 2}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx, 0), PageSize},
+ {1, PageBase(BaseChunkIdx, 1), 0},
+ {1, PageBase(BaseChunkIdx, 2), PageSize},
+ {1, PageBase(BaseChunkIdx, 3), PageSize},
+ {1, PageBase(BaseChunkIdx, 4), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 5}},
+ },
+ },
+ "ManyArena1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 1}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx+2, PallocChunkPages-1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguous1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx+0xff, 0), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 1}},
+ },
+ },
+ "AllFree2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 3}, {7, 1}},
+ },
+ hits: []hit{
+ {2, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {2, PageBase(BaseChunkIdx, 2), PageSize},
+ {2, PageBase(BaseChunkIdx, 4), 0},
+ {2, PageBase(BaseChunkIdx, 6), PageSize},
+ {2, PageBase(BaseChunkIdx, 8), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 10}},
+ },
+ },
+ "Straddle2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages - 1}},
+ BaseChunkIdx + 1: {{1, PallocChunkPages - 1}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages - 1, 1}},
+ BaseChunkIdx + 1: {},
+ },
+ hits: []hit{
+ {2, PageBase(BaseChunkIdx, PallocChunkPages-1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFree5": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 8}, {9, 1}, {17, 5}},
+ },
+ hits: []hit{
+ {5, PageBase(BaseChunkIdx, 0), 5 * PageSize},
+ {5, PageBase(BaseChunkIdx, 5), 4 * PageSize},
+ {5, PageBase(BaseChunkIdx, 10), 0},
+ {5, PageBase(BaseChunkIdx, 15), 3 * PageSize},
+ {5, PageBase(BaseChunkIdx, 20), 2 * PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 25}},
+ },
+ },
+ "AllFree64": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{21, 1}, {63, 65}},
+ },
+ hits: []hit{
+ {64, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {64, PageBase(BaseChunkIdx, 64), 64 * PageSize},
+ {64, PageBase(BaseChunkIdx, 128), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 192}},
+ },
+ },
+ "AllFree65": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{129, 1}},
+ },
+ hits: []hit{
+ {65, PageBase(BaseChunkIdx, 0), 0},
+ {65, PageBase(BaseChunkIdx, 65), PageSize},
+ {65, PageBase(BaseChunkIdx, 130), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 195}},
+ },
+ },
+ "ExhaustPallocChunkPages-3": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{10, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages - 3, PageBase(BaseChunkIdx, 0), PageSize},
+ {PallocChunkPages - 3, 0, 0},
+ {1, PageBase(BaseChunkIdx, PallocChunkPages-3), 0},
+ {2, PageBase(BaseChunkIdx, PallocChunkPages-2), 0},
+ {1, 0, 0},
+ {PallocChunkPages - 3, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFreePallocChunkPages": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {PallocChunkPages - 1, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{3, 100}},
+ },
+ hits: []hit{
+ {PallocChunkPages, PageBase(BaseChunkIdx, PallocChunkPages/2), 100 * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages+1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {PallocChunkPages + 1, PageBase(BaseChunkIdx, PallocChunkPages/2), (PallocChunkPages + 1) * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, PageBase(BaseChunkIdx+1, PallocChunkPages/2+1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages/2 + 2}},
+ },
+ },
+ "AllFreePallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx, 0), 0},
+ {PallocChunkPages * 2, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguousPallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0x40: {},
+ BaseChunkIdx + 0x41: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0x40: {},
+ BaseChunkIdx + 0x41: {},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx+0x40, 0), 0},
+ {21, PageBase(BaseChunkIdx, 0), 21 * PageSize},
+ {1, PageBase(BaseChunkIdx, 21), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 22}},
+ BaseChunkIdx + 0x40: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0x41: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 7}},
+ BaseChunkIdx + 1: {{3, 5}, {121, 10}},
+ BaseChunkIdx + 2: {{PallocChunkPages/2 + 12, 2}},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx, PallocChunkPages/2), 15 * PageSize},
+ {PallocChunkPages * 2, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages*5/4": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages * 3 / 4}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages * 3 / 4}},
+ BaseChunkIdx + 3: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{PallocChunkPages / 2, PallocChunkPages/4 + 1}},
+ BaseChunkIdx + 2: {{PallocChunkPages / 3, 1}},
+ BaseChunkIdx + 3: {{PallocChunkPages * 2 / 3, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages * 5 / 4, PageBase(BaseChunkIdx+2, PallocChunkPages*3/4), PageSize},
+ {PallocChunkPages * 5 / 4, 0, 0},
+ {1, PageBase(BaseChunkIdx+1, PallocChunkPages*3/4), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages*3/4 + 1}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFreePallocChunkPages*7+5": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{50, 1}},
+ BaseChunkIdx + 1: {{31, 1}},
+ BaseChunkIdx + 2: {{7, 1}},
+ BaseChunkIdx + 3: {{200, 1}},
+ BaseChunkIdx + 4: {{3, 1}},
+ BaseChunkIdx + 5: {{51, 1}},
+ BaseChunkIdx + 6: {{20, 1}},
+ BaseChunkIdx + 7: {{1, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages*7 + 5, PageBase(BaseChunkIdx, 0), 8 * PageSize},
+ {PallocChunkPages*7 + 5, 0, 0},
+ {1, PageBase(BaseChunkIdx+7, 5), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ BaseChunkIdx + 4: {{0, PallocChunkPages}},
+ BaseChunkIdx + 5: {{0, PallocChunkPages}},
+ BaseChunkIdx + 6: {{0, PallocChunkPages}},
+ BaseChunkIdx + 7: {{0, 6}},
+ },
+ },
+ }
+ if PageAlloc64Bit != 0 {
+ const chunkIdxBigJump = 0x100000 // chunk index offset which translates to O(TiB)
+
+ // This test attempts to trigger a bug where we look at unmapped summary
+ // memory in a case other than heap exhaustion.
+ //
+ // It achieves this by placing a chunk such that its summary will be
+ // at the very end of a physical page. It then also places another chunk
+ // much further up in the address space, such that any allocations into the
+ // first chunk do not exhaust the heap and the second chunk's summary is not in the
+ // page immediately adjacent to the first chunk's summary's page.
+ // Allocating into this first chunk to exhaustion and then into the second
+ // chunk may then trigger a check in the allocator which erroneously looks at
+ // unmapped summary memory and crashes.
+
+ // Figure out how many chunks are in a physical page, then align BaseChunkIdx
+ // to a physical page in the chunk summary array. Here we only assume that
+ // each summary array is aligned to some physical page.
+ sumsPerPhysPage := ChunkIdx(PhysPageSize / PallocSumBytes)
+ baseChunkIdx := BaseChunkIdx &^ (sumsPerPhysPage - 1)
+ tests["DiscontiguousMappedSumBoundary"] = test{
+ before: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {},
+ baseChunkIdx + chunkIdxBigJump: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {},
+ baseChunkIdx + chunkIdxBigJump: {},
+ },
+ hits: []hit{
+ {PallocChunkPages - 1, PageBase(baseChunkIdx+sumsPerPhysPage-1, 0), 0},
+ {1, PageBase(baseChunkIdx+sumsPerPhysPage-1, PallocChunkPages-1), 0},
+ {1, PageBase(baseChunkIdx+chunkIdxBigJump, 0), 0},
+ {PallocChunkPages - 1, PageBase(baseChunkIdx+chunkIdxBigJump, 1), 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages}},
+ baseChunkIdx + chunkIdxBigJump: {{0, PallocChunkPages}},
+ },
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, v.scav)
+ defer FreePageAlloc(b)
+
+ for iter, i := range v.hits {
+ a, s := b.Alloc(i.npages)
+ if a != i.base {
+ t.Fatalf("bad alloc #%d: want base 0x%x, got 0x%x", iter+1, i.base, a)
+ }
+ if s != i.scav {
+ t.Fatalf("bad alloc #%d: want scav %d, got %d", iter+1, i.scav, s)
+ }
+ }
+ want := NewPageAlloc(v.after, v.scav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocExhaust(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ for _, npages := range []uintptr{1, 2, 3, 4, 5, 8, 16, 64, 1024, 1025, 2048, 2049} {
+ npages := npages
+ t.Run(fmt.Sprintf("%d", npages), func(t *testing.T) {
+ // Construct b.
+ bDesc := make(map[ChunkIdx][]BitRange)
+ for i := ChunkIdx(0); i < 4; i++ {
+ bDesc[BaseChunkIdx+i] = []BitRange{}
+ }
+ b := NewPageAlloc(bDesc, nil)
+ defer FreePageAlloc(b)
+
+ // Allocate into b with npages until we've exhausted the heap.
+ nAlloc := (PallocChunkPages * 4) / int(npages)
+ for i := 0; i < nAlloc; i++ {
+ addr := PageBase(BaseChunkIdx, uint(i)*uint(npages))
+ if a, _ := b.Alloc(npages); a != addr {
+ t.Fatalf("bad alloc #%d: want 0x%x, got 0x%x", i+1, addr, a)
+ }
+ }
+
+ // Check to make sure the next allocation fails.
+ if a, _ := b.Alloc(npages); a != 0 {
+ t.Fatalf("bad alloc #%d: want 0, got 0x%x", nAlloc, a)
+ }
+
+ // Construct what we want the heap to look like now.
+ allocPages := nAlloc * int(npages)
+ wantDesc := make(map[ChunkIdx][]BitRange)
+ for i := ChunkIdx(0); i < 4; i++ {
+ if allocPages >= PallocChunkPages {
+ wantDesc[BaseChunkIdx+i] = []BitRange{{0, PallocChunkPages}}
+ allocPages -= PallocChunkPages
+ } else if allocPages > 0 {
+ wantDesc[BaseChunkIdx+i] = []BitRange{{0, uint(allocPages)}}
+ allocPages = 0
+ } else {
+ wantDesc[BaseChunkIdx+i] = []BitRange{}
+ }
+ }
+ want := NewPageAlloc(wantDesc, nil)
+ defer FreePageAlloc(want)
+
+ // Check to make sure the heap b matches what we want.
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocFree(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ tests := map[string]struct {
+ before map[ChunkIdx][]BitRange
+ after map[ChunkIdx][]BitRange
+ npages uintptr
+ frees []uintptr
+ }{
+ "Free1": {
+ npages: 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 1),
+ PageBase(BaseChunkIdx, 2),
+ PageBase(BaseChunkIdx, 3),
+ PageBase(BaseChunkIdx, 4),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{5, PallocChunkPages - 5}},
+ },
+ },
+ "ManyArena1": {
+ npages: 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ PageBase(BaseChunkIdx+1, 0),
+ PageBase(BaseChunkIdx+2, PallocChunkPages-1),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}, {PallocChunkPages/2 + 1, PallocChunkPages/2 - 1}},
+ BaseChunkIdx + 1: {{1, PallocChunkPages - 1}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 1}},
+ },
+ },
+ "Free2": {
+ npages: 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 2),
+ PageBase(BaseChunkIdx, 4),
+ PageBase(BaseChunkIdx, 6),
+ PageBase(BaseChunkIdx, 8),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{10, PallocChunkPages - 10}},
+ },
+ },
+ "Straddle2": {
+ npages: 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages - 1, 1}},
+ BaseChunkIdx + 1: {{0, 1}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages-1),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "Free5": {
+ npages: 5,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 5),
+ PageBase(BaseChunkIdx, 10),
+ PageBase(BaseChunkIdx, 15),
+ PageBase(BaseChunkIdx, 20),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{25, PallocChunkPages - 25}},
+ },
+ },
+ "Free64": {
+ npages: 64,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 64),
+ PageBase(BaseChunkIdx, 128),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{192, PallocChunkPages - 192}},
+ },
+ },
+ "Free65": {
+ npages: 65,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 65),
+ PageBase(BaseChunkIdx, 130),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{195, PallocChunkPages - 195}},
+ },
+ },
+ "FreePallocChunkPages": {
+ npages: PallocChunkPages,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ },
+ "StraddlePallocChunkPages": {
+ npages: PallocChunkPages,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages / 2}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "StraddlePallocChunkPages+1": {
+ npages: PallocChunkPages + 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{PallocChunkPages/2 + 1, PallocChunkPages/2 - 1}},
+ },
+ },
+ "FreePallocChunkPages*2": {
+ npages: PallocChunkPages * 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "StraddlePallocChunkPages*2": {
+ npages: PallocChunkPages * 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ },
+ "AllFreePallocChunkPages*7+5": {
+ npages: PallocChunkPages*7 + 5,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ BaseChunkIdx + 4: {{0, PallocChunkPages}},
+ BaseChunkIdx + 5: {{0, PallocChunkPages}},
+ BaseChunkIdx + 6: {{0, PallocChunkPages}},
+ BaseChunkIdx + 7: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {{5, PallocChunkPages - 5}},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, nil)
+ defer FreePageAlloc(b)
+
+ for _, addr := range v.frees {
+ b.Free(addr, v.npages)
+ }
+ want := NewPageAlloc(v.after, nil)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocAllocAndFree(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ type hit struct {
+ alloc bool
+ npages uintptr
+ base uintptr
+ }
+ tests := map[string]struct {
+ init map[ChunkIdx][]BitRange
+ hits []hit
+ }{
+ // TODO(mknyszek): Write more tests here.
+ "Chunks8": {
+ init: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {},
+ },
+ hits: []hit{
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, 1, PageBase(BaseChunkIdx, 0)},
+ {false, 1, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.init, nil)
+ defer FreePageAlloc(b)
+
+ for iter, i := range v.hits {
+ if i.alloc {
+ if a, _ := b.Alloc(i.npages); a != i.base {
+ t.Fatalf("bad alloc #%d: want 0x%x, got 0x%x", iter+1, i.base, a)
+ }
+ } else {
+ b.Free(i.base, i.npages)
+ }
+ }
+ })
+ }
+}
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go
new file mode 100644
index 0000000..683a997
--- /dev/null
+++ b/src/runtime/mpagecache.go
@@ -0,0 +1,161 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+const pageCachePages = 8 * unsafe.Sizeof(pageCache{}.cache)
+
+// pageCache represents a per-p cache of pages the allocator can
+// allocate from without a lock. More specifically, it represents
+// a pageCachePages*pageSize chunk of memory with 0 or more free
+// pages in it.
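+//
+// For illustration: if cache == 0b0110, then the pages at base+1*pageSize
+// and base+2*pageSize are free, and a set bit in scav additionally marks
+// the corresponding page as scavenged. With a 64-bit cache word,
+// pageCachePages is therefore 64.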
+type pageCache struct {
+ base uintptr // base address of the chunk
+ cache uint64 // 64-bit bitmap representing free pages (1 means free)
+ scav uint64 // 64-bit bitmap representing scavenged pages (1 means scavenged)
+}
+
+// empty returns true if the pageCache has no free pages, and false
+// otherwise.
+func (c *pageCache) empty() bool {
+ return c.cache == 0
+}
+
+// alloc allocates npages from the page cache and is the main entry
+// point for allocation.
+//
+// Returns a base address and the amount of scavenged memory in the
+// allocated region in bytes.
+//
+// Returns a base address of zero on failure, in which case the
+// amount of scavenged memory should be ignored.
+func (c *pageCache) alloc(npages uintptr) (uintptr, uintptr) {
+ if c.cache == 0 {
+ return 0, 0
+ }
+ if npages == 1 {
+ i := uintptr(sys.TrailingZeros64(c.cache))
+ scav := (c.scav >> i) & 1
+ c.cache &^= 1 << i // set bit to mark in-use
+ c.scav &^= 1 << i // clear bit to mark unscavenged
+ return c.base + i*pageSize, uintptr(scav) * pageSize
+ }
+ return c.allocN(npages)
+}
+
+// allocN is a helper which attempts to allocate npages worth of pages
+// from the cache. It represents the general case for allocating from
+// the page cache.
+//
+// Returns a base address and the amount of scavenged memory in the
+// allocated region in bytes.
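+//
+// Illustrative example (hypothetical values): with cache == 0b11110
+// (pages 1 through 4 free) and npages == 2, findBitRange64 returns 1, the
+// mask is 0b110, the returned base is c.base + 1*pageSize, and scav
+// counts how many of bits 1 and 2 are set in c.scav.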
+func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) {
+ i := findBitRange64(c.cache, uint(npages))
+ if i >= 64 {
+ return 0, 0
+ }
+ mask := ((uint64(1) << npages) - 1) << i
+ scav := sys.OnesCount64(c.scav & mask)
+ c.cache &^= mask // mark in-use bits
+ c.scav &^= mask // clear scavenged bits
+ return c.base + uintptr(i*pageSize), uintptr(scav) * pageSize
+}
+
+// flush empties out unallocated free pages in the given cache
+// into s. Then, it clears the cache, such that empty returns
+// true.
+//
+// s.mheapLock must be held or the world must be stopped.
+func (c *pageCache) flush(s *pageAlloc) {
+ if c.empty() {
+ return
+ }
+ ci := chunkIndex(c.base)
+ pi := chunkPageIndex(c.base)
+
+ // This method is called very infrequently, so just do the
+ // slower, safer thing by iterating over each bit individually.
+ for i := uint(0); i < 64; i++ {
+ if c.cache&(1<<i) != 0 {
+ s.chunkOf(ci).free1(pi + i)
+ }
+ if c.scav&(1<<i) != 0 {
+ s.chunkOf(ci).scavenged.setRange(pi+i, 1)
+ }
+ }
+ // Since this is a lot like a free, we need to make sure
+ // we update the searchAddr just like free does.
+ if b := (offAddr{c.base}); b.lessThan(s.searchAddr) {
+ s.searchAddr = b
+ }
+ s.update(c.base, pageCachePages, false, false)
+ *c = pageCache{}
+}
+
+// allocToCache acquires a pageCachePages-aligned chunk of free pages which
+// may not be contiguous, and returns a pageCache structure which owns the
+// chunk.
+//
+// s.mheapLock must be held.
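+//
+// For illustration: if the first free page found is page 70 of a chunk,
+// the returned cache covers pages 64 through 127 of that chunk (a
+// 64-page aligned block), with cache bits set for whichever of those
+// pages are currently free.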
+func (s *pageAlloc) allocToCache() pageCache {
+ // If the searchAddr refers to a region which has a higher address than
+ // any known chunk, then we know we're out of memory.
+ if chunkIndex(s.searchAddr.addr()) >= s.end {
+ return pageCache{}
+ }
+ c := pageCache{}
+ ci := chunkIndex(s.searchAddr.addr()) // chunk index
+ if s.summary[len(s.summary)-1][ci] != 0 {
+ // Fast path: there's free pages at or near the searchAddr address.
+ chunk := s.chunkOf(ci)
+ j, _ := chunk.find(1, chunkPageIndex(s.searchAddr.addr()))
+ if j == ^uint(0) {
+ throw("bad summary data")
+ }
+ c = pageCache{
+ base: chunkBase(ci) + alignDown(uintptr(j), 64)*pageSize,
+ cache: ^chunk.pages64(j),
+ scav: chunk.scavenged.block64(j),
+ }
+ } else {
+ // Slow path: the searchAddr address had nothing there, so go find
+ // the first free page the slow way.
+ addr, _ := s.find(1)
+ if addr == 0 {
+ // We failed to find adequate free space, so mark the searchAddr as OoM
+ // and return an empty pageCache.
+ s.searchAddr = maxSearchAddr
+ return pageCache{}
+ }
+ ci := chunkIndex(addr)
+ chunk := s.chunkOf(ci)
+ c = pageCache{
+ base: alignDown(addr, 64*pageSize),
+ cache: ^chunk.pages64(chunkPageIndex(addr)),
+ scav: chunk.scavenged.block64(chunkPageIndex(addr)),
+ }
+ }
+
+ // Set the bits as allocated and clear the scavenged bits.
+ s.allocRange(c.base, pageCachePages)
+
+ // Update as an allocation, but note that it's not contiguous.
+ s.update(c.base, pageCachePages, false, true)
+
+ // Set the search address to the last page represented by the cache.
+ // Since all of the pages in this block are going to the cache, and we
+ // searched for the first free page, we can confidently start at the
+ // next page.
+ //
+ // However, s.searchAddr is not allowed to point into unmapped heap memory
+ // unless it is maxSearchAddr, so make it the last page as opposed to
+ // the page after.
+ s.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)}
+ return c
+}
diff --git a/src/runtime/mpagecache_test.go b/src/runtime/mpagecache_test.go
new file mode 100644
index 0000000..2ed0c0a
--- /dev/null
+++ b/src/runtime/mpagecache_test.go
@@ -0,0 +1,399 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+func checkPageCache(t *testing.T, got, want PageCache) {
+ if got.Base() != want.Base() {
+ t.Errorf("bad pageCache base: got 0x%x, want 0x%x", got.Base(), want.Base())
+ }
+ if got.Cache() != want.Cache() {
+ t.Errorf("bad pageCache bits: got %016x, want %016x", got.Base(), want.Base())
+ }
+ if got.Scav() != want.Scav() {
+ t.Errorf("bad pageCache scav: got %016x, want %016x", got.Scav(), want.Scav())
+ }
+}
+
+func TestPageCacheAlloc(t *testing.T) {
+ base := PageBase(BaseChunkIdx, 0)
+ type hit struct {
+ npages uintptr
+ base uintptr
+ scav uintptr
+ }
+ tests := map[string]struct {
+ cache PageCache
+ hits []hit
+ }{
+ "Empty": {
+ cache: NewPageCache(base, 0, 0),
+ hits: []hit{
+ {1, 0, 0},
+ {2, 0, 0},
+ {3, 0, 0},
+ {4, 0, 0},
+ {5, 0, 0},
+ {11, 0, 0},
+ {12, 0, 0},
+ {16, 0, 0},
+ {27, 0, 0},
+ {32, 0, 0},
+ {43, 0, 0},
+ {57, 0, 0},
+ {64, 0, 0},
+ {121, 0, 0},
+ },
+ },
+ "Lo1": {
+ cache: NewPageCache(base, 0x1, 0x1),
+ hits: []hit{
+ {1, base, PageSize},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Hi1": {
+ cache: NewPageCache(base, 0x1<<63, 0x1),
+ hits: []hit{
+ {1, base + 63*PageSize, 0},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Swiss1": {
+ cache: NewPageCache(base, 0x20005555, 0x5505),
+ hits: []hit{
+ {2, 0, 0},
+ {1, base, PageSize},
+ {1, base + 2*PageSize, PageSize},
+ {1, base + 4*PageSize, 0},
+ {1, base + 6*PageSize, 0},
+ {1, base + 8*PageSize, PageSize},
+ {1, base + 10*PageSize, PageSize},
+ {1, base + 12*PageSize, PageSize},
+ {1, base + 14*PageSize, PageSize},
+ {1, base + 29*PageSize, 0},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Lo2": {
+ cache: NewPageCache(base, 0x3, 0x2<<62),
+ hits: []hit{
+ {2, base, 0},
+ {2, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Hi2": {
+ cache: NewPageCache(base, 0x3<<62, 0x3<<62),
+ hits: []hit{
+ {2, base + 62*PageSize, 2 * PageSize},
+ {2, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Swiss2": {
+ cache: NewPageCache(base, 0x3333<<31, 0x3030<<31),
+ hits: []hit{
+ {2, base + 31*PageSize, 0},
+ {2, base + 35*PageSize, 2 * PageSize},
+ {2, base + 39*PageSize, 0},
+ {2, base + 43*PageSize, 2 * PageSize},
+ {2, 0, 0},
+ },
+ },
+ "Hi53": {
+ cache: NewPageCache(base, ((uint64(1)<<53)-1)<<10, ((uint64(1)<<16)-1)<<10),
+ hits: []hit{
+ {53, base + 10*PageSize, 16 * PageSize},
+ {53, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Full53": {
+ cache: NewPageCache(base, ^uint64(0), ((uint64(1)<<16)-1)<<10),
+ hits: []hit{
+ {53, base, 16 * PageSize},
+ {53, 0, 0},
+ {1, base + 53*PageSize, 0},
+ },
+ },
+ "Full64": {
+ cache: NewPageCache(base, ^uint64(0), ^uint64(0)),
+ hits: []hit{
+ {64, base, 64 * PageSize},
+ {64, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "FullMixed": {
+ cache: NewPageCache(base, ^uint64(0), ^uint64(0)),
+ hits: []hit{
+ {5, base, 5 * PageSize},
+ {7, base + 5*PageSize, 7 * PageSize},
+ {1, base + 12*PageSize, 1 * PageSize},
+ {23, base + 13*PageSize, 23 * PageSize},
+ {63, 0, 0},
+ {3, base + 36*PageSize, 3 * PageSize},
+ {3, base + 39*PageSize, 3 * PageSize},
+ {3, base + 42*PageSize, 3 * PageSize},
+ {12, base + 45*PageSize, 12 * PageSize},
+ {11, 0, 0},
+ {4, base + 57*PageSize, 4 * PageSize},
+ {4, 0, 0},
+ {6, 0, 0},
+ {36, 0, 0},
+ {2, base + 61*PageSize, 2 * PageSize},
+ {3, 0, 0},
+ {1, base + 63*PageSize, 1 * PageSize},
+ {4, 0, 0},
+ {2, 0, 0},
+ {62, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ }
+ for name, test := range tests {
+ test := test
+ t.Run(name, func(t *testing.T) {
+ c := test.cache
+ for i, h := range test.hits {
+ b, s := c.Alloc(h.npages)
+ if b != h.base {
+ t.Fatalf("bad alloc base #%d: got 0x%x, want 0x%x", i, b, h.base)
+ }
+ if s != h.scav {
+ t.Fatalf("bad alloc scav #%d: got %d, want %d", i, s, h.scav)
+ }
+ }
+ })
+ }
+}
+
+func TestPageCacheFlush(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ bits64ToBitRanges := func(bits uint64, base uint) []BitRange {
+ var ranges []BitRange
+ start, size := uint(0), uint(0)
+ for i := 0; i < 64; i++ {
+ if bits&(1<<i) != 0 {
+ if size == 0 {
+ start = uint(i) + base
+ }
+ size++
+ } else {
+ if size != 0 {
+ ranges = append(ranges, BitRange{start, size})
+ size = 0
+ }
+ }
+ }
+ if size != 0 {
+ ranges = append(ranges, BitRange{start, size})
+ }
+ return ranges
+ }
+ runTest := func(t *testing.T, base uint, cache, scav uint64) {
+ // Set up the before state.
+ beforeAlloc := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{base, 64}},
+ }
+ beforeScav := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ }
+ b := NewPageAlloc(beforeAlloc, beforeScav)
+ defer FreePageAlloc(b)
+
+ // Create and flush the cache.
+ c := NewPageCache(PageBase(BaseChunkIdx, base), cache, scav)
+ c.Flush(b)
+ if !c.Empty() {
+ t.Errorf("pageCache flush did not clear cache")
+ }
+
+ // Set up the expected after state.
+ afterAlloc := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: bits64ToBitRanges(^cache, base),
+ }
+ afterScav := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: bits64ToBitRanges(scav, base),
+ }
+ want := NewPageAlloc(afterAlloc, afterScav)
+ defer FreePageAlloc(want)
+
+ // Check to see if it worked.
+ checkPageAlloc(t, want, b)
+ }
+
+ // Empty.
+ runTest(t, 0, 0, 0)
+
+ // Full.
+ runTest(t, 0, ^uint64(0), ^uint64(0))
+
+ // Random.
+ for i := 0; i < 100; i++ {
+ // Generate random valid base within a chunk.
+ base := uint(rand.Intn(PallocChunkPages/64)) * 64
+
+ // Generate random cache.
+ cache := rand.Uint64()
+ scav := rand.Uint64() & cache
+
+ // Run the test.
+ runTest(t, base, cache, scav)
+ }
+}
+
+func TestPageAllocAllocToCache(t *testing.T) {
+ if GOOS == "openbsd" && testing.Short() {
+ t.Skip("skipping because virtual memory is limited; see #36210")
+ }
+ type test struct {
+ before map[ChunkIdx][]BitRange
+ scav map[ChunkIdx][]BitRange
+ hits []PageCache // expected base addresses and patterns
+ after map[ChunkIdx][]BitRange
+ }
+ tests := map[string]test{
+ "AllFree": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{1, 1}, {64, 64}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx, 0), ^uint64(0), 0x2),
+ NewPageCache(PageBase(BaseChunkIdx, 64), ^uint64(0), ^uint64(0)),
+ NewPageCache(PageBase(BaseChunkIdx, 128), ^uint64(0), 0),
+ NewPageCache(PageBase(BaseChunkIdx, 192), ^uint64(0), 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 256}},
+ },
+ },
+ "ManyArena": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 64}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx+2, PallocChunkPages-64), ^uint64(0), 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguous": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{31, 67}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx+0xff, 0), ^uint64(0), ((uint64(1)<<33)-1)<<31),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 64}},
+ },
+ },
+ "First": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 32}, {33, 31}, {96, 32}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{1, 4}, {31, 5}, {66, 2}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx, 0), 1<<32, 1<<32),
+ NewPageCache(PageBase(BaseChunkIdx, 64), (uint64(1)<<32)-1, 0x3<<2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 128}},
+ },
+ },
+ "Fail": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ hits: []PageCache{
+ NewPageCache(0, 0, 0),
+ NewPageCache(0, 0, 0),
+ NewPageCache(0, 0, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ }
+ if PageAlloc64Bit != 0 {
+ const chunkIdxBigJump = 0x100000 // chunk index offset which translates to O(TiB)
+
+ // This test is similar to the one with the same name for
+ // pageAlloc.alloc and serves the same purpose.
+ // See mpagealloc_test.go for details.
+ sumsPerPhysPage := ChunkIdx(PhysPageSize / PallocSumBytes)
+ baseChunkIdx := BaseChunkIdx &^ (sumsPerPhysPage - 1)
+ tests["DiscontiguousMappedSumBoundary"] = test{
+ before: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages - 1}},
+ baseChunkIdx + chunkIdxBigJump: {{1, PallocChunkPages - 1}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {},
+ baseChunkIdx + chunkIdxBigJump: {},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(baseChunkIdx+sumsPerPhysPage-1, PallocChunkPages-64), 1<<63, 0),
+ NewPageCache(PageBase(baseChunkIdx+chunkIdxBigJump, 0), 1, 0),
+ NewPageCache(0, 0, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages}},
+ baseChunkIdx + chunkIdxBigJump: {{0, PallocChunkPages}},
+ },
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, v.scav)
+ defer FreePageAlloc(b)
+
+ for _, expect := range v.hits {
+ checkPageCache(t, b.AllocToCache(), expect)
+ if t.Failed() {
+ return
+ }
+ }
+ want := NewPageAlloc(v.after, v.scav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go
new file mode 100644
index 0000000..a801134
--- /dev/null
+++ b/src/runtime/mpallocbits.go
@@ -0,0 +1,388 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+)
+
+// pageBits is a bitmap representing one bit per page in a palloc chunk.
+type pageBits [pallocChunkPages / 64]uint64
+
+// get returns the value of the i'th bit in the bitmap.
+func (b *pageBits) get(i uint) uint {
+ return uint((b[i/64] >> (i % 64)) & 1)
+}
+
+// block64 returns the 64-bit aligned block of bits containing the i'th bit.
+func (b *pageBits) block64(i uint) uint64 {
+ return b[i/64]
+}
+
+// set sets bit i of pageBits.
+func (b *pageBits) set(i uint) {
+ b[i/64] |= 1 << (i % 64)
+}
+
+// setRange sets bits in the range [i, i+n).
+func (b *pageBits) setRange(i, n uint) {
+ _ = b[i/64]
+ if n == 1 {
+ // Fast path for the n == 1 case.
+ b.set(i)
+ return
+ }
+ // Set bits [i, j].
+ j := i + n - 1
+ if i/64 == j/64 {
+ b[i/64] |= ((uint64(1) << n) - 1) << (i % 64)
+ return
+ }
+ _ = b[j/64]
+ // Set leading bits.
+ b[i/64] |= ^uint64(0) << (i % 64)
+ for k := i/64 + 1; k < j/64; k++ {
+ b[k] = ^uint64(0)
+ }
+ // Set trailing bits.
+ b[j/64] |= (uint64(1) << (j%64 + 1)) - 1
+}
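
The word-at-a-time masking in setRange above can be checked against a bit-at-a-time reference. A minimal standalone sketch (the setRangeFast/setRangeSlow names are assumptions; not part of the patch):

// Bit-at-a-time reference used to sanity-check the masking above; standalone
// sketch over a small four-word bitmap.
package main

import "fmt"

// setRangeFast mirrors the word-at-a-time approach: a leading mask, full
// words in the middle, and a trailing mask.
func setRangeFast(b []uint64, i, n uint) {
	j := i + n - 1
	if i/64 == j/64 {
		b[i/64] |= ((uint64(1) << n) - 1) << (i % 64)
		return
	}
	b[i/64] |= ^uint64(0) << (i % 64)
	for k := i/64 + 1; k < j/64; k++ {
		b[k] = ^uint64(0)
	}
	b[j/64] |= (uint64(1) << (j%64 + 1)) - 1
}

// setRangeSlow sets the same bits one at a time.
func setRangeSlow(b []uint64, i, n uint) {
	for k := i; k < i+n; k++ {
		b[k/64] |= 1 << (k % 64)
	}
}

func main() {
	fast := make([]uint64, 4)
	slow := make([]uint64, 4)
	setRangeFast(fast, 60, 70) // bits [60, 130) span three words
	setRangeSlow(slow, 60, 70)
	fmt.Println(fast[0] == slow[0], fast[1] == slow[1], fast[2] == slow[2]) // true true true
}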
+
+// setAll sets all the bits of b.
+func (b *pageBits) setAll() {
+ for i := range b {
+ b[i] = ^uint64(0)
+ }
+}
+
+// clear clears bit i of pageBits.
+func (b *pageBits) clear(i uint) {
+ b[i/64] &^= 1 << (i % 64)
+}
+
+// clearRange clears bits in the range [i, i+n).
+func (b *pageBits) clearRange(i, n uint) {
+ _ = b[i/64]
+ if n == 1 {
+ // Fast path for the n == 1 case.
+ b.clear(i)
+ return
+ }
+ // Clear bits [i, j].
+ j := i + n - 1
+ if i/64 == j/64 {
+ b[i/64] &^= ((uint64(1) << n) - 1) << (i % 64)
+ return
+ }
+ _ = b[j/64]
+ // Clear leading bits.
+ b[i/64] &^= ^uint64(0) << (i % 64)
+ for k := i/64 + 1; k < j/64; k++ {
+ b[k] = 0
+ }
+ // Clear trailing bits.
+ b[j/64] &^= (uint64(1) << (j%64 + 1)) - 1
+}
+
+// clearAll frees all the bits of b.
+func (b *pageBits) clearAll() {
+ for i := range b {
+ b[i] = 0
+ }
+}
+
+// popcntRange counts the number of set bits in the
+// range [i, i+n).
+func (b *pageBits) popcntRange(i, n uint) (s uint) {
+ if n == 1 {
+ return uint((b[i/64] >> (i % 64)) & 1)
+ }
+ _ = b[i/64]
+ j := i + n - 1
+ if i/64 == j/64 {
+ return uint(sys.OnesCount64((b[i/64] >> (i % 64)) & ((1 << n) - 1)))
+ }
+ _ = b[j/64]
+ s += uint(sys.OnesCount64(b[i/64] >> (i % 64)))
+ for k := i/64 + 1; k < j/64; k++ {
+ s += uint(sys.OnesCount64(b[k]))
+ }
+ s += uint(sys.OnesCount64(b[j/64] & ((1 << (j%64 + 1)) - 1)))
+ return
+}
+
+// pallocBits is a bitmap that tracks page allocations for at most one
+// palloc chunk.
+//
+// The precise representation is an implementation detail, but for the
+// sake of documentation, 0s are free pages and 1s are allocated pages.
+type pallocBits pageBits
+
+// consec8tab is a table containing the maximum number of
+// consecutive zero bits in any uint8 value.
+//
+// The table is generated by calling consec8(i) for each
+// possible uint8 value, which is defined as:
+//
+// // consec8 counts the maximum number of consecutive 0 bits
+// // in a uint8.
+// func consec8(n uint8) int {
+// n = ^n
+// i := 0
+// for n != 0 {
+// n &= (n << 1)
+// i++
+// }
+// return i
+// }
+var consec8tab = [256]uint{
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0,
+}
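
The table can be regenerated from the consec8 helper quoted in the comment above. A small standalone program for doing so (illustrative only, not part of the patch):

// Regenerates consec8tab from the consec8 definition given in the comment
// above.
package main

import "fmt"

// consec8 counts the maximum number of consecutive 0 bits in a uint8.
func consec8(n uint8) int {
	n = ^n
	i := 0
	for n != 0 {
		n &= n << 1
		i++
	}
	return i
}

func main() {
	for i := 0; i < 256; i++ {
		if i%16 == 0 && i != 0 {
			fmt.Println()
		}
		fmt.Printf("%d, ", consec8(uint8(i)))
	}
	fmt.Println()
}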
+
+// summarize returns a packed summary of the bitmap in pallocBits.
+func (b *pallocBits) summarize() pallocSum {
+ // TODO(mknyszek): There may be something more clever to be done
+ // here to make the summarize operation more efficient. For example,
+ // we can compute start and end with 64-bit wide operations easily,
+ // but max is a bit more complex. Perhaps there exists some way to
+ // leverage the 64-bit start and end to our advantage?
+ var start, max, end uint
+ for i := 0; i < len(b); i++ {
+ a := b[i]
+ for j := 0; j < 64; j += 8 {
+ k := uint8(a >> j)
+
+ // Compute start.
+ si := uint(sys.TrailingZeros8(k))
+ if start == uint(i*64+j) {
+ start += si
+ }
+
+ // Compute max.
+ if end+si > max {
+ max = end + si
+ }
+ if mi := consec8tab[k]; mi > max {
+ max = mi
+ }
+
+ // Compute end.
+ if k == 0 {
+ end += 8
+ } else {
+ end = uint(sys.LeadingZeros8(k))
+ }
+ }
+ }
+ return packPallocSum(start, max, end)
+}
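
For intuition about the packed summary, the test file later in this patch compares summarize against a slow reference. A hedged sketch of such a reference, where start is the leading run of free (zero) pages, end the trailing run, and max the longest run anywhere (the summarizeSlow name is an assumption):

// Reference computation of (start, max, end) over a bitmap where a 0 bit is
// a free page, matching the documented meaning of the packed summary.
package main

import "fmt"

// summarizeSlow walks the bitmap one bit at a time.
func summarizeSlow(b []uint64) (start, max, end uint) {
	nbits := uint(len(b)) * 64
	// start: length of the leading run of free (0) pages.
	for i := uint(0); i < nbits; i++ {
		if b[i/64]&(1<<(i%64)) != 0 {
			break
		}
		start++
	}
	// end: length of the trailing run of free pages.
	for i := nbits; i > 0; i-- {
		if b[(i-1)/64]&(1<<((i-1)%64)) != 0 {
			break
		}
		end++
	}
	// max: longest run of free pages anywhere.
	run := uint(0)
	for i := uint(0); i < nbits; i++ {
		if b[i/64]&(1<<(i%64)) != 0 {
			run = 0
			continue
		}
		run++
		if run > max {
			max = run
		}
	}
	return
}

func main() {
	// Pages 4..7 allocated in a 128-page bitmap; everything else free.
	bm := []uint64{0xf0, 0}
	s, m, e := summarizeSlow(bm)
	fmt.Println(s, m, e) // 4 120 120
}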
+
+// find searches for npages contiguous free pages in pallocBits and returns
+// the index where that run starts, as well as the index of the first free page
+// it found in the search. searchIdx represents the first known free page and
+// where to begin the search from.
+//
+// If find fails to find any free space, it returns an index of ^uint(0) and
+// the new searchIdx should be ignored.
+//
+// Note that if npages == 1, the two returned values will always be identical.
+func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) {
+ if npages == 1 {
+ addr := b.find1(searchIdx)
+ return addr, addr
+ } else if npages <= 64 {
+ return b.findSmallN(npages, searchIdx)
+ }
+ return b.findLargeN(npages, searchIdx)
+}
+
+// find1 is a helper for find which searches for a single free page
+// in the pallocBits and returns the index.
+//
+// See find for an explanation of the searchIdx parameter.
+func (b *pallocBits) find1(searchIdx uint) uint {
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ x := b[i]
+ if x == ^uint64(0) {
+ continue
+ }
+ return i*64 + uint(sys.TrailingZeros64(^x))
+ }
+ return ^uint(0)
+}
+
+// findSmallN is a helper for find which searches for npages contiguous free pages
+// in this pallocBits and returns the index where that run of contiguous pages
+// starts as well as the index of the first free page it finds in its search.
+//
+// See find for an explanation of the searchIdx parameter.
+//
+// Returns a ^uint(0) index on failure and the new searchIdx should be ignored.
+//
+// findSmallN assumes npages <= 64, where any such contiguous run of pages
+// crosses at most one aligned 64-bit boundary in the bits.
+func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) {
+ end, newSearchIdx := uint(0), ^uint(0)
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ bi := b[i]
+ if bi == ^uint64(0) {
+ end = 0
+ continue
+ }
+ // First see if we can pack our allocation in the trailing
+ // zeros plus the end of the last 64 bits.
+ start := uint(sys.TrailingZeros64(bi))
+ if newSearchIdx == ^uint(0) {
+ // The new searchIdx is going to be at these 64 bits after any
+ // 1s we find, so count trailing 1s.
+ newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi))
+ }
+ if end+start >= uint(npages) {
+ return i*64 - end, newSearchIdx
+ }
+ // Next, check the interior of the 64-bit chunk.
+ j := findBitRange64(^bi, uint(npages))
+ if j < 64 {
+ return i*64 + j, newSearchIdx
+ }
+ end = uint(sys.LeadingZeros64(bi))
+ }
+ return ^uint(0), newSearchIdx
+}
+
+// findLargeN is a helper for find which searches for npages contiguous free pages
+// in this pallocBits and returns the index where that run starts, as well as the
+// index of the first free page it found in its search.
+//
+// See find for an explanation of the searchIdx parameter.
+//
+// Returns a ^uint(0) index on failure and the new searchIdx should be ignored.
+//
+// findLargeN assumes npages > 64, where any such run of free pages
+// crosses at least one aligned 64-bit boundary in the bits.
+func (b *pallocBits) findLargeN(npages uintptr, searchIdx uint) (uint, uint) {
+ start, size, newSearchIdx := ^uint(0), uint(0), ^uint(0)
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ x := b[i]
+ if x == ^uint64(0) {
+ size = 0
+ continue
+ }
+ if newSearchIdx == ^uint(0) {
+ // The new searchIdx is going to be at these 64 bits after any
+ // 1s we find, so count trailing 1s.
+ newSearchIdx = i*64 + uint(sys.TrailingZeros64(^x))
+ }
+ if size == 0 {
+ size = uint(sys.LeadingZeros64(x))
+ start = i*64 + 64 - size
+ continue
+ }
+ s := uint(sys.TrailingZeros64(x))
+ if s+size >= uint(npages) {
+ size += s
+ return start, newSearchIdx
+ }
+ if s < 64 {
+ size = uint(sys.LeadingZeros64(x))
+ start = i*64 + 64 - size
+ continue
+ }
+ size += 64
+ }
+ if size < uint(npages) {
+ return ^uint(0), newSearchIdx
+ }
+ return start, newSearchIdx
+}
+
+// allocRange allocates the range [i, i+n).
+func (b *pallocBits) allocRange(i, n uint) {
+ (*pageBits)(b).setRange(i, n)
+}
+
+// allocAll allocates all the bits of b.
+func (b *pallocBits) allocAll() {
+ (*pageBits)(b).setAll()
+}
+
+// free1 frees a single page in the pallocBits at i.
+func (b *pallocBits) free1(i uint) {
+ (*pageBits)(b).clear(i)
+}
+
+// free frees the range [i, i+n) of pages in the pallocBits.
+func (b *pallocBits) free(i, n uint) {
+ (*pageBits)(b).clearRange(i, n)
+}
+
+// freeAll frees all the bits of b.
+func (b *pallocBits) freeAll() {
+ (*pageBits)(b).clearAll()
+}
+
+// pages64 returns a 64-bit bitmap representing a block of 64 pages aligned
+// to 64 pages. The returned block of pages is the one containing the i'th
+// page in this pallocBits. Each bit represents whether the page is in-use.
+func (b *pallocBits) pages64(i uint) uint64 {
+ return (*pageBits)(b).block64(i)
+}
+
+// findBitRange64 returns the bit index of the first set of
+// n consecutive 1 bits. If no consecutive set of 1 bits of
+// size n may be found in c, then it returns an integer >= 64.
+func findBitRange64(c uint64, n uint) uint {
+ i := uint(0)
+ cont := uint(sys.TrailingZeros64(^c))
+ for cont < n && i < 64 {
+ i += cont
+ i += uint(sys.TrailingZeros64(c >> i))
+ cont = uint(sys.TrailingZeros64(^(c >> i)))
+ }
+ return i
+}
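
A brute-force reference for findBitRange64 (the findBitRange64Slow name is an assumption) that mirrors the cases checked by TestFindBitRange64 at the end of this patch:

// Brute-force reference for findBitRange64: report the lowest bit index at
// which n consecutive 1 bits start, or 64 if there is no such run.
package main

import "fmt"

func findBitRange64Slow(c uint64, n uint) uint {
	if n == 0 {
		return 0
	}
	run := uint(0)
	for i := uint(0); i < 64; i++ {
		if c&(1<<i) != 0 {
			run++
			if run == n {
				return i + 1 - n
			}
		} else {
			run = 0
		}
	}
	return 64
}

func main() {
	fmt.Println(findBitRange64Slow(0xc000010001030000, 2))  // 16
	fmt.Println(findBitRange64Slow(0x0fff03ff01079fff, 16)) // 64 (no run of 16 ones)
}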
+
+// pallocData encapsulates pallocBits and a bitmap for
+// whether or not a given page is scavenged in a single
+// structure. It's effectively a pallocBits with
+// additional functionality.
+//
+// Update the comment on (*pageAlloc).chunks should this
+// structure change.
+type pallocData struct {
+ pallocBits
+ scavenged pageBits
+}
+
+// allocRange sets bits [i, i+n) in the bitmap to 1 and
+// updates the scavenged bits appropriately.
+func (m *pallocData) allocRange(i, n uint) {
+ // Clear the scavenged bits when we alloc the range.
+ m.pallocBits.allocRange(i, n)
+ m.scavenged.clearRange(i, n)
+}
+
+// allocAll sets every bit in the bitmap to 1 and updates
+// the scavenged bits appropriately.
+func (m *pallocData) allocAll() {
+ // Clear the scavenged bits when we alloc the range.
+ m.pallocBits.allocAll()
+ m.scavenged.clearAll()
+}
diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go
new file mode 100644
index 0000000..71a29f3
--- /dev/null
+++ b/src/runtime/mpallocbits_test.go
@@ -0,0 +1,510 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+// Ensures that got and want are the same, and if not, reports
+// detailed diff information.
+func checkPallocBits(t *testing.T, got, want *PallocBits) bool {
+ d := DiffPallocBits(got, want)
+ if len(d) != 0 {
+ t.Errorf("%d range(s) different", len(d))
+ for _, bits := range d {
+ t.Logf("\t@ bit index %d", bits.I)
+ t.Logf("\t| got: %s", StringifyPallocBits(got, bits))
+ t.Logf("\t| want: %s", StringifyPallocBits(want, bits))
+ }
+ return false
+ }
+ return true
+}
+
+// makePallocBits produces an initialized PallocBits by setting
+// the ranges in s to 1 and the rest to zero.
+func makePallocBits(s []BitRange) *PallocBits {
+ b := new(PallocBits)
+ for _, v := range s {
+ b.AllocRange(v.I, v.N)
+ }
+ return b
+}
+
+// Ensures that PallocBits.AllocRange works, which is a fundamental
+// method used for testing and initialization since it's used by
+// makePallocBits.
+func TestPallocBitsAllocRange(t *testing.T) {
+ test := func(t *testing.T, i, n uint, want *PallocBits) {
+ checkPallocBits(t, makePallocBits([]BitRange{{i, n}}), want)
+ }
+ t.Run("OneLow", func(t *testing.T) {
+ want := new(PallocBits)
+ want[0] = 0x1
+ test(t, 0, 1, want)
+ })
+ t.Run("OneHigh", func(t *testing.T) {
+ want := new(PallocBits)
+ want[PallocChunkPages/64-1] = 1 << 63
+ test(t, PallocChunkPages-1, 1, want)
+ })
+ t.Run("Inner", func(t *testing.T) {
+ want := new(PallocBits)
+ want[2] = 0x3e
+ test(t, 129, 5, want)
+ })
+ t.Run("Aligned", func(t *testing.T) {
+ want := new(PallocBits)
+ want[2] = ^uint64(0)
+ want[3] = ^uint64(0)
+ test(t, 128, 128, want)
+ })
+ t.Run("Begin", func(t *testing.T) {
+ want := new(PallocBits)
+ want[0] = ^uint64(0)
+ want[1] = ^uint64(0)
+ want[2] = ^uint64(0)
+ want[3] = ^uint64(0)
+ want[4] = ^uint64(0)
+ want[5] = 0x1
+ test(t, 0, 321, want)
+ })
+ t.Run("End", func(t *testing.T) {
+ want := new(PallocBits)
+ want[PallocChunkPages/64-1] = ^uint64(0)
+ want[PallocChunkPages/64-2] = ^uint64(0)
+ want[PallocChunkPages/64-3] = ^uint64(0)
+ want[PallocChunkPages/64-4] = 1 << 63
+ test(t, PallocChunkPages-(64*3+1), 64*3+1, want)
+ })
+ t.Run("All", func(t *testing.T) {
+ want := new(PallocBits)
+ for i := range want {
+ want[i] = ^uint64(0)
+ }
+ test(t, 0, PallocChunkPages, want)
+ })
+}
+
+// Inverts every bit in the PallocBits.
+func invertPallocBits(b *PallocBits) {
+ for i := range b {
+ b[i] = ^b[i]
+ }
+}
+
+// Ensures two packed summaries are identical, and reports a detailed description
+// of the difference if they're not.
+func checkPallocSum(t *testing.T, got, want PallocSum) {
+ if got.Start() != want.Start() {
+ t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start())
+ }
+ if got.Max() != want.Max() {
+ t.Errorf("inconsistent max: got %d, want %d", got.Max(), want.Max())
+ }
+ if got.End() != want.End() {
+ t.Errorf("inconsistent end: got %d, want %d", got.End(), want.End())
+ }
+}
+
+func TestMallocBitsPopcntRange(t *testing.T) {
+ type test struct {
+ i, n uint // bit range to popcnt over.
+ want uint // expected popcnt result on that range.
+ }
+ tests := map[string]struct {
+ init []BitRange // bit ranges to set to 1 in the bitmap.
+ tests []test // a set of popcnt tests to run over the bitmap.
+ }{
+ "None": {
+ tests: []test{
+ {0, 1, 0},
+ {5, 3, 0},
+ {2, 11, 0},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, 0},
+ {0, PallocChunkPages, 0},
+ },
+ },
+ "All": {
+ init: []BitRange{{0, PallocChunkPages}},
+ tests: []test{
+ {0, 1, 1},
+ {5, 3, 3},
+ {2, 11, 11},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, PallocChunkPages / 2},
+ {0, PallocChunkPages, PallocChunkPages},
+ },
+ },
+ "Half": {
+ init: []BitRange{{PallocChunkPages / 2, PallocChunkPages / 2}},
+ tests: []test{
+ {0, 1, 0},
+ {5, 3, 0},
+ {2, 11, 0},
+ {PallocChunkPages/2 - 1, 1, 0},
+ {PallocChunkPages / 2, 1, 1},
+ {PallocChunkPages/2 + 10, 1, 1},
+ {PallocChunkPages/2 - 1, 2, 1},
+ {PallocChunkPages / 4, PallocChunkPages / 4, 0},
+ {PallocChunkPages / 4, PallocChunkPages/4 + 1, 1},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, PallocChunkPages/4 + 1},
+ {0, PallocChunkPages, PallocChunkPages / 2},
+ },
+ },
+ "OddBound": {
+ init: []BitRange{{0, 111}},
+ tests: []test{
+ {0, 1, 1},
+ {5, 3, 3},
+ {2, 11, 11},
+ {110, 2, 1},
+ {99, 50, 12},
+ {110, 1, 1},
+ {111, 1, 0},
+ {99, 1, 1},
+ {120, 1, 0},
+ {PallocChunkPages / 2, PallocChunkPages / 2, 0},
+ {0, PallocChunkPages, 111},
+ },
+ },
+ "Scattered": {
+ init: []BitRange{
+ {1, 3}, {5, 1}, {7, 1}, {10, 2}, {13, 1}, {15, 4},
+ {21, 1}, {23, 1}, {26, 2}, {30, 5}, {36, 2}, {40, 3},
+ {44, 6}, {51, 1}, {53, 2}, {58, 3}, {63, 1}, {67, 2},
+ {71, 10}, {84, 1}, {89, 7}, {99, 2}, {103, 1}, {107, 2},
+ {111, 1}, {113, 1}, {115, 1}, {118, 1}, {120, 2}, {125, 5},
+ },
+ tests: []test{
+ {0, 11, 6},
+ {0, 64, 39},
+ {13, 64, 40},
+ {64, 64, 34},
+ {0, 128, 73},
+ {1, 128, 74},
+ {0, PallocChunkPages, 75},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.init)
+ for _, h := range v.tests {
+ if got := b.PopcntRange(h.i, h.n); got != h.want {
+ t.Errorf("bad popcnt (i=%d, n=%d): got %d, want %d", h.i, h.n, got, h.want)
+ }
+ }
+ })
+ }
+}
+
+// Ensures computing bit summaries works as expected by generating random
+// bitmaps and checking against a reference implementation.
+func TestPallocBitsSummarizeRandom(t *testing.T) {
+ b := new(PallocBits)
+ for i := 0; i < 1000; i++ {
+ // Randomize bitmap.
+ for i := range b {
+ b[i] = rand.Uint64()
+ }
+ // Check summary against reference implementation.
+ checkPallocSum(t, b.Summarize(), SummarizeSlow(b))
+ }
+}
+
+// Ensures computing bit summaries works as expected.
+func TestPallocBitsSummarize(t *testing.T) {
+ var emptySum = PackPallocSum(PallocChunkPages, PallocChunkPages, PallocChunkPages)
+ type test struct {
+ free []BitRange // Ranges of free (zero) bits.
+ hits []PallocSum
+ }
+ tests := make(map[string]test)
+ tests["NoneFree"] = test{
+ free: []BitRange{},
+ hits: []PallocSum{
+ PackPallocSum(0, 0, 0),
+ },
+ }
+ tests["OnlyStart"] = test{
+ free: []BitRange{{0, 10}},
+ hits: []PallocSum{
+ PackPallocSum(10, 10, 0),
+ },
+ }
+ tests["OnlyEnd"] = test{
+ free: []BitRange{{PallocChunkPages - 40, 40}},
+ hits: []PallocSum{
+ PackPallocSum(0, 40, 40),
+ },
+ }
+ tests["StartAndEnd"] = test{
+ free: []BitRange{{0, 11}, {PallocChunkPages - 23, 23}},
+ hits: []PallocSum{
+ PackPallocSum(11, 23, 23),
+ },
+ }
+ tests["StartMaxEnd"] = test{
+ free: []BitRange{{0, 4}, {50, 100}, {PallocChunkPages - 4, 4}},
+ hits: []PallocSum{
+ PackPallocSum(4, 100, 4),
+ },
+ }
+ tests["OnlyMax"] = test{
+ free: []BitRange{{1, 20}, {35, 241}, {PallocChunkPages - 50, 30}},
+ hits: []PallocSum{
+ PackPallocSum(0, 241, 0),
+ },
+ }
+ tests["MultiMax"] = test{
+ free: []BitRange{{35, 2}, {40, 5}, {100, 5}},
+ hits: []PallocSum{
+ PackPallocSum(0, 5, 0),
+ },
+ }
+ tests["One"] = test{
+ free: []BitRange{{2, 1}},
+ hits: []PallocSum{
+ PackPallocSum(0, 1, 0),
+ },
+ }
+ tests["AllFree"] = test{
+ free: []BitRange{{0, PallocChunkPages}},
+ hits: []PallocSum{
+ emptySum,
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.free)
+ // In the PallocBits we create, 1s represent free spots, but in the
+ // actual PallocBits 1 means not free, so invert.
+ invertPallocBits(b)
+ for _, h := range v.hits {
+ checkPallocSum(t, b.Summarize(), h)
+ }
+ })
+ }
+}
+
+// Benchmarks how quickly we can summarize a PallocBits.
+func BenchmarkPallocBitsSummarize(b *testing.B) {
+ buf0 := new(PallocBits)
+ buf1 := new(PallocBits)
+ for i := 0; i < len(buf1); i++ {
+ buf1[i] = ^uint64(0)
+ }
+ bufa := new(PallocBits)
+ for i := 0; i < len(bufa); i++ {
+ bufa[i] = 0xaa
+ }
+ for _, buf := range []*PallocBits{buf0, buf1, bufa} {
+ b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ buf.Summarize()
+ }
+ })
+ }
+}
+
+// Ensures page allocation works.
+func TestPallocBitsAlloc(t *testing.T) {
+ tests := map[string]struct {
+ before []BitRange
+ after []BitRange
+ npages uintptr
+ hits []uint
+ }{
+ "AllFree1": {
+ npages: 1,
+ hits: []uint{0, 1, 2, 3, 4, 5},
+ after: []BitRange{{0, 6}},
+ },
+ "AllFree2": {
+ npages: 2,
+ hits: []uint{0, 2, 4, 6, 8, 10},
+ after: []BitRange{{0, 12}},
+ },
+ "AllFree5": {
+ npages: 5,
+ hits: []uint{0, 5, 10, 15, 20},
+ after: []BitRange{{0, 25}},
+ },
+ "AllFree64": {
+ npages: 64,
+ hits: []uint{0, 64, 128},
+ after: []BitRange{{0, 192}},
+ },
+ "AllFree65": {
+ npages: 65,
+ hits: []uint{0, 65, 130},
+ after: []BitRange{{0, 195}},
+ },
+ "SomeFree64": {
+ before: []BitRange{{0, 32}, {64, 32}, {100, PallocChunkPages - 100}},
+ npages: 64,
+ hits: []uint{^uint(0)},
+ after: []BitRange{{0, 32}, {64, 32}, {100, PallocChunkPages - 100}},
+ },
+ "NoneFree1": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 1,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree2": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 2,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree5": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 5,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree65": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 65,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit1": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 - 2, PallocChunkPages/2 + 2}},
+ npages: 1,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit2": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 - 1, PallocChunkPages/2 + 1}},
+ npages: 2,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit5": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 + 2, PallocChunkPages/2 - 2}},
+ npages: 5,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit65": {
+ before: []BitRange{{0, PallocChunkPages/2 - 31}, {PallocChunkPages/2 + 34, PallocChunkPages/2 - 34}},
+ npages: 65,
+ hits: []uint{PallocChunkPages/2 - 31, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "SomeFree161": {
+ before: []BitRange{{0, 185}, {331, 1}},
+ npages: 161,
+ hits: []uint{332},
+ after: []BitRange{{0, 185}, {331, 162}},
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.before)
+ for iter, i := range v.hits {
+ a, _ := b.Find(v.npages, 0)
+ if i != a {
+ t.Fatalf("find #%d picked wrong index: want %d, got %d", iter+1, i, a)
+ }
+ if i != ^uint(0) {
+ b.AllocRange(a, uint(v.npages))
+ }
+ }
+ want := makePallocBits(v.after)
+ checkPallocBits(t, b, want)
+ })
+ }
+}
+
+// Ensures page freeing works.
+func TestPallocBitsFree(t *testing.T) {
+ tests := map[string]struct {
+ beforeInv []BitRange
+ afterInv []BitRange
+ frees []uint
+ npages uintptr
+ }{
+ "SomeFree": {
+ npages: 1,
+ beforeInv: []BitRange{{0, 32}, {64, 32}, {100, 1}},
+ frees: []uint{32},
+ afterInv: []BitRange{{0, 33}, {64, 32}, {100, 1}},
+ },
+ "NoneFree1": {
+ npages: 1,
+ frees: []uint{0, 1, 2, 3, 4, 5},
+ afterInv: []BitRange{{0, 6}},
+ },
+ "NoneFree2": {
+ npages: 2,
+ frees: []uint{0, 2, 4, 6, 8, 10},
+ afterInv: []BitRange{{0, 12}},
+ },
+ "NoneFree5": {
+ npages: 5,
+ frees: []uint{0, 5, 10, 15, 20},
+ afterInv: []BitRange{{0, 25}},
+ },
+ "NoneFree64": {
+ npages: 64,
+ frees: []uint{0, 64, 128},
+ afterInv: []BitRange{{0, 192}},
+ },
+ "NoneFree65": {
+ npages: 65,
+ frees: []uint{0, 65, 130},
+ afterInv: []BitRange{{0, 195}},
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.beforeInv)
+ invertPallocBits(b)
+ for _, i := range v.frees {
+ b.Free(i, uint(v.npages))
+ }
+ want := makePallocBits(v.afterInv)
+ invertPallocBits(want)
+ checkPallocBits(t, b, want)
+ })
+ }
+}
+
+func TestFindBitRange64(t *testing.T) {
+ check := func(x uint64, n uint, result uint) {
+ i := FindBitRange64(x, n)
+ if result == ^uint(0) && i < 64 {
+ t.Errorf("case (%016x, %d): got %d, want failure", x, n, i)
+ } else if result != ^uint(0) && i != result {
+ t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result)
+ }
+ }
+ for i := uint(0); i <= 64; i++ {
+ check(^uint64(0), i, 0)
+ }
+ check(0, 0, 0)
+ for i := uint(1); i <= 64; i++ {
+ check(0, i, ^uint(0))
+ }
+ check(0x8000000000000000, 1, 63)
+ check(0xc000010001010000, 2, 62)
+ check(0xc000010001030000, 2, 16)
+ check(0xe000030001030000, 3, 61)
+ check(0xe000030001070000, 3, 16)
+ check(0xffff03ff01070000, 16, 48)
+ check(0xffff03ff0107ffff, 16, 0)
+ check(0x0fff03ff01079fff, 16, ^uint(0))
+}
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 2bd41b6..128498d 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -711,13 +711,16 @@
return
}
-// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
-// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
-// If len(p) < n, GoroutineProfile does not change p and returns n, false.
-//
-// Most clients should use the runtime/pprof package instead
-// of calling GoroutineProfile directly.
-func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
+func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+ return goroutineProfileWithLabels(p, labels)
+}
+
+// labels may be nil. If labels is non-nil, it must have the same length as p.
+func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+ if labels != nil && len(labels) != len(p) {
+ labels = nil
+ }
gp := getg()
isOK := func(gp1 *g) bool {
@@ -737,7 +740,7 @@
if n <= len(p) {
ok = true
- r := p
+ r, lbl := p, labels
// Save current goroutine.
sp := getcallersp()
@@ -747,6 +750,12 @@
})
r = r[1:]
+ // If we have a place to put our goroutine labelmap, insert it there.
+ if labels != nil {
+ lbl[0] = gp.labels
+ lbl = lbl[1:]
+ }
+
// Save other goroutines.
for _, gp1 := range allgs {
if isOK(gp1) {
@@ -756,16 +765,30 @@
break
}
saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
+ if labels != nil {
+ lbl[0] = gp1.labels
+ lbl = lbl[1:]
+ }
r = r[1:]
}
}
}
startTheWorld()
-
return n, ok
}
+// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
+// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
+// If len(p) < n, GoroutineProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling GoroutineProfile directly.
+func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+
+ return goroutineProfileWithLabels(p, nil)
+}
+
func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
if n < len(r.Stack0) {
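
The new goroutineProfileWithLabels hook is what lets runtime/pprof attach profiler labels to goroutine profile records. A hedged usage sketch of the unchanged public API that benefits from it; whether and how the labels are rendered depends on the output format and Go version:

// Sets profiler labels on a goroutine and dumps a goroutine profile.
package main

import (
	"context"
	"os"
	"runtime/pprof"
	"time"
)

func main() {
	done := make(chan struct{})
	go pprof.Do(context.Background(), pprof.Labels("worker", "demo"), func(ctx context.Context) {
		<-done // park this goroutine with its labels attached
	})
	time.Sleep(100 * time.Millisecond) // crude: let the goroutine start

	// debug=0 writes the protobuf-encoded profile, which can carry labels.
	pprof.Lookup("goroutine").WriteTo(os.Stdout, 0)
	close(done)
}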
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
new file mode 100644
index 0000000..e23d077
--- /dev/null
+++ b/src/runtime/mranges.go
@@ -0,0 +1,321 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Address range data structure.
+//
+// This file contains an implementation of a data structure which
+// manages ordered address ranges.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// addrRange represents a region of address space.
+//
+// An addrRange must never span a gap in the address space.
+type addrRange struct {
+ // base and limit together represent the region of address space
+ // [base, limit). That is, base is inclusive, limit is exclusive.
+ // These are addresses over an offset view of the address space on
+ // platforms with a segmented address space, that is, on platforms
+ // where arenaBaseOffset != 0.
+ base, limit offAddr
+}
+
+// makeAddrRange creates a new address range from two virtual addresses.
+//
+// Throws if the base and limit are not in the same memory segment.
+func makeAddrRange(base, limit uintptr) addrRange {
+ r := addrRange{offAddr{base}, offAddr{limit}}
+ if (base-arenaBaseOffset >= base) != (limit-arenaBaseOffset >= limit) {
+ throw("addr range base and limit are not in the same memory segment")
+ }
+ return r
+}
+
+// size returns the size of the range represented in bytes.
+func (a addrRange) size() uintptr {
+ if !a.base.lessThan(a.limit) {
+ return 0
+ }
+ // Subtraction is safe because limit and base must be in the same
+ // segment of the address space.
+ return a.limit.diff(a.base)
+}
+
+// contains returns whether or not the range contains a given address.
+func (a addrRange) contains(addr uintptr) bool {
+ return a.base.lessEqual(offAddr{addr}) && (offAddr{addr}).lessThan(a.limit)
+}
+
+// subtract removes any overlap between a and b from a and returns the
+// resulting range. subtract assumes that a and b
+// either don't overlap at all, only overlap on one side, or are equal.
+// If b is strictly contained in a, thus forcing a split, it will throw.
+func (a addrRange) subtract(b addrRange) addrRange {
+ if b.base.lessEqual(a.base) && a.limit.lessEqual(b.limit) {
+ return addrRange{}
+ } else if a.base.lessThan(b.base) && b.limit.lessThan(a.limit) {
+ throw("bad prune")
+ } else if b.limit.lessThan(a.limit) && a.base.lessThan(b.limit) {
+ a.base = b.limit
+ } else if a.base.lessThan(b.base) && b.base.lessThan(a.limit) {
+ a.limit = b.base
+ }
+ return a
+}
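
The cases subtract distinguishes can be seen with a toy integer-range version that assumes arenaBaseOffset == 0 so plain comparisons suffice (illustration only; rng and its fields are made-up names):

// Toy version of subtract over plain uintptrs. It shows the cases the real
// code distinguishes: full overlap, strict containment (disallowed), and
// overlap on either side.
package main

import "fmt"

type rng struct{ base, limit uintptr }

func (a rng) subtract(b rng) rng {
	switch {
	case b.base <= a.base && a.limit <= b.limit:
		return rng{} // b covers a entirely
	case a.base < b.base && b.limit < a.limit:
		panic("bad prune") // b strictly inside a would force a split
	case b.limit < a.limit && a.base < b.limit:
		a.base = b.limit // overlap on the low side of a
	case a.base < b.base && b.base < a.limit:
		a.limit = b.base // overlap on the high side of a
	}
	return a
}

func main() {
	a := rng{0x1000, 0x3000}
	lo := a.subtract(rng{0x0800, 0x2000})
	hi := a.subtract(rng{0x2000, 0x4000})
	fmt.Printf("%#x-%#x %#x-%#x\n", lo.base, lo.limit, hi.base, hi.limit) // 0x2000-0x3000 0x1000-0x2000
}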
+
+// removeGreaterEqual removes all addresses in a greater than or equal
+// to addr and returns the new range.
+func (a addrRange) removeGreaterEqual(addr uintptr) addrRange {
+ if (offAddr{addr}).lessEqual(a.base) {
+ return addrRange{}
+ }
+ if a.limit.lessEqual(offAddr{addr}) {
+ return a
+ }
+ return makeAddrRange(a.base.addr(), addr)
+}
+
+var (
+ // minOffAddr is the minimum address in the offset space, and
+ // it corresponds to the virtual address arenaBaseOffset.
+ minOffAddr = offAddr{arenaBaseOffset}
+
+ // maxOffAddr is the maximum address in the offset address
+ // space. It corresponds to the highest virtual address representable
+ // by the page alloc chunk and heap arena maps.
+ maxOffAddr = offAddr{(((1 << heapAddrBits) - 1) + arenaBaseOffset) & uintptrMask}
+)
+
+// offAddr represents an address in a contiguous view
+// of the address space on systems where the address space is
+// segmented. On other systems, it's just a normal address.
+type offAddr struct {
+ // a is just the virtual address, but should never be used
+ // directly. Call addr() to get this value instead.
+ a uintptr
+}
+
+// add adds a uintptr offset to the offAddr.
+func (l offAddr) add(bytes uintptr) offAddr {
+ return offAddr{a: l.a + bytes}
+}
+
+// sub subtracts a uintptr offset from the offAddr.
+func (l offAddr) sub(bytes uintptr) offAddr {
+ return offAddr{a: l.a - bytes}
+}
+
+// diff returns the number of bytes between the
+// two offAddrs.
+func (l1 offAddr) diff(l2 offAddr) uintptr {
+ return l1.a - l2.a
+}
+
+// lessThan returns true if l1 is less than l2 in the offset
+// address space.
+func (l1 offAddr) lessThan(l2 offAddr) bool {
+ return (l1.a - arenaBaseOffset) < (l2.a - arenaBaseOffset)
+}
+
+// lessEqual returns true if l1 is less than or equal to l2 in
+// the offset address space.
+func (l1 offAddr) lessEqual(l2 offAddr) bool {
+ return (l1.a - arenaBaseOffset) <= (l2.a - arenaBaseOffset)
+}
+
+// equal returns true if the two offAddr values are equal.
+func (l1 offAddr) equal(l2 offAddr) bool {
+ // No need to compare in the offset space, it
+ // means the same thing.
+ return l1 == l2
+}
+
+// addr returns the virtual address for this offset address.
+func (l offAddr) addr() uintptr {
+ return l.a
+}
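
lessThan and lessEqual compare in the offset space by subtracting arenaBaseOffset from both operands, which linearizes an address space that wraps around the offset. A small demonstration with a made-up offset value (not the runtime's constant):

// Demonstrates the offset-space comparison used by lessThan. Addresses below
// the offset wrap around and sort after addresses at or above it.
package main

import "fmt"

const arenaBaseOffset uint64 = 0xffff800000000000 // illustrative value only

func lessThan(a, b uint64) bool {
	return a-arenaBaseOffset < b-arenaBaseOffset
}

func main() {
	hi := uint64(0xffff800000001000) // just above the offset
	lo := uint64(0x0000000000001000) // below the offset; wraps to a large value
	fmt.Println(lessThan(hi, lo))    // true: hi sorts first in the offset space
	fmt.Println(hi < lo)             // false: a raw comparison orders them the other way
}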
+
+// addrRanges is a data structure holding a collection of ranges of
+// address space.
+//
+// The ranges are coalesced eagerly to reduce the
+// number of ranges it holds.
+//
+// The slice backing store for this field is persistentalloc'd
+// and thus there is no way to free it.
+//
+// addrRanges is not thread-safe.
+type addrRanges struct {
+ // ranges is a slice of ranges sorted by base.
+ ranges []addrRange
+
+ // totalBytes is the total amount of address space in bytes counted by
+ // this addrRanges.
+ totalBytes uintptr
+
+ // sysStat is the stat to track allocations by this type
+ sysStat *uint64
+}
+
+func (a *addrRanges) init(sysStat *uint64) {
+ ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
+ ranges.len = 0
+ ranges.cap = 16
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, sysStat))
+ a.sysStat = sysStat
+ a.totalBytes = 0
+}
+
+// findSucc returns the first index in a such that addr is
+// less than the base of the addrRange at that index.
+func (a *addrRanges) findSucc(addr uintptr) int {
+ // TODO(mknyszek): Consider a binary search for large arrays.
+ // While iterating over these ranges is potentially expensive,
+ // the expected number of ranges is small, ideally just 1,
+ // since Go heaps are usually mostly contiguous.
+ base := offAddr{addr}
+ for i := range a.ranges {
+ if base.lessThan(a.ranges[i].base) {
+ return i
+ }
+ }
+ return len(a.ranges)
+}
+
+// contains returns true if a covers the address addr.
+func (a *addrRanges) contains(addr uintptr) bool {
+ i := a.findSucc(addr)
+ if i == 0 {
+ return false
+ }
+ return a.ranges[i-1].contains(addr)
+}
+
+// add inserts a new address range to a.
+//
+// r must not overlap with any address range in a.
+func (a *addrRanges) add(r addrRange) {
+ // The copies in this function are potentially expensive, but this data
+ // structure is meant to represent the Go heap. At worst, copying this
+ // would take ~160µs assuming a conservative copying rate of 25 GiB/s (the
+ // copy will almost never trigger a page fault) for a 1 TiB heap with 4 MiB
+ // arenas which is completely discontiguous. ~160µs is still a lot, but in
+ // practice most platforms have 64 MiB arenas (which cuts this by a factor
+ // of 16) and Go heaps are usually mostly contiguous, so the chance that
+ // an addrRanges even grows to that size is extremely low.
+
+ // Because we assume r is not currently represented in a,
+ // findSucc gives us our insertion index.
+ i := a.findSucc(r.base.addr())
+ coalescesDown := i > 0 && a.ranges[i-1].limit.equal(r.base)
+ coalescesUp := i < len(a.ranges) && r.limit.equal(a.ranges[i].base)
+ if coalescesUp && coalescesDown {
+ // We have neighbors and they both border us.
+ // Merge a.ranges[i-1], r, and a.ranges[i] together into a.ranges[i-1].
+ a.ranges[i-1].limit = a.ranges[i].limit
+
+ // Delete a.ranges[i].
+ copy(a.ranges[i:], a.ranges[i+1:])
+ a.ranges = a.ranges[:len(a.ranges)-1]
+ } else if coalescesDown {
+ // We have a neighbor at a lower address only and it borders us.
+ // Merge the new space into a.ranges[i-1].
+ a.ranges[i-1].limit = r.limit
+ } else if coalescesUp {
+ // We have a neighbor at a higher address only and it borders us.
+ // Merge the new space into a.ranges[i].
+ a.ranges[i].base = r.base
+ } else {
+ // We may or may not have neighbors which don't border us.
+ // Add the new range.
+ if len(a.ranges)+1 > cap(a.ranges) {
+ // Grow the array. Note that this leaks the old array, but since
+ // we're doubling we have at most 2x waste. For a 1 TiB heap and
+ // 4 MiB arenas which are all discontiguous (both very conservative
+ // assumptions), this would waste at most 4 MiB of memory.
+ oldRanges := a.ranges
+ ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
+ ranges.len = len(oldRanges) + 1
+ ranges.cap = cap(oldRanges) * 2
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, a.sysStat))
+
+ // Copy in the old array, but make space for the new range.
+ copy(a.ranges[:i], oldRanges[:i])
+ copy(a.ranges[i+1:], oldRanges[i:])
+ } else {
+ a.ranges = a.ranges[:len(a.ranges)+1]
+ copy(a.ranges[i+1:], a.ranges[i:])
+ }
+ a.ranges[i] = r
+ }
+ a.totalBytes += r.size()
+}
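
The coalescing in add can be illustrated with an ordinary slice instead of persistentalloc'd memory and with plain uintptr comparisons. A sketch (made-up rng/ranges names; not allocator-safe):

// Slice-backed sketch of the coalescing logic in add: merge with the lower
// neighbor, the higher neighbor, both, or insert a fresh range.
package main

import "fmt"

type rng struct{ base, limit uintptr }

type ranges struct{ rs []rng }

// findSucc returns the first index whose base is greater than addr.
func (a *ranges) findSucc(addr uintptr) int {
	for i, r := range a.rs {
		if addr < r.base {
			return i
		}
	}
	return len(a.rs)
}

func (a *ranges) add(r rng) {
	i := a.findSucc(r.base)
	down := i > 0 && a.rs[i-1].limit == r.base
	up := i < len(a.rs) && r.limit == a.rs[i].base
	switch {
	case up && down: // bridges two existing ranges
		a.rs[i-1].limit = a.rs[i].limit
		a.rs = append(a.rs[:i], a.rs[i+1:]...)
	case down: // extends the lower neighbor
		a.rs[i-1].limit = r.limit
	case up: // extends the higher neighbor
		a.rs[i].base = r.base
	default: // no bordering neighbor: insert at i
		a.rs = append(a.rs, rng{})
		copy(a.rs[i+1:], a.rs[i:])
		a.rs[i] = r
	}
}

func main() {
	var a ranges
	a.add(rng{0x1000, 0x2000})
	a.add(rng{0x3000, 0x4000})
	a.add(rng{0x2000, 0x3000}) // coalesces both ways
	fmt.Println(len(a.rs), a.rs[0].base == 0x1000, a.rs[0].limit == 0x4000) // 1 true true
}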
+
+// removeLast removes and returns the highest-addressed contiguous range
+// of a, or the last nBytes of that range, whichever is smaller. If a is
+// empty, it returns an empty range.
+func (a *addrRanges) removeLast(nBytes uintptr) addrRange {
+ if len(a.ranges) == 0 {
+ return addrRange{}
+ }
+ r := a.ranges[len(a.ranges)-1]
+ size := r.size()
+ if size > nBytes {
+ newEnd := r.limit.sub(nBytes)
+ a.ranges[len(a.ranges)-1].limit = newEnd
+ a.totalBytes -= nBytes
+ return addrRange{newEnd, r.limit}
+ }
+ a.ranges = a.ranges[:len(a.ranges)-1]
+ a.totalBytes -= size
+ return r
+}
+
+// removeGreaterEqual removes the ranges of a which are above addr, and additionally
+// splits any range containing addr.
+func (a *addrRanges) removeGreaterEqual(addr uintptr) {
+ pivot := a.findSucc(addr)
+ if pivot == 0 {
+ // addr is before all ranges in a.
+ a.totalBytes = 0
+ a.ranges = a.ranges[:0]
+ return
+ }
+ removed := uintptr(0)
+ for _, r := range a.ranges[pivot:] {
+ removed += r.size()
+ }
+ if r := a.ranges[pivot-1]; r.contains(addr) {
+ removed += r.size()
+ r = r.removeGreaterEqual(addr)
+ if r.size() == 0 {
+ pivot--
+ } else {
+ removed -= r.size()
+ a.ranges[pivot-1] = r
+ }
+ }
+ a.ranges = a.ranges[:pivot]
+ a.totalBytes -= removed
+}
+
+// cloneInto makes a deep clone of a's state into b, re-using
+// b's ranges if able.
+func (a *addrRanges) cloneInto(b *addrRanges) {
+ if len(a.ranges) > cap(b.ranges) {
+ // Grow the array.
+ ranges := (*notInHeapSlice)(unsafe.Pointer(&b.ranges))
+ ranges.len = 0
+ ranges.cap = cap(a.ranges)
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, b.sysStat))
+ }
+ b.ranges = b.ranges[:len(a.ranges)]
+ b.totalBytes = a.totalBytes
+ copy(b.ranges, a.ranges)
+}
diff --git a/src/runtime/msan_arm64.s b/src/runtime/msan_arm64.s
index 4dfe5e3..5e29f1a 100644
--- a/src/runtime/msan_arm64.s
+++ b/src/runtime/msan_arm64.s
@@ -47,9 +47,10 @@
// Switches SP to g0 stack and calls (FARG). Arguments already set.
TEXT msancall<>(SB), NOSPLIT, $0-0
+ MOVD RSP, R19 // callee-saved
+ CBZ g, g0stack // no g, still on a system stack
MOVD g_m(g), R10
MOVD m_g0(R10), R11
- MOVD RSP, R19 // callee-saved
CMP R11, g
BEQ g0stack
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
index 0accb83..c56aa5a 100644
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -13,13 +13,13 @@
func roundupsize(size uintptr) uintptr {
if size < _MaxSmallSize {
if size <= smallSizeMax-8 {
- return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]])
+ return uintptr(class_to_size[size_to_class8[divRoundUp(size, smallSizeDiv)]])
} else {
- return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]])
+ return uintptr(class_to_size[size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]])
}
}
if size+_PageSize < size {
return size
}
- return round(size, _PageSize)
+ return alignUp(size, _PageSize)
}
diff --git a/src/runtime/mspanset.go b/src/runtime/mspanset.go
new file mode 100644
index 0000000..490eed4
--- /dev/null
+++ b/src/runtime/mspanset.go
@@ -0,0 +1,354 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/cpu"
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// A spanSet is a set of *mspans.
+//
+// spanSet is safe for concurrent push and pop operations.
+type spanSet struct {
+ // A spanSet is a two-level data structure consisting of a
+ // growable spine that points to fixed-sized blocks. The spine
+ // can be accessed without locks, but adding a block or
+ // growing it requires taking the spine lock.
+ //
+ // Because each mspan covers at least 8K of heap and takes at
+ // most 8 bytes in the spanSet, the growth of the spine is
+ // quite limited.
+ //
+ // The spine and all blocks are allocated off-heap, which
+ // allows this to be used in the memory manager and avoids the
+ // need for write barriers on all of these. spanSetBlocks are
+ // managed in a pool, though never freed back to the operating
+ // system. We never release spine memory because there could be
+ // concurrent lock-free access and we're likely to reuse it
+ // anyway. (In principle, we could do this during STW.)
+
+ spineLock mutex
+ spine unsafe.Pointer // *[N]*spanSetBlock, accessed atomically
+ spineLen uintptr // Spine array length, accessed atomically
+ spineCap uintptr // Spine array cap, accessed under lock
+
+ // index is the head and tail of the spanSet in a single field.
+ // The head and the tail both represent an index into the logical
+ // concatenation of all blocks, with the head always behind or
+ // equal to the tail (indicating an empty set). This field is
+ // always accessed atomically.
+ //
+ // The head and the tail are only 32 bits wide, which means we
+ // can only support up to 2^32 pushes before a reset. If every
+ // span in the heap were stored in this set, and each span were
+ // the minimum size (1 runtime page, 8 KiB), then roughly the
+ // smallest heap which would be unrepresentable is 32 TiB in size.
+ index headTailIndex
+}
+
+const (
+ spanSetBlockEntries = 512 // 4KB on 64-bit
+ spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit
+)
+
+type spanSetBlock struct {
+ // Free spanSetBlocks are managed via a lock-free stack.
+ lfnode
+
+ // popped is the number of pop operations that have occurred on
+ // this block. This number is used to help determine when a block
+ // may be safely recycled.
+ popped uint32
+
+ // spans is the set of spans in this block.
+ spans [spanSetBlockEntries]*mspan
+}
+
+// push adds span s to buffer b. push is safe to call concurrently
+// with other push and pop operations.
+func (b *spanSet) push(s *mspan) {
+ // Obtain our slot.
+ cursor := uintptr(b.index.incTail().tail() - 1)
+ top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries
+
+ // Do we need to add a block?
+ spineLen := atomic.Loaduintptr(&b.spineLen)
+ var block *spanSetBlock
+retry:
+ if top < spineLen {
+ spine := atomic.Loadp(unsafe.Pointer(&b.spine))
+ blockp := add(spine, sys.PtrSize*top)
+ block = (*spanSetBlock)(atomic.Loadp(blockp))
+ } else {
+ // Add a new block to the spine, potentially growing
+ // the spine.
+ lock(&b.spineLock)
+ // spineLen cannot change until we release the lock,
+ // but may have changed while we were waiting.
+ spineLen = atomic.Loaduintptr(&b.spineLen)
+ if top < spineLen {
+ unlock(&b.spineLock)
+ goto retry
+ }
+
+ if spineLen == b.spineCap {
+ // Grow the spine.
+ newCap := b.spineCap * 2
+ if newCap == 0 {
+ newCap = spanSetInitSpineCap
+ }
+ newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys)
+ if b.spineCap != 0 {
+ // Blocks are allocated off-heap, so
+ // no write barriers.
+ memmove(newSpine, b.spine, b.spineCap*sys.PtrSize)
+ }
+ // Spine is allocated off-heap, so no write barrier.
+ atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
+ b.spineCap = newCap
+ // We can't immediately free the old spine
+ // since a concurrent push with a lower index
+ // could still be reading from it. We let it
+ // leak because even a 1TB heap would waste
+ // less than 2MB of memory on old spines. If
+ // this is a problem, we could free old spines
+ // during STW.
+ }
+
+ // Allocate a new block from the pool.
+ block = spanSetBlockPool.alloc()
+
+ // Add it to the spine.
+ blockp := add(b.spine, sys.PtrSize*top)
+ // Blocks are allocated off-heap, so no write barrier.
+ atomic.StorepNoWB(blockp, unsafe.Pointer(block))
+ atomic.Storeuintptr(&b.spineLen, spineLen+1)
+ unlock(&b.spineLock)
+ }
+
+ // We have a block. Insert the span atomically, since there may be
+ // concurrent readers via the block API.
+ atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
+}
+
+// pop removes and returns a span from buffer b, or nil if b is empty.
+// pop is safe to call concurrently with other pop and push operations.
+func (b *spanSet) pop() *mspan {
+ var head, tail uint32
+claimLoop:
+ for {
+ headtail := b.index.load()
+ head, tail = headtail.split()
+ if head >= tail {
+ // The buf is empty, as far as we can tell.
+ return nil
+ }
+ // Check if the head position we want to claim is actually
+ // backed by a block.
+ spineLen := atomic.Loaduintptr(&b.spineLen)
+ if spineLen <= uintptr(head)/spanSetBlockEntries {
+ // We're racing with a spine growth and the allocation of
+ // a new block (and maybe a new spine!), and trying to grab
+ // the span at the index which is currently being pushed.
+ // Instead of spinning, let's just notify the caller that
+ // there's nothing currently here. Spinning on this is
+ // almost definitely not worth it.
+ return nil
+ }
+ // Try to claim the current head by CASing in an updated head.
+ // This may fail transiently due to a push which modifies the
+ // tail, so keep trying while the head isn't changing.
+ want := head
+ for want == head {
+ if b.index.cas(headtail, makeHeadTailIndex(want+1, tail)) {
+ break claimLoop
+ }
+ headtail = b.index.load()
+ head, tail = headtail.split()
+ }
+ // We failed to claim the spot we were after and the head changed,
+ // meaning a popper got ahead of us. Try again from the top because
+ // the buf may not be empty.
+ }
+ top, bottom := head/spanSetBlockEntries, head%spanSetBlockEntries
+
+ // We may be reading a stale spine pointer, but because the length
+ // grows monotonically and we've already verified it, we'll definitely
+ // be reading from a valid block.
+ spine := atomic.Loadp(unsafe.Pointer(&b.spine))
+ blockp := add(spine, sys.PtrSize*uintptr(top))
+
+ // Given that the spine length is correct, we know we will never
+ // see a nil block here, since the length is always updated after
+ // the block is set.
+ block := (*spanSetBlock)(atomic.Loadp(blockp))
+ s := (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
+ for s == nil {
+ // We raced with the span actually being set, but given that we
+ // know a block for this span exists, the race window here is
+ // extremely small. Try again.
+ s = (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
+ }
+ // Clear the pointer. This isn't strictly necessary, but defensively
+ // avoids accidentally re-using blocks which could lead to memory
+ // corruption. This way, we'll get a nil pointer access instead.
+ atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), nil)
+
+ // Increase the popped count. If we are the last possible popper
+ // in the block (note that bottom need not equal spanSetBlockEntries-1
+ // due to races) then it's our responsibility to free the block.
+ //
+ // If we increment popped to spanSetBlockEntries, we can be sure that
+ // we're the last popper for this block, and it's thus safe to free it.
+ // Every other popper must have crossed this barrier (and thus finished
+ // popping its corresponding mspan) by the time we get here. Because
+ // we're the last popper, we also don't have to worry about concurrent
+ // pushers (there can't be any). Note that we may not be the popper
+ // which claimed the last slot in the block, we're just the last one
+ // to finish popping.
+ if atomic.Xadd(&block.popped, 1) == spanSetBlockEntries {
+ // Clear the block's pointer.
+ atomic.StorepNoWB(blockp, nil)
+
+ // Return the block to the block pool.
+ spanSetBlockPool.free(block)
+ }
+ return s
+}
+
+// reset resets a spanSet which is empty. It will also clean up
+// any leftover blocks.
+//
+// Throws if the buf is not empty.
+//
+// reset may not be called concurrently with any other operations
+// on the span set.
+func (b *spanSet) reset() {
+ head, tail := b.index.load().split()
+ if head < tail {
+ print("head = ", head, ", tail = ", tail, "\n")
+ throw("attempt to clear non-empty span set")
+ }
+ top := head / spanSetBlockEntries
+ if uintptr(top) < b.spineLen {
+ // If the head catches up to the tail and the set is empty,
+ // we may not clean up the block containing the head and tail
+ // since it may be pushed into again. In order to avoid leaking
+ // memory since we're going to reset the head and tail, clean
+ // up such a block now, if it exists.
+ blockp := (**spanSetBlock)(add(b.spine, sys.PtrSize*uintptr(top)))
+ block := *blockp
+ if block != nil {
+ // Sanity check the popped value.
+ if block.popped == 0 {
+ // popped should never be zero here: a non-nil block pointer
+ // means at least one value was pushed into this block, and an
+ // empty set means every pushed value has been popped.
+ throw("span set block with unpopped elements found in reset")
+ }
+ if block.popped == spanSetBlockEntries {
+ // popped should also never be equal to spanSetBlockEntries
+ // because the last popper should have made the block pointer
+ // in this slot nil.
+ throw("fully empty unfreed span set block found in reset")
+ }
+
+ // Clear the pointer to the block.
+ atomic.StorepNoWB(unsafe.Pointer(blockp), nil)
+
+ // Return the block to the block pool.
+ spanSetBlockPool.free(block)
+ }
+ }
+ b.index.reset()
+ atomic.Storeuintptr(&b.spineLen, 0)
+}
+
+// spanSetBlockPool is a global pool of spanSetBlocks.
+var spanSetBlockPool spanSetBlockAlloc
+
+// spanSetBlockAlloc represents a concurrent pool of spanSetBlocks.
+type spanSetBlockAlloc struct {
+ stack lfstack
+}
+
+// alloc tries to grab a spanSetBlock out of the pool, and if it fails
+// persistentallocs a new one and returns it.
+func (p *spanSetBlockAlloc) alloc() *spanSetBlock {
+ if s := (*spanSetBlock)(p.stack.pop()); s != nil {
+ return s
+ }
+ return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gc_sys))
+}
+
+// free returns a spanSetBlock back to the pool.
+func (p *spanSetBlockAlloc) free(block *spanSetBlock) {
+ atomic.Store(&block.popped, 0)
+ p.stack.push(&block.lfnode)
+}
+
+// headTailIndex packs a 32-bit head and a 32-bit tail of
+// a queue into a single 64-bit value.
+type headTailIndex uint64
+
+// makeHeadTailIndex creates a headTailIndex value from a separate
+// head and tail.
+func makeHeadTailIndex(head, tail uint32) headTailIndex {
+ return headTailIndex(uint64(head)<<32 | uint64(tail))
+}
+
+// head returns the head of a headTailIndex value.
+func (h headTailIndex) head() uint32 {
+ return uint32(h >> 32)
+}
+
+// tail returns the tail of a headTailIndex value.
+func (h headTailIndex) tail() uint32 {
+ return uint32(h)
+}
+
+// split splits the headTailIndex value into its parts.
+func (h headTailIndex) split() (head uint32, tail uint32) {
+ return h.head(), h.tail()
+}
+
+// load atomically reads a headTailIndex value.
+func (h *headTailIndex) load() headTailIndex {
+ return headTailIndex(atomic.Load64((*uint64)(h)))
+}
+
+// cas atomically compares-and-swaps a headTailIndex value.
+func (h *headTailIndex) cas(old, new headTailIndex) bool {
+ return atomic.Cas64((*uint64)(h), uint64(old), uint64(new))
+}
+
+// incHead atomically increments the head of a headTailIndex.
+func (h *headTailIndex) incHead() headTailIndex {
+ return headTailIndex(atomic.Xadd64((*uint64)(h), (1 << 32)))
+}
+
+// decHead atomically decrements the head of a headTailIndex.
+func (h *headTailIndex) decHead() headTailIndex {
+ return headTailIndex(atomic.Xadd64((*uint64)(h), -(1 << 32)))
+}
+
+// incTail atomically increments the tail of a headTailIndex.
+func (h *headTailIndex) incTail() headTailIndex {
+ ht := headTailIndex(atomic.Xadd64((*uint64)(h), +1))
+ // Check for overflow.
+ if ht.tail() == 0 {
+ print("runtime: head = ", ht.head(), ", tail = ", ht.tail(), "\n")
+ throw("headTailIndex overflow")
+ }
+ return ht
+}
+
+// reset clears the headTailIndex to (0, 0).
+func (h *headTailIndex) reset() {
+ atomic.Store64((*uint64)(h), 0)
+}
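Because headTailIndex is just a 64-bit packing of two 32-bit counters, its behaviour is easy to check in isolation. A small self-contained sketch of the same packing, using sync/atomic instead of runtime/internal/atomic:

package main

import (
	"fmt"
	"sync/atomic"
)

// headTail packs a 32-bit head (high bits) and a 32-bit tail (low bits),
// mirroring the headTailIndex type in the diff above.
type headTail uint64

func makeHeadTail(head, tail uint32) headTail {
	return headTail(uint64(head)<<32 | uint64(tail))
}

func (h headTail) head() uint32 { return uint32(h >> 32) }
func (h headTail) tail() uint32 { return uint32(h) }

// incHead and incTail each bump one half atomically. Because both
// counters share a single word, one Add updates one half without
// disturbing the other, as long as that half does not overflow.
func (h *headTail) incHead() headTail {
	return headTail(atomic.AddUint64((*uint64)(h), 1<<32))
}

func (h *headTail) incTail() headTail {
	return headTail(atomic.AddUint64((*uint64)(h), 1))
}

func main() {
	h := makeHeadTail(3, 7)
	fmt.Println(h.head(), h.tail()) // 3 7
	h.incTail()
	h.incHead()
	fmt.Println(h.head(), h.tail()) // 4 8
}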
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 421580e..6a8a34d 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -31,7 +31,7 @@
nfree uint64 // number of frees
// Statistics about malloc heap.
- // Protected by mheap.lock
+ // Updated atomically, or with the world stopped.
//
// Like MemStats, heap_sys and heap_inuse do not count memory
// in manually-managed spans.
@@ -40,19 +40,22 @@
heap_idle uint64 // bytes in idle spans
heap_inuse uint64 // bytes in mSpanInUse spans
heap_released uint64 // bytes released to the os
- heap_objects uint64 // total number of allocated objects
+
+ // heap_objects is not used by the runtime directly and is
+ // instead computed on the fly by updatememstats.
+ heap_objects uint64 // total number of allocated objects
// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
- stacks_inuse uint64 // bytes in manually-managed stack spans
+ stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW
stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
mspan_inuse uint64 // mspan structures
mspan_sys uint64
mcache_inuse uint64 // mcache structures
mcache_sys uint64
buckhash_sys uint64 // profiling bucket hash table
- gc_sys uint64
- other_sys uint64
+ gc_sys uint64 // updated atomically or during STW
+ other_sys uint64 // updated atomically or during STW
// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
@@ -79,6 +82,8 @@
last_gc_nanotime uint64 // last gc (monotonic time)
tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+ last_next_gc uint64 // next_gc for the previous GC
+ last_heap_inuse uint64 // heap_inuse at mark termination of the previous GC
// triggerRatio is the heap growth ratio that triggers marking.
//
@@ -508,6 +513,12 @@
//go:nowritebarrier
func updatememstats() {
+ // Flush mcaches to mcentral before doing anything else.
+ //
+ // Flushing to the mcentral may in general cause stats to
+ // change as mcentral data structures are manipulated.
+ systemstack(flushallmcaches)
+
memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
@@ -518,7 +529,7 @@
// Calculate memory allocator stats.
// During program execution we only count number of frees and amount of freed memory.
- // Current number of alive object in the heap and amount of alive heap memory
+ // Current number of alive objects in the heap and amount of alive heap memory
// are calculated by scanning all spans.
// Total number of mallocs is calculated as number of frees plus number of alive objects.
// Similarly, total amount of allocated memory is calculated as amount of freed memory
@@ -532,9 +543,6 @@
memstats.by_size[i].nfree = 0
}
- // Flush mcache's to mcentral.
- systemstack(flushallmcaches)
-
// Aggregate local stats.
cachestats()
diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go
index f444452..632769c 100644
--- a/src/runtime/mwbbuf.go
+++ b/src/runtime/mwbbuf.go
@@ -296,6 +296,13 @@
continue
}
mbits.setMarked()
+
+ // Mark span.
+ arena, pageIdx, pageMask := pageIndexOf(span.base())
+ if arena.pageMarks[pageIdx]&pageMask == 0 {
+ atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
+ }
+
if span.spanclass.noscan() {
gcw.bytesMarked += uint64(span.elemsize)
continue
diff --git a/src/runtime/nbpipe_fcntl_libc_test.go b/src/runtime/nbpipe_fcntl_libc_test.go
new file mode 100644
index 0000000..b38c583
--- /dev/null
+++ b/src/runtime/nbpipe_fcntl_libc_test.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin solaris
+
+package runtime_test
+
+import (
+ "runtime"
+ "syscall"
+)
+
+// Call the fcntl libc function rather than making a raw syscall.
+func fcntl(fd uintptr, cmd int, arg uintptr) (uintptr, syscall.Errno) {
+ res, errno := runtime.Fcntl(fd, uintptr(cmd), arg)
+ return res, syscall.Errno(errno)
+}
diff --git a/src/runtime/nbpipe_fcntl_unix_test.go b/src/runtime/nbpipe_fcntl_unix_test.go
new file mode 100644
index 0000000..75acdb6
--- /dev/null
+++ b/src/runtime/nbpipe_fcntl_unix_test.go
@@ -0,0 +1,17 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build dragonfly freebsd linux netbsd openbsd
+
+package runtime_test
+
+import (
+ "internal/syscall/unix"
+ "syscall"
+)
+
+func fcntl(fd uintptr, cmd int, arg uintptr) (uintptr, syscall.Errno) {
+ res, _, err := syscall.Syscall(unix.FcntlSyscall, fd, uintptr(cmd), arg)
+ return res, err
+}
diff --git a/src/runtime/nbpipe_pipe.go b/src/runtime/nbpipe_pipe.go
new file mode 100644
index 0000000..822b294
--- /dev/null
+++ b/src/runtime/nbpipe_pipe.go
@@ -0,0 +1,19 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly
+
+package runtime
+
+func nonblockingPipe() (r, w int32, errno int32) {
+ r, w, errno = pipe()
+ if errno != 0 {
+ return -1, -1, errno
+ }
+ closeonexec(r)
+ setNonblock(r)
+ closeonexec(w)
+ setNonblock(w)
+ return r, w, errno
+}
diff --git a/src/runtime/nbpipe_pipe2.go b/src/runtime/nbpipe_pipe2.go
new file mode 100644
index 0000000..e3639d9
--- /dev/null
+++ b/src/runtime/nbpipe_pipe2.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build freebsd linux netbsd openbsd solaris
+
+package runtime
+
+func nonblockingPipe() (r, w int32, errno int32) {
+ r, w, errno = pipe2(_O_NONBLOCK | _O_CLOEXEC)
+ if errno == -_ENOSYS {
+ r, w, errno = pipe()
+ if errno != 0 {
+ return -1, -1, errno
+ }
+ closeonexec(r)
+ setNonblock(r)
+ closeonexec(w)
+ setNonblock(w)
+ }
+ return r, w, errno
+}
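The nonblockingPipe helpers above are runtime-internal, but the same pattern can be reproduced in ordinary user code. A rough Linux-oriented sketch using golang.org/x/sys/unix (the ENOSYS fallback mirrors nbpipe_pipe2.go; the package and function names here are the x/sys ones, not the runtime's):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// nonblockingPipe creates a pipe with O_NONBLOCK and O_CLOEXEC set on
// both ends, falling back to pipe+fcntl when pipe2 is unavailable,
// just like the runtime helper in the diff above.
func nonblockingPipe() (r, w int, err error) {
	var p [2]int
	err = unix.Pipe2(p[:], unix.O_NONBLOCK|unix.O_CLOEXEC)
	if err == nil {
		return p[0], p[1], nil
	}
	if err != unix.ENOSYS {
		return -1, -1, err
	}
	// Fallback for kernels without pipe2.
	if err = unix.Pipe(p[:]); err != nil {
		return -1, -1, err
	}
	for _, fd := range p {
		unix.CloseOnExec(fd)
		if err = unix.SetNonblock(fd, true); err != nil {
			return -1, -1, err
		}
	}
	return p[0], p[1], nil
}

func main() {
	r, w, err := nonblockingPipe()
	fmt.Println(r, w, err)
}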
diff --git a/src/runtime/nbpipe_test.go b/src/runtime/nbpipe_test.go
new file mode 100644
index 0000000..d739f57
--- /dev/null
+++ b/src/runtime/nbpipe_test.go
@@ -0,0 +1,93 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package runtime_test
+
+import (
+ "runtime"
+ "syscall"
+ "testing"
+ "unsafe"
+)
+
+func TestNonblockingPipe(t *testing.T) {
+ t.Parallel()
+
+ // NonblockingPipe is the name under which nonblockingPipe is exported for testing.
+ r, w, errno := runtime.NonblockingPipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+
+ checkIsPipe(t, r, w)
+ checkNonblocking(t, r, "reader")
+ checkCloseonexec(t, r, "reader")
+ checkNonblocking(t, w, "writer")
+ checkCloseonexec(t, w, "writer")
+}
+
+func checkIsPipe(t *testing.T, r, w int32) {
+ bw := byte(42)
+ if n := runtime.Write(uintptr(w), unsafe.Pointer(&bw), 1); n != 1 {
+ t.Fatalf("Write(w, &b, 1) == %d, expected 1", n)
+ }
+ var br byte
+ if n := runtime.Read(r, unsafe.Pointer(&br), 1); n != 1 {
+ t.Fatalf("Read(r, &b, 1) == %d, expected 1", n)
+ }
+ if br != bw {
+ t.Errorf("pipe read %d, expected %d", br, bw)
+ }
+}
+
+func checkNonblocking(t *testing.T, fd int32, name string) {
+ t.Helper()
+ flags, errno := fcntl(uintptr(fd), syscall.F_GETFL, 0)
+ if errno != 0 {
+ t.Errorf("fcntl(%s, F_GETFL) failed: %v", name, syscall.Errno(errno))
+ } else if flags&syscall.O_NONBLOCK == 0 {
+ t.Errorf("O_NONBLOCK not set in %s flags %#x", name, flags)
+ }
+}
+
+func checkCloseonexec(t *testing.T, fd int32, name string) {
+ t.Helper()
+ flags, errno := fcntl(uintptr(fd), syscall.F_GETFD, 0)
+ if errno != 0 {
+ t.Errorf("fcntl(%s, F_GETFD) failed: %v", name, syscall.Errno(errno))
+ } else if flags&syscall.FD_CLOEXEC == 0 {
+ t.Errorf("FD_CLOEXEC not set in %s flags %#x", name, flags)
+ }
+}
+
+func TestSetNonblock(t *testing.T) {
+ t.Parallel()
+
+ r, w, errno := runtime.Pipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+
+ checkIsPipe(t, r, w)
+
+ runtime.SetNonblock(r)
+ runtime.SetNonblock(w)
+ checkNonblocking(t, r, "reader")
+ checkNonblocking(t, w, "writer")
+
+ runtime.Closeonexec(r)
+ runtime.Closeonexec(w)
+ checkCloseonexec(t, r, "reader")
+ checkCloseonexec(t, w, "writer")
+}
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index 73bbc5e..34ea82a 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris windows
package runtime
@@ -12,12 +12,35 @@
)
// Integrated network poller (platform-independent part).
-// A particular implementation (epoll/kqueue) must define the following functions:
-// func netpollinit() // to initialize the poller
-// func netpollopen(fd uintptr, pd *pollDesc) int32 // to arm edge-triggered notifications
-// and associate fd with pd.
-// An implementation must call the following function to denote that the pd is ready.
-// func netpollready(gpp **g, pd *pollDesc, mode int32)
+// A particular implementation (epoll/kqueue/port/AIX/Windows)
+// must define the following functions:
+//
+// func netpollinit()
+// Initialize the poller. Only called once.
+//
+// func netpollopen(fd uintptr, pd *pollDesc) int32
+// Arm edge-triggered notifications for fd. The pd argument is to pass
+// back to netpollready when fd is ready. Return an errno value.
+//
+// func netpoll(delta int64) gList
+// Poll the network. If delta < 0, block indefinitely. If delta == 0,
+// poll without blocking. If delta > 0, block for up to delta nanoseconds.
+// Return a list of goroutines built by calling netpollready.
+//
+// func netpollBreak()
+// Wake up the network poller, assumed to be blocked in netpoll.
+//
+// func netpollIsPollDescriptor(fd uintptr) bool
+// Reports whether fd is a file descriptor used by the poller.
+
+// Error codes returned by runtime_pollReset and runtime_pollWait.
+// These must match the values in internal/poll/fd_poll_runtime.go.
+const (
+ pollNoError = 0 // no error
+ pollErrClosing = 1 // descriptor is closed
+ pollErrTimeout = 2 // I/O timeout
+ pollErrNotPollable = 3 // general error polling descriptor
+)
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
@@ -25,12 +48,12 @@
// a goroutine consumes the notification by changing the state to nil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
// the goroutine commits to park by changing the state to G pointer,
-// or, alternatively, concurrent io notification changes the state to READY,
+// or, alternatively, concurrent io notification changes the state to pdReady,
// or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
-// io notification or timeout/close changes the state to READY or nil respectively
+// io notification or timeout/close changes the state to pdReady or nil respectively
// and unparks the goroutine.
-// nil - nothing of the above.
+// nil - none of the above.
const (
pdReady uintptr = 1
pdWait uintptr = 2
@@ -79,15 +102,28 @@
}
var (
- netpollInited uint32
+ netpollInitLock mutex
+ netpollInited uint32
+
pollcache pollCache
netpollWaiters uint32
)
//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
- netpollinit()
- atomic.Store(&netpollInited, 1)
+ netpollGenericInit()
+}
+
+func netpollGenericInit() {
+ if atomic.Load(&netpollInited) == 0 {
+ lockInit(&netpollInitLock, lockRankNetpollInit)
+ lock(&netpollInitLock)
+ if netpollInited == 0 {
+ netpollinit()
+ atomic.Store(&netpollInited, 1)
+ }
+ unlock(&netpollInitLock)
+ }
}
func netpollinited() bool {
@@ -99,14 +135,7 @@
// poll_runtime_isPollServerDescriptor reports whether fd is a
// descriptor being used by netpoll.
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
- fds := netpolldescriptor()
- if GOOS != "aix" {
- return fd == fds
- } else {
- // AIX have a pipe in its netpoll implementation.
- // Therefore, two fd are returned by netpolldescriptor using a mask.
- return fd == fds&0xFFFF || fd == (fds>>16)&0xFFFF
- }
+ return netpollIsPollDescriptor(fd)
}
//go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
@@ -157,40 +186,47 @@
unlock(&c.lock)
}
+// poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
+// prepares a descriptor for polling in mode, which is 'r' or 'w'.
+// This returns an error code; the codes are defined above.
//go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
func poll_runtime_pollReset(pd *pollDesc, mode int) int {
- err := netpollcheckerr(pd, int32(mode))
- if err != 0 {
- return err
+ errcode := netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
}
if mode == 'r' {
pd.rg = 0
} else if mode == 'w' {
pd.wg = 0
}
- return 0
+ return pollNoError
}
+// poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
+// waits for a descriptor to be ready for reading or writing,
+// according to mode, which is 'r' or 'w'.
+// This returns an error code; the codes are defined above.
//go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
func poll_runtime_pollWait(pd *pollDesc, mode int) int {
- err := netpollcheckerr(pd, int32(mode))
- if err != 0 {
- return err
+ errcode := netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
}
// As for now only Solaris, illumos, and AIX use level-triggered IO.
if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
netpollarm(pd, mode)
}
for !netpollblock(pd, int32(mode), false) {
- err = netpollcheckerr(pd, int32(mode))
- if err != 0 {
- return err
+ errcode = netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
}
// Can happen if timeout has fired and unblocked us,
// but before we had a chance to run, timeout has been reset.
// Pretend it has not happened and retry.
}
- return 0
+ return pollNoError
}
//go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
@@ -232,13 +268,12 @@
if pd.rt.f == nil {
if pd.rd > 0 {
pd.rt.f = rtf
- pd.rt.when = pd.rd
// Copy current seq into the timer arg.
// Timer func will check the seq against current descriptor seq,
// if they differ the descriptor was reused or timers were reset.
pd.rt.arg = pd
pd.rt.seq = pd.rseq
- addtimer(&pd.rt)
+ resettimer(&pd.rt, pd.rd)
}
} else if pd.rd != rd0 || combo != combo0 {
pd.rseq++ // invalidate current timers
@@ -252,10 +287,9 @@
if pd.wt.f == nil {
if pd.wd > 0 && !combo {
pd.wt.f = netpollWriteDeadline
- pd.wt.when = pd.wd
pd.wt.arg = pd
pd.wt.seq = pd.wseq
- addtimer(&pd.wt)
+ resettimer(&pd.wt, pd.wd)
}
} else if pd.wd != wd0 || combo != combo0 {
pd.wseq++ // invalidate current timers
@@ -316,8 +350,13 @@
}
}
-// make pd ready, newly runnable goroutines (if any) are added to toRun.
-// May run during STW, so write barriers are not allowed.
+// netpollready is called by the platform-specific netpoll function.
+// It declares that the fd associated with pd is ready for I/O.
+// The toRun argument is used to build a list of goroutines to return
+// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
+// whether the fd is ready for reading or writing or both.
+//
+// This may run while the world is stopped, so write barriers are not allowed.
//go:nowritebarrier
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
var rg, wg *g
@@ -337,18 +376,18 @@
func netpollcheckerr(pd *pollDesc, mode int32) int {
if pd.closing {
- return 1 // ErrFileClosing or ErrNetClosing
+ return pollErrClosing
}
if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
- return 2 // ErrTimeout
+ return pollErrTimeout
}
// Report an event scanning error only on a read event.
// An error on a write event will be captured in a subsequent
// write call that is able to report a more specific error.
if mode == 'r' && pd.everr {
- return 3 // ErrNotPollable
+ return pollErrNotPollable
}
- return 0
+ return pollNoError
}
func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
@@ -375,7 +414,7 @@
gpp = &pd.wg
}
- // set the gpp semaphore to WAIT
+ // set the gpp semaphore to pdWait
for {
old := *gpp
if old == pdReady {
@@ -390,13 +429,13 @@
}
}
- // need to recheck error states after setting gpp to WAIT
+ // need to recheck error states after setting gpp to pdWait
// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
if waitio || netpollcheckerr(pd, mode) == 0 {
gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
}
- // be careful to not lose concurrent READY notification
+ // be careful to not lose concurrent pdReady notification
old := atomic.Xchguintptr(gpp, 0)
if old > pdWait {
throw("runtime: corrupted polldesc")
@@ -416,7 +455,7 @@
return nil
}
if old == 0 && !ioready {
- // Only set READY for ioready. runtime_pollWait
+ // Only set pdReady for ioready. runtime_pollWait
// will check for timeout/cancel before waiting.
return nil
}
@@ -425,7 +464,7 @@
new = pdReady
}
if atomic.Casuintptr(gpp, old, new) {
- if old == pdReady || old == pdWait {
+ if old == pdWait {
old = 0
}
return (*g)(unsafe.Pointer(old))
@@ -504,6 +543,7 @@
}
pd := c.first
c.first = pd.link
+ lockInit(&pd.lock, lockRankPollDesc)
unlock(&c.lock)
return pd
}
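The pollNoError/pollErrClosing/pollErrTimeout/pollErrNotPollable codes introduced above cross the runtime boundary as plain ints. For illustration only, a hypothetical caller-side translation is sketched below; the real mapping lives in internal/poll/fd_poll_runtime.go and is not part of this diff, so the error variables here are stand-ins:

package main

import (
	"errors"
	"fmt"
)

// These constants must stay in sync with the runtime values added above.
const (
	pollNoError        = 0
	pollErrClosing     = 1
	pollErrTimeout     = 2
	pollErrNotPollable = 3
)

// Stand-in errors; the real package defines its own sentinel values.
var (
	errClosing     = errors.New("use of closed file or network connection")
	errTimeout     = errors.New("i/o timeout")
	errNotPollable = errors.New("not pollable")
)

// convertErr sketches how an integer code returned by the runtime
// poller could be turned into a Go error on the caller side.
func convertErr(code int) error {
	switch code {
	case pollNoError:
		return nil
	case pollErrClosing:
		return errClosing
	case pollErrTimeout:
		return errTimeout
	case pollErrNotPollable:
		return errNotPollable
	}
	return fmt.Errorf("unexpected poll error code %d", code)
}

func main() {
	fmt.Println(convertErr(pollErrTimeout))
}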
diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go
index f0ba094..4590ed8 100644
--- a/src/runtime/netpoll_aix.go
+++ b/src/runtime/netpoll_aix.go
@@ -4,7 +4,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
// This is based on the former libgo/runtime/netpoll_select.c implementation
// except that it uses poll instead of select and is written in Go.
@@ -21,12 +24,6 @@
return int32(r), int32(err)
}
-//go:nosplit
-func fcntl(fd, cmd int32, arg uintptr) int32 {
- r, _ := syscall3(&libc_fcntl, uintptr(fd), uintptr(cmd), arg)
- return int32(r)
-}
-
// pollfd represents the poll structure for AIX operating system.
type pollfd struct {
fd int32
@@ -38,7 +35,6 @@
const _POLLOUT = 0x0002
const _POLLHUP = 0x2000
const _POLLERR = 0x4000
-const _O_NONBLOCK = 0x4
var (
pfds []pollfd
@@ -48,25 +44,18 @@
rdwake int32
wrwake int32
pendingUpdates int32
+
+ netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
- var p [2]int32
-
// Create the pipe we use to wakeup poll.
- if err := pipe(&p[0]); err < 0 {
+ r, w, errno := nonblockingPipe()
+ if errno != 0 {
throw("netpollinit: failed to create pipe")
}
- rdwake = p[0]
- wrwake = p[1]
-
- fl := uintptr(fcntl(rdwake, _F_GETFL, 0))
- fcntl(rdwake, _F_SETFL, fl|_O_NONBLOCK)
- fcntl(rdwake, _F_SETFD, _FD_CLOEXEC)
-
- fl = uintptr(fcntl(wrwake, _F_GETFL, 0))
- fcntl(wrwake, _F_SETFL, fl|_O_NONBLOCK)
- fcntl(wrwake, _F_SETFD, _FD_CLOEXEC)
+ rdwake = r
+ wrwake = w
// Pre-allocate array of pollfd structures for poll.
pfds = make([]pollfd, 1, 128)
@@ -79,12 +68,8 @@
pds[0] = nil
}
-func netpolldescriptor() uintptr {
- // Both fd must be returned
- if rdwake > 0xFFFF || wrwake > 0xFFFF {
- throw("netpolldescriptor: invalid fd number")
- }
- return uintptr(rdwake<<16 | wrwake)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(rdwake) || fd == uintptr(wrwake)
}
// netpollwakeup writes on wrwake to wakeup poll before any changes.
@@ -148,12 +133,35 @@
unlock(&mtxset)
}
+// netpollBreak interrupts a poll.
+func netpollBreak() {
+ if atomic.Cas(&netpollWakeSig, 0, 1) {
+ b := [1]byte{0}
+ write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1)
+ }
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
//go:nowritebarrierrec
-func netpoll(block bool) gList {
- timeout := ^uintptr(0)
- if !block {
- timeout = 0
+func netpoll(delay int64) gList {
+ var timeout uintptr
+ if delay < 0 {
+ timeout = ^uintptr(0)
+ } else if delay == 0 {
+ // TODO: call poll with timeout == 0
return gList{}
+ } else if delay < 1e6 {
+ timeout = 1
+ } else if delay < 1e15 {
+ timeout = uintptr(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ timeout = 1e9
}
retry:
lock(&mtxpoll)
@@ -168,20 +176,30 @@
throw("poll failed")
}
unlock(&mtxset)
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if timeout > 0 {
+ return gList{}
+ }
goto retry
}
// Check if some descriptors need to be changed
if n != 0 && pfds[0].revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 {
- var b [1]byte
- for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 {
+ if delay != 0 {
+ // A netpollwakeup could be picked up by a
+ // non-blocking poll. Only clear the wakeup
+ // if blocking.
+ var b [1]byte
+ for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 {
+ }
+ atomic.Store(&netpollWakeSig, 0)
}
- // Do not look at the other fds in this case as the mode may have changed
- // XXX only additions of flags are made, so maybe it is ok
- unlock(&mtxset)
- goto retry
+ // Still look at the other fds even if the mode may have
+ // changed, as netpollBreak might have been called.
+ n--
}
var toRun gList
- for i := 0; i < len(pfds) && n > 0; i++ {
+ for i := 1; i < len(pfds) && n > 0; i++ {
pfd := &pfds[i]
var mode int32
@@ -203,8 +221,5 @@
}
}
unlock(&mtxset)
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
diff --git a/src/runtime/netpoll_epoll.go b/src/runtime/netpoll_epoll.go
index 8f49309..58f4fa8 100644
--- a/src/runtime/netpoll_epoll.go
+++ b/src/runtime/netpoll_epoll.go
@@ -6,7 +6,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
func epollcreate(size int32) int32
func epollcreate1(flags int32) int32
@@ -20,24 +23,42 @@
var (
epfd int32 = -1 // epoll descriptor
+
+ netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
+
+ netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
epfd = epollcreate1(_EPOLL_CLOEXEC)
- if epfd >= 0 {
- return
- }
- epfd = epollcreate(1024)
- if epfd >= 0 {
+ if epfd < 0 {
+ epfd = epollcreate(1024)
+ if epfd < 0 {
+ println("runtime: epollcreate failed with", -epfd)
+ throw("runtime: netpollinit failed")
+ }
closeonexec(epfd)
- return
}
- println("runtime: epollcreate failed with", -epfd)
- throw("runtime: netpollinit failed")
+ r, w, errno := nonblockingPipe()
+ if errno != 0 {
+ println("runtime: pipe failed with", -errno)
+ throw("runtime: pipe failed")
+ }
+ ev := epollevent{
+ events: _EPOLLIN,
+ }
+ *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
+ errno = epollctl(epfd, _EPOLL_CTL_ADD, r, &ev)
+ if errno != 0 {
+ println("runtime: epollctl failed with", -errno)
+ throw("runtime: epollctl failed")
+ }
+ netpollBreakRd = uintptr(r)
+ netpollBreakWr = uintptr(w)
}
-func netpolldescriptor() uintptr {
- return uintptr(epfd)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(epfd) || fd == netpollBreakRd || fd == netpollBreakWr
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -56,15 +77,49 @@
throw("runtime: unused")
}
-// polls for ready network connections
-// returns list of goroutines that become runnable
-func netpoll(block bool) gList {
+// netpollBreak interrupts an epollwait.
+func netpollBreak() {
+ if atomic.Cas(&netpollWakeSig, 0, 1) {
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 {
+ break
+ }
+ if n == -_EINTR {
+ continue
+ }
+ if n == -_EAGAIN {
+ return
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
+ }
+ }
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if epfd == -1 {
return gList{}
}
- waitms := int32(-1)
- if !block {
+ var waitms int32
+ if delay < 0 {
+ waitms = -1
+ } else if delay == 0 {
waitms = 0
+ } else if delay < 1e6 {
+ waitms = 1
+ } else if delay < 1e15 {
+ waitms = int32(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ waitms = 1e9
}
var events [128]epollevent
retry:
@@ -74,6 +129,11 @@
println("runtime: epollwait on fd", epfd, "failed with", -n)
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if waitms > 0 {
+ return gList{}
+ }
goto retry
}
var toRun gList
@@ -82,6 +142,23 @@
if ev.events == 0 {
continue
}
+
+ if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
+ if ev.events != _EPOLLIN {
+ println("runtime: netpoll: break fd ready for", ev.events)
+ throw("runtime: netpoll: break fd ready for something unexpected")
+ }
+ if delay != 0 {
+ // netpollBreak could be picked up by a
+ // nonblocking poll. Only read the byte
+ // if blocking.
+ var tmp [16]byte
+ read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
+ atomic.Store(&netpollWakeSig, 0)
+ }
+ continue
+ }
+
var mode int32
if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
mode += 'r'
@@ -98,8 +175,5 @@
netpollready(&toRun, pd, mode)
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
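The delay-to-milliseconds clamping above (the Windows poller later in this diff uses the same branches) is worth seeing on its own. A standalone sketch of that conversion:

package main

import "fmt"

// delayToWaitMS converts a nanosecond delay into the millisecond wait
// value passed to epollwait, following the branches in the diff above:
// negative means block forever, zero means poll without blocking,
// sub-millisecond delays round up to 1 ms, and very long delays are
// capped at 1e9 ms (~11.5 days).
func delayToWaitMS(delay int64) int32 {
	switch {
	case delay < 0:
		return -1
	case delay == 0:
		return 0
	case delay < 1e6:
		return 1
	case delay < 1e15:
		return int32(delay / 1e6)
	default:
		return 1e9
	}
}

func main() {
	for _, d := range []int64{-1, 0, 500, 2e6, 1e16} {
		fmt.Println(d, "->", delayToWaitMS(d))
	}
}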
diff --git a/src/runtime/netpoll_fake.go b/src/runtime/netpoll_fake.go
index 5b1a63a..b2af3b8 100644
--- a/src/runtime/netpoll_fake.go
+++ b/src/runtime/netpoll_fake.go
@@ -2,18 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Fake network poller for NaCl and wasm/js.
-// Should never be used, because NaCl and wasm/js network connections do not honor "SetNonblock".
+// Fake network poller for wasm/js.
+// Should never be used, because wasm/js network connections do not honor "SetNonblock".
-// +build nacl js,wasm
+// +build js,wasm
package runtime
func netpollinit() {
}
-func netpolldescriptor() uintptr {
- return ^uintptr(0)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return false
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -27,6 +27,9 @@
func netpollarm(pd *pollDesc, mode int) {
}
-func netpoll(block bool) gList {
+func netpollBreak() {
+}
+
+func netpoll(delay int64) gList {
return gList{}
}
diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go
index a8880e8..3bd93c1 100644
--- a/src/runtime/netpoll_kqueue.go
+++ b/src/runtime/netpoll_kqueue.go
@@ -8,10 +8,17 @@
// Integrated network poller (kqueue-based implementation).
-import "unsafe"
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
var (
kq int32 = -1
+
+ netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
+
+ netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
@@ -21,10 +28,27 @@
throw("runtime: netpollinit failed")
}
closeonexec(kq)
+ r, w, errno := nonblockingPipe()
+ if errno != 0 {
+ println("runtime: pipe failed with", -errno)
+ throw("runtime: pipe failed")
+ }
+ ev := keventt{
+ filter: _EVFILT_READ,
+ flags: _EV_ADD,
+ }
+ *(*uintptr)(unsafe.Pointer(&ev.ident)) = uintptr(r)
+ n := kevent(kq, &ev, 1, nil, 0, nil)
+ if n < 0 {
+ println("runtime: kevent failed with", -n)
+ throw("runtime: kevent failed")
+ }
+ netpollBreakRd = uintptr(r)
+ netpollBreakWr = uintptr(w)
}
-func netpolldescriptor() uintptr {
- return uintptr(kq)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(kq) || fd == netpollBreakRd || fd == netpollBreakWr
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -57,15 +81,45 @@
throw("runtime: unused")
}
-// Polls for ready network connections.
+// netpollBreak interrupts a kevent.
+func netpollBreak() {
+ if atomic.Cas(&netpollWakeSig, 0, 1) {
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 || n == -_EAGAIN {
+ break
+ }
+ if n == -_EINTR {
+ continue
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
+ }
+ }
+}
+
+// netpoll checks for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if kq == -1 {
return gList{}
}
var tp *timespec
var ts timespec
- if !block {
+ if delay < 0 {
+ tp = nil
+ } else if delay == 0 {
+ tp = &ts
+ } else {
+ ts.setNsec(delay)
+ if ts.tv_sec > 1e6 {
+ // Darwin returns EINVAL if the sleep time is too long.
+ ts.tv_sec = 1e6
+ }
tp = &ts
}
var events [64]keventt
@@ -76,11 +130,33 @@
println("runtime: kevent on fd", kq, "failed with", -n)
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if delay > 0 {
+ return gList{}
+ }
goto retry
}
var toRun gList
for i := 0; i < int(n); i++ {
ev := &events[i]
+
+ if uintptr(ev.ident) == netpollBreakRd {
+ if ev.filter != _EVFILT_READ {
+ println("runtime: netpoll: break fd ready for", ev.filter)
+ throw("runtime: netpoll: break fd ready for something unexpected")
+ }
+ if delay != 0 {
+ // netpollBreak could be picked up by a
+ // nonblocking poll. Only read the byte
+ // if blocking.
+ var tmp [16]byte
+ read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
+ atomic.Store(&netpollWakeSig, 0)
+ }
+ continue
+ }
+
var mode int32
switch ev.filter {
case _EVFILT_READ:
@@ -110,8 +186,5 @@
netpollready(&toRun, pd, mode)
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
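The kqueue poller converts the delay into a timespec instead of milliseconds, and caps the seconds field because Darwin rejects very large sleeps with EINVAL. A portable sketch of that conversion, using the syscall package's helper rather than the runtime's setNsec:

package main

import (
	"fmt"
	"syscall"
)

// delayToTimespec mirrors the kqueue branch above: nil means block
// indefinitely, a zero timespec means poll without blocking, and the
// seconds field is capped at 1e6 (~11.5 days) to stay within what
// kevent accepts.
func delayToTimespec(delay int64) *syscall.Timespec {
	if delay < 0 {
		return nil
	}
	ts := syscall.NsecToTimespec(delay)
	if ts.Sec > 1e6 {
		ts.Sec = 1e6
	}
	return &ts
}

func main() {
	fmt.Println(delayToTimespec(-1), delayToTimespec(0), *delayToTimespec(3e9))
}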
diff --git a/src/runtime/netpoll_os_test.go b/src/runtime/netpoll_os_test.go
new file mode 100644
index 0000000..b96b9f3
--- /dev/null
+++ b/src/runtime/netpoll_os_test.go
@@ -0,0 +1,28 @@
+package runtime_test
+
+import (
+ "runtime"
+ "sync"
+ "testing"
+)
+
+var wg sync.WaitGroup
+
+func init() {
+ runtime.NetpollGenericInit()
+}
+
+func BenchmarkNetpollBreak(b *testing.B) {
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ for j := 0; j < 10; j++ {
+ wg.Add(1)
+ go func() {
+ runtime.NetpollBreak()
+ wg.Done()
+ }()
+ }
+ }
+ wg.Wait()
+ b.StopTimer()
+}
diff --git a/src/runtime/netpoll_solaris.go b/src/runtime/netpoll_solaris.go
index ddddb27..d217d5b 100644
--- a/src/runtime/netpoll_solaris.go
+++ b/src/runtime/netpoll_solaris.go
@@ -4,7 +4,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
// Solaris runtime-integrated network poller.
//
@@ -71,25 +74,29 @@
//go:cgo_import_dynamic libc_port_associate port_associate "libc.so"
//go:cgo_import_dynamic libc_port_dissociate port_dissociate "libc.so"
//go:cgo_import_dynamic libc_port_getn port_getn "libc.so"
+//go:cgo_import_dynamic libc_port_alert port_alert "libc.so"
//go:linkname libc_port_create libc_port_create
//go:linkname libc_port_associate libc_port_associate
//go:linkname libc_port_dissociate libc_port_dissociate
//go:linkname libc_port_getn libc_port_getn
+//go:linkname libc_port_alert libc_port_alert
var (
libc_port_create,
libc_port_associate,
libc_port_dissociate,
- libc_port_getn libcFunc
+ libc_port_getn,
+ libc_port_alert libcFunc
+ netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
)
func errno() int32 {
return *getg().m.perrno
}
-func fcntl(fd, cmd int32, arg uintptr) int32 {
- return int32(sysvicall3(&libc_fcntl, uintptr(fd), uintptr(cmd), arg))
+func fcntl(fd, cmd, arg int32) int32 {
+ return int32(sysvicall3(&libc_fcntl, uintptr(fd), uintptr(cmd), uintptr(arg)))
}
func port_create() int32 {
@@ -108,6 +115,10 @@
return int32(sysvicall5(&libc_port_getn, uintptr(port), uintptr(unsafe.Pointer(evs)), uintptr(max), uintptr(unsafe.Pointer(nget)), uintptr(unsafe.Pointer(timeout))))
}
+func port_alert(port int32, flags, events uint32, user uintptr) int32 {
+ return int32(sysvicall4(&libc_port_alert, uintptr(port), uintptr(flags), uintptr(events), user))
+}
+
var portfd int32 = -1
func netpollinit() {
@@ -121,8 +132,8 @@
throw("runtime: netpollinit failed")
}
-func netpolldescriptor() uintptr {
- return uintptr(portfd)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(portfd)
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -178,27 +189,70 @@
unlock(&pd.lock)
}
-// polls for ready network connections
-// returns list of goroutines that become runnable
-func netpoll(block bool) gList {
+// netpollBreak interrupts a port_getn wait.
+func netpollBreak() {
+ if atomic.Cas(&netpollWakeSig, 0, 1) {
+ // Use port_alert to put portfd into alert mode.
+ // This will wake up all threads sleeping in port_getn on portfd,
+ // and cause their calls to port_getn to return immediately.
+ // Further, until portfd is taken out of alert mode,
+ // all calls to port_getn will return immediately.
+ if port_alert(portfd, _PORT_ALERT_UPDATE, _POLLHUP, uintptr(unsafe.Pointer(&portfd))) < 0 {
+ if e := errno(); e != _EBUSY {
+ println("runtime: port_alert failed with", e)
+ throw("runtime: netpoll: port_alert failed")
+ }
+ }
+ }
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if portfd == -1 {
return gList{}
}
var wait *timespec
- var zero timespec
- if !block {
- wait = &zero
+ var ts timespec
+ if delay < 0 {
+ wait = nil
+ } else if delay == 0 {
+ wait = &ts
+ } else {
+ ts.setNsec(delay)
+ if ts.tv_sec > 1e6 {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e6 s == ~11.5 days.
+ ts.tv_sec = 1e6
+ }
+ wait = &ts
}
var events [128]portevent
retry:
var n uint32 = 1
- if port_getn(portfd, &events[0], uint32(len(events)), &n, wait) < 0 {
- if e := errno(); e != _EINTR {
+ r := port_getn(portfd, &events[0], uint32(len(events)), &n, wait)
+ e := errno()
+ if r < 0 && e == _ETIME && n > 0 {
+ // As per port_getn(3C), an ETIME failure does not preclude the
+ // delivery of some number of events. Treat a timeout failure
+ // with delivered events as a success.
+ r = 0
+ }
+ if r < 0 {
+ if e != _EINTR && e != _ETIME {
print("runtime: port_getn on fd ", portfd, " failed (errno=", e, ")\n")
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted and there are no events,
+ // just return to recalculate how long we should sleep now.
+ if delay > 0 {
+ return gList{}
+ }
goto retry
}
@@ -206,6 +260,25 @@
for i := 0; i < int(n); i++ {
ev := &events[i]
+ if ev.portev_source == _PORT_SOURCE_ALERT {
+ if ev.portev_events != _POLLHUP || unsafe.Pointer(ev.portev_user) != unsafe.Pointer(&portfd) {
+ throw("runtime: netpoll: bad port_alert wakeup")
+ }
+ if delay != 0 {
+ // Now that a blocking call to netpoll
+ // has seen the alert, take portfd
+ // back out of alert mode.
+ // See the comment in netpollBreak.
+ if port_alert(portfd, 0, 0, 0) < 0 {
+ e := errno()
+ println("runtime: port_alert failed with", e)
+ throw("runtime: netpoll: port_alert failed")
+ }
+ atomic.Store(&netpollWakeSig, 0)
+ }
+ continue
+ }
+
if ev.portev_events == 0 {
continue
}
@@ -242,8 +315,5 @@
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
diff --git a/src/runtime/netpoll_stub.go b/src/runtime/netpoll_stub.go
index f585333..f86f2f6 100644
--- a/src/runtime/netpoll_stub.go
+++ b/src/runtime/netpoll_stub.go
@@ -6,16 +6,53 @@
package runtime
+import "runtime/internal/atomic"
+
+var netpollInited uint32
var netpollWaiters uint32
+var netpollStubLock mutex
+var netpollNote note
+
+// netpollBroken, protected by netpollBrokenLock, avoids a double notewakeup.
+var netpollBrokenLock mutex
+var netpollBroken bool
+
+func netpollGenericInit() {
+ atomic.Store(&netpollInited, 1)
+}
+
+func netpollBreak() {
+ lock(&netpollBrokenLock)
+ broken := netpollBroken
+ netpollBroken = true
+ if !broken {
+ notewakeup(&netpollNote)
+ }
+ unlock(&netpollBrokenLock)
+}
+
// Polls for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+func netpoll(delay int64) gList {
// Implementation for platforms that do not support
// integrated network poller.
+ if delay != 0 {
+ // This lock ensures that only one goroutine tries to use
+ // the note. It should normally be completely uncontended.
+ lock(&netpollStubLock)
+
+ lock(&netpollBrokenLock)
+ noteclear(&netpollNote)
+ netpollBroken = false
+ unlock(&netpollBrokenLock)
+
+ notetsleep(&netpollNote, delay)
+ unlock(&netpollStubLock)
+ }
return gList{}
}
func netpollinited() bool {
- return false
+ return atomic.Load(&netpollInited) != 0
}
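For platforms without an integrated poller, netpoll above degenerates into an interruptible sleep on a note. The same wake-then-sleep structure can be sketched with standard-library primitives; a buffered channel stands in for the note here, so this is an illustration of the shape, not the runtime mechanism:

package main

import (
	"fmt"
	"time"
)

// sleeper is a timed sleep that a concurrent waker can cut short,
// roughly like the note-based netpoll stub above: wakeups are
// coalesced, and the sleeper re-arms itself on every sleep.
type sleeper struct {
	wake chan struct{} // capacity 1: coalesces concurrent breaks
}

func newSleeper() *sleeper { return &sleeper{wake: make(chan struct{}, 1)} }

// breakSleep corresponds to netpollBreak: waking an already-woken
// sleeper is a no-op thanks to the non-blocking send.
func (s *sleeper) breakSleep() {
	select {
	case s.wake <- struct{}{}:
	default:
	}
}

// sleep corresponds to netpoll(delay) with delay > 0: it returns when
// the timeout expires or when breakSleep is called, whichever is first.
func (s *sleeper) sleep(d time.Duration) {
	select {
	case <-s.wake:
	case <-time.After(d):
	}
}

func main() {
	s := newSleeper()
	go func() {
		time.Sleep(10 * time.Millisecond)
		s.breakSleep()
	}()
	start := time.Now()
	s.sleep(time.Second)
	fmt.Println("woke after", time.Since(start).Round(time.Millisecond))
}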
diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go
index 07ef15c..4c1cd26 100644
--- a/src/runtime/netpoll_windows.go
+++ b/src/runtime/netpoll_windows.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "runtime/internal/atomic"
"unsafe"
)
@@ -31,7 +32,11 @@
qty uint32
}
-var iocphandle uintptr = _INVALID_HANDLE_VALUE // completion port io handle
+var (
+ iocphandle uintptr = _INVALID_HANDLE_VALUE // completion port io handle
+
+ netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+)
func netpollinit() {
iocphandle = stdcall4(_CreateIoCompletionPort, _INVALID_HANDLE_VALUE, 0, 0, _DWORD_MAX)
@@ -41,8 +46,8 @@
}
}
-func netpolldescriptor() uintptr {
- return iocphandle
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == iocphandle
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -61,11 +66,23 @@
throw("runtime: unused")
}
-// Polls for completed network IO.
+func netpollBreak() {
+ if atomic.Cas(&netpollWakeSig, 0, 1) {
+ if stdcall4(_PostQueuedCompletionStatus, iocphandle, 0, 0, 0) == 0 {
+ println("runtime: netpoll: PostQueuedCompletionStatus failed (errno=", getlasterror(), ")")
+ throw("runtime: netpoll: PostQueuedCompletionStatus failed")
+ }
+ }
+}
+
+// netpoll checks for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
var entries [64]overlappedEntry
- var wait, qty, key, flags, n, i uint32
+ var wait, qty, flags, n, i uint32
var errno int32
var op *net_op
var toRun gList
@@ -75,74 +92,62 @@
if iocphandle == _INVALID_HANDLE_VALUE {
return gList{}
}
- wait = 0
- if block {
+ if delay < 0 {
wait = _INFINITE
+ } else if delay == 0 {
+ wait = 0
+ } else if delay < 1e6 {
+ wait = 1
+ } else if delay < 1e15 {
+ wait = uint32(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ wait = 1e9
}
-retry:
- if _GetQueuedCompletionStatusEx != nil {
- n = uint32(len(entries) / int(gomaxprocs))
- if n < 8 {
- n = 8
- }
- if block {
- mp.blocked = true
- }
- if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
- mp.blocked = false
- errno = int32(getlasterror())
- if !block && errno == _WAIT_TIMEOUT {
- return gList{}
- }
- println("runtime: GetQueuedCompletionStatusEx failed (errno=", errno, ")")
- throw("runtime: netpoll failed")
- }
+
+ n = uint32(len(entries) / int(gomaxprocs))
+ if n < 8 {
+ n = 8
+ }
+ if delay != 0 {
+ mp.blocked = true
+ }
+ if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
mp.blocked = false
- for i = 0; i < n; i++ {
- op = entries[i].op
+ errno = int32(getlasterror())
+ if errno == _WAIT_TIMEOUT {
+ return gList{}
+ }
+ println("runtime: GetQueuedCompletionStatusEx failed (errno=", errno, ")")
+ throw("runtime: netpoll failed")
+ }
+ mp.blocked = false
+ for i = 0; i < n; i++ {
+ op = entries[i].op
+ if op != nil {
errno = 0
qty = 0
if stdcall5(_WSAGetOverlappedResult, op.pd.fd, uintptr(unsafe.Pointer(op)), uintptr(unsafe.Pointer(&qty)), 0, uintptr(unsafe.Pointer(&flags))) == 0 {
errno = int32(getlasterror())
}
handlecompletion(&toRun, op, errno, qty)
- }
- } else {
- op = nil
- errno = 0
- qty = 0
- if block {
- mp.blocked = true
- }
- if stdcall5(_GetQueuedCompletionStatus, iocphandle, uintptr(unsafe.Pointer(&qty)), uintptr(unsafe.Pointer(&key)), uintptr(unsafe.Pointer(&op)), uintptr(wait)) == 0 {
- mp.blocked = false
- errno = int32(getlasterror())
- if !block && errno == _WAIT_TIMEOUT {
- return gList{}
+ } else {
+ atomic.Store(&netpollWakeSig, 0)
+ if delay == 0 {
+ // Forward the notification to the
+ // blocked poller.
+ netpollBreak()
}
- if op == nil {
- println("runtime: GetQueuedCompletionStatus failed (errno=", errno, ")")
- throw("runtime: netpoll failed")
- }
- // dequeued failed IO packet, so report that
}
- mp.blocked = false
- handlecompletion(&toRun, op, errno, qty)
- }
- if block && toRun.empty() {
- goto retry
}
return toRun
}
func handlecompletion(toRun *gList, op *net_op, errno int32, qty uint32) {
- if op == nil {
- println("runtime: GetQueuedCompletionStatus returned op == nil")
- throw("runtime: netpoll failed")
- }
mode := op.mode
if mode != 'r' && mode != 'w' {
- println("runtime: GetQueuedCompletionStatus returned invalid mode=", mode)
+ println("runtime: GetQueuedCompletionStatusEx returned invalid mode=", mode)
throw("runtime: netpoll failed")
}
op.errno = errno
diff --git a/src/runtime/os2_aix.go b/src/runtime/os2_aix.go
index 162d93e..31ac6dd 100644
--- a/src/runtime/os2_aix.go
+++ b/src/runtime/os2_aix.go
@@ -6,7 +6,7 @@
// Pollset syscalls are in netpoll_aix.go.
// The implementation is based on Solaris and Windows.
// Each syscall is made by calling its libc symbol using asmcgocall and asmsyscall6
-// asssembly functions.
+// assembly functions.
package runtime
@@ -38,6 +38,7 @@
//go:cgo_import_dynamic libc_madvise madvise "libc.a/shr_64.o"
//go:cgo_import_dynamic libc_malloc malloc "libc.a/shr_64.o"
//go:cgo_import_dynamic libc_mmap mmap "libc.a/shr_64.o"
+//go:cgo_import_dynamic libc_mprotect mprotect "libc.a/shr_64.o"
//go:cgo_import_dynamic libc_munmap munmap "libc.a/shr_64.o"
//go:cgo_import_dynamic libc_open open "libc.a/shr_64.o"
//go:cgo_import_dynamic libc_pipe pipe "libc.a/shr_64.o"
@@ -64,6 +65,8 @@
//go:cgo_import_dynamic libpthread_attr_setstackaddr pthread_attr_setstackaddr "libpthread.a/shr_xpg5_64.o"
//go:cgo_import_dynamic libpthread_create pthread_create "libpthread.a/shr_xpg5_64.o"
//go:cgo_import_dynamic libpthread_sigthreadmask sigthreadmask "libpthread.a/shr_xpg5_64.o"
+//go:cgo_import_dynamic libpthread_self pthread_self "libpthread.a/shr_xpg5_64.o"
+//go:cgo_import_dynamic libpthread_kill pthread_kill "libpthread.a/shr_xpg5_64.o"
//go:linkname libc__Errno libc__Errno
//go:linkname libc_clock_gettime libc_clock_gettime
@@ -75,6 +78,7 @@
//go:linkname libc_madvise libc_madvise
//go:linkname libc_malloc libc_malloc
//go:linkname libc_mmap libc_mmap
+//go:linkname libc_mprotect libc_mprotect
//go:linkname libc_munmap libc_munmap
//go:linkname libc_open libc_open
//go:linkname libc_pipe libc_pipe
@@ -101,6 +105,8 @@
//go:linkname libpthread_attr_setstackaddr libpthread_attr_setstackaddr
//go:linkname libpthread_create libpthread_create
//go:linkname libpthread_sigthreadmask libpthread_sigthreadmask
+//go:linkname libpthread_self libpthread_self
+//go:linkname libpthread_kill libpthread_kill
var (
//libc
@@ -114,6 +120,7 @@
libc_madvise,
libc_malloc,
libc_mmap,
+ libc_mprotect,
libc_munmap,
libc_open,
libc_pipe,
@@ -139,7 +146,9 @@
libpthread_attr_setdetachstate,
libpthread_attr_setstackaddr,
libpthread_create,
- libpthread_sigthreadmask libFunc
+ libpthread_sigthreadmask,
+ libpthread_self,
+ libpthread_kill libFunc
)
type libFunc uintptr
@@ -390,25 +399,32 @@
exit1(code)
}
-func write1(fd, p uintptr, n int32) int32
+func write2(fd, p uintptr, n int32) int32
//go:nosplit
-func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32 {
_g_ := getg()
// Check the validity of g because without a g during
// newosproc0.
if _g_ != nil {
- r, _ := syscall3(&libc_write, uintptr(fd), uintptr(p), uintptr(n))
+ r, errno := syscall3(&libc_write, uintptr(fd), uintptr(p), uintptr(n))
+ if int32(r) < 0 {
+ return -int32(errno)
+ }
return int32(r)
}
- return write1(fd, uintptr(p), n)
+ // Note that in this case we can't return a valid errno value.
+ return write2(fd, uintptr(p), n)
}
//go:nosplit
func read(fd int32, p unsafe.Pointer, n int32) int32 {
- r, _ := syscall3(&libc_read, uintptr(fd), uintptr(p), uintptr(n))
+ r, errno := syscall3(&libc_read, uintptr(fd), uintptr(p), uintptr(n))
+ if int32(r) < 0 {
+ return -int32(errno)
+ }
return int32(r)
}
@@ -425,9 +441,10 @@
}
//go:nosplit
-func pipe(fd *int32) int32 {
- r, _ := syscall1(&libc_pipe, uintptr(unsafe.Pointer(fd)))
- return int32(r)
+func pipe() (r, w int32, errno int32) {
+ var p [2]int32
+ _, err := syscall1(&libc_pipe, uintptr(noescape(unsafe.Pointer(&p[0]))))
+ return p[0], p[1], int32(err)
}
// mmap calls the mmap system call.
@@ -445,6 +462,15 @@
}
//go:nosplit
+func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (unsafe.Pointer, int) {
+ r, err0 := syscall3(&libc_mprotect, uintptr(addr), uintptr(n), uintptr(prot))
+ if r == ^uintptr(0) {
+ return nil, int(err0)
+ }
+ return unsafe.Pointer(r), int(err0)
+}
+
+//go:nosplit
func munmap(addr unsafe.Pointer, n uintptr) {
r, err := syscall2(&libc_munmap, uintptr(addr), uintptr(n))
if int32(r) == -1 {
@@ -716,3 +742,14 @@
sigprocmask1(uintptr(how), uintptr(unsafe.Pointer(new)), uintptr(unsafe.Pointer(old)))
}
+
+//go:nosplit
+func pthread_self() pthread {
+ r, _ := syscall0(&libpthread_self)
+ return pthread(r)
+}
+
+//go:nosplit
+func signalM(mp *m, sig int) {
+ syscall2(&libpthread_kill, uintptr(pthread(mp.procid)), uintptr(sig))
+}
diff --git a/src/runtime/os2_nacl.go b/src/runtime/os2_nacl.go
deleted file mode 100644
index b84cb18..0000000
--- a/src/runtime/os2_nacl.go
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
- _NSIG = 32
- _SI_USER = 1
-
- // native_client/src/trusted/service_runtime/include/sys/errno.h
- // The errors are mainly copied from Linux.
- _EPERM = 1 /* Operation not permitted */
- _ENOENT = 2 /* No such file or directory */
- _ESRCH = 3 /* No such process */
- _EINTR = 4 /* Interrupted system call */
- _EIO = 5 /* I/O error */
- _ENXIO = 6 /* No such device or address */
- _E2BIG = 7 /* Argument list too long */
- _ENOEXEC = 8 /* Exec format error */
- _EBADF = 9 /* Bad file number */
- _ECHILD = 10 /* No child processes */
- _EAGAIN = 11 /* Try again */
- // _ENOMEM is defined in mem_bsd.go for nacl.
- // _ENOMEM = 12 /* Out of memory */
- _EACCES = 13 /* Permission denied */
- _EFAULT = 14 /* Bad address */
- _EBUSY = 16 /* Device or resource busy */
- _EEXIST = 17 /* File exists */
- _EXDEV = 18 /* Cross-device link */
- _ENODEV = 19 /* No such device */
- _ENOTDIR = 20 /* Not a directory */
- _EISDIR = 21 /* Is a directory */
- _EINVAL = 22 /* Invalid argument */
- _ENFILE = 23 /* File table overflow */
- _EMFILE = 24 /* Too many open files */
- _ENOTTY = 25 /* Not a typewriter */
- _EFBIG = 27 /* File too large */
- _ENOSPC = 28 /* No space left on device */
- _ESPIPE = 29 /* Illegal seek */
- _EROFS = 30 /* Read-only file system */
- _EMLINK = 31 /* Too many links */
- _EPIPE = 32 /* Broken pipe */
- _ENAMETOOLONG = 36 /* File name too long */
- _ENOSYS = 38 /* Function not implemented */
- _EDQUOT = 122 /* Quota exceeded */
- _EDOM = 33 /* Math arg out of domain of func */
- _ERANGE = 34 /* Math result not representable */
- _EDEADLK = 35 /* Deadlock condition */
- _ENOLCK = 37 /* No record locks available */
- _ENOTEMPTY = 39 /* Directory not empty */
- _ELOOP = 40 /* Too many symbolic links */
- _ENOMSG = 42 /* No message of desired type */
- _EIDRM = 43 /* Identifier removed */
- _ECHRNG = 44 /* Channel number out of range */
- _EL2NSYNC = 45 /* Level 2 not synchronized */
- _EL3HLT = 46 /* Level 3 halted */
- _EL3RST = 47 /* Level 3 reset */
- _ELNRNG = 48 /* Link number out of range */
- _EUNATCH = 49 /* Protocol driver not attached */
- _ENOCSI = 50 /* No CSI structure available */
- _EL2HLT = 51 /* Level 2 halted */
- _EBADE = 52 /* Invalid exchange */
- _EBADR = 53 /* Invalid request descriptor */
- _EXFULL = 54 /* Exchange full */
- _ENOANO = 55 /* No anode */
- _EBADRQC = 56 /* Invalid request code */
- _EBADSLT = 57 /* Invalid slot */
- _EDEADLOCK = _EDEADLK /* File locking deadlock error */
- _EBFONT = 59 /* Bad font file fmt */
- _ENOSTR = 60 /* Device not a stream */
- _ENODATA = 61 /* No data (for no delay io) */
- _ETIME = 62 /* Timer expired */
- _ENOSR = 63 /* Out of streams resources */
- _ENONET = 64 /* Machine is not on the network */
- _ENOPKG = 65 /* Package not installed */
- _EREMOTE = 66 /* The object is remote */
- _ENOLINK = 67 /* The link has been severed */
- _EADV = 68 /* Advertise error */
- _ESRMNT = 69 /* Srmount error */
- _ECOMM = 70 /* Communication error on send */
- _EPROTO = 71 /* Protocol error */
- _EMULTIHOP = 72 /* Multihop attempted */
- _EDOTDOT = 73 /* Cross mount point (not really error) */
- _EBADMSG = 74 /* Trying to read unreadable message */
- _EOVERFLOW = 75 /* Value too large for defined data type */
- _ENOTUNIQ = 76 /* Given log. name not unique */
- _EBADFD = 77 /* f.d. invalid for this operation */
- _EREMCHG = 78 /* Remote address changed */
- _ELIBACC = 79 /* Can't access a needed shared lib */
- _ELIBBAD = 80 /* Accessing a corrupted shared lib */
- _ELIBSCN = 81 /* .lib section in a.out corrupted */
- _ELIBMAX = 82 /* Attempting to link in too many libs */
- _ELIBEXEC = 83 /* Attempting to exec a shared library */
- _EILSEQ = 84
- _EUSERS = 87
- _ENOTSOCK = 88 /* Socket operation on non-socket */
- _EDESTADDRREQ = 89 /* Destination address required */
- _EMSGSIZE = 90 /* Message too long */
- _EPROTOTYPE = 91 /* Protocol wrong type for socket */
- _ENOPROTOOPT = 92 /* Protocol not available */
- _EPROTONOSUPPORT = 93 /* Unknown protocol */
- _ESOCKTNOSUPPORT = 94 /* Socket type not supported */
- _EOPNOTSUPP = 95 /* Operation not supported on transport endpoint */
- _EPFNOSUPPORT = 96 /* Protocol family not supported */
- _EAFNOSUPPORT = 97 /* Address family not supported by protocol family */
- _EADDRINUSE = 98 /* Address already in use */
- _EADDRNOTAVAIL = 99 /* Address not available */
- _ENETDOWN = 100 /* Network interface is not configured */
- _ENETUNREACH = 101 /* Network is unreachable */
- _ENETRESET = 102
- _ECONNABORTED = 103 /* Connection aborted */
- _ECONNRESET = 104 /* Connection reset by peer */
- _ENOBUFS = 105 /* No buffer space available */
- _EISCONN = 106 /* Socket is already connected */
- _ENOTCONN = 107 /* Socket is not connected */
- _ESHUTDOWN = 108 /* Can't send after socket shutdown */
- _ETOOMANYREFS = 109
- _ETIMEDOUT = 110 /* Connection timed out */
- _ECONNREFUSED = 111 /* Connection refused */
- _EHOSTDOWN = 112 /* Host is down */
- _EHOSTUNREACH = 113 /* Host is unreachable */
- _EALREADY = 114 /* Socket already connected */
- _EINPROGRESS = 115 /* Connection already in progress */
- _ESTALE = 116
- _ENOTSUP = _EOPNOTSUPP /* Not supported */
- _ENOMEDIUM = 123 /* No medium (in tape drive) */
- _ECANCELED = 125 /* Operation canceled. */
- _ELBIN = 2048 /* Inode is remote (not really error) */
- _EFTYPE = 2049 /* Inappropriate file type or format */
- _ENMFILE = 2050 /* No more files */
- _EPROCLIM = 2051
- _ENOSHARE = 2052 /* No such host or network path */
- _ECASECLASH = 2053 /* Filename exists with different case */
- _EWOULDBLOCK = _EAGAIN /* Operation would block */
-
- // native_client/src/trusted/service_runtime/include/bits/mman.h.
- // NOTE: DO NOT USE native_client/src/shared/imc/nacl_imc_c.h.
- // Those MAP_*values are different from these.
- _PROT_NONE = 0x0
- _PROT_READ = 0x1
- _PROT_WRITE = 0x2
- _PROT_EXEC = 0x4
-
- _MAP_SHARED = 0x1
- _MAP_PRIVATE = 0x2
- _MAP_FIXED = 0x10
- _MAP_ANON = 0x20
-
- _MADV_FREE = 0
- _SIGFPE = 8
- _FPE_INTDIV = 0
-)
-
-type siginfo struct{}
diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go
index b5a11e8..d6e36fb 100644
--- a/src/runtime/os3_solaris.go
+++ b/src/runtime/os3_solaris.go
@@ -29,6 +29,8 @@
//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "libc.so"
//go:cgo_import_dynamic libc_pthread_attr_setstack pthread_attr_setstack "libc.so"
//go:cgo_import_dynamic libc_pthread_create pthread_create "libc.so"
+//go:cgo_import_dynamic libc_pthread_self pthread_self "libc.so"
+//go:cgo_import_dynamic libc_pthread_kill pthread_kill "libc.so"
//go:cgo_import_dynamic libc_raise raise "libc.so"
//go:cgo_import_dynamic libc_read read "libc.so"
//go:cgo_import_dynamic libc_select select "libc.so"
@@ -44,6 +46,8 @@
//go:cgo_import_dynamic libc_sysconf sysconf "libc.so"
//go:cgo_import_dynamic libc_usleep usleep "libc.so"
//go:cgo_import_dynamic libc_write write "libc.so"
+//go:cgo_import_dynamic libc_pipe pipe "libc.so"
+//go:cgo_import_dynamic libc_pipe2 pipe2 "libc.so"
//go:linkname libc____errno libc____errno
//go:linkname libc_clock_gettime libc_clock_gettime
@@ -61,6 +65,8 @@
//go:linkname libc_pthread_attr_setdetachstate libc_pthread_attr_setdetachstate
//go:linkname libc_pthread_attr_setstack libc_pthread_attr_setstack
//go:linkname libc_pthread_create libc_pthread_create
+//go:linkname libc_pthread_self libc_pthread_self
+//go:linkname libc_pthread_kill libc_pthread_kill
//go:linkname libc_raise libc_raise
//go:linkname libc_read libc_read
//go:linkname libc_select libc_select
@@ -76,6 +82,8 @@
//go:linkname libc_sysconf libc_sysconf
//go:linkname libc_usleep libc_usleep
//go:linkname libc_write libc_write
+//go:linkname libc_pipe libc_pipe
+//go:linkname libc_pipe2 libc_pipe2
var (
libc____errno,
@@ -94,6 +102,8 @@
libc_pthread_attr_setdetachstate,
libc_pthread_attr_setstack,
libc_pthread_create,
+ libc_pthread_self,
+ libc_pthread_kill,
libc_raise,
libc_read,
libc_sched_yield,
@@ -108,19 +118,13 @@
libc_sigprocmask,
libc_sysconf,
libc_usleep,
- libc_write libcFunc
+ libc_write,
+ libc_pipe,
+ libc_pipe2 libcFunc
)
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
-func getncpu() int32 {
- n := int32(sysconf(__SC_NPROCESSORS_ONLN))
- if n < 1 {
- return 1
- }
- return n
-}
-
func getPageSize() uintptr {
n := int32(sysconf(__SC_PAGESIZE))
if n <= 0 {
@@ -214,6 +218,8 @@
asmcgocall(unsafe.Pointer(funcPC(miniterrno)), unsafe.Pointer(&libc____errno))
minitSignals()
+
+ getg().m.procid = uint64(pthread_self())
}
// Called from dropm to undo the effect of an minit.
@@ -393,11 +399,16 @@
sysvicall2(&libc_munmap, uintptr(addr), uintptr(n))
}
-func nanotime1()
+const (
+ _CLOCK_REALTIME = 3
+ _CLOCK_MONOTONIC = 4
+)
//go:nosplit
-func nanotime() int64 {
- return int64(sysvicall0((*libcFunc)(unsafe.Pointer(funcPC(nanotime1)))))
+func nanotime1() int64 {
+ var ts mts
+ sysvicall2(&libc_clock_gettime, _CLOCK_MONOTONIC, uintptr(unsafe.Pointer(&ts)))
+ return ts.tv_sec*1e9 + ts.tv_nsec
}
//go:nosplit
@@ -429,6 +440,14 @@
return int32(sysvicall4(&libc_pthread_create, uintptr(unsafe.Pointer(thread)), uintptr(unsafe.Pointer(attr)), uintptr(fn), uintptr(arg)))
}
+func pthread_self() pthread {
+ return pthread(sysvicall0(&libc_pthread_self))
+}
+
+func signalM(mp *m, sig int) {
+ sysvicall2(&libc_pthread_kill, uintptr(pthread(mp.procid)), uintptr(sig))
+}
+
//go:nosplit
//go:nowritebarrierrec
func raise(sig uint32) /* int32 */ {
@@ -442,7 +461,11 @@
//go:nosplit
func read(fd int32, buf unsafe.Pointer, nbyte int32) int32 {
- return int32(sysvicall3(&libc_read, uintptr(fd), uintptr(buf), uintptr(nbyte)))
+ r1, err := sysvicall3Err(&libc_read, uintptr(fd), uintptr(buf), uintptr(nbyte))
+ if c := int32(r1); c >= 0 {
+ return c
+ }
+ return -int32(err)
}
//go:nosplit
@@ -498,9 +521,44 @@
usleep1(µs)
}
+func walltime1() (sec int64, nsec int32) {
+ var ts mts
+ sysvicall2(&libc_clock_gettime, _CLOCK_REALTIME, uintptr(unsafe.Pointer(&ts)))
+ return ts.tv_sec, int32(ts.tv_nsec)
+}
+
//go:nosplit
-func write(fd uintptr, buf unsafe.Pointer, nbyte int32) int32 {
- return int32(sysvicall3(&libc_write, uintptr(fd), uintptr(buf), uintptr(nbyte)))
+func write1(fd uintptr, buf unsafe.Pointer, nbyte int32) int32 {
+ r1, err := sysvicall3Err(&libc_write, fd, uintptr(buf), uintptr(nbyte))
+ if c := int32(r1); c >= 0 {
+ return c
+ }
+ return -int32(err)
+}
+
+//go:nosplit
+func pipe() (r, w int32, errno int32) {
+ var p [2]int32
+ _, e := sysvicall1Err(&libc_pipe, uintptr(noescape(unsafe.Pointer(&p))))
+ return p[0], p[1], int32(e)
+}
+
+//go:nosplit
+func pipe2(flags int32) (r, w int32, errno int32) {
+ var p [2]int32
+ _, e := sysvicall2Err(&libc_pipe2, uintptr(noescape(unsafe.Pointer(&p))), uintptr(flags))
+ return p[0], p[1], int32(e)
+}
+
+//go:nosplit
+func closeonexec(fd int32) {
+ fcntl(fd, _F_SETFD, _FD_CLOEXEC)
+}
+
+//go:nosplit
+func setNonblock(fd int32) {
+ flags := fcntl(fd, _F_GETFL, 0)
+ fcntl(fd, _F_SETFL, flags|_O_NONBLOCK)
}
func osyield1()
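Editor's note: a minimal standalone sketch of the error convention the new read/write1 wrappers above adopt. The helper and errno value below are illustrative assumptions, not runtime code: a non-negative result is a byte count, a negative result is the negated errno.

package main

import "fmt"

// foldErrno mimics the convention used by read/write1 above: keep a
// non-negative byte count as-is, otherwise report the failure as -errno.
func foldErrno(n, errno int32) int32 {
	if n >= 0 {
		return n
	}
	return -errno
}

func main() {
	fmt.Println(foldErrno(12, 0)) // 12 bytes transferred
	fmt.Println(foldErrno(-1, 4)) // -4 (EINTR-style failure; errno value assumed)
}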
diff --git a/src/runtime/os_aix.go b/src/runtime/os_aix.go
index 197869f..9a6b8ae 100644
--- a/src/runtime/os_aix.go
+++ b/src/runtime/os_aix.go
@@ -175,6 +175,7 @@
func minit() {
miniterrno()
minitSignals()
+ getg().m.procid = uint64(pthread_self())
}
func unminit() {
@@ -323,7 +324,7 @@
)
//go:nosplit
-func nanotime() int64 {
+func nanotime1() int64 {
tp := &timespec{}
if clock_gettime(_CLOCK_REALTIME, tp) != 0 {
throw("syscall clock_gettime failed")
@@ -331,7 +332,7 @@
return tp.tv_sec*1000000000 + tp.tv_nsec
}
-func walltime() (sec int64, nsec int32) {
+func walltime1() (sec int64, nsec int32) {
ts := &timespec{}
if clock_gettime(_CLOCK_REALTIME, ts) != 0 {
throw("syscall clock_gettime failed")
@@ -357,3 +358,20 @@
cpu.HWCap2 |= cpu.PPC_FEATURE2_ARCH_3_00
}
}
+
+//go:nosplit
+func fcntl(fd, cmd, arg int32) int32 {
+ r, _ := syscall3(&libc_fcntl, uintptr(fd), uintptr(cmd), uintptr(arg))
+ return int32(r)
+}
+
+//go:nosplit
+func closeonexec(fd int32) {
+ fcntl(fd, _F_SETFD, _FD_CLOEXEC)
+}
+
+//go:nosplit
+func setNonblock(fd int32) {
+ flags := fcntl(fd, _F_GETFL, 0)
+ fcntl(fd, _F_SETFL, flags|_O_NONBLOCK)
+}
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
index 1614b66..01c40b4 100644
--- a/src/runtime/os_darwin.go
+++ b/src/runtime/os_darwin.go
@@ -289,20 +289,21 @@
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
- // The alternate signal stack is buggy on arm and arm64.
+ // The alternate signal stack is buggy on arm64.
// The signal handler handles it directly.
- if GOARCH != "arm" && GOARCH != "arm64" {
+ if GOARCH != "arm64" {
minitSignalStack()
}
minitSignalMask()
+ getg().m.procid = uint64(pthread_self())
}
// Called from dropm to undo the effect of an minit.
//go:nosplit
func unminit() {
- // The alternate signal stack is buggy on arm and arm64.
+ // The alternate signal stack is buggy on arm64.
// See minit.
- if GOARCH != "arm" && GOARCH != "arm64" {
+ if GOARCH != "arm64" {
unminitSignals()
}
}
@@ -406,3 +407,7 @@
executablePath = executablePath[len(prefix):]
}
}
+
+func signalM(mp *m, sig int) {
+ pthread_kill(pthread(mp.procid), uint32(sig))
+}
diff --git a/src/runtime/os_darwin_arm.go b/src/runtime/os_darwin_arm.go
deleted file mode 100644
index ee1bd17..0000000
--- a/src/runtime/os_darwin_arm.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-func checkgoarm() {
- // TODO(minux): FP checks like in os_linux_arm.go.
-
- // osinit not called yet, so ncpu not set: must use getncpu directly.
- if getncpu() > 1 && goarm < 7 {
- print("runtime: this system has multiple CPUs and must use\n")
- print("atomic synchronization instructions. Recompile using GOARM=7.\n")
- exit(1)
- }
-}
-
-//go:nosplit
-func cputicks() int64 {
- // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
- // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
- return nanotime()
-}
diff --git a/src/runtime/os_darwin_arm64.go b/src/runtime/os_darwin_arm64.go
index 8de132d..b808150 100644
--- a/src/runtime/os_darwin_arm64.go
+++ b/src/runtime/os_darwin_arm64.go
@@ -8,6 +8,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index 4fda7ea..6578fcb 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -38,9 +38,11 @@
//go:noescape
func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-func raise(sig uint32)
func raiseproc(sig uint32)
+func lwp_gettid() int32
+func lwp_kill(pid, tid int32, sig int)
+
//go:noescape
func sys_umtx_sleep(addr *uint32, val, timeout int32) int32
@@ -54,6 +56,9 @@
//go:noescape
func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32
func closeonexec(fd int32)
+func setNonblock(fd int32)
+
+func pipe() (r, w int32, errno int32)
const stackSystem = 0
@@ -148,7 +153,7 @@
start_func: funcPC(lwp_start),
arg: unsafe.Pointer(mp),
stack: uintptr(stk),
- tid1: unsafe.Pointer(&mp.procid),
+ tid1: nil, // minit will record tid
tid2: nil,
}
@@ -188,10 +193,7 @@
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
- // m.procid is a uint64, but lwp_start writes an int32. Fix it up.
- _g_ := getg()
- _g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
-
+ getg().m.procid = uint64(lwp_gettid())
minitSignals()
}
@@ -285,3 +287,17 @@
}
}
}
+
+// raise sends a signal to the calling thread.
+//
+// It must be nosplit because it is used by the signal handler before
+// it definitely has a Go stack.
+//
+//go:nosplit
+func raise(sig uint32) {
+ lwp_kill(-1, lwp_gettid(), int(sig))
+}
+
+func signalM(mp *m, sig int) {
+ lwp_kill(-1, int32(mp.procid), sig)
+}
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index cbb72cf..730973a 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -26,9 +26,11 @@
//go:noescape
func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-func raise(sig uint32)
func raiseproc(sig uint32)
+func thr_self() thread
+func thr_kill(tid thread, sig int)
+
//go:noescape
func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uintptr, ut *umtx_time) int32
@@ -38,7 +40,11 @@
//go:noescape
func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32
+
+func pipe() (r, w int32, errno int32)
+func pipe2(flags int32) (r, w int32, errno int32)
func closeonexec(fd int32)
+func setNonblock(fd int32)
// From FreeBSD's <sys/sysctl.h>
const (
@@ -194,7 +200,7 @@
arg: unsafe.Pointer(mp),
stack_base: mp.g0.stack.lo,
stack_size: uintptr(stk) - mp.g0.stack.lo,
- child_tid: unsafe.Pointer(&mp.procid),
+ child_tid: nil, // minit will record tid
parent_tid: nil,
tls_base: unsafe.Pointer(&mp.tls[0]),
tls_size: unsafe.Sizeof(mp.tls),
@@ -230,7 +236,7 @@
arg: nil,
stack_base: uintptr(stack), //+stacksize?
stack_size: stacksize,
- child_tid: unsafe.Pointer(&m0.procid),
+ child_tid: nil, // minit will record tid
parent_tid: nil,
tls_base: unsafe.Pointer(&m0.tls[0]),
tls_size: unsafe.Sizeof(m0.tls),
@@ -289,12 +295,7 @@
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
- // m.procid is a uint64, but thr_new writes a uint32 on 32-bit systems.
- // Fix it up. (Only matters on big-endian, but be clean anyway.)
- if sys.PtrSize == 4 {
- _g_ := getg()
- _g_.m.procid = uint64(*(*uint32)(unsafe.Pointer(&_g_.m.procid)))
- }
+ getg().m.procid = uint64(thr_self())
// On FreeBSD before about April 2017 there was a bug such
// that calling execve from a thread other than the main
@@ -422,3 +423,17 @@
// asmSigaction is implemented in assembly.
//go:noescape
func asmSigaction(sig uintptr, new, old *sigactiont) int32
+
+// raise sends a signal to the calling thread.
+//
+// It must be nosplit because it is used by the signal handler before
+// it definitely has a Go stack.
+//
+//go:nosplit
+func raise(sig uint32) {
+ thr_kill(thr_self(), int(sig))
+}
+
+func signalM(mp *m, sig int) {
+ thr_kill(thread(mp.procid), sig)
+}
diff --git a/src/runtime/os_freebsd_arm.go b/src/runtime/os_freebsd_arm.go
index 3edd381..3feaa5e 100644
--- a/src/runtime/os_freebsd_arm.go
+++ b/src/runtime/os_freebsd_arm.go
@@ -44,6 +44,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_freebsd_arm64.go b/src/runtime/os_freebsd_arm64.go
new file mode 100644
index 0000000..51ebf9d
--- /dev/null
+++ b/src/runtime/os_freebsd_arm64.go
@@ -0,0 +1,155 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "internal/cpu"
+
+const (
+ hwcap_FP = 1 << 0
+ hwcap_ASIMD = 1 << 1
+ hwcap_EVTSTRM = 1 << 2
+ hwcap_AES = 1 << 3
+ hwcap_PMULL = 1 << 4
+ hwcap_SHA1 = 1 << 5
+ hwcap_SHA2 = 1 << 6
+ hwcap_CRC32 = 1 << 7
+ hwcap_ATOMICS = 1 << 8
+ hwcap_FPHP = 1 << 9
+ hwcap_ASIMDHP = 1 << 10
+ hwcap_CPUID = 1 << 11
+ hwcap_ASIMDRDM = 1 << 12
+ hwcap_JSCVT = 1 << 13
+ hwcap_FCMA = 1 << 14
+ hwcap_LRCPC = 1 << 15
+ hwcap_DCPOP = 1 << 16
+ hwcap_SHA3 = 1 << 17
+ hwcap_SM3 = 1 << 18
+ hwcap_SM4 = 1 << 19
+ hwcap_ASIMDDP = 1 << 20
+ hwcap_SHA512 = 1 << 21
+ hwcap_SVE = 1 << 22
+ hwcap_ASIMDFHM = 1 << 23
+)
+
+func getisar0() uint64
+func getisar1() uint64
+func getpfr0() uint64
+
+// no hwcap support on FreeBSD aarch64, we need to retrieve the info from
+// ID_AA64ISAR0_EL1, ID_AA64ISAR1_EL1 and ID_AA64PFR0_EL1
+func archauxv(tag, val uintptr) {
+ var isar0, isar1, pfr0 uint64
+
+ isar0 = getisar0()
+ isar1 = getisar1()
+ pfr0 = getpfr0()
+
+ // ID_AA64ISAR0_EL1
+ switch extractBits(isar0, 4, 7) {
+ case 1:
+ cpu.HWCap |= hwcap_AES
+ case 2:
+ cpu.HWCap |= hwcap_PMULL | hwcap_AES
+ }
+
+ switch extractBits(isar0, 8, 11) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA1
+ }
+
+ switch extractBits(isar0, 12, 15) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA2
+ case 2:
+ cpu.HWCap |= hwcap_SHA2 | hwcap_SHA512
+ }
+
+ switch extractBits(isar0, 16, 19) {
+ case 1:
+ cpu.HWCap |= hwcap_CRC32
+ }
+
+ switch extractBits(isar0, 20, 23) {
+ case 2:
+ cpu.HWCap |= hwcap_ATOMICS
+ }
+
+ switch extractBits(isar0, 28, 31) {
+ case 1:
+ cpu.HWCap |= hwcap_ASIMDRDM
+ }
+
+ switch extractBits(isar0, 32, 35) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA3
+ }
+
+ switch extractBits(isar0, 36, 39) {
+ case 1:
+ cpu.HWCap |= hwcap_SM3
+ }
+
+ switch extractBits(isar0, 40, 43) {
+ case 1:
+ cpu.HWCap |= hwcap_SM4
+ }
+
+ switch extractBits(isar0, 44, 47) {
+ case 1:
+ cpu.HWCap |= hwcap_ASIMDDP
+ }
+
+ // ID_AA64ISAR1_EL1
+ switch extractBits(isar1, 0, 3) {
+ case 1:
+ cpu.HWCap |= hwcap_DCPOP
+ }
+
+ switch extractBits(isar1, 12, 15) {
+ case 1:
+ cpu.HWCap |= hwcap_JSCVT
+ }
+
+ switch extractBits(isar1, 16, 19) {
+ case 1:
+ cpu.HWCap |= hwcap_FCMA
+ }
+
+ switch extractBits(isar1, 20, 23) {
+ case 1:
+ cpu.HWCap |= hwcap_LRCPC
+ }
+
+ // ID_AA64PFR0_EL1
+ switch extractBits(pfr0, 16, 19) {
+ case 0:
+ cpu.HWCap |= hwcap_FP
+ case 1:
+ cpu.HWCap |= hwcap_FP | hwcap_FPHP
+ }
+
+ switch extractBits(pfr0, 20, 23) {
+ case 0:
+ cpu.HWCap |= hwcap_ASIMD
+ case 1:
+ cpu.HWCap |= hwcap_ASIMD | hwcap_ASIMDHP
+ }
+
+ switch extractBits(pfr0, 32, 35) {
+ case 1:
+ cpu.HWCap |= hwcap_SVE
+ }
+}
+
+func extractBits(data uint64, start, end uint) uint {
+ return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
+
+//go:nosplit
+func cputicks() int64 {
+ // Currently cputicks() is used in blocking profiler and to seed fastrand().
+ // nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+ return nanotime()
+}
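Editor's note: a small self-contained sketch of how the extractBits helper above decodes a feature-register field; the register value is made up for illustration.

package main

import "fmt"

// extractBits mirrors the helper above: it returns the field occupying
// bits [start, end] (inclusive) of a 64-bit ID register value.
func extractBits(data uint64, start, end uint) uint {
	return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
}

func main() {
	// Hypothetical ID_AA64ISAR0_EL1 value whose AES field (bits 4-7) is 2,
	// which archauxv above maps to hwcap_PMULL|hwcap_AES.
	var isar0 uint64 = 0x2 << 4
	fmt.Println(extractBits(isar0, 4, 7)) // 2
}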
diff --git a/src/runtime/os_freebsd_noauxv.go b/src/runtime/os_freebsd_noauxv.go
index 01efb9b..c6a4992 100644
--- a/src/runtime/os_freebsd_noauxv.go
+++ b/src/runtime/os_freebsd_noauxv.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// +build freebsd
-// +build !arm
+// +build !arm,!arm64
package runtime
diff --git a/src/runtime/os_illumos.go b/src/runtime/os_illumos.go
new file mode 100644
index 0000000..c3c3e4e
--- /dev/null
+++ b/src/runtime/os_illumos.go
@@ -0,0 +1,132 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+//go:cgo_import_dynamic libc_getrctl getrctl "libc.so"
+//go:cgo_import_dynamic libc_rctlblk_get_local_action rctlblk_get_local_action "libc.so"
+//go:cgo_import_dynamic libc_rctlblk_get_local_flags rctlblk_get_local_flags "libc.so"
+//go:cgo_import_dynamic libc_rctlblk_get_value rctlblk_get_value "libc.so"
+//go:cgo_import_dynamic libc_rctlblk_size rctlblk_size "libc.so"
+
+//go:linkname libc_getrctl libc_getrctl
+//go:linkname libc_rctlblk_get_local_action libc_rctlblk_get_local_action
+//go:linkname libc_rctlblk_get_local_flags libc_rctlblk_get_local_flags
+//go:linkname libc_rctlblk_get_value libc_rctlblk_get_value
+//go:linkname libc_rctlblk_size libc_rctlblk_size
+
+var (
+ libc_getrctl,
+ libc_rctlblk_get_local_action,
+ libc_rctlblk_get_local_flags,
+ libc_rctlblk_get_value,
+ libc_rctlblk_size libcFunc
+)
+
+// Return the minimum value seen for the zone CPU cap, or 0 if no cap is
+// detected.
+func getcpucap() uint64 {
+ // The resource control block is an opaque object whose size is only
+ // known to libc. In practice, given the contents, it is unlikely to
+ // grow beyond 8KB so we'll use a static buffer of that size here.
+ const rblkmaxsize = 8 * 1024
+ if rctlblk_size() > rblkmaxsize {
+ return 0
+ }
+
+ // The "zone.cpu-cap" resource control, as described in
+ // resource_controls(5), "sets a limit on the amount of CPU time that
+ // can be used by a zone. The unit used is the percentage of a single
+ // CPU that can be used by all user threads in a zone, expressed as an
+ // integer." A C string of the name must be passed to getrctl(2).
+ name := []byte("zone.cpu-cap\x00")
+
+ // To iterate over the list of values for a particular resource
+ // control, we need two blocks: one for the previously read value and
+ // one for the next value.
+ var rblk0 [rblkmaxsize]byte
+ var rblk1 [rblkmaxsize]byte
+ rblk := &rblk0[0]
+ rblkprev := &rblk1[0]
+
+ var flag uint32 = _RCTL_FIRST
+ var capval uint64 = 0
+
+ for {
+ if getrctl(unsafe.Pointer(&name[0]), unsafe.Pointer(rblkprev), unsafe.Pointer(rblk), flag) != 0 {
+ // The end of the sequence is reported as an ENOENT
+ // failure, but determining the CPU cap is not critical
+ // here. We'll treat any failure as if it were the end
+ // of sequence.
+ break
+ }
+
+ lflags := rctlblk_get_local_flags(unsafe.Pointer(rblk))
+ action := rctlblk_get_local_action(unsafe.Pointer(rblk))
+ if (lflags&_RCTL_LOCAL_MAXIMAL) == 0 && action == _RCTL_LOCAL_DENY {
+ // This is a finite (not maximal) value representing a
+ // cap (deny) action.
+ v := rctlblk_get_value(unsafe.Pointer(rblk))
+ if capval == 0 || capval > v {
+ capval = v
+ }
+ }
+
+ // Swap the blocks around so that we can fetch the next value
+ t := rblk
+ rblk = rblkprev
+ rblkprev = t
+ flag = _RCTL_NEXT
+ }
+
+ return capval
+}
+
+func getncpu() int32 {
+ n := int32(sysconf(__SC_NPROCESSORS_ONLN))
+ if n < 1 {
+ return 1
+ }
+
+ if cents := int32(getcpucap()); cents > 0 {
+ // Convert from a percentage of CPUs to a number of CPUs,
+ // rounding up to make use of a fractional CPU
+ // e.g., 336% becomes 4 CPUs
+ ncap := (cents + 99) / 100
+ if ncap < n {
+ return ncap
+ }
+ }
+
+ return n
+}
+
+//go:nosplit
+func getrctl(controlname, oldbuf, newbuf unsafe.Pointer, flags uint32) uintptr {
+ return sysvicall4(&libc_getrctl, uintptr(controlname), uintptr(oldbuf), uintptr(newbuf), uintptr(flags))
+}
+
+//go:nosplit
+func rctlblk_get_local_action(buf unsafe.Pointer) uintptr {
+ return sysvicall2(&libc_rctlblk_get_local_action, uintptr(buf), uintptr(0))
+}
+
+//go:nosplit
+func rctlblk_get_local_flags(buf unsafe.Pointer) uintptr {
+ return sysvicall1(&libc_rctlblk_get_local_flags, uintptr(buf))
+}
+
+//go:nosplit
+func rctlblk_get_value(buf unsafe.Pointer) uint64 {
+ return uint64(sysvicall1(&libc_rctlblk_get_value, uintptr(buf)))
+}
+
+//go:nosplit
+func rctlblk_size() uintptr {
+ return sysvicall0(&libc_rctlblk_size)
+}
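Editor's note: a quick worked example of the cap-to-CPU rounding used in getncpu above; the input values are illustrative.

package main

import "fmt"

// capToCPUs reproduces the rounding in getncpu above: the zone CPU cap is a
// percentage of a single CPU, rounded up so a fractional CPU still counts.
func capToCPUs(cents int32) int32 {
	return (cents + 99) / 100
}

func main() {
	fmt.Println(capToCPUs(336)) // 4, as in the comment above
	fmt.Println(capToCPUs(100)) // 1
	fmt.Println(capToCPUs(50))  // 1
}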
diff --git a/src/runtime/os_js.go b/src/runtime/os_js.go
index ad6db18..ff0ee3a 100644
--- a/src/runtime/os_js.go
+++ b/src/runtime/os_js.go
@@ -12,7 +12,7 @@
func exit(code int32)
-func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32 {
if fd > 2 {
throw("runtime.write to fd > 2 is unsupported")
}
@@ -131,7 +131,6 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
@@ -143,3 +142,9 @@
// gsignalStack is unused on js.
type gsignalStack struct{}
+
+const preemptMSupported = false
+
+func preemptM(mp *m) {
+ // No threads, so nothing to do.
+}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index d4a9bd4..7b95ff2 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -116,6 +117,13 @@
_CLONE_NEWUTS = 0x4000000
_CLONE_NEWIPC = 0x8000000
+ // As of QEMU 2.8.0 (5ea2fc84d), user emulation requires all six of these
+ // flags to be set when creating a thread; attempts to share the other
+ // five but leave SYSVSEM unshared will fail with -EINVAL.
+ //
+ // In non-QEMU environments CLONE_SYSVSEM is inconsequential as we do not
+ // use System V semaphores.
+
cloneFlags = _CLONE_VM | /* share memory */
_CLONE_FS | /* share cwd, etc */
_CLONE_FILES | /* share fd table */
@@ -269,13 +277,14 @@
if fd < 0 {
return 0
}
- n := read(fd, noescape(unsafe.Pointer(&numbuf[0])), int32(len(numbuf)))
+ ptr := noescape(unsafe.Pointer(&numbuf[0]))
+ n := read(fd, ptr, int32(len(numbuf)))
closefd(fd)
if n <= 0 {
return 0
}
- l := n - 1 // remove trailing newline
- v, ok := atoi(slicebytetostringtmp(numbuf[:l]))
+ n-- // remove trailing newline
+ v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
if !ok || v < 0 {
v = 0
}
@@ -289,6 +298,7 @@
func osinit() {
ncpu = getproccount()
physHugePageSize = getHugePageSize()
+ osArchInit()
}
var urandom_dev = []byte("/dev/urandom\x00")
@@ -318,11 +328,20 @@
initsig(true)
}
+// gsignalInitQuirk, if non-nil, is called for every allocated gsignal G.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var gsignalInitQuirk func(gsignal *g)
+
// Called to initialize a new m (including the bootstrap m).
// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
func mpreinit(mp *m) {
mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
mp.gsignal.m = mp
+ if gsignalInitQuirk != nil {
+ gsignalInitQuirk(mp.gsignal)
+ }
}
func gettid() uint32
@@ -332,7 +351,9 @@
func minit() {
minitSignals()
- // for debuggers, in case cgo created the thread
+ // Cgo-created threads and the bootstrap m are missing a
+ // procid. We need this for asynchronous preemption and it's
+ // useful in debuggers.
getg().m.procid = uint64(gettid())
}
@@ -372,6 +393,10 @@
func sched_getaffinity(pid, len uintptr, buf *byte) int32
func osyield()
+func pipe() (r, w int32, errno int32)
+func pipe2(flags int32) (r, w int32, errno int32)
+func setNonblock(fd int32)
+
//go:nosplit
//go:nowritebarrierrec
func setsig(i uint32, fn uintptr) {
@@ -452,3 +477,25 @@
// rt_sigaction is implemented in assembly.
//go:noescape
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
+
+func getpid() int
+func tgkill(tgid, tid, sig int)
+
+// touchStackBeforeSignal stores an errno value. If non-zero, it means
+// that we should touch the signal stack before sending a signal.
+// This is used on systems that have a bug when the signal stack must
+// be faulted in. See #35777 and #37436.
+//
+// This is accessed atomically as it is set and read in different threads.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var touchStackBeforeSignal uint32
+
+// signalM sends a signal to mp.
+func signalM(mp *m, sig int) {
+ if atomic.Load(&touchStackBeforeSignal) != 0 {
+ atomic.Cas((*uint32)(unsafe.Pointer(mp.gsignal.stack.hi-4)), 0, 0)
+ }
+ tgkill(getpid(), int(mp.procid), sig)
+}
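Editor's note: a user-space analog of the tgkill-based signalM above, assuming Linux and the standard syscall package. SIGUSR1 stands in for the runtime's preemption signal; this is only a sketch, not how the runtime itself delivers preemption.

package main

import (
	"fmt"
	"os"
	"os/signal"
	"runtime"
	"syscall"
)

func main() {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGUSR1)

	// Pin the goroutine so Gettid reports the thread we are about to signal.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// tgkill takes (process id, thread id, signal), just like signalM above.
	if err := syscall.Tgkill(os.Getpid(), syscall.Gettid(), syscall.SIGUSR1); err != nil {
		fmt.Println("tgkill:", err)
		return
	}
	fmt.Println("received", <-ch)
}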
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
index 207b0e4..b590da7 100644
--- a/src/runtime/os_linux_arm.go
+++ b/src/runtime/os_linux_arm.go
@@ -11,8 +11,6 @@
_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
)
-var randomNumber uint32
-
func checkgoarm() {
// On Android, /proc/self/auxv might be unreadable and hwcap won't
// reflect the CPU capabilities. Assume that every Android arm device
@@ -34,13 +32,6 @@
func archauxv(tag, val uintptr) {
switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
-
case _AT_HWCAP:
cpu.HWCap = uint(val)
case _AT_HWCAP2:
@@ -48,10 +39,11 @@
}
}
+func osArchInit() {}
+
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed fastrand().
// nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // randomNumber provides better seeding of fastrand.
- return nanotime() + int64(randomNumber)
+ return nanotime()
}
diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index 2d6f68b..19968dc 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -8,17 +8,8 @@
import "internal/cpu"
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
-
case _AT_HWCAP:
// arm64 doesn't have a 'cpuid' instruction equivalent and relies on
// HWCAP/HWCAP2 bits for hardware capabilities.
@@ -36,10 +27,11 @@
}
}
+func osArchInit() {}
+
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed fastrand().
// nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // randomNumber provides better seeding of fastrand.
- return nanotime() + int64(randomNumber)
+ return nanotime()
}
diff --git a/src/runtime/os_linux_mips64x.go b/src/runtime/os_linux_mips64x.go
index 0d7b84d..4ff66f9 100644
--- a/src/runtime/os_linux_mips64x.go
+++ b/src/runtime/os_linux_mips64x.go
@@ -7,25 +7,22 @@
package runtime
-var randomNumber uint32
+import "internal/cpu"
func archauxv(tag, val uintptr) {
switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
+ case _AT_HWCAP:
+ cpu.HWCap = uint(val)
}
}
+func osArchInit() {}
+
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed fastrand().
// nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // randomNumber provides better seeding of fastrand.
- return nanotime() + int64(randomNumber)
+ return nanotime()
}
const (
diff --git a/src/runtime/os_linux_mipsx.go b/src/runtime/os_linux_mipsx.go
index e0548ec..87962ed 100644
--- a/src/runtime/os_linux_mipsx.go
+++ b/src/runtime/os_linux_mipsx.go
@@ -7,25 +7,16 @@
package runtime
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
- switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
- }
}
+func osArchInit() {}
+
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed fastrand().
// nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // randomNumber provides better seeding of fastrand1.
- return nanotime() + int64(randomNumber)
+ return nanotime()
}
const (
diff --git a/src/runtime/os_linux_novdso.go b/src/runtime/os_linux_novdso.go
index e54c1c4..155f415 100644
--- a/src/runtime/os_linux_novdso.go
+++ b/src/runtime/os_linux_novdso.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// +build linux
-// +build !386,!amd64,!arm,!arm64,!ppc64,!ppc64le
+// +build !386,!amd64,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le
package runtime
diff --git a/src/runtime/os_linux_ppc64x.go b/src/runtime/os_linux_ppc64x.go
index cc79cc4..3aedc23 100644
--- a/src/runtime/os_linux_ppc64x.go
+++ b/src/runtime/os_linux_ppc64x.go
@@ -20,3 +20,5 @@
cpu.HWCap2 = uint(val)
}
}
+
+func osArchInit() {}
diff --git a/src/runtime/os_linux_riscv64.go b/src/runtime/os_linux_riscv64.go
new file mode 100644
index 0000000..9be88a5
--- /dev/null
+++ b/src/runtime/os_linux_riscv64.go
@@ -0,0 +1,7 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func osArchInit() {}
diff --git a/src/runtime/os_linux_s390x.go b/src/runtime/os_linux_s390x.go
index 55d35c7..ee18fd1 100644
--- a/src/runtime/os_linux_s390x.go
+++ b/src/runtime/os_linux_s390x.go
@@ -17,3 +17,5 @@
cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0
}
}
+
+func osArchInit() {}
diff --git a/src/runtime/os_linux_x86.go b/src/runtime/os_linux_x86.go
new file mode 100644
index 0000000..d001e6e
--- /dev/null
+++ b/src/runtime/os_linux_x86.go
@@ -0,0 +1,93 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build 386 amd64
+
+package runtime
+
+import "runtime/internal/atomic"
+
+//go:noescape
+func uname(utsname *new_utsname) int
+
+func mlock(addr, len uintptr) int
+
+func osArchInit() {
+ // Linux 5.2 introduced a bug that can corrupt vector
+ // registers on return from a signal if the signal stack isn't
+ // faulted in:
+ // https://bugzilla.kernel.org/show_bug.cgi?id=205663
+ //
+ // It was fixed in 5.3.15, 5.4.2, and all 5.5 and later
+ // kernels.
+ //
+ // If we're on an affected kernel, work around this issue by
+ // mlocking the top page of every signal stack. This doesn't
+ // help for signal stacks created in C, but there's not much
+ // we can do about that.
+ //
+ // TODO(austin): Remove this in Go 1.15, at which point it
+ // will be unlikely to encounter any of the affected kernels
+ // in the wild.
+
+ var uts new_utsname
+ if uname(&uts) < 0 {
+ throw("uname failed")
+ }
+ // Check for null terminator to ensure gostringnocopy doesn't
+ // walk off the end of the release string.
+ found := false
+ for _, b := range uts.release {
+ if b == 0 {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return
+ }
+ rel := gostringnocopy(&uts.release[0])
+
+ major, minor, patch, ok := parseRelease(rel)
+ if !ok {
+ return
+ }
+
+ if major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2) {
+ gsignalInitQuirk = mlockGsignal
+ if m0.gsignal != nil {
+ throw("gsignal quirk too late")
+ }
+ throwReportQuirk = throwBadKernel
+ }
+}
+
+func mlockGsignal(gsignal *g) {
+ if atomic.Load(&touchStackBeforeSignal) != 0 {
+ // mlock has already failed, don't try again.
+ return
+ }
+
+ // This mlock call may fail, but we don't report the failure.
+ // Instead, if something goes badly wrong, we rely on prepareSignalM
+ // and throwBadKernel to do further mitigation and to report a problem
+ // to the user if mitigation fails. This is because many
+ // systems have a limit on the total mlock size, and many kernels
+ // that appear to have bad versions are actually patched to avoid the
+ // bug described above. We want Go 1.14 to run on those systems.
+ // See #37436.
+ if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 {
+ atomic.Store(&touchStackBeforeSignal, uint32(-errno))
+ }
+}
+
+// throwBadKernel is called, via throwReportQuirk, by throw.
+func throwBadKernel() {
+ if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 {
+ println("runtime: note: your Linux kernel may be buggy")
+ println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug")
+ println("runtime: note: mlock workaround for kernel bug failed with errno", errno)
+ }
+}
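Editor's note: the kernel version window checked by osArchInit above, pulled out as a standalone predicate. parseRelease is runtime-internal, so this sketch assumes the release string has already been split into numbers.

package main

import "fmt"

// affected reports whether a kernel release needs the mlock workaround added
// above: any 5.2.x, 5.3 before 5.3.15, or 5.4 before 5.4.2.
func affected(major, minor, patch int) bool {
	return major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2)
}

func main() {
	fmt.Println(affected(5, 3, 14)) // true  -> install gsignalInitQuirk
	fmt.Println(affected(5, 4, 2))  // false -> fixed kernel
	fmt.Println(affected(4, 19, 0)) // false -> predates the bug
}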
diff --git a/src/runtime/os_nacl.go b/src/runtime/os_nacl.go
deleted file mode 100644
index 2b9a1cf..0000000
--- a/src/runtime/os_nacl.go
+++ /dev/null
@@ -1,328 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type mOS struct {
- waitsema int32 // semaphore for parking on locks
- waitsemacount int32
- waitsemalock int32
-}
-
-func nacl_exception_stack(p uintptr, size int32) int32
-func nacl_exception_handler(fn uintptr, arg unsafe.Pointer) int32
-func nacl_sem_create(flag int32) int32
-func nacl_sem_wait(sem int32) int32
-func nacl_sem_post(sem int32) int32
-func nacl_mutex_create(flag int32) int32
-func nacl_mutex_lock(mutex int32) int32
-func nacl_mutex_trylock(mutex int32) int32
-func nacl_mutex_unlock(mutex int32) int32
-func nacl_cond_create(flag int32) int32
-func nacl_cond_wait(cond, n int32) int32
-func nacl_cond_signal(cond int32) int32
-func nacl_cond_broadcast(cond int32) int32
-
-//go:noescape
-func nacl_cond_timed_wait_abs(cond, lock int32, ts *timespec) int32
-func nacl_thread_create(fn uintptr, stk, tls, xx unsafe.Pointer) int32
-
-//go:noescape
-func nacl_nanosleep(ts, extra *timespec) int32
-func nanotime() int64
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int)
-func exit(code int32)
-func osyield()
-
-//go:noescape
-func write(fd uintptr, p unsafe.Pointer, n int32) int32
-
-//go:linkname os_sigpipe os.sigpipe
-func os_sigpipe() {
- throw("too many writes on closed pipe")
-}
-
-func dieFromSignal(sig uint32) {
- exit(2)
-}
-
-func sigpanic() {
- g := getg()
- if !canpanic(g) {
- throw("unexpected signal during runtime execution")
- }
-
- // Native Client only invokes the exception handler for memory faults.
- g.sig = _SIGSEGV
- panicmem()
-}
-
-func raiseproc(sig uint32) {
-}
-
-// Stubs so tests can link correctly. These should never be called.
-func open(name *byte, mode, perm int32) int32
-func closefd(fd int32) int32
-func read(fd int32, p unsafe.Pointer, n int32) int32
-
-type sigset struct{}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
- mp.gsignal = malg(32 * 1024)
- mp.gsignal.m = mp
-}
-
-func sigtramp(ctxt byte)
-
-//go:nosplit
-func msigsave(mp *m) {
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func clearSignalHandlers() {
-}
-
-//go:nosplit
-func sigblock() {
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
- _g_ := getg()
-
- // Initialize signal handling
- ret := nacl_exception_stack(_g_.m.gsignal.stack.lo, 32*1024)
- if ret < 0 {
- print("runtime: nacl_exception_stack: error ", -ret, "\n")
- }
-
- ret = nacl_exception_handler(funcPC(sigtramp), nil)
- if ret < 0 {
- print("runtime: nacl_exception_handler: error ", -ret, "\n")
- }
-}
-
-// Called from dropm to undo the effect of an minit.
-func unminit() {
-}
-
-func osinit() {
- ncpu = 1
- getg().m.procid = 2
- //nacl_exception_handler(funcPC(sigtramp), nil);
- physPageSize = 65536
-}
-
-func signame(sig uint32) string {
- if sig >= uint32(len(sigtable)) {
- return ""
- }
- return sigtable[sig].name
-}
-
-//go:nosplit
-func crash() {
- *(*int32)(nil) = 0
-}
-
-//go:noescape
-func getRandomData([]byte)
-
-func goenvs() {
- goenvs_unix()
-}
-
-func initsig(preinit bool) {
-}
-
-//go:nosplit
-func usleep(us uint32) {
- var ts timespec
-
- ts.tv_sec = int64(us / 1e6)
- ts.tv_nsec = int32(us%1e6) * 1e3
- nacl_nanosleep(&ts, nil)
-}
-
-func mstart_nacl()
-
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newosproc(mp *m) {
- stk := unsafe.Pointer(mp.g0.stack.hi)
- mp.tls[0] = uintptr(unsafe.Pointer(mp.g0))
- mp.tls[1] = uintptr(unsafe.Pointer(mp))
- ret := nacl_thread_create(funcPC(mstart_nacl), stk, unsafe.Pointer(&mp.tls[2]), nil)
- if ret < 0 {
- print("nacl_thread_create: error ", -ret, "\n")
- throw("newosproc")
- }
-}
-
-//go:noescape
-func exitThread(wait *uint32)
-
-//go:nosplit
-func semacreate(mp *m) {
- if mp.waitsema != 0 {
- return
- }
- systemstack(func() {
- mu := nacl_mutex_create(0)
- if mu < 0 {
- print("nacl_mutex_create: error ", -mu, "\n")
- throw("semacreate")
- }
- c := nacl_cond_create(0)
- if c < 0 {
- print("nacl_cond_create: error ", -c, "\n")
- throw("semacreate")
- }
- mp.waitsema = c
- mp.waitsemalock = mu
- })
-}
-
-//go:nosplit
-func semasleep(ns int64) int32 {
- var ret int32
- systemstack(func() {
- _g_ := getg()
- if nacl_mutex_lock(_g_.m.waitsemalock) < 0 {
- throw("semasleep")
- }
- var ts timespec
- if ns >= 0 {
- end := ns + nanotime()
- ts.tv_sec = end / 1e9
- ts.tv_nsec = int32(end % 1e9)
- }
- for _g_.m.waitsemacount == 0 {
- if ns < 0 {
- if nacl_cond_wait(_g_.m.waitsema, _g_.m.waitsemalock) < 0 {
- throw("semasleep")
- }
- } else {
- r := nacl_cond_timed_wait_abs(_g_.m.waitsema, _g_.m.waitsemalock, &ts)
- if r == -_ETIMEDOUT {
- nacl_mutex_unlock(_g_.m.waitsemalock)
- ret = -1
- return
- }
- if r < 0 {
- throw("semasleep")
- }
- }
- }
-
- _g_.m.waitsemacount = 0
- nacl_mutex_unlock(_g_.m.waitsemalock)
- ret = 0
- })
- return ret
-}
-
-//go:nosplit
-func semawakeup(mp *m) {
- systemstack(func() {
- if nacl_mutex_lock(mp.waitsemalock) < 0 {
- throw("semawakeup")
- }
- if mp.waitsemacount != 0 {
- throw("semawakeup")
- }
- mp.waitsemacount = 1
- nacl_cond_signal(mp.waitsema)
- nacl_mutex_unlock(mp.waitsemalock)
- })
-}
-
-// This runs on a foreign stack, without an m or a g. No stack split.
-//go:nosplit
-//go:norace
-//go:nowritebarrierrec
-func badsignal(sig uintptr) {
- cgocallback(unsafe.Pointer(funcPC(badsignalgo)), noescape(unsafe.Pointer(&sig)), unsafe.Sizeof(sig), 0)
-}
-
-func badsignalgo(sig uintptr) {
- if !sigsend(uint32(sig)) {
- // A foreign thread received the signal sig, and the
- // Go code does not want to handle it.
- raisebadsignal(uint32(sig))
- }
-}
-
-// This runs on a foreign stack, without an m or a g. No stack split.
-//go:nosplit
-func badsignal2() {
- write(2, unsafe.Pointer(&badsignal1[0]), int32(len(badsignal1)))
- exit(2)
-}
-
-var badsignal1 = []byte("runtime: signal received on thread not created by Go.\n")
-
-func raisebadsignal(sig uint32) {
- badsignal2()
-}
-
-func madvise(addr unsafe.Pointer, n uintptr, flags int32) {}
-func munmap(addr unsafe.Pointer, n uintptr) {}
-func setProcessCPUProfiler(hz int32) {}
-func setThreadCPUProfiler(hz int32) {}
-func sigdisable(uint32) {}
-func sigenable(uint32) {}
-func sigignore(uint32) {}
-func closeonexec(int32) {}
-
-// gsignalStack is unused on nacl.
-type gsignalStack struct{}
-
-var writelock uint32 // test-and-set spin lock for write
-
-// lastfaketime stores the last faketime value written to fd 1 or 2.
-var lastfaketime int64
-
-// lastfaketimefd stores the fd to which lastfaketime was written.
-//
-// Subsequent writes to the same fd may use the same timestamp,
-// but the timestamp must increase if the fd changes.
-var lastfaketimefd int32
-
-/*
-An attempt at IRT. Doesn't work. See end of sys_nacl_amd64.s.
-
-void (*nacl_irt_query)(void);
-
-int8 nacl_irt_basic_v0_1_str[] = "nacl-irt-basic-0.1";
-void *nacl_irt_basic_v0_1[6]; // exit, gettod, clock, nanosleep, sched_yield, sysconf
-int32 nacl_irt_basic_v0_1_size = sizeof(nacl_irt_basic_v0_1);
-
-int8 nacl_irt_memory_v0_3_str[] = "nacl-irt-memory-0.3";
-void *nacl_irt_memory_v0_3[3]; // mmap, munmap, mprotect
-int32 nacl_irt_memory_v0_3_size = sizeof(nacl_irt_memory_v0_3);
-
-int8 nacl_irt_thread_v0_1_str[] = "nacl-irt-thread-0.1";
-void *nacl_irt_thread_v0_1[3]; // thread_create, thread_exit, thread_nice
-int32 nacl_irt_thread_v0_1_size = sizeof(nacl_irt_thread_v0_1);
-*/
-
-// The following functions are implemented in runtime assembly.
-// Provide a Go declaration to go with its assembly definitions.
-
-//go:linkname syscall_naclWrite syscall.naclWrite
-func syscall_naclWrite(fd int, b []byte) int
-
-//go:linkname syscall_now syscall.now
-func syscall_now() (sec int64, nsec int32)
diff --git a/src/runtime/os_nacl_arm.go b/src/runtime/os_nacl_arm.go
deleted file mode 100644
index 8669ee7..0000000
--- a/src/runtime/os_nacl_arm.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-func checkgoarm() {
- // TODO(minux): FP checks like in os_linux_arm.go.
-
- // NaCl/ARM only supports ARMv7
- if goarm != 7 {
- print("runtime: NaCl requires ARMv7. Recompile using GOARM=7.\n")
- exit(1)
- }
-}
-
-//go:nosplit
-func cputicks() int64 {
- // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
- // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
- return nanotime()
-}
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index da024cd..97106c7 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -24,8 +24,6 @@
// From <sys/lwp.h>
_LWP_DETACHED = 0x00000040
-
- _EAGAIN = 35
)
type mOS struct {
@@ -49,9 +47,10 @@
func lwp_tramp()
-func raise(sig uint32)
func raiseproc(sig uint32)
+func lwp_kill(tid int32, sig int)
+
//go:noescape
func getcontext(ctxt unsafe.Pointer)
@@ -72,7 +71,11 @@
//go:noescape
func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32
+
+func pipe() (r, w int32, errno int32)
+func pipe2(flags int32) (r, w int32, errno int32)
func closeonexec(fd int32)
+func setNonblock(fd int32)
const (
_ESRCH = 3
@@ -362,3 +365,17 @@
}
}
}
+
+// raise sends signal to the calling thread.
+//
+// It must be nosplit because it is used by the signal handler before
+// it definitely has a Go stack.
+//
+//go:nosplit
+func raise(sig uint32) {
+ lwp_kill(lwp_self(), int(sig))
+}
+
+func signalM(mp *m, sig int) {
+ lwp_kill(int32(mp.procid), sig)
+}
diff --git a/src/runtime/os_netbsd_arm.go b/src/runtime/os_netbsd_arm.go
index 95603da..b5ec23e 100644
--- a/src/runtime/os_netbsd_arm.go
+++ b/src/runtime/os_netbsd_arm.go
@@ -30,6 +30,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_netbsd_arm64.go b/src/runtime/os_netbsd_arm64.go
index fd81eb7..8d21b0a 100644
--- a/src/runtime/os_netbsd_arm64.go
+++ b/src/runtime/os_netbsd_arm64.go
@@ -19,6 +19,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_only_solaris.go b/src/runtime/os_only_solaris.go
new file mode 100644
index 0000000..e2f5409
--- /dev/null
+++ b/src/runtime/os_only_solaris.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Solaris code that doesn't also apply to illumos.
+
+// +build !illumos
+
+package runtime
+
+func getncpu() int32 {
+ n := int32(sysconf(__SC_NPROCESSORS_ONLN))
+ if n < 1 {
+ return 1
+ }
+
+ return n
+}
diff --git a/src/runtime/os_openbsd.go b/src/runtime/os_openbsd.go
index 2d6334e..b486b83 100644
--- a/src/runtime/os_openbsd.go
+++ b/src/runtime/os_openbsd.go
@@ -42,9 +42,11 @@
//go:noescape
func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-func raise(sig uint32)
func raiseproc(sig uint32)
+func getthrid() int32
+func thrkill(tid int32, sig int)
+
//go:noescape
func tfork(param *tforkt, psize uintptr, mm *m, gg *g, fn uintptr) int32
@@ -60,11 +62,14 @@
//go:noescape
func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32
+
+func pipe() (r, w int32, errno int32)
+func pipe2(flags int32) (r, w int32, errno int32)
func closeonexec(fd int32)
+func setNonblock(fd int32)
const (
_ESRCH = 3
- _EAGAIN = 35
_EWOULDBLOCK = _EAGAIN
_ENOTSUP = 91
@@ -190,7 +195,7 @@
// rather than at the top of it.
param := tforkt{
tf_tcb: unsafe.Pointer(&mp.tls[0]),
- tf_tid: (*int32)(unsafe.Pointer(&mp.procid)),
+ tf_tid: nil, // minit will record tid
tf_stack: uintptr(stk) - sys.PtrSize,
}
@@ -238,10 +243,7 @@
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
- // m.procid is a uint64, but tfork writes an int32. Fix it up.
- _g_ := getg()
- _g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
-
+ getg().m.procid = uint64(getthrid())
minitSignals()
}
@@ -337,3 +339,11 @@
throw("remapping stack memory failed")
}
}
+
+func raise(sig uint32) {
+ thrkill(getthrid(), int(sig))
+}
+
+func signalM(mp *m, sig int) {
+ thrkill(int32(mp.procid), sig)
+}
diff --git a/src/runtime/os_openbsd_arm.go b/src/runtime/os_openbsd_arm.go
index be2e1e9..0a24096 100644
--- a/src/runtime/os_openbsd_arm.go
+++ b/src/runtime/os_openbsd_arm.go
@@ -19,6 +19,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_openbsd_arm64.go b/src/runtime/os_openbsd_arm64.go
index f15a95b..d559a2a 100644
--- a/src/runtime/os_openbsd_arm64.go
+++ b/src/runtime/os_openbsd_arm64.go
@@ -12,7 +12,6 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
index d7ea1ef..2bea105 100644
--- a/src/runtime/os_plan9.go
+++ b/src/runtime/os_plan9.go
@@ -293,7 +293,6 @@
ncpu = getproccount()
physPageSize = getPageSize()
getg().m.procid = getpid()
- notify(unsafe.Pointer(funcPC(sigtramp)))
}
//go:nosplit
@@ -311,6 +310,9 @@
}
func initsig(preinit bool) {
+ if !preinit {
+ notify(unsafe.Pointer(funcPC(sigtramp)))
+ }
}
//go:nosplit
@@ -328,7 +330,7 @@
}
//go:nosplit
-func nanotime() int64 {
+func nanotime1() int64 {
var scratch int64
ns := nsec(&scratch)
// TODO(aram): remove hack after I fix _nsec in the pc64 kernel.
@@ -373,7 +375,7 @@
return -1
}
len := findnull(&msg[0])
- if write(uintptr(fd), unsafe.Pointer(&msg[0]), int32(len)) != int64(len) {
+ if write1(uintptr(fd), unsafe.Pointer(&msg[0]), int32(len)) != int32(len) {
closefd(fd)
return -1
}
@@ -451,8 +453,8 @@
}
//go:nosplit
-func write(fd uintptr, buf unsafe.Pointer, n int32) int64 {
- return int64(pwrite(int32(fd), buf, n, -1))
+func write1(fd uintptr, buf unsafe.Pointer, n int32) int32 {
+ return pwrite(int32(fd), buf, n, -1)
}
var _badsignal = []byte("runtime: signal received on thread not created by Go.\n")
@@ -483,3 +485,11 @@
}
return sigtable[sig].name
}
+
+const preemptMSupported = false
+
+func preemptM(mp *m) {
+ // Not currently supported.
+ //
+ // TODO: Use a note like we use signals on POSIX OSes
+}
diff --git a/src/runtime/os_plan9_arm.go b/src/runtime/os_plan9_arm.go
index fdce1e7..f165a34 100644
--- a/src/runtime/os_plan9_arm.go
+++ b/src/runtime/os_plan9_arm.go
@@ -12,6 +12,5 @@
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/src/runtime/os_solaris.go b/src/runtime/os_solaris.go
index 989edb5..89129e5 100644
--- a/src/runtime/os_solaris.go
+++ b/src/runtime/os_solaris.go
@@ -63,6 +63,15 @@
//go:nosplit
func sysvicall1(fn *libcFunc, a1 uintptr) uintptr {
+ r1, _ := sysvicall1Err(fn, a1)
+ return r1
+}
+
+//go:nosplit
+
+// sysvicall1Err returns both the system call result and the errno value.
+// This is used by sysvicall1 and pipe.
+func sysvicall1Err(fn *libcFunc, a1 uintptr) (r1, err uintptr) {
// Leave caller's PC/SP around for traceback.
gp := getg()
var mp *m
@@ -88,11 +97,21 @@
if mp != nil {
mp.libcallsp = 0
}
- return libcall.r1
+ return libcall.r1, libcall.err
}
//go:nosplit
func sysvicall2(fn *libcFunc, a1, a2 uintptr) uintptr {
+ r1, _ := sysvicall2Err(fn, a1, a2)
+ return r1
+}
+
+//go:nosplit
+//go:cgo_unsafe_args
+
+// sysvicall2Err returns both the system call result and the errno value.
+// This is used by sysvicall2 and pipe2.
+func sysvicall2Err(fn *libcFunc, a1, a2 uintptr) (uintptr, uintptr) {
// Leave caller's PC/SP around for traceback.
gp := getg()
var mp *m
@@ -117,11 +136,21 @@
if mp != nil {
mp.libcallsp = 0
}
- return libcall.r1
+ return libcall.r1, libcall.err
}
//go:nosplit
func sysvicall3(fn *libcFunc, a1, a2, a3 uintptr) uintptr {
+ r1, _ := sysvicall3Err(fn, a1, a2, a3)
+ return r1
+}
+
+//go:nosplit
+//go:cgo_unsafe_args
+
+// sysvicall3Err returns both the system call result and the errno value.
+// This is used by sysvicall3 and write1.
+func sysvicall3Err(fn *libcFunc, a1, a2, a3 uintptr) (r1, err uintptr) {
// Leave caller's PC/SP around for traceback.
gp := getg()
var mp *m
@@ -146,7 +175,7 @@
if mp != nil {
mp.libcallsp = 0
}
- return libcall.r1
+ return libcall.r1, libcall.err
}
//go:nosplit
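Editor's note: a minimal sketch of the wrapper pattern introduced above, with made-up stand-ins for the libc call. The old sysvicallN entry points keep their signature and discard the errno that the new *Err variants expose to callers such as pipe and write1.

package main

import "fmt"

// callErr stands in for sysvicall1Err: it reports both a result and an
// errno-like value (22 below is an assumed EINVAL-style code).
func callErr(a1 uintptr) (r1, err uintptr) {
	if a1 == 0 {
		return ^uintptr(0), 22
	}
	return a1 * 2, 0
}

// call stands in for sysvicall1: same signature as before, errno dropped.
func call(a1 uintptr) uintptr {
	r1, _ := callErr(a1)
	return r1
}

func main() {
	fmt.Println(call(21))   // 42, errno ignored
	fmt.Println(callErr(0)) // failure sentinel plus errno 22
}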
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 074ae0f..a584ada 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -6,6 +6,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -27,13 +28,15 @@
//go:cgo_import_dynamic runtime._GetEnvironmentStringsW GetEnvironmentStringsW%0 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetProcAddress GetProcAddress%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetProcessAffinityMask GetProcessAffinityMask%3 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus%5 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetQueuedCompletionStatusEx GetQueuedCompletionStatusEx%6 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetStdHandle GetStdHandle%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
//go:cgo_import_dynamic runtime._ResumeThread ResumeThread%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._SetConsoleCtrlHandler SetConsoleCtrlHandler%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._SetErrorMode SetErrorMode%1 "kernel32.dll"
@@ -49,6 +52,7 @@
//go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
//go:cgo_import_dynamic runtime._VirtualQuery VirtualQuery%3 "kernel32.dll"
//go:cgo_import_dynamic runtime._WaitForSingleObject WaitForSingleObject%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._WaitForMultipleObjects WaitForMultipleObjects%4 "kernel32.dll"
//go:cgo_import_dynamic runtime._WriteConsoleW WriteConsoleW%5 "kernel32.dll"
//go:cgo_import_dynamic runtime._WriteFile WriteFile%5 "kernel32.dll"
@@ -71,14 +75,16 @@
_GetEnvironmentStringsW,
_GetProcAddress,
_GetProcessAffinityMask,
- _GetQueuedCompletionStatus,
+ _GetQueuedCompletionStatusEx,
_GetStdHandle,
_GetSystemDirectoryA,
_GetSystemInfo,
_GetSystemTimeAsFileTime,
_GetThreadContext,
+ _SetThreadContext,
_LoadLibraryW,
_LoadLibraryA,
+ _PostQueuedCompletionStatus,
_QueryPerformanceCounter,
_QueryPerformanceFrequency,
_ResumeThread,
@@ -96,6 +102,7 @@
_VirtualFree,
_VirtualQuery,
_WaitForSingleObject,
+ _WaitForMultipleObjects,
_WriteConsoleW,
_WriteFile,
_ stdFunction
@@ -104,7 +111,6 @@
// We will load syscalls, if available, before using them.
_AddDllDirectory,
_AddVectoredContinueHandler,
- _GetQueuedCompletionStatusEx,
_LoadLibraryExA,
_LoadLibraryExW,
_ stdFunction
@@ -139,7 +145,34 @@
func ctrlhandler()
type mOS struct {
- waitsema uintptr // semaphore for parking on locks
+ threadLock mutex // protects "thread" and prevents closing
+ thread uintptr // thread handle
+
+ waitsema uintptr // semaphore for parking on locks
+ resumesema uintptr // semaphore to indicate suspend/resume
+
+ // preemptExtLock synchronizes preemptM with entry/exit from
+ // external C code.
+ //
+ // This protects against races between preemptM calling
+ // SuspendThread and external code on this thread calling
+ // ExitProcess. If these happen concurrently, it's possible to
+ // exit the suspending thread and suspend the exiting thread,
+ // leading to deadlock.
+ //
+ // 0 indicates this M is not being preempted or in external
+ // code. Entering external code CASes this from 0 to 1. If
+ // this fails, a preemption is in progress, so the thread must
+ // wait for the preemption. preemptM also CASes this from 0 to
+ // 1. If this fails, the preemption fails (as it would if the
+ // PC weren't in Go code). The value is reset to 0 when
+ // returning from external code or after a preemption is
+ // complete.
+ //
+ // TODO(austin): We may not need this if preemption were more
+ // tightly synchronized on the G/P status and preemption
+ // blocked transition into _Gsyscall/_Psyscall.
+ preemptExtLock uint32
}
//go:linkname os_sigpipe os.sigpipe
@@ -205,7 +238,6 @@
}
_AddDllDirectory = windowsFindfunc(k32, []byte("AddDllDirectory\000"))
_AddVectoredContinueHandler = windowsFindfunc(k32, []byte("AddVectoredContinueHandler\000"))
- _GetQueuedCompletionStatusEx = windowsFindfunc(k32, []byte("GetQueuedCompletionStatusEx\000"))
_LoadLibraryExA = windowsFindfunc(k32, []byte("LoadLibraryExA\000"))
_LoadLibraryExW = windowsFindfunc(k32, []byte("LoadLibraryExW\000"))
useLoadLibraryEx = (_LoadLibraryExW != nil && _LoadLibraryExA != nil && _AddDllDirectory != nil)
@@ -258,6 +290,39 @@
}
}
+func monitorSuspendResume() {
+ const (
+ _DEVICE_NOTIFY_CALLBACK = 2
+ )
+ type _DEVICE_NOTIFY_SUBSCRIBE_PARAMETERS struct {
+ callback uintptr
+ context uintptr
+ }
+
+ powrprof := windowsLoadSystemLib([]byte("powrprof.dll\000"))
+ if powrprof == 0 {
+ return // Running on Windows 7, where we don't need it anyway.
+ }
+ powerRegisterSuspendResumeNotification := windowsFindfunc(powrprof, []byte("PowerRegisterSuspendResumeNotification\000"))
+ if powerRegisterSuspendResumeNotification == nil {
+ return // Running on Windows 7, where we don't need it anyway.
+ }
+ var fn interface{} = func(context uintptr, changeType uint32, setting uintptr) uintptr {
+ for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
+ if mp.resumesema != 0 {
+ stdcall1(_SetEvent, mp.resumesema)
+ }
+ }
+ return 0
+ }
+ params := _DEVICE_NOTIFY_SUBSCRIBE_PARAMETERS{
+ callback: compileCallback(*efaceOf(&fn), true),
+ }
+ handle := uintptr(0)
+ stdcall3(powerRegisterSuspendResumeNotification, _DEVICE_NOTIFY_CALLBACK,
+ uintptr(unsafe.Pointer(&params)), uintptr(unsafe.Pointer(&handle)))
+}
+
//go:nosplit
func getLoadLibrary() uintptr {
return uintptr(unsafe.Pointer(_LoadLibraryW))
@@ -377,8 +442,6 @@
stdcall2(_SetProcessPriorityBoost, currentProcess, 1)
}
-func nanotime() int64
-
// useQPCTime controls whether time.now and nanotime use QueryPerformanceCounter.
// This is only set to 1 when running under Wine.
var useQPCTime uint8
@@ -488,6 +551,10 @@
}
stdcall1(_FreeEnvironmentStringsW, uintptr(strings))
+
+ // We call this all the way here, late in init, so that malloc works
+ // for the callback function this generates.
+ monitorSuspendResume()
}
// exiting is set to non-zero when the process is exiting.
@@ -495,12 +562,21 @@
//go:nosplit
func exit(code int32) {
+ // Disallow thread suspension for preemption. Otherwise,
+ // ExitProcess and SuspendThread can race: SuspendThread
+ // queues a suspension request for this thread, ExitProcess
+ // kills the suspending thread, and then this thread suspends.
+ lock(&suspendLock)
atomic.Store(&exiting, 1)
stdcall1(_ExitProcess, uintptr(code))
}
+// write1 must be nosplit because it's used as a last resort in
+// functions like badmorestackg0. In such cases, we'll always take the
+// ASCII path.
+//
//go:nosplit
-func write(fd uintptr, buf unsafe.Pointer, n int32) int32 {
+func write1(fd uintptr, buf unsafe.Pointer, n int32) int32 {
const (
_STD_OUTPUT_HANDLE = ^uintptr(10) // -11
_STD_ERROR_HANDLE = ^uintptr(11) // -12
@@ -597,6 +673,9 @@
return
}
+// walltime1 isn't implemented on Windows, but will never be called.
+func walltime1() (sec int64, nsec int32)
+
//go:nosplit
func semasleep(ns int64) int32 {
const (
@@ -606,19 +685,32 @@
_WAIT_FAILED = 0xFFFFFFFF
)
- // store ms in ns to save stack space
+ var result uintptr
if ns < 0 {
- ns = _INFINITE
+ result = stdcall2(_WaitForSingleObject, getg().m.waitsema, uintptr(_INFINITE))
} else {
- ns = int64(timediv(ns, 1000000, nil))
- if ns == 0 {
- ns = 1
+ start := nanotime()
+ elapsed := int64(0)
+ for {
+ ms := int64(timediv(ns-elapsed, 1000000, nil))
+ if ms == 0 {
+ ms = 1
+ }
+ result = stdcall4(_WaitForMultipleObjects, 2,
+ uintptr(unsafe.Pointer(&[2]uintptr{getg().m.waitsema, getg().m.resumesema})),
+ 0, uintptr(ms))
+ if result != _WAIT_OBJECT_0+1 {
+ // Not a suspend/resume event
+ break
+ }
+ elapsed = nanotime() - start
+ if elapsed >= ns {
+ return -1
+ }
}
}
-
- result := stdcall2(_WaitForSingleObject, getg().m.waitsema, uintptr(ns))
switch result {
- case _WAIT_OBJECT_0: //signaled
+ case _WAIT_OBJECT_0: // Signaled
return 0
case _WAIT_TIMEOUT:
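The rewritten wait above treats a wakeup on resumesema (broadcast after a system suspend/resume) as spurious: it measures the time already spent and re-waits for only the remainder, returning -1 once the full timeout has elapsed. A standalone sketch of that re-arm pattern, with channels standing in for the two Win32 handles (all names here are illustrative, not runtime code):

	package main

	import (
		"fmt"
		"time"
	)

	// waitWithResume waits up to d for a signal on sema, but a wakeup on
	// resume is treated as spurious: re-wait for only the remaining time.
	// Returns true if sema was signaled, false on timeout.
	func waitWithResume(sema, resume <-chan struct{}, d time.Duration) bool {
		deadline := time.Now().Add(d)
		for {
			remaining := time.Until(deadline)
			if remaining <= 0 {
				return false
			}
			select {
			case <-sema:
				return true
			case <-resume:
				// Suspend/resume notification: loop and wait again
				// with the shortened timeout.
			case <-time.After(remaining):
				return false
			}
		}
	}

	func main() {
		sema := make(chan struct{})
		resume := make(chan struct{}, 1)
		resume <- struct{}{} // simulate a resume broadcast arriving first
		go func() {
			time.Sleep(10 * time.Millisecond)
			close(sema)
		}()
		fmt.Println("signaled:", waitWithResume(sema, resume, time.Second))
	}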
@@ -667,6 +759,15 @@
throw("runtime.semacreate")
})
}
+ mp.resumesema = stdcall4(_CreateEventA, 0, 0, 0, 0)
+ if mp.resumesema == 0 {
+ systemstack(func() {
+ print("runtime: createevent failed; errno=", getlasterror(), "\n")
+ throw("runtime.semacreate")
+ })
+ stdcall1(_CloseHandle, mp.waitsema)
+ mp.waitsema = 0
+ }
}
// May run with m.p==nil, so write barriers are not allowed. This
@@ -705,7 +806,7 @@
func newosproc0(mp *m, stk unsafe.Pointer) {
// TODO: this is completely broken. The args passed to newosproc0 (in asm_amd64.s)
// are stacksize and function, not *m and stack.
- // Check os_linux.go for an implemention that might actually work.
+ // Check os_linux.go for an implementation that might actually work.
throw("bad newosproc0")
}
@@ -742,7 +843,11 @@
func minit() {
var thandle uintptr
stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS)
- atomic.Storeuintptr(&getg().m.thread, thandle)
+
+ mp := getg().m
+ lock(&mp.threadLock)
+ mp.thread = thandle
+ unlock(&mp.threadLock)
// Query the true stack base from the OS. Currently we're
// running on a small assumed stack.
@@ -775,9 +880,11 @@
// Called from dropm to undo the effect of an minit.
//go:nosplit
func unminit() {
- tp := &getg().m.thread
- stdcall1(_CloseHandle, *tp)
- *tp = 0
+ mp := getg().m
+ lock(&mp.threadLock)
+ stdcall1(_CloseHandle, mp.thread)
+ mp.thread = 0
+ unlock(&mp.threadLock)
}
// Calling stdcall on os stack.
@@ -894,6 +1001,8 @@
switch _type {
case _CTRL_C_EVENT, _CTRL_BREAK_EVENT:
s = _SIGINT
+ case _CTRL_CLOSE_EVENT, _CTRL_LOGOFF_EVENT, _CTRL_SHUTDOWN_EVENT:
+ s = _SIGTERM
default:
return 0
}
@@ -901,7 +1010,11 @@
if sigsend(s) {
return 1
}
- exit(2) // SIGINT, SIGTERM, etc
+ if !islibrary && !isarchive {
+ // Only exit the program if we don't have a DLL.
+ // See https://golang.org/issues/35965.
+ exit(2) // SIGINT, SIGTERM, etc
+ }
return 0
}
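With the _CTRL_CLOSE_EVENT/_CTRL_LOGOFF_EVENT/_CTRL_SHUTDOWN_EVENT cases added above, console close, logoff and shutdown events are delivered to Go code as SIGTERM, and a DLL or archive build no longer force-exits when nothing handles them. A minimal user-level sketch of observing that signal with the standard os/signal API (this program is not part of the patch):

	package main

	import (
		"fmt"
		"os"
		"os/signal"
		"syscall"
	)

	func main() {
		c := make(chan os.Signal, 1)
		// SIGTERM is what ctrlhandler sends for console close, logoff and
		// shutdown events; os.Interrupt covers Ctrl-C and Ctrl-Break.
		signal.Notify(c, os.Interrupt, syscall.SIGTERM)
		fmt.Println("waiting for a console event or signal...")
		sig := <-c
		fmt.Println("got", sig, "- cleaning up before exit")
	}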
@@ -914,27 +1027,30 @@
var profiletimer uintptr
func profilem(mp *m, thread uintptr) {
- var r *context
- rbuf := make([]byte, unsafe.Sizeof(*r)+15)
+ // Align Context to 16 bytes.
+ var c *context
+ var cbuf [unsafe.Sizeof(*c) + 15]byte
+ c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
- // align Context to 16 bytes
- r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
- r.contextflags = _CONTEXT_CONTROL
- stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
+ c.contextflags = _CONTEXT_CONTROL
+ stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
- var gp *g
+ gp := gFromTLS(mp)
+
+ sigprof(c.ip(), c.sp(), c.lr(), gp, mp)
+}
+
+func gFromTLS(mp *m) *g {
switch GOARCH {
- default:
- panic("unsupported architecture")
case "arm":
tls := &mp.tls[0]
- gp = **((***g)(unsafe.Pointer(tls)))
+ return **((***g)(unsafe.Pointer(tls)))
case "386", "amd64":
tls := &mp.tls[0]
- gp = *((**g)(unsafe.Pointer(tls)))
+ return *((**g)(unsafe.Pointer(tls)))
}
-
- sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+ throw("unsupported architecture")
+ return nil
}
func profileloop1(param uintptr) uint32 {
@@ -944,17 +1060,25 @@
stdcall2(_WaitForSingleObject, profiletimer, _INFINITE)
first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
for mp := first; mp != nil; mp = mp.alllink {
- thread := atomic.Loaduintptr(&mp.thread)
+ lock(&mp.threadLock)
// Do not profile threads blocked on Notes,
// this includes idle worker threads,
// idle timer thread, idle heap scavenger, etc.
- if thread == 0 || mp.profilehz == 0 || mp.blocked {
+ if mp.thread == 0 || mp.profilehz == 0 || mp.blocked {
+ unlock(&mp.threadLock)
continue
}
- // mp may exit between the load above and the
- // SuspendThread, so be careful.
+ // Acquire our own handle to the thread.
+ var thread uintptr
+ stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+ unlock(&mp.threadLock)
+ // mp may exit between the DuplicateHandle
+ // above and the SuspendThread. The handle
+ // will remain valid, but SuspendThread may
+ // fail.
if int32(stdcall1(_SuspendThread, thread)) == -1 {
// The thread no longer exists.
+ stdcall1(_CloseHandle, thread)
continue
}
if mp.profilehz != 0 && !mp.blocked {
@@ -963,6 +1087,7 @@
profilem(mp, thread)
}
stdcall1(_ResumeThread, thread)
+ stdcall1(_CloseHandle, thread)
}
}
}
@@ -990,3 +1115,140 @@
stdcall6(_SetWaitableTimer, profiletimer, uintptr(unsafe.Pointer(&due)), uintptr(ms), 0, 0, 0)
atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
}
+
+const preemptMSupported = GOARCH != "arm"
+
+// suspendLock protects simultaneous SuspendThread operations from
+// suspending each other.
+var suspendLock mutex
+
+func preemptM(mp *m) {
+ if GOARCH == "arm" {
+ // TODO: Implement call injection
+ return
+ }
+
+ if mp == getg().m {
+ throw("self-preempt")
+ }
+
+ // Synchronize with external code that may try to ExitProcess.
+ if !atomic.Cas(&mp.preemptExtLock, 0, 1) {
+ // External code is running. Fail the preemption
+ // attempt.
+ atomic.Xadd(&mp.preemptGen, 1)
+ return
+ }
+
+ // Acquire our own handle to mp's thread.
+ lock(&mp.threadLock)
+ if mp.thread == 0 {
+ // The M hasn't been minit'd yet (or was just unminit'd).
+ unlock(&mp.threadLock)
+ atomic.Store(&mp.preemptExtLock, 0)
+ atomic.Xadd(&mp.preemptGen, 1)
+ return
+ }
+ var thread uintptr
+ stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+ unlock(&mp.threadLock)
+
+ // Prepare thread context buffer. This must be aligned to 16 bytes.
+ var c *context
+ var cbuf [unsafe.Sizeof(*c) + 15]byte
+ c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
+ c.contextflags = _CONTEXT_CONTROL
+
+ // Serialize thread suspension. SuspendThread is asynchronous,
+ // so it's otherwise possible for two threads to suspend each
+ // other and deadlock. We must hold this lock until after
+ // GetThreadContext, since that blocks until the thread is
+ // actually suspended.
+ lock(&suspendLock)
+
+ // Suspend the thread.
+ if int32(stdcall1(_SuspendThread, thread)) == -1 {
+ unlock(&suspendLock)
+ stdcall1(_CloseHandle, thread)
+ atomic.Store(&mp.preemptExtLock, 0)
+ // The thread no longer exists. This shouldn't be
+ // possible, but just acknowledge the request.
+ atomic.Xadd(&mp.preemptGen, 1)
+ return
+ }
+
+ // We have to be very careful between this point and once
+ // we've shown mp is at an async safe-point. This is like a
+ // signal handler in the sense that mp could have been doing
+ // anything when we stopped it, including holding arbitrary
+ // locks.
+
+ // We have to get the thread context before inspecting the M
+ // because SuspendThread only requests a suspend.
+ // GetThreadContext actually blocks until it's suspended.
+ stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+
+ unlock(&suspendLock)
+
+ // Does it want a preemption and is it safe to preempt?
+ gp := gFromTLS(mp)
+ if wantAsyncPreempt(gp) {
+ if ok, newpc := isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()); ok {
+ // Inject call to asyncPreempt
+ targetPC := funcPC(asyncPreempt)
+ switch GOARCH {
+ default:
+ throw("unsupported architecture")
+ case "386", "amd64":
+ // Make it look like the thread called targetPC.
+ sp := c.sp()
+ sp -= sys.PtrSize
+ *(*uintptr)(unsafe.Pointer(sp)) = newpc
+ c.set_sp(sp)
+ c.set_ip(targetPC)
+ }
+
+ stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+ }
+ }
+
+ atomic.Store(&mp.preemptExtLock, 0)
+
+ // Acknowledge the preemption.
+ atomic.Xadd(&mp.preemptGen, 1)
+
+ stdcall1(_ResumeThread, thread)
+ stdcall1(_CloseHandle, thread)
+}
+
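The 386/amd64 branch of preemptM above injects a call by hand: it pushes the safe-point PC as if it were a return address, then points the instruction pointer at asyncPreempt, so returning from asyncPreempt resumes the interrupted code. A toy, self-contained model of that frame manipulation (the cpu type and injectCall are illustrative, not runtime code):

	package main

	import "fmt"

	// cpu is a toy register/stack model; not runtime code.
	type cpu struct {
		ip    uint64
		sp    int
		stack []uint64
	}

	func (c *cpu) push(v uint64) {
		c.sp-- // stacks grow down
		c.stack[c.sp] = v
	}

	// injectCall mirrors the amd64 branch above: push the safe-point PC as
	// a fake return address, then redirect the instruction pointer to target.
	func injectCall(c *cpu, target, resumePC uint64) {
		c.push(resumePC)
		c.ip = target
	}

	func main() {
		c := &cpu{ip: 0x401000, sp: 8, stack: make([]uint64, 8)}
		injectCall(c, 0x500000 /* asyncPreempt */, 0x401005 /* safe-point PC */)
		fmt.Printf("ip=%#x, fake return address=%#x\n", c.ip, c.stack[c.sp])
	}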
+// osPreemptExtEnter is called before entering external code that may
+// call ExitProcess.
+//
+// This must be nosplit because it may be called from a syscall with
+// untyped stack slots, so the stack must not be grown or scanned.
+//
+//go:nosplit
+func osPreemptExtEnter(mp *m) {
+ for !atomic.Cas(&mp.preemptExtLock, 0, 1) {
+ // An asynchronous preemption is in progress. It's not
+ // safe to enter external code because it may call
+ // ExitProcess and deadlock with SuspendThread.
+ // Ideally we would do the preemption ourselves, but
+ // can't since there may be untyped syscall arguments
+ // on the stack. Instead, just wait and encourage the
+ // SuspendThread APC to run. The preemption should be
+ // done shortly.
+ osyield()
+ }
+ // Asynchronous preemption is now blocked.
+}
+
+// osPreemptExtExit is called after returning from external code that
+// may call ExitProcess.
+//
+// See osPreemptExtEnter for why this is nosplit.
+//
+//go:nosplit
+func osPreemptExtExit(mp *m) {
+ atomic.Store(&mp.preemptExtLock, 0)
+}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 5f33cd7..615249f 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -10,6 +10,19 @@
"unsafe"
)
+// We have two different ways of doing defers. The older way involves creating a
+// defer record at the time that a defer statement is executing and adding it to a
+// defer chain. This chain is inspected by the deferreturn call at all function
+// exits in order to run the appropriate defer calls. A cheaper way (which we call
+// open-coded defers) is used for functions in which no defer statements occur in
+// loops. In that case, we simply store the defer function/arg information into
+// specific stack slots at the point of each defer statement, as well as setting a
+// bit in a bitmask. At each function exit, we add inline code to directly make
+// the appropriate defer calls based on the bitmask and fn/arg information stored
+// on the stack. During panic/Goexit processing, the appropriate defer calls are
+// made using extra funcdata info that indicates the exact stack slots that
+// contain the bitmask and defer fn/args.
+
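A small illustration of the rule stated in the comment above: a defer outside any loop can be open-coded into fixed stack slots plus one bit in the frame's defer bitmask, while a defer inside a loop falls back to the chained defer records walked by deferreturn. Whether the compiler actually open-codes a particular defer also depends on other heuristics, so treat this purely as an illustration of the loop condition:

	package main

	import "fmt"

	// eligible: the defer is not inside a loop, so its function and
	// argument can live in fixed stack slots with one bit set in the
	// frame's defer bitmask, and the exit paths call it inline.
	func eligible() {
		defer fmt.Println("closed")
		fmt.Println("work")
	}

	// notEligible: a defer in a loop may execute a variable number of
	// times, so it uses the older chained defer records walked by
	// deferreturn.
	func notEligible() {
		for i := 0; i < 3; i++ {
			defer fmt.Println("undo step", i)
		}
	}

	func main() {
		eligible()
		notEligible()
	}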
// Check to make sure we can really generate a panic. If the panic
// was generated from the runtime, or from inside malloc, then convert
// to a throw of msg.
@@ -203,7 +216,8 @@
// The compiler turns a defer statement into a call to this.
//go:nosplit
func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn
- if getg().m.curg != getg() {
+ gp := getg()
+ if gp.m.curg != gp {
// go code on the system stack can't defer
throw("defer on system stack")
}
@@ -221,6 +235,8 @@
if d._panic != nil {
throw("deferproc: d.panic != nil after newdefer")
}
+ d.link = gp._defer
+ gp._defer = d
d.fn = fn
d.pc = callerpc
d.sp = sp
@@ -263,19 +279,24 @@
// are initialized here.
d.started = false
d.heap = false
+ d.openDefer = false
d.sp = getcallersp()
d.pc = getcallerpc()
+ d.framepc = 0
+ d.varp = 0
// The lines below implement:
// d.panic = nil
+ // d.fd = nil
// d.link = gp._defer
// gp._defer = d
- // But without write barriers. The first two are writes to
+ // But without write barriers. The first three are writes to
// the stack so they don't need a write barrier, and furthermore
// are to uninitialized memory, so they must not use a write barrier.
- // The third write does not require a write barrier because we
+ // The fourth write does not require a write barrier because we
// explicitly mark all the defer structures, so we don't need to
// keep track of pointers to them with a write barrier.
*(*uintptr)(unsafe.Pointer(&d._panic)) = 0
+ *(*uintptr)(unsafe.Pointer(&d.fd)) = 0
*(*uintptr)(unsafe.Pointer(&d.link)) = uintptr(unsafe.Pointer(gp._defer))
*(*uintptr)(unsafe.Pointer(&gp._defer)) = uintptr(unsafe.Pointer(d))
@@ -356,7 +377,8 @@
}
// Allocate a Defer, usually using per-P pool.
-// Each defer must be released with freedefer.
+// Each defer must be released with freedefer. The defer is not
+// added to any defer chain yet.
//
// This must not grow the stack because there may be a frame without
// stack map information when this is called.
@@ -406,8 +428,6 @@
}
d.siz = siz
d.heap = true
- d.link = gp._defer
- gp._defer = d
return d
}
@@ -463,8 +483,12 @@
// started causing a nosplit stack overflow via typedmemmove.
d.siz = 0
d.started = false
+ d.openDefer = false
d.sp = 0
d.pc = 0
+ d.framepc = 0
+ d.varp = 0
+ d.fd = nil
// d._panic and d.fn must be nil already.
// If not, we would have called freedeferpanic or freedeferfn above,
// both of which throw.
@@ -493,9 +517,11 @@
// to have been called by the caller of deferreturn at the point
// just before deferreturn was called. The effect is that deferreturn
// is called again and again until there are no more deferred functions.
-// Cannot split the stack because we reuse the caller's frame to
-// call the deferred function.
-
+//
+// Declared as nosplit, because the function should not be preempted once we start
+// modifying the caller's frame in order to reuse the frame to call the deferred
+// function.
+//
// The single argument isn't actually used - it just has its address
// taken so it can be matched against pending defers.
//go:nosplit
@@ -509,6 +535,15 @@
if d.sp != sp {
return
}
+ if d.openDefer {
+ done := runOpenDeferFrame(gp, d)
+ if !done {
+ throw("unfinished open-coded defers in deferreturn")
+ }
+ gp._defer = d.link
+ freedefer(d)
+ return
+ }
// Moving arguments around.
//
@@ -528,6 +563,12 @@
d.fn = nil
gp._defer = d.link
freedefer(d)
+ // If the defer function pointer is nil, force the seg fault to happen
+ // here rather than in jmpdefer. gentraceback() throws an error if it is
+ // called with a callback on an LR architecture and jmpdefer is on the
+ // stack, because the stack trace can be incorrect in that case - see
+ // issue #8153).
+ _ = fn.fn
jmpdefer(fn, uintptr(unsafe.Pointer(&arg0)))
}
@@ -544,6 +585,15 @@
// This code is similar to gopanic, see that implementation
// for detailed comments.
gp := getg()
+
+ // Create a panic object for Goexit, so we can recognize when it might be
+ // bypassed by a recover().
+ var p _panic
+ p.goexit = true
+ p.link = gp._panic
+ gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+
+ addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
for {
d := gp._defer
if d == nil {
@@ -554,13 +604,47 @@
d._panic.aborted = true
d._panic = nil
}
- d.fn = nil
- gp._defer = d.link
- freedefer(d)
- continue
+ if !d.openDefer {
+ d.fn = nil
+ gp._defer = d.link
+ freedefer(d)
+ continue
+ }
}
d.started = true
- reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+ d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+ if d.openDefer {
+ done := runOpenDeferFrame(gp, d)
+ if !done {
+ // We should always run all defers in the frame,
+ // since there is no panic associated with this
+ // defer that can be recovered.
+ throw("unfinished open-coded defers in Goexit")
+ }
+ if p.aborted {
+ // Since our current defer caused a panic and may
+ // have been already freed, just restart scanning
+ // for open-coded defers from this frame again.
+ addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
+ } else {
+ addOneOpenDeferFrame(gp, 0, nil)
+ }
+ } else {
+
+ // Save the pc/sp in reflectcallSave(), so we can "recover" back to this
+ // loop if necessary.
+ reflectcallSave(&p, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz))
+ }
+ if p.aborted {
+ // We had a recursive panic in the defer d we started, and
+ // then did a recover in a defer that was further down the
+ // defer chain than d. In the case of an outstanding Goexit,
+ // we force the recover to return back to this loop. d will
+ // have already been freed if completed, so just continue
+ // immediately to the next defer on the chain.
+ p.aborted = false
+ continue
+ }
if gp._defer != d {
throw("bad defer entry in Goexit")
}
@@ -597,7 +681,12 @@
func printpanics(p *_panic) {
if p.link != nil {
printpanics(p.link)
- print("\t")
+ if !p.link.goexit {
+ print("\t")
+ }
+ }
+ if p.goexit {
+ return
}
print("panic: ")
printany(p.arg)
@@ -607,6 +696,195 @@
print("\n")
}
+// addOneOpenDeferFrame scans the stack for the first frame (if any) with
+// open-coded defers and if it finds one, adds a single record to the defer chain
+// for that frame. If sp is non-nil, it starts the stack scan from the frame
+// specified by sp. If sp is nil, it uses the sp from the current defer record
+// (which has just been finished). Hence, it continues the stack scan from the
+// frame of the defer that just finished. It skips any frame that already has an
+// open-coded _defer record, which would have been created from a previous
+// (unrecovered) panic.
+//
+// Note: All entries of the defer chain (including this new open-coded entry) have
+// their pointers (including sp) adjusted properly if the stack moves while
+// running deferred functions. Also, it is safe to pass in the sp arg (which is
+// the direct result of calling getcallersp()), because all pointer variables
+// (including arguments) are adjusted as needed during stack copies.
+func addOneOpenDeferFrame(gp *g, pc uintptr, sp unsafe.Pointer) {
+ var prevDefer *_defer
+ if sp == nil {
+ prevDefer = gp._defer
+ pc = prevDefer.framepc
+ sp = unsafe.Pointer(prevDefer.sp)
+ }
+ systemstack(func() {
+ gentraceback(pc, uintptr(sp), 0, gp, 0, nil, 0x7fffffff,
+ func(frame *stkframe, unused unsafe.Pointer) bool {
+ if prevDefer != nil && prevDefer.sp == frame.sp {
+ // Skip the frame for the previous defer that
+ // we just finished (and was used to set
+ // where we restarted the stack scan)
+ return true
+ }
+ f := frame.fn
+ fd := funcdata(f, _FUNCDATA_OpenCodedDeferInfo)
+ if fd == nil {
+ return true
+ }
+ // Insert the open defer record in the
+ // chain, in order sorted by sp.
+ d := gp._defer
+ var prev *_defer
+ for d != nil {
+ dsp := d.sp
+ if frame.sp < dsp {
+ break
+ }
+ if frame.sp == dsp {
+ if !d.openDefer {
+ throw("duplicated defer entry")
+ }
+ return true
+ }
+ prev = d
+ d = d.link
+ }
+ if frame.fn.deferreturn == 0 {
+ throw("missing deferreturn")
+ }
+
+ maxargsize, _ := readvarintUnsafe(fd)
+ d1 := newdefer(int32(maxargsize))
+ d1.openDefer = true
+ d1._panic = nil
+ // These are the pc/sp to set after we've
+ // run a defer in this frame that did a
+ // recover. We return to a special
+ // deferreturn that runs any remaining
+ // defers and then returns from the
+ // function.
+ d1.pc = frame.fn.entry + uintptr(frame.fn.deferreturn)
+ d1.varp = frame.varp
+ d1.fd = fd
+ // Save the SP/PC associated with current frame,
+ // so we can continue stack trace later if needed.
+ d1.framepc = frame.pc
+ d1.sp = frame.sp
+ d1.link = d
+ if prev == nil {
+ gp._defer = d1
+ } else {
+ prev.link = d1
+ }
+ // Stop stack scanning after adding one open defer record
+ return false
+ },
+ nil, 0)
+ })
+}
+
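The insertion walk in addOneOpenDeferFrame above keeps the defer chain ordered by stack pointer, so the record for the innermost frame with open-coded defers stays at the front. A standalone model of that sorted splice (rec and insertBySP are illustrative names, not runtime code):

	package main

	import "fmt"

	// rec stands in for _defer; only the sp ordering matters here.
	type rec struct {
		sp   uintptr
		link *rec
	}

	// insertBySP splices d in front of the first record with a larger sp,
	// keeping the chain in increasing stack-pointer order (innermost first).
	func insertBySP(head, d *rec) *rec {
		var prev *rec
		cur := head
		for cur != nil && cur.sp < d.sp {
			prev, cur = cur, cur.link
		}
		d.link = cur
		if prev == nil {
			return d
		}
		prev.link = d
		return head
	}

	func main() {
		var head *rec
		for _, sp := range []uintptr{0x300, 0x100, 0x200} {
			head = insertBySP(head, &rec{sp: sp})
		}
		for d := head; d != nil; d = d.link {
			fmt.Printf("sp=%#x\n", d.sp)
		}
	}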
+// readvarintUnsafe reads the uint32 in varint format starting at fd, and returns the
+// uint32 and a pointer to the byte following the varint.
+//
+// There is a similar function runtime.readvarint, which takes a slice of bytes,
+// rather than an unsafe pointer. These functions are duplicated, because one of
+// the two use cases for the functions would get slower if the functions were
+// combined.
+func readvarintUnsafe(fd unsafe.Pointer) (uint32, unsafe.Pointer) {
+ var r uint32
+ var shift int
+ for {
+ b := *(*uint8)((unsafe.Pointer(fd)))
+ fd = add(fd, unsafe.Sizeof(b))
+ if b < 128 {
+ return r + uint32(b)<<shift, fd
+ }
+ r += ((uint32(b) &^ 128) << shift)
+ shift += 7
+ if shift > 28 {
+ panic("Bad varint")
+ }
+ }
+}
+
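The format read by readvarintUnsafe above is the usual base-128 varint: seven data bits per byte, with the high bit set on every byte except the last. A slice-based sketch of the same decoding, in the spirit of the runtime.readvarint counterpart mentioned in the comment, with a small worked example:

	package main

	import "fmt"

	// readvarint decodes an unsigned base-128 varint from b and returns
	// the value and the number of bytes consumed.
	func readvarint(b []byte) (uint32, int) {
		var v uint32
		var shift uint
		for i, c := range b {
			if c < 128 {
				return v + uint32(c)<<shift, i + 1
			}
			v += uint32(c&^128) << shift
			shift += 7
		}
		panic("truncated varint")
	}

	func main() {
		// 300 encodes as 0xAC 0x02: 0xAC carries the low 7 bits (44) with
		// the continuation bit set, 0x02 contributes 2<<7 = 256.
		v, n := readvarint([]byte{0xAC, 0x02})
		fmt.Println(v, n) // 300 2
	}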
+// runOpenDeferFrame runs the active open-coded defers in the frame specified by
+// d. It normally processes all active defers in the frame, but stops immediately
+// if a defer does a successful recover. It returns true if there are no
+// remaining defers to run in the frame.
+func runOpenDeferFrame(gp *g, d *_defer) bool {
+ done := true
+ fd := d.fd
+
+ // Skip the maxargsize
+ _, fd = readvarintUnsafe(fd)
+ deferBitsOffset, fd := readvarintUnsafe(fd)
+ nDefers, fd := readvarintUnsafe(fd)
+ deferBits := *(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset)))
+
+ for i := int(nDefers) - 1; i >= 0; i-- {
+ // read the funcdata info for this defer
+ var argWidth, closureOffset, nArgs uint32
+ argWidth, fd = readvarintUnsafe(fd)
+ closureOffset, fd = readvarintUnsafe(fd)
+ nArgs, fd = readvarintUnsafe(fd)
+ if deferBits&(1<<i) == 0 {
+ for j := uint32(0); j < nArgs; j++ {
+ _, fd = readvarintUnsafe(fd)
+ _, fd = readvarintUnsafe(fd)
+ _, fd = readvarintUnsafe(fd)
+ }
+ continue
+ }
+ closure := *(**funcval)(unsafe.Pointer(d.varp - uintptr(closureOffset)))
+ d.fn = closure
+ deferArgs := deferArgs(d)
+ // If there is an interface receiver or method receiver, it is
+ // described/included as the first arg.
+ for j := uint32(0); j < nArgs; j++ {
+ var argOffset, argLen, argCallOffset uint32
+ argOffset, fd = readvarintUnsafe(fd)
+ argLen, fd = readvarintUnsafe(fd)
+ argCallOffset, fd = readvarintUnsafe(fd)
+ memmove(unsafe.Pointer(uintptr(deferArgs)+uintptr(argCallOffset)),
+ unsafe.Pointer(d.varp-uintptr(argOffset)),
+ uintptr(argLen))
+ }
+ deferBits = deferBits &^ (1 << i)
+ *(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset))) = deferBits
+ p := d._panic
+ reflectcallSave(p, unsafe.Pointer(closure), deferArgs, argWidth)
+ if p != nil && p.aborted {
+ break
+ }
+ d.fn = nil
+ // These args are just a copy, so can be cleared immediately
+ memclrNoHeapPointers(deferArgs, uintptr(argWidth))
+ if d._panic != nil && d._panic.recovered {
+ done = deferBits == 0
+ break
+ }
+ }
+
+ return done
+}
+
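The loop in runOpenDeferFrame above is driven by deferBits, the per-frame bitmask described in the overview comment at the top of this file: bit i set means the i'th defer statement executed and is still pending; defers run in reverse order, and each clears its bit before running so a nested panic re-entering this record cannot run it twice. A toy model of just that bookkeeping (runFrame and the fixed nDefers are illustrative; the real count and offsets come from funcdata):

	package main

	import "fmt"

	// runFrame runs the pending defers recorded in deferBits, highest
	// index first, clearing each bit before invoking the callback.
	func runFrame(deferBits uint8, run func(i int)) uint8 {
		const nDefers = 3 // illustrative; the real count comes from funcdata
		for i := nDefers - 1; i >= 0; i-- {
			if deferBits&(1<<i) == 0 {
				continue // this defer was never reached (or already ran)
			}
			deferBits &^= 1 << i
			run(i)
		}
		return deferBits
	}

	func main() {
		// Defers 0 and 2 were reached; defer 1's statement never executed.
		left := runFrame(0b101, func(i int) { fmt.Println("running defer", i) })
		fmt.Printf("remaining bits: %03b\n", left) // 000
	}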
+// reflectcallSave calls reflectcall after saving the caller's pc and sp in the
+// panic record. This allows the runtime to return to the Goexit defer processing
+// loop, in the unusual case where the Goexit may be bypassed by a successful
+// recover.
+func reflectcallSave(p *_panic, fn, arg unsafe.Pointer, argsize uint32) {
+ if p != nil {
+ p.argp = unsafe.Pointer(getargp(0))
+ p.pc = getcallerpc()
+ p.sp = unsafe.Pointer(getcallersp())
+ }
+ reflectcall(nil, fn, arg, argsize, argsize)
+ if p != nil {
+ p.pc = 0
+ p.sp = unsafe.Pointer(nil)
+ }
+}
+
// The implementation of the predeclared function panic.
func gopanic(e interface{}) {
gp := getg()
@@ -646,6 +924,10 @@
atomic.Xadd(&runningPanicDefers, 1)
+ // By calculating getcallerpc/getcallersp here, we avoid scanning the
+ // gopanic frame (stack scanning is slow...)
+ addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
+
for {
d := gp._defer
if d == nil {
@@ -653,16 +935,23 @@
}
// If defer was started by earlier panic or Goexit (and, since we're back here, that triggered a new panic),
- // take defer off list. The earlier panic or Goexit will not continue running.
+ // take defer off list. An earlier panic will not continue running, but we will make sure below that an
+ // earlier Goexit does continue running.
if d.started {
if d._panic != nil {
d._panic.aborted = true
}
d._panic = nil
- d.fn = nil
- gp._defer = d.link
- freedefer(d)
- continue
+ if !d.openDefer {
+ // For open-coded defers, we need to process the
+ // defer again, in case there are any other defers
+ // to call in the frame (not including the defer
+ // call that caused the panic).
+ d.fn = nil
+ gp._defer = d.link
+ freedefer(d)
+ continue
+ }
}
// Mark defer as started, but keep on list, so that traceback
@@ -675,8 +964,16 @@
// will find d in the list and will mark d._panic (this panic) aborted.
d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
- p.argp = unsafe.Pointer(getargp(0))
- reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+ done := true
+ if d.openDefer {
+ done = runOpenDeferFrame(gp, d)
+ if done && !d._panic.recovered {
+ addOneOpenDeferFrame(gp, 0, nil)
+ }
+ } else {
+ p.argp = unsafe.Pointer(getargp(0))
+ reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+ }
p.argp = nil
// reflectcall did not panic. Remove d.
@@ -684,18 +981,63 @@
throw("bad defer entry in panic")
}
d._panic = nil
- d.fn = nil
- gp._defer = d.link
// trigger shrinkage to test stack copy. See stack_test.go:TestStackPanic
//GC()
pc := d.pc
sp := unsafe.Pointer(d.sp) // must be pointer so it gets adjusted during stack copy
- freedefer(d)
+ if done {
+ d.fn = nil
+ gp._defer = d.link
+ freedefer(d)
+ }
if p.recovered {
+ gp._panic = p.link
+ if gp._panic != nil && gp._panic.goexit && gp._panic.aborted {
+ // A normal recover would bypass/abort the Goexit. Instead,
+ // we return to the processing loop of the Goexit.
+ gp.sigcode0 = uintptr(gp._panic.sp)
+ gp.sigcode1 = uintptr(gp._panic.pc)
+ mcall(recovery)
+ throw("bypassed recovery failed") // mcall should not return
+ }
atomic.Xadd(&runningPanicDefers, -1)
+ if done {
+ // Remove any remaining non-started, open-coded
+ // defer entries after a recover, since the
+ // corresponding defers will be executed normally
+ // (inline). Any such entry will become stale once
+ // we run the corresponding defers inline and exit
+ // the associated stack frame.
+ d := gp._defer
+ var prev *_defer
+ for d != nil {
+ if d.openDefer {
+ if d.started {
+ // This defer is started but we
+ // are in the middle of a
+ // defer-panic-recover inside of
+ // it, so don't remove it or any
+ // further defer entries
+ break
+ }
+ if prev == nil {
+ gp._defer = d.link
+ } else {
+ prev.link = d.link
+ }
+ newd := d.link
+ freedefer(d)
+ d = newd
+ } else {
+ prev = d
+ d = d.link
+ }
+ }
+ }
+
gp._panic = p.link
// Aborted panics are marked but remain on the g.panic list.
// Remove them from the list.
@@ -748,7 +1090,7 @@
// If they match, the caller is the one who can recover.
gp := getg()
p := gp._panic
- if p != nil && !p.recovered && argp == uintptr(p.argp) {
+ if p != nil && !p.goexit && !p.recovered && argp == uintptr(p.argp) {
p.recovered = true
return p.arg
}
@@ -803,7 +1145,7 @@
}
// Make the deferproc for this d return again,
- // this time returning 1. The calling function will
+ // this time returning 1. The calling function will
// jump to the standard return epilogue.
gp.sched.sp = sp
gp.sched.pc = pc
@@ -941,6 +1283,12 @@
}
}
+// throwReportQuirk, if non-nil, is called by throw after dumping the stacks.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var throwReportQuirk func()
+
var didothers bool
var deadlock mutex
@@ -987,6 +1335,10 @@
printDebugLog()
+ if throwReportQuirk != nil {
+ throwReportQuirk()
+ }
+
return docrash
}
diff --git a/src/runtime/panic32.go b/src/runtime/panic32.go
index b89ce9d..aea8401 100644
--- a/src/runtime/panic32.go
+++ b/src/runtime/panic32.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build 386 amd64p32 arm mips mipsle
+// +build 386 arm mips mipsle
package runtime
diff --git a/src/runtime/panic_test.go b/src/runtime/panic_test.go
new file mode 100644
index 0000000..b8a300f
--- /dev/null
+++ b/src/runtime/panic_test.go
@@ -0,0 +1,48 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "strings"
+ "testing"
+)
+
+// Test that panics print out the underlying value
+// when the underlying kind is directly printable.
+// Issue: https://golang.org/issues/37531
+func TestPanicWithDirectlyPrintableCustomTypes(t *testing.T) {
+ tests := []struct {
+ name string
+ wantPanicPrefix string
+ }{
+ {"panicCustomBool", `panic: main.MyBool(true)`},
+ {"panicCustomComplex128", `panic: main.MyComplex128(+3.210000e+001+1.000000e+001i)`},
+ {"panicCustomComplex64", `panic: main.MyComplex64(+1.100000e-001+3.000000e+000i)`},
+ {"panicCustomFloat32", `panic: main.MyFloat32(-9.370000e+001)`},
+ {"panicCustomFloat64", `panic: main.MyFloat64(-9.370000e+001)`},
+ {"panicCustomInt", `panic: main.MyInt(93)`},
+ {"panicCustomInt8", `panic: main.MyInt8(93)`},
+ {"panicCustomInt16", `panic: main.MyInt16(93)`},
+ {"panicCustomInt32", `panic: main.MyInt32(93)`},
+ {"panicCustomInt64", `panic: main.MyInt64(93)`},
+ {"panicCustomString", `panic: main.MyString("Panic")`},
+ {"panicCustomUint", `panic: main.MyUint(93)`},
+ {"panicCustomUint8", `panic: main.MyUint8(93)`},
+ {"panicCustomUint16", `panic: main.MyUint16(93)`},
+ {"panicCustomUint32", `panic: main.MyUint32(93)`},
+ {"panicCustomUint64", `panic: main.MyUint64(93)`},
+ {"panicCustomUintptr", `panic: main.MyUintptr(93)`},
+ }
+
+ for _, tt := range tests {
+ t := t
+ t.Run(tt.name, func(t *testing.T) {
+ output := runTestProg(t, "testprog", tt.name)
+ if !strings.HasPrefix(output, tt.wantPanicPrefix) {
+ t.Fatalf("%q\nis not present in\n%s", tt.wantPanicPrefix, output)
+ }
+ })
+ }
+}
diff --git a/src/runtime/pprof/internal/profile/encode.go b/src/runtime/pprof/internal/profile/encode.go
deleted file mode 100644
index af31933..0000000
--- a/src/runtime/pprof/internal/profile/encode.go
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package profile
-
-import (
- "errors"
- "fmt"
- "sort"
-)
-
-func (p *Profile) decoder() []decoder {
- return profileDecoder
-}
-
-// preEncode populates the unexported fields to be used by encode
-// (with suffix X) from the corresponding exported fields. The
-// exported fields are cleared up to facilitate testing.
-func (p *Profile) preEncode() {
- strings := make(map[string]int)
- addString(strings, "")
-
- for _, st := range p.SampleType {
- st.typeX = addString(strings, st.Type)
- st.unitX = addString(strings, st.Unit)
- }
-
- for _, s := range p.Sample {
- s.labelX = nil
- var keys []string
- for k := range s.Label {
- keys = append(keys, k)
- }
- sort.Strings(keys)
- for _, k := range keys {
- vs := s.Label[k]
- for _, v := range vs {
- s.labelX = append(s.labelX,
- Label{
- keyX: addString(strings, k),
- strX: addString(strings, v),
- },
- )
- }
- }
- var numKeys []string
- for k := range s.NumLabel {
- numKeys = append(numKeys, k)
- }
- sort.Strings(numKeys)
- for _, k := range numKeys {
- vs := s.NumLabel[k]
- for _, v := range vs {
- s.labelX = append(s.labelX,
- Label{
- keyX: addString(strings, k),
- numX: v,
- },
- )
- }
- }
- s.locationIDX = nil
- for _, l := range s.Location {
- s.locationIDX = append(s.locationIDX, l.ID)
- }
- }
-
- for _, m := range p.Mapping {
- m.fileX = addString(strings, m.File)
- m.buildIDX = addString(strings, m.BuildID)
- }
-
- for _, l := range p.Location {
- for i, ln := range l.Line {
- if ln.Function != nil {
- l.Line[i].functionIDX = ln.Function.ID
- } else {
- l.Line[i].functionIDX = 0
- }
- }
- if l.Mapping != nil {
- l.mappingIDX = l.Mapping.ID
- } else {
- l.mappingIDX = 0
- }
- }
- for _, f := range p.Function {
- f.nameX = addString(strings, f.Name)
- f.systemNameX = addString(strings, f.SystemName)
- f.filenameX = addString(strings, f.Filename)
- }
-
- p.dropFramesX = addString(strings, p.DropFrames)
- p.keepFramesX = addString(strings, p.KeepFrames)
-
- if pt := p.PeriodType; pt != nil {
- pt.typeX = addString(strings, pt.Type)
- pt.unitX = addString(strings, pt.Unit)
- }
-
- p.stringTable = make([]string, len(strings))
- for s, i := range strings {
- p.stringTable[i] = s
- }
-}
-
-func (p *Profile) encode(b *buffer) {
- for _, x := range p.SampleType {
- encodeMessage(b, 1, x)
- }
- for _, x := range p.Sample {
- encodeMessage(b, 2, x)
- }
- for _, x := range p.Mapping {
- encodeMessage(b, 3, x)
- }
- for _, x := range p.Location {
- encodeMessage(b, 4, x)
- }
- for _, x := range p.Function {
- encodeMessage(b, 5, x)
- }
- encodeStrings(b, 6, p.stringTable)
- encodeInt64Opt(b, 7, p.dropFramesX)
- encodeInt64Opt(b, 8, p.keepFramesX)
- encodeInt64Opt(b, 9, p.TimeNanos)
- encodeInt64Opt(b, 10, p.DurationNanos)
- if pt := p.PeriodType; pt != nil && (pt.typeX != 0 || pt.unitX != 0) {
- encodeMessage(b, 11, p.PeriodType)
- }
- encodeInt64Opt(b, 12, p.Period)
-}
-
-var profileDecoder = []decoder{
- nil, // 0
- // repeated ValueType sample_type = 1
- func(b *buffer, m message) error {
- x := new(ValueType)
- pp := m.(*Profile)
- pp.SampleType = append(pp.SampleType, x)
- return decodeMessage(b, x)
- },
- // repeated Sample sample = 2
- func(b *buffer, m message) error {
- x := new(Sample)
- pp := m.(*Profile)
- pp.Sample = append(pp.Sample, x)
- return decodeMessage(b, x)
- },
- // repeated Mapping mapping = 3
- func(b *buffer, m message) error {
- x := new(Mapping)
- pp := m.(*Profile)
- pp.Mapping = append(pp.Mapping, x)
- return decodeMessage(b, x)
- },
- // repeated Location location = 4
- func(b *buffer, m message) error {
- x := new(Location)
- pp := m.(*Profile)
- pp.Location = append(pp.Location, x)
- return decodeMessage(b, x)
- },
- // repeated Function function = 5
- func(b *buffer, m message) error {
- x := new(Function)
- pp := m.(*Profile)
- pp.Function = append(pp.Function, x)
- return decodeMessage(b, x)
- },
- // repeated string string_table = 6
- func(b *buffer, m message) error {
- err := decodeStrings(b, &m.(*Profile).stringTable)
- if err != nil {
- return err
- }
- if *&m.(*Profile).stringTable[0] != "" {
- return errors.New("string_table[0] must be ''")
- }
- return nil
- },
- // repeated int64 drop_frames = 7
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).dropFramesX) },
- // repeated int64 keep_frames = 8
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).keepFramesX) },
- // repeated int64 time_nanos = 9
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).TimeNanos) },
- // repeated int64 duration_nanos = 10
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).DurationNanos) },
- // optional string period_type = 11
- func(b *buffer, m message) error {
- x := new(ValueType)
- pp := m.(*Profile)
- pp.PeriodType = x
- return decodeMessage(b, x)
- },
- // repeated int64 period = 12
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) },
- // repeated int64 comment = 13
- func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) },
- // int64 defaultSampleType = 14
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) },
-}
-
-// postDecode takes the unexported fields populated by decode (with
-// suffix X) and populates the corresponding exported fields.
-// The unexported fields are cleared up to facilitate testing.
-func (p *Profile) postDecode() error {
- var err error
-
- mappings := make(map[uint64]*Mapping)
- for _, m := range p.Mapping {
- m.File, err = getString(p.stringTable, &m.fileX, err)
- m.BuildID, err = getString(p.stringTable, &m.buildIDX, err)
- mappings[m.ID] = m
- }
-
- functions := make(map[uint64]*Function)
- for _, f := range p.Function {
- f.Name, err = getString(p.stringTable, &f.nameX, err)
- f.SystemName, err = getString(p.stringTable, &f.systemNameX, err)
- f.Filename, err = getString(p.stringTable, &f.filenameX, err)
- functions[f.ID] = f
- }
-
- locations := make(map[uint64]*Location)
- for _, l := range p.Location {
- l.Mapping = mappings[l.mappingIDX]
- l.mappingIDX = 0
- for i, ln := range l.Line {
- if id := ln.functionIDX; id != 0 {
- l.Line[i].Function = functions[id]
- if l.Line[i].Function == nil {
- return fmt.Errorf("Function ID %d not found", id)
- }
- l.Line[i].functionIDX = 0
- }
- }
- locations[l.ID] = l
- }
-
- for _, st := range p.SampleType {
- st.Type, err = getString(p.stringTable, &st.typeX, err)
- st.Unit, err = getString(p.stringTable, &st.unitX, err)
- }
-
- for _, s := range p.Sample {
- labels := make(map[string][]string)
- numLabels := make(map[string][]int64)
- for _, l := range s.labelX {
- var key, value string
- key, err = getString(p.stringTable, &l.keyX, err)
- if l.strX != 0 {
- value, err = getString(p.stringTable, &l.strX, err)
- labels[key] = append(labels[key], value)
- } else {
- numLabels[key] = append(numLabels[key], l.numX)
- }
- }
- if len(labels) > 0 {
- s.Label = labels
- }
- if len(numLabels) > 0 {
- s.NumLabel = numLabels
- }
- s.Location = nil
- for _, lid := range s.locationIDX {
- s.Location = append(s.Location, locations[lid])
- }
- s.locationIDX = nil
- }
-
- p.DropFrames, err = getString(p.stringTable, &p.dropFramesX, err)
- p.KeepFrames, err = getString(p.stringTable, &p.keepFramesX, err)
-
- if pt := p.PeriodType; pt == nil {
- p.PeriodType = &ValueType{}
- }
-
- if pt := p.PeriodType; pt != nil {
- pt.Type, err = getString(p.stringTable, &pt.typeX, err)
- pt.Unit, err = getString(p.stringTable, &pt.unitX, err)
- }
- for _, i := range p.commentX {
- var c string
- c, err = getString(p.stringTable, &i, err)
- p.Comments = append(p.Comments, c)
- }
-
- p.commentX = nil
- p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err)
- p.stringTable = nil
- return nil
-}
-
-func (p *ValueType) decoder() []decoder {
- return valueTypeDecoder
-}
-
-func (p *ValueType) encode(b *buffer) {
- encodeInt64Opt(b, 1, p.typeX)
- encodeInt64Opt(b, 2, p.unitX)
-}
-
-var valueTypeDecoder = []decoder{
- nil, // 0
- // optional int64 type = 1
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*ValueType).typeX) },
- // optional int64 unit = 2
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*ValueType).unitX) },
-}
-
-func (p *Sample) decoder() []decoder {
- return sampleDecoder
-}
-
-func (p *Sample) encode(b *buffer) {
- encodeUint64s(b, 1, p.locationIDX)
- for _, x := range p.Value {
- encodeInt64(b, 2, x)
- }
- for _, x := range p.labelX {
- encodeMessage(b, 3, x)
- }
-}
-
-var sampleDecoder = []decoder{
- nil, // 0
- // repeated uint64 location = 1
- func(b *buffer, m message) error { return decodeUint64s(b, &m.(*Sample).locationIDX) },
- // repeated int64 value = 2
- func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Sample).Value) },
- // repeated Label label = 3
- func(b *buffer, m message) error {
- s := m.(*Sample)
- n := len(s.labelX)
- s.labelX = append(s.labelX, Label{})
- return decodeMessage(b, &s.labelX[n])
- },
-}
-
-func (p Label) decoder() []decoder {
- return labelDecoder
-}
-
-func (p Label) encode(b *buffer) {
- encodeInt64Opt(b, 1, p.keyX)
- encodeInt64Opt(b, 2, p.strX)
- encodeInt64Opt(b, 3, p.numX)
-}
-
-var labelDecoder = []decoder{
- nil, // 0
- // optional int64 key = 1
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Label).keyX) },
- // optional int64 str = 2
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Label).strX) },
- // optional int64 num = 3
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Label).numX) },
-}
-
-func (p *Mapping) decoder() []decoder {
- return mappingDecoder
-}
-
-func (p *Mapping) encode(b *buffer) {
- encodeUint64Opt(b, 1, p.ID)
- encodeUint64Opt(b, 2, p.Start)
- encodeUint64Opt(b, 3, p.Limit)
- encodeUint64Opt(b, 4, p.Offset)
- encodeInt64Opt(b, 5, p.fileX)
- encodeInt64Opt(b, 6, p.buildIDX)
- encodeBoolOpt(b, 7, p.HasFunctions)
- encodeBoolOpt(b, 8, p.HasFilenames)
- encodeBoolOpt(b, 9, p.HasLineNumbers)
- encodeBoolOpt(b, 10, p.HasInlineFrames)
-}
-
-var mappingDecoder = []decoder{
- nil, // 0
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).ID) }, // optional uint64 id = 1
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Start) }, // optional uint64 memory_offset = 2
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Limit) }, // optional uint64 memory_limit = 3
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Offset) }, // optional uint64 file_offset = 4
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Mapping).fileX) }, // optional int64 filename = 5
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Mapping).buildIDX) }, // optional int64 build_id = 6
- func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasFunctions) }, // optional bool has_functions = 7
- func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasFilenames) }, // optional bool has_filenames = 8
- func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasLineNumbers) }, // optional bool has_line_numbers = 9
- func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasInlineFrames) }, // optional bool has_inline_frames = 10
-}
-
-func (p *Location) decoder() []decoder {
- return locationDecoder
-}
-
-func (p *Location) encode(b *buffer) {
- encodeUint64Opt(b, 1, p.ID)
- encodeUint64Opt(b, 2, p.mappingIDX)
- encodeUint64Opt(b, 3, p.Address)
- for i := range p.Line {
- encodeMessage(b, 4, &p.Line[i])
- }
-}
-
-var locationDecoder = []decoder{
- nil, // 0
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).ID) }, // optional uint64 id = 1;
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).mappingIDX) }, // optional uint64 mapping_id = 2;
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).Address) }, // optional uint64 address = 3;
- func(b *buffer, m message) error { // repeated Line line = 4
- pp := m.(*Location)
- n := len(pp.Line)
- pp.Line = append(pp.Line, Line{})
- return decodeMessage(b, &pp.Line[n])
- },
-}
-
-func (p *Line) decoder() []decoder {
- return lineDecoder
-}
-
-func (p *Line) encode(b *buffer) {
- encodeUint64Opt(b, 1, p.functionIDX)
- encodeInt64Opt(b, 2, p.Line)
-}
-
-var lineDecoder = []decoder{
- nil, // 0
- // optional uint64 function_id = 1
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Line).functionIDX) },
- // optional int64 line = 2
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Line).Line) },
-}
-
-func (p *Function) decoder() []decoder {
- return functionDecoder
-}
-
-func (p *Function) encode(b *buffer) {
- encodeUint64Opt(b, 1, p.ID)
- encodeInt64Opt(b, 2, p.nameX)
- encodeInt64Opt(b, 3, p.systemNameX)
- encodeInt64Opt(b, 4, p.filenameX)
- encodeInt64Opt(b, 5, p.StartLine)
-}
-
-var functionDecoder = []decoder{
- nil, // 0
- // optional uint64 id = 1
- func(b *buffer, m message) error { return decodeUint64(b, &m.(*Function).ID) },
- // optional int64 function_name = 2
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).nameX) },
- // optional int64 function_system_name = 3
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).systemNameX) },
- // repeated int64 filename = 4
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).filenameX) },
- // optional int64 start_line = 5
- func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).StartLine) },
-}
-
-func addString(strings map[string]int, s string) int64 {
- i, ok := strings[s]
- if !ok {
- i = len(strings)
- strings[s] = i
- }
- return int64(i)
-}
-
-func getString(strings []string, strng *int64, err error) (string, error) {
- if err != nil {
- return "", err
- }
- s := int(*strng)
- if s < 0 || s >= len(strings) {
- return "", errMalformed
- }
- *strng = 0
- return strings[s], nil
-}
diff --git a/src/runtime/pprof/internal/profile/filter.go b/src/runtime/pprof/internal/profile/filter.go
deleted file mode 100644
index 9cad866..0000000
--- a/src/runtime/pprof/internal/profile/filter.go
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implements methods to filter samples from profiles.
-
-package profile
-
-import "regexp"
-
-// FilterSamplesByName filters the samples in a profile and only keeps
-// samples where at least one frame matches focus but none match ignore.
-// Returns true is the corresponding regexp matched at least one sample.
-func (p *Profile) FilterSamplesByName(focus, ignore, hide *regexp.Regexp) (fm, im, hm bool) {
- focusOrIgnore := make(map[uint64]bool)
- hidden := make(map[uint64]bool)
- for _, l := range p.Location {
- if ignore != nil && l.matchesName(ignore) {
- im = true
- focusOrIgnore[l.ID] = false
- } else if focus == nil || l.matchesName(focus) {
- fm = true
- focusOrIgnore[l.ID] = true
- }
- if hide != nil && l.matchesName(hide) {
- hm = true
- l.Line = l.unmatchedLines(hide)
- if len(l.Line) == 0 {
- hidden[l.ID] = true
- }
- }
- }
-
- s := make([]*Sample, 0, len(p.Sample))
- for _, sample := range p.Sample {
- if focusedAndNotIgnored(sample.Location, focusOrIgnore) {
- if len(hidden) > 0 {
- var locs []*Location
- for _, loc := range sample.Location {
- if !hidden[loc.ID] {
- locs = append(locs, loc)
- }
- }
- if len(locs) == 0 {
- // Remove sample with no locations (by not adding it to s).
- continue
- }
- sample.Location = locs
- }
- s = append(s, sample)
- }
- }
- p.Sample = s
-
- return
-}
-
-// matchesName reports whether the function name or file in the
-// location matches the regular expression.
-func (loc *Location) matchesName(re *regexp.Regexp) bool {
- for _, ln := range loc.Line {
- if fn := ln.Function; fn != nil {
- if re.MatchString(fn.Name) {
- return true
- }
- if re.MatchString(fn.Filename) {
- return true
- }
- }
- }
- return false
-}
-
-// unmatchedLines returns the lines in the location that do not match
-// the regular expression.
-func (loc *Location) unmatchedLines(re *regexp.Regexp) []Line {
- var lines []Line
- for _, ln := range loc.Line {
- if fn := ln.Function; fn != nil {
- if re.MatchString(fn.Name) {
- continue
- }
- if re.MatchString(fn.Filename) {
- continue
- }
- }
- lines = append(lines, ln)
- }
- return lines
-}
-
-// focusedAndNotIgnored looks up a slice of ids against a map of
-// focused/ignored locations. The map only contains locations that are
-// explicitly focused or ignored. Returns whether there is at least
-// one focused location but no ignored locations.
-func focusedAndNotIgnored(locs []*Location, m map[uint64]bool) bool {
- var f bool
- for _, loc := range locs {
- if focus, focusOrIgnore := m[loc.ID]; focusOrIgnore {
- if focus {
- // Found focused location. Must keep searching in case there
- // is an ignored one as well.
- f = true
- } else {
- // Found ignored location. Can return false right away.
- return false
- }
- }
- }
- return f
-}
-
-// TagMatch selects tags for filtering
-type TagMatch func(key, val string, nval int64) bool
-
-// FilterSamplesByTag removes all samples from the profile, except
-// those that match focus and do not match the ignore regular
-// expression.
-func (p *Profile) FilterSamplesByTag(focus, ignore TagMatch) (fm, im bool) {
- samples := make([]*Sample, 0, len(p.Sample))
- for _, s := range p.Sample {
- focused, ignored := focusedSample(s, focus, ignore)
- fm = fm || focused
- im = im || ignored
- if focused && !ignored {
- samples = append(samples, s)
- }
- }
- p.Sample = samples
- return
-}
-
-// focusedTag checks a sample against focus and ignore regexps.
-// Returns whether the focus/ignore regexps match any tags
-func focusedSample(s *Sample, focus, ignore TagMatch) (fm, im bool) {
- fm = focus == nil
- for key, vals := range s.Label {
- for _, val := range vals {
- if ignore != nil && ignore(key, val, 0) {
- im = true
- }
- if !fm && focus(key, val, 0) {
- fm = true
- }
- }
- }
- for key, vals := range s.NumLabel {
- for _, val := range vals {
- if ignore != nil && ignore(key, "", val) {
- im = true
- }
- if !fm && focus(key, "", val) {
- fm = true
- }
- }
- }
- return fm, im
-}
diff --git a/src/runtime/pprof/internal/profile/legacy_profile.go b/src/runtime/pprof/internal/profile/legacy_profile.go
deleted file mode 100644
index d69f8de..0000000
--- a/src/runtime/pprof/internal/profile/legacy_profile.go
+++ /dev/null
@@ -1,1266 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file implements parsers to convert legacy profiles into the
-// profile.proto format.
-
-package profile
-
-import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "math"
- "regexp"
- "strconv"
- "strings"
-)
-
-var (
- countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
- countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)
-
- heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
- heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
-
- contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
-
- hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
-
- growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)
-
- fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)
-
- threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
- threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
-
- procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
-
- briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
-
- // LegacyHeapAllocated instructs the heapz parsers to use the
- // allocated memory stats instead of the default in-use memory. Note
- // that tcmalloc doesn't provide all allocated memory, only in-use
- // stats.
- LegacyHeapAllocated bool
-)
-
-func isSpaceOrComment(line string) bool {
- trimmed := strings.TrimSpace(line)
- return len(trimmed) == 0 || trimmed[0] == '#'
-}
-
-// parseGoCount parses a Go count profile (e.g., threadcreate or
-// goroutine) and returns a new Profile.
-func parseGoCount(b []byte) (*Profile, error) {
- r := bytes.NewBuffer(b)
-
- var line string
- var err error
- for {
- // Skip past comments and empty lines seeking a real header.
- line, err = r.ReadString('\n')
- if err != nil {
- return nil, err
- }
- if !isSpaceOrComment(line) {
- break
- }
- }
-
- m := countStartRE.FindStringSubmatch(line)
- if m == nil {
- return nil, errUnrecognized
- }
- profileType := m[1]
- p := &Profile{
- PeriodType: &ValueType{Type: profileType, Unit: "count"},
- Period: 1,
- SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
- }
- locations := make(map[uint64]*Location)
- for {
- line, err = r.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- return nil, err
- }
- if isSpaceOrComment(line) {
- continue
- }
- if strings.HasPrefix(line, "---") {
- break
- }
- m := countRE.FindStringSubmatch(line)
- if m == nil {
- return nil, errMalformed
- }
- n, err := strconv.ParseInt(m[1], 0, 64)
- if err != nil {
- return nil, errMalformed
- }
- fields := strings.Fields(m[2])
- locs := make([]*Location, 0, len(fields))
- for _, stk := range fields {
- addr, err := strconv.ParseUint(stk, 0, 64)
- if err != nil {
- return nil, errMalformed
- }
- // Adjust all frames by -1 to land on the call instruction.
- addr--
- loc := locations[addr]
- if loc == nil {
- loc = &Location{
- Address: addr,
- }
- locations[addr] = loc
- p.Location = append(p.Location, loc)
- }
- locs = append(locs, loc)
- }
- p.Sample = append(p.Sample, &Sample{
- Location: locs,
- Value: []int64{n},
- })
- }
-
- if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
- return nil, err
- }
- return p, nil
-}
-
-// remapLocationIDs ensures there is a location for each address
-// referenced by a sample, and remaps the samples to point to the new
-// location ids.
-func (p *Profile) remapLocationIDs() {
- seen := make(map[*Location]bool, len(p.Location))
- var locs []*Location
-
- for _, s := range p.Sample {
- for _, l := range s.Location {
- if seen[l] {
- continue
- }
- l.ID = uint64(len(locs) + 1)
- locs = append(locs, l)
- seen[l] = true
- }
- }
- p.Location = locs
-}
-
-func (p *Profile) remapFunctionIDs() {
- seen := make(map[*Function]bool, len(p.Function))
- var fns []*Function
-
- for _, l := range p.Location {
- for _, ln := range l.Line {
- fn := ln.Function
- if fn == nil || seen[fn] {
- continue
- }
- fn.ID = uint64(len(fns) + 1)
- fns = append(fns, fn)
- seen[fn] = true
- }
- }
- p.Function = fns
-}
-
-// remapMappingIDs matches location addresses with existing mappings
-// and updates them appropriately. This is O(N*M), if this ever shows
-// up as a bottleneck, evaluate sorting the mappings and doing a
-// binary search, which would make it O(N*log(M)).
-func (p *Profile) remapMappingIDs() {
- if len(p.Mapping) == 0 {
- return
- }
-
- // Some profile handlers will incorrectly set regions for the main
- // executable if its section is remapped. Fix them through heuristics.
-
- // Remove the initial mapping if named '/anon_hugepage' and has a
- // consecutive adjacent mapping.
- if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
- if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
- p.Mapping = p.Mapping[1:]
- }
- }
-
- // Subtract the offset from the start of the main mapping if it
- // ends up at a recognizable start address.
- const expectedStart = 0x400000
- if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
- m.Start = expectedStart
- m.Offset = 0
- }
-
- for _, l := range p.Location {
- if a := l.Address; a != 0 {
- for _, m := range p.Mapping {
- if m.Start <= a && a < m.Limit {
- l.Mapping = m
- break
- }
- }
- }
- }
-
- // Reset all mapping IDs.
- for i, m := range p.Mapping {
- m.ID = uint64(i + 1)
- }
-}
-
-var cpuInts = []func([]byte) (uint64, []byte){
- get32l,
- get32b,
- get64l,
- get64b,
-}
-
-func get32l(b []byte) (uint64, []byte) {
- if len(b) < 4 {
- return 0, nil
- }
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
-}
-
-func get32b(b []byte) (uint64, []byte) {
- if len(b) < 4 {
- return 0, nil
- }
- return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
-}
-
-func get64l(b []byte) (uint64, []byte) {
- if len(b) < 8 {
- return 0, nil
- }
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
-}
-
-func get64b(b []byte) (uint64, []byte) {
- if len(b) < 8 {
- return 0, nil
- }
- return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
-}
-
-// ParseTracebacks parses a set of tracebacks and returns a newly
-// populated profile. It will accept any text file and generate a
-// Profile out of it with any hex addresses it can identify, including
-// a process map if it can recognize one. Each sample will include a
-// tag "source" with the addresses recognized in string format.
-func ParseTracebacks(b []byte) (*Profile, error) {
- r := bytes.NewBuffer(b)
-
- p := &Profile{
- PeriodType: &ValueType{Type: "trace", Unit: "count"},
- Period: 1,
- SampleType: []*ValueType{
- {Type: "trace", Unit: "count"},
- },
- }
-
- var sources []string
- var sloc []*Location
-
- locs := make(map[uint64]*Location)
- for {
- l, err := r.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- return nil, err
- }
- if l == "" {
- break
- }
- }
- if sectionTrigger(l) == memoryMapSection {
- break
- }
- if s, addrs := extractHexAddresses(l); len(s) > 0 {
- for _, addr := range addrs {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call.
- addr--
- loc := locs[addr]
- if locs[addr] == nil {
- loc = &Location{
- Address: addr,
- }
- p.Location = append(p.Location, loc)
- locs[addr] = loc
- }
- sloc = append(sloc, loc)
- }
-
- sources = append(sources, s...)
- } else {
- if len(sources) > 0 || len(sloc) > 0 {
- addTracebackSample(sloc, sources, p)
- sloc, sources = nil, nil
- }
- }
- }
-
- // Add final sample to save any leftover data.
- if len(sources) > 0 || len(sloc) > 0 {
- addTracebackSample(sloc, sources, p)
- }
-
- if err := p.ParseMemoryMap(r); err != nil {
- return nil, err
- }
- return p, nil
-}
-
-func addTracebackSample(l []*Location, s []string, p *Profile) {
- p.Sample = append(p.Sample,
- &Sample{
- Value: []int64{1},
- Location: l,
- Label: map[string][]string{"source": s},
- })
-}
-
-// parseCPU parses a profilez legacy profile and returns a newly
-// populated Profile.
-//
-// The general format for profilez samples is a sequence of words in
-// binary format. The first words are a header with the following data:
-// 1st word -- 0
-// 2nd word -- 3
-// 3rd word -- 0 if a c++ application, 1 if a java application.
-// 4th word -- Sampling period (in microseconds).
-// 5th word -- Padding.
-func parseCPU(b []byte) (*Profile, error) {
- var parse func([]byte) (uint64, []byte)
- var n1, n2, n3, n4, n5 uint64
- for _, parse = range cpuInts {
- var tmp []byte
- n1, tmp = parse(b)
- n2, tmp = parse(tmp)
- n3, tmp = parse(tmp)
- n4, tmp = parse(tmp)
- n5, tmp = parse(tmp)
-
- if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
- b = tmp
- return cpuProfile(b, int64(n4), parse)
- }
- }
- return nil, errUnrecognized
-}
-
-// cpuProfile returns a new Profile from C++ profilez data.
-// b is the profile bytes after the header, period is the profiling
-// period, and parse is a function to parse 8-byte chunks from the
-// profile in its native endianness.
-func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
- p := &Profile{
- Period: period * 1000,
- PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
- SampleType: []*ValueType{
- {Type: "samples", Unit: "count"},
- {Type: "cpu", Unit: "nanoseconds"},
- },
- }
- var err error
- if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
- return nil, err
- }
-
- // If all samples have the same second-to-the-bottom frame, it
- // strongly suggests that it is an uninteresting artifact of
- // measurement -- a stack frame pushed by the signal handler. The
- // bottom frame is always correct as it is picked up from the signal
- // structure, not the stack. Check if this is the case and if so,
- // remove.
- if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
- allSame := true
- id1 := p.Sample[0].Location[1].Address
- for _, s := range p.Sample {
- if len(s.Location) < 2 || id1 != s.Location[1].Address {
- allSame = false
- break
- }
- }
- if allSame {
- for _, s := range p.Sample {
- s.Location = append(s.Location[:1], s.Location[2:]...)
- }
- }
- }
-
- if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
- return nil, err
- }
- return p, nil
-}
-
-// parseCPUSamples parses a collection of profilez samples from a
-// profile.
-//
-// profilez samples are a repeated sequence of stack frames of the
-// form:
-// 1st word -- The number of times this stack was encountered.
-// 2nd word -- The size of the stack (StackSize).
-// 3rd word -- The first address on the stack.
-// ...
-// StackSize + 2 -- The last address on the stack
-// The last stack trace is of the form:
-// 1st word -- 0
-// 2nd word -- 1
-// 3rd word -- 0
-//
-// Addresses from stack traces may point to the next instruction after
-// each call. Optionally adjust by -1 to land somewhere on the actual
-// call (except for the leaf, which is not a call).
-func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
- locs := make(map[uint64]*Location)
- for len(b) > 0 {
- var count, nstk uint64
- count, b = parse(b)
- nstk, b = parse(b)
- if b == nil || nstk > uint64(len(b)/4) {
- return nil, nil, errUnrecognized
- }
- var sloc []*Location
- addrs := make([]uint64, nstk)
- for i := 0; i < int(nstk); i++ {
- addrs[i], b = parse(b)
- }
-
- if count == 0 && nstk == 1 && addrs[0] == 0 {
- // End of data marker
- break
- }
- for i, addr := range addrs {
- if adjust && i > 0 {
- addr--
- }
- loc := locs[addr]
- if loc == nil {
- loc = &Location{
- Address: addr,
- }
- locs[addr] = loc
- p.Location = append(p.Location, loc)
- }
- sloc = append(sloc, loc)
- }
- p.Sample = append(p.Sample,
- &Sample{
- Value: []int64{int64(count), int64(count) * p.Period},
- Location: sloc,
- })
- }
- // Reached the end without finding the EOD marker.
- return b, locs, nil
-}
-
-// parseHeap parses a heapz legacy or a growthz profile and
-// returns a newly populated Profile.
-func parseHeap(b []byte) (p *Profile, err error) {
- r := bytes.NewBuffer(b)
- l, err := r.ReadString('\n')
- if err != nil {
- return nil, errUnrecognized
- }
-
- sampling := ""
-
- if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
- p = &Profile{
- SampleType: []*ValueType{
- {Type: "objects", Unit: "count"},
- {Type: "space", Unit: "bytes"},
- },
- PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
- }
-
- var period int64
- if len(header[6]) > 0 {
- if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
- return nil, errUnrecognized
- }
- }
-
- switch header[5] {
- case "heapz_v2", "heap_v2":
- sampling, p.Period = "v2", period
- case "heapprofile":
- sampling, p.Period = "", 1
- case "heap":
- sampling, p.Period = "v2", period/2
- default:
- return nil, errUnrecognized
- }
- } else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
- p = &Profile{
- SampleType: []*ValueType{
- {Type: "objects", Unit: "count"},
- {Type: "space", Unit: "bytes"},
- },
- PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
- Period: 1,
- }
- } else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
- p = &Profile{
- SampleType: []*ValueType{
- {Type: "objects", Unit: "count"},
- {Type: "space", Unit: "bytes"},
- },
- PeriodType: &ValueType{Type: "allocations", Unit: "count"},
- Period: 1,
- }
- } else {
- return nil, errUnrecognized
- }
-
- if LegacyHeapAllocated {
- for _, st := range p.SampleType {
- st.Type = "alloc_" + st.Type
- }
- } else {
- for _, st := range p.SampleType {
- st.Type = "inuse_" + st.Type
- }
- }
-
- locs := make(map[uint64]*Location)
- for {
- l, err = r.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- return nil, err
- }
-
- if l == "" {
- break
- }
- }
-
- if isSpaceOrComment(l) {
- continue
- }
- l = strings.TrimSpace(l)
-
- if sectionTrigger(l) != unrecognizedSection {
- break
- }
-
- value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
- if err != nil {
- return nil, err
- }
- var sloc []*Location
- for _, addr := range addrs {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call.
- addr--
- loc := locs[addr]
- if locs[addr] == nil {
- loc = &Location{
- Address: addr,
- }
- p.Location = append(p.Location, loc)
- locs[addr] = loc
- }
- sloc = append(sloc, loc)
- }
-
- p.Sample = append(p.Sample, &Sample{
- Value: value,
- Location: sloc,
- NumLabel: map[string][]int64{"bytes": {blocksize}},
- })
- }
-
- if err = parseAdditionalSections(l, r, p); err != nil {
- return nil, err
- }
- return p, nil
-}
-
-// parseHeapSample parses a single row from a heap profile into a new Sample.
-func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
- sampleData := heapSampleRE.FindStringSubmatch(line)
- if len(sampleData) != 6 {
- return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
- }
-
- // Use the first two values by default; tcmalloc sampling generates the
- // same value for both. Only the older heap-profile format collects separate
- // stats for in-use and allocated objects.
- valueIndex := 1
- if LegacyHeapAllocated {
- valueIndex = 3
- }
-
- var v1, v2 int64
- if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
- return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
- }
- if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
- return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
- }
-
- if v1 == 0 {
- if v2 != 0 {
- return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
- }
- } else {
- blocksize = v2 / v1
- if sampling == "v2" {
- v1, v2 = scaleHeapSample(v1, v2, rate)
- }
- }
-
- value = []int64{v1, v2}
- addrs = parseHexAddresses(sampleData[5])
-
- return value, blocksize, addrs, nil
-}
-
-// extractHexAddresses extracts hex numbers from a string and returns
-// them as strings, together with their numeric values, in parallel slices.
-func extractHexAddresses(s string) ([]string, []uint64) {
- hexStrings := hexNumberRE.FindAllString(s, -1)
- var ids []uint64
- for _, s := range hexStrings {
- if id, err := strconv.ParseUint(s, 0, 64); err == nil {
- ids = append(ids, id)
- } else {
- // Do not expect any parsing failures due to the regexp matching.
- panic("failed to parse hex value:" + s)
- }
- }
- return hexStrings, ids
-}
-
-// parseHexAddresses parses hex numbers from a string and returns them
-// in a slice.
-func parseHexAddresses(s string) []uint64 {
- _, ids := extractHexAddresses(s)
- return ids
-}
-
-// scaleHeapSample adjusts the data from a heapz Sample to
-// account for its probability of appearing in the collected
-// data. heapz profiles are a sampling of the memory allocation
-// requests in a program. We estimate the unsampled value by dividing
-// each collected sample by its probability of appearing in the
-// profile. heapz v2 profiles rely on a Poisson process to determine
-// which samples to collect, based on the desired average collection
-// rate R. The probability that a sample of size S appears in that
-// profile is 1-exp(-S/R).
-func scaleHeapSample(count, size, rate int64) (int64, int64) {
- if count == 0 || size == 0 {
- return 0, 0
- }
-
- if rate <= 1 {
- // if rate==1 all samples were collected so no adjustment is needed.
- // if rate<1 treat as unknown and skip scaling.
- return count, size
- }
-
- avgSize := float64(size) / float64(count)
- scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
-
- return int64(float64(count) * scale), int64(float64(size) * scale)
-}
-
-// parseContention parses a mutex or contention profile. There are 2 cases:
-// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
-// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
-// This code converts the text output from runtime into a *Profile. (In the future
-// the runtime might write a serialized Profile directly, making this unnecessary.)
-func parseContention(b []byte) (*Profile, error) {
- r := bytes.NewBuffer(b)
- var l string
- var err error
- for {
- // Skip past comments and empty lines seeking a real header.
- l, err = r.ReadString('\n')
- if err != nil {
- return nil, err
- }
- if !isSpaceOrComment(l) {
- break
- }
- }
-
- if strings.HasPrefix(l, "--- contentionz ") {
- return parseCppContention(r)
- } else if strings.HasPrefix(l, "--- mutex:") {
- return parseCppContention(r)
- } else if strings.HasPrefix(l, "--- contention:") {
- return parseCppContention(r)
- }
- return nil, errUnrecognized
-}
-
-// parseCppContention parses the output from synchronization_profiling.cc
-// for backward compatibility, and the compatible (non-debug) block profile
-// output from the Go runtime.
-func parseCppContention(r *bytes.Buffer) (*Profile, error) {
- p := &Profile{
- PeriodType: &ValueType{Type: "contentions", Unit: "count"},
- Period: 1,
- SampleType: []*ValueType{
- {Type: "contentions", Unit: "count"},
- {Type: "delay", Unit: "nanoseconds"},
- },
- }
-
- var cpuHz int64
- var l string
- var err error
- // Parse text of the form "attribute = value" before the samples.
- const delimiter = "="
- for {
- l, err = r.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- return nil, err
- }
-
- if l == "" {
- break
- }
- }
- if isSpaceOrComment(l) {
- continue
- }
-
- if l = strings.TrimSpace(l); l == "" {
- continue
- }
-
- if strings.HasPrefix(l, "---") {
- break
- }
-
- attr := strings.SplitN(l, delimiter, 2)
- if len(attr) != 2 {
- break
- }
- key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
- var err error
- switch key {
- case "cycles/second":
- if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
- return nil, errUnrecognized
- }
- case "sampling period":
- if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
- return nil, errUnrecognized
- }
- case "ms since reset":
- ms, err := strconv.ParseInt(val, 0, 64)
- if err != nil {
- return nil, errUnrecognized
- }
- p.DurationNanos = ms * 1000 * 1000
- case "format":
- // CPP contentionz profiles don't have format.
- return nil, errUnrecognized
- case "resolution":
- // CPP contentionz profiles don't have resolution.
- return nil, errUnrecognized
- case "discarded samples":
- default:
- return nil, errUnrecognized
- }
- }
-
- locs := make(map[uint64]*Location)
- for {
- if !isSpaceOrComment(l) {
- if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
- break
- }
- value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
- if err != nil {
- return nil, err
- }
- var sloc []*Location
- for _, addr := range addrs {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call.
- addr--
- loc := locs[addr]
- if locs[addr] == nil {
- loc = &Location{
- Address: addr,
- }
- p.Location = append(p.Location, loc)
- locs[addr] = loc
- }
- sloc = append(sloc, loc)
- }
- p.Sample = append(p.Sample, &Sample{
- Value: value,
- Location: sloc,
- })
- }
-
- if l, err = r.ReadString('\n'); err != nil {
- if err != io.EOF {
- return nil, err
- }
- if l == "" {
- break
- }
- }
- }
-
- if err = parseAdditionalSections(l, r, p); err != nil {
- return nil, err
- }
-
- return p, nil
-}
-
-// parseContentionSample parses a single row from a contention profile
-// into a new Sample.
-func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
- sampleData := contentionSampleRE.FindStringSubmatch(line)
- if sampleData == nil {
- return value, addrs, errUnrecognized
- }
-
- v1, err := strconv.ParseInt(sampleData[1], 10, 64)
- if err != nil {
- return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
- }
- v2, err := strconv.ParseInt(sampleData[2], 10, 64)
- if err != nil {
- return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
- }
-
- // Unsample values if period and cpuHz are available.
- // - Delays are scaled to cycles and then to nanoseconds.
- // - Contentions are scaled to cycles.
- if period > 0 {
- if cpuHz > 0 {
- cpuGHz := float64(cpuHz) / 1e9
- v1 = int64(float64(v1) * float64(period) / cpuGHz)
- }
- v2 = v2 * period
- }
-
- value = []int64{v2, v1}
- addrs = parseHexAddresses(sampleData[3])
-
- return value, addrs, nil
-}
-
-// parseThread parses a Threadz profile and returns a new Profile.
-func parseThread(b []byte) (*Profile, error) {
- r := bytes.NewBuffer(b)
-
- var line string
- var err error
- for {
- // Skip past comments and empty lines seeking a real header.
- line, err = r.ReadString('\n')
- if err != nil {
- return nil, err
- }
- if !isSpaceOrComment(line) {
- break
- }
- }
-
- if m := threadzStartRE.FindStringSubmatch(line); m != nil {
- // Advance over initial comments until first stack trace.
- for {
- line, err = r.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- return nil, err
- }
-
- if line == "" {
- break
- }
- }
- if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
- break
- }
- }
- } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
- return nil, errUnrecognized
- }
-
- p := &Profile{
- SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
- PeriodType: &ValueType{Type: "thread", Unit: "count"},
- Period: 1,
- }
-
- locs := make(map[uint64]*Location)
- // Recognize each thread and populate profile samples.
- for sectionTrigger(line) == unrecognizedSection {
- if strings.HasPrefix(line, "---- no stack trace for") {
- line = ""
- break
- }
- if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
- return nil, errUnrecognized
- }
-
- var addrs []uint64
- line, addrs, err = parseThreadSample(r)
- if err != nil {
- return nil, errUnrecognized
- }
- if len(addrs) == 0 {
- // We got a --same as previous threads--. Bump counters.
- if len(p.Sample) > 0 {
- s := p.Sample[len(p.Sample)-1]
- s.Value[0]++
- }
- continue
- }
-
- var sloc []*Location
- for _, addr := range addrs {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call.
- addr--
- loc := locs[addr]
- if locs[addr] == nil {
- loc = &Location{
- Address: addr,
- }
- p.Location = append(p.Location, loc)
- locs[addr] = loc
- }
- sloc = append(sloc, loc)
- }
-
- p.Sample = append(p.Sample, &Sample{
- Value: []int64{1},
- Location: sloc,
- })
- }
-
- if err = parseAdditionalSections(line, r, p); err != nil {
- return nil, err
- }
-
- return p, nil
-}
-
-// parseThreadSample parses a symbolized or unsymbolized stack trace.
-// Returns the first line after the traceback, the sample (or nil if
-// it hits a 'same-as-previous' marker) and an error.
-func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
- var l string
- sameAsPrevious := false
- for {
- if l, err = b.ReadString('\n'); err != nil {
- if err != io.EOF {
- return "", nil, err
- }
- if l == "" {
- break
- }
- }
- if l = strings.TrimSpace(l); l == "" {
- continue
- }
-
- if strings.HasPrefix(l, "---") {
- break
- }
- if strings.Contains(l, "same as previous thread") {
- sameAsPrevious = true
- continue
- }
-
- addrs = append(addrs, parseHexAddresses(l)...)
- }
-
- if sameAsPrevious {
- return l, nil, nil
- }
- return l, addrs, nil
-}
-
-// parseAdditionalSections parses any additional sections in the
-// profile, ignoring any unrecognized sections.
-func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
- for {
- if sectionTrigger(l) == memoryMapSection {
- break
- }
- // Ignore any unrecognized sections.
- if l, err := b.ReadString('\n'); err != nil {
- if err != io.EOF {
- return err
- }
- if l == "" {
- break
- }
- }
- }
- return p.ParseMemoryMap(b)
-}
-
-// ParseMemoryMap parses a memory map in the format of
-// /proc/self/maps, and overrides the mappings in the current profile.
-// It renumbers the samples and locations in the profile correspondingly.
-func (p *Profile) ParseMemoryMap(rd io.Reader) error {
- b := bufio.NewReader(rd)
-
- var attrs []string
- var r *strings.Replacer
- const delimiter = "="
- for {
- l, err := b.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- return err
- }
- if l == "" {
- break
- }
- }
- if l = strings.TrimSpace(l); l == "" {
- continue
- }
-
- if r != nil {
- l = r.Replace(l)
- }
- m, err := parseMappingEntry(l)
- if err != nil {
- if err == errUnrecognized {
- // Recognize assignments of the form: attr=value, and replace
- // $attr with value on subsequent mappings.
- if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
- attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
- r = strings.NewReplacer(attrs...)
- }
- // Ignore any unrecognized entries
- continue
- }
- return err
- }
- if m == nil || (m.File == "" && len(p.Mapping) != 0) {
- // In some cases the first entry may include the address range
- // but not the name of the file. It should be followed by
- // another entry with the name.
- continue
- }
- if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
- // Update the name if this is the entry following that empty one.
- p.Mapping[0].File = m.File
- continue
- }
- p.Mapping = append(p.Mapping, m)
- }
- p.remapLocationIDs()
- p.remapFunctionIDs()
- p.remapMappingIDs()
- return nil
-}
-
-func parseMappingEntry(l string) (*Mapping, error) {
- mapping := &Mapping{}
- var err error
- if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
- if !strings.Contains(me[3], "x") {
- // Skip non-executable entries.
- return nil, nil
- }
- if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- if me[4] != "" {
- if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- }
- mapping.File = me[8]
- return mapping, nil
- }
-
- if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
- if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- mapping.File = me[3]
- if me[5] != "" {
- if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
- return nil, errUnrecognized
- }
- }
- return mapping, nil
- }
-
- return nil, errUnrecognized
-}
-
-type sectionType int
-
-const (
- unrecognizedSection sectionType = iota
- memoryMapSection
-)
-
-var memoryMapTriggers = []string{
- "--- Memory map: ---",
- "MAPPED_LIBRARIES:",
-}
-
-func sectionTrigger(line string) sectionType {
- for _, trigger := range memoryMapTriggers {
- if strings.Contains(line, trigger) {
- return memoryMapSection
- }
- }
- return unrecognizedSection
-}
-
-func (p *Profile) addLegacyFrameInfo() {
- switch {
- case isProfileType(p, heapzSampleTypes) ||
- isProfileType(p, heapzInUseSampleTypes) ||
- isProfileType(p, heapzAllocSampleTypes):
- p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
- case isProfileType(p, contentionzSampleTypes):
- p.DropFrames, p.KeepFrames = lockRxStr, ""
- default:
- p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
- }
-}
-
-var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
-var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
-var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
-var contentionzSampleTypes = []string{"contentions", "delay"}
-
-func isProfileType(p *Profile, t []string) bool {
- st := p.SampleType
- if len(st) != len(t) {
- return false
- }
-
- for i := range st {
- if st[i].Type != t[i] {
- return false
- }
- }
- return true
-}
-
-var allocRxStr = strings.Join([]string{
- // POSIX entry points.
- `calloc`,
- `cfree`,
- `malloc`,
- `free`,
- `memalign`,
- `do_memalign`,
- `(__)?posix_memalign`,
- `pvalloc`,
- `valloc`,
- `realloc`,
-
- // TC malloc.
- `tcmalloc::.*`,
- `tc_calloc`,
- `tc_cfree`,
- `tc_malloc`,
- `tc_free`,
- `tc_memalign`,
- `tc_posix_memalign`,
- `tc_pvalloc`,
- `tc_valloc`,
- `tc_realloc`,
- `tc_new`,
- `tc_delete`,
- `tc_newarray`,
- `tc_deletearray`,
- `tc_new_nothrow`,
- `tc_newarray_nothrow`,
-
- // Memory-allocation routines on OS X.
- `malloc_zone_malloc`,
- `malloc_zone_calloc`,
- `malloc_zone_valloc`,
- `malloc_zone_realloc`,
- `malloc_zone_memalign`,
- `malloc_zone_free`,
-
- // Go runtime
- `runtime\..*`,
-
- // Other misc. memory allocation routines
- `BaseArena::.*`,
- `(::)?do_malloc_no_errno`,
- `(::)?do_malloc_pages`,
- `(::)?do_malloc`,
- `DoSampledAllocation`,
- `MallocedMemBlock::MallocedMemBlock`,
- `_M_allocate`,
- `__builtin_(vec_)?delete`,
- `__builtin_(vec_)?new`,
- `__gnu_cxx::new_allocator::allocate`,
- `__libc_malloc`,
- `__malloc_alloc_template::allocate`,
- `allocate`,
- `cpp_alloc`,
- `operator new(\[\])?`,
- `simple_alloc::allocate`,
-}, `|`)
-
-var allocSkipRxStr = strings.Join([]string{
- // Preserve Go runtime frames that appear in the middle/bottom of
- // the stack.
- `runtime\.panic`,
- `runtime\.reflectcall`,
- `runtime\.call[0-9]*`,
-}, `|`)
-
-var cpuProfilerRxStr = strings.Join([]string{
- `ProfileData::Add`,
- `ProfileData::prof_handler`,
- `CpuProfiler::prof_handler`,
- `__pthread_sighandler`,
- `__restore`,
-}, `|`)
-
-var lockRxStr = strings.Join([]string{
- `RecordLockProfileData`,
- `(base::)?RecordLockProfileData.*`,
- `(base::)?SubmitMutexProfileData.*`,
- `(base::)?SubmitSpinLockProfileData.*`,
- `(Mutex::)?AwaitCommon.*`,
- `(Mutex::)?Unlock.*`,
- `(Mutex::)?UnlockSlow.*`,
- `(Mutex::)?ReaderUnlock.*`,
- `(MutexLock::)?~MutexLock.*`,
- `(SpinLock::)?Unlock.*`,
- `(SpinLock::)?SlowUnlock.*`,
- `(SpinLockHolder::)?~SpinLockHolder.*`,
-}, `|`)
diff --git a/src/runtime/pprof/internal/profile/profile.go b/src/runtime/pprof/internal/profile/profile.go
deleted file mode 100644
index 443accd..0000000
--- a/src/runtime/pprof/internal/profile/profile.go
+++ /dev/null
@@ -1,577 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package profile provides a representation of profile.proto and
-// methods to encode/decode profiles in this format.
-//
-// This package is only for testing runtime/pprof.
-// It is not used by production Go programs.
-package profile
-
-import (
- "bytes"
- "compress/gzip"
- "fmt"
- "io"
- "io/ioutil"
- "regexp"
- "strings"
- "time"
-)
-
-// Profile is an in-memory representation of profile.proto.
-type Profile struct {
- SampleType []*ValueType
- DefaultSampleType string
- Sample []*Sample
- Mapping []*Mapping
- Location []*Location
- Function []*Function
- Comments []string
-
- DropFrames string
- KeepFrames string
-
- TimeNanos int64
- DurationNanos int64
- PeriodType *ValueType
- Period int64
-
- commentX []int64
- dropFramesX int64
- keepFramesX int64
- stringTable []string
- defaultSampleTypeX int64
-}
-
-// ValueType corresponds to Profile.ValueType
-type ValueType struct {
- Type string // cpu, wall, inuse_space, etc
- Unit string // seconds, nanoseconds, bytes, etc
-
- typeX int64
- unitX int64
-}
-
-// Sample corresponds to Profile.Sample
-type Sample struct {
- Location []*Location
- Value []int64
- Label map[string][]string
- NumLabel map[string][]int64
-
- locationIDX []uint64
- labelX []Label
-}
-
-// Label corresponds to Profile.Label
-type Label struct {
- keyX int64
- // Exactly one of the two following values must be set
- strX int64
- numX int64 // Integer value for this label
-}
-
-// Mapping corresponds to Profile.Mapping
-type Mapping struct {
- ID uint64
- Start uint64
- Limit uint64
- Offset uint64
- File string
- BuildID string
- HasFunctions bool
- HasFilenames bool
- HasLineNumbers bool
- HasInlineFrames bool
-
- fileX int64
- buildIDX int64
-}
-
-// Location corresponds to Profile.Location
-type Location struct {
- ID uint64
- Mapping *Mapping
- Address uint64
- Line []Line
-
- mappingIDX uint64
-}
-
-// Line corresponds to Profile.Line
-type Line struct {
- Function *Function
- Line int64
-
- functionIDX uint64
-}
-
-// Function corresponds to Profile.Function
-type Function struct {
- ID uint64
- Name string
- SystemName string
- Filename string
- StartLine int64
-
- nameX int64
- systemNameX int64
- filenameX int64
-}
-
-// Parse parses a profile and checks for its validity. The input
-// may be a gzip-compressed encoded protobuf or one of many legacy
-// profile formats which may be unsupported in the future.
-func Parse(r io.Reader) (*Profile, error) {
- orig, err := ioutil.ReadAll(r)
- if err != nil {
- return nil, err
- }
-
- var p *Profile
- if len(orig) >= 2 && orig[0] == 0x1f && orig[1] == 0x8b {
- gz, err := gzip.NewReader(bytes.NewBuffer(orig))
- if err != nil {
- return nil, fmt.Errorf("decompressing profile: %v", err)
- }
- data, err := ioutil.ReadAll(gz)
- if err != nil {
- return nil, fmt.Errorf("decompressing profile: %v", err)
- }
- orig = data
- }
- if p, err = parseUncompressed(orig); err != nil {
- if p, err = parseLegacy(orig); err != nil {
- return nil, fmt.Errorf("parsing profile: %v", err)
- }
- }
-
- if err := p.CheckValid(); err != nil {
- return nil, fmt.Errorf("malformed profile: %v", err)
- }
- return p, nil
-}
-
-var errUnrecognized = fmt.Errorf("unrecognized profile format")
-var errMalformed = fmt.Errorf("malformed profile format")
-
-func parseLegacy(data []byte) (*Profile, error) {
- parsers := []func([]byte) (*Profile, error){
- parseCPU,
- parseHeap,
- parseGoCount, // goroutine, threadcreate
- parseThread,
- parseContention,
- }
-
- for _, parser := range parsers {
- p, err := parser(data)
- if err == nil {
- p.setMain()
- p.addLegacyFrameInfo()
- return p, nil
- }
- if err != errUnrecognized {
- return nil, err
- }
- }
- return nil, errUnrecognized
-}
-
-func parseUncompressed(data []byte) (*Profile, error) {
- p := &Profile{}
- if err := unmarshal(data, p); err != nil {
- return nil, err
- }
-
- if err := p.postDecode(); err != nil {
- return nil, err
- }
-
- return p, nil
-}
-
-var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`)
-
-// setMain scans Mapping entries and guesses which entry is main
-// because legacy profiles don't obey the convention of putting main
-// first.
-func (p *Profile) setMain() {
- for i := 0; i < len(p.Mapping); i++ {
- file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", ""))
- if len(file) == 0 {
- continue
- }
- if len(libRx.FindStringSubmatch(file)) > 0 {
- continue
- }
- if strings.HasPrefix(file, "[") {
- continue
- }
- // Swap what we guess is main to position 0.
- p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i]
- break
- }
-}
-
-// Write writes the profile as a gzip-compressed marshaled protobuf.
-func (p *Profile) Write(w io.Writer) error {
- p.preEncode()
- b := marshal(p)
- zw := gzip.NewWriter(w)
- defer zw.Close()
- _, err := zw.Write(b)
- return err
-}
-
-// CheckValid tests whether the profile is valid. Checks include, but are
-// not limited to:
-// - len(Profile.Sample[n].value) == len(Profile.value_unit)
-// - Sample.id has a corresponding Profile.Location
-func (p *Profile) CheckValid() error {
- // Check that sample values are consistent
- sampleLen := len(p.SampleType)
- if sampleLen == 0 && len(p.Sample) != 0 {
- return fmt.Errorf("missing sample type information")
- }
- for _, s := range p.Sample {
- if len(s.Value) != sampleLen {
- return fmt.Errorf("mismatch: sample has: %d values vs. %d types", len(s.Value), len(p.SampleType))
- }
- }
-
- // Check that all mappings/locations/functions are in the tables
- // Check that there are no duplicate ids
- mappings := make(map[uint64]*Mapping, len(p.Mapping))
- for _, m := range p.Mapping {
- if m.ID == 0 {
- return fmt.Errorf("found mapping with reserved ID=0")
- }
- if mappings[m.ID] != nil {
- return fmt.Errorf("multiple mappings with same id: %d", m.ID)
- }
- mappings[m.ID] = m
- }
- functions := make(map[uint64]*Function, len(p.Function))
- for _, f := range p.Function {
- if f.ID == 0 {
- return fmt.Errorf("found function with reserved ID=0")
- }
- if functions[f.ID] != nil {
- return fmt.Errorf("multiple functions with same id: %d", f.ID)
- }
- functions[f.ID] = f
- }
- locations := make(map[uint64]*Location, len(p.Location))
- for _, l := range p.Location {
- if l.ID == 0 {
- return fmt.Errorf("found location with reserved id=0")
- }
- if locations[l.ID] != nil {
- return fmt.Errorf("multiple locations with same id: %d", l.ID)
- }
- locations[l.ID] = l
- if m := l.Mapping; m != nil {
- if m.ID == 0 || mappings[m.ID] != m {
- return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
- }
- }
- for _, ln := range l.Line {
- if f := ln.Function; f != nil {
- if f.ID == 0 || functions[f.ID] != f {
- return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
- }
- }
- }
- }
- return nil
-}
-
-// Aggregate merges the locations in the profile into equivalence
-// classes preserving the request attributes. It also updates the
-// samples to point to the merged locations.
-func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error {
- for _, m := range p.Mapping {
- m.HasInlineFrames = m.HasInlineFrames && inlineFrame
- m.HasFunctions = m.HasFunctions && function
- m.HasFilenames = m.HasFilenames && filename
- m.HasLineNumbers = m.HasLineNumbers && linenumber
- }
-
- // Aggregate functions
- if !function || !filename {
- for _, f := range p.Function {
- if !function {
- f.Name = ""
- f.SystemName = ""
- }
- if !filename {
- f.Filename = ""
- }
- }
- }
-
- // Aggregate locations
- if !inlineFrame || !address || !linenumber {
- for _, l := range p.Location {
- if !inlineFrame && len(l.Line) > 1 {
- l.Line = l.Line[len(l.Line)-1:]
- }
- if !linenumber {
- for i := range l.Line {
- l.Line[i].Line = 0
- }
- }
- if !address {
- l.Address = 0
- }
- }
- }
-
- return p.CheckValid()
-}
-
-// String returns a text representation of a profile. Intended mainly
-// for debugging purposes.
-func (p *Profile) String() string {
-
- ss := make([]string, 0, len(p.Sample)+len(p.Mapping)+len(p.Location))
- if pt := p.PeriodType; pt != nil {
- ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit))
- }
- ss = append(ss, fmt.Sprintf("Period: %d", p.Period))
- if p.TimeNanos != 0 {
- ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
- }
- if p.DurationNanos != 0 {
- ss = append(ss, fmt.Sprintf("Duration: %v", time.Duration(p.DurationNanos)))
- }
-
- ss = append(ss, "Samples:")
- var sh1 string
- for _, s := range p.SampleType {
- sh1 = sh1 + fmt.Sprintf("%s/%s ", s.Type, s.Unit)
- }
- ss = append(ss, strings.TrimSpace(sh1))
- for _, s := range p.Sample {
- var sv string
- for _, v := range s.Value {
- sv = fmt.Sprintf("%s %10d", sv, v)
- }
- sv = sv + ": "
- for _, l := range s.Location {
- sv = sv + fmt.Sprintf("%d ", l.ID)
- }
- ss = append(ss, sv)
- const labelHeader = " "
- if len(s.Label) > 0 {
- ls := labelHeader
- for k, v := range s.Label {
- ls = ls + fmt.Sprintf("%s:%v ", k, v)
- }
- ss = append(ss, ls)
- }
- if len(s.NumLabel) > 0 {
- ls := labelHeader
- for k, v := range s.NumLabel {
- ls = ls + fmt.Sprintf("%s:%v ", k, v)
- }
- ss = append(ss, ls)
- }
- }
-
- ss = append(ss, "Locations")
- for _, l := range p.Location {
- locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address)
- if m := l.Mapping; m != nil {
- locStr = locStr + fmt.Sprintf("M=%d ", m.ID)
- }
- if len(l.Line) == 0 {
- ss = append(ss, locStr)
- }
- for li := range l.Line {
- lnStr := "??"
- if fn := l.Line[li].Function; fn != nil {
- lnStr = fmt.Sprintf("%s %s:%d s=%d",
- fn.Name,
- fn.Filename,
- l.Line[li].Line,
- fn.StartLine)
- if fn.Name != fn.SystemName {
- lnStr = lnStr + "(" + fn.SystemName + ")"
- }
- }
- ss = append(ss, locStr+lnStr)
- // Do not print location details past the first line
- locStr = " "
- }
- }
-
- ss = append(ss, "Mappings")
- for _, m := range p.Mapping {
- bits := ""
- if m.HasFunctions {
- bits += "[FN]"
- }
- if m.HasFilenames {
- bits += "[FL]"
- }
- if m.HasLineNumbers {
- bits += "[LN]"
- }
- if m.HasInlineFrames {
- bits += "[IN]"
- }
- ss = append(ss, fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
- m.ID,
- m.Start, m.Limit, m.Offset,
- m.File,
- m.BuildID,
- bits))
- }
-
- return strings.Join(ss, "\n") + "\n"
-}
-
-// Merge adds profile pb adjusted by ratio r into profile p. Profiles
-// must be compatible (same Type and SampleType).
-// TODO(rsilvera): consider normalizing the profiles based on the
-// total samples collected.
-func (p *Profile) Merge(pb *Profile, r float64) error {
- if err := p.Compatible(pb); err != nil {
- return err
- }
-
- pb = pb.Copy()
-
- // Keep the largest of the two periods.
- if pb.Period > p.Period {
- p.Period = pb.Period
- }
-
- p.DurationNanos += pb.DurationNanos
-
- p.Mapping = append(p.Mapping, pb.Mapping...)
- for i, m := range p.Mapping {
- m.ID = uint64(i + 1)
- }
- p.Location = append(p.Location, pb.Location...)
- for i, l := range p.Location {
- l.ID = uint64(i + 1)
- }
- p.Function = append(p.Function, pb.Function...)
- for i, f := range p.Function {
- f.ID = uint64(i + 1)
- }
-
- if r != 1.0 {
- for _, s := range pb.Sample {
- for i, v := range s.Value {
- s.Value[i] = int64((float64(v) * r))
- }
- }
- }
- p.Sample = append(p.Sample, pb.Sample...)
- return p.CheckValid()
-}
-
-// Compatible determines if two profiles can be compared/merged.
-// It returns nil if the profiles are compatible; otherwise an error with
-// details on the incompatibility.
-func (p *Profile) Compatible(pb *Profile) error {
- if !compatibleValueTypes(p.PeriodType, pb.PeriodType) {
- return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
- }
-
- if len(p.SampleType) != len(pb.SampleType) {
- return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
- }
-
- for i := range p.SampleType {
- if !compatibleValueTypes(p.SampleType[i], pb.SampleType[i]) {
- return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
- }
- }
-
- return nil
-}
-
-// HasFunctions determines if all locations in this profile have
-// symbolized function information.
-func (p *Profile) HasFunctions() bool {
- for _, l := range p.Location {
- if l.Mapping == nil || !l.Mapping.HasFunctions {
- return false
- }
- }
- return true
-}
-
-// HasFileLines determines if all locations in this profile have
-// symbolized file and line number information.
-func (p *Profile) HasFileLines() bool {
- for _, l := range p.Location {
- if l.Mapping == nil || (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) {
- return false
- }
- }
- return true
-}
-
-func compatibleValueTypes(v1, v2 *ValueType) bool {
- if v1 == nil || v2 == nil {
- return true // No grounds to disqualify.
- }
- return v1.Type == v2.Type && v1.Unit == v2.Unit
-}
-
-// Copy makes a fully independent copy of a profile.
-func (p *Profile) Copy() *Profile {
- p.preEncode()
- b := marshal(p)
-
- pp := &Profile{}
- if err := unmarshal(b, pp); err != nil {
- panic(err)
- }
- if err := pp.postDecode(); err != nil {
- panic(err)
- }
-
- return pp
-}
-
-// Demangler maps symbol names to a human-readable form. This may
-// include C++ demangling and additional simplification. Names that
-// are not demangled may be missing from the resulting map.
-type Demangler func(name []string) (map[string]string, error)
-
-// Demangle attempts to demangle and optionally simplify any function
-// names referenced in the profile. It works on a best-effort basis:
-// it will silently preserve the original names in case of any errors.
-func (p *Profile) Demangle(d Demangler) error {
- // Collect names to demangle.
- var names []string
- for _, fn := range p.Function {
- names = append(names, fn.SystemName)
- }
-
- // Update profile with demangled names.
- demangled, err := d(names)
- if err != nil {
- return err
- }
- for _, fn := range p.Function {
- if dd, ok := demangled[fn.SystemName]; ok {
- fn.Name = dd
- }
- }
- return nil
-}
-
-// Empty reports whether the profile contains no samples.
-func (p *Profile) Empty() bool {
- return len(p.Sample) == 0
-}
diff --git a/src/runtime/pprof/internal/profile/profile_test.go b/src/runtime/pprof/internal/profile/profile_test.go
deleted file mode 100644
index e1963f3..0000000
--- a/src/runtime/pprof/internal/profile/profile_test.go
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package profile
-
-import (
- "bytes"
- "testing"
-)
-
-func TestEmptyProfile(t *testing.T) {
- var buf bytes.Buffer
- p, err := Parse(&buf)
- if err != nil {
- t.Error("Want no error, got", err)
- }
- if p == nil {
- t.Fatal("Want a valid profile, got <nil>")
- }
- if !p.Empty() {
- t.Errorf("Profile should be empty, got %#v", p)
- }
-}
-
-func TestParseContention(t *testing.T) {
- tests := []struct {
- name string
- in string
- wantErr bool
- }{
- {
- name: "valid",
- in: `--- mutex:
-cycles/second=3491920901
-sampling period=1
-43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31
-34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31
-`,
- },
- {
- name: "valid with comment",
- in: `--- mutex:
-cycles/second=3491920901
-sampling period=1
-43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31
-# 0x45e850 sync.(*Mutex).Unlock+0x80 /go/src/sync/mutex.go:126
-# 0x45f763 sync.(*RWMutex).Unlock+0x83 /go/src/sync/rwmutex.go:125
-# 0x4a2be0 main.main.func3+0x70 /go/src/internal/pprof/profile/a_binary.go:58
-
-34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31
-# 0x45e850 sync.(*Mutex).Unlock+0x80 /go/src/sync/mutex.go:126
-# 0x45f763 sync.(*RWMutex).Unlock+0x83 /go/src/sync/rwmutex.go:125
-# 0x4a2b16 main.main.func2+0xd6 /go/src/internal/pprof/profile/a_binary.go:48
-`,
- },
- {
- name: "empty",
- in: `--- mutex:`,
- wantErr: true,
- },
- {
- name: "invalid header",
- in: `--- channel:
-43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31`,
- wantErr: true,
- },
- }
- for _, tc := range tests {
- _, err := parseContention([]byte(tc.in))
- if tc.wantErr && err == nil {
- t.Errorf("parseContention(%q) succeeded unexpectedly", tc.name)
- }
- if !tc.wantErr && err != nil {
- t.Errorf("parseContention(%q) failed unexpectedly: %v", tc.name, err)
- }
- }
-
-}
diff --git a/src/runtime/pprof/internal/profile/proto.go b/src/runtime/pprof/internal/profile/proto.go
deleted file mode 100644
index 11d7f9f..0000000
--- a/src/runtime/pprof/internal/profile/proto.go
+++ /dev/null
@@ -1,360 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file is a simple protocol buffer encoder and decoder.
-//
-// A protocol message must implement the message interface:
-// decoder() []decoder
-// encode(*buffer)
-//
-// The decoder method returns a slice indexed by field number that gives the
-// function to decode that field.
-// The encode method encodes its receiver into the given buffer.
-//
-// The two methods are simple enough to be implemented by hand rather than
-// by using a protocol compiler.
-//
-// See profile.go for examples of messages implementing this interface.
-//
-// There is no support for groups, message sets, or "has" bits.
-
-package profile
-
-import "errors"
-
-type buffer struct {
- field int
- typ int
- u64 uint64
- data []byte
- tmp [16]byte
-}
-
-type decoder func(*buffer, message) error
-
-type message interface {
- decoder() []decoder
- encode(*buffer)
-}
-
-func marshal(m message) []byte {
- var b buffer
- m.encode(&b)
- return b.data
-}
-
-func encodeVarint(b *buffer, x uint64) {
- for x >= 128 {
- b.data = append(b.data, byte(x)|0x80)
- x >>= 7
- }
- b.data = append(b.data, byte(x))
-}
-
-func encodeLength(b *buffer, tag int, len int) {
- encodeVarint(b, uint64(tag)<<3|2)
- encodeVarint(b, uint64(len))
-}
-
-func encodeUint64(b *buffer, tag int, x uint64) {
- // append varint to b.data
- encodeVarint(b, uint64(tag)<<3|0)
- encodeVarint(b, x)
-}
-
-func encodeUint64s(b *buffer, tag int, x []uint64) {
- if len(x) > 2 {
- // Use packed encoding
- n1 := len(b.data)
- for _, u := range x {
- encodeVarint(b, u)
- }
- n2 := len(b.data)
- encodeLength(b, tag, n2-n1)
- n3 := len(b.data)
- copy(b.tmp[:], b.data[n2:n3])
- copy(b.data[n1+(n3-n2):], b.data[n1:n2])
- copy(b.data[n1:], b.tmp[:n3-n2])
- return
- }
- for _, u := range x {
- encodeUint64(b, tag, u)
- }
-}
-
-func encodeUint64Opt(b *buffer, tag int, x uint64) {
- if x == 0 {
- return
- }
- encodeUint64(b, tag, x)
-}
-
-func encodeInt64(b *buffer, tag int, x int64) {
- u := uint64(x)
- encodeUint64(b, tag, u)
-}
-
-func encodeInt64Opt(b *buffer, tag int, x int64) {
- if x == 0 {
- return
- }
- encodeInt64(b, tag, x)
-}
-
-func encodeInt64s(b *buffer, tag int, x []int64) {
- if len(x) > 2 {
- // Use packed encoding
- n1 := len(b.data)
- for _, u := range x {
- encodeVarint(b, uint64(u))
- }
- n2 := len(b.data)
- encodeLength(b, tag, n2-n1)
- n3 := len(b.data)
- copy(b.tmp[:], b.data[n2:n3])
- copy(b.data[n1+(n3-n2):], b.data[n1:n2])
- copy(b.data[n1:], b.tmp[:n3-n2])
- return
- }
- for _, u := range x {
- encodeInt64(b, tag, u)
- }
-}
-
-func encodeString(b *buffer, tag int, x string) {
- encodeLength(b, tag, len(x))
- b.data = append(b.data, x...)
-}
-
-func encodeStrings(b *buffer, tag int, x []string) {
- for _, s := range x {
- encodeString(b, tag, s)
- }
-}
-
-func encodeStringOpt(b *buffer, tag int, x string) {
- if x == "" {
- return
- }
- encodeString(b, tag, x)
-}
-
-func encodeBool(b *buffer, tag int, x bool) {
- if x {
- encodeUint64(b, tag, 1)
- } else {
- encodeUint64(b, tag, 0)
- }
-}
-
-func encodeBoolOpt(b *buffer, tag int, x bool) {
- if x == false {
- return
- }
- encodeBool(b, tag, x)
-}
-
-func encodeMessage(b *buffer, tag int, m message) {
- n1 := len(b.data)
- m.encode(b)
- n2 := len(b.data)
- encodeLength(b, tag, n2-n1)
- n3 := len(b.data)
- copy(b.tmp[:], b.data[n2:n3])
- copy(b.data[n1+(n3-n2):], b.data[n1:n2])
- copy(b.data[n1:], b.tmp[:n3-n2])
-}
-
-func unmarshal(data []byte, m message) (err error) {
- b := buffer{data: data, typ: 2}
- return decodeMessage(&b, m)
-}
-
-func le64(p []byte) uint64 {
- return uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
-}
-
-func le32(p []byte) uint32 {
- return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
-}
-
-func decodeVarint(data []byte) (uint64, []byte, error) {
- var i int
- var u uint64
- for i = 0; ; i++ {
- if i >= 10 || i >= len(data) {
- return 0, nil, errors.New("bad varint")
- }
- u |= uint64(data[i]&0x7F) << uint(7*i)
- if data[i]&0x80 == 0 {
- return u, data[i+1:], nil
- }
- }
-}
-
-func decodeField(b *buffer, data []byte) ([]byte, error) {
- x, data, err := decodeVarint(data)
- if err != nil {
- return nil, err
- }
- b.field = int(x >> 3)
- b.typ = int(x & 7)
- b.data = nil
- b.u64 = 0
- switch b.typ {
- case 0:
- b.u64, data, err = decodeVarint(data)
- if err != nil {
- return nil, err
- }
- case 1:
- if len(data) < 8 {
- return nil, errors.New("not enough data")
- }
- b.u64 = le64(data[:8])
- data = data[8:]
- case 2:
- var n uint64
- n, data, err = decodeVarint(data)
- if err != nil {
- return nil, err
- }
- if n > uint64(len(data)) {
- return nil, errors.New("too much data")
- }
- b.data = data[:n]
- data = data[n:]
- case 5:
- if len(data) < 4 {
- return nil, errors.New("not enough data")
- }
- b.u64 = uint64(le32(data[:4]))
- data = data[4:]
- default:
- return nil, errors.New("unknown type: " + string(b.typ))
- }
-
- return data, nil
-}
-
-func checkType(b *buffer, typ int) error {
- if b.typ != typ {
- return errors.New("type mismatch")
- }
- return nil
-}
-
-func decodeMessage(b *buffer, m message) error {
- if err := checkType(b, 2); err != nil {
- return err
- }
- dec := m.decoder()
- data := b.data
- for len(data) > 0 {
- // pull varint field# + type
- var err error
- data, err = decodeField(b, data)
- if err != nil {
- return err
- }
- if b.field >= len(dec) || dec[b.field] == nil {
- continue
- }
- if err := dec[b.field](b, m); err != nil {
- return err
- }
- }
- return nil
-}
-
-func decodeInt64(b *buffer, x *int64) error {
- if err := checkType(b, 0); err != nil {
- return err
- }
- *x = int64(b.u64)
- return nil
-}
-
-func decodeInt64s(b *buffer, x *[]int64) error {
- if b.typ == 2 {
- // Packed encoding
- data := b.data
- for len(data) > 0 {
- var u uint64
- var err error
-
- if u, data, err = decodeVarint(data); err != nil {
- return err
- }
- *x = append(*x, int64(u))
- }
- return nil
- }
- var i int64
- if err := decodeInt64(b, &i); err != nil {
- return err
- }
- *x = append(*x, i)
- return nil
-}
-
-func decodeUint64(b *buffer, x *uint64) error {
- if err := checkType(b, 0); err != nil {
- return err
- }
- *x = b.u64
- return nil
-}
-
-func decodeUint64s(b *buffer, x *[]uint64) error {
- if b.typ == 2 {
- data := b.data
- // Packed encoding
- for len(data) > 0 {
- var u uint64
- var err error
-
- if u, data, err = decodeVarint(data); err != nil {
- return err
- }
- *x = append(*x, u)
- }
- return nil
- }
- var u uint64
- if err := decodeUint64(b, &u); err != nil {
- return err
- }
- *x = append(*x, u)
- return nil
-}
-
-func decodeString(b *buffer, x *string) error {
- if err := checkType(b, 2); err != nil {
- return err
- }
- *x = string(b.data)
- return nil
-}
-
-func decodeStrings(b *buffer, x *[]string) error {
- var s string
- if err := decodeString(b, &s); err != nil {
- return err
- }
- *x = append(*x, s)
- return nil
-}
-
-func decodeBool(b *buffer, x *bool) error {
- if err := checkType(b, 0); err != nil {
- return err
- }
- if int64(b.u64) == 0 {
- *x = false
- } else {
- *x = true
- }
- return nil
-}
diff --git a/src/runtime/pprof/internal/profile/proto_test.go b/src/runtime/pprof/internal/profile/proto_test.go
deleted file mode 100644
index c2613fc..0000000
--- a/src/runtime/pprof/internal/profile/proto_test.go
+++ /dev/null
@@ -1,67 +0,0 @@
-package profile
-
-import (
- "reflect"
- "testing"
-)
-
-func TestPackedEncoding(t *testing.T) {
-
- type testcase struct {
- uint64s []uint64
- int64s []int64
- encoded []byte
- }
- for i, tc := range []testcase{
- {
- []uint64{0, 1, 10, 100, 1000, 10000},
- []int64{1000, 0, 1000},
- []byte{10, 8, 0, 1, 10, 100, 232, 7, 144, 78, 18, 5, 232, 7, 0, 232, 7},
- },
- {
- []uint64{10000},
- nil,
- []byte{8, 144, 78},
- },
- {
- nil,
- []int64{-10000},
- []byte{16, 240, 177, 255, 255, 255, 255, 255, 255, 255, 1},
- },
- } {
- source := &packedInts{tc.uint64s, tc.int64s}
- if got, want := marshal(source), tc.encoded; !reflect.DeepEqual(got, want) {
- t.Errorf("failed encode %d, got %v, want %v", i, got, want)
- }
-
- dest := new(packedInts)
- if err := unmarshal(tc.encoded, dest); err != nil {
- t.Errorf("failed decode %d: %v", i, err)
- continue
- }
- if got, want := dest.uint64s, tc.uint64s; !reflect.DeepEqual(got, want) {
- t.Errorf("failed decode uint64s %d, got %v, want %v", i, got, want)
- }
- if got, want := dest.int64s, tc.int64s; !reflect.DeepEqual(got, want) {
- t.Errorf("failed decode int64s %d, got %v, want %v", i, got, want)
- }
- }
-}
-
-type packedInts struct {
- uint64s []uint64
- int64s []int64
-}
-
-func (u *packedInts) decoder() []decoder {
- return []decoder{
- nil,
- func(b *buffer, m message) error { return decodeUint64s(b, &m.(*packedInts).uint64s) },
- func(b *buffer, m message) error { return decodeInt64s(b, &m.(*packedInts).int64s) },
- }
-}
-
-func (u *packedInts) encode(b *buffer) {
- encodeUint64s(b, 1, u.uint64s)
- encodeInt64s(b, 2, u.int64s)
-}
diff --git a/src/runtime/pprof/internal/profile/prune.go b/src/runtime/pprof/internal/profile/prune.go
deleted file mode 100644
index 1924fad..0000000
--- a/src/runtime/pprof/internal/profile/prune.go
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implements methods to remove frames from profiles.
-
-package profile
-
-import (
- "fmt"
- "regexp"
-)
-
-// Prune removes all nodes beneath a node matching dropRx, and not
-// matching keepRx. If the root node of a Sample matches, the sample
-// will have an empty stack.
-func (p *Profile) Prune(dropRx, keepRx *regexp.Regexp) {
- prune := make(map[uint64]bool)
- pruneBeneath := make(map[uint64]bool)
-
- for _, loc := range p.Location {
- var i int
- for i = len(loc.Line) - 1; i >= 0; i-- {
- if fn := loc.Line[i].Function; fn != nil && fn.Name != "" {
- funcName := fn.Name
- // Account for leading '.' on the PPC ELF v1 ABI.
- if funcName[0] == '.' {
- funcName = funcName[1:]
- }
- if dropRx.MatchString(funcName) {
- if keepRx == nil || !keepRx.MatchString(funcName) {
- break
- }
- }
- }
- }
-
- if i >= 0 {
- // Found matching entry to prune.
- pruneBeneath[loc.ID] = true
-
- // Remove the matching location.
- if i == len(loc.Line)-1 {
- // Matched the top entry: prune the whole location.
- prune[loc.ID] = true
- } else {
- loc.Line = loc.Line[i+1:]
- }
- }
- }
-
- // Prune locs from each Sample
- for _, sample := range p.Sample {
- // Scan from the root to the leaves to find the prune location.
- // Do not prune frames before the first user frame, to avoid
- // pruning everything.
- foundUser := false
- for i := len(sample.Location) - 1; i >= 0; i-- {
- id := sample.Location[i].ID
- if !prune[id] && !pruneBeneath[id] {
- foundUser = true
- continue
- }
- if !foundUser {
- continue
- }
- if prune[id] {
- sample.Location = sample.Location[i+1:]
- break
- }
- if pruneBeneath[id] {
- sample.Location = sample.Location[i:]
- break
- }
- }
- }
-}
-
-// RemoveUninteresting prunes and elides profiles using built-in
-// tables of uninteresting function names.
-func (p *Profile) RemoveUninteresting() error {
- var keep, drop *regexp.Regexp
- var err error
-
- if p.DropFrames != "" {
- if drop, err = regexp.Compile("^(" + p.DropFrames + ")$"); err != nil {
- return fmt.Errorf("failed to compile regexp %s: %v", p.DropFrames, err)
- }
- if p.KeepFrames != "" {
- if keep, err = regexp.Compile("^(" + p.KeepFrames + ")$"); err != nil {
- return fmt.Errorf("failed to compile regexp %s: %v", p.KeepFrames, err)
- }
- }
- p.Prune(drop, keep)
- }
- return nil
-}
diff --git a/src/runtime/pprof/label.go b/src/runtime/pprof/label.go
index 20f9cdb..b614f12 100644
--- a/src/runtime/pprof/label.go
+++ b/src/runtime/pprof/label.go
@@ -6,6 +6,9 @@
import (
"context"
+ "fmt"
+ "sort"
+ "strings"
)
type label struct {
@@ -34,6 +37,23 @@
// that admits incremental immutable modification more efficiently.
type labelMap map[string]string
+// String satisfies Stringer and returns key, value pairs in a consistent
+// order.
+func (l *labelMap) String() string {
+ if l == nil {
+ return ""
+ }
+ keyVals := make([]string, 0, len(*l))
+
+ for k, v := range *l {
+ keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v))
+ }
+
+ sort.Strings(keyVals)
+
+ return "{" + strings.Join(keyVals, ", ") + "}"
+}
+
// WithLabels returns a new context.Context with the given labels added.
// A label overwrites a prior label with the same key.
func WithLabels(ctx context.Context, labels LabelSet) context.Context {
@@ -54,17 +74,18 @@
// Labels takes an even number of strings representing key-value pairs
// and makes a LabelSet containing them.
// A label overwrites a prior label with the same key.
-// Currently only CPU profile utilizes labels information.
+// Currently only the CPU and goroutine profiles utilize any labels
+// information.
// See https://golang.org/issue/23458 for details.
func Labels(args ...string) LabelSet {
if len(args)%2 != 0 {
panic("uneven number of arguments to pprof.Labels")
}
- labels := LabelSet{}
+ list := make([]label, 0, len(args)/2)
for i := 0; i+1 < len(args); i += 2 {
- labels.list = append(labels.list, label{key: args[i], value: args[i+1]})
+ list = append(list, label{key: args[i], value: args[i+1]})
}
- return labels
+ return LabelSet{list: list}
}
// Label returns the value of the label with the given key on ctx, and a boolean indicating
diff --git a/src/runtime/pprof/label_test.go b/src/runtime/pprof/label_test.go
index 240445f..fcb00bd 100644
--- a/src/runtime/pprof/label_test.go
+++ b/src/runtime/pprof/label_test.go
@@ -24,7 +24,7 @@
func (s labelSorter) Less(i, j int) bool { return s[i].key < s[j].key }
func TestContextLabels(t *testing.T) {
- // Background context starts with no lablels.
+ // Background context starts with no labels.
ctx := context.Background()
labels := labelsSorted(ctx)
if len(labels) != 0 {
@@ -80,3 +80,35 @@
t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels)
}
}
+
+func TestLabelMapStringer(t *testing.T) {
+ for _, tbl := range []struct {
+ m labelMap
+ expected string
+ }{
+ {
+ m: labelMap{
+ // empty map
+ },
+ expected: "{}",
+ }, {
+ m: labelMap{
+ "foo": "bar",
+ },
+ expected: `{"foo":"bar"}`,
+ }, {
+ m: labelMap{
+ "foo": "bar",
+ "key1": "value1",
+ "key2": "value2",
+ "key3": "value3",
+ "key4WithNewline": "\nvalue4",
+ },
+ expected: `{"foo":"bar", "key1":"value1", "key2":"value2", "key3":"value3", "key4WithNewline":"\nvalue4"}`,
+ },
+ } {
+ if got := tbl.m.String(); tbl.expected != got {
+ t.Errorf("%#v.String() = %q; want %q", tbl.m, got, tbl.expected)
+ }
+ }
+}
diff --git a/src/runtime/pprof/map.go b/src/runtime/pprof/map.go
index a271ad0..7c75872 100644
--- a/src/runtime/pprof/map.go
+++ b/src/runtime/pprof/map.go
@@ -68,7 +68,8 @@
if len(m.freeStk) < len(stk) {
m.freeStk = make([]uintptr, 1024)
}
- e.stk = m.freeStk[:len(stk)]
+ // Limit cap to prevent append from clobbering freeStk.
+ e.stk = m.freeStk[:len(stk):len(stk)]
m.freeStk = m.freeStk[len(stk):]
for j := range stk {
diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go
index 4c14527..f253f07 100644
--- a/src/runtime/pprof/mprof_test.go
+++ b/src/runtime/pprof/mprof_test.go
@@ -2,11 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build !js
+
package pprof
import (
"bytes"
"fmt"
+ "internal/profile"
"reflect"
"regexp"
"runtime"
@@ -27,6 +30,10 @@
memSink = make([]byte, 2<<20)
}
+func allocateTransient2MInline() {
+ memSink = make([]byte, 2<<20)
+}
+
type Obj32 struct {
link *Obj32
pad [32 - unsafe.Sizeof(uintptr(0))]byte
@@ -71,42 +78,99 @@
// Do the interesting allocations.
allocateTransient1M()
allocateTransient2M()
+ allocateTransient2MInline()
allocatePersistent1K()
allocateReflect()
memSink = nil
runtime.GC() // materialize stats
- var buf bytes.Buffer
- if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
- t.Fatalf("failed to write heap profile: %v", err)
- }
memoryProfilerRun++
- tests := []string{
- fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:40
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:74
+ tests := []struct {
+ stk []string
+ legacy string
+ }{{
+ stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:47
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:82
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
-
- fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:21
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:72
+ }, {
+ stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:24
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:79
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
-
- fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:27
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
+ }, {
+ stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:30
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:80
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
-
- fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:48
+ }, {
+ stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:34
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:81
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
- }
+ }, {
+ stk: []string{"runtime/pprof.allocateReflectTransient"},
+ legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
+# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:55
+`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
+ }}
- for _, test := range tests {
- if !regexp.MustCompile(test).Match(buf.Bytes()) {
- t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
+ t.Run("debug=1", func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
+ t.Fatalf("failed to write heap profile: %v", err)
}
- }
+
+ for _, test := range tests {
+ if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) {
+ t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String())
+ }
+ }
+ })
+
+ t.Run("proto", func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
+ t.Fatalf("failed to write heap profile: %v", err)
+ }
+ p, err := profile.Parse(&buf)
+ if err != nil {
+ t.Fatalf("failed to parse heap profile: %v", err)
+ }
+ t.Logf("Profile = %v", p)
+
+ stks := stacks(p)
+ for _, test := range tests {
+ if !containsStack(stks, test.stk) {
+ t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p)
+ }
+ }
+
+ if !containsInlinedCall(TestMemoryProfiler, 4<<10) {
+ t.Logf("Can't determine whether allocateTransient2MInline was inlined into TestMemoryProfiler.")
+ return
+ }
+
+ // Check the inlined function location is encoded correctly.
+ for _, loc := range p.Location {
+ inlinedCaller, inlinedCallee := false, false
+ for _, line := range loc.Line {
+ if line.Function.Name == "runtime/pprof.allocateTransient2MInline" {
+ inlinedCallee = true
+ }
+ if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" {
+ inlinedCaller = true
+ }
+ }
+ if inlinedCallee != inlinedCaller {
+ t.Errorf("want allocateTransient2MInline followed by TestMemoryProfiler in one location, got separate location entries:\n%v", loc)
+ }
+ }
+ })
}
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 74cdd15..d3b7df3 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -28,7 +28,7 @@
// if err != nil {
// log.Fatal("could not create CPU profile: ", err)
// }
-// defer f.Close()
+// defer f.Close() // error handling omitted for example
// if err := pprof.StartCPUProfile(f); err != nil {
// log.Fatal("could not start CPU profile: ", err)
// }
@@ -42,7 +42,7 @@
// if err != nil {
// log.Fatal("could not create memory profile: ", err)
// }
-// defer f.Close()
+// defer f.Close() // error handling omitted for example
// runtime.GC() // get up-to-date statistics
// if err := pprof.WriteHeapProfile(f); err != nil {
// log.Fatal("could not write memory profile: ", err)
@@ -313,9 +313,11 @@
// Otherwise, WriteTo returns nil.
//
// The debug parameter enables additional output.
-// Passing debug=0 prints only the hexadecimal addresses that pprof needs.
-// Passing debug=1 adds comments translating addresses to function names
-// and line numbers, so that a programmer can read the profile without tools.
+// Passing debug=0 writes the gzip-compressed protocol buffer described
+// in https://github.com/google/pprof/tree/master/proto#overview.
+// Passing debug=1 writes the legacy text format with comments
+// translating addresses to function names and line numbers, so that a
+// programmer can read the profile without tools.
//
// The predefined profiles may assign meaning to other debug values;
// for example, when printing the "goroutine" profile, debug=2 means to
@@ -355,6 +357,7 @@
func (x stackProfile) Len() int { return len(x) }
func (x stackProfile) Stack(i int) []uintptr { return x[i] }
+func (x stackProfile) Label(i int) *labelMap { return nil }
// A countProfile is a set of stack traces to be printed as counts
// grouped by stack trace. There are multiple implementations:
@@ -363,6 +366,7 @@
type countProfile interface {
Len() int
Stack(i int) []uintptr
+ Label(i int) *labelMap
}
// printCountCycleProfile outputs block profile records (for block or mutex profiles)
@@ -386,16 +390,9 @@
count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
values[0] = count
values[1] = int64(nanosec)
- locs = locs[:0]
- for _, addr := range r.Stack() {
- // For count profiles, all stack addresses are
- // return PCs, which is what locForPC expects.
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
- }
+ // For count profiles, all stack addresses are
+ // return PCs, which is what appendLocsForStack expects.
+ locs = b.appendLocsForStack(locs[:0], r.Stack())
b.pbSample(values, locs, nil)
}
b.build()
@@ -407,12 +404,16 @@
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
// Build count of each stack.
var buf bytes.Buffer
- key := func(stk []uintptr) string {
+ key := func(stk []uintptr, lbls *labelMap) string {
buf.Reset()
fmt.Fprintf(&buf, "@")
for _, pc := range stk {
fmt.Fprintf(&buf, " %#x", pc)
}
+ if lbls != nil {
+ buf.WriteString("\n# labels: ")
+ buf.WriteString(lbls.String())
+ }
return buf.String()
}
count := map[string]int{}
@@ -420,7 +421,7 @@
var keys []string
n := p.Len()
for i := 0; i < n; i++ {
- k := key(p.Stack(i))
+ k := key(p.Stack(i), p.Label(i))
if count[k] == 0 {
index[k] = i
keys = append(keys, k)
@@ -451,17 +452,19 @@
var locs []uint64
for _, k := range keys {
values[0] = int64(count[k])
- locs = locs[:0]
- for _, addr := range p.Stack(index[k]) {
- // For count profiles, all stack addresses are
- // return PCs, which is what locForPC expects.
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
+ // For count profiles, all stack addresses are
+ // return PCs, which is what appendLocsForStack expects.
+ locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
+ idx := index[k]
+ var labels func()
+ if p.Label(idx) != nil {
+ labels = func() {
+ for k, v := range *p.Label(idx) {
+ b.pbLabel(tagSample_Label, k, v, 0)
+ }
}
- locs = append(locs, l)
}
- b.pbSample(values, locs, nil)
+ b.pbSample(values, locs, labels)
}
b.build()
return nil
@@ -642,6 +645,9 @@
fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction)
fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
+ // Also flush out MaxRSS on supported platforms.
+ addMaxRSS(w)
+
tw.Flush()
return b.Flush()
}
@@ -654,7 +660,12 @@
// writeThreadCreate writes the current runtime ThreadCreateProfile to w.
func writeThreadCreate(w io.Writer, debug int) error {
- return writeRuntimeProfile(w, debug, "threadcreate", runtime.ThreadCreateProfile)
+ // Until https://golang.org/issues/6104 is addressed, wrap
+ // ThreadCreateProfile because there's no point in tracking labels when we
+ // don't get any stack-traces.
+ return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
+ return runtime.ThreadCreateProfile(p)
+ })
}
// countGoroutine returns the number of goroutines.
@@ -662,12 +673,15 @@
return runtime.NumGoroutine()
}
+// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go
+func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
+
// writeGoroutine writes the current runtime GoroutineProfile to w.
func writeGoroutine(w io.Writer, debug int) error {
if debug >= 2 {
return writeGoroutineStacks(w)
}
- return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile)
+ return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels)
}
func writeGoroutineStacks(w io.Writer) error {
@@ -691,7 +705,7 @@
return err
}
-func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error {
+func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error {
// Find out how many records there are (fetch(nil)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
@@ -699,13 +713,15 @@
// and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case.
var p []runtime.StackRecord
- n, ok := fetch(nil)
+ var labels []unsafe.Pointer
+ n, ok := fetch(nil, nil)
for {
// Allocate room for a slightly bigger profile,
// in case a few more entries have been added
// since the call to ThreadProfile.
p = make([]runtime.StackRecord, n+10)
- n, ok = fetch(p)
+ labels = make([]unsafe.Pointer, n+10)
+ n, ok = fetch(p, labels)
if ok {
p = p[0:n]
break
@@ -713,13 +729,17 @@
// Profile grew; try again.
}
- return printCountProfile(w, debug, name, runtimeProfile(p))
+ return printCountProfile(w, debug, name, &runtimeProfile{p, labels})
}
-type runtimeProfile []runtime.StackRecord
+type runtimeProfile struct {
+ stk []runtime.StackRecord
+ labels []unsafe.Pointer
+}
-func (p runtimeProfile) Len() int { return len(p) }
-func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() }
+func (p *runtimeProfile) Len() int { return len(p.stk) }
+func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() }
+func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }
var cpu struct {
sync.Mutex
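
Taken together, the Label plumbing above means the goroutine profile can now attribute goroutines to their pprof labels, in both the debug=1 text form (the new "# labels:" lines) and the proto form (via pbLabel). A rough end-to-end sketch (editor's example, not part of the patch; the label key/value and the Gosched loop are arbitrary):

    package main

    import (
        "context"
        "os"
        "runtime"
        "runtime/pprof"
    )

    func main() {
        pprof.Do(context.Background(), pprof.Labels("worker", "demo"), func(context.Context) {
            // Goroutines started inside Do inherit the labels.
            go func() { select {} }()
        })
        for i := 0; i < 10; i++ {
            runtime.Gosched() // let the labeled goroutine start and block
        }
        // The debug=1 output should now contain: # labels: {"worker":"demo"}
        pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
    }
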
diff --git a/src/runtime/pprof/pprof_norusage.go b/src/runtime/pprof/pprof_norusage.go
new file mode 100644
index 0000000..6fdcc6c
--- /dev/null
+++ b/src/runtime/pprof/pprof_norusage.go
@@ -0,0 +1,15 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!linux
+
+package pprof
+
+import (
+ "io"
+)
+
+// addMaxRSS is a no-op on platforms that don't support rusage.
+func addMaxRSS(w io.Writer) {
+}
diff --git a/src/runtime/pprof/pprof_rusage.go b/src/runtime/pprof/pprof_rusage.go
new file mode 100644
index 0000000..d42e6ed
--- /dev/null
+++ b/src/runtime/pprof/pprof_rusage.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin linux
+
+package pprof
+
+import (
+ "fmt"
+ "io"
+ "runtime"
+ "syscall"
+)
+
+// addMaxRSS writes the process's MaxRSS to w on supported platforms.
+func addMaxRSS(w io.Writer) {
+ var rssToBytes uintptr
+ switch runtime.GOOS {
+ case "linux", "android":
+ rssToBytes = 1024
+ case "darwin":
+ rssToBytes = 1
+ default:
+ panic("unsupported OS")
+ }
+
+ var rusage syscall.Rusage
+ syscall.Getrusage(0, &rusage)
+ fmt.Fprintf(w, "# MaxRSS = %d\n", uintptr(rusage.Maxrss)*rssToBytes)
+}
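
The new "# MaxRSS" comment shows up in the debug=1 text output of the heap profile (it is emitted next to the other MemStats comment lines in the hunk above), and only on linux/android/darwin; getrusage reports kilobytes on Linux and bytes on macOS, which is what rssToBytes normalizes. A quick way to eyeball it (editor's sketch, not part of the patch):

    package main

    import (
        "bytes"
        "fmt"
        "runtime/pprof"
        "strings"
    )

    func main() {
        var buf bytes.Buffer
        if err := pprof.Lookup("heap").WriteTo(&buf, 1); err != nil {
            panic(err)
        }
        for _, line := range strings.Split(buf.String(), "\n") {
            if strings.HasPrefix(line, "# MaxRSS") {
                fmt.Println(line) // e.g. "# MaxRSS = 7340032" (value in bytes)
            }
        }
    }
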
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 5349637..7149bfb 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !nacl,!js
+// +build !js
package pprof
@@ -10,6 +10,7 @@
"bytes"
"context"
"fmt"
+ "internal/profile"
"internal/testenv"
"io"
"io/ioutil"
@@ -18,7 +19,6 @@
"os/exec"
"regexp"
"runtime"
- "runtime/pprof/internal/profile"
"strings"
"sync"
"sync/atomic"
@@ -49,8 +49,12 @@
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.
func cpuHog1(x int) int {
+ return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
foo := x
- for i := 0; i < 1e5; i++ {
+ for i := 0; i < n; i++ {
if foo > 0 {
foo *= foo
} else {
@@ -100,35 +104,149 @@
})
}
+// containsInlinedCall reports whether the function body for the function f is
+// known to contain an inlined function call within the first maxBytes bytes.
+func containsInlinedCall(f interface{}, maxBytes int) bool {
+ _, found := findInlinedCall(f, maxBytes)
+ return found
+}
+
+// findInlinedCall returns the PC of an inlined function call within
+// the function body for the function f if any.
+func findInlinedCall(f interface{}, maxBytes int) (pc uint64, found bool) {
+ fFunc := runtime.FuncForPC(uintptr(funcPC(f)))
+ if fFunc == nil || fFunc.Entry() == 0 {
+ panic("failed to locate function entry")
+ }
+
+ for offset := 0; offset < maxBytes; offset++ {
+ innerPC := fFunc.Entry() + uintptr(offset)
+ inner := runtime.FuncForPC(innerPC)
+ if inner == nil {
+ // No function known for this PC value.
+ // It might simply be misaligned, so keep searching.
+ continue
+ }
+ if inner.Entry() != fFunc.Entry() {
+ // Scanned past f and didn't find any inlined functions.
+ break
+ }
+ if inner.Name() != fFunc.Name() {
+ // This PC has f as its entry-point, but is not f. Therefore, it must be a
+ // function inlined into f.
+ return uint64(innerPC), true
+ }
+ }
+
+ return 0, false
+}
+
func TestCPUProfileInlining(t *testing.T) {
- testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
+ if !containsInlinedCall(inlinedCaller, 4<<10) {
+ t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
+ }
+
+ p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
cpuHogger(inlinedCaller, &salt1, dur)
})
+
+ // Check that inlined function locations are encoded correctly: inlinedCallee and inlinedCaller should be in one location.
+ for _, loc := range p.Location {
+ hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
+ for _, line := range loc.Line {
+ if line.Function.Name == "runtime/pprof.inlinedCallee" {
+ hasInlinedCallee = true
+ }
+ if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
+ hasInlinedCallerAfterInlinedCallee = true
+ }
+ }
+ if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
+ t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
+ }
+ }
}
func inlinedCaller(x int) int {
- x = inlinedCallee(x)
+ x = inlinedCallee(x, 1e5)
return x
}
-func inlinedCallee(x int) int {
- // We could just use cpuHog1, but for loops prevent inlining
- // right now. :(
- foo := x
- i := 0
-loop:
- if foo > 0 {
- foo *= foo
- } else {
- foo *= foo + 1
- }
- if i++; i < 1e5 {
- goto loop
- }
- return foo
+func inlinedCallee(x, n int) int {
+ return cpuHog0(x, n)
}
-func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
+//go:noinline
+func dumpCallers(pcs []uintptr) {
+ if pcs == nil {
+ return
+ }
+
+ skip := 2 // Callers and dumpCallers
+ runtime.Callers(skip, pcs)
+}
+
+//go:noinline
+func inlinedCallerDump(pcs []uintptr) {
+ inlinedCalleeDump(pcs)
+}
+
+func inlinedCalleeDump(pcs []uintptr) {
+ dumpCallers(pcs)
+}
+
+func TestCPUProfileRecursion(t *testing.T) {
+ p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
+ cpuHogger(recursionCaller, &salt1, dur)
+ })
+
+ // check the Location encoding was not confused by recursive calls.
+ for i, loc := range p.Location {
+ recursionFunc := 0
+ for _, line := range loc.Line {
+ if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
+ recursionFunc++
+ }
+ }
+ if recursionFunc > 1 {
+ t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
+ }
+ }
+}
+
+func recursionCaller(x int) int {
+ y := recursionCallee(3, x)
+ return y
+}
+
+func recursionCallee(n, x int) int {
+ if n == 0 {
+ return 1
+ }
+ y := inlinedCallee(x, 1e4)
+ return y * recursionCallee(n-1, x)
+}
+
+func recursionChainTop(x int, pcs []uintptr) {
+ if x < 0 {
+ return
+ }
+ recursionChainMiddle(x, pcs)
+}
+
+func recursionChainMiddle(x int, pcs []uintptr) {
+ recursionChainBottom(x, pcs)
+}
+
+func recursionChainBottom(x int, pcs []uintptr) {
+ // This will be called each time; we only care about the last call. We
+ // can't make this conditional or this function won't be inlined.
+ dumpCallers(pcs)
+
+ recursionChainTop(x-1, pcs)
+}
+
+func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil {
t.Fatal(err)
@@ -137,15 +255,16 @@
count := uintptr(sample.Value[0])
f(count, sample.Location, sample.Label)
}
+ return p
}
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
-// as interpreted by matches.
-func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) {
+// as interpreted by matches, and returns the parsed profile.
+func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
switch runtime.GOOS {
case "darwin":
switch runtime.GOARCH {
- case "arm", "arm64":
+ case "arm64":
// nothing
default:
out, err := exec.Command("uname", "-a").CombinedOutput()
@@ -195,8 +314,8 @@
f(duration)
StopCPUProfile()
- if profileOk(t, matches, need, avoid, prof, duration) {
- return
+ if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
+ return p
}
duration *= 2
@@ -217,6 +336,7 @@
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
}
t.FailNow()
+ return nil
}
func contains(slice []string, s string) bool {
@@ -242,7 +362,7 @@
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
-func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
+func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true
// Check that profile is well formed, contains 'need', and does not contain
@@ -251,7 +371,7 @@
avoidSamples := make([]uintptr, len(avoid))
var samples uintptr
var buf bytes.Buffer
- parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
+ p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
samples += count
@@ -278,7 +398,7 @@
// not enough samples due to coarse timer
// resolution. Let it go.
t.Log("too few samples on Windows (golang.org/issue/10842)")
- return false
+ return p, false
}
// Check that we got a reasonable number of samples.
@@ -300,7 +420,7 @@
}
if len(need) == 0 {
- return ok
+ return p, ok
}
var total uintptr
@@ -323,7 +443,7 @@
ok = false
}
}
- return ok
+ return p, ok
}
// Fork can hang if preempted with signals frequently enough (see issue 5517).
@@ -857,6 +977,26 @@
runtime.Gosched()
}
}
+ ctx := context.Background()
+
+ // ... and again, with labels this time (just with fewer iterations to keep
+ // sorting deterministic).
+ Do(ctx, Labels("label", "value"), func(context.Context) {
+ for i := 0; i < 89; i++ {
+ switch {
+ case i%10 == 0:
+ go func1(c)
+ case i%2 == 0:
+ go func2(c)
+ default:
+ go func3(c)
+ }
+ // Let goroutines block on channel
+ for j := 0; j < 5; j++ {
+ runtime.Gosched()
+ }
+ }
+ })
var w bytes.Buffer
goroutineProf := Lookup("goroutine")
@@ -865,8 +1005,11 @@
goroutineProf.WriteTo(&w, 1)
prof := w.String()
- if !containsInOrder(prof, "\n50 @ ", "\n40 @", "\n10 @", "\n1 @") {
- t.Errorf("expected sorted goroutine counts:\n%s", prof)
+ labels := labelMap{"label": "value"}
+ labelStr := "\n# labels: " + labels.String()
+ if !containsInOrder(prof, "\n50 @ ", "\n44 @", labelStr,
+ "\n40 @", "\n36 @", labelStr, "\n10 @", "\n9 @", labelStr, "\n1 @") {
+ t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof)
}
// Check proto profile
@@ -879,9 +1022,18 @@
if err := p.CheckValid(); err != nil {
t.Errorf("protobuf profile is invalid: %v", err)
}
- if !containsCounts(p, []int64{50, 40, 10, 1}) {
- t.Errorf("expected count profile to contain goroutines with counts %v, got %v",
- []int64{50, 40, 10, 1}, p)
+ expectedLabels := map[int64]map[string]string{
+ 50: map[string]string{},
+ 44: map[string]string{"label": "value"},
+ 40: map[string]string{},
+ 36: map[string]string{"label": "value"},
+ 10: map[string]string{},
+ 9: map[string]string{"label": "value"},
+ 1: map[string]string{},
+ }
+ if !containsCountsLabels(p, expectedLabels) {
+ t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v",
+ expectedLabels, p)
}
close(c)
@@ -900,10 +1052,23 @@
return true
}
-func containsCounts(prof *profile.Profile, counts []int64) bool {
+func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[string]string) bool {
m := make(map[int64]int)
- for _, c := range counts {
+ type nkey struct {
+ count int64
+ key, val string
+ }
+ n := make(map[nkey]int)
+ for c, kv := range countLabels {
m[c]++
+ for k, v := range kv {
+ n[nkey{
+ count: c,
+ key: k,
+ val: v,
+ }]++
+
+ }
}
for _, s := range prof.Sample {
// The count is the single value in the sample
@@ -911,12 +1076,26 @@
return false
}
m[s.Value[0]]--
+ for k, vs := range s.Label {
+ for _, v := range vs {
+ n[nkey{
+ count: s.Value[0],
+ key: k,
+ val: v,
+ }]--
+ }
+ }
}
for _, n := range m {
if n > 0 {
return false
}
}
+ for _, ncnt := range n {
+ if ncnt != 0 {
+ return false
+ }
+ }
return true
}
@@ -1056,3 +1235,222 @@
runtime.Stack(buf, true)
}
}
+
+// TestTryAdd tests the cases that are hard to test with real program execution.
+//
+// For example, the current Go compilers may not always inline functions
+// involved in recursion, but that may change in future compilers. This test
+// covers such cases by using fake call sequences and by forcing the profile
+// to be built via translateCPUProfile, defined in proto_test.go.
+func TestTryAdd(t *testing.T) {
+ if _, found := findInlinedCall(inlinedCallerDump, 4<<10); !found {
+ t.Skip("Can't determine whether anything was inlined into inlinedCallerDump.")
+ }
+
+ // inlinedCallerDump
+ // inlinedCalleeDump
+ pcs := make([]uintptr, 2)
+ inlinedCallerDump(pcs)
+ inlinedCallerStack := make([]uint64, 2)
+ for i := range pcs {
+ inlinedCallerStack[i] = uint64(pcs[i])
+ }
+
+ if _, found := findInlinedCall(recursionChainBottom, 4<<10); !found {
+ t.Skip("Can't determine whether anything was inlined into recursionChainBottom.")
+ }
+
+ // recursionChainTop
+ // recursionChainMiddle
+ // recursionChainBottom
+ // recursionChainTop
+ // recursionChainMiddle
+ // recursionChainBottom
+ pcs = make([]uintptr, 6)
+ recursionChainTop(1, pcs)
+ recursionStack := make([]uint64, len(pcs))
+ for i := range pcs {
+ recursionStack[i] = uint64(pcs[i])
+ }
+
+ period := int64(2000 * 1000) // 1/500*1e9 nanosec.
+
+ testCases := []struct {
+ name string
+ input []uint64 // following the input format assumed by profileBuilder.addCPUData.
+ wantLocs [][]string // ordered location entries with function names.
+ wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs.
+ }{{
+ // Sanity test for a normal, complete stack trace.
+ name: "full_stack_trace",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
+ },
+ wantLocs: [][]string{
+ {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
+ },
+ wantSamples: []*profile.Sample{
+ {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "bug35538",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ // Fake frame: tryAdd will have inlinedCallerDump
+ // (stack[1]) on the deck when it encounters the next
+ // inline function. It should accept this.
+ 7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1],
+ 5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1],
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
+ {Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "bug38096",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ // count (data[2]) == 0 && len(stk) == 1 is an overflow
+ // entry. The "stk" entry is actually the count.
+ 4, 0, 0, 4242,
+ },
+ wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // If a function is directly called recursively then it must
+ // not be inlined in the caller.
+ //
+ // N.B. We're generating an impossible profile here, with a
+ // recursive inlineCalleeDump call. This is simulating a non-Go
+ // function that looks like an inlined Go function other than
+ // its recursive property. See pcDeck.tryAdd.
+ name: "directly_recursive_func_is_not_inlined",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0],
+ 4, 0, 40, inlinedCallerStack[0],
+ },
+ // inlinedCallerDump shows up here because
+ // runtime_expandFinalInlineFrame adds it to the stack frame.
+ wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
+ {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
+ },
+ }, {
+ name: "recursion_chain_inline",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5],
+ },
+ wantLocs: [][]string{
+ {"runtime/pprof.recursionChainBottom"},
+ {
+ "runtime/pprof.recursionChainMiddle",
+ "runtime/pprof.recursionChainTop",
+ "runtime/pprof.recursionChainBottom",
+ },
+ {
+ "runtime/pprof.recursionChainMiddle",
+ "runtime/pprof.recursionChainTop",
+ "runtime/pprof.TestTryAdd", // inlined into the test.
+ },
+ },
+ wantSamples: []*profile.Sample{
+ {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}, {ID: 3}}},
+ },
+ }, {
+ name: "truncated_stack_trace_later",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
+ 4, 0, 60, inlinedCallerStack[0],
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "truncated_stack_trace_first",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCallerStack[0],
+ 5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1],
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // We can recover the inlined caller from a truncated stack.
+ name: "truncated_stack_trace_only",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCallerStack[0],
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // The same location is used for duplicated stacks.
+ name: "truncated_stack_trace_twice",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCallerStack[0],
+ // Fake frame: add a fake call to
+ // inlinedCallerDump to prevent this sample
+ // from getting merged into above.
+ 5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0],
+ },
+ wantLocs: [][]string{
+ {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
+ {"runtime/pprof.inlinedCallerDump"},
+ },
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
+ },
+ }}
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ p, err := translateCPUProfile(tc.input)
+ if err != nil {
+ t.Fatalf("translating profile: %v", err)
+ }
+ t.Logf("Profile: %v\n", p)
+
+ // One location entry with all inlined functions.
+ var gotLoc [][]string
+ for _, loc := range p.Location {
+ var names []string
+ for _, line := range loc.Line {
+ names = append(names, line.Function.Name)
+ }
+ gotLoc = append(gotLoc, names)
+ }
+ if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want {
+ t.Errorf("Got Location = %+v\n\twant %+v", got, want)
+ }
+ // All samples should point to one location.
+ var gotSamples []*profile.Sample
+ for _, sample := range p.Sample {
+ var locs []*profile.Location
+ for _, loc := range sample.Location {
+ locs = append(locs, &profile.Location{ID: loc.ID})
+ }
+ gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs})
+ }
+ if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want {
+ t.Errorf("Got Samples = %+v\n\twant %+v", got, want)
+ }
+ })
+ }
+}
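
An editor's annotation on the []uint64 inputs used throughout TestTryAdd (inferred from the cases and comments above, not an authoritative spec): each record is length-prefixed, the second word is ignored by these tests (always 0 here), the first record pins the sampling rate, and every later record carries a sample count followed by the stack PCs. Roughly:

    // word 0: total words in this record, word 1: always 0 in these tests, then the payload.
    hdr := []uint64{3, 0, 500}                // hz = 500, so period = 2ms
    smp := []uint64{5, 0, 50, 0x4010, 0x4020} // count 50, then two (fake) PCs
    ovf := []uint64{4, 0, 0, 4242}            // count 0 + a single "PC": 4242 lost samples (the bug38096 case)
    data := append(append(append([]uint64{}, hdr...), smp...), ovf...)
    _ = data // this is the shape translateCPUProfile / addCPUData consumes
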
diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go
index 7864dd7..8519af6 100644
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@@ -41,9 +41,10 @@
pb protobuf
strings []string
stringMap map[string]int
- locs map[uintptr]int
- funcs map[string]int // Package path-qualified function name to Function.ID
+ locs map[uintptr]locInfo // list of locInfo starting with the given PC.
+ funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
+ deck pcDeck
}
type memMap struct {
@@ -207,15 +208,7 @@
b.pb.endMessage(tag, start)
}
-// locForPC returns the location ID for addr.
-// addr must a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
-// It may emit to b.pb, so there must be no message encoding in progress.
-func (b *profileBuilder) locForPC(addr uintptr) uint64 {
- id := uint64(b.locs[addr])
- if id != 0 {
- return id
- }
-
+func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
// Expand this one address using CallersFrames so we can cache
// each expansion. In general, CallersFrames takes a whole
// stack, but in this case we know there will be no skips in
@@ -225,7 +218,7 @@
if frame.Function == "runtime.goexit" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
- return 0
+ return nil, 0
}
symbolizeResult := lookupTried
@@ -238,59 +231,22 @@
// a reasonable call PC. This mostly happens in tests.
frame.PC = addr - 1
}
-
- // We can't write out functions while in the middle of the
- // Location message, so record new functions we encounter and
- // write them out after the Location.
- type newFunc struct {
- id uint64
- name, file string
- }
- newFuncs := make([]newFunc, 0, 8)
-
- id = uint64(len(b.locs)) + 1
- b.locs[addr] = int(id)
- start := b.pb.startMessage()
- b.pb.uint64Opt(tagLocation_ID, id)
- b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
- for frame.Function != "runtime.goexit" {
- // Write out each line in frame expansion.
- funcID := uint64(b.funcs[frame.Function])
- if funcID == 0 {
- funcID = uint64(len(b.funcs)) + 1
- b.funcs[frame.Function] = int(funcID)
- newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
- }
- b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
- if !more {
- break
- }
+ ret := []runtime.Frame{frame}
+ for frame.Function != "runtime.goexit" && more {
frame, more = frames.Next()
+ ret = append(ret, frame)
}
- for i := range b.mem {
- if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
- b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
+ return ret, symbolizeResult
+}
- m := b.mem[i]
- m.funcs |= symbolizeResult
- b.mem[i] = m
- break
- }
- }
- b.pb.endMessage(tagProfile_Location, start)
+type locInfo struct {
+ // location id assigned by the profileBuilder
+ id uint64
- // Write out functions we found during frame expansion.
- for _, fn := range newFuncs {
- start := b.pb.startMessage()
- b.pb.uint64Opt(tagFunction_ID, fn.id)
- b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
- b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
- b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
- b.pb.endMessage(tagProfile_Function, start)
- }
-
- b.flush()
- return id
+ // sequence of PCs, including the fake PCs returned by the traceback
+ // to represent inlined functions
+ // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
+ pcs []uintptr
}
// newProfileBuilder returns a new profileBuilder.
@@ -305,7 +261,7 @@
start: time.Now(),
strings: []string{""},
stringMap: map[string]int{"": 0},
- locs: map[uintptr]int{},
+ locs: map[uintptr]locInfo{},
funcs: map[string]int{},
}
b.readMapping()
@@ -366,7 +322,10 @@
// overflow record
count = uint64(stk[0])
stk = []uint64{
- uint64(funcPC(lostProfileEvent)),
+ // gentraceback guarantees that PCs in the
+ // stack can be unconditionally decremented and
+ // still be valid, so we must do the same.
+ uint64(funcPC(lostProfileEvent) + 1),
}
}
b.m.lookup(stk, tag).count += int64(count)
@@ -389,6 +348,7 @@
values := []int64{0, 0}
var locs []uint64
+
for e := b.m.all; e != nil; e = e.nextAll {
values[0] = e.count
values[1] = e.count * b.period
@@ -402,23 +362,8 @@
}
}
- locs = locs[:0]
- for i, addr := range e.stk {
- // Addresses from stack traces point to the
- // next instruction after each call, except
- // for the leaf, which points to where the
- // signal occurred. locForPC expects return
- // PCs, so increment the leaf address to look
- // like a return PC.
- if i == 0 {
- addr++
- }
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
- }
+ locs = b.appendLocsForStack(locs[:0], e.stk)
+
b.pbSample(values, locs, labels)
}
@@ -435,6 +380,197 @@
b.zw.Close()
}
+// appendLocsForStack appends the location IDs for the given stack trace to the given
+// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
+// an inline marker, as returned by the runtime traceback functions.
+//
+// It may emit to b.pb, so there must be no message encoding in progress.
+func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
+ b.deck.reset()
+
+ // The last frame might be truncated. Recover lost inline frames.
+ stk = runtime_expandFinalInlineFrame(stk)
+
+ for len(stk) > 0 {
+ addr := stk[0]
+ if l, ok := b.locs[addr]; ok {
+ // first record the location if there is any pending accumulated info.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // then, record the cached location.
+ locs = append(locs, l.id)
+
+ // Skip the matching pcs.
+ //
+ // Even if stk was truncated due to the stack depth
+ // limit, expandFinalInlineFrame above has already
+ // fixed the truncation, ensuring it is long enough.
+ stk = stk[len(l.pcs):]
+ continue
+ }
+
+ frames, symbolizeResult := allFrames(addr)
+ if len(frames) == 0 { // runtime.goexit.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+ stk = stk[1:]
+ continue
+ }
+
+ if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
+ stk = stk[1:]
+ continue
+ }
+ // add failed because this addr is not inlined with the
+ // existing PCs in the deck. Flush the deck and retry handling
+ // this pc.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // check cache again - previous emitLocation added a new entry
+ if l, ok := b.locs[addr]; ok {
+ locs = append(locs, l.id)
+ stk = stk[len(l.pcs):] // skip the matching pcs.
+ } else {
+ b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
+ stk = stk[1:]
+ }
+ }
+ if id := b.emitLocation(); id > 0 { // emit remaining location.
+ locs = append(locs, id)
+ }
+ return locs
+}
+
+// pcDeck is a helper to detect a sequence of inlined functions from
+// a stack trace returned by the runtime.
+//
+// The stack traces returned by the runtime's traceback functions are fully
+// expanded (at least for Go functions) and include the fake pcs representing
+// inlined functions. The profile proto expects the inlined functions to be
+// encoded in one Location message.
+// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
+//
+// Runtime does not directly expose whether a frame is for an inlined function
+// and looking up debug info is not ideal, so we use a heuristic to filter
+// the fake pcs and restore the inlined and entry functions. Inlined functions
+// have the following properties:
+// Frame's Func is nil (note: also true for non-Go functions), and
+// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
+// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
+//
+// As we read and process the pcs in a stack trace one by one (from leaf to root),
+// we use pcDeck to temporarily hold the observed pcs and their expanded frames
+// until we observe the entry function frame.
+type pcDeck struct {
+ pcs []uintptr
+ frames []runtime.Frame
+ symbolizeResult symbolizeFlag
+}
+
+func (d *pcDeck) reset() {
+ d.pcs = d.pcs[:0]
+ d.frames = d.frames[:0]
+ d.symbolizeResult = 0
+}
+
+// tryAdd tries to add the pc and Frames expanded from it (most likely one,
+// since the stack trace is already fully expanded) and the symbolizeResult
+// to the deck. If it fails the caller needs to flush the deck and retry.
+func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
+ if existing := len(d.pcs); existing > 0 {
+ // 'd.frames' are all expanded from one 'pc' and represent all
+ // inlined functions so we check only the last one.
+ newFrame := frames[0]
+ last := d.frames[existing-1]
+ if last.Func != nil { // the last frame can't be inlined. Flush.
+ return false
+ }
+ if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
+ return false
+ }
+
+ if last.Entry != newFrame.Entry { // newFrame is for a different function.
+ return false
+ }
+ if last.Function == newFrame.Function { // maybe recursion.
+ return false
+ }
+ }
+ d.pcs = append(d.pcs, pc)
+ d.frames = append(d.frames, frames...)
+ d.symbolizeResult |= symbolizeResult
+ return true
+}
+
+// emitLocation emits the new location and function information recorded in the deck
+// and returns the location ID encoded in the profile protobuf.
+// It emits to b.pb, so there must be no message encoding in progress.
+// It resets the deck.
+func (b *profileBuilder) emitLocation() uint64 {
+ if len(b.deck.pcs) == 0 {
+ return 0
+ }
+ defer b.deck.reset()
+
+ addr := b.deck.pcs[0]
+ firstFrame := b.deck.frames[0]
+
+ // We can't write out functions while in the middle of the
+ // Location message, so record new functions we encounter and
+ // write them out after the Location.
+ type newFunc struct {
+ id uint64
+ name, file string
+ }
+ newFuncs := make([]newFunc, 0, 8)
+
+ id := uint64(len(b.locs)) + 1
+ b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
+
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagLocation_ID, id)
+ b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
+ for _, frame := range b.deck.frames {
+ // Write out each line in frame expansion.
+ funcID := uint64(b.funcs[frame.Function])
+ if funcID == 0 {
+ funcID = uint64(len(b.funcs)) + 1
+ b.funcs[frame.Function] = int(funcID)
+ newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+ }
+ b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+ }
+ for i := range b.mem {
+ if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
+ b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
+
+ m := b.mem[i]
+ m.funcs |= b.deck.symbolizeResult
+ b.mem[i] = m
+ break
+ }
+ }
+ b.pb.endMessage(tagProfile_Location, start)
+
+ // Write out functions we found during frame expansion.
+ for _, fn := range newFuncs {
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagFunction_ID, fn.id)
+ b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+ b.pb.endMessage(tagProfile_Function, start)
+ }
+
+ b.flush()
+ return id
+}
+
// readMapping reads /proc/self/maps and writes mappings to b.pb.
// It saves the address ranges of the mappings in b.mem for use
// when emitting locations.
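
The pcDeck heuristic above leans on how runtime.CallersFrames expands PCs: an inlined Go frame comes back with a non-empty Function but a nil Func. A standalone way to observe that (editor's illustration, not from the patch; whether inlinable is actually inlined is up to the compiler, as the tests acknowledge with their containsInlinedCall guards):

    package main

    import (
        "fmt"
        "runtime"
    )

    //go:noinline
    func capture() []uintptr {
        pcs := make([]uintptr, 16)
        return pcs[:runtime.Callers(1, pcs)]
    }

    func inlinable() []uintptr { return capture() } // small, so usually inlined into outer

    //go:noinline
    func outer() []uintptr { return inlinable() }

    func main() {
        frames := runtime.CallersFrames(outer())
        for {
            f, more := frames.Next()
            // For inlined frames Func is nil even though Function is set —
            // one of the properties pcDeck.tryAdd checks.
            fmt.Printf("%-24s Func==nil: %v\n", f.Function, f.Func == nil)
            if !more {
                break
            }
        }
    }
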
diff --git a/src/runtime/pprof/proto_test.go b/src/runtime/pprof/proto_test.go
index bcb4d33..3043d53 100644
--- a/src/runtime/pprof/proto_test.go
+++ b/src/runtime/pprof/proto_test.go
@@ -8,13 +8,13 @@
"bytes"
"encoding/json"
"fmt"
+ "internal/profile"
"internal/testenv"
"io/ioutil"
"os"
"os/exec"
"reflect"
"runtime"
- "runtime/pprof/internal/profile"
"strings"
"testing"
)
@@ -116,9 +116,9 @@
b := []uint64{
3, 0, 500, // hz = 500
- 5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
- 5, 0, 40, uint64(addr2), uint64(addr2 + 2), // 40 samples in addr2
- 5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
+ 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1
+ 5, 0, 40, uint64(addr2 + 1), uint64(addr2 + 2), // 40 samples in addr2
+ 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1
}
p, err := translateCPUProfile(b)
if err != nil {
@@ -358,6 +358,17 @@
continue
}
}
+
+ if traceback == "Go+C" {
+ // The test code was arranged to have PCs from C and
+ // they are not symbolized.
+ // Check no Location containing those unsymbolized PCs contains multiple lines.
+ for i, loc := range prof.Location {
+ if !symbolized(loc) && len(loc.Line) > 1 {
+ t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
+ }
+ }
+ }
})
}
}
@@ -411,3 +422,16 @@
}
}
}
+
+// Make sure the profiler can handle an empty stack trace.
+// See issue 37967.
+func TestEmptyStack(t *testing.T) {
+ b := []uint64{
+ 3, 0, 500, // hz = 500
+ 3, 0, 10, // 10 samples with an empty stack trace
+ }
+ _, err := translateCPUProfile(b)
+ if err != nil {
+ t.Fatalf("translating profile: %v", err)
+ }
+}
diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go
index 1c88aae..fa75a28 100644
--- a/src/runtime/pprof/protomem.go
+++ b/src/runtime/pprof/protomem.go
@@ -27,30 +27,27 @@
values := []int64{0, 0, 0, 0}
var locs []uint64
for _, r := range p {
- locs = locs[:0]
hideRuntime := true
for tries := 0; tries < 2; tries++ {
- for _, addr := range r.Stack() {
- // For heap profiles, all stack
- // addresses are return PCs, which is
- // what locForPC expects.
- if hideRuntime {
+ stk := r.Stack()
+ // For heap profiles, all stack
+ // addresses are return PCs, which is
+ // what appendLocsForStack expects.
+ if hideRuntime {
+ for i, addr := range stk {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue
}
// Found non-runtime. Show any runtime uses above it.
- hideRuntime = false
+ stk = stk[i:]
+ break
}
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
}
+ locs = b.appendLocsForStack(locs[:0], stk)
if len(locs) > 0 {
break
}
- hideRuntime = false // try again, and show all frames
+ hideRuntime = false // try again, and show all frames next time.
}
values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)
diff --git a/src/runtime/pprof/protomem_test.go b/src/runtime/pprof/protomem_test.go
index 471b1ae..156f628 100644
--- a/src/runtime/pprof/protomem_test.go
+++ b/src/runtime/pprof/protomem_test.go
@@ -6,8 +6,8 @@
import (
"bytes"
+ "internal/profile"
"runtime"
- "runtime/pprof/internal/profile"
"testing"
)
diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go
index b71bbad..dd2545b 100644
--- a/src/runtime/pprof/runtime.go
+++ b/src/runtime/pprof/runtime.go
@@ -9,6 +9,9 @@
"unsafe"
)
+// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
+
// runtime_setProfLabel is defined in runtime/proflabel.go.
func runtime_setProfLabel(labels unsafe.Pointer)
diff --git a/src/runtime/pprof/testdata/mappingtest/main.go b/src/runtime/pprof/testdata/mappingtest/main.go
index 476b9e8..484b7f9 100644
--- a/src/runtime/pprof/testdata/mappingtest/main.go
+++ b/src/runtime/pprof/testdata/mappingtest/main.go
@@ -17,8 +17,7 @@
int cpuHogCSalt1 = 0;
int cpuHogCSalt2 = 0;
-void CPUHogCFunction() {
- int foo = cpuHogCSalt1;
+void CPUHogCFunction0(int foo) {
int i;
for (i = 0; i < 100000; i++) {
if (foo > 0) {
@@ -30,6 +29,10 @@
}
}
+void CPUHogCFunction() {
+ CPUHogCFunction0(cpuHogCSalt1);
+}
+
struct CgoTracebackArg {
uintptr_t context;
uintptr_t sigContext;
@@ -39,8 +42,9 @@
void CollectCgoTraceback(void* parg) {
struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
- arg->buf[0] = (uintptr_t)(CPUHogCFunction);
- arg->buf[1] = 0;
+ arg->buf[0] = (uintptr_t)(CPUHogCFunction0);
+ arg->buf[1] = (uintptr_t)(CPUHogCFunction);
+ arg->buf[2] = 0;
};
*/
import "C"
@@ -81,7 +85,6 @@
var salt2 int
func cpuHogGoFunction() {
- // Generates CPU profile samples including a Go call path.
for {
foo := salt1
for i := 0; i < 1e5; i++ {
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
new file mode 100644
index 0000000..7618565
--- /dev/null
+++ b/src/runtime/preempt.go
@@ -0,0 +1,481 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Goroutine preemption
+//
+// A goroutine can be preempted at any safe-point. Currently, there
+// are a few categories of safe-points:
+//
+// 1. A blocked safe-point occurs for the duration that a goroutine is
+// descheduled, blocked on synchronization, or in a system call.
+//
+// 2. Synchronous safe-points occur when a running goroutine checks
+// for a preemption request.
+//
+// 3. Asynchronous safe-points occur at any instruction in user code
+// where the goroutine can be safely paused and a conservative
+// stack and register scan can find stack roots. The runtime can
+// stop a goroutine at an async safe-point using a signal.
+//
+// At both blocked and synchronous safe-points, a goroutine's CPU
+// state is minimal and the garbage collector has complete information
+// about its entire stack. This makes it possible to deschedule a
+// goroutine with minimal space, and to precisely scan a goroutine's
+// stack.
+//
+// Synchronous safe-points are implemented by overloading the stack
+// bound check in function prologues. To preempt a goroutine at the
+// next synchronous safe-point, the runtime poisons the goroutine's
+// stack bound to a value that will cause the next stack bound check
+// to fail and enter the stack growth implementation, which will
+// detect that it was actually a preemption and redirect to preemption
+// handling.
+//
+// Preemption at asynchronous safe-points is implemented by suspending
+// the thread using an OS mechanism (e.g., signals) and inspecting its
+// state to determine if the goroutine was at an asynchronous
+// safe-point. Since the thread suspension itself is generally
+// asynchronous, it also checks if the running goroutine wants to be
+// preempted, since this could have changed. If all conditions are
+// satisfied, it adjusts the signal context to make it look like the
+// signaled thread just called asyncPreempt and resumes the thread.
+// asyncPreempt spills all registers and enters the scheduler.
+//
+// (An alternative would be to preempt in the signal handler itself.
+// This would let the OS save and restore the register state and the
+// runtime would only need to know how to extract potentially
+// pointer-containing registers from the signal context. However, this
+// would consume an M for every preempted G, and the scheduler itself
+// is not designed to run from a signal handler, as it tends to
+// allocate memory and start threads in the preemption path.)
+
+package runtime
+
+import (
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// Keep in sync with cmd/compile/internal/gc/plive.go:go115ReduceLiveness.
+const go115ReduceLiveness = true
+
+const go115RestartSeq = go115ReduceLiveness && true // enable restartable sequences
+
+type suspendGState struct {
+ g *g
+
+ // dead indicates the goroutine was not suspended because it
+ // is dead. This goroutine could be reused after the dead
+ // state was observed, so the caller must not assume that it
+ // remains dead.
+ dead bool
+
+ // stopped indicates that this suspendG transitioned the G to
+ // _Gwaiting via g.preemptStop and thus is responsible for
+ // readying it when done.
+ stopped bool
+}
+
+// suspendG suspends goroutine gp at a safe-point and returns the
+// state of the suspended goroutine. The caller gets read access to
+// the goroutine until it calls resumeG.
+//
+// It is safe for multiple callers to attempt to suspend the same
+// goroutine at the same time. The goroutine may execute between
+// subsequent successful suspend operations. The current
+// implementation grants exclusive access to the goroutine, and hence
+// multiple callers will serialize. However, the intent is to grant
+// shared read access, so please don't depend on exclusive access.
+//
+// This must be called from the system stack and the user goroutine on
+// the current M (if any) must be in a preemptible state. This
+// prevents deadlocks where two goroutines attempt to suspend each
+// other and both are in non-preemptible states. There are other ways
+// to resolve this deadlock, but this seems simplest.
+//
+// TODO(austin): What if we instead required this to be called from a
+// user goroutine? Then we could deschedule the goroutine while
+// waiting instead of blocking the thread. If two goroutines tried to
+// suspend each other, one of them would win and the other wouldn't
+// complete the suspend until it was resumed. We would have to be
+// careful that they couldn't actually queue up suspend for each other
+// and then both be suspended. This would also avoid the need for a
+// kernel context switch in the synchronous case because we could just
+// directly schedule the waiter. The context switch is unavoidable in
+// the signal case.
+//
+//go:systemstack
+func suspendG(gp *g) suspendGState {
+ if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
+ // Since we're on the system stack of this M, the user
+ // G is stuck at an unsafe point. If another goroutine
+ // were to try to preempt m.curg, it could deadlock.
+ throw("suspendG from non-preemptible goroutine")
+ }
+
+ // See https://golang.org/cl/21503 for justification of the yield delay.
+ const yieldDelay = 10 * 1000
+ var nextYield int64
+
+ // Drive the goroutine to a preemption point.
+ stopped := false
+ var asyncM *m
+ var asyncGen uint32
+ var nextPreemptM int64
+ for i := 0; ; i++ {
+ switch s := readgstatus(gp); s {
+ default:
+ if s&_Gscan != 0 {
+ // Someone else is suspending it. Wait
+ // for them to finish.
+ //
+ // TODO: It would be nicer if we could
+ // coalesce suspends.
+ break
+ }
+
+ dumpgstatus(gp)
+ throw("invalid g status")
+
+ case _Gdead:
+ // Nothing to suspend.
+ //
+ // preemptStop may need to be cleared, but
+ // doing that here could race with goroutine
+ // reuse. Instead, goexit0 clears it.
+ return suspendGState{dead: true}
+
+ case _Gcopystack:
+ // The stack is being copied. We need to wait
+ // until this is done.
+
+ case _Gpreempted:
+ // We (or someone else) suspended the G. Claim
+ // ownership of it by transitioning it to
+ // _Gwaiting.
+ if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
+ break
+ }
+
+ // We stopped the G, so we have to ready it later.
+ stopped = true
+
+ s = _Gwaiting
+ fallthrough
+
+ case _Grunnable, _Gsyscall, _Gwaiting:
+ // Claim goroutine by setting scan bit.
+ // This may race with execution or readying of gp.
+ // The scan bit keeps it from transitioning state.
+ if !castogscanstatus(gp, s, s|_Gscan) {
+ break
+ }
+
+ // Clear the preemption request. It's safe to
+ // reset the stack guard because we hold the
+ // _Gscan bit and thus own the stack.
+ gp.preemptStop = false
+ gp.preempt = false
+ gp.stackguard0 = gp.stack.lo + _StackGuard
+
+ // The goroutine was already at a safe-point
+ // and we've now locked that in.
+ //
+ // TODO: It would be much better if we didn't
+ // leave it in _Gscan, but instead gently
+ // prevented its scheduling until resumption.
+ // Maybe we only use this to bump a suspended
+ // count and the scheduler skips suspended
+ // goroutines? That wouldn't be enough for
+ // {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
+ // for all those transitions we need to check
+ // suspended and deschedule?
+ return suspendGState{g: gp, stopped: stopped}
+
+ case _Grunning:
+ // Optimization: if there is already a pending preemption request
+ // (from the previous loop iteration), don't bother with the atomics.
+ if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
+ break
+ }
+
+ // Temporarily block state transitions.
+ if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
+ break
+ }
+
+ // Request synchronous preemption.
+ gp.preemptStop = true
+ gp.preempt = true
+ gp.stackguard0 = stackPreempt
+
+ // Prepare for asynchronous preemption.
+ asyncM2 := gp.m
+ asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+ needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+ asyncM = asyncM2
+ asyncGen = asyncGen2
+
+ casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+ // Send asynchronous preemption. We do this
+ // after CASing the G back to _Grunning
+ // because preemptM may be synchronous and we
+ // don't want to catch the G just spinning on
+ // its status.
+ if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+ // Rate limit preemptM calls. This is
+ // particularly important on Windows
+ // where preemptM is actually
+ // synchronous and the spin loop here
+ // can lead to live-lock.
+ now := nanotime()
+ if now >= nextPreemptM {
+ nextPreemptM = now + yieldDelay/2
+ preemptM(asyncM)
+ }
+ }
+ }
+
+ // TODO: Don't busy wait. This loop should really only
+ // be a simple read/decide/CAS loop that only fails if
+ // there's an active race. Once the CAS succeeds, we
+ // should queue up the preemption (which will require
+ // it to be reliable in the _Grunning case, not
+ // best-effort) and then sleep until we're notified
+ // that the goroutine is suspended.
+ if i == 0 {
+ nextYield = nanotime() + yieldDelay
+ }
+ if nanotime() < nextYield {
+ procyield(10)
+ } else {
+ osyield()
+ nextYield = nanotime() + yieldDelay/2
+ }
+ }
+}
+
+// resumeG undoes the effects of suspendG, allowing the suspended
+// goroutine to continue from its current safe-point.
+func resumeG(state suspendGState) {
+ if state.dead {
+ // We didn't actually stop anything.
+ return
+ }
+
+ gp := state.g
+ switch s := readgstatus(gp); s {
+ default:
+ dumpgstatus(gp)
+ throw("unexpected g status")
+
+ case _Grunnable | _Gscan,
+ _Gwaiting | _Gscan,
+ _Gsyscall | _Gscan:
+ casfrom_Gscanstatus(gp, s, s&^_Gscan)
+ }
+
+ if state.stopped {
+ // We stopped it, so we need to re-schedule it.
+ ready(gp, 0, true)
+ }
+}
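
For orientation, the sketch below shows how these two functions are meant to be paired by a caller such as the GC's stack scanner: suspend, do the work that needs the G parked, then resume. This is an illustrative sketch only, not part of this change; the helper name scanOneStack is invented, and in this release the real pairing lives in markroot/scanstack in mgcmark.go.

//go:systemstack
func scanOneStack(gp *g, gcw *gcWork) {
	// Drive gp to a safe point and claim it; on return gp is parked
	// with the _Gscan bit set (or was already dead).
	state := suspendG(gp)
	if state.dead {
		return // nothing to scan
	}
	// gp cannot run or have its stack moved while we hold the claim.
	scanstack(gp, gcw)
	// Drop the _Gscan bit and re-ready gp if suspendG stopped it.
	resumeG(state)
}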
+
+// canPreemptM reports whether mp is in a state that is safe to preempt.
+//
+// It is nosplit because it has nosplit callers.
+//
+//go:nosplit
+func canPreemptM(mp *m) bool {
+ return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
+}
+
+//go:generate go run mkpreempt.go
+
+// asyncPreempt saves all user registers and calls asyncPreempt2.
+//
+// When stack scanning encounters an asyncPreempt frame, it scans that
+// frame and its parent frame conservatively.
+//
+// asyncPreempt is implemented in assembly.
+func asyncPreempt()
+
+//go:nosplit
+func asyncPreempt2() {
+ gp := getg()
+ gp.asyncSafePoint = true
+ if gp.preemptStop {
+ mcall(preemptPark)
+ } else {
+ mcall(gopreempt_m)
+ }
+ gp.asyncSafePoint = false
+}
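
The user-visible effect of this machinery is easiest to see outside the runtime. The following stand-alone demo is my illustration, not part of this change: it spins in a call-free loop on a single P. With asynchronous preemption the program completes; running it with GODEBUG=asyncpreemptoff=1 typically hangs, because the loop contains no cooperative preemption point.

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)
	go func() {
		// Tight loop with no calls: no cooperative preemption point,
		// so only asynchronous preemption can interrupt it.
		for i := 0; ; i++ {
			_ = i
		}
	}()
	time.Sleep(100 * time.Millisecond) // returning requires preempting the spinner
	start := time.Now()
	runtime.GC() // stop-the-world must also preempt the spinner
	fmt.Println("GC finished in", time.Since(start))
}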
+
+// asyncPreemptStack is the bytes of stack space required to inject an
+// asyncPreempt call.
+var asyncPreemptStack = ^uintptr(0)
+
+func init() {
+ f := findfunc(funcPC(asyncPreempt))
+ total := funcMaxSPDelta(f)
+ f = findfunc(funcPC(asyncPreempt2))
+ total += funcMaxSPDelta(f)
+ // Add some overhead for return PCs, etc.
+ asyncPreemptStack = uintptr(total) + 8*sys.PtrSize
+ if asyncPreemptStack > _StackLimit {
+ // We need more than the nosplit limit. This isn't
+ // unsafe, but it may limit asynchronous preemption.
+ //
+ // This may be a problem if we start using more
+ // registers. In that case, we should store registers
+ // in a context object. If we pre-allocate one per P,
+ // asyncPreempt can spill just a few registers to the
+ // stack, then grab its context object and spill into
+ // it. When it enters the runtime, it would allocate a
+ // new context for the P.
+ print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
+ throw("async stack too large")
+ }
+}
+
+// wantAsyncPreempt returns whether an asynchronous preemption is
+// queued for gp.
+func wantAsyncPreempt(gp *g) bool {
+ // Check both the G and the P.
+ return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
+}
+
+// isAsyncSafePoint reports whether gp at instruction PC is an
+// asynchronous safe point. This indicates that:
+//
+// 1. It's safe to suspend gp and conservatively scan its stack and
+// registers. There are no potentially hidden pointer values and it's
+// not in the middle of an atomic sequence like a write barrier.
+//
+// 2. gp has enough stack space to inject the asyncPreempt call.
+//
+// 3. It's generally safe to interact with the runtime, even if we're
+// in a signal handler stopped here. For example, there are no runtime
+// locks held, so acquiring a runtime lock won't self-deadlock.
+//
+// In some cases the PC is safe for asynchronous preemption but it
+// also needs to adjust the resumption PC. The new PC is returned in
+// the second result.
+func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
+ mp := gp.m
+
+ // Only user Gs can have safe-points. We check this first
+ // because it's extremely common that we'll catch mp in the
+ // scheduler processing this G's preemption.
+ if mp.curg != gp {
+ return false, 0
+ }
+
+ // Check M state.
+ if mp.p == 0 || !canPreemptM(mp) {
+ return false, 0
+ }
+
+ // Check stack space.
+ if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
+ return false, 0
+ }
+
+ // Check if PC is an unsafe-point.
+ f := findfunc(pc)
+ if !f.valid() {
+ // Not Go code.
+ return false, 0
+ }
+ if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 {
+ // We probably stopped at a half-executed CALL instruction,
+ // where the LR is updated but the PC has not. If we preempt
+ // here we'll see a seemingly self-recursive call, which is in
+ // fact not.
+ // This is normally ok, as we use the return address saved on
+ // stack for unwinding, not the LR value. But if this is a
+ // call to morestack, we haven't created the frame, and we'll
+ // use the LR for unwinding, which will be bad.
+ return false, 0
+ }
+ var up int32
+ var startpc uintptr
+ if !go115ReduceLiveness {
+ smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
+ if smi == -2 {
+ // Unsafe-point marked by compiler. This includes
+ // atomic sequences (e.g., write barrier) and nosplit
+ // functions (except at calls).
+ return false, 0
+ }
+ } else {
+ up, startpc = pcdatavalue2(f, _PCDATA_UnsafePoint, pc)
+ if up != _PCDATA_UnsafePointSafe {
+ // Unsafe-point marked by compiler. This includes
+ // atomic sequences (e.g., write barrier) and nosplit
+ // functions (except at calls).
+ return false, 0
+ }
+ }
+ if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || fd == unsafe.Pointer(&no_pointers_stackmap) {
+ // This is assembly code. Don't assume it's
+ // well-formed. We identify assembly code by
+ // checking that it has either no stack map, or
+ // no_pointers_stackmap, which is the stack map
+ // for ones marked as NO_LOCAL_POINTERS.
+ //
+ // TODO: Are there cases that are safe but don't have a
+ // locals pointer map, like empty frame functions?
+ return false, 0
+ }
+ name := funcname(f)
+ if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
+ inltree := (*[1 << 20]inlinedCall)(inldata)
+ ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
+ if ix >= 0 {
+ name = funcnameFromNameoff(f, inltree[ix].func_)
+ }
+ }
+ if hasPrefix(name, "runtime.") ||
+ hasPrefix(name, "runtime/internal/") ||
+ hasPrefix(name, "reflect.") {
+ // For now we never async preempt the runtime or
+ // anything closely tied to the runtime. Known issues
+ // include: various points in the scheduler ("don't
+ // preempt between here and here"), much of the defer
+ // implementation (untyped info on stack), bulk write
+ // barriers (write barrier check),
+ // reflect.{makeFuncStub,methodValueCall}.
+ //
+ // TODO(austin): We should improve this, or opt things
+ // in incrementally.
+ return false, 0
+ }
+ if go115RestartSeq {
+ switch up {
+ case _PCDATA_Restart1, _PCDATA_Restart2:
+ // Restartable instruction sequence. Back off PC to
+ // the start PC.
+ if startpc == 0 || startpc > pc || pc-startpc > 20 {
+ throw("bad restart PC")
+ }
+ return true, startpc
+ case _PCDATA_RestartAtEntry:
+ // Restart from the function entry at resumption.
+ return true, f.entry
+ }
+ } else {
+ switch up {
+ case _PCDATA_Restart1, _PCDATA_Restart2, _PCDATA_RestartAtEntry:
+ // go115RestartSeq is not enabled. Treat it as an unsafe point.
+ return false, 0
+ }
+ }
+ return true, pc
+}
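
For context, here is a condensed sketch (not a verbatim excerpt of this change) of the consumer of wantAsyncPreempt and isAsyncSafePoint: the signal-handler path, doSigPreempt in signal_unix.go, which rewrites the interrupted context so that resuming from the signal enters asyncPreempt. The sigctxt accessors shown (sigpc, sigsp, siglr, pushCall) are the per-OS context helpers; bookkeeping such as clearing signalPending is omitted.

func doSigPreemptSketch(gp *g, ctxt *sigctxt) {
	if wantAsyncPreempt(gp) {
		// Only inject the call if the interrupted PC is an async
		// safe point; otherwise let gp keep running and retry on
		// the next preemption request.
		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
			// Make the interrupted goroutine "call" asyncPreempt,
			// resuming at newpc once asyncPreempt returns.
			ctxt.pushCall(funcPC(asyncPreempt), newpc)
		}
	}
	// Acknowledge the request so suspendG's preemptGen check knows
	// it has been handled.
	atomic.Xadd(&gp.m.preemptGen, 1)
}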
+
+var no_pointers_stackmap uint64 // defined in assembly, for NO_LOCAL_POINTERS macro
diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s
new file mode 100644
index 0000000..a00ac8f
--- /dev/null
+++ b/src/runtime/preempt_386.s
@@ -0,0 +1,52 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ PUSHFL
+ ADJSP $264
+ NOP SP
+ MOVL AX, 0(SP)
+ MOVL CX, 4(SP)
+ MOVL DX, 8(SP)
+ MOVL BX, 12(SP)
+ MOVL BP, 16(SP)
+ MOVL SI, 20(SP)
+ MOVL DI, 24(SP)
+ FSAVE 28(SP)
+ FLDCW runtime·controlWord64(SB)
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
+ JNE nosse
+ MOVUPS X0, 136(SP)
+ MOVUPS X1, 152(SP)
+ MOVUPS X2, 168(SP)
+ MOVUPS X3, 184(SP)
+ MOVUPS X4, 200(SP)
+ MOVUPS X5, 216(SP)
+ MOVUPS X6, 232(SP)
+ MOVUPS X7, 248(SP)
+nosse:
+ CALL ·asyncPreempt2(SB)
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
+ JNE nosse2
+ MOVUPS 248(SP), X7
+ MOVUPS 232(SP), X6
+ MOVUPS 216(SP), X5
+ MOVUPS 200(SP), X4
+ MOVUPS 184(SP), X3
+ MOVUPS 168(SP), X2
+ MOVUPS 152(SP), X1
+ MOVUPS 136(SP), X0
+nosse2:
+ FRSTOR 28(SP)
+ MOVL 24(SP), DI
+ MOVL 20(SP), SI
+ MOVL 16(SP), BP
+ MOVL 12(SP), BX
+ MOVL 8(SP), DX
+ MOVL 4(SP), CX
+ MOVL 0(SP), AX
+ ADJSP $-264
+ POPFL
+ RET
diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s
new file mode 100644
index 0000000..4765e9f
--- /dev/null
+++ b/src/runtime/preempt_amd64.s
@@ -0,0 +1,84 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ PUSHQ BP
+ MOVQ SP, BP
+ // Save flags before clobbering them
+ PUSHFQ
+ // obj doesn't understand ADD/SUB on SP, but does understand ADJSP
+ ADJSP $368
+ // But vet doesn't know ADJSP, so suppress vet stack checking
+ NOP SP
+ #ifdef GOOS_darwin
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
+ JE 2(PC)
+ VZEROUPPER
+ #endif
+ MOVQ AX, 0(SP)
+ MOVQ CX, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ BX, 24(SP)
+ MOVQ SI, 32(SP)
+ MOVQ DI, 40(SP)
+ MOVQ R8, 48(SP)
+ MOVQ R9, 56(SP)
+ MOVQ R10, 64(SP)
+ MOVQ R11, 72(SP)
+ MOVQ R12, 80(SP)
+ MOVQ R13, 88(SP)
+ MOVQ R14, 96(SP)
+ MOVQ R15, 104(SP)
+ MOVUPS X0, 112(SP)
+ MOVUPS X1, 128(SP)
+ MOVUPS X2, 144(SP)
+ MOVUPS X3, 160(SP)
+ MOVUPS X4, 176(SP)
+ MOVUPS X5, 192(SP)
+ MOVUPS X6, 208(SP)
+ MOVUPS X7, 224(SP)
+ MOVUPS X8, 240(SP)
+ MOVUPS X9, 256(SP)
+ MOVUPS X10, 272(SP)
+ MOVUPS X11, 288(SP)
+ MOVUPS X12, 304(SP)
+ MOVUPS X13, 320(SP)
+ MOVUPS X14, 336(SP)
+ MOVUPS X15, 352(SP)
+ CALL ·asyncPreempt2(SB)
+ MOVUPS 352(SP), X15
+ MOVUPS 336(SP), X14
+ MOVUPS 320(SP), X13
+ MOVUPS 304(SP), X12
+ MOVUPS 288(SP), X11
+ MOVUPS 272(SP), X10
+ MOVUPS 256(SP), X9
+ MOVUPS 240(SP), X8
+ MOVUPS 224(SP), X7
+ MOVUPS 208(SP), X6
+ MOVUPS 192(SP), X5
+ MOVUPS 176(SP), X4
+ MOVUPS 160(SP), X3
+ MOVUPS 144(SP), X2
+ MOVUPS 128(SP), X1
+ MOVUPS 112(SP), X0
+ MOVQ 104(SP), R15
+ MOVQ 96(SP), R14
+ MOVQ 88(SP), R13
+ MOVQ 80(SP), R12
+ MOVQ 72(SP), R11
+ MOVQ 64(SP), R10
+ MOVQ 56(SP), R9
+ MOVQ 48(SP), R8
+ MOVQ 40(SP), DI
+ MOVQ 32(SP), SI
+ MOVQ 24(SP), BX
+ MOVQ 16(SP), DX
+ MOVQ 8(SP), CX
+ MOVQ 0(SP), AX
+ ADJSP $-368
+ POPFQ
+ POPQ BP
+ RET
diff --git a/src/runtime/preempt_arm.s b/src/runtime/preempt_arm.s
new file mode 100644
index 0000000..8f243c0
--- /dev/null
+++ b/src/runtime/preempt_arm.s
@@ -0,0 +1,83 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOVW.W R14, -188(R13)
+ MOVW R0, 4(R13)
+ MOVW R1, 8(R13)
+ MOVW R2, 12(R13)
+ MOVW R3, 16(R13)
+ MOVW R4, 20(R13)
+ MOVW R5, 24(R13)
+ MOVW R6, 28(R13)
+ MOVW R7, 32(R13)
+ MOVW R8, 36(R13)
+ MOVW R9, 40(R13)
+ MOVW R11, 44(R13)
+ MOVW R12, 48(R13)
+ MOVW CPSR, R0
+ MOVW R0, 52(R13)
+ MOVB ·goarm(SB), R0
+ CMP $6, R0
+ BLT nofp
+ MOVW FPCR, R0
+ MOVW R0, 56(R13)
+ MOVD F0, 60(R13)
+ MOVD F1, 68(R13)
+ MOVD F2, 76(R13)
+ MOVD F3, 84(R13)
+ MOVD F4, 92(R13)
+ MOVD F5, 100(R13)
+ MOVD F6, 108(R13)
+ MOVD F7, 116(R13)
+ MOVD F8, 124(R13)
+ MOVD F9, 132(R13)
+ MOVD F10, 140(R13)
+ MOVD F11, 148(R13)
+ MOVD F12, 156(R13)
+ MOVD F13, 164(R13)
+ MOVD F14, 172(R13)
+ MOVD F15, 180(R13)
+nofp:
+ CALL ·asyncPreempt2(SB)
+ MOVB ·goarm(SB), R0
+ CMP $6, R0
+ BLT nofp2
+ MOVD 180(R13), F15
+ MOVD 172(R13), F14
+ MOVD 164(R13), F13
+ MOVD 156(R13), F12
+ MOVD 148(R13), F11
+ MOVD 140(R13), F10
+ MOVD 132(R13), F9
+ MOVD 124(R13), F8
+ MOVD 116(R13), F7
+ MOVD 108(R13), F6
+ MOVD 100(R13), F5
+ MOVD 92(R13), F4
+ MOVD 84(R13), F3
+ MOVD 76(R13), F2
+ MOVD 68(R13), F1
+ MOVD 60(R13), F0
+ MOVW 56(R13), R0
+ MOVW R0, FPCR
+nofp2:
+ MOVW 52(R13), R0
+ MOVW R0, CPSR
+ MOVW 48(R13), R12
+ MOVW 44(R13), R11
+ MOVW 40(R13), R9
+ MOVW 36(R13), R8
+ MOVW 32(R13), R7
+ MOVW 28(R13), R6
+ MOVW 24(R13), R5
+ MOVW 20(R13), R4
+ MOVW 16(R13), R3
+ MOVW 12(R13), R2
+ MOVW 8(R13), R1
+ MOVW 4(R13), R0
+ MOVW 188(R13), R14
+ MOVW.P 192(R13), R15
+ UNDEF
diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s
new file mode 100644
index 0000000..3c27b52
--- /dev/null
+++ b/src/runtime/preempt_arm64.s
@@ -0,0 +1,147 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R30, -496(RSP)
+ SUB $496, RSP
+ #ifdef GOOS_linux
+ MOVD R29, -8(RSP)
+ SUB $8, RSP, R29
+ #endif
+ #ifdef GOOS_darwin
+ MOVD R30, (RSP)
+ #endif
+ MOVD R0, 8(RSP)
+ MOVD R1, 16(RSP)
+ MOVD R2, 24(RSP)
+ MOVD R3, 32(RSP)
+ MOVD R4, 40(RSP)
+ MOVD R5, 48(RSP)
+ MOVD R6, 56(RSP)
+ MOVD R7, 64(RSP)
+ MOVD R8, 72(RSP)
+ MOVD R9, 80(RSP)
+ MOVD R10, 88(RSP)
+ MOVD R11, 96(RSP)
+ MOVD R12, 104(RSP)
+ MOVD R13, 112(RSP)
+ MOVD R14, 120(RSP)
+ MOVD R15, 128(RSP)
+ MOVD R16, 136(RSP)
+ MOVD R17, 144(RSP)
+ MOVD R19, 152(RSP)
+ MOVD R20, 160(RSP)
+ MOVD R21, 168(RSP)
+ MOVD R22, 176(RSP)
+ MOVD R23, 184(RSP)
+ MOVD R24, 192(RSP)
+ MOVD R25, 200(RSP)
+ MOVD R26, 208(RSP)
+ MOVD NZCV, R0
+ MOVD R0, 216(RSP)
+ MOVD FPSR, R0
+ MOVD R0, 224(RSP)
+ FMOVD F0, 232(RSP)
+ FMOVD F1, 240(RSP)
+ FMOVD F2, 248(RSP)
+ FMOVD F3, 256(RSP)
+ FMOVD F4, 264(RSP)
+ FMOVD F5, 272(RSP)
+ FMOVD F6, 280(RSP)
+ FMOVD F7, 288(RSP)
+ FMOVD F8, 296(RSP)
+ FMOVD F9, 304(RSP)
+ FMOVD F10, 312(RSP)
+ FMOVD F11, 320(RSP)
+ FMOVD F12, 328(RSP)
+ FMOVD F13, 336(RSP)
+ FMOVD F14, 344(RSP)
+ FMOVD F15, 352(RSP)
+ FMOVD F16, 360(RSP)
+ FMOVD F17, 368(RSP)
+ FMOVD F18, 376(RSP)
+ FMOVD F19, 384(RSP)
+ FMOVD F20, 392(RSP)
+ FMOVD F21, 400(RSP)
+ FMOVD F22, 408(RSP)
+ FMOVD F23, 416(RSP)
+ FMOVD F24, 424(RSP)
+ FMOVD F25, 432(RSP)
+ FMOVD F26, 440(RSP)
+ FMOVD F27, 448(RSP)
+ FMOVD F28, 456(RSP)
+ FMOVD F29, 464(RSP)
+ FMOVD F30, 472(RSP)
+ FMOVD F31, 480(RSP)
+ CALL ·asyncPreempt2(SB)
+ FMOVD 480(RSP), F31
+ FMOVD 472(RSP), F30
+ FMOVD 464(RSP), F29
+ FMOVD 456(RSP), F28
+ FMOVD 448(RSP), F27
+ FMOVD 440(RSP), F26
+ FMOVD 432(RSP), F25
+ FMOVD 424(RSP), F24
+ FMOVD 416(RSP), F23
+ FMOVD 408(RSP), F22
+ FMOVD 400(RSP), F21
+ FMOVD 392(RSP), F20
+ FMOVD 384(RSP), F19
+ FMOVD 376(RSP), F18
+ FMOVD 368(RSP), F17
+ FMOVD 360(RSP), F16
+ FMOVD 352(RSP), F15
+ FMOVD 344(RSP), F14
+ FMOVD 336(RSP), F13
+ FMOVD 328(RSP), F12
+ FMOVD 320(RSP), F11
+ FMOVD 312(RSP), F10
+ FMOVD 304(RSP), F9
+ FMOVD 296(RSP), F8
+ FMOVD 288(RSP), F7
+ FMOVD 280(RSP), F6
+ FMOVD 272(RSP), F5
+ FMOVD 264(RSP), F4
+ FMOVD 256(RSP), F3
+ FMOVD 248(RSP), F2
+ FMOVD 240(RSP), F1
+ FMOVD 232(RSP), F0
+ MOVD 224(RSP), R0
+ MOVD R0, FPSR
+ MOVD 216(RSP), R0
+ MOVD R0, NZCV
+ MOVD 208(RSP), R26
+ MOVD 200(RSP), R25
+ MOVD 192(RSP), R24
+ MOVD 184(RSP), R23
+ MOVD 176(RSP), R22
+ MOVD 168(RSP), R21
+ MOVD 160(RSP), R20
+ MOVD 152(RSP), R19
+ MOVD 144(RSP), R17
+ MOVD 136(RSP), R16
+ MOVD 128(RSP), R15
+ MOVD 120(RSP), R14
+ MOVD 112(RSP), R13
+ MOVD 104(RSP), R12
+ MOVD 96(RSP), R11
+ MOVD 88(RSP), R10
+ MOVD 80(RSP), R9
+ MOVD 72(RSP), R8
+ MOVD 64(RSP), R7
+ MOVD 56(RSP), R6
+ MOVD 48(RSP), R5
+ MOVD 40(RSP), R4
+ MOVD 32(RSP), R3
+ MOVD 24(RSP), R2
+ MOVD 16(RSP), R1
+ MOVD 8(RSP), R0
+ MOVD 496(RSP), R30
+ #ifdef GOOS_linux
+ MOVD -8(RSP), R29
+ #endif
+ MOVD (RSP), R27
+ ADD $512, RSP
+ JMP (R27)
diff --git a/src/runtime/preempt_mips64x.s b/src/runtime/preempt_mips64x.s
new file mode 100644
index 0000000..1e123e8
--- /dev/null
+++ b/src/runtime/preempt_mips64x.s
@@ -0,0 +1,145 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOVV R31, -488(R29)
+ SUBV $488, R29
+ MOVV R1, 8(R29)
+ MOVV R2, 16(R29)
+ MOVV R3, 24(R29)
+ MOVV R4, 32(R29)
+ MOVV R5, 40(R29)
+ MOVV R6, 48(R29)
+ MOVV R7, 56(R29)
+ MOVV R8, 64(R29)
+ MOVV R9, 72(R29)
+ MOVV R10, 80(R29)
+ MOVV R11, 88(R29)
+ MOVV R12, 96(R29)
+ MOVV R13, 104(R29)
+ MOVV R14, 112(R29)
+ MOVV R15, 120(R29)
+ MOVV R16, 128(R29)
+ MOVV R17, 136(R29)
+ MOVV R18, 144(R29)
+ MOVV R19, 152(R29)
+ MOVV R20, 160(R29)
+ MOVV R21, 168(R29)
+ MOVV R22, 176(R29)
+ MOVV R24, 184(R29)
+ MOVV R25, 192(R29)
+ MOVV RSB, 200(R29)
+ MOVV HI, R1
+ MOVV R1, 208(R29)
+ MOVV LO, R1
+ MOVV R1, 216(R29)
+ #ifndef GOMIPS64_softfloat
+ MOVV FCR31, R1
+ MOVV R1, 224(R29)
+ MOVD F0, 232(R29)
+ MOVD F1, 240(R29)
+ MOVD F2, 248(R29)
+ MOVD F3, 256(R29)
+ MOVD F4, 264(R29)
+ MOVD F5, 272(R29)
+ MOVD F6, 280(R29)
+ MOVD F7, 288(R29)
+ MOVD F8, 296(R29)
+ MOVD F9, 304(R29)
+ MOVD F10, 312(R29)
+ MOVD F11, 320(R29)
+ MOVD F12, 328(R29)
+ MOVD F13, 336(R29)
+ MOVD F14, 344(R29)
+ MOVD F15, 352(R29)
+ MOVD F16, 360(R29)
+ MOVD F17, 368(R29)
+ MOVD F18, 376(R29)
+ MOVD F19, 384(R29)
+ MOVD F20, 392(R29)
+ MOVD F21, 400(R29)
+ MOVD F22, 408(R29)
+ MOVD F23, 416(R29)
+ MOVD F24, 424(R29)
+ MOVD F25, 432(R29)
+ MOVD F26, 440(R29)
+ MOVD F27, 448(R29)
+ MOVD F28, 456(R29)
+ MOVD F29, 464(R29)
+ MOVD F30, 472(R29)
+ MOVD F31, 480(R29)
+ #endif
+ CALL ·asyncPreempt2(SB)
+ #ifndef GOMIPS64_softfloat
+ MOVD 480(R29), F31
+ MOVD 472(R29), F30
+ MOVD 464(R29), F29
+ MOVD 456(R29), F28
+ MOVD 448(R29), F27
+ MOVD 440(R29), F26
+ MOVD 432(R29), F25
+ MOVD 424(R29), F24
+ MOVD 416(R29), F23
+ MOVD 408(R29), F22
+ MOVD 400(R29), F21
+ MOVD 392(R29), F20
+ MOVD 384(R29), F19
+ MOVD 376(R29), F18
+ MOVD 368(R29), F17
+ MOVD 360(R29), F16
+ MOVD 352(R29), F15
+ MOVD 344(R29), F14
+ MOVD 336(R29), F13
+ MOVD 328(R29), F12
+ MOVD 320(R29), F11
+ MOVD 312(R29), F10
+ MOVD 304(R29), F9
+ MOVD 296(R29), F8
+ MOVD 288(R29), F7
+ MOVD 280(R29), F6
+ MOVD 272(R29), F5
+ MOVD 264(R29), F4
+ MOVD 256(R29), F3
+ MOVD 248(R29), F2
+ MOVD 240(R29), F1
+ MOVD 232(R29), F0
+ MOVV 224(R29), R1
+ MOVV R1, FCR31
+ #endif
+ MOVV 216(R29), R1
+ MOVV R1, LO
+ MOVV 208(R29), R1
+ MOVV R1, HI
+ MOVV 200(R29), RSB
+ MOVV 192(R29), R25
+ MOVV 184(R29), R24
+ MOVV 176(R29), R22
+ MOVV 168(R29), R21
+ MOVV 160(R29), R20
+ MOVV 152(R29), R19
+ MOVV 144(R29), R18
+ MOVV 136(R29), R17
+ MOVV 128(R29), R16
+ MOVV 120(R29), R15
+ MOVV 112(R29), R14
+ MOVV 104(R29), R13
+ MOVV 96(R29), R12
+ MOVV 88(R29), R11
+ MOVV 80(R29), R10
+ MOVV 72(R29), R9
+ MOVV 64(R29), R8
+ MOVV 56(R29), R7
+ MOVV 48(R29), R6
+ MOVV 40(R29), R5
+ MOVV 32(R29), R4
+ MOVV 24(R29), R3
+ MOVV 16(R29), R2
+ MOVV 8(R29), R1
+ MOVV 488(R29), R31
+ MOVV (R29), R23
+ ADDV $496, R29
+ JMP (R23)
diff --git a/src/runtime/preempt_mipsx.s b/src/runtime/preempt_mipsx.s
new file mode 100644
index 0000000..afac33e
--- /dev/null
+++ b/src/runtime/preempt_mipsx.s
@@ -0,0 +1,145 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+// +build mips mipsle
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOVW R31, -244(R29)
+ SUB $244, R29
+ MOVW R1, 4(R29)
+ MOVW R2, 8(R29)
+ MOVW R3, 12(R29)
+ MOVW R4, 16(R29)
+ MOVW R5, 20(R29)
+ MOVW R6, 24(R29)
+ MOVW R7, 28(R29)
+ MOVW R8, 32(R29)
+ MOVW R9, 36(R29)
+ MOVW R10, 40(R29)
+ MOVW R11, 44(R29)
+ MOVW R12, 48(R29)
+ MOVW R13, 52(R29)
+ MOVW R14, 56(R29)
+ MOVW R15, 60(R29)
+ MOVW R16, 64(R29)
+ MOVW R17, 68(R29)
+ MOVW R18, 72(R29)
+ MOVW R19, 76(R29)
+ MOVW R20, 80(R29)
+ MOVW R21, 84(R29)
+ MOVW R22, 88(R29)
+ MOVW R24, 92(R29)
+ MOVW R25, 96(R29)
+ MOVW R28, 100(R29)
+ MOVW HI, R1
+ MOVW R1, 104(R29)
+ MOVW LO, R1
+ MOVW R1, 108(R29)
+ #ifndef GOMIPS_softfloat
+ MOVW FCR31, R1
+ MOVW R1, 112(R29)
+ MOVF F0, 116(R29)
+ MOVF F1, 120(R29)
+ MOVF F2, 124(R29)
+ MOVF F3, 128(R29)
+ MOVF F4, 132(R29)
+ MOVF F5, 136(R29)
+ MOVF F6, 140(R29)
+ MOVF F7, 144(R29)
+ MOVF F8, 148(R29)
+ MOVF F9, 152(R29)
+ MOVF F10, 156(R29)
+ MOVF F11, 160(R29)
+ MOVF F12, 164(R29)
+ MOVF F13, 168(R29)
+ MOVF F14, 172(R29)
+ MOVF F15, 176(R29)
+ MOVF F16, 180(R29)
+ MOVF F17, 184(R29)
+ MOVF F18, 188(R29)
+ MOVF F19, 192(R29)
+ MOVF F20, 196(R29)
+ MOVF F21, 200(R29)
+ MOVF F22, 204(R29)
+ MOVF F23, 208(R29)
+ MOVF F24, 212(R29)
+ MOVF F25, 216(R29)
+ MOVF F26, 220(R29)
+ MOVF F27, 224(R29)
+ MOVF F28, 228(R29)
+ MOVF F29, 232(R29)
+ MOVF F30, 236(R29)
+ MOVF F31, 240(R29)
+ #endif
+ CALL ·asyncPreempt2(SB)
+ #ifndef GOMIPS_softfloat
+ MOVF 240(R29), F31
+ MOVF 236(R29), F30
+ MOVF 232(R29), F29
+ MOVF 228(R29), F28
+ MOVF 224(R29), F27
+ MOVF 220(R29), F26
+ MOVF 216(R29), F25
+ MOVF 212(R29), F24
+ MOVF 208(R29), F23
+ MOVF 204(R29), F22
+ MOVF 200(R29), F21
+ MOVF 196(R29), F20
+ MOVF 192(R29), F19
+ MOVF 188(R29), F18
+ MOVF 184(R29), F17
+ MOVF 180(R29), F16
+ MOVF 176(R29), F15
+ MOVF 172(R29), F14
+ MOVF 168(R29), F13
+ MOVF 164(R29), F12
+ MOVF 160(R29), F11
+ MOVF 156(R29), F10
+ MOVF 152(R29), F9
+ MOVF 148(R29), F8
+ MOVF 144(R29), F7
+ MOVF 140(R29), F6
+ MOVF 136(R29), F5
+ MOVF 132(R29), F4
+ MOVF 128(R29), F3
+ MOVF 124(R29), F2
+ MOVF 120(R29), F1
+ MOVF 116(R29), F0
+ MOVW 112(R29), R1
+ MOVW R1, FCR31
+ #endif
+ MOVW 108(R29), R1
+ MOVW R1, LO
+ MOVW 104(R29), R1
+ MOVW R1, HI
+ MOVW 100(R29), R28
+ MOVW 96(R29), R25
+ MOVW 92(R29), R24
+ MOVW 88(R29), R22
+ MOVW 84(R29), R21
+ MOVW 80(R29), R20
+ MOVW 76(R29), R19
+ MOVW 72(R29), R18
+ MOVW 68(R29), R17
+ MOVW 64(R29), R16
+ MOVW 60(R29), R15
+ MOVW 56(R29), R14
+ MOVW 52(R29), R13
+ MOVW 48(R29), R12
+ MOVW 44(R29), R11
+ MOVW 40(R29), R10
+ MOVW 36(R29), R9
+ MOVW 32(R29), R8
+ MOVW 28(R29), R7
+ MOVW 24(R29), R6
+ MOVW 20(R29), R5
+ MOVW 16(R29), R4
+ MOVW 12(R29), R3
+ MOVW 8(R29), R2
+ MOVW 4(R29), R1
+ MOVW 244(R29), R31
+ MOVW (R29), R23
+ ADD $248, R29
+ JMP (R23)
diff --git a/src/runtime/preempt_nonwindows.go b/src/runtime/preempt_nonwindows.go
new file mode 100644
index 0000000..3066a15
--- /dev/null
+++ b/src/runtime/preempt_nonwindows.go
@@ -0,0 +1,13 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !windows
+
+package runtime
+
+//go:nosplit
+func osPreemptExtEnter(mp *m) {}
+
+//go:nosplit
+func osPreemptExtExit(mp *m) {}
diff --git a/src/runtime/preempt_ppc64x.s b/src/runtime/preempt_ppc64x.s
new file mode 100644
index 0000000..b2d7e30
--- /dev/null
+++ b/src/runtime/preempt_ppc64x.s
@@ -0,0 +1,147 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R31, -488(R1)
+ MOVD LR, R31
+ MOVDU R31, -520(R1)
+ MOVD R3, 40(R1)
+ MOVD R4, 48(R1)
+ MOVD R5, 56(R1)
+ MOVD R6, 64(R1)
+ MOVD R7, 72(R1)
+ MOVD R8, 80(R1)
+ MOVD R9, 88(R1)
+ MOVD R10, 96(R1)
+ MOVD R11, 104(R1)
+ MOVD R14, 112(R1)
+ MOVD R15, 120(R1)
+ MOVD R16, 128(R1)
+ MOVD R17, 136(R1)
+ MOVD R18, 144(R1)
+ MOVD R19, 152(R1)
+ MOVD R20, 160(R1)
+ MOVD R21, 168(R1)
+ MOVD R22, 176(R1)
+ MOVD R23, 184(R1)
+ MOVD R24, 192(R1)
+ MOVD R25, 200(R1)
+ MOVD R26, 208(R1)
+ MOVD R27, 216(R1)
+ MOVD R28, 224(R1)
+ MOVD R29, 232(R1)
+ MOVW CR, R31
+ MOVW R31, 240(R1)
+ MOVD XER, R31
+ MOVD R31, 248(R1)
+ FMOVD F0, 256(R1)
+ FMOVD F1, 264(R1)
+ FMOVD F2, 272(R1)
+ FMOVD F3, 280(R1)
+ FMOVD F4, 288(R1)
+ FMOVD F5, 296(R1)
+ FMOVD F6, 304(R1)
+ FMOVD F7, 312(R1)
+ FMOVD F8, 320(R1)
+ FMOVD F9, 328(R1)
+ FMOVD F10, 336(R1)
+ FMOVD F11, 344(R1)
+ FMOVD F12, 352(R1)
+ FMOVD F13, 360(R1)
+ FMOVD F14, 368(R1)
+ FMOVD F15, 376(R1)
+ FMOVD F16, 384(R1)
+ FMOVD F17, 392(R1)
+ FMOVD F18, 400(R1)
+ FMOVD F19, 408(R1)
+ FMOVD F20, 416(R1)
+ FMOVD F21, 424(R1)
+ FMOVD F22, 432(R1)
+ FMOVD F23, 440(R1)
+ FMOVD F24, 448(R1)
+ FMOVD F25, 456(R1)
+ FMOVD F26, 464(R1)
+ FMOVD F27, 472(R1)
+ FMOVD F28, 480(R1)
+ FMOVD F29, 488(R1)
+ FMOVD F30, 496(R1)
+ FMOVD F31, 504(R1)
+ MOVFL FPSCR, F0
+ FMOVD F0, 512(R1)
+ CALL ·asyncPreempt2(SB)
+ FMOVD 512(R1), F0
+ MOVFL F0, FPSCR
+ FMOVD 504(R1), F31
+ FMOVD 496(R1), F30
+ FMOVD 488(R1), F29
+ FMOVD 480(R1), F28
+ FMOVD 472(R1), F27
+ FMOVD 464(R1), F26
+ FMOVD 456(R1), F25
+ FMOVD 448(R1), F24
+ FMOVD 440(R1), F23
+ FMOVD 432(R1), F22
+ FMOVD 424(R1), F21
+ FMOVD 416(R1), F20
+ FMOVD 408(R1), F19
+ FMOVD 400(R1), F18
+ FMOVD 392(R1), F17
+ FMOVD 384(R1), F16
+ FMOVD 376(R1), F15
+ FMOVD 368(R1), F14
+ FMOVD 360(R1), F13
+ FMOVD 352(R1), F12
+ FMOVD 344(R1), F11
+ FMOVD 336(R1), F10
+ FMOVD 328(R1), F9
+ FMOVD 320(R1), F8
+ FMOVD 312(R1), F7
+ FMOVD 304(R1), F6
+ FMOVD 296(R1), F5
+ FMOVD 288(R1), F4
+ FMOVD 280(R1), F3
+ FMOVD 272(R1), F2
+ FMOVD 264(R1), F1
+ FMOVD 256(R1), F0
+ MOVD 248(R1), R31
+ MOVD R31, XER
+ MOVW 240(R1), R31
+ MOVFL R31, $0xff
+ MOVD 232(R1), R29
+ MOVD 224(R1), R28
+ MOVD 216(R1), R27
+ MOVD 208(R1), R26
+ MOVD 200(R1), R25
+ MOVD 192(R1), R24
+ MOVD 184(R1), R23
+ MOVD 176(R1), R22
+ MOVD 168(R1), R21
+ MOVD 160(R1), R20
+ MOVD 152(R1), R19
+ MOVD 144(R1), R18
+ MOVD 136(R1), R17
+ MOVD 128(R1), R16
+ MOVD 120(R1), R15
+ MOVD 112(R1), R14
+ MOVD 104(R1), R11
+ MOVD 96(R1), R10
+ MOVD 88(R1), R9
+ MOVD 80(R1), R8
+ MOVD 72(R1), R7
+ MOVD 64(R1), R6
+ MOVD 56(R1), R5
+ MOVD 48(R1), R4
+ MOVD 40(R1), R3
+ MOVD 520(R1), R31
+ MOVD R31, LR
+ MOVD 528(R1), R2
+ MOVD 536(R1), R12
+ MOVD (R1), R31
+ MOVD R31, CTR
+ MOVD 32(R1), R31
+ ADD $552, R1
+ JMP (CTR)
diff --git a/src/runtime/preempt_riscv64.s b/src/runtime/preempt_riscv64.s
new file mode 100644
index 0000000..0338c22
--- /dev/null
+++ b/src/runtime/preempt_riscv64.s
@@ -0,0 +1,131 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ MOV X1, -480(X2)
+ ADD $-480, X2
+ MOV X3, 8(X2)
+ MOV X5, 16(X2)
+ MOV X6, 24(X2)
+ MOV X7, 32(X2)
+ MOV X8, 40(X2)
+ MOV X9, 48(X2)
+ MOV X10, 56(X2)
+ MOV X11, 64(X2)
+ MOV X12, 72(X2)
+ MOV X13, 80(X2)
+ MOV X14, 88(X2)
+ MOV X15, 96(X2)
+ MOV X16, 104(X2)
+ MOV X17, 112(X2)
+ MOV X18, 120(X2)
+ MOV X19, 128(X2)
+ MOV X20, 136(X2)
+ MOV X21, 144(X2)
+ MOV X22, 152(X2)
+ MOV X23, 160(X2)
+ MOV X24, 168(X2)
+ MOV X25, 176(X2)
+ MOV X26, 184(X2)
+ MOV X27, 192(X2)
+ MOV X28, 200(X2)
+ MOV X29, 208(X2)
+ MOV X30, 216(X2)
+ MOVD F0, 224(X2)
+ MOVD F1, 232(X2)
+ MOVD F2, 240(X2)
+ MOVD F3, 248(X2)
+ MOVD F4, 256(X2)
+ MOVD F5, 264(X2)
+ MOVD F6, 272(X2)
+ MOVD F7, 280(X2)
+ MOVD F8, 288(X2)
+ MOVD F9, 296(X2)
+ MOVD F10, 304(X2)
+ MOVD F11, 312(X2)
+ MOVD F12, 320(X2)
+ MOVD F13, 328(X2)
+ MOVD F14, 336(X2)
+ MOVD F15, 344(X2)
+ MOVD F16, 352(X2)
+ MOVD F17, 360(X2)
+ MOVD F18, 368(X2)
+ MOVD F19, 376(X2)
+ MOVD F20, 384(X2)
+ MOVD F21, 392(X2)
+ MOVD F22, 400(X2)
+ MOVD F23, 408(X2)
+ MOVD F24, 416(X2)
+ MOVD F25, 424(X2)
+ MOVD F26, 432(X2)
+ MOVD F27, 440(X2)
+ MOVD F28, 448(X2)
+ MOVD F29, 456(X2)
+ MOVD F30, 464(X2)
+ MOVD F31, 472(X2)
+ CALL ·asyncPreempt2(SB)
+ MOVD 472(X2), F31
+ MOVD 464(X2), F30
+ MOVD 456(X2), F29
+ MOVD 448(X2), F28
+ MOVD 440(X2), F27
+ MOVD 432(X2), F26
+ MOVD 424(X2), F25
+ MOVD 416(X2), F24
+ MOVD 408(X2), F23
+ MOVD 400(X2), F22
+ MOVD 392(X2), F21
+ MOVD 384(X2), F20
+ MOVD 376(X2), F19
+ MOVD 368(X2), F18
+ MOVD 360(X2), F17
+ MOVD 352(X2), F16
+ MOVD 344(X2), F15
+ MOVD 336(X2), F14
+ MOVD 328(X2), F13
+ MOVD 320(X2), F12
+ MOVD 312(X2), F11
+ MOVD 304(X2), F10
+ MOVD 296(X2), F9
+ MOVD 288(X2), F8
+ MOVD 280(X2), F7
+ MOVD 272(X2), F6
+ MOVD 264(X2), F5
+ MOVD 256(X2), F4
+ MOVD 248(X2), F3
+ MOVD 240(X2), F2
+ MOVD 232(X2), F1
+ MOVD 224(X2), F0
+ MOV 216(X2), X30
+ MOV 208(X2), X29
+ MOV 200(X2), X28
+ MOV 192(X2), X27
+ MOV 184(X2), X26
+ MOV 176(X2), X25
+ MOV 168(X2), X24
+ MOV 160(X2), X23
+ MOV 152(X2), X22
+ MOV 144(X2), X21
+ MOV 136(X2), X20
+ MOV 128(X2), X19
+ MOV 120(X2), X18
+ MOV 112(X2), X17
+ MOV 104(X2), X16
+ MOV 96(X2), X15
+ MOV 88(X2), X14
+ MOV 80(X2), X13
+ MOV 72(X2), X12
+ MOV 64(X2), X11
+ MOV 56(X2), X10
+ MOV 48(X2), X9
+ MOV 40(X2), X8
+ MOV 32(X2), X7
+ MOV 24(X2), X6
+ MOV 16(X2), X5
+ MOV 8(X2), X3
+ MOV 480(X2), X1
+ MOV (X2), X31
+ ADD $488, X2
+ JMP (X31)
diff --git a/src/runtime/preempt_s390x.s b/src/runtime/preempt_s390x.s
new file mode 100644
index 0000000..ca9e47c
--- /dev/null
+++ b/src/runtime/preempt_s390x.s
@@ -0,0 +1,51 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ IPM R10
+ MOVD R14, -248(R15)
+ ADD $-248, R15
+ MOVW R10, 8(R15)
+ STMG R0, R12, 16(R15)
+ FMOVD F0, 120(R15)
+ FMOVD F1, 128(R15)
+ FMOVD F2, 136(R15)
+ FMOVD F3, 144(R15)
+ FMOVD F4, 152(R15)
+ FMOVD F5, 160(R15)
+ FMOVD F6, 168(R15)
+ FMOVD F7, 176(R15)
+ FMOVD F8, 184(R15)
+ FMOVD F9, 192(R15)
+ FMOVD F10, 200(R15)
+ FMOVD F11, 208(R15)
+ FMOVD F12, 216(R15)
+ FMOVD F13, 224(R15)
+ FMOVD F14, 232(R15)
+ FMOVD F15, 240(R15)
+ CALL ·asyncPreempt2(SB)
+ FMOVD 240(R15), F15
+ FMOVD 232(R15), F14
+ FMOVD 224(R15), F13
+ FMOVD 216(R15), F12
+ FMOVD 208(R15), F11
+ FMOVD 200(R15), F10
+ FMOVD 192(R15), F9
+ FMOVD 184(R15), F8
+ FMOVD 176(R15), F7
+ FMOVD 168(R15), F6
+ FMOVD 160(R15), F5
+ FMOVD 152(R15), F4
+ FMOVD 144(R15), F3
+ FMOVD 136(R15), F2
+ FMOVD 128(R15), F1
+ FMOVD 120(R15), F0
+ LMG 16(R15), R0, R12
+ MOVD 248(R15), R14
+ ADD $256, R15
+ MOVWZ -248(R15), R10
+ TMLH R10, $(3<<12)
+ MOVD -256(R15), R10
+ JMP (R10)
diff --git a/src/runtime/preempt_wasm.s b/src/runtime/preempt_wasm.s
new file mode 100644
index 0000000..0cf57d3
--- /dev/null
+++ b/src/runtime/preempt_wasm.s
@@ -0,0 +1,8 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
+ // No async preemption on wasm
+ UNDEF
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 93d329d..2399f0a 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -82,6 +82,7 @@
var (
m0 m
g0 g
+ mcache0 *mcache
raceprocctx0 uintptr
)
@@ -244,13 +245,14 @@
func forcegchelper() {
forcegc.g = getg()
+ lockInit(&forcegc.lock, lockRankForcegc)
for {
lock(&forcegc.lock)
if forcegc.idle != 0 {
throw("forcegc: phase error")
}
atomic.Store(&forcegc.idle, 1)
- goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1)
+ goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceEvGoBlock, 1)
// this goroutine is explicitly resumed by sysmon
if debug.gctrace > 0 {
println("GC forced")
@@ -413,7 +415,7 @@
// use the result as an address at which to start executing code.
//go:nosplit
func funcPC(f interface{}) uintptr {
- return **(**uintptr)(add(unsafe.Pointer(&f), sys.PtrSize))
+ return *(*uintptr)(efaceOf(&f).data)
}
// called from assembly
@@ -514,6 +516,9 @@
// to guard execution of instructions that can not be assumed to be always supported.
x86HasPOPCNT = cpu.X86.HasPOPCNT
x86HasSSE41 = cpu.X86.HasSSE41
+ x86HasFMA = cpu.X86.HasFMA
+
+ armHasVFPv4 = cpu.ARM.HasVFPv4
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
@@ -527,6 +532,22 @@
//
// The new G calls runtime·main.
func schedinit() {
+ lockInit(&sched.lock, lockRankSched)
+ lockInit(&sched.sysmonlock, lockRankSysmon)
+ lockInit(&sched.deferlock, lockRankDefer)
+ lockInit(&sched.sudoglock, lockRankSudog)
+ lockInit(&deadlock, lockRankDeadlock)
+ lockInit(&paniclk, lockRankPanic)
+ lockInit(&allglock, lockRankAllg)
+ lockInit(&allpLock, lockRankAllp)
+ lockInit(&reflectOffs.lock, lockRankReflectOffs)
+ lockInit(&finlock, lockRankFin)
+ lockInit(&trace.bufLock, lockRankTraceBuf)
+ lockInit(&trace.stringsLock, lockRankTraceStrings)
+ lockInit(&trace.lock, lockRankTrace)
+ lockInit(&cpuprof.lock, lockRankCpuprof)
+ lockInit(&trace.stackTab.lock, lockRankTraceStackTab)
+
// raceinit must be the first call to race detector.
// In particular, it must be done before mallocinit below calls racemapshadow.
_g_ := getg()
@@ -540,6 +561,7 @@
moduledataverify()
stackinit()
mallocinit()
+ fastrandinit() // must run before mcommoninit
mcommoninit(_g_.m)
cpuinit() // must run before alginit
alginit() // maps must not be used before this call
@@ -617,8 +639,8 @@
sched.mnext++
checkmcount()
- mp.fastrand[0] = 1597334677 * uint32(mp.id)
- mp.fastrand[1] = uint32(cputicks())
+ mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
+ mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
if mp.fastrand[0]|mp.fastrand[1] == 0 {
mp.fastrand[1] = 1
}
@@ -643,6 +665,13 @@
}
}
+var fastrandseed uintptr
+
+func fastrandinit() {
+ s := (*[unsafe.Sizeof(fastrandseed)]byte)(unsafe.Pointer(&fastrandseed))[:]
+ getRandomData(s)
+}
+
// Mark gp ready to run.
func ready(gp *g, traceskip int, next bool) {
if trace.enabled {
@@ -662,9 +691,7 @@
// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
casgstatus(gp, _Gwaiting, _Grunnable)
runqput(_g_.m.p.ptr(), gp, next)
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
- wakep()
- }
+ wakep()
releasem(mp)
}
@@ -707,18 +734,6 @@
return atomic.Load(&gp.atomicstatus)
}
-// Ownership of gcscanvalid:
-//
-// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
-// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
-//
-// Otherwise, a second goroutine can lock the scan state by setting _Gscan
-// in the status bit and then modify gcscanvalid, and then unlock the scan state.
-//
-// Note that the first condition implies an exception to the second:
-// if a second goroutine changes gp's status to _Grunning|_Gscan,
-// that second goroutine still does not have the right to modify gcscanvalid.
-
// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
@@ -735,7 +750,8 @@
case _Gscanrunnable,
_Gscanwaiting,
_Gscanrunning,
- _Gscansyscall:
+ _Gscansyscall,
+ _Gscanpreempted:
if newval == oldval&^_Gscan {
success = atomic.Cas(&gp.atomicstatus, oldval, newval)
}
@@ -745,6 +761,7 @@
dumpgstatus(gp)
throw("casfrom_Gscanstatus: gp->status is not in scan state")
}
+ releaseLockRank(lockRankGscan)
}
// This will return false if the gp is not in the expected status and the cas fails.
@@ -756,7 +773,12 @@
_Gwaiting,
_Gsyscall:
if newval == oldval|_Gscan {
- return atomic.Cas(&gp.atomicstatus, oldval, newval)
+ r := atomic.Cas(&gp.atomicstatus, oldval, newval)
+ if r {
+ acquireLockRank(lockRankGscan)
+ }
+ return r
+
}
}
print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
@@ -777,16 +799,8 @@
})
}
- if oldval == _Grunning && gp.gcscanvalid {
- // If oldvall == _Grunning, then the actual status must be
- // _Grunning or _Grunning|_Gscan; either way,
- // we own gp.gcscanvalid, so it's safe to read.
- // gp.gcscanvalid must not be true when we are running.
- systemstack(func() {
- print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
- throw("casgstatus")
- })
- }
+ acquireLockRank(lockRankGscan)
+ releaseLockRank(lockRankGscan)
// See https://golang.org/cl/21503 for justification of the yield delay.
const yieldDelay = 5 * 1000
@@ -798,14 +812,6 @@
if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
throw("casgstatus: waiting for Gwaiting but is Grunnable")
}
- // Help GC if needed.
- // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
- // gp.preemptscan = false
- // systemstack(func() {
- // gcphasework(gp)
- // })
- // }
- // But meanwhile just yield.
if i == 0 {
nextYield = nanotime() + yieldDelay
}
@@ -818,9 +824,6 @@
nextYield = nanotime() + yieldDelay/2
}
}
- if newval == _Grunning {
- gp.gcscanvalid = false
- }
}
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
@@ -841,109 +844,27 @@
}
}
-// scang blocks until gp's stack has been scanned.
-// It might be scanned by scang or it might be scanned by the goroutine itself.
-// Either way, the stack scan has completed when scang returns.
-func scang(gp *g, gcw *gcWork) {
- // Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone.
- // Nothing is racing with us now, but gcscandone might be set to true left over
- // from an earlier round of stack scanning (we scan twice per GC).
- // We use gcscandone to record whether the scan has been done during this round.
-
- gp.gcscandone = false
-
- // See https://golang.org/cl/21503 for justification of the yield delay.
- const yieldDelay = 10 * 1000
- var nextYield int64
-
- // Endeavor to get gcscandone set to true,
- // either by doing the stack scan ourselves or by coercing gp to scan itself.
- // gp.gcscandone can transition from false to true when we're not looking
- // (if we asked for preemption), so any time we lock the status using
- // castogscanstatus we have to double-check that the scan is still not done.
-loop:
- for i := 0; !gp.gcscandone; i++ {
- switch s := readgstatus(gp); s {
- default:
- dumpgstatus(gp)
- throw("stopg: invalid status")
-
- case _Gdead:
- // No stack.
- gp.gcscandone = true
- break loop
-
- case _Gcopystack:
- // Stack being switched. Go around again.
-
- case _Grunnable, _Gsyscall, _Gwaiting:
- // Claim goroutine by setting scan bit.
- // Racing with execution or readying of gp.
- // The scan bit keeps them from running
- // the goroutine until we're done.
- if castogscanstatus(gp, s, s|_Gscan) {
- if !gp.gcscandone {
- scanstack(gp, gcw)
- gp.gcscandone = true
- }
- restartg(gp)
- break loop
- }
-
- case _Gscanwaiting:
- // newstack is doing a scan for us right now. Wait.
-
- case _Grunning:
- // Goroutine running. Try to preempt execution so it can scan itself.
- // The preemption handler (in newstack) does the actual scan.
-
- // Optimization: if there is already a pending preemption request
- // (from the previous loop iteration), don't bother with the atomics.
- if gp.preemptscan && gp.preempt && gp.stackguard0 == stackPreempt {
- break
- }
-
- // Ask for preemption and self scan.
- if castogscanstatus(gp, _Grunning, _Gscanrunning) {
- if !gp.gcscandone {
- gp.preemptscan = true
- gp.preempt = true
- gp.stackguard0 = stackPreempt
- }
- casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
- }
- }
-
- if i == 0 {
- nextYield = nanotime() + yieldDelay
- }
- if nanotime() < nextYield {
- procyield(10)
- } else {
- osyield()
- nextYield = nanotime() + yieldDelay/2
- }
+// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
+//
+// TODO(austin): This is the only status operation that both changes
+// the status and locks the _Gscan bit. Rethink this.
+func casGToPreemptScan(gp *g, old, new uint32) {
+ if old != _Grunning || new != _Gscan|_Gpreempted {
+ throw("bad g transition")
}
-
- gp.preemptscan = false // cancel scan request if no longer needed
+ acquireLockRank(lockRankGscan)
+ for !atomic.Cas(&gp.atomicstatus, _Grunning, _Gscan|_Gpreempted) {
+ }
}
-// The GC requests that this routine be moved from a scanmumble state to a mumble state.
-func restartg(gp *g) {
- s := readgstatus(gp)
- switch s {
- default:
- dumpgstatus(gp)
- throw("restartg: unexpected status")
-
- case _Gdead:
- // ok
-
- case _Gscanrunnable,
- _Gscanwaiting,
- _Gscansyscall:
- casfrom_Gscanstatus(gp, s, s&^_Gscan)
+// casGFromPreempted attempts to transition gp from _Gpreempted to
+// _Gwaiting. If successful, the caller is responsible for
+// re-scheduling gp.
+func casGFromPreempted(gp *g, old, new uint32) bool {
+ if old != _Gpreempted || new != _Gwaiting {
+ throw("bad g transition")
}
+ return atomic.Cas(&gp.atomicstatus, _Gpreempted, _Gwaiting)
}
// stopTheWorld stops all P's from executing goroutines, interrupting
@@ -962,8 +883,23 @@
// goroutines.
func stopTheWorld(reason string) {
semacquire(&worldsema)
- getg().m.preemptoff = reason
- systemstack(stopTheWorldWithSema)
+ gp := getg()
+ gp.m.preemptoff = reason
+ systemstack(func() {
+ // Mark the goroutine which called stopTheWorld preemptible so its
+ // stack may be scanned.
+ // This lets a mark worker scan us while we try to stop the world
+ // since otherwise we could get in a mutual preemption deadlock.
+ // We must not modify anything on the G stack because a stack shrink
+ // may occur. A stack shrink is otherwise OK though because in order
+ // to return from this function (and to leave the system stack) we
+ // must have preempted all goroutines, including any attempting
+ // to scan our stack, in which case, any stack shrinking will
+ // have already completed by the time we exit.
+ casgstatus(gp, _Grunning, _Gwaiting)
+ stopTheWorldWithSema()
+ casgstatus(gp, _Gwaiting, _Grunning)
+ })
}
// startTheWorld undoes the effects of stopTheWorld.
@@ -975,10 +911,31 @@
getg().m.preemptoff = ""
}
-// Holding worldsema grants an M the right to try to stop the world
-// and prevents gomaxprocs from changing concurrently.
+// stopTheWorldGC has the same effect as stopTheWorld, but blocks
+// until the GC is not running. It also blocks a GC from starting
+// until startTheWorldGC is called.
+func stopTheWorldGC(reason string) {
+ semacquire(&gcsema)
+ stopTheWorld(reason)
+}
+
+// startTheWorldGC undoes the effects of stopTheWorldGC.
+func startTheWorldGC() {
+ startTheWorld()
+ semrelease(&gcsema)
+}
+
+// Holding worldsema grants an M the right to try to stop the world.
var worldsema uint32 = 1
+// Holding gcsema grants the M the right to block a GC, and blocks
+// until the current GC is done. In particular, it prevents gomaxprocs
+// from changing concurrently.
+//
+// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle
+// being changed/enabled during a GC, remove this.
+var gcsema uint32 = 1
+
// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should stopTheWorldWithSema on the system
@@ -1080,7 +1037,7 @@
func startTheWorldWithSema(emitTraceEvent bool) int64 {
mp := acquirem() // disable preemption because it can be holding p in a local var
if netpollinited() {
- list := netpoll(false) // non-blocking
+ list := netpoll(0) // non-blocking
injectglist(&list)
}
lock(&sched.lock)
@@ -1124,9 +1081,7 @@
// Wakeup an additional proc in case we have excessive runnable goroutines
// in local queues or in the global queue. If we don't, the proc will park itself.
// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
- wakep()
- }
+ wakep()
releasem(mp)
@@ -1167,7 +1122,8 @@
mstart1()
// Exit this thread.
- if GOOS == "windows" || GOOS == "solaris" || GOOS == "illumos" || GOOS == "plan9" || GOOS == "darwin" || GOOS == "aix" {
+ switch GOOS {
+ case "windows", "solaris", "illumos", "plan9", "darwin", "aix":
// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
// the stack, but put it in _g_.stack before mstart,
// so the logic above hasn't set osStack yet.
@@ -1266,6 +1222,11 @@
// Free the gsignal stack.
if m.gsignal != nil {
stackfree(m.gsignal.stack)
+ // On some platforms, when calling into VDSO (e.g. nanotime)
+ // we store our g on the gsignal stack, if there is one.
+ // Now the stack is freed, unlink it from the m, so we
+ // won't write to it when calling VDSO code.
+ m.gsignal = nil
}
// Remove m from allm.
@@ -1635,8 +1596,6 @@
gp.syscallpc = gp.sched.pc
gp.syscallsp = gp.sched.sp
gp.stktopsp = gp.sched.sp
- gp.gcscanvalid = true
- gp.gcscandone = true
// malg returns status as _Gidle. Change to _Gdead before
// adding to allg where GC can see it. We use _Gdead to hide
// this from tracebacks and stack scans since it isn't a
@@ -1698,6 +1657,7 @@
// Return mp.curg to dead state.
casgstatus(mp.curg, _Gsyscall, _Gdead)
+ mp.curg.preemptStop = false
atomic.Xadd(&sched.ngsys, +1)
// Block signals before unminit.
@@ -1742,8 +1702,7 @@
for {
old := atomic.Loaduintptr(&extram)
if old == locked {
- yield := osyield
- yield()
+ osyield()
continue
}
if old == 0 && !nilokay {
@@ -1760,8 +1719,7 @@
if atomic.Casuintptr(&extram, old, locked) {
return (*m)(unsafe.Pointer(old))
}
- yield := osyield
- yield()
+ osyield()
continue
}
}
@@ -1862,10 +1820,16 @@
if GOARCH == "wasm" { // no threads on wasm yet
return
}
+
+ // Disable preemption to guarantee that the template thread will be
+ // created before a park once haveTemplateThread is set.
+ mp := acquirem()
if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
+ releasem(mp)
return
}
newm(templateThread, nil)
+ releasem(mp)
}
// templateThread is a thread in a known-good state that exists solely
@@ -2036,6 +2000,9 @@
startm(_p_, false)
return
}
+ if when := nobarrierWakeTime(_p_); when != 0 {
+ wakeNetPoller(when)
+ }
pidleput(_p_)
unlock(&sched.lock)
}
@@ -2043,8 +2010,11 @@
// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
func wakep() {
+ if atomic.Load(&sched.npidle) == 0 {
+ return
+ }
// be conservative about spinning threads
- if !atomic.Cas(&sched.nmspinning, 0, 1) {
+ if atomic.Load(&sched.nmspinning) != 0 || !atomic.Cas(&sched.nmspinning, 0, 1) {
return
}
startm(nil, true)
@@ -2137,6 +2107,10 @@
func execute(gp *g, inheritTime bool) {
_g_ := getg()
+ // Assign gp.m before entering _Grunning so running Gs have an
+ // M.
+ _g_.m.curg = gp
+ gp.m = _g_.m
casgstatus(gp, _Grunnable, _Grunning)
gp.waitsince = 0
gp.preempt = false
@@ -2144,8 +2118,6 @@
if !inheritTime {
_g_.m.p.ptr().schedtick++
}
- _g_.m.curg = gp
- gp.m = _g_.m
// Check whether the profiler needs to be turned on or off.
hz := sched.profilehz
@@ -2166,7 +2138,7 @@
}
// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
+// Tries to steal from other P's, get g from local or global queue, poll network.
func findrunnable() (gp *g, inheritTime bool) {
_g_ := getg()
@@ -2183,6 +2155,9 @@
if _p_.runSafePointFn != 0 {
runSafePointFn()
}
+
+ now, pollUntil, _ := checkTimers(_p_, 0)
+
if fingwait && fingwake {
if gp := wakefing(); gp != nil {
ready(gp, 0, true)
@@ -2215,7 +2190,7 @@
// not set lastpoll yet), this thread will do blocking netpoll below
// anyway.
if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
- if list := netpoll(false); !list.empty() { // non-blocking
+ if list := netpoll(0); !list.empty() { // non-blocking
gp := list.pop()
injectglist(&list)
casgstatus(gp, _Gwaiting, _Grunnable)
@@ -2228,12 +2203,7 @@
// Steal work from other P's.
procs := uint32(gomaxprocs)
- if atomic.Load(&sched.npidle) == procs-1 {
- // Either GOMAXPROCS=1 or everybody, except for us, is idle already.
- // New work can appear from returning syscall/cgocall, network or timers.
- // Neither of that submits to local run queues, so no point in stealing.
- goto stop
- }
+ ranTimer := false
// If number of spinning M's >= number of busy P's, block.
// This is necessary to prevent excessive CPU consumption
// when GOMAXPROCS>>1 but the program parallelism is low.
@@ -2250,11 +2220,51 @@
goto top
}
stealRunNextG := i > 2 // first look for ready queues with more than 1 g
- if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil {
+ p2 := allp[enum.position()]
+ if _p_ == p2 {
+ continue
+ }
+ if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
return gp, false
}
+
+ // Consider stealing timers from p2.
+ // This call to checkTimers is the only place where
+ // we hold a lock on a different P's timers.
+ // Lock contention can be a problem here, so
+ // initially avoid grabbing the lock if p2 is running
+ // and is not marked for preemption. If p2 is running
+ // and not being preempted we assume it will handle its
+ // own timers.
+ // If we're still looking for work after checking all
+ // the P's, then go ahead and steal from an active P.
+ if i > 2 || (i > 1 && shouldStealTimers(p2)) {
+ tnow, w, ran := checkTimers(p2, now)
+ now = tnow
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ pollUntil = w
+ }
+ if ran {
+ // Running the timers may have
+ // made an arbitrary number of G's
+ // ready and added them to this P's
+ // local run queue. That invalidates
+ // the assumption of runqsteal
+ // that it always has room to add
+ // stolen G's. So check now if there
+ // is a local G to run.
+ if gp, inheritTime := runqget(_p_); gp != nil {
+ return gp, inheritTime
+ }
+ ranTimer = true
+ }
+ }
}
}
+ if ranTimer {
+ // Running a timer may have made some goroutine ready.
+ goto top
+ }
stop:
@@ -2271,11 +2281,25 @@
return gp, false
}
+ delta := int64(-1)
+ if pollUntil != 0 {
+ // checkTimers ensures that pollUntil > now.
+ delta = pollUntil - now
+ }
+
// wasm only:
// If a callback returned and no other goroutine is awake,
- // then pause execution until a callback was triggered.
- if beforeIdle() {
- // At least one goroutine got woken.
+ // then wake event handler goroutine which pauses execution
+ // until a callback was triggered.
+ gp, otherReady := beforeIdle(delta)
+ if gp != nil {
+ casgstatus(gp, _Gwaiting, _Grunnable)
+ if trace.enabled {
+ traceGoUnpark(gp, 0)
+ }
+ return gp, false
+ }
+ if otherReady {
goto top
}
@@ -2362,21 +2386,35 @@
}
// poll network
- if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
+ if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
+ atomic.Store64(&sched.pollUntil, uint64(pollUntil))
if _g_.m.p != 0 {
throw("findrunnable: netpoll with p")
}
if _g_.m.spinning {
throw("findrunnable: netpoll with spinning")
}
- list := netpoll(true) // block until new work is available
+ if faketime != 0 {
+ // When using fake time, just poll.
+ delta = 0
+ }
+ list := netpoll(delta) // block until new work is available
+ atomic.Store64(&sched.pollUntil, 0)
atomic.Store64(&sched.lastpoll, uint64(nanotime()))
- if !list.empty() {
- lock(&sched.lock)
- _p_ = pidleget()
- unlock(&sched.lock)
- if _p_ != nil {
- acquirep(_p_)
+ if faketime != 0 && list.empty() {
+ // Using fake time and nothing is ready; stop M.
+ // When all M's stop, checkdead will call timejump.
+ stopm()
+ goto top
+ }
+ lock(&sched.lock)
+ _p_ = pidleget()
+ unlock(&sched.lock)
+ if _p_ == nil {
+ injectglist(&list)
+ } else {
+ acquirep(_p_)
+ if !list.empty() {
gp := list.pop()
injectglist(&list)
casgstatus(gp, _Gwaiting, _Grunnable)
@@ -2385,7 +2423,16 @@
}
return gp, false
}
- injectglist(&list)
+ if wasSpinning {
+ _g_.m.spinning = true
+ atomic.Xadd(&sched.nmspinning, 1)
+ }
+ goto top
+ }
+ } else if pollUntil != 0 && netpollinited() {
+ pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
+ netpollBreak()
}
}
stopm()
@@ -2405,7 +2452,7 @@
return true
}
if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
- if list := netpoll(false); !list.empty() {
+ if list := netpoll(0); !list.empty() {
injectglist(&list)
return true
}
@@ -2413,6 +2460,22 @@
return false
}
+// wakeNetPoller wakes up the thread sleeping in the network poller,
+// if there is one, and if it isn't going to wake up anyhow before
+// the when argument.
+func wakeNetPoller(when int64) {
+ if atomic.Load64(&sched.lastpoll) == 0 {
+ // In findrunnable we ensure that when polling the pollUntil
+ // field is either zero or the time to which the current
+ // poll is expected to run. This can have a spurious wakeup
+ // but should never miss a wakeup.
+ pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ if pollerPollUntil == 0 || pollerPollUntil > when {
+ netpollBreak()
+ }
+ }
+}
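
The main client of wakeNetPoller is the timer code: when a timer with an earlier deadline is added, the thread sleeping in netpoll must be kicked so the sleep duration can be recomputed. Below is a condensed sketch of that caller, modeled on addtimer in runtime/time.go; it is simplified (timer status checks omitted) and not part of this hunk.

func addtimerSketch(t *timer) {
	when := t.when
	pp := getg().m.p.ptr()

	lock(&pp.timersLock)
	cleantimers(pp)   // clear deleted/modified timers at the top of the heap
	doaddtimer(pp, t) // push t onto this P's timer heap
	unlock(&pp.timersLock)

	// If the netpoller is sleeping past the new deadline, wake it so
	// findrunnable recomputes its poll delta.
	wakeNetPoller(when)
}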
+
func resetspinning() {
_g_ := getg()
if !_g_.m.spinning {
@@ -2426,12 +2489,16 @@
// M wakeup policy is deliberately somewhat conservative, so check if we
// need to wakeup another P here. See "Worker thread parking/unparking"
// comment at the top of the file for details.
- if nmspinning == 0 && atomic.Load(&sched.npidle) > 0 {
- wakep()
- }
+ wakep()
}
-// Injects the list of runnable G's into the scheduler and clears glist.
+// injectglist adds each runnable G on the list to some run queue,
+// and clears glist. If there is no current P, they are added to the
+// global queue, and up to npidle M's are started to run them.
+// Otherwise, for each idle P, this adds a G to the global queue
+// and starts an M. Any remaining G's are added to the current P's
+// local run queue.
+// This may temporarily acquire the scheduler lock.
// Can run concurrently with GC.
func injectglist(glist *gList) {
if glist.empty() {
@@ -2442,18 +2509,52 @@
traceGoUnpark(gp, 0)
}
}
- lock(&sched.lock)
- var n int
- for n = 0; !glist.empty(); n++ {
- gp := glist.pop()
+
+ // Mark all the goroutines as runnable before we put them
+ // on the run queues.
+ head := glist.head.ptr()
+ var tail *g
+ qsize := 0
+ for gp := head; gp != nil; gp = gp.schedlink.ptr() {
+ tail = gp
+ qsize++
casgstatus(gp, _Gwaiting, _Grunnable)
- globrunqput(gp)
+ }
+
+ // Turn the gList into a gQueue.
+ var q gQueue
+ q.head.set(head)
+ q.tail.set(tail)
+ *glist = gList{}
+
+ startIdle := func(n int) {
+ for ; n != 0 && sched.npidle != 0; n-- {
+ startm(nil, false)
+ }
+ }
+
+ pp := getg().m.p.ptr()
+ if pp == nil {
+ lock(&sched.lock)
+ globrunqputbatch(&q, int32(qsize))
+ unlock(&sched.lock)
+ startIdle(qsize)
+ return
+ }
+
+ lock(&sched.lock)
+ npidle := int(sched.npidle)
+ var n int
+ for n = 0; n < npidle && !q.empty(); n++ {
+ globrunqput(q.pop())
}
unlock(&sched.lock)
- for ; n != 0 && sched.npidle != 0; n-- {
- startm(nil, false)
+ startIdle(n)
+ qsize -= n
+
+ if !q.empty() {
+ runqputbatch(pp, &q, qsize)
}
- *glist = gList{}
}
// One round of scheduler: find a runnable goroutine and execute it.
@@ -2477,14 +2578,26 @@
}
top:
+ pp := _g_.m.p.ptr()
+ pp.preempt = false
+
if sched.gcwaiting != 0 {
gcstopm()
goto top
}
- if _g_.m.p.ptr().runSafePointFn != 0 {
+ if pp.runSafePointFn != 0 {
runSafePointFn()
}
+ // Sanity check: if we are spinning, the run queue should be empty.
+ // Check this before calling checkTimers, as that might call
+ // goready to put a ready goroutine on the local run queue.
+ if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
+ throw("schedule: spinning with local work")
+ }
+
+ checkTimers(pp, 0)
+
var gp *g
var inheritTime bool
@@ -2516,9 +2629,8 @@
}
if gp == nil {
gp, inheritTime = runqget(_g_.m.p.ptr())
- if gp != nil && _g_.m.spinning {
- throw("schedule: spinning with local work")
- }
+ // We can see gp != nil here even if the M is spinning,
+ // if checkTimers added a local goroutine via goready.
}
if gp == nil {
gp, inheritTime = findrunnable() // blocks until work is available
@@ -2551,9 +2663,7 @@
// If about to schedule a not-normal goroutine (a GCworker or tracereader),
// wake a P if there is one.
if tryWakeP {
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
- wakep()
- }
+ wakep()
}
if gp.lockedm != 0 {
// Hands off own p to the locked m,
@@ -2579,6 +2689,90 @@
setGNoWB(&_g_.m.curg, nil)
}
+// checkTimers runs any timers for the P that are ready.
+// If now is not 0 it is the current time.
+// It returns the current time or 0 if it is not known,
+// and the time when the next timer should run or 0 if there is no next timer,
+// and reports whether it ran any timers.
+// If the time when the next timer should run is not 0,
+// it is always larger than the returned time.
+// We pass now in and out to avoid extra calls of nanotime.
+//go:yeswritebarrierrec
+func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
+ // If there are no timers to adjust, and the first timer on
+ // the heap is not yet ready to run, then there is nothing to do.
+ if atomic.Load(&pp.adjustTimers) == 0 {
+ next := int64(atomic.Load64(&pp.timer0When))
+ if next == 0 {
+ return now, 0, false
+ }
+ if now == 0 {
+ now = nanotime()
+ }
+ if now < next {
+ // Next timer is not ready to run.
+ // But keep going if we would clear deleted timers.
+ // This corresponds to the condition below where
+ // we decide whether to call clearDeletedTimers.
+ if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) {
+ return now, next, false
+ }
+ }
+ }
+
+ lock(&pp.timersLock)
+
+ adjusttimers(pp)
+
+ rnow = now
+ if len(pp.timers) > 0 {
+ if rnow == 0 {
+ rnow = nanotime()
+ }
+ for len(pp.timers) > 0 {
+ // Note that runtimer may temporarily unlock
+ // pp.timersLock.
+ if tw := runtimer(pp, rnow); tw != 0 {
+ if tw > 0 {
+ pollUntil = tw
+ }
+ break
+ }
+ ran = true
+ }
+ }
+
+ // If this is the local P, and there are a lot of deleted timers,
+ // clear them out. We only do this for the local P to reduce
+ // lock contention on timersLock.
+ if pp == getg().m.p.ptr() && int(atomic.Load(&pp.deletedTimers)) > len(pp.timers)/4 {
+ clearDeletedTimers(pp)
+ }
+
+ unlock(&pp.timersLock)
+
+ return rnow, pollUntil, ran
+}
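The fast path above reads pp.timer0When atomically and returns before ever taking timersLock when no timer is due. A hedged sketch of that "read a deadline atomically, lock only when something is due" pattern using only the standard library (timerQueue and its fields are illustrative, not runtime types):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

// timerQueue mirrors the shape of checkTimers' fast path: an atomically
// readable earliest deadline guards the mutex-protected list of timers.
type timerQueue struct {
	next      int64 // earliest deadline in UnixNano, 0 = none; accessed atomically
	mu        sync.Mutex
	deadlines []int64
}

func (q *timerQueue) check(now int64) (ran bool) {
	next := atomic.LoadInt64(&q.next)
	if next == 0 || now < next {
		return false // fast path: nothing is due, skip the lock entirely
	}
	q.mu.Lock()
	defer q.mu.Unlock()
	kept, earliest := q.deadlines[:0], int64(0)
	for _, d := range q.deadlines {
		if d <= now {
			ran = true // a due timer "fires" here
			continue
		}
		kept = append(kept, d)
		if earliest == 0 || d < earliest {
			earliest = d
		}
	}
	q.deadlines = kept
	atomic.StoreInt64(&q.next, earliest)
	return ran
}

func main() {
	now := time.Now().UnixNano()
	q := &timerQueue{next: now, deadlines: []int64{now}}
	fmt.Println("ran:", q.check(time.Now().UnixNano()))
}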
+
+// shouldStealTimers reports whether we should try stealing the timers from p2.
+// We don't steal timers from a running P that is not marked for preemption,
+// on the assumption that it will run its own timers. This reduces
+// contention on the timers lock.
+func shouldStealTimers(p2 *p) bool {
+ if p2.status != _Prunning {
+ return true
+ }
+ mp := p2.m.ptr()
+ if mp == nil || mp.locks > 0 {
+ return false
+ }
+ gp := mp.curg
+ if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt {
+ return false
+ }
+ return true
+}
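shouldStealTimers is a pure policy check: leave a healthy, running P alone on the assumption that it will run its own timers. A hedged sketch of the same kind of predicate over a toy worker type (the worker struct and its fields are illustrative, not runtime state):

package main

import "fmt"

type worker struct {
	running       bool
	holdingLocks  bool
	markedPreempt bool
}

// shouldSteal mirrors the shape of shouldStealTimers: steal from a peer
// that is not running, or that is running but about to be preempted;
// otherwise leave its work alone to reduce lock contention.
func shouldSteal(w worker) bool {
	if !w.running {
		return true
	}
	if w.holdingLocks {
		return false
	}
	return w.markedPreempt
}

func main() {
	fmt.Println(shouldSteal(worker{running: false}))                     // true
	fmt.Println(shouldSteal(worker{running: true}))                      // false
	fmt.Println(shouldSteal(worker{running: true, markedPreempt: true})) // true
}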
+
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
unlock((*mutex)(lock))
return true
@@ -2636,7 +2830,7 @@
// goschedguarded is a forbidden-states-avoided version of gosched_m
func goschedguarded_m(gp *g) {
- if gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" || gp.m.p.ptr().status != _Prunning {
+ if !canPreemptM(gp.m) {
gogo(&gp.sched) // never return
}
@@ -2653,6 +2847,50 @@
goschedImpl(gp)
}
+// preemptPark parks gp and puts it in _Gpreempted.
+//
+//go:systemstack
+func preemptPark(gp *g) {
+ if trace.enabled {
+ traceGoPark(traceEvGoBlock, 0)
+ }
+ status := readgstatus(gp)
+ if status&^_Gscan != _Grunning {
+ dumpgstatus(gp)
+ throw("bad g status")
+ }
+ gp.waitreason = waitReasonPreempted
+ // Transition from _Grunning to _Gscan|_Gpreempted. We can't
+ // be in _Grunning when we dropg because then we'd be running
+ // without an M, but the moment we're in _Gpreempted,
+ // something could claim this G before we've fully cleaned it
+ // up. Hence, we set the scan bit to lock down further
+ // transitions until we can dropg.
+ casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
+ dropg()
+ casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
+ schedule()
+}
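The comment above describes a two-step transition guarded by a scan bit: CAS into the target state with the bit set, finish cleanup, then clear the bit so others may act on the G. A hedged sketch of that lock-bit pattern on a plain uint32 state word (the states and bit value are illustrative, not the runtime's G status constants):

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	stateRunning   uint32 = 1
	statePreempted uint32 = 2
	scanBit        uint32 = 1 << 31 // "locks" the state while a transition completes
)

// park moves state from running to preempted in two steps, holding the
// scan bit across the intermediate cleanup, analogous to the
// casGToPreemptScan / casfrom_Gscanstatus pair in preemptPark.
func park(state *uint32, cleanup func()) {
	if !atomic.CompareAndSwapUint32(state, stateRunning, statePreempted|scanBit) {
		panic("bad state")
	}
	cleanup() // nothing else may claim the object while the scan bit is set
	atomic.StoreUint32(state, statePreempted)
}

func main() {
	s := stateRunning
	park(&s, func() { fmt.Println("cleanup while locked") })
	fmt.Println("final state:", s)
}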
+
+// goyield is like Gosched, but it:
+// - emits a GoPreempt trace event instead of a GoSched trace event
+// - puts the current G on the runq of the current P instead of the globrunq
+func goyield() {
+ checkTimeouts()
+ mcall(goyield_m)
+}
+
+func goyield_m(gp *g) {
+ if trace.enabled {
+ traceGoPreempt()
+ }
+ pp := gp.m.p.ptr()
+ casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
+ runqput(pp, gp, false)
+ schedule()
+}
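goyield itself is runtime-internal; the closest tool available to user code is runtime.Gosched, which also reschedules the current goroutine but goes through the global run queue (and, per the comment above, emits a GoSched rather than GoPreempt trace event). A small usage sketch:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		fmt.Println("worker runs")
		close(done)
	}()
	for {
		select {
		case <-done:
			fmt.Println("main resumes")
			return
		default:
			// Yield the processor so the worker goroutine can run,
			// even with GOMAXPROCS=1.
			runtime.Gosched()
		}
	}
}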
+
// Finishes execution of the current goroutine.
func goexit1() {
if raceenabled {
@@ -2676,6 +2914,7 @@
locked := gp.lockedm != 0
gp.lockedm = 0
_g_.m.lockedg = 0
+ gp.preemptStop = false
gp.paniconfault = false
gp._defer = nil // should be true already but just in case.
gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
@@ -2694,9 +2933,6 @@
gp.gcAssistBytes = 0
}
- // Note that gp's stack scan is now "valid" because it has no
- // stack.
- gp.gcscanvalid = true
dropg()
if GOARCH == "wasm" { // no threads yet on wasm
@@ -2835,7 +3071,6 @@
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
_g_.sysblocktraced = true
- _g_.m.mcache = nil
pp := _g_.m.p.ptr()
pp.m = 0
_g_.m.oldp.set(pp)
@@ -2961,9 +3196,6 @@
oldp := _g_.m.oldp.ptr()
_g_.m.oldp = 0
if exitsyscallfast(oldp) {
- if _g_.m.mcache == nil {
- throw("lost mcache")
- }
if trace.enabled {
if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
systemstack(traceGoStart)
@@ -3014,10 +3246,6 @@
// Call the scheduler.
mcall(exitsyscall0)
- if _g_.m.mcache == nil {
- throw("lost mcache")
- }
-
// Scheduler returned, so we're allowed to run now.
// Delete the syscallsp information that we left for
// the garbage collector during the system call.
@@ -3237,6 +3465,9 @@
})
newg.stackguard0 = newg.stack.lo + _StackGuard
newg.stackguard1 = ^uintptr(0)
+ // Clear the bottom word of the stack. We record g
+ // there on gsignal stack during VDSO on ARM and ARM64.
+ *(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
}
return newg
}
@@ -3244,23 +3475,44 @@
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
-// Cannot split the stack because it assumes that the arguments
-// are available sequentially after &fn; they would not be
-// copied if a stack split occurred.
+//
+// The stack layout of this call is unusual: it assumes that the
+// arguments to pass to fn are on the stack sequentially immediately
+// after &fn. Hence, they are logically part of newproc's argument
+// frame, even though they don't appear in its signature (and can't
+// because their types differ between call sites).
+//
+// This must be nosplit because this stack layout means there are
+// untyped arguments in newproc's argument frame. Stack copies won't
+// be able to adjust them and stack splits won't be able to copy them.
+//
//go:nosplit
func newproc(siz int32, fn *funcval) {
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
gp := getg()
pc := getcallerpc()
systemstack(func() {
- newproc1(fn, (*uint8)(argp), siz, gp, pc)
+ newg := newproc1(fn, argp, siz, gp, pc)
+
+ _p_ := getg().m.p.ptr()
+ runqput(_p_, newg, true)
+
+ if mainStarted {
+ wakep()
+ }
})
}
-// Create a new g running fn with narg bytes of arguments starting
-// at argp. callerpc is the address of the go statement that created
-// this. The new g is put on the queue of g's waiting to run.
-func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr) {
+// Create a new g in state _Grunnable, starting at fn, with narg bytes
+// of arguments starting at argp. callerpc is the address of the go
+// statement that created this. The caller is responsible for adding
+// the new g to the scheduler.
+//
+// This must run on the system stack because it's the continuation of
+// newproc, which cannot split the stack.
+//
+//go:systemstack
+func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
_g_ := getg()
if fn == nil {
@@ -3305,7 +3557,7 @@
spArg += sys.MinFrameSize
}
if narg > 0 {
- memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
+ memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
// This is a stack-to-stack copy. If write barriers
// are enabled and the source stack is grey (the
// destination is always black), then perform a
@@ -3338,7 +3590,6 @@
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
- newg.gcscanvalid = false
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {
@@ -3357,12 +3608,9 @@
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
- runqput(_p_, newg, true)
-
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
- wakep()
- }
releasem(_g_.m)
+
+ return newg
}
// saveAncestors copies previous ancestors of the given caller g and
@@ -3909,10 +4157,12 @@
pp.wbBuf.reset()
if pp.mcache == nil {
if id == 0 {
- if getg().m.mcache == nil {
+ if mcache0 == nil {
throw("missing mcache?")
}
- pp.mcache = getg().m.mcache // bootstrap
+ // Use the bootstrap mcache0. Only one P will get
+ // mcache0: the one with ID 0.
+ pp.mcache = mcache0
} else {
pp.mcache = allocmcache()
}
@@ -3925,6 +4175,7 @@
pp.raceprocctx = raceproccreate()
}
}
+ lockInit(&pp.timersLock, lockRankTimers)
}
// destroy releases all of the resources associated with pp and
@@ -3944,6 +4195,23 @@
globrunqputhead(pp.runnext.ptr())
pp.runnext = 0
}
+ if len(pp.timers) > 0 {
+ plocal := getg().m.p.ptr()
+ // The world is stopped, but we acquire timersLock to
+ // protect against sysmon calling timeSleepUntil.
+ // This is the only case where we hold the timersLock of
+ // more than one P, so there are no deadlock concerns.
+ lock(&plocal.timersLock)
+ lock(&pp.timersLock)
+ moveTimers(plocal, pp.timers)
+ pp.timers = nil
+ pp.numTimers = 0
+ pp.adjustTimers = 0
+ pp.deletedTimers = 0
+ atomic.Store64(&pp.timer0When, 0)
+ unlock(&pp.timersLock)
+ unlock(&plocal.timersLock)
+ }
// If there's a background worker, make it runnable and put
// it on the global queue so it can clean itself up.
if gp := pp.gcBgMarkWorker.ptr(); gp != nil {
@@ -3971,11 +4239,34 @@
}
pp.deferpool[i] = pp.deferpoolbuf[i][:0]
}
+ systemstack(func() {
+ for i := 0; i < pp.mspancache.len; i++ {
+ // Safe to call since the world is stopped.
+ mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
+ }
+ pp.mspancache.len = 0
+ pp.pcache.flush(&mheap_.pages)
+ })
freemcache(pp.mcache)
pp.mcache = nil
gfpurge(pp)
traceProcFree(pp)
if raceenabled {
+ if pp.timerRaceCtx != 0 {
+ // The race detector code uses a callback to fetch
+ // the proc context, so arrange for that callback
+ // to see the right thing.
+ // This hack only works because we are the only
+ // thread running.
+ mp := getg().m
+ phold := mp.p.ptr()
+ mp.p.set(pp)
+
+ racectxend(pp.timerRaceCtx)
+ pp.timerRaceCtx = 0
+
+ mp.p.set(phold)
+ }
raceprocdestroy(pp.raceprocctx)
pp.raceprocctx = 0
}
@@ -4052,7 +4343,6 @@
_g_.m.p.ptr().m = 0
}
_g_.m.p = 0
- _g_.m.mcache = nil
p := allp[0]
p.m = 0
p.status = _Pidle
@@ -4062,6 +4352,9 @@
}
}
+ // g.m.p is now set, so we no longer need mcache0 for bootstrapping.
+ mcache0 = nil
+
// release resources from unused P's
for i := nprocs; i < old; i++ {
p := allp[i]
@@ -4127,7 +4420,7 @@
func wirep(_p_ *p) {
_g_ := getg()
- if _g_.m.p != 0 || _g_.m.mcache != nil {
+ if _g_.m.p != 0 {
throw("wirep: already in go")
}
if _p_.m != 0 || _p_.status != _Pidle {
@@ -4138,7 +4431,6 @@
print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
throw("wirep: invalid p state")
}
- _g_.m.mcache = _p_.mcache
_g_.m.p.set(_p_)
_p_.m.set(_g_.m)
_p_.status = _Prunning
@@ -4148,19 +4440,18 @@
func releasep() *p {
_g_ := getg()
- if _g_.m.p == 0 || _g_.m.mcache == nil {
+ if _g_.m.p == 0 {
throw("releasep: invalid arg")
}
_p_ := _g_.m.p.ptr()
- if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
- print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
+ if _p_.m.ptr() != _g_.m || _p_.status != _Prunning {
+ print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " p->status=", _p_.status, "\n")
throw("releasep: invalid p state")
}
if trace.enabled {
traceProcStop(_g_.m.p.ptr())
}
_g_.m.p = 0
- _g_.m.mcache = nil
_p_.m = 0
_p_.status = _Pidle
return _p_
@@ -4226,7 +4517,8 @@
}
s := readgstatus(gp)
switch s &^ _Gscan {
- case _Gwaiting:
+ case _Gwaiting,
+ _Gpreempted:
grunning++
case _Grunnable,
_Grunning,
@@ -4238,30 +4530,42 @@
}
unlock(&allglock)
if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
+ unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
throw("no goroutines (main called runtime.Goexit) - deadlock!")
}
// Maybe jump time forward for playground.
- gp := timejump()
- if gp != nil {
- casgstatus(gp, _Gwaiting, _Grunnable)
- globrunqput(gp)
- _p_ := pidleget()
- if _p_ == nil {
- throw("checkdead: no p for timer")
+ if faketime != 0 {
+ when, _p_ := timeSleepUntil()
+ if _p_ != nil {
+ faketime = when
+ for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link {
+ if (*pp).ptr() == _p_ {
+ *pp = _p_.link
+ break
+ }
+ }
+ mp := mget()
+ if mp == nil {
+ // There should always be a free M since
+ // nothing is running.
+ throw("checkdead: no m for timer")
+ }
+ mp.nextp.set(_p_)
+ notewakeup(&mp.park)
+ return
}
- mp := mget()
- if mp == nil {
- // There should always be a free M since
- // nothing is running.
- throw("checkdead: no m for timer")
+ }
+
+ // There are no goroutines running, so we can look at the P's.
+ for _, _p_ := range allp {
+ if len(_p_.timers) > 0 {
+ return
}
- mp.nextp.set(_p_)
- notewakeup(&mp.park)
- return
}
getg().m.throwing = -1 // do not dump full stacks
+ unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
throw("all goroutines are asleep - deadlock!")
}
@@ -4294,47 +4598,60 @@
delay = 10 * 1000
}
usleep(delay)
+ now := nanotime()
+ next, _ := timeSleepUntil()
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
lock(&sched.lock)
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
- atomic.Store(&sched.sysmonwait, 1)
- unlock(&sched.lock)
- // Make wake-up period small enough
- // for the sampling to be correct.
- maxsleep := forcegcperiod / 2
- shouldRelax := true
- if osRelaxMinNS > 0 {
- next := timeSleepUntil()
- now := nanotime()
- if next-now < osRelaxMinNS {
- shouldRelax = false
+ if next > now {
+ atomic.Store(&sched.sysmonwait, 1)
+ unlock(&sched.lock)
+ // Make wake-up period small enough
+ // for the sampling to be correct.
+ sleep := forcegcperiod / 2
+ if next-now < sleep {
+ sleep = next - now
}
+ shouldRelax := sleep >= osRelaxMinNS
+ if shouldRelax {
+ osRelax(true)
+ }
+ notetsleep(&sched.sysmonnote, sleep)
+ if shouldRelax {
+ osRelax(false)
+ }
+ now = nanotime()
+ next, _ = timeSleepUntil()
+ lock(&sched.lock)
+ atomic.Store(&sched.sysmonwait, 0)
+ noteclear(&sched.sysmonnote)
}
- if shouldRelax {
- osRelax(true)
- }
- notetsleep(&sched.sysmonnote, maxsleep)
- if shouldRelax {
- osRelax(false)
- }
- lock(&sched.lock)
- atomic.Store(&sched.sysmonwait, 0)
- noteclear(&sched.sysmonnote)
idle = 0
delay = 20
}
unlock(&sched.lock)
}
+ lock(&sched.sysmonlock)
+ {
+ // If we spent a long time blocked on sysmonlock
+ // then we want to update now and next since it's
+ // likely stale.
+ now1 := nanotime()
+ if now1-now > 50*1000 /* 50µs */ {
+ next, _ = timeSleepUntil()
+ }
+ now = now1
+ }
+
// trigger libc interceptors if needed
if *cgo_yield != nil {
asmcgocall(*cgo_yield, nil)
}
// poll network if not polled for more than 10ms
lastpoll := int64(atomic.Load64(&sched.lastpoll))
- now := nanotime()
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
- list := netpoll(false) // non-blocking - returns list of goroutines
+ list := netpoll(0) // non-blocking - returns list of goroutines
if !list.empty() {
// Need to decrement number of idle locked M's
// (pretending that one more is running) before injectglist.
@@ -4348,6 +4665,16 @@
incidlelocked(1)
}
}
+ if next < now {
+ // There are timers that should have already run,
+ // perhaps because there is an unpreemptible P.
+ // Try to start an M to run them.
+ startm(nil, false)
+ }
+ if atomic.Load(&scavenge.sysmonWake) != 0 {
+ // Kick the scavenger awake if someone requested it.
+ wakeScavenger()
+ }
// retake P's blocked in syscalls
// and preempt long running G's
if retake(now) != 0 {
@@ -4368,6 +4695,7 @@
lasttrace = now
schedtrace(debug.scheddetail > 0)
}
+ unlock(&sched.sysmonlock)
}
}
@@ -4496,6 +4824,13 @@
// Setting gp->stackguard0 to StackPreempt folds
// preemption into the normal stack overflow check.
gp.stackguard0 = stackPreempt
+
+ // Request an async preemption of this P.
+ if preemptMSupported && debug.asyncpreemptoff == 0 {
+ _p_.preempt = true
+ preemptM(mp)
+ }
+
return true
}
@@ -4524,7 +4859,7 @@
if mp != nil {
id = mp.id
}
- print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n")
+ print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
} else {
// In non-detailed mode format lengths of per-P run queues as:
// [len1 len2 len3 len4]
@@ -4829,6 +5164,38 @@
return true
}
+// runqputbatch tries to put all the G's on q on the local runnable queue.
+// If the queue is full, they are put on the global queue; in that case
+// this will temporarily acquire the scheduler lock.
+// Executed only by the owner P.
+func runqputbatch(pp *p, q *gQueue, qsize int) {
+ h := atomic.LoadAcq(&pp.runqhead)
+ t := pp.runqtail
+ n := uint32(0)
+ for !q.empty() && t-h < uint32(len(pp.runq)) {
+ gp := q.pop()
+ pp.runq[t%uint32(len(pp.runq))].set(gp)
+ t++
+ n++
+ }
+ qsize -= int(n)
+
+ if randomizeScheduler {
+ off := func(o uint32) uint32 {
+ return (pp.runqtail + o) % uint32(len(pp.runq))
+ }
+ for i := uint32(1); i < n; i++ {
+ j := fastrandn(i + 1)
+ pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)]
+ }
+ }
+
+ atomic.StoreRel(&pp.runqtail, t)
+ if !q.empty() {
+ globrunqputbatch(q, int32(qsize))
+ }
+}
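runqputbatch fills the P's fixed-size ring until it is full and spills whatever is left onto the global queue under the scheduler lock. A hedged, single-threaded sketch of that bounded-local-queue-with-overflow idea (no atomics, illustrative types only):

package main

import "fmt"

const localCap = 4 // stand-in for len(pp.runq)

type scheduler struct {
	local  [localCap]int
	head   uint32
	tail   uint32
	global []int
}

// putBatch mirrors runqputbatch: fill the local ring until it is full,
// then push the remainder onto the global queue in one operation.
func (s *scheduler) putBatch(batch []int) {
	i := 0
	for ; i < len(batch) && s.tail-s.head < localCap; i++ {
		s.local[s.tail%localCap] = batch[i]
		s.tail++
	}
	if i < len(batch) {
		s.global = append(s.global, batch[i:]...) // overflow, like globrunqputbatch
	}
}

func main() {
	var s scheduler
	s.putBatch([]int{1, 2, 3, 4, 5, 6})
	fmt.Println("local:", s.local, "global:", s.global)
}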
+
// Get g from local runnable queue.
// If inheritTime is true, gp should inherit the remaining time in the
// current time slice. Otherwise, it should start a new time slice.
@@ -5194,6 +5561,7 @@
}
// An initTask represents the set of initializations that need to be done for a package.
+// Keep in sync with ../../test/initempty.go:initTask
type initTask struct {
// TODO: pack the first 3 fields more tightly?
state uintptr // 0 = uninitialized, 1 = in progress, 2 = done
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 6e6272e..de4dec3 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -6,6 +6,8 @@
import (
"fmt"
+ "internal/race"
+ "internal/testenv"
"math"
"net"
"runtime"
@@ -356,6 +358,17 @@
atomic.StoreUint32(&stop, 1)
}
+func TestAsyncPreempt(t *testing.T) {
+ if !runtime.PreemptMSupported {
+ t.Skip("asynchronous preemption not supported on this platform")
+ }
+ output := runTestProg(t, "testprog", "AsyncPreempt")
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
+
func TestGCFairness(t *testing.T) {
output := runTestProg(t, "testprog", "GCFairness")
want := "OK\n"
@@ -411,6 +424,11 @@
if testing.Short() {
t.Skip("skipping in -short mode")
}
+ if race.Enabled {
+ // The race detector randomizes the scheduler,
+ // which causes this test to fail (#38266).
+ t.Skip("skipping in -race mode")
+ }
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
done := make(chan bool)
@@ -912,6 +930,29 @@
}
}
+func TestLockOSThreadTemplateThreadRace(t *testing.T) {
+ testenv.MustHaveGoRun(t)
+
+ exe, err := buildTestProg(t, "testprog")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ iterations := 100
+ if testing.Short() {
+ // Reduce run time to ~100ms, with much lower probability of
+ // catching issues.
+ iterations = 5
+ }
+ for i := 0; i < iterations; i++ {
+ want := "OK\n"
+ output := runBuiltTestProg(t, exe, "LockOSThreadTemplateThreadRace")
+ if output != want {
+ t.Fatalf("run %d: want %q, got %q", i, want, output)
+ }
+ }
+}
+
// fakeSyscall emulates a system call.
//go:nosplit
func fakeSyscall(duration time.Duration) {
@@ -981,3 +1022,61 @@
func TestGetgThreadSwitch(t *testing.T) {
runtime.RunGetgThreadSwitchTest()
}
+
+// TestNetpollBreak tests that netpollBreak can break a netpoll.
+// This test is not particularly safe since the call to netpoll
+// will pick up any stray files that are ready, but it should work
+// OK as long as it is not run in parallel.
+func TestNetpollBreak(t *testing.T) {
+ if runtime.GOMAXPROCS(0) == 1 {
+ t.Skip("skipping: GOMAXPROCS=1")
+ }
+
+ // Make sure that netpoll is initialized.
+ runtime.NetpollGenericInit()
+
+ start := time.Now()
+ c := make(chan bool, 2)
+ go func() {
+ c <- true
+ runtime.Netpoll(10 * time.Second.Nanoseconds())
+ c <- true
+ }()
+ <-c
+ // Loop because the break might get eaten by the scheduler.
+ // Break twice to break both the netpoll we started and the
+ // scheduler netpoll.
+loop:
+ for {
+ runtime.Usleep(100)
+ runtime.NetpollBreak()
+ runtime.NetpollBreak()
+ select {
+ case <-c:
+ break loop
+ default:
+ }
+ }
+ if dur := time.Since(start); dur > 5*time.Second {
+ t.Errorf("netpollBreak did not interrupt netpoll: slept for: %v", dur)
+ }
+}
+
+// TestBigGOMAXPROCS tests that setting GOMAXPROCS to a large value
+// doesn't cause a crash at startup. See issue 38474.
+func TestBigGOMAXPROCS(t *testing.T) {
+ t.Parallel()
+ output := runTestProg(t, "testprog", "NonexistentTest", "GOMAXPROCS=1024")
+ // Ignore error conditions on small machines.
+ for _, errstr := range []string{
+ "failed to create new OS thread",
+ "cannot allocate memory",
+ } {
+ if strings.Contains(output, errstr) {
+ t.Skipf("failed to create 1024 threads")
+ }
+ }
+ if !strings.Contains(output, "unknown function: NonexistentTest") {
+ t.Errorf("output:\n%s\nwanted:\nunknown function: NonexistentTest", output)
+ }
+}
diff --git a/src/runtime/race.go b/src/runtime/race.go
index c41e1ba..53910f9 100644
--- a/src/runtime/race.go
+++ b/src/runtime/race.go
@@ -155,15 +155,51 @@
}
}
+// raceSymbolizeCode reads ctx.pc and populates the rest of *ctx with
+// information about the code at that pc.
+//
+// The race detector has already subtracted 1 from pcs, so they point to the last
+// byte of call instructions (including calls to runtime.racewrite and friends).
+//
+// If the incoming pc is part of an inlined function, *ctx is populated
+// with information about the inlined function, and on return ctx.pc is set
+// to a pc in the logically containing function. (The race detector should call this
+// function again with that pc.)
+//
+// If the incoming pc is not part of an inlined function, the return pc is unchanged.
func raceSymbolizeCode(ctx *symbolizeCodeContext) {
- f := findfunc(ctx.pc)._Func()
+ pc := ctx.pc
+ fi := findfunc(pc)
+ f := fi._Func()
if f != nil {
- file, line := f.FileLine(ctx.pc)
+ file, line := f.FileLine(pc)
if line != 0 {
- ctx.fn = cfuncname(f.funcInfo())
+ if inldata := funcdata(fi, _FUNCDATA_InlTree); inldata != nil {
+ inltree := (*[1 << 20]inlinedCall)(inldata)
+ for {
+ ix := pcdatavalue(fi, _PCDATA_InlTreeIndex, pc, nil)
+ if ix >= 0 {
+ if inltree[ix].funcID == funcID_wrapper {
+ // ignore wrappers
+ // Back up to an instruction in the "caller".
+ pc = f.Entry() + uintptr(inltree[ix].parentPc)
+ continue
+ }
+ ctx.pc = f.Entry() + uintptr(inltree[ix].parentPc) // "caller" pc
+ ctx.fn = cfuncnameFromNameoff(fi, inltree[ix].func_)
+ ctx.line = uintptr(line)
+ ctx.file = &bytes(file)[0] // assume NUL-terminated
+ ctx.off = pc - f.Entry()
+ ctx.res = 1
+ return
+ }
+ break
+ }
+ }
+ ctx.fn = cfuncname(fi)
ctx.line = uintptr(line)
ctx.file = &bytes(file)[0] // assume NUL-terminated
- ctx.off = ctx.pc - f.Entry()
+ ctx.off = pc - f.Entry()
ctx.res = 1
return
}
@@ -349,7 +385,7 @@
if end < firstmoduledata.ebss {
end = firstmoduledata.ebss
}
- size := round(end-start, _PageSize)
+ size := alignUp(end-start, _PageSize)
racecall(&__tsan_map_shadow, start, size, 0, 0)
racedatastart = start
racedataend = start + size
@@ -367,6 +403,9 @@
// already held it's assumed that the first caller exits the program
// so other calls can hang forever without an issue.
lock(&raceFiniLock)
+ // We're entering external code that may call ExitProcess on
+ // Windows.
+ osPreemptExtEnter(getg().m)
racecall(&__tsan_fini, 0, 0, 0, 0)
}
@@ -424,6 +463,11 @@
}
//go:nosplit
+func racectxend(racectx uintptr) {
+ racecall(&__tsan_go_end, racectx, 0, 0, 0)
+}
+
+//go:nosplit
func racewriterangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
_g_ := getg()
if _g_ != _g_.m.curg {
@@ -471,6 +515,14 @@
}
//go:nosplit
+func raceacquirectx(racectx uintptr, addr unsafe.Pointer) {
+ if !isvalidaddr(addr) {
+ return
+ }
+ racecall(&__tsan_acquire, racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
func racerelease(addr unsafe.Pointer) {
racereleaseg(getg(), addr)
}
diff --git a/src/runtime/race/README b/src/runtime/race/README
index be53b4c..65378c8 100644
--- a/src/runtime/race/README
+++ b/src/runtime/race/README
@@ -1,13 +1,13 @@
runtime/race package contains the data race detector runtime library.
It is based on ThreadSanitizer race detector, that is currently a part of
-the LLVM project (http://llvm.org/git/compiler-rt.git).
+the LLVM project (https://github.com/llvm/llvm-project/tree/master/compiler-rt).
To update the .syso files use golang.org/x/build/cmd/racebuild.
-race_darwin_amd64.syso built with LLVM fe2c72c59aa7f4afa45e3f65a5d16a374b6cce26 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_freebsd_amd64.syso built with LLVM fe2c72c59aa7f4afa45e3f65a5d16a374b6cce26 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_linux_amd64.syso built with LLVM fe2c72c59aa7f4afa45e3f65a5d16a374b6cce26 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_linux_ppc64le.syso built with LLVM fe2c72c59aa7f4afa45e3f65a5d16a374b6cce26 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_netbsd_amd64.syso built with LLVM fe2c72c59aa7f4afa45e3f65a5d16a374b6cce26 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_windows_amd64.syso built with LLVM ae08a22cc215448aa3ad5a6fb099f6df77e9fa01 and Go 323c85862a7afbde66a3bba0776bf4ba6cd7c030.
-race_linux_arm64.syso built with LLVM 3aa2b775d08f903f804246af10b80a439c16b436 and Go ef2c48659880c7e8a989e6721a21f018790f7793.
+race_darwin_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_freebsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_linux_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_linux_ppc64le.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_netbsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_windows_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_linux_arm64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
diff --git a/src/runtime/race/output_test.go b/src/runtime/race/output_test.go
index 019ad58..d3e7762 100644
--- a/src/runtime/race/output_test.go
+++ b/src/runtime/race/output_test.go
@@ -24,7 +24,7 @@
t.Fatal(err)
}
defer os.RemoveAll(pkgdir)
- out, err := exec.Command(testenv.GoToolPath(t), "install", "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", "testing").CombinedOutput()
+ out, err := exec.Command(testenv.GoToolPath(t), "install", "-race", "-pkgdir="+pkgdir, "testing").CombinedOutput()
if err != nil {
t.Fatalf("go install -race: %v\n%s", err, out)
}
@@ -56,8 +56,8 @@
if err := f.Close(); err != nil {
t.Fatalf("failed to close file: %v", err)
}
- // Pass -l to the compiler to test stack traces.
- cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", src)
+
+ cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-pkgdir="+pkgdir, src)
// GODEBUG spoils program output, GOMAXPROCS makes it flaky.
for _, env := range os.Environ() {
if strings.HasPrefix(env, "GODEBUG=") ||
@@ -218,6 +218,52 @@
main\.main\.func1\(\)
.*/main.go:7`},
+ // Test for https://golang.org/issue/33309
+ {"midstack_inlining_traceback", "run", "linux", "atexit_sleep_ms=0", `
+package main
+
+var x int
+
+func main() {
+ c := make(chan int)
+ go f(c)
+ x = 1
+ <-c
+}
+
+func f(c chan int) {
+ g(c)
+}
+
+func g(c chan int) {
+ h(c)
+}
+
+func h(c chan int) {
+ c <- x
+}
+`, `==================
+WARNING: DATA RACE
+Read at 0x[0-9,a-f]+ by goroutine [0-9]:
+ main\.h\(\)
+ .+/main\.go:22 \+0x[0-9,a-f]+
+ main\.g\(\)
+ .+/main\.go:18 \+0x[0-9,a-f]+
+ main\.f\(\)
+ .+/main\.go:14 \+0x[0-9,a-f]+
+
+Previous write at 0x[0-9,a-f]+ by main goroutine:
+ main\.main\(\)
+ .+/main\.go:9 \+0x[0-9,a-f]+
+
+Goroutine [0-9] \(running\) created at:
+ main\.main\(\)
+ .+/main\.go:8 \+0x[0-9,a-f]+
+==================
+Found 1 data race\(s\)
+exit status 66
+`},
+
// Test for https://golang.org/issue/17190
{"external_cgo_thread", "run", "linux", "atexit_sleep_ms=0", `
package main
diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go
index d298e80..c894de5 100644
--- a/src/runtime/race/race.go
+++ b/src/runtime/race/race.go
@@ -7,7 +7,7 @@
package race
// This file merely ensures that we link in runtime/cgo in race build,
-// this is turn ensures that runtime uses pthread_create to create threads.
+// this in turn ensures that runtime uses pthread_create to create threads.
// The prebuilt race runtime lives in race_GOOS_GOARCH.syso.
// Calls to the runtime are done directly from src/runtime/race.go.
diff --git a/src/runtime/race/race_darwin_amd64.syso b/src/runtime/race/race_darwin_amd64.syso
index 0e4017b..d03a593 100644
--- a/src/runtime/race/race_darwin_amd64.syso
+++ b/src/runtime/race/race_darwin_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_freebsd_amd64.syso b/src/runtime/race/race_freebsd_amd64.syso
index fcae118..573591c 100644
--- a/src/runtime/race/race_freebsd_amd64.syso
+++ b/src/runtime/race/race_freebsd_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_amd64.syso b/src/runtime/race/race_linux_amd64.syso
index c18e2a0..255b2e5 100644
--- a/src/runtime/race/race_linux_amd64.syso
+++ b/src/runtime/race/race_linux_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_arm64.syso b/src/runtime/race/race_linux_arm64.syso
index 65bc1ec..f15c599 100644
--- a/src/runtime/race/race_linux_arm64.syso
+++ b/src/runtime/race/race_linux_arm64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_ppc64le.syso b/src/runtime/race/race_linux_ppc64le.syso
index a3609db..2bf5029 100644
--- a/src/runtime/race/race_linux_ppc64le.syso
+++ b/src/runtime/race/race_linux_ppc64le.syso
Binary files differ
diff --git a/src/runtime/race/race_netbsd_amd64.syso b/src/runtime/race/race_netbsd_amd64.syso
index 3937a61..54e276b 100644
--- a/src/runtime/race/race_netbsd_amd64.syso
+++ b/src/runtime/race/race_netbsd_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_windows_amd64.syso b/src/runtime/race/race_windows_amd64.syso
index 1f1dd17..abaf426 100644
--- a/src/runtime/race/race_windows_amd64.syso
+++ b/src/runtime/race/race_windows_amd64.syso
Binary files differ
diff --git a/src/runtime/race/testdata/chan_test.go b/src/runtime/race/testdata/chan_test.go
index 60e55ed..3e57b82 100644
--- a/src/runtime/race/testdata/chan_test.go
+++ b/src/runtime/race/testdata/chan_test.go
@@ -737,3 +737,29 @@
case <-make(chan int):
}
}
+
+// Test that close synchronizes with a read from the empty closed channel.
+// See https://golang.org/issue/36714.
+func TestNoRaceCloseHappensBeforeRead(t *testing.T) {
+ for i := 0; i < 100; i++ {
+ var loc int
+ var write = make(chan struct{})
+ var read = make(chan struct{})
+
+ go func() {
+ select {
+ case <-write:
+ _ = loc
+ default:
+ }
+ close(read)
+ }()
+
+ go func() {
+ loc = 1
+ close(write)
+ }()
+
+ <-read
+ }
+}
diff --git a/src/runtime/race/testdata/select_test.go b/src/runtime/race/testdata/select_test.go
index 3827867..9a43f9b 100644
--- a/src/runtime/race/testdata/select_test.go
+++ b/src/runtime/race/testdata/select_test.go
@@ -20,7 +20,7 @@
x = 1
// At least two channels are needed because
// otherwise the compiler optimizes select out.
- // See comment in runtime/select.go:^func selectgoImpl.
+ // See comment in runtime/select.go:^func selectgo.
select {
case c <- true:
case c1 <- true:
diff --git a/src/runtime/race/testdata/slice_test.go b/src/runtime/race/testdata/slice_test.go
index 1ec5243..9009a9a 100644
--- a/src/runtime/race/testdata/slice_test.go
+++ b/src/runtime/race/testdata/slice_test.go
@@ -5,6 +5,7 @@
package race_test
import (
+ "sync"
"testing"
)
@@ -590,3 +591,18 @@
_ = x[:1:i]
<-done
}
+
+var saved string
+
+func TestRaceSlice4(t *testing.T) {
+ // See issue 36794.
+ data := []byte("hello there")
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ _ = string(data)
+ wg.Done()
+ }()
+ copy(data, data[2:])
+ wg.Wait()
+}
diff --git a/src/runtime/race/timer_test.go b/src/runtime/race/timer_test.go
new file mode 100644
index 0000000..a6c34a8
--- /dev/null
+++ b/src/runtime/race/timer_test.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+package race_test
+
+import (
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestTimers(t *testing.T) {
+ const goroutines = 8
+ var wg sync.WaitGroup
+ wg.Add(goroutines)
+ var mu sync.Mutex
+ for i := 0; i < goroutines; i++ {
+ go func() {
+ defer wg.Done()
+ ticker := time.NewTicker(1)
+ defer ticker.Stop()
+ for c := 0; c < 1000; c++ {
+ <-ticker.C
+ mu.Lock()
+ mu.Unlock()
+ }
+ }()
+ }
+ wg.Wait()
+}
diff --git a/src/runtime/race0.go b/src/runtime/race0.go
index f1d3706..6f26afa 100644
--- a/src/runtime/race0.go
+++ b/src/runtime/race0.go
@@ -29,6 +29,7 @@
func racewriterangepc(addr unsafe.Pointer, sz, callerpc, pc uintptr) { throw("race") }
func raceacquire(addr unsafe.Pointer) { throw("race") }
func raceacquireg(gp *g, addr unsafe.Pointer) { throw("race") }
+func raceacquirectx(racectx uintptr, addr unsafe.Pointer) { throw("race") }
func racerelease(addr unsafe.Pointer) { throw("race") }
func racereleaseg(gp *g, addr unsafe.Pointer) { throw("race") }
func racereleasemerge(addr unsafe.Pointer) { throw("race") }
@@ -38,3 +39,4 @@
func racefree(p unsafe.Pointer, sz uintptr) { throw("race") }
func racegostart(pc uintptr) uintptr { throw("race"); return 0 }
func racegoend() { throw("race") }
+func racectxend(racectx uintptr) { throw("race") }
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index 4ed9533..758d543 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -416,9 +416,11 @@
// Set g = g0.
get_tls(R12)
MOVQ g(R12), R13
- MOVQ g_m(R13), R13
- MOVQ m_g0(R13), R14
- MOVQ R14, g(R12) // g = m->g0
+ MOVQ g_m(R13), R14
+ MOVQ m_g0(R14), R15
+ CMPQ R13, R15
+ JEQ noswitch // branch if already on g0
+ MOVQ R15, g(R12) // g = m->g0
PUSHQ RARG1 // func arg
PUSHQ RARG0 // func arg
CALL runtime·racecallback(SB)
@@ -430,6 +432,7 @@
MOVQ g_m(R13), R13
MOVQ m_curg(R13), R14
MOVQ R14, g(R12) // g = m->curg
+ret:
// Restore callee-saved registers.
POPQ R15
POPQ R14
@@ -440,3 +443,12 @@
POPQ BP
POPQ BX
RET
+
+noswitch:
+ // already on g0
+ PUSHQ RARG1 // func arg
+ PUSHQ RARG0 // func arg
+ CALL runtime·racecallback(SB)
+ POPQ R12
+ POPQ R12
+ JMP ret
diff --git a/src/runtime/race_arm64.s b/src/runtime/race_arm64.s
index 48c719a..9b909ac 100644
--- a/src/runtime/race_arm64.s
+++ b/src/runtime/race_arm64.s
@@ -421,8 +421,7 @@
// First, code below assumes that we are on curg, while raceGetProcCmd
// can be executed on g0. Second, it is called frequently, so will
// benefit from this fast path.
- CMP $0, R0
- BNE rest
+ CBNZ R0, rest
MOVD g, R13
load_g
MOVD g_m(g), R0
@@ -434,13 +433,13 @@
rest:
// Save callee-saved registers (Go code won't respect that).
// 8(RSP) and 16(RSP) are for args passed through racecallback
- SUB $96, RSP
+ SUB $112, RSP
MOVD LR, 0(RSP)
STP (R19, R20), 24(RSP)
STP (R21, R22), 40(RSP)
STP (R23, R24), 56(RSP)
STP (R25, R26), 72(RSP)
- MOVD R27, 88(RSP)
+ STP (R27, g), 88(RSP)
// Set g = g0.
// load_g will clobber R0, Save R0
MOVD R0, R13
@@ -448,7 +447,10 @@
// restore R0
MOVD R13, R0
MOVD g_m(g), R13
- MOVD m_g0(R13), g
+ MOVD m_g0(R13), R14
+ CMP R14, g
+ BEQ noswitch // branch if already on g0
+ MOVD R14, g
MOVD R0, 8(RSP) // func arg
MOVD R1, 16(RSP) // func arg
@@ -457,15 +459,23 @@
// All registers are smashed after Go code, reload.
MOVD g_m(g), R13
MOVD m_curg(R13), g // g = m->curg
+ret:
// Restore callee-saved registers.
MOVD 0(RSP), LR
LDP 24(RSP), (R19, R20)
LDP 40(RSP), (R21, R22)
LDP 56(RSP), (R23, R24)
LDP 72(RSP), (R25, R26)
- MOVD 88(RSP), R27
- ADD $96, RSP
+ LDP 88(RSP), (R27, g)
+ ADD $112, RSP
JMP (LR)
+noswitch:
+ // already on g0
+ MOVD R0, 8(RSP) // func arg
+ MOVD R1, 16(RSP) // func arg
+ BL runtime·racecallback(SB)
+ JMP ret
+
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
diff --git a/src/runtime/race_ppc64le.s b/src/runtime/race_ppc64le.s
index 79b8ba2..7421d53 100644
--- a/src/runtime/race_ppc64le.s
+++ b/src/runtime/race_ppc64le.s
@@ -8,6 +8,7 @@
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
+#include "asm_ppc64x.h"
// The following functions allow calling the clang-compiled race runtime directly
// from Go code without going all the way through cgo.
@@ -101,7 +102,7 @@
MOVD $__tsan_read_range(SB), R8
BR racecalladdr<>(SB)
-TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-24
+TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
BR runtime·racereadrange(SB)
// func runtime·RaceWriteRange(addr, size uintptr)
@@ -467,9 +468,9 @@
MOVD R10, 16(R1)
MOVW CR, R10
MOVW R10, 8(R1)
- MOVDU R1, -336(R1) // Allocate frame needed for register save area
+ MOVDU R1, -336(R1) // Allocate frame needed for outargs and register save area
- MOVD R14, 40(R1)
+ MOVD R14, 328(R1)
MOVD R15, 48(R1)
MOVD R16, 56(R1)
MOVD R17, 64(R1)
@@ -506,21 +507,30 @@
FMOVD F30, 312(R1)
FMOVD F31, 320(R1)
+ MOVD R3, FIXED_FRAME+0(R1)
+ MOVD R4, FIXED_FRAME+8(R1)
+
MOVD runtime·tls_g(SB), R10
MOVD 0(R13)(R10*1), g
MOVD g_m(g), R7
- MOVD m_g0(R7), g // set g = m-> g0
- MOVD R3, cmd+0(FP) // can't use R1 here ?? use input args and assumer caller expects those?
- MOVD R4, ctx+8(FP) // can't use R1 here ??
+ MOVD m_g0(R7), R8
+ CMP g, R8
+ BEQ noswitch
+
+ MOVD R8, g // set g = m-> g0
+
BL runtime·racecallback(SB)
+
// All registers are clobbered after Go code, reload.
MOVD runtime·tls_g(SB), R10
MOVD 0(R13)(R10*1), g
MOVD g_m(g), R7
MOVD m_curg(R7), g // restore g = m->curg
- MOVD 40(R1), R14
+
+ret:
+ MOVD 328(R1), R14
MOVD 48(R1), R15
MOVD 56(R1), R16
MOVD 64(R1), R17
@@ -564,5 +574,9 @@
MOVD R10, LR
RET
+noswitch:
+ BL runtime·racecallback(SB)
+ JMP ret
+
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
diff --git a/src/runtime/rt0_darwin_386.s b/src/runtime/rt0_darwin_386.s
deleted file mode 100644
index a8d3a79..0000000
--- a/src/runtime/rt0_darwin_386.s
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT _rt0_386_darwin(SB),NOSPLIT,$0
- JMP _rt0_386(SB)
-
-TEXT _rt0_386_darwin_lib(SB),NOSPLIT,$0
- JMP _rt0_386_lib(SB)
-
-TEXT main(SB),NOSPLIT,$0
- // Remove the return address from the stack.
- // rt0_go doesn't expect it to be there.
- ADDL $4, SP
- JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_darwin_arm.s b/src/runtime/rt0_darwin_arm.s
deleted file mode 100644
index 71fbe5f..0000000
--- a/src/runtime/rt0_darwin_arm.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT _rt0_arm_darwin(SB),7,$0
- B _rt0_asm(SB)
-
-TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$0
- B _rt0_arm_lib(SB)
diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s
new file mode 100644
index 0000000..3a348c3
--- /dev/null
+++ b/src/runtime/rt0_freebsd_arm64.s
@@ -0,0 +1,106 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// On FreeBSD argc/argv are passed in R0, not RSP
+TEXT _rt0_arm64_freebsd(SB),NOSPLIT|NOFRAME,$0
+ ADD $8, R0, R1 // argv
+ MOVD 0(R0), R0 // argc
+ BL main(SB)
+
+// When building with -buildmode=c-shared, this symbol is called when the shared
+// library is loaded.
+TEXT _rt0_arm64_freebsd_lib(SB),NOSPLIT,$184
+ // Preserve callee-save registers.
+ MOVD R19, 24(RSP)
+ MOVD R20, 32(RSP)
+ MOVD R21, 40(RSP)
+ MOVD R22, 48(RSP)
+ MOVD R23, 56(RSP)
+ MOVD R24, 64(RSP)
+ MOVD R25, 72(RSP)
+ MOVD R26, 80(RSP)
+ MOVD R27, 88(RSP)
+ FMOVD F8, 96(RSP)
+ FMOVD F9, 104(RSP)
+ FMOVD F10, 112(RSP)
+ FMOVD F11, 120(RSP)
+ FMOVD F12, 128(RSP)
+ FMOVD F13, 136(RSP)
+ FMOVD F14, 144(RSP)
+ FMOVD F15, 152(RSP)
+ MOVD g, 160(RSP)
+
+ // Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go
+ MOVD ZR, g
+
+ MOVD R0, _rt0_arm64_freebsd_lib_argc<>(SB)
+ MOVD R1, _rt0_arm64_freebsd_lib_argv<>(SB)
+
+ // Synchronous initialization.
+ MOVD $runtime·libpreinit(SB), R4
+ BL (R4)
+
+ // Create a new thread to do the runtime initialization and return.
+ MOVD _cgo_sys_thread_create(SB), R4
+ CMP $0, R4
+ BEQ nocgo
+ MOVD $_rt0_arm64_freebsd_lib_go(SB), R0
+ MOVD $0, R1
+ SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
+ BL (R4)
+ ADD $16, RSP
+ B restore
+
+nocgo:
+ MOVD $0x800000, R0 // stacksize = 8192KB
+ MOVD $_rt0_arm64_freebsd_lib_go(SB), R1
+ MOVD R0, 8(RSP)
+ MOVD R1, 16(RSP)
+ MOVD $runtime·newosproc0(SB),R4
+ BL (R4)
+
+restore:
+ // Restore callee-save registers.
+ MOVD 24(RSP), R19
+ MOVD 32(RSP), R20
+ MOVD 40(RSP), R21
+ MOVD 48(RSP), R22
+ MOVD 56(RSP), R23
+ MOVD 64(RSP), R24
+ MOVD 72(RSP), R25
+ MOVD 80(RSP), R26
+ MOVD 88(RSP), R27
+ FMOVD 96(RSP), F8
+ FMOVD 104(RSP), F9
+ FMOVD 112(RSP), F10
+ FMOVD 120(RSP), F11
+ FMOVD 128(RSP), F12
+ FMOVD 136(RSP), F13
+ FMOVD 144(RSP), F14
+ FMOVD 152(RSP), F15
+ MOVD 160(RSP), g
+ RET
+
+TEXT _rt0_arm64_freebsd_lib_go(SB),NOSPLIT,$0
+ MOVD _rt0_arm64_freebsd_lib_argc<>(SB), R0
+ MOVD _rt0_arm64_freebsd_lib_argv<>(SB), R1
+ MOVD $runtime·rt0_go(SB),R4
+ B (R4)
+
+DATA _rt0_arm64_freebsd_lib_argc<>(SB)/8, $0
+GLOBL _rt0_arm64_freebsd_lib_argc<>(SB),NOPTR, $8
+DATA _rt0_arm64_freebsd_lib_argv<>(SB)/8, $0
+GLOBL _rt0_arm64_freebsd_lib_argv<>(SB),NOPTR, $8
+
+
+TEXT main(SB),NOSPLIT|NOFRAME,$0
+ MOVD $runtime·rt0_go(SB), R2
+ BL (R2)
+exit:
+ MOVD $0, R0
+ MOVD $1, R8 // SYS_exit
+ SVC
+ B exit
diff --git a/src/runtime/rt0_js_wasm.s b/src/runtime/rt0_js_wasm.s
index b22c46e..714582a 100644
--- a/src/runtime/rt0_js_wasm.s
+++ b/src/runtime/rt0_js_wasm.s
@@ -19,7 +19,7 @@
// R0: argc (i32)
// R1: argv (i32)
TEXT wasm_export_run(SB),NOSPLIT,$0
- MOVD $runtime·wasmStack+m0Stack__size(SB), SP
+ MOVD $runtime·wasmStack+(m0Stack__size-16)(SB), SP
Get SP
Get R0 // argc
diff --git a/src/runtime/rt0_linux_arm64.s b/src/runtime/rt0_linux_arm64.s
index a6bc99d..f48a8d6 100644
--- a/src/runtime/rt0_linux_arm64.s
+++ b/src/runtime/rt0_linux_arm64.s
@@ -44,8 +44,7 @@
// Create a new thread to do the runtime initialization and return.
MOVD _cgo_sys_thread_create(SB), R4
- CMP $0, R4
- BEQ nocgo
+ CBZ R4, nocgo
MOVD $_rt0_arm64_linux_lib_go(SB), R0
MOVD $0, R1
SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
diff --git a/src/runtime/rt0_linux_riscv64.s b/src/runtime/rt0_linux_riscv64.s
new file mode 100644
index 0000000..f31f7f7
--- /dev/null
+++ b/src/runtime/rt0_linux_riscv64.s
@@ -0,0 +1,14 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_riscv64_linux(SB),NOSPLIT|NOFRAME,$0
+ MOV 0(X2), A0 // argc
+ ADD $8, X2, A1 // argv
+ JMP main(SB)
+
+TEXT main(SB),NOSPLIT|NOFRAME,$0
+ MOV $runtime·rt0_go(SB), T0
+ JALR ZERO, T0
diff --git a/src/runtime/rt0_nacl_386.s b/src/runtime/rt0_nacl_386.s
deleted file mode 100644
index 4c99002..0000000
--- a/src/runtime/rt0_nacl_386.s
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// NaCl entry has:
-// 0(FP) - arg block == SP+8
-// 4(FP) - cleanup function pointer, always 0
-// 8(FP) - envc
-// 12(FP) - argc
-// 16(FP) - argv, then 0, then envv, then 0, then auxv
-TEXT _rt0_386_nacl(SB),NOSPLIT,$8
- MOVL argc+12(FP), AX
- LEAL argv+16(FP), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- JMP runtime·rt0_go(SB)
-
-TEXT main(SB),NOSPLIT,$0
- // Remove the return address from the stack.
- // rt0_go doesn't expect it to be there.
- ADDL $4, SP
- JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_nacl_amd64p32.s b/src/runtime/rt0_nacl_amd64p32.s
deleted file mode 100644
index 38583c5..0000000
--- a/src/runtime/rt0_nacl_amd64p32.s
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// NaCl entry on 32-bit x86 has DI pointing at the arg block, which contains:
-//
-// 0(DI) - cleanup function pointer, always 0
-// 4(DI) - envc
-// 8(DI) - argc
-// 12(DI) - argv, then 0, then envv, then 0, then auxv
-// NaCl entry here is almost the same, except that there
-// is no saved caller PC, so 0(FP) is -8(FP) and so on.
-TEXT _rt0_amd64p32_nacl(SB),NOSPLIT,$16
- MOVL DI, 0(SP)
- CALL runtime·nacl_sysinfo(SB)
- MOVL 0(SP), DI
- MOVL 8(DI), AX
- LEAL 12(DI), BX
- MOVL AX, 0(SP)
- MOVL BX, 4(SP)
- CALL main(SB)
- INT $3
-
-TEXT main(SB),NOSPLIT,$0
- // Uncomment for fake time like on Go Playground.
- //MOVQ $1257894000000000000, AX
- //MOVQ AX, runtime·faketime(SB)
- JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_nacl_arm.s b/src/runtime/rt0_nacl_arm.s
deleted file mode 100644
index a52c0d8..0000000
--- a/src/runtime/rt0_nacl_arm.s
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// NaCl entry has:
-// 0(FP) - 0
-// 4(FP) - cleanup function pointer, always 0
-// 8(FP) - envc
-// 12(FP) - argc
-// 16(FP) - argv, then 0, then envv, then 0, then auxv
-TEXT _rt0_arm_nacl(SB),NOSPLIT|NOFRAME,$0
- MOVW 8(R13), R0
- MOVW $12(R13), R1
- B runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_openbsd_arm64.s b/src/runtime/rt0_openbsd_arm64.s
index ab8ea97..12408f2 100644
--- a/src/runtime/rt0_openbsd_arm64.s
+++ b/src/runtime/rt0_openbsd_arm64.s
@@ -4,6 +4,12 @@
#include "textflag.h"
+// See comment in runtime/sys_openbsd_arm64.s re this construction.
+#define INVOKE_SYSCALL \
+ SVC; \
+ NOOP; \
+ NOOP
+
TEXT _rt0_arm64_openbsd(SB),NOSPLIT|NOFRAME,$0
MOVD 0(RSP), R0 // argc
ADD $8, RSP, R1 // argv
@@ -101,5 +107,5 @@
exit:
MOVD $0, R0
MOVD $1, R8 // sys_exit
- SVC
+ INVOKE_SYSCALL
B exit
diff --git a/src/runtime/runtime-gdb.py b/src/runtime/runtime-gdb.py
index 6139f99..8d96dfb 100644
--- a/src/runtime/runtime-gdb.py
+++ b/src/runtime/runtime-gdb.py
@@ -18,6 +18,7 @@
from __future__ import print_function
import re
import sys
+import gdb
print("Loading Go Runtime support.", file=sys.stderr)
#http://python3porting.com/differences.html
@@ -98,11 +99,11 @@
# Pretty Printers
#
-
+# The patterns for matching types are permissive because gdb 8.2 switched to matching on (we think) typedef names instead of C syntax names.
class StringTypePrinter:
"Pretty print Go strings."
- pattern = re.compile(r'^struct string( \*)?$')
+ pattern = re.compile(r'^(struct string( \*)?|string)$')
def __init__(self, val):
self.val = val
@@ -118,7 +119,7 @@
class SliceTypePrinter:
"Pretty print slices."
- pattern = re.compile(r'^struct \[\]')
+ pattern = re.compile(r'^(struct \[\]|\[\])')
def __init__(self, val):
self.val = val
@@ -127,7 +128,10 @@
return 'array'
def to_string(self):
- return str(self.val.type)[6:] # skip 'struct '
+ t = str(self.val.type)
+ if (t.startswith("struct ")):
+ return t[len("struct "):]
+ return t
def children(self):
sval = SliceValue(self.val)
@@ -195,7 +199,7 @@
to inspect their contents with this pretty printer.
"""
- pattern = re.compile(r'^struct hchan<.*>$')
+ pattern = re.compile(r'^chan ')
def __init__(self, val):
self.val = val
@@ -209,7 +213,7 @@
def children(self):
# see chan.c chanbuf(). et is the type stolen from hchan<T>::recvq->first->elem
et = [x.type for x in self.val['recvq']['first'].type.target().fields() if x.name == 'elem'][0]
- ptr = (self.val.address + 1).cast(et.pointer())
+ ptr = (self.val.address["buf"]).cast(et.pointer())
for i in range(self.val["qcount"]):
j = (self.val["recvx"] + i) % self.val["dataqsiz"]
yield ('[{0}]'.format(i), (ptr + j).dereference())
@@ -229,8 +233,6 @@
return matcher
goobjfile.pretty_printers.extend([makematcher(var) for var in vars().values() if hasattr(var, 'pattern')])
-
-
#
# Utilities
#
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index de1bac6..e52bd1c 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -19,6 +19,12 @@
"testing"
)
+// NOTE: In some configurations, GDB will segfault when sent a SIGWINCH signal.
+// Some runtime tests send SIGWINCH to the entire process group, so those tests
+// must never run in parallel with GDB tests.
+//
+// See issue 39021 and https://sourceware.org/bugzilla/show_bug.cgi?id=26056.
+
func checkGdbEnvironment(t *testing.T) {
testenv.MustHaveGoBuild(t)
switch runtime.GOOS {
@@ -37,6 +43,12 @@
}
case "freebsd":
t.Skip("skipping gdb tests on FreeBSD; see https://golang.org/issue/29508")
+ case "aix":
+ if testing.Short() {
+ t.Skip("skipping gdb tests on AIX; see https://golang.org/issue/35710")
+ }
+ case "plan9":
+ t.Skip("there is no gdb on Plan 9")
}
if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final {
t.Skip("gdb test can fail with GOROOT_FINAL pending")
@@ -66,8 +78,8 @@
}
func checkGdbPython(t *testing.T) {
- if runtime.GOOS == "solaris" && testenv.Builder() != "solaris-amd64-smartosbuildlet" {
- t.Skip("skipping gdb python tests on solaris; see golang.org/issue/20821")
+ if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" {
+ t.Skip("skipping gdb python tests on illumos and solaris; see golang.org/issue/20821")
}
cmd := exec.Command("gdb", "-nx", "-q", "--batch", "-iex", "python import sys; print('go gdb python support')")
@@ -103,16 +115,26 @@
var gslice []string
func main() {
mapvar := make(map[string]string, 13)
+ slicemap := make(map[string][]string,11)
+ chanint := make(chan int, 10)
+ chanstr := make(chan string, 10)
+ chanint <- 99
+ chanint <- 11
+ chanstr <- "spongepants"
+ chanstr <- "squarebob"
mapvar["abc"] = "def"
mapvar["ghi"] = "jkl"
+ slicemap["a"] = []string{"b","c","d"}
+ slicemap["e"] = []string{"f","g","h"}
strvar := "abc"
ptrvar := &strvar
slicevar := make([]string, 0, 16)
slicevar = append(slicevar, mapvar["abc"])
fmt.Println("hi")
runtime.KeepAlive(ptrvar)
- _ = ptrvar
+ _ = ptrvar // set breakpoint here
gslice = slicevar
+ fmt.Printf("%v, %v, %v\n", slicemap, <-chanint, <-chanstr)
runtime.KeepAlive(mapvar)
} // END_OF_PROGRAM
`
@@ -163,6 +185,16 @@
src := buf.Bytes()
+ // Locate breakpoint line
+ var bp int
+ lines := bytes.Split(src, []byte("\n"))
+ for i, line := range lines {
+ if bytes.Contains(line, []byte("breakpoint")) {
+ bp = i
+ break
+ }
+ }
+
err = ioutil.WriteFile(filepath.Join(dir, "main.go"), src, 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
@@ -197,7 +229,7 @@
}
args = append(args,
"-ex", "set python print-stack full",
- "-ex", "br main.go:15",
+ "-ex", fmt.Sprintf("br main.go:%d", bp),
"-ex", "run",
"-ex", "echo BEGIN info goroutines\n",
"-ex", "info goroutines",
@@ -205,18 +237,24 @@
"-ex", "echo BEGIN print mapvar\n",
"-ex", "print mapvar",
"-ex", "echo END\n",
+ "-ex", "echo BEGIN print slicemap\n",
+ "-ex", "print slicemap",
+ "-ex", "echo END\n",
"-ex", "echo BEGIN print strvar\n",
"-ex", "print strvar",
"-ex", "echo END\n",
+ "-ex", "echo BEGIN print chanint\n",
+ "-ex", "print chanint",
+ "-ex", "echo END\n",
+ "-ex", "echo BEGIN print chanstr\n",
+ "-ex", "print chanstr",
+ "-ex", "echo END\n",
"-ex", "echo BEGIN info locals\n",
"-ex", "info locals",
"-ex", "echo END\n",
"-ex", "echo BEGIN goroutine 1 bt\n",
"-ex", "goroutine 1 bt",
"-ex", "echo END\n",
- "-ex", "echo BEGIN goroutine 2 bt\n",
- "-ex", "goroutine 2 bt",
- "-ex", "echo END\n",
"-ex", "echo BEGIN goroutine all bt\n",
"-ex", "goroutine all bt",
"-ex", "echo END\n",
@@ -228,8 +266,11 @@
"-ex", "echo END\n",
filepath.Join(dir, "a.exe"),
)
- got, _ := exec.Command("gdb", args...).CombinedOutput()
- t.Logf("gdb output: %s\n", got)
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
firstLine := bytes.SplitN(got, []byte("\n"), 2)[0]
if string(firstLine) != "Loading Go Runtime support." {
@@ -268,6 +309,23 @@
t.Fatalf("print mapvar failed: %s", bl)
}
+ // 2 orders, and possible differences in spacing.
+ sliceMapSfx1 := `map[string][]string = {["e"] = []string = {"f", "g", "h"}, ["a"] = []string = {"b", "c", "d"}}`
+ sliceMapSfx2 := `map[string][]string = {["a"] = []string = {"b", "c", "d"}, ["e"] = []string = {"f", "g", "h"}}`
+ if bl := strings.ReplaceAll(blocks["print slicemap"], "  ", " "); !strings.HasSuffix(bl, sliceMapSfx1) && !strings.HasSuffix(bl, sliceMapSfx2) {
+ t.Fatalf("print slicemap failed: %s", bl)
+ }
+
+ chanIntSfx := `chan int = {99, 11}`
+ if bl := strings.ReplaceAll(blocks["print chanint"], "  ", " "); !strings.HasSuffix(bl, chanIntSfx) {
+ t.Fatalf("print chanint failed: %s", bl)
+ }
+
+ chanStrSfx := `chan string = {"spongepants", "squarebob"}`
+ if bl := strings.ReplaceAll(blocks["print chanstr"], "  ", " "); !strings.HasSuffix(bl, chanStrSfx) {
+ t.Fatalf("print chanstr failed: %s", bl)
+ }
+
strVarRe := regexp.MustCompile(`^\$[0-9]+ = (0x[0-9a-f]+\s+)?"abc"$`)
if bl := blocks["print strvar"]; !strVarRe.MatchString(bl) {
t.Fatalf("print strvar failed: %s", bl)
@@ -293,7 +351,6 @@
// Check that the backtraces are well formed.
checkCleanBacktrace(t, blocks["goroutine 1 bt"])
- checkCleanBacktrace(t, blocks["goroutine 2 bt"])
checkCleanBacktrace(t, blocks["goroutine 1 bt at the end"])
btGoroutine1Re := regexp.MustCompile(`(?m)^#0\s+(0x[0-9a-f]+\s+in\s+)?main\.main.+at`)
@@ -301,12 +358,7 @@
t.Fatalf("goroutine 1 bt failed: %s", bl)
}
- btGoroutine2Re := regexp.MustCompile(`(?m)^#0\s+(0x[0-9a-f]+\s+in\s+)?runtime.+at`)
- if bl := blocks["goroutine 2 bt"]; !btGoroutine2Re.MatchString(bl) {
- t.Fatalf("goroutine 2 bt failed: %s", bl)
- }
-
- if bl := blocks["goroutine all bt"]; !btGoroutine1Re.MatchString(bl) || !btGoroutine2Re.MatchString(bl) {
+ if bl := blocks["goroutine all bt"]; !btGoroutine1Re.MatchString(bl) {
t.Fatalf("goroutine all bt failed: %s", bl)
}
@@ -381,7 +433,11 @@
"-ex", "continue",
filepath.Join(dir, "a.exe"),
}
- got, _ := exec.Command("gdb", args...).CombinedOutput()
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
// Check that the backtrace matches the source code.
bt := []string{
@@ -396,8 +452,7 @@
s := fmt.Sprintf("#%v.*main\\.%v", i, name)
re := regexp.MustCompile(s)
if found := re.Find(got) != nil; !found {
- t.Errorf("could not find '%v' in backtrace", s)
- t.Fatalf("gdb output:\n%v", string(got))
+ t.Fatalf("could not find '%v' in backtrace", s)
}
}
}
@@ -456,7 +511,11 @@
"-ex", "info types astruct",
filepath.Join(dir, "a.exe"),
}
- got, _ := exec.Command("gdb", args...).CombinedOutput()
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
sgot := string(got)
@@ -470,8 +529,7 @@
}
for _, name := range types {
if !strings.Contains(sgot, name) {
- t.Errorf("could not find %s in 'info typrs astruct' output", name)
- t.Fatalf("gdb output:\n%v", sgot)
+ t.Fatalf("could not find %s in 'info typrs astruct' output", name)
}
}
}
@@ -525,12 +583,14 @@
"-ex", "print 'runtime._PageSize'",
filepath.Join(dir, "a.exe"),
}
- got, _ := exec.Command("gdb", args...).CombinedOutput()
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
sgot := strings.ReplaceAll(string(got), "\r\n", "\n")
- t.Logf("output %q", sgot)
-
if !strings.Contains(sgot, "\n$1 = 42\n$2 = 18446744073709551615\n$3 = -1\n$4 = 1 '\\001'\n$5 = 8192") {
t.Fatalf("output mismatch")
}
@@ -585,7 +645,11 @@
"-ex", "backtrace",
filepath.Join(dir, "a.exe"),
}
- got, _ := exec.Command("gdb", args...).CombinedOutput()
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
// Check that the backtrace matches the source code.
bt := []string{
@@ -596,8 +660,91 @@
s := fmt.Sprintf("(#.* .* in )?main\\.%v", name)
re := regexp.MustCompile(s)
if found := re.Find(got) != nil; !found {
- t.Errorf("could not find '%v' in backtrace", s)
- t.Fatalf("gdb output:\n%v", string(got))
+ t.Fatalf("could not find '%v' in backtrace", s)
+ }
+ }
+}
+
+const InfCallstackSource = `
+package main
+import "C"
+import "time"
+
+func loop() {
+ for i := 0; i < 1000; i++ {
+ time.Sleep(time.Millisecond*5)
+ }
+}
+
+func main() {
+ go loop()
+ time.Sleep(time.Second * 1)
+}
+`
+
+// TestGdbInfCallstack tests that gdb can unwind the callstack of cgo programs
+// on arm64 platforms without endless frames of function 'crossfunc1'.
+// https://golang.org/issue/37238
+func TestGdbInfCallstack(t *testing.T) {
+ checkGdbEnvironment(t)
+
+ testenv.MustHaveCGO(t)
+ if runtime.GOARCH != "arm64" {
+ t.Skip("skipping infinite callstack test on non-arm64 arches")
+ }
+
+ t.Parallel()
+ checkGdbVersion(t)
+
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ // Build the source code.
+ src := filepath.Join(dir, "main.go")
+ err = ioutil.WriteFile(src, []byte(InfCallstackSource), 0644)
+ if err != nil {
+ t.Fatalf("failed to create file: %v", err)
+ }
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe", "main.go")
+ cmd.Dir = dir
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("building source %v\n%s", err, out)
+ }
+
+ // Execute gdb commands.
+ // 'setg_gcc' is the first point where we can reproduce the issue with just one 'run' command.
+ args := []string{"-nx", "-batch",
+ "-iex", "add-auto-load-safe-path " + filepath.Join(runtime.GOROOT(), "src", "runtime"),
+ "-ex", "set startup-with-shell off",
+ "-ex", "break setg_gcc",
+ "-ex", "run",
+ "-ex", "backtrace 3",
+ "-ex", "disable 1",
+ "-ex", "continue",
+ filepath.Join(dir, "a.exe"),
+ }
+ got, err := exec.Command("gdb", args...).CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
+
+ // Check that the backtrace matches the expected frames.
+ // We check only the 3 innermost frames, as they are certainly present according to gcc_<OS>_arm64.c.
+ bt := []string{
+ `setg_gcc`,
+ `crosscall1`,
+ `threadentry`,
+ }
+ for i, name := range bt {
+ s := fmt.Sprintf("#%v.*%v", i, name)
+ re := regexp.MustCompile(s)
+ if found := re.Find(got) != nil; !found {
+ t.Fatalf("could not find '%v' in backtrace", s)
}
}
}
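
The gdb test changes above follow one pattern throughout: capture gdb's combined output, log it unconditionally, and fail fast if gdb exited with an error, rather than matching regexps against partial output. A minimal standalone sketch of that pattern is shown below; the helper name runGdbBatch, the binary ./a.exe and the chosen commands are illustrative, and only the gdb flags already used by the tests (-nx, -batch, -iex, -ex) are assumed.

package main

import (
    "fmt"
    "log"
    "os/exec"
    "path/filepath"
    "regexp"
    "runtime"
)

// runGdbBatch runs gdb non-interactively against binary, executing the given
// -ex commands in order, and returns the combined stdout/stderr.
func runGdbBatch(binary string, commands ...string) ([]byte, error) {
    args := []string{"-nx", "-batch",
        "-iex", "add-auto-load-safe-path " + filepath.Join(runtime.GOROOT(), "src", "runtime"),
    }
    for _, c := range commands {
        args = append(args, "-ex", c)
    }
    args = append(args, binary)
    return exec.Command("gdb", args...).CombinedOutput()
}

func main() {
    // ./a.exe is a stand-in for a previously built Go binary.
    got, err := runGdbBatch("./a.exe", "break main.main", "run", "backtrace")
    fmt.Printf("gdb output:\n%s\n", got)
    if err != nil {
        log.Fatalf("gdb exited with error: %v", err)
    }
    // As in the tests above: log everything first, then match the backtrace.
    if !regexp.MustCompile(`(?m)^#0\s+.*main\.main`).Match(got) {
        log.Fatal("main.main not found in backtrace")
    }
}
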
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index ad29818..c65a534 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -312,9 +312,11 @@
madvdontneed int32 // for Linux; issue 28466
sbrk int32
scavenge int32
+ scavtrace int32
scheddetail int32
schedtrace int32
tracebackancestors int32
+ asyncpreemptoff int32
}
var dbgvars = []dbgVar{
@@ -331,9 +333,11 @@
{"madvdontneed", &debug.madvdontneed},
{"sbrk", &debug.sbrk},
{"scavenge", &debug.scavenge},
+ {"scavtrace", &debug.scavtrace},
{"scheddetail", &debug.scheddetail},
{"schedtrace", &debug.schedtrace},
{"tracebackancestors", &debug.tracebackancestors},
+ {"asyncpreemptoff", &debug.asyncpreemptoff},
}
func parsedebugvars() {
@@ -455,11 +459,6 @@
}
}
-//go:nosplit
-func gomcache() *mcache {
- return getg().m.mcache
-}
-
//go:linkname reflect_typelinks reflect.typelinks
func reflect_typelinks() ([]unsafe.Pointer, [][]int32) {
modules := activeModules()
@@ -484,7 +483,7 @@
return unsafe.Pointer((*_type)(rtype).typeOff(typeOff(off)))
}
-// reflect_resolveTextOff resolves an function pointer offset from a base type.
+// reflect_resolveTextOff resolves a function pointer offset from a base type.
//go:linkname reflect_resolveTextOff reflect.resolveTextOff
func reflect_resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
return (*_type)(rtype).textOff(textOff(off))
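
runtime1.go registers two new GODEBUG knobs, scavtrace and asyncpreemptoff. GODEBUG is consumed by parsedebugvars during runtime startup, so the variable must be in the environment before the target program starts; a hedged sketch of exercising the new knobs from a parent process (the ./worker binary name is hypothetical):

package main

import (
    "fmt"
    "log"
    "os"
    "os/exec"
)

func main() {
    // GODEBUG is parsed once at runtime startup, so set it in the child's
    // environment rather than in the running process.
    // "./worker" is a hypothetical binary built with this Go toolchain.
    cmd := exec.Command("./worker")
    cmd.Env = append(os.Environ(), "GODEBUG=scavtrace=1,asyncpreemptoff=1")
    out, err := cmd.CombinedOutput()
    fmt.Printf("worker output:\n%s\n", out)
    if err != nil {
        log.Fatalf("worker failed: %v", err)
    }
}
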
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 16c02cd..cffdb0b 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -40,7 +40,7 @@
// _Grunning means this goroutine may execute user code. The
// stack is owned by this goroutine. It is not on a run queue.
- // It is assigned an M and a P.
+ // It is assigned an M and a P (g.m and g.m.p are valid).
_Grunning // 2
// _Gsyscall means this goroutine is executing a system call.
@@ -78,6 +78,13 @@
// stack is owned by the goroutine that put it in _Gcopystack.
_Gcopystack // 8
+ // _Gpreempted means this goroutine stopped itself for a
+ // suspendG preemption. It is like _Gwaiting, but nothing is
+ // yet responsible for ready()ing it. Some suspendG must CAS
+ // the status to _Gwaiting to take responsibility for
+ // ready()ing this G.
+ _Gpreempted // 9
+
// _Gscan combined with one of the above states other than
// _Grunning indicates that GC is scanning the stack. The
// goroutine is not executing user code and the stack is owned
@@ -89,11 +96,12 @@
//
// atomicstatus&~Gscan gives the state the goroutine will
// return to when the scan completes.
- _Gscan = 0x1000
- _Gscanrunnable = _Gscan + _Grunnable // 0x1001
- _Gscanrunning = _Gscan + _Grunning // 0x1002
- _Gscansyscall = _Gscan + _Gsyscall // 0x1003
- _Gscanwaiting = _Gscan + _Gwaiting // 0x1004
+ _Gscan = 0x1000
+ _Gscanrunnable = _Gscan + _Grunnable // 0x1001
+ _Gscanrunning = _Gscan + _Grunning // 0x1002
+ _Gscansyscall = _Gscan + _Gsyscall // 0x1003
+ _Gscanwaiting = _Gscan + _Gwaiting // 0x1004
+ _Gscanpreempted = _Gscan + _Gpreempted // 0x1009
)
const (
@@ -150,7 +158,10 @@
// as fast as spin locks (just a few user-level instructions),
// but on the contention path they sleep in the kernel.
// A zeroed Mutex is unlocked (no need to initialize each lock).
+// Initialization is helpful for static lock ranking, but not required.
type mutex struct {
+ // Empty struct if lock ranking is disabled, otherwise includes the lock rank
+ lockRankStruct
// Futex-based impl treats it as uint32 key,
// while sema-based impl as M* waitm.
// Used to be a union, but unions break precise GC.
@@ -338,12 +349,9 @@
g *g
- // isSelect indicates g is participating in a select, so
- // g.selectDone must be CAS'd to win the wake-up race.
- isSelect bool
- next *sudog
- prev *sudog
- elem unsafe.Pointer // data element (may point to stack)
+ next *sudog
+ prev *sudog
+ elem unsafe.Pointer // data element (may point to stack)
// The following fields are never accessed concurrently.
// For channels, waitlink is only accessed by g.
@@ -353,10 +361,15 @@
acquiretime int64
releasetime int64
ticket uint32
- parent *sudog // semaRoot binary tree
- waitlink *sudog // g.waiting list or semaRoot
- waittail *sudog // semaRoot
- c *hchan // channel
+
+ // isSelect indicates g is participating in a select, so
+ // g.selectDone must be CAS'd to win the wake-up race.
+ isSelect bool
+
+ parent *sudog // semaRoot binary tree
+ waitlink *sudog // g.waiting list or semaRoot
+ waittail *sudog // semaRoot
+ c *hchan // channel
}
type libcall struct {
@@ -384,6 +397,12 @@
hi uintptr
}
+// heldLockInfo gives info on a held lock and the rank of that lock
+type heldLockInfo struct {
+ lockAddr uintptr
+ rank lockRank
+}
+
type g struct {
// Stack parameters.
// stack describes the actual stack memory: [stack.lo, stack.hi).
@@ -396,31 +415,44 @@
stackguard0 uintptr // offset known to liblink
stackguard1 uintptr // offset known to liblink
- _panic *_panic // innermost panic - offset known to liblink
- _defer *_defer // innermost defer
- m *m // current m; offset known to arm liblink
- sched gobuf
- syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
- syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
- stktopsp uintptr // expected sp at top of stack, to check in traceback
- param unsafe.Pointer // passed parameter on wakeup
- atomicstatus uint32
- stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
- goid int64
- schedlink guintptr
- waitsince int64 // approx time when the g become blocked
- waitreason waitReason // if status==Gwaiting
- preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
- paniconfault bool // panic (instead of crash) on unexpected fault address
- preemptscan bool // preempted g does scan for gc
- gcscandone bool // g has scanned stack; protected by _Gscan bit in status
- gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove?
- throwsplit bool // must not split stack
- raceignore int8 // ignore race detection events
- sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
- sysexitticks int64 // cputicks when syscall has returned (for tracing)
- traceseq uint64 // trace event sequencer
- tracelastp puintptr // last P emitted an event for this goroutine
+ _panic *_panic // innermost panic - offset known to liblink
+ _defer *_defer // innermost defer
+ m *m // current m; offset known to arm liblink
+ sched gobuf
+ syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
+ syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
+ stktopsp uintptr // expected sp at top of stack, to check in traceback
+ param unsafe.Pointer // passed parameter on wakeup
+ atomicstatus uint32
+ stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
+ goid int64
+ schedlink guintptr
+ waitsince int64 // approx time when the g become blocked
+ waitreason waitReason // if status==Gwaiting
+
+ preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
+ preemptStop bool // transition to _Gpreempted on preemption; otherwise, just deschedule
+ preemptShrink bool // shrink stack at synchronous safe point
+
+ // asyncSafePoint is set if g is stopped at an asynchronous
+ // safe point. This means there are frames on the stack
+ // without precise pointer information.
+ asyncSafePoint bool
+
+ paniconfault bool // panic (instead of crash) on unexpected fault address
+ gcscandone bool // g has scanned stack; protected by _Gscan bit in status
+ throwsplit bool // must not split stack
+ // activeStackChans indicates that there are unlocked channels
+ // pointing into this goroutine's stack. If true, stack
+ // copying needs to acquire channel locks to protect these
+ // areas of the stack.
+ activeStackChans bool
+
+ raceignore int8 // ignore race detection events
+ sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
+ sysexitticks int64 // cputicks when syscall has returned (for tracing)
+ traceseq uint64 // trace event sequencer
+ tracelastp puintptr // last P emitted an event for this goroutine
lockedm muintptr
sig uint32
writebuf []byte
@@ -489,7 +521,6 @@
park note
alllink *m // on allm
schedlink muintptr
- mcache *mcache
lockedg guintptr
createstack [32]uintptr // stack that created this thread.
lockedExt uint32 // tracking for external LockOSThread
@@ -501,8 +532,7 @@
waittraceskip int
startingtrace bool
syscalltick uint32
- thread uintptr // thread handle
- freelink *m // on sched.freem
+ freelink *m // on sched.freem
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
@@ -515,9 +545,22 @@
vdsoSP uintptr // SP for traceback while in VDSO call (0 if not in call)
vdsoPC uintptr // PC for traceback while in VDSO call
+ // preemptGen counts the number of completed preemption
+ // signals. This is used to detect when a preemption is
+ // requested, but fails. Accessed atomically.
+ preemptGen uint32
+
+ // Whether this is a pending preemption signal on this M.
+ // Accessed atomically.
+ signalPending uint32
+
dlogPerM
mOS
+
+ // Up to 10 locks held by this m, maintained by the lock ranking code.
+ locksHeldLen int
+ locksHeld [10]heldLockInfo
}
type p struct {
@@ -529,6 +572,7 @@
sysmontick sysmontick // last tick observed by sysmon
m muintptr // back-link to associated m (nil if idle)
mcache *mcache
+ pcache pageCache
raceprocctx uintptr
deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
@@ -562,6 +606,17 @@
sudogcache []*sudog
sudogbuf [128]*sudog
+ // Cache of mspan objects from the heap.
+ mspancache struct {
+ // We need an explicit length here because this field is used
+ // in allocation codepaths where write barriers are not allowed,
+ // and eliminating the write barrier/keeping it eliminated from
+ // slice updates is tricky, more so than just managing the length
+ // ourselves.
+ len int
+ buf [128]*mspan
+ }
+
tracebuf traceBufPtr
// traceSweep indicates the sweep events should be traced.
@@ -576,6 +631,11 @@
_ uint32 // Alignment for atomic fields below
+ // The when field of the first entry on the timer heap.
+ // This is updated using atomic functions.
+ // This is 0 if the timer heap is empty.
+ timer0When uint64
+
// Per-P GC state
gcAssistTime int64 // Nanoseconds in assistAlloc
gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
@@ -598,13 +658,44 @@
runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
+ // Lock for timers. We normally access the timers while running
+ // on this P, but the scheduler can also do it from a different P.
+ timersLock mutex
+
+ // Actions to take at some time. This is used to implement the
+ // standard library's time package.
+ // Must hold timersLock to access.
+ timers []*timer
+
+ // Number of timers in P's heap.
+ // Modified using atomic instructions.
+ numTimers uint32
+
+ // Number of timerModifiedEarlier timers on P's heap.
+ // This should only be modified while holding timersLock,
+ // or while the timer status is in a transient state
+ // such as timerModifying.
+ adjustTimers uint32
+
+ // Number of timerDeleted timers in P's heap.
+ // Modified using atomic instructions.
+ deletedTimers uint32
+
+ // Race context used while executing timer functions.
+ timerRaceCtx uintptr
+
+ // preempt is set to indicate that this P should enter the
+ // scheduler ASAP (regardless of what G is running on it).
+ preempt bool
+
pad cpu.CacheLinePad
}
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
- goidgen uint64
- lastpoll uint64
+ goidgen uint64
+ lastpoll uint64 // time of last network poll, 0 if currently polling
+ pollUntil uint64 // time to which current poll is sleeping
lock mutex
@@ -677,6 +768,12 @@
procresizetime int64 // nanotime() of last change to gomaxprocs
totaltime int64 // ∫gomaxprocs dt up to procresizetime
+
+ // sysmonlock protects sysmon's actions on the runtime.
+ //
+ // Acquire and hold this mutex to block sysmon from interacting
+ // with the rest of the runtime.
+ sysmonlock mutex
}
// Values for the flags field of a sigTabT.
@@ -701,7 +798,7 @@
nameoff int32 // function name
args int32 // in/out args size
- deferreturn uint32 // offset of a deferreturn block from entry, if any.
+ deferreturn uint32 // offset of start of a deferreturn call instruction from entry, if any.
pcsp int32
pcfile int32
@@ -726,7 +823,7 @@
// layout of Itab known to compilers
// allocated in non-garbage-collected memory
// Needs to be in sync with
-// ../cmd/compile/internal/gc/reflect.go:/^func.dumptypestructs.
+// ../cmd/compile/internal/gc/reflect.go:/^func.dumptabs.
type itab struct {
inter *interfacetype
_type *_type
@@ -774,7 +871,7 @@
}
// A _defer holds an entry on the list of deferred calls.
-// If you add a field here, add code to clear it in freedefer.
+// If you add a field here, add code to clear it in freedefer and deferProcStack
// This struct must match the code in cmd/compile/internal/gc/reflect.go:deferstruct
// and cmd/compile/internal/gc/ssa.go:(*state).call.
// Some defers will be allocated on the stack and some on the heap.
@@ -785,11 +882,27 @@
siz int32 // includes both arguments and results
started bool
heap bool
- sp uintptr // sp at time of defer
- pc uintptr
- fn *funcval
- _panic *_panic // panic that is running defer
- link *_defer
+ // openDefer indicates that this _defer is for a frame with open-coded
+ // defers. We have only one defer record for the entire frame (which may
+ // currently have 0, 1, or more defers active).
+ openDefer bool
+ sp uintptr // sp at time of defer
+ pc uintptr // pc at time of defer
+ fn *funcval // can be nil for open-coded defers
+ _panic *_panic // panic that is running defer
+ link *_defer
+
+ // If openDefer is true, the fields below record values about the stack
+ // frame and associated function that has the open-coded defer(s). sp
+ // above will be the sp for the frame, and pc will be address of the
+ // deferreturn call in the function.
+ fd unsafe.Pointer // funcdata for the function associated with the frame
+ varp uintptr // value of varp for the stack frame
+ // framepc is the current pc associated with the stack frame. Together,
+ // with sp above (which is the sp associated with the stack frame),
+ // framepc/sp can be used as pc/sp pair to continue a stack trace via
+ // gentraceback().
+ framepc uintptr
}
// A _panic holds information about an active panic.
@@ -807,8 +920,11 @@
argp unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
arg interface{} // argument to panic
link *_panic // link to earlier panic
+ pc uintptr // where to return to in runtime if this panic is bypassed
+ sp unsafe.Pointer // where to return to in runtime if this panic is bypassed
recovered bool // whether this panic is over
aborted bool // the panic was aborted
+ goexit bool
}
// stack traces
@@ -863,7 +979,7 @@
waitReasonChanReceive // "chan receive"
waitReasonChanSend // "chan send"
waitReasonFinalizerWait // "finalizer wait"
- waitReasonForceGGIdle // "force gc (idle)"
+ waitReasonForceGCIdle // "force gc (idle)"
waitReasonSemacquire // "semacquire"
waitReasonSleep // "sleep"
waitReasonSyncCondWait // "sync.Cond.Wait"
@@ -871,6 +987,8 @@
waitReasonTraceReaderBlocked // "trace reader (blocked)"
waitReasonWaitForGCCycle // "wait for GC cycle"
waitReasonGCWorkerIdle // "GC worker (idle)"
+ waitReasonPreempted // "preempted"
+ waitReasonDebugCall // "debug call"
)
var waitReasonStrings = [...]string{
@@ -891,7 +1009,7 @@
waitReasonChanReceive: "chan receive",
waitReasonChanSend: "chan send",
waitReasonFinalizerWait: "finalizer wait",
- waitReasonForceGGIdle: "force gc (idle)",
+ waitReasonForceGCIdle: "force gc (idle)",
waitReasonSemacquire: "semacquire",
waitReasonSleep: "sleep",
waitReasonSyncCondWait: "sync.Cond.Wait",
@@ -899,6 +1017,8 @@
waitReasonTraceReaderBlocked: "trace reader (blocked)",
waitReasonWaitForGCCycle: "wait for GC cycle",
waitReasonGCWorkerIdle: "GC worker (idle)",
+ waitReasonPreempted: "preempted",
+ waitReasonDebugCall: "debug call",
}
func (w waitReason) String() string {
@@ -922,7 +1042,7 @@
// Information about what cpu features are available.
// Packages outside the runtime should not use these
// as they are not an external api.
- // Set on startup in asm_{386,amd64,amd64p32}.s
+ // Set on startup in asm_{386,amd64}.s
processorVersionInfo uint32
isIntel bool
lfenceBeforeRdtsc bool
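
runtime2.go adds the _Gpreempted state and a matching _Gscanpreempted constant; the scan variants are simply _Gscan (0x1000) OR'd onto the base state, so masking the scan bit off atomicstatus recovers the state the goroutine returns to once scanning finishes. A small standalone illustration of that encoding, using local copies of the values rather than the runtime's unexported constants:

package main

import "fmt"

// Local copies of the status values above, for illustration only; the real
// constants are unexported in package runtime.
const (
    gRunnable  = 1
    gRunning   = 2
    gWaiting   = 4
    gPreempted = 9

    gScan          = 0x1000
    gScanPreempted = gScan + gPreempted // 0x1009, mirrors _Gscanpreempted
)

func main() {
    status := gScanPreempted
    // Clearing the scan bit yields the state the goroutine will return to
    // once the stack scan completes, exactly as the comment above describes
    // for atomicstatus&^_Gscan.
    fmt.Printf("status=%#x base=%#x scanning=%v\n",
        status, status&^gScan, status&gScan != 0)
    // status=0x1009 base=0x9 scanning=true
}
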
diff --git a/src/runtime/runtime_linux_test.go b/src/runtime/runtime_linux_test.go
index 17d6fbd..cd59368 100644
--- a/src/runtime/runtime_linux_test.go
+++ b/src/runtime/runtime_linux_test.go
@@ -41,11 +41,11 @@
}
}
-// Test that error values are negative. Use address 1 (a misaligned
-// pointer) to get -EINVAL.
+// Test that error values are negative.
+// Use a misaligned pointer to get -EINVAL.
func TestMincoreErrorSign(t *testing.T) {
var dst byte
- v := Mincore(unsafe.Pointer(uintptr(1)), 1, &dst)
+ v := Mincore(Add(unsafe.Pointer(new(int32)), 1), 1, &dst)
const EINVAL = 0x16
if v != -EINVAL {
@@ -54,7 +54,7 @@
}
func TestEpollctlErrorSign(t *testing.T) {
- v := Epollctl(-1, 1, -1, unsafe.Pointer(&struct{}{}))
+ v := Epollctl(-1, 1, -1, unsafe.Pointer(&EpollEvent{}))
const EBADF = 0x09
if v != -EBADF {
diff --git a/src/runtime/runtime_mmap_test.go b/src/runtime/runtime_mmap_test.go
index 6741e1d..bb0b747 100644
--- a/src/runtime/runtime_mmap_test.go
+++ b/src/runtime/runtime_mmap_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
package runtime_test
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index 5ea9cbd..e5d2d97 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -122,6 +122,21 @@
}
}
+func BenchmarkPanicRecover(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ defer3()
+ }
+}
+
+func defer3() {
+ defer func(x, y, z int) {
+ if recover() == nil {
+ panic("failed recover")
+ }
+ }(1, 2, 3)
+ panic("hi")
+}
+
// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
SetCPUProfileRate(0)
@@ -177,10 +192,11 @@
}
}
+// testSetPanicOnFault tests one potentially faulting address.
+// It deliberately constructs and uses an invalid pointer,
+// so mark it as nocheckptr.
+//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
- if GOOS == "nacl" {
- t.Skip("nacl doesn't seem to fault on high addresses")
- }
if GOOS == "js" {
t.Skip("js does not support catching faults")
}
@@ -278,32 +294,6 @@
}
}
-func TestBadOpen(t *testing.T) {
- if GOOS == "windows" || GOOS == "nacl" || GOOS == "js" {
- t.Skip("skipping OS that doesn't have open/read/write/close")
- }
- // make sure we get the correct error code if open fails. Same for
- // read/write/close on the resulting -1 fd. See issue 10052.
- nonfile := []byte("/notreallyafile")
- fd := Open(&nonfile[0], 0, 0)
- if fd != -1 {
- t.Errorf("open(\"%s\")=%d, want -1", string(nonfile), fd)
- }
- var buf [32]byte
- r := Read(-1, unsafe.Pointer(&buf[0]), int32(len(buf)))
- if r != -1 {
- t.Errorf("read()=%d, want -1", r)
- }
- w := Write(^uintptr(0), unsafe.Pointer(&buf[0]), int32(len(buf)))
- if w != -1 {
- t.Errorf("write()=%d, want -1", w)
- }
- c := Close(-1)
- if c != -1 {
- t.Errorf("close()=%d, want -1", c)
- }
-}
-
func TestAppendGrowth(t *testing.T) {
var x []int64
check := func(want int) {
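
The new BenchmarkPanicRecover above times a defer/recover pair that takes arguments. The same measurement can be reproduced outside the runtime's test files with the standard testing.Benchmark helper; a hedged, self-contained sketch:

package main

import (
    "fmt"
    "testing"
)

// panicRecover mirrors the defer3 helper added above: a deferred closure
// with arguments that must observe the panic via recover.
func panicRecover() {
    defer func(x, y, z int) {
        if recover() == nil {
            panic("failed recover")
        }
    }(1, 2, 3)
    panic("hi")
}

func main() {
    // testing.Benchmark picks b.N automatically and returns the result.
    res := testing.Benchmark(func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            panicRecover()
        }
    })
    fmt.Println(res) // ns/op varies by machine and Go version
}
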
diff --git a/src/runtime/rwmutex.go b/src/runtime/rwmutex.go
index a6da4c9..7713c3f 100644
--- a/src/runtime/rwmutex.go
+++ b/src/runtime/rwmutex.go
@@ -39,7 +39,7 @@
if int32(atomic.Xadd(&rw.readerCount, 1)) < 0 {
// A writer is pending. Park on the reader queue.
systemstack(func() {
- lock(&rw.rLock)
+ lockWithRank(&rw.rLock, lockRankRwmutexR)
if rw.readerPass > 0 {
// Writer finished.
rw.readerPass -= 1
@@ -67,7 +67,7 @@
// A writer is pending.
if atomic.Xadd(&rw.readerWait, -1) == 0 {
// The last reader unblocks the writer.
- lock(&rw.rLock)
+ lockWithRank(&rw.rLock, lockRankRwmutexR)
w := rw.writer.ptr()
if w != nil {
notewakeup(&w.park)
@@ -81,12 +81,12 @@
// lock locks rw for writing.
func (rw *rwmutex) lock() {
// Resolve competition with other writers and stick to our P.
- lock(&rw.wLock)
+ lockWithRank(&rw.wLock, lockRankRwmutexW)
m := getg().m
// Announce that there is a pending writer.
r := int32(atomic.Xadd(&rw.readerCount, -rwmutexMaxReaders)) + rwmutexMaxReaders
// Wait for any active readers to complete.
- lock(&rw.rLock)
+ lockWithRank(&rw.rLock, lockRankRwmutexR)
if r != 0 && atomic.Xadd(&rw.readerWait, r) != 0 {
// Wait for reader to wake us up.
systemstack(func() {
@@ -108,7 +108,7 @@
throw("unlock of unlocked rwmutex")
}
// Unblock blocked readers.
- lock(&rw.rLock)
+ lockWithRank(&rw.rLock, lockRankRwmutexR)
for rw.readers.ptr() != nil {
reader := rw.readers.ptr()
rw.readers = reader.schedlink
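
rwmutex.go now acquires its internal locks through lockWithRank, feeding the static lock-ranking checker introduced in this release: each M records the locks it holds (see locksHeld above), and the checker verifies that new acquisitions are consistent with a fixed ordering of lock ranks. The real checker lives inside the runtime and uses a partial order; the sketch below is a deliberately simplified, hypothetical illustration of the idea in ordinary Go, using a strictly-increasing rank rule and invented names (RankedMutex, lockContext, the rank values).

package main

import "sync"

// Invented rank values; the runtime's real ordering is defined elsewhere
// (lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, ...).
type lockRank int

const (
    rankRwmutexW lockRank = 20
    rankRwmutexR lockRank = 21
)

// RankedMutex and lockContext are toy stand-ins: locks must be taken in
// strictly increasing rank order and released LIFO.
type RankedMutex struct {
    mu   sync.Mutex
    rank lockRank
}

type lockContext struct {
    held []lockRank // loosely analogous to m.locksHeld above
}

func (c *lockContext) lock(m *RankedMutex) {
    if n := len(c.held); n > 0 && m.rank <= c.held[n-1] {
        panic("lock ordering violation")
    }
    m.mu.Lock()
    c.held = append(c.held, m.rank)
}

func (c *lockContext) unlock(m *RankedMutex) {
    // Assumes LIFO unlock order, which is all this sketch needs.
    m.mu.Unlock()
    c.held = c.held[:len(c.held)-1]
}

func main() {
    var ctx lockContext
    w := &RankedMutex{rank: rankRwmutexW}
    r := &RankedMutex{rank: rankRwmutexR}
    ctx.lock(w) // writer lock first (lower rank)
    ctx.lock(r) // reader lock second (higher rank): permitted
    ctx.unlock(r)
    ctx.unlock(w)
}

The point of the design is that ordering violations are reported at acquisition time, well before they could manifest as an actual deadlock.
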
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 85be1bc..a069e3e 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -14,7 +14,7 @@
// scase.kind values.
// Known to compiler.
-// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselect.
+// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselectcases.
const (
caseNil = iota
caseRecv
@@ -75,6 +75,9 @@
}
func selparkcommit(gp *g, _ unsafe.Pointer) bool {
+ // There are unlocked sudogs that point into gp's stack. Stack
+ // copying must lock the channels of those sudogs.
+ gp.activeStackChans = true
// This must not access gp's stack (see gopark). In
// particular, it must not access the *hselect. That's okay,
// because by the time this is called, gp.waiting has all
@@ -105,8 +108,9 @@
// selectgo implements the select statement.
//
// cas0 points to an array of type [ncases]scase, and order0 points to
-// an array of type [2*ncases]uint16. Both reside on the goroutine's
-// stack (regardless of any escaping in selectgo).
+// an array of type [2*ncases]uint16 where ncases must be <= 65536.
+// Both reside on the goroutine's stack (regardless of any escaping in
+// selectgo).
//
// selectgo returns the index of the chosen scase, which matches the
// ordinal position of its respective select{recv,send,default} call.
@@ -117,6 +121,8 @@
print("select: cas0=", cas0, "\n")
}
+ // NOTE: In order to maintain a lean stack size, the number of scases
+ // is capped at 65536.
cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0))
order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0))
@@ -311,6 +317,7 @@
// wait for someone to wake us up
gp.param = nil
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
+ gp.activeStackChans = false
sellock(scases, lockorder)
@@ -493,8 +500,6 @@
}
func (c *hchan) sortkey() uintptr {
- // TODO(khr): if we have a moving garbage collector, we'll need to
- // change this function.
return uintptr(unsafe.Pointer(c))
}
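
The selectgo documentation now states the 65536-case cap explicitly (the scase and order arrays are viewed through fixed [1<<16] and [1<<17] array types). Ordinary select statements never approach that limit, but dynamically built selects go through the same path via reflect.Select, which enforces the same maximum. A small grounded example of building a select dynamically:

package main

import (
    "fmt"
    "reflect"
)

func main() {
    // Build a receive-only select over n channels at runtime. selectgo
    // (and therefore reflect.Select, which is built on it) allows at most
    // 65536 cases.
    const n = 4
    chans := make([]chan int, n)
    cases := make([]reflect.SelectCase, n)
    for i := range chans {
        chans[i] = make(chan int, 1)
        cases[i] = reflect.SelectCase{
            Dir:  reflect.SelectRecv,
            Chan: reflect.ValueOf(chans[i]),
        }
    }
    chans[2] <- 42

    chosen, recv, ok := reflect.Select(cases)
    fmt.Println(chosen, recv.Int(), ok) // 2 42 true
}
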
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 30c8959..f94c1aa 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -129,7 +129,7 @@
s.acquiretime = t0
}
for {
- lock(&root.lock)
+ lockWithRank(&root.lock, lockRankRoot)
// Add ourselves to nwait to disable "easy case" in semrelease.
atomic.Xadd(&root.nwait, 1)
// Check cansemacquire to avoid missed wakeup.
@@ -168,7 +168,7 @@
}
// Harder case: search for a waiter and wake it.
- lock(&root.lock)
+ lockWithRank(&root.lock, lockRankRoot)
if atomic.Load(&root.nwait) == 0 {
// The count is already consumed by another goroutine,
// so no need to wake up another goroutine.
@@ -180,7 +180,7 @@
atomic.Xadd(&root.nwait, -1)
}
unlock(&root.lock)
- if s != nil { // May be slow, so unlock first
+ if s != nil { // May be slow or even yield, so unlock first
acquiretime := s.acquiretime
if acquiretime != 0 {
mutexevent(t0-acquiretime, 3+skipframes)
@@ -192,6 +192,25 @@
s.ticket = 1
}
readyWithTime(s, 5+skipframes)
+ if s.ticket == 1 && getg().m.locks == 0 {
+ // Direct G handoff
+ // readyWithTime has added the waiter G as runnext in the
+ // current P; we now call the scheduler so that we start running
+ // the waiter G immediately.
+ // Note that waiter inherits our time slice: this is desirable
+ // to avoid having a highly contended semaphore hog the P
+ // indefinitely. goyield is like Gosched, but it emits a
+ // "preempted" trace event instead and, more importantly, puts
+ // the current G on the local runq instead of the global one.
+ // We only do this in the starving regime (handoff=true), as in
+ // the non-starving case it is possible for a different waiter
+ // to acquire the semaphore while we are yielding/scheduling,
+ // and this would be wasteful. We wait instead to enter starving
+ // regime, and then we start to do direct handoffs of ticket and
+ // P.
+ // See issue 33747 for discussion.
+ goyield()
+ }
}
}
@@ -373,19 +392,11 @@
func (root *semaRoot) rotateLeft(x *sudog) {
// p -> (x a (y b c))
p := x.parent
- a, y := x.prev, x.next
- b, c := y.prev, y.next
+ y := x.next
+ b := y.prev
y.prev = x
x.parent = y
- y.next = c
- if c != nil {
- c.parent = y
- }
- x.prev = a
- if a != nil {
- a.parent = x
- }
x.next = b
if b != nil {
b.parent = x
@@ -409,23 +420,15 @@
func (root *semaRoot) rotateRight(y *sudog) {
// p -> (y (x a b) c)
p := y.parent
- x, c := y.prev, y.next
- a, b := x.prev, x.next
+ x := y.prev
+ b := x.next
- x.prev = a
- if a != nil {
- a.parent = x
- }
x.next = y
y.parent = x
y.prev = b
if b != nil {
b.parent = y
}
- y.next = c
- if c != nil {
- c.parent = y
- }
x.parent = p
if p == nil {
@@ -483,7 +486,7 @@
// notifyListAdd was called, it returns immediately. Otherwise, it blocks.
//go:linkname notifyListWait sync.runtime_notifyListWait
func notifyListWait(l *notifyList, t uint32) {
- lock(&l.lock)
+ lockWithRank(&l.lock, lockRankNotifyList)
// Return right away if this ticket has already been notified.
if less(t, l.notify) {
@@ -525,7 +528,7 @@
// Pull the list out into a local variable, waiters will be readied
// outside the lock.
- lock(&l.lock)
+ lockWithRank(&l.lock, lockRankNotifyList)
s := l.head
l.head = nil
l.tail = nil
@@ -555,7 +558,7 @@
return
}
- lock(&l.lock)
+ lockWithRank(&l.lock, lockRankNotifyList)
// Re-check under the lock if we need to do anything.
t := l.notify
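
The semrelease1 change above implements direct G handoff under the starving regime (issue 33747): when the woken waiter received the ticket, the releasing goroutine calls goyield so the waiter starts running on this P immediately instead of the semaphore ping-ponging between goroutines. From user code this mostly surfaces through a heavily contended sync.Mutex, which is backed by these semaphores; a hedged benchmark sketch that creates that kind of contention:

// Place in a *_test.go file of any package, e.g. contention_test.go.
package contention

import (
    "sync"
    "testing"
)

// BenchmarkContendedMutex hammers a single mutex from all procs, the kind of
// workload the starvation-mode handoff is aimed at.
func BenchmarkContendedMutex(b *testing.B) {
    var (
        mu      sync.Mutex
        counter int
    )
    b.RunParallel(func(pb *testing.PB) {
        for pb.Next() {
            mu.Lock()
            counter++
            mu.Unlock()
        }
    })
    _ = counter
}

Run with go test -bench=ContendedMutex; absolute numbers vary, but the handoff is intended to keep any one goroutine from monopolizing the lock under this kind of load.
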
diff --git a/src/runtime/sema_test.go b/src/runtime/sema_test.go
new file mode 100644
index 0000000..cf3de0a
--- /dev/null
+++ b/src/runtime/sema_test.go
@@ -0,0 +1,103 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "sync"
+ "sync/atomic"
+ "testing"
+)
+
+// TestSemaHandoff checks that when semrelease+handoff is
+// requested, the G that releases the semaphore yields its
+// P directly to the first waiter in line.
+// See issue 33747 for discussion.
+func TestSemaHandoff(t *testing.T) {
+ const iter = 10000
+ ok := 0
+ for i := 0; i < iter; i++ {
+ if testSemaHandoff() {
+ ok++
+ }
+ }
+ // As long as two thirds of handoffs are direct, we
+ // consider the test successful. The scheduler is
+ // nondeterministic, so this test checks that we get the
+ // desired outcome in a significant majority of cases.
+ // The actual ratio of direct handoffs is much higher
+ // (>90%) but we use a lower threshold to minimize the
+ // chances that unrelated changes in the runtime will
+ // cause the test to fail or become flaky.
+ if ok < iter*2/3 {
+ t.Fatal("direct handoff < 2/3:", ok, iter)
+ }
+}
+
+func TestSemaHandoff1(t *testing.T) {
+ if GOMAXPROCS(-1) <= 1 {
+ t.Skip("GOMAXPROCS <= 1")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(1)
+ TestSemaHandoff(t)
+}
+
+func TestSemaHandoff2(t *testing.T) {
+ if GOMAXPROCS(-1) <= 2 {
+ t.Skip("GOMAXPROCS <= 2")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(2)
+ TestSemaHandoff(t)
+}
+
+func testSemaHandoff() bool {
+ var sema, res uint32
+ done := make(chan struct{})
+
+ // We're testing that the current goroutine is able to yield its time slice
+ // to another goroutine. Stop the current goroutine from migrating to
+ // another CPU where it can win the race (and appear to have not yielded) by
+ // keeping the CPUs slightly busy.
+ var wg sync.WaitGroup
+ for i := 0; i < GOMAXPROCS(-1); i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for {
+ select {
+ case <-done:
+ return
+ default:
+ }
+ Gosched()
+ }
+ }()
+ }
+
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ Semacquire(&sema)
+ atomic.CompareAndSwapUint32(&res, 0, 1)
+
+ Semrelease1(&sema, true, 0)
+ close(done)
+ }()
+ for SemNwait(&sema) == 0 {
+ Gosched() // wait for goroutine to block in Semacquire
+ }
+
+ // The crux of the test: we release the semaphore with handoff
+ // and immediately perform a CAS both here and in the waiter; we
+ // want the CAS in the waiter to execute first.
+ Semrelease1(&sema, true, 0)
+ atomic.CompareAndSwapUint32(&res, 0, 2)
+
+ wg.Wait() // wait for goroutines to finish to avoid data races
+
+ return res == 1 // did the waiter run first?
+}
diff --git a/src/runtime/semasleep_test.go b/src/runtime/semasleep_test.go
index f5b4a50..9b371b0 100644
--- a/src/runtime/semasleep_test.go
+++ b/src/runtime/semasleep_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//+build !nacl,!plan9,!windows,!js
+// +build !plan9,!windows,!js
package runtime_test
diff --git a/src/runtime/signal_386.go b/src/runtime/signal_386.go
index 143deb9..065aff4 100644
--- a/src/runtime/signal_386.go
+++ b/src/runtime/signal_386.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+// +build dragonfly freebsd linux netbsd openbsd
package runtime
@@ -37,34 +37,22 @@
// preparePanic sets up the stack to look like a call to sigpanic.
func (c *sigctxt) preparePanic(sig uint32, gp *g) {
- if GOOS == "darwin" {
- // Work around Leopard bug that doesn't set FPE_INTDIV.
- // Look at instruction to see if it is a divide.
- // Not necessary in Snow Leopard (si_code will be != 0).
- if sig == _SIGFPE && gp.sigcode0 == 0 {
- pc := (*[4]byte)(unsafe.Pointer(gp.sigpc))
- i := 0
- if pc[i] == 0x66 { // 16-bit instruction prefix
- i++
- }
- if pc[i] == 0xF6 || pc[i] == 0xF7 {
- gp.sigcode0 = _FPE_INTDIV
- }
- }
- }
-
pc := uintptr(c.eip())
sp := uintptr(c.esp())
if shouldPushSigpanic(gp, pc, *(*uintptr)(unsafe.Pointer(sp))) {
- // Make it look like the faulting PC called sigpanic.
- if sys.RegSize > sys.PtrSize {
- sp -= sys.PtrSize
- *(*uintptr)(unsafe.Pointer(sp)) = 0
- }
- sp -= sys.PtrSize
- *(*uintptr)(unsafe.Pointer(sp)) = pc
- c.set_esp(uint32(sp))
+ c.pushCall(funcPC(sigpanic), pc)
+ } else {
+ // Not safe to push the call. Just clobber the frame.
+ c.set_eip(uint32(funcPC(sigpanic)))
}
- c.set_eip(uint32(funcPC(sigpanic)))
+}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Make it look like we called target at resumePC.
+ sp := uintptr(c.esp())
+ sp -= sys.PtrSize
+ *(*uintptr)(unsafe.Pointer(sp)) = resumePC
+ c.set_esp(uint32(sp))
+ c.set_eip(uint32(targetPC))
}
diff --git a/src/runtime/signal_amd64x.go b/src/runtime/signal_amd64.go
similarity index 81%
rename from src/runtime/signal_amd64x.go
rename to src/runtime/signal_amd64.go
index 9d59e26..6ab1f75 100644
--- a/src/runtime/signal_amd64x.go
+++ b/src/runtime/signal_amd64.go
@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build amd64 amd64p32
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+// +build amd64
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package runtime
@@ -66,14 +66,18 @@
sp := uintptr(c.rsp())
if shouldPushSigpanic(gp, pc, *(*uintptr)(unsafe.Pointer(sp))) {
- // Make it look the like faulting PC called sigpanic.
- if sys.RegSize > sys.PtrSize {
- sp -= sys.PtrSize
- *(*uintptr)(unsafe.Pointer(sp)) = 0
- }
- sp -= sys.PtrSize
- *(*uintptr)(unsafe.Pointer(sp)) = pc
- c.set_rsp(uint64(sp))
+ c.pushCall(funcPC(sigpanic), pc)
+ } else {
+ // Not safe to push the call. Just clobber the frame.
+ c.set_rip(uint64(funcPC(sigpanic)))
}
- c.set_rip(uint64(funcPC(sigpanic)))
+}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Make it look like we called target at resumePC.
+ sp := uintptr(c.rsp())
+ sp -= sys.PtrSize
+ *(*uintptr)(unsafe.Pointer(sp)) = resumePC
+ c.set_rsp(uint64(sp))
+ c.set_rip(uint64(targetPC))
}
diff --git a/src/runtime/signal_arm.go b/src/runtime/signal_arm.go
index bb597c5..156d9d3 100644
--- a/src/runtime/signal_arm.go
+++ b/src/runtime/signal_arm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+// +build dragonfly freebsd linux netbsd openbsd
package runtime
@@ -62,3 +62,17 @@
c.set_r10(uint32(uintptr(unsafe.Pointer(gp))))
c.set_pc(uint32(funcPC(sigpanic)))
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra slot is known to gentraceback.
+ sp := c.sp() - 4
+ c.set_sp(sp)
+ *(*uint32)(unsafe.Pointer(uintptr(sp))) = c.lr()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_lr(uint32(resumePC))
+ c.set_pc(uint32(targetPC))
+}
diff --git a/src/runtime/signal_arm64.go b/src/runtime/signal_arm64.go
index 7a3b1cc..3c20139 100644
--- a/src/runtime/signal_arm64.go
+++ b/src/runtime/signal_arm64.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin linux netbsd openbsd
+// +build darwin freebsd linux netbsd openbsd
package runtime
@@ -78,3 +78,17 @@
c.set_r28(uint64(uintptr(unsafe.Pointer(gp))))
c.set_pc(uint64(funcPC(sigpanic)))
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra space is known to gentraceback.
+ sp := c.sp() - 16 // SP needs 16-byte alignment
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_lr(uint64(resumePC))
+ c.set_pc(uint64(targetPC))
+}
diff --git a/src/runtime/signal_darwin_386.go b/src/runtime/signal_darwin_386.go
deleted file mode 100644
index 3dc5334..0000000
--- a/src/runtime/signal_darwin_386.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type sigctxt struct {
- info *siginfo
- ctxt unsafe.Pointer
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) regs() *regs32 { return &(*ucontext)(c.ctxt).uc_mcontext.ss }
-
-func (c *sigctxt) eax() uint32 { return c.regs().eax }
-func (c *sigctxt) ebx() uint32 { return c.regs().ebx }
-func (c *sigctxt) ecx() uint32 { return c.regs().ecx }
-func (c *sigctxt) edx() uint32 { return c.regs().edx }
-func (c *sigctxt) edi() uint32 { return c.regs().edi }
-func (c *sigctxt) esi() uint32 { return c.regs().esi }
-func (c *sigctxt) ebp() uint32 { return c.regs().ebp }
-func (c *sigctxt) esp() uint32 { return c.regs().esp }
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) eip() uint32 { return c.regs().eip }
-
-func (c *sigctxt) eflags() uint32 { return c.regs().eflags }
-func (c *sigctxt) cs() uint32 { return c.regs().cs }
-func (c *sigctxt) fs() uint32 { return c.regs().fs }
-func (c *sigctxt) gs() uint32 { return c.regs().gs }
-func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
-func (c *sigctxt) sigaddr() uint32 { return c.info.si_addr }
-
-func (c *sigctxt) set_eip(x uint32) { c.regs().eip = x }
-func (c *sigctxt) set_esp(x uint32) { c.regs().esp = x }
-func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
-func (c *sigctxt) set_sigaddr(x uint32) { c.info.si_addr = x }
-
-//go:nosplit
-func (c *sigctxt) fixsigcode(sig uint32) {
- switch sig {
- case _SIGTRAP:
- // OS X sets c.sigcode() == TRAP_BRKPT unconditionally for all SIGTRAPs,
- // leaving no way to distinguish a breakpoint-induced SIGTRAP
- // from an asynchronous signal SIGTRAP.
- // They all look breakpoint-induced by default.
- // Try looking at the code to see if it's a breakpoint.
- // The assumption is that we're very unlikely to get an
- // asynchronous SIGTRAP at just the moment that the
- // PC started to point at unmapped memory.
- pc := uintptr(c.eip())
- // OS X will leave the pc just after the INT 3 instruction.
- // INT 3 is usually 1 byte, but there is a 2-byte form.
- code := (*[2]byte)(unsafe.Pointer(pc - 2))
- if code[1] != 0xCC && (code[0] != 0xCD || code[1] != 3) {
- // SIGTRAP on something other than INT 3.
- c.set_sigcode(_SI_USER)
- }
- }
-}
diff --git a/src/runtime/signal_darwin_arm.go b/src/runtime/signal_darwin_arm.go
deleted file mode 100644
index 9098b10..0000000
--- a/src/runtime/signal_darwin_arm.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type sigctxt struct {
- info *siginfo
- ctxt unsafe.Pointer
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) regs() *regs32 { return &(*ucontext)(c.ctxt).uc_mcontext.ss }
-
-func (c *sigctxt) r0() uint32 { return c.regs().r[0] }
-func (c *sigctxt) r1() uint32 { return c.regs().r[1] }
-func (c *sigctxt) r2() uint32 { return c.regs().r[2] }
-func (c *sigctxt) r3() uint32 { return c.regs().r[3] }
-func (c *sigctxt) r4() uint32 { return c.regs().r[4] }
-func (c *sigctxt) r5() uint32 { return c.regs().r[5] }
-func (c *sigctxt) r6() uint32 { return c.regs().r[6] }
-func (c *sigctxt) r7() uint32 { return c.regs().r[7] }
-func (c *sigctxt) r8() uint32 { return c.regs().r[8] }
-func (c *sigctxt) r9() uint32 { return c.regs().r[9] }
-func (c *sigctxt) r10() uint32 { return c.regs().r[10] }
-func (c *sigctxt) fp() uint32 { return c.regs().r[11] }
-func (c *sigctxt) ip() uint32 { return c.regs().r[12] }
-func (c *sigctxt) sp() uint32 { return c.regs().sp }
-func (c *sigctxt) lr() uint32 { return c.regs().lr }
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) pc() uint32 { return c.regs().pc }
-
-func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr }
-func (c *sigctxt) fault() uintptr { return uintptr(c.info.si_addr) }
-func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
-func (c *sigctxt) trap() uint32 { return 0 }
-func (c *sigctxt) error() uint32 { return 0 }
-func (c *sigctxt) oldmask() uint32 { return 0 }
-
-func (c *sigctxt) set_pc(x uint32) { c.regs().pc = x }
-func (c *sigctxt) set_sp(x uint32) { c.regs().sp = x }
-func (c *sigctxt) set_lr(x uint32) { c.regs().lr = x }
-func (c *sigctxt) set_r10(x uint32) { c.regs().r[10] = x }
-
-func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
-func (c *sigctxt) set_sigaddr(x uint32) { c.info.si_addr = x }
-
-//go:nosplit
-func (c *sigctxt) fixsigcode(sig uint32) {
- switch sig {
- case _SIGTRAP:
- // OS X sets c.sigcode() == TRAP_BRKPT unconditionally for all SIGTRAPs,
- // leaving no way to distinguish a breakpoint-induced SIGTRAP
- // from an asynchronous signal SIGTRAP.
- // They all look breakpoint-induced by default.
- // Try looking at the code to see if it's a breakpoint.
- // The assumption is that we're very unlikely to get an
- // asynchronous SIGTRAP at just the moment that the
- // PC started to point at unmapped memory.
- pc := uintptr(c.pc())
- // OS X will leave the pc just after the instruction.
- code := (*uint32)(unsafe.Pointer(pc - 4))
- if *code != 0xe7f001f0 {
- // SIGTRAP on something other than breakpoint.
- c.set_sigcode(_SI_USER)
- }
- }
-}
diff --git a/src/runtime/signal_freebsd_arm64.go b/src/runtime/signal_freebsd_arm64.go
new file mode 100644
index 0000000..159e965
--- /dev/null
+++ b/src/runtime/signal_freebsd_arm64.go
@@ -0,0 +1,66 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+ info *siginfo
+ ctxt unsafe.Pointer
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) regs() *mcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+
+func (c *sigctxt) r0() uint64 { return c.regs().mc_gpregs.gp_x[0] }
+func (c *sigctxt) r1() uint64 { return c.regs().mc_gpregs.gp_x[1] }
+func (c *sigctxt) r2() uint64 { return c.regs().mc_gpregs.gp_x[2] }
+func (c *sigctxt) r3() uint64 { return c.regs().mc_gpregs.gp_x[3] }
+func (c *sigctxt) r4() uint64 { return c.regs().mc_gpregs.gp_x[4] }
+func (c *sigctxt) r5() uint64 { return c.regs().mc_gpregs.gp_x[5] }
+func (c *sigctxt) r6() uint64 { return c.regs().mc_gpregs.gp_x[6] }
+func (c *sigctxt) r7() uint64 { return c.regs().mc_gpregs.gp_x[7] }
+func (c *sigctxt) r8() uint64 { return c.regs().mc_gpregs.gp_x[8] }
+func (c *sigctxt) r9() uint64 { return c.regs().mc_gpregs.gp_x[9] }
+func (c *sigctxt) r10() uint64 { return c.regs().mc_gpregs.gp_x[10] }
+func (c *sigctxt) r11() uint64 { return c.regs().mc_gpregs.gp_x[11] }
+func (c *sigctxt) r12() uint64 { return c.regs().mc_gpregs.gp_x[12] }
+func (c *sigctxt) r13() uint64 { return c.regs().mc_gpregs.gp_x[13] }
+func (c *sigctxt) r14() uint64 { return c.regs().mc_gpregs.gp_x[14] }
+func (c *sigctxt) r15() uint64 { return c.regs().mc_gpregs.gp_x[15] }
+func (c *sigctxt) r16() uint64 { return c.regs().mc_gpregs.gp_x[16] }
+func (c *sigctxt) r17() uint64 { return c.regs().mc_gpregs.gp_x[17] }
+func (c *sigctxt) r18() uint64 { return c.regs().mc_gpregs.gp_x[18] }
+func (c *sigctxt) r19() uint64 { return c.regs().mc_gpregs.gp_x[19] }
+func (c *sigctxt) r20() uint64 { return c.regs().mc_gpregs.gp_x[20] }
+func (c *sigctxt) r21() uint64 { return c.regs().mc_gpregs.gp_x[21] }
+func (c *sigctxt) r22() uint64 { return c.regs().mc_gpregs.gp_x[22] }
+func (c *sigctxt) r23() uint64 { return c.regs().mc_gpregs.gp_x[23] }
+func (c *sigctxt) r24() uint64 { return c.regs().mc_gpregs.gp_x[24] }
+func (c *sigctxt) r25() uint64 { return c.regs().mc_gpregs.gp_x[25] }
+func (c *sigctxt) r26() uint64 { return c.regs().mc_gpregs.gp_x[26] }
+func (c *sigctxt) r27() uint64 { return c.regs().mc_gpregs.gp_x[27] }
+func (c *sigctxt) r28() uint64 { return c.regs().mc_gpregs.gp_x[28] }
+func (c *sigctxt) r29() uint64 { return c.regs().mc_gpregs.gp_x[29] }
+func (c *sigctxt) lr() uint64 { return c.regs().mc_gpregs.gp_lr }
+func (c *sigctxt) sp() uint64 { return c.regs().mc_gpregs.gp_sp }
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) pc() uint64 { return c.regs().mc_gpregs.gp_elr }
+
+func (c *sigctxt) fault() uint64 { return c.info.si_addr }
+
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
+
+func (c *sigctxt) set_pc(x uint64) { c.regs().mc_gpregs.gp_elr = x }
+func (c *sigctxt) set_sp(x uint64) { c.regs().mc_gpregs.gp_sp = x }
+func (c *sigctxt) set_lr(x uint64) { c.regs().mc_gpregs.gp_lr = x }
+func (c *sigctxt) set_r28(x uint64) { c.regs().mc_gpregs.gp_x[28] = x }
+
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) { c.info.si_addr = x }
diff --git a/src/runtime/signal_linux_riscv64.go b/src/runtime/signal_linux_riscv64.go
new file mode 100644
index 0000000..9f68e5c
--- /dev/null
+++ b/src/runtime/signal_linux_riscv64.go
@@ -0,0 +1,68 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+type sigctxt struct {
+ info *siginfo
+ ctxt unsafe.Pointer
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) regs() *sigcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+
+func (c *sigctxt) ra() uint64 { return c.regs().sc_regs.ra }
+func (c *sigctxt) sp() uint64 { return c.regs().sc_regs.sp }
+func (c *sigctxt) gp() uint64 { return c.regs().sc_regs.gp }
+func (c *sigctxt) tp() uint64 { return c.regs().sc_regs.tp }
+func (c *sigctxt) t0() uint64 { return c.regs().sc_regs.t0 }
+func (c *sigctxt) t1() uint64 { return c.regs().sc_regs.t1 }
+func (c *sigctxt) t2() uint64 { return c.regs().sc_regs.t2 }
+func (c *sigctxt) s0() uint64 { return c.regs().sc_regs.s0 }
+func (c *sigctxt) s1() uint64 { return c.regs().sc_regs.s1 }
+func (c *sigctxt) a0() uint64 { return c.regs().sc_regs.a0 }
+func (c *sigctxt) a1() uint64 { return c.regs().sc_regs.a1 }
+func (c *sigctxt) a2() uint64 { return c.regs().sc_regs.a2 }
+func (c *sigctxt) a3() uint64 { return c.regs().sc_regs.a3 }
+func (c *sigctxt) a4() uint64 { return c.regs().sc_regs.a4 }
+func (c *sigctxt) a5() uint64 { return c.regs().sc_regs.a5 }
+func (c *sigctxt) a6() uint64 { return c.regs().sc_regs.a6 }
+func (c *sigctxt) a7() uint64 { return c.regs().sc_regs.a7 }
+func (c *sigctxt) s2() uint64 { return c.regs().sc_regs.s2 }
+func (c *sigctxt) s3() uint64 { return c.regs().sc_regs.s3 }
+func (c *sigctxt) s4() uint64 { return c.regs().sc_regs.s4 }
+func (c *sigctxt) s5() uint64 { return c.regs().sc_regs.s5 }
+func (c *sigctxt) s6() uint64 { return c.regs().sc_regs.s6 }
+func (c *sigctxt) s7() uint64 { return c.regs().sc_regs.s7 }
+func (c *sigctxt) s8() uint64 { return c.regs().sc_regs.s8 }
+func (c *sigctxt) s9() uint64 { return c.regs().sc_regs.s9 }
+func (c *sigctxt) s10() uint64 { return c.regs().sc_regs.s10 }
+func (c *sigctxt) s11() uint64 { return c.regs().sc_regs.s11 }
+func (c *sigctxt) t3() uint64 { return c.regs().sc_regs.t3 }
+func (c *sigctxt) t4() uint64 { return c.regs().sc_regs.t4 }
+func (c *sigctxt) t5() uint64 { return c.regs().sc_regs.t5 }
+func (c *sigctxt) t6() uint64 { return c.regs().sc_regs.t6 }
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) pc() uint64 { return c.regs().sc_regs.pc }
+
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
+
+func (c *sigctxt) set_pc(x uint64) { c.regs().sc_regs.pc = x }
+func (c *sigctxt) set_ra(x uint64) { c.regs().sc_regs.ra = x }
+func (c *sigctxt) set_sp(x uint64) { c.regs().sc_regs.sp = x }
+func (c *sigctxt) set_gp(x uint64) { c.regs().sc_regs.gp = x }
+
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) {
+ *(*uintptr)(add(unsafe.Pointer(c.info), 2*sys.PtrSize)) = uintptr(x)
+}
diff --git a/src/runtime/signal_linux_s390x.go b/src/runtime/signal_linux_s390x.go
index 6892f63..12d5c31 100644
--- a/src/runtime/signal_linux_s390x.go
+++ b/src/runtime/signal_linux_s390x.go
@@ -109,3 +109,17 @@
c.set_r13(uint64(uintptr(unsafe.Pointer(gp))))
c.set_pc(uint64(funcPC(sigpanic)))
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra slot is known to gentraceback.
+ sp := c.sp() - 8
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_link(uint64(resumePC))
+ c.set_pc(uint64(targetPC))
+}
diff --git a/src/runtime/signal_mips64x.go b/src/runtime/signal_mips64x.go
index 1b96842..040c959 100644
--- a/src/runtime/signal_mips64x.go
+++ b/src/runtime/signal_mips64x.go
@@ -84,3 +84,17 @@
c.set_r30(uint64(uintptr(unsafe.Pointer(gp))))
c.set_pc(sigpanicPC)
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra slot is known to gentraceback.
+ sp := c.sp() - 8
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_link(uint64(resumePC))
+ c.set_pc(uint64(targetPC))
+}
diff --git a/src/runtime/signal_mipsx.go b/src/runtime/signal_mipsx.go
index e223c28..8c29f59 100644
--- a/src/runtime/signal_mipsx.go
+++ b/src/runtime/signal_mipsx.go
@@ -79,3 +79,17 @@
c.set_r30(uint32(uintptr(unsafe.Pointer(gp))))
c.set_pc(uint32(funcPC(sigpanic)))
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra slot is known to gentraceback.
+ sp := c.sp() - 4
+ c.set_sp(sp)
+ *(*uint32)(unsafe.Pointer(uintptr(sp))) = c.link()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_link(uint32(resumePC))
+ c.set_pc(uint32(targetPC))
+}
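
Each ported architecture now gains a pushCall method, which rewrites the saved signal context so that, on return from the signal handler, the interrupted code appears to have called targetPC from resumePC: the x86 variants push the resume address onto the stack, while the LR-based variants (arm, arm64, s390x, mips) spill the old LR to a known stack slot and load resumePC into LR. This is the mechanism used to inject calls such as sigpanic (and, per the preemption fields added elsewhere in this patch, asynchronous preemption) into running goroutines. A purely conceptual sketch of the x86 flavour over a simulated context follows; simContext and its fields are invented for illustration and are unrelated to the real sigctxt.

package main

import "fmt"

// simContext is an invented stand-in for sigctxt: a fake downward-growing
// stack plus pc/sp "registers".
type simContext struct {
    stack []uintptr
    sp    int // index of the current top of stack; decreases on push
    pc    uintptr
}

// pushCall mimics the 386/amd64 logic above: push resumePC as a return
// address, then redirect execution to targetPC.
func (c *simContext) pushCall(targetPC, resumePC uintptr) {
    c.sp--                   // sp -= sys.PtrSize
    c.stack[c.sp] = resumePC // *(*uintptr)(unsafe.Pointer(sp)) = resumePC
    c.pc = targetPC          // c.set_eip / c.set_rip
}

func main() {
    c := &simContext{stack: make([]uintptr, 8), sp: 8, pc: 0x401000}
    c.pushCall(0x500000, c.pc) // 0x500000 plays the role of funcPC(sigpanic)
    fmt.Printf("pc=%#x return address=%#x\n", c.pc, c.stack[c.sp])
    // pc=0x500000 return address=0x401000
}
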
diff --git a/src/runtime/signal_nacl.go b/src/runtime/signal_nacl.go
deleted file mode 100644
index ad321d8..0000000
--- a/src/runtime/signal_nacl.go
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-type sigTabT struct {
- flags int32
- name string
-}
-
-var sigtable = [...]sigTabT{
- /* 0 */ {0, "SIGNONE: no trap"},
- /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
- /* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
- /* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
- /* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
- /* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
- /* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
- /* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
- /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
- /* 9 */ {0, "SIGKILL: kill"},
- /* 10 */ {_SigPanic, "SIGBUS: bus error"},
- /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
- /* 12 */ {_SigThrow, "SIGSYS: bad system call"},
- /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
- /* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
- /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"},
- /* 17 */ {0, "SIGSTOP: stop"},
- /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"},
- /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify + _SigIgn, "SIGCHLD: child status has changed"},
- /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"},
- /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"},
- /* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
- /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
- /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
- /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
- /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
- /* 28 */ {_SigNotify, "SIGWINCH: window size change"},
- /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
- /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
- /* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
-}
diff --git a/src/runtime/signal_nacl_386.go b/src/runtime/signal_nacl_386.go
deleted file mode 100644
index 1a30a89..0000000
--- a/src/runtime/signal_nacl_386.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type sigctxt struct {
- info *siginfo
- ctxt unsafe.Pointer
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) regs() *excregs386 { return &(*exccontext)(c.ctxt).regs }
-
-func (c *sigctxt) eax() uint32 { return c.regs().eax }
-func (c *sigctxt) ebx() uint32 { return c.regs().ebx }
-func (c *sigctxt) ecx() uint32 { return c.regs().ecx }
-func (c *sigctxt) edx() uint32 { return c.regs().edx }
-func (c *sigctxt) edi() uint32 { return c.regs().edi }
-func (c *sigctxt) esi() uint32 { return c.regs().esi }
-func (c *sigctxt) ebp() uint32 { return c.regs().ebp }
-func (c *sigctxt) esp() uint32 { return c.regs().esp }
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) eip() uint32 { return c.regs().eip }
-
-func (c *sigctxt) eflags() uint32 { return c.regs().eflags }
-func (c *sigctxt) cs() uint32 { return ^uint32(0) }
-func (c *sigctxt) fs() uint32 { return ^uint32(0) }
-func (c *sigctxt) gs() uint32 { return ^uint32(0) }
-func (c *sigctxt) sigcode() uint32 { return ^uint32(0) }
-func (c *sigctxt) sigaddr() uint32 { return 0 }
-
-func (c *sigctxt) set_eip(x uint32) { c.regs().eip = x }
-func (c *sigctxt) set_esp(x uint32) { c.regs().esp = x }
-func (c *sigctxt) set_sigcode(x uint32) {}
-func (c *sigctxt) set_sigaddr(x uint32) {}
diff --git a/src/runtime/signal_nacl_amd64p32.go b/src/runtime/signal_nacl_amd64p32.go
deleted file mode 100644
index 81bbdc5..0000000
--- a/src/runtime/signal_nacl_amd64p32.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-func nacl_sysinfo(di uint32) // cross-assembly-file call; declared for vet
-
-type sigctxt struct {
- info *siginfo
- ctxt unsafe.Pointer
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) regs() *excregsamd64 {
- return &(*exccontext)(c.ctxt).regs
-}
-
-func (c *sigctxt) rax() uint64 { return c.regs().rax }
-func (c *sigctxt) rbx() uint64 { return c.regs().rbx }
-func (c *sigctxt) rcx() uint64 { return c.regs().rcx }
-func (c *sigctxt) rdx() uint64 { return c.regs().rdx }
-func (c *sigctxt) rdi() uint64 { return c.regs().rdi }
-func (c *sigctxt) rsi() uint64 { return c.regs().rsi }
-func (c *sigctxt) rbp() uint64 { return c.regs().rbp }
-func (c *sigctxt) rsp() uint64 { return c.regs().rsp }
-func (c *sigctxt) r8() uint64 { return c.regs().r8 }
-func (c *sigctxt) r9() uint64 { return c.regs().r9 }
-func (c *sigctxt) r10() uint64 { return c.regs().r10 }
-func (c *sigctxt) r11() uint64 { return c.regs().r11 }
-func (c *sigctxt) r12() uint64 { return c.regs().r12 }
-func (c *sigctxt) r13() uint64 { return c.regs().r13 }
-func (c *sigctxt) r14() uint64 { return c.regs().r14 }
-func (c *sigctxt) r15() uint64 { return c.regs().r15 }
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) rip() uint64 { return c.regs().rip }
-
-func (c *sigctxt) rflags() uint64 { return uint64(c.regs().rflags) }
-func (c *sigctxt) cs() uint64 { return ^uint64(0) }
-func (c *sigctxt) fs() uint64 { return ^uint64(0) }
-func (c *sigctxt) gs() uint64 { return ^uint64(0) }
-func (c *sigctxt) sigcode() uint64 { return ^uint64(0) }
-func (c *sigctxt) sigaddr() uint64 { return 0 }
-
-func (c *sigctxt) set_rip(x uint64) { c.regs().rip = x }
-func (c *sigctxt) set_rsp(x uint64) { c.regs().rsp = x }
-func (c *sigctxt) set_sigcode(x uint64) {}
-func (c *sigctxt) set_sigaddr(x uint64) {}
diff --git a/src/runtime/signal_nacl_arm.go b/src/runtime/signal_nacl_arm.go
deleted file mode 100644
index b831232..0000000
--- a/src/runtime/signal_nacl_arm.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type sigctxt struct {
- info *siginfo
- ctxt unsafe.Pointer
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) regs() *excregsarm { return &(*exccontext)(c.ctxt).regs }
-
-func (c *sigctxt) r0() uint32 { return c.regs().r0 }
-func (c *sigctxt) r1() uint32 { return c.regs().r1 }
-func (c *sigctxt) r2() uint32 { return c.regs().r2 }
-func (c *sigctxt) r3() uint32 { return c.regs().r3 }
-func (c *sigctxt) r4() uint32 { return c.regs().r4 }
-func (c *sigctxt) r5() uint32 { return c.regs().r5 }
-func (c *sigctxt) r6() uint32 { return c.regs().r6 }
-func (c *sigctxt) r7() uint32 { return c.regs().r7 }
-func (c *sigctxt) r8() uint32 { return c.regs().r8 }
-func (c *sigctxt) r9() uint32 { return c.regs().r9 }
-func (c *sigctxt) r10() uint32 { return c.regs().r10 }
-func (c *sigctxt) fp() uint32 { return c.regs().r11 }
-func (c *sigctxt) ip() uint32 { return c.regs().r12 }
-func (c *sigctxt) sp() uint32 { return c.regs().sp }
-func (c *sigctxt) lr() uint32 { return c.regs().lr }
-
-//go:nosplit
-//go:nowritebarrierrec
-func (c *sigctxt) pc() uint32 { return c.regs().pc }
-
-func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr }
-func (c *sigctxt) fault() uintptr { return ^uintptr(0) }
-func (c *sigctxt) trap() uint32 { return ^uint32(0) }
-func (c *sigctxt) error() uint32 { return ^uint32(0) }
-func (c *sigctxt) oldmask() uint32 { return ^uint32(0) }
-
-func (c *sigctxt) sigcode() uint32 { return 0 }
-func (c *sigctxt) sigaddr() uint32 { return 0 }
-
-func (c *sigctxt) set_pc(x uint32) { c.regs().pc = x }
-func (c *sigctxt) set_sp(x uint32) { c.regs().sp = x }
-func (c *sigctxt) set_lr(x uint32) { c.regs().lr = x }
-func (c *sigctxt) set_r10(x uint32) { c.regs().r10 = x }
-
-func (c *sigctxt) set_sigcode(x uint32) {}
-func (c *sigctxt) set_sigaddr(x uint32) {}
diff --git a/src/runtime/signal_ppc64x.go b/src/runtime/signal_ppc64x.go
index cac1a23..5de93a3 100644
--- a/src/runtime/signal_ppc64x.go
+++ b/src/runtime/signal_ppc64x.go
@@ -85,3 +85,27 @@
c.set_r12(uint64(funcPC(sigpanic)))
c.set_pc(uint64(funcPC(sigpanic)))
}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra space is known to gentraceback.
+ sp := c.sp() - sys.MinFrameSize
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
+ // In PIC mode, we'll set up (i.e. clobber) R2 on function
+ // entry. Save it ahead of time.
+ // In PIC mode the call also requires that R12 point to the function entry,
+ // so we'll set it up when pushing the call. Save it ahead
+ // of time as well.
+ // 8(SP) and 16(SP) are unused space in the reserved
+ // MinFrameSize (32) bytes.
+ *(*uint64)(unsafe.Pointer(uintptr(sp) + 8)) = c.r2()
+ *(*uint64)(unsafe.Pointer(uintptr(sp) + 16)) = c.r12()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_link(uint64(resumePC))
+ c.set_r12(uint64(targetPC))
+ c.set_pc(uint64(targetPC))
+}
diff --git a/src/runtime/signal_riscv64.go b/src/runtime/signal_riscv64.go
new file mode 100644
index 0000000..93363a4
--- /dev/null
+++ b/src/runtime/signal_riscv64.go
@@ -0,0 +1,93 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,riscv64
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+func dumpregs(c *sigctxt) {
+ print("ra ", hex(c.ra()), "\t")
+ print("sp ", hex(c.sp()), "\n")
+ print("gp ", hex(c.gp()), "\t")
+ print("tp ", hex(c.tp()), "\n")
+ print("t0 ", hex(c.t0()), "\t")
+ print("t1 ", hex(c.t1()), "\n")
+ print("t2 ", hex(c.t2()), "\t")
+ print("s0 ", hex(c.s0()), "\n")
+ print("s1 ", hex(c.s1()), "\t")
+ print("a0 ", hex(c.a0()), "\n")
+ print("a1 ", hex(c.a1()), "\t")
+ print("a2 ", hex(c.a2()), "\n")
+ print("a3 ", hex(c.a3()), "\t")
+ print("a4 ", hex(c.a4()), "\n")
+ print("a5 ", hex(c.a5()), "\t")
+ print("a6 ", hex(c.a6()), "\n")
+ print("a7 ", hex(c.a7()), "\t")
+ print("s2 ", hex(c.s2()), "\n")
+ print("s3 ", hex(c.s3()), "\t")
+ print("s4 ", hex(c.s4()), "\n")
+ print("s5 ", hex(c.s5()), "\t")
+ print("s6 ", hex(c.s6()), "\n")
+ print("s7 ", hex(c.s7()), "\t")
+ print("s8 ", hex(c.s8()), "\n")
+ print("s9 ", hex(c.s9()), "\t")
+ print("s10 ", hex(c.s10()), "\n")
+ print("s11 ", hex(c.s11()), "\t")
+ print("t3 ", hex(c.t3()), "\n")
+ print("t4 ", hex(c.t4()), "\t")
+ print("t5 ", hex(c.t5()), "\n")
+ print("t6 ", hex(c.t6()), "\t")
+ print("pc ", hex(c.pc()), "\n")
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) sigpc() uintptr { return uintptr(c.pc()) }
+
+func (c *sigctxt) sigsp() uintptr { return uintptr(c.sp()) }
+func (c *sigctxt) siglr() uintptr { return uintptr(c.ra()) }
+func (c *sigctxt) fault() uintptr { return uintptr(c.sigaddr()) }
+
+// preparePanic sets up the stack to look like a call to sigpanic.
+func (c *sigctxt) preparePanic(sig uint32, gp *g) {
+ // We arrange RA and PC to pretend the panicking
+ // function calls sigpanic directly.
+ // Always save RA to stack so that panics in leaf
+ // functions are correctly handled. This smashes
+ // the stack frame but we're not going back there
+ // anyway.
+ sp := c.sp() - sys.PtrSize
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.ra()
+
+ pc := gp.sigpc
+
+ if shouldPushSigpanic(gp, pc, uintptr(c.ra())) {
+ // Make it look like the faulting PC called sigpanic.
+ c.set_ra(uint64(pc))
+ }
+
+ // In case we are panicking from external C code
+ c.set_gp(uint64(uintptr(unsafe.Pointer(gp))))
+ c.set_pc(uint64(funcPC(sigpanic)))
+}
+
+func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
+ // Push the LR to stack, as we'll clobber it in order to
+ // push the call. The function being pushed is responsible
+ // for restoring the LR and setting the SP back.
+ // This extra slot is known to gentraceback.
+ sp := c.sp() - sys.PtrSize
+ c.set_sp(sp)
+ *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.ra()
+ // Set up PC and LR to pretend the function being signaled
+ // calls targetPC at resumePC.
+ c.set_ra(uint64(resumePC))
+ c.set_pc(uint64(targetPC))
+}
diff --git a/src/runtime/signal_sighandler.go b/src/runtime/signal_sighandler.go
deleted file mode 100644
index bec4653..0000000
--- a/src/runtime/signal_sighandler.go
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
-
-package runtime
-
-import (
- "unsafe"
-)
-
-// crashing is the number of m's we have waited for when implementing
-// GOTRACEBACK=crash when a signal is received.
-var crashing int32
-
-// testSigtrap is used by the runtime tests. If non-nil, it is called
-// on SIGTRAP. If it returns true, the normal behavior on SIGTRAP is
-// suppressed.
-var testSigtrap func(info *siginfo, ctxt *sigctxt, gp *g) bool
-
-// sighandler is invoked when a signal occurs. The global g will be
-// set to a gsignal goroutine and we will be running on the alternate
-// signal stack. The parameter g will be the value of the global g
-// when the signal occurred. The sig, info, and ctxt parameters are
-// from the system signal handler: they are the parameters passed when
-// the SA is passed to the sigaction system call.
-//
-// The garbage collector may have stopped the world, so write barriers
-// are not allowed.
-//
-//go:nowritebarrierrec
-func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
- _g_ := getg()
- c := &sigctxt{info, ctxt}
-
- if sig == _SIGPROF {
- sigprof(c.sigpc(), c.sigsp(), c.siglr(), gp, _g_.m)
- return
- }
-
- if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) {
- return
- }
-
- flags := int32(_SigThrow)
- if sig < uint32(len(sigtable)) {
- flags = sigtable[sig].flags
- }
- if flags&_SigPanic != 0 && gp.throwsplit {
- // We can't safely sigpanic because it may grow the
- // stack. Abort in the signal handler instead.
- flags = (flags &^ _SigPanic) | _SigThrow
- }
- if isAbortPC(c.sigpc()) {
- // On many architectures, the abort function just
- // causes a memory fault. Don't turn that into a panic.
- flags = _SigThrow
- }
- if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
- // The signal is going to cause a panic.
- // Arrange the stack so that it looks like the point
- // where the signal occurred made a call to the
- // function sigpanic. Then set the PC to sigpanic.
-
- // Have to pass arguments out of band since
- // augmenting the stack frame would break
- // the unwinding code.
- gp.sig = sig
- gp.sigcode0 = uintptr(c.sigcode())
- gp.sigcode1 = uintptr(c.fault())
- gp.sigpc = c.sigpc()
-
- c.preparePanic(sig, gp)
- return
- }
-
- if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
- if sigsend(sig) {
- return
- }
- }
-
- if c.sigcode() == _SI_USER && signal_ignored(sig) {
- return
- }
-
- if flags&_SigKill != 0 {
- dieFromSignal(sig)
- }
-
- if flags&_SigThrow == 0 {
- return
- }
-
- _g_.m.throwing = 1
- _g_.m.caughtsig.set(gp)
-
- if crashing == 0 {
- startpanic_m()
- }
-
- if sig < uint32(len(sigtable)) {
- print(sigtable[sig].name, "\n")
- } else {
- print("Signal ", sig, "\n")
- }
-
- print("PC=", hex(c.sigpc()), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
- print("signal arrived during cgo execution\n")
- gp = _g_.m.lockedg.ptr()
- }
- print("\n")
-
- level, _, docrash := gotraceback()
- if level > 0 {
- goroutineheader(gp)
- tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
- if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
- // tracebackothers on original m skipped this one; trace it now.
- goroutineheader(_g_.m.curg)
- traceback(^uintptr(0), ^uintptr(0), 0, _g_.m.curg)
- } else if crashing == 0 {
- tracebackothers(gp)
- print("\n")
- }
- dumpregs(c)
- }
-
- if docrash {
- crashing++
- if crashing < mcount()-int32(extraMCount) {
- // There are other m's that need to dump their stacks.
- // Relay SIGQUIT to the next m by sending it to the current process.
- // All m's that have already received SIGQUIT have signal masks blocking
- // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
- // When the last m receives the SIGQUIT, it will fall through to the call to
- // crash below. Just in case the relaying gets botched, each m involved in
- // the relay sleeps for 5 seconds and then does the crash/exit itself.
- // In expected operation, the last m has received the SIGQUIT and run
- // crash/exit and the process is gone, all long before any of the
- // 5-second sleeps have finished.
- print("\n-----\n\n")
- raiseproc(_SIGQUIT)
- usleep(5 * 1000 * 1000)
- }
- crash()
- }
-
- printDebugLog()
-
- exit(2)
-}
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index ad51dc1..5aedbf7 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -38,6 +38,38 @@
_SIG_IGN uintptr = 1
)
+// sigPreempt is the signal used for non-cooperative preemption.
+//
+// There's no good way to choose this signal, but there are some
+// heuristics:
+//
+// 1. It should be a signal that's passed-through by debuggers by
+// default. On Linux, this is SIGALRM, SIGURG, SIGCHLD, SIGIO,
+// SIGVTALRM, SIGPROF, and SIGWINCH, plus some glibc-internal signals.
+//
+// 2. It shouldn't be used internally by libc in mixed Go/C binaries
+// because libc may assume it's the only thing that can handle these
+// signals. For example SIGCANCEL or SIGSETXID.
+//
+// 3. It should be a signal that can happen spuriously without
+// consequences. For example, SIGALRM is a bad choice because the
+// signal handler can't tell if it was caused by the real process
+// alarm or not (arguably this means the signal is broken, but I
+// digress). SIGUSR1 and SIGUSR2 are also bad because those are often
+// used in meaningful ways by applications.
+//
+// 4. We need to deal with platforms without real-time signals (like
+// macOS), so those are out.
+//
+// We use SIGURG because it meets all of these criteria, is extremely
+// unlikely to be used by an application for its "real" meaning (both
+// because out-of-band data is basically unused and because SIGURG
+// doesn't report which socket has the condition, making it pretty
+// useless), and even if it is, the application has to be ready for
+// spurious SIGURG. SIGIO wouldn't be a bad choice either, but is more
+// likely to be used for real.
+const sigPreempt = _SIGURG
+
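Since SIGURG now doubles as the preemption signal, user code that subscribes to it via os/signal will also observe runtime-generated deliveries. A minimal user-level sketch (Linux/macOS; not runtime code) of what that looks like — on a quiet program the receive may block for a while, and running with GODEBUG=asyncpreemptoff=1 disables the new mechanism entirely:

    package main

    import (
        "fmt"
        "os"
        "os/signal"
        "syscall"
    )

    func main() {
        c := make(chan os.Signal, 1)
        signal.Notify(c, syscall.SIGURG)
        // The signal may come from out-of-band socket data or, as of this
        // release, from the runtime preempting one of our own threads.
        fmt.Println("waiting for SIGURG...")
        <-c
        fmt.Println("got SIGURG (possibly a spurious one from preemption)")
    }
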
// Stores the signal handlers registered before Go installed its own.
// These signal handlers will be invoked in cases where Go doesn't want to
// handle a particular signal (e.g., signal occurred on a non-Go thread).
@@ -242,10 +274,26 @@
}
} else {
// If the Go signal handler should be disabled by default,
- // disable it if it is enabled.
+ // switch back to the signal handler that was installed
+ // when we enabled profiling. We don't try to handle the case
+ // of a program that changes the SIGPROF handler while Go
+ // profiling is enabled.
+ //
+ // If no signal handler was installed before, then start
+ // ignoring SIGPROF signals. We do this, rather than change
+ // to SIG_DFL, because there may be a pending SIGPROF
+ // signal that has not yet been delivered to some other thread.
+ // If we change to SIG_DFL here, the program will crash
+ // when that SIGPROF is delivered. We assume that programs
+ // that use profiling don't want to crash on a stray SIGPROF.
+ // See issue 19320.
if !sigInstallGoHandler(_SIGPROF) {
if atomic.Cas(&handlingSig[_SIGPROF], 1, 0) {
- setsig(_SIGPROF, atomic.Loaduintptr(&fwdSig[_SIGPROF]))
+ h := atomic.Loaduintptr(&fwdSig[_SIGPROF])
+ if h == _SIG_DFL {
+ h = _SIG_IGN
+ }
+ setsig(_SIGPROF, h)
}
}
}
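The handler restore above is exercised by nothing more exotic than an ordinary profiling session started from user code; the point of falling back to SIG_IGN is that a SIGPROF still in flight after StopCPUProfile must not kill the process. A minimal sketch of such a session:

    package main

    import (
        "log"
        "os"
        "runtime/pprof"
    )

    func main() {
        f, err := os.Create("cpu.prof")
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()

        if err := pprof.StartCPUProfile(f); err != nil { // runtime installs its SIGPROF handler
            log.Fatal(err)
        }
        busyWork()
        // On return the runtime restores the previous SIGPROF handler, or
        // switches to SIG_IGN if there was none, as described above.
        pprof.StopCPUProfile()
    }

    func busyWork() {
        sum := 0
        for i := 0; i < 1e8; i++ {
            sum += i
        }
        _ = sum
    }
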
@@ -274,6 +322,78 @@
dieFromSignal(_SIGPIPE)
}
+// doSigPreempt handles a preemption signal on gp.
+func doSigPreempt(gp *g, ctxt *sigctxt) {
+ // Check if this G wants to be preempted and is safe to
+ // preempt.
+ if wantAsyncPreempt(gp) {
+ if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
+ // Adjust the PC and inject a call to asyncPreempt.
+ ctxt.pushCall(funcPC(asyncPreempt), newpc)
+ }
+ }
+
+ // Acknowledge the preemption.
+ atomic.Xadd(&gp.m.preemptGen, 1)
+ atomic.Store(&gp.m.signalPending, 0)
+}
+
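The user-visible effect of this call injection: a goroutine spinning in a loop with no call sites can now be interrupted for GC or rescheduling. A sketch — with the cooperative-only preemption this patch replaces, the program below would typically hang at runtime.GC because the tight loop never reaches a preemption check:

    package main

    import (
        "fmt"
        "runtime"
        "time"
    )

    func main() {
        runtime.GOMAXPROCS(1)
        go func() {
            for {
                // no function calls, so no cooperative preemption points
            }
        }()
        time.Sleep(100 * time.Millisecond)
        runtime.GC() // stop-the-world relies on the SIGURG-based preemption above
        fmt.Println("GC finished; the spinning goroutine was preempted asynchronously")
    }
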
+const preemptMSupported = true
+
+// preemptM sends a preemption request to mp. This request may be
+// handled asynchronously and may be coalesced with other requests to
+// the M. When the request is received, if the running G or P are
+// marked for preemption and the goroutine is at an asynchronous
+// safe-point, it will preempt the goroutine. It always atomically
+// increments mp.preemptGen after handling a preemption request.
+func preemptM(mp *m) {
+ if GOOS == "darwin" && GOARCH == "arm64" && !iscgo {
+ // On darwin, we use libc calls, and cgo is required on ARM64
+ // so we have TLS set up to save/restore G during C calls. If cgo is
+ // absent, we cannot save/restore G in TLS, and if a signal is
+ // received during C execution we cannot get the G. Therefore don't
+ // send signals.
+ // This can only happen in the go_bootstrap program (otherwise cgo is
+ // required).
+ return
+ }
+ if atomic.Cas(&mp.signalPending, 0, 1) {
+ // If multiple threads are preempting the same M, they may send many
+ // signals to the same M such that it hardly makes progress, causing
+ // a live-lock problem. Apparently this could happen on darwin. See
+ // issue #37741.
+ // Only send a signal if there isn't already one pending.
+ signalM(mp, sigPreempt)
+ }
+}
+
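The pattern used here — a single CAS-guarded pending flag so that racing requesters send at most one signal until the handler acknowledges it — is easy to study in isolation. A generic sketch with illustrative names (not the runtime's own types):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
    )

    type target struct {
        pending uint32 // 0 = no request outstanding, 1 = request pending
        wakeups int32  // how many times we actually signaled the target
    }

    // request coalesces: no matter how many callers race here, at most one
    // wakeup is outstanding until the handler acknowledges it.
    func (t *target) request() {
        if atomic.CompareAndSwapUint32(&t.pending, 0, 1) {
            atomic.AddInt32(&t.wakeups, 1) // stands in for signalM(mp, sigPreempt)
        }
    }

    // acknowledge is what the handler does once a request has been processed
    // (compare the Store of signalPending in doSigPreempt above).
    func (t *target) acknowledge() {
        atomic.StoreUint32(&t.pending, 0)
    }

    func main() {
        var t target
        var wg sync.WaitGroup
        for i := 0; i < 100; i++ {
            wg.Add(1)
            go func() { defer wg.Done(); t.request() }()
        }
        wg.Wait()
        fmt.Println("wakeups sent:", atomic.LoadInt32(&t.wakeups)) // 1, not 100
        t.acknowledge()
    }
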
+// sigFetchG fetches the value of G safely when running in a signal handler.
+// On some architectures, the g value may be clobbered when running in a VDSO.
+// See issue #32912.
+//
+//go:nosplit
+func sigFetchG(c *sigctxt) *g {
+ switch GOARCH {
+ case "arm", "arm64":
+ if !iscgo && inVDSOPage(c.sigpc()) {
+ // When using cgo, we save the g on TLS and load it from there
+ // in sigtramp. Just use that.
+ // Otherwise, before making a VDSO call we save the g to the
+ // bottom of the signal stack. Fetch from there.
+ // TODO: in efence mode, stack is sysAlloc'd, so this wouldn't
+ // work.
+ sp := getcallersp()
+ s := spanOf(sp)
+ if s != nil && s.state.get() == mSpanManual && s.base() < sp && sp < s.limit {
+ gp := *(**g)(unsafe.Pointer(s.base()))
+ return gp
+ }
+ return nil
+ }
+ }
+ return getg()
+}
+
// sigtrampgo is called from the signal handler function, sigtramp,
// written in assembly code.
// This is called by the signal handler, and the world may be stopped.
@@ -289,56 +409,34 @@
if sigfwdgo(sig, info, ctx) {
return
}
- g := getg()
+ c := &sigctxt{info, ctx}
+ g := sigFetchG(c)
+ setg(g)
if g == nil {
- c := &sigctxt{info, ctx}
if sig == _SIGPROF {
sigprofNonGoPC(c.sigpc())
return
}
+ if sig == sigPreempt && preemptMSupported && debug.asyncpreemptoff == 0 {
+ // This is probably a signal from preemptM sent
+ // while executing Go code but received while
+ // executing non-Go code.
+ // We got past sigfwdgo, so we know that there is
+ // no non-Go signal handler for sigPreempt.
+ // The default behavior for sigPreempt is to ignore
+ // the signal, so badsignal will be a no-op anyway.
+ return
+ }
c.fixsigcode(sig)
badsignal(uintptr(sig), c)
return
}
// If some non-Go code called sigaltstack, adjust.
- setStack := false
var gsignalStack gsignalStack
- sp := uintptr(unsafe.Pointer(&sig))
- if sp < g.m.gsignal.stack.lo || sp >= g.m.gsignal.stack.hi {
- if sp >= g.m.g0.stack.lo && sp < g.m.g0.stack.hi {
- // The signal was delivered on the g0 stack.
- // This can happen when linked with C code
- // using the thread sanitizer, which collects
- // signals then delivers them itself by calling
- // the signal handler directly when C code,
- // including C code called via cgo, calls a
- // TSAN-intercepted function such as malloc.
- st := stackt{ss_size: g.m.g0.stack.hi - g.m.g0.stack.lo}
- setSignalstackSP(&st, g.m.g0.stack.lo)
- setGsignalStack(&st, &gsignalStack)
- g.m.gsignal.stktopsp = getcallersp()
- setStack = true
- } else {
- var st stackt
- sigaltstack(nil, &st)
- if st.ss_flags&_SS_DISABLE != 0 {
- setg(nil)
- needm(0)
- noSignalStack(sig)
- dropm()
- }
- stsp := uintptr(unsafe.Pointer(st.ss_sp))
- if sp < stsp || sp >= stsp+st.ss_size {
- setg(nil)
- needm(0)
- sigNotOnStack(sig)
- dropm()
- }
- setGsignalStack(&st, &gsignalStack)
- g.m.gsignal.stktopsp = getcallersp()
- setStack = true
- }
+ setStack := adjustSignalStack(sig, g.m, &gsignalStack)
+ if setStack {
+ g.m.gsignal.stktopsp = getcallersp()
}
setg(g.m.gsignal)
@@ -347,7 +445,6 @@
signalDuringFork(sig)
}
- c := &sigctxt{info, ctx}
c.fixsigcode(sig)
sighandler(sig, info, ctx, g)
setg(g)
@@ -356,6 +453,235 @@
}
}
+// adjustSignalStack adjusts the current stack guard based on the
+// stack pointer that is actually in use while handling a signal.
+// We do this in case some non-Go code called sigaltstack.
+// This reports whether the stack was adjusted, and if so stores the old
+// signal stack in *gsigstack.
+//go:nosplit
+func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool {
+ sp := uintptr(unsafe.Pointer(&sig))
+ if sp >= mp.gsignal.stack.lo && sp < mp.gsignal.stack.hi {
+ return false
+ }
+
+ if sp >= mp.g0.stack.lo && sp < mp.g0.stack.hi {
+ // The signal was delivered on the g0 stack.
+ // This can happen when linked with C code
+ // using the thread sanitizer, which collects
+ // signals then delivers them itself by calling
+ // the signal handler directly when C code,
+ // including C code called via cgo, calls a
+ // TSAN-intercepted function such as malloc.
+ st := stackt{ss_size: mp.g0.stack.hi - mp.g0.stack.lo}
+ setSignalstackSP(&st, mp.g0.stack.lo)
+ setGsignalStack(&st, gsigStack)
+ return true
+ }
+
+ var st stackt
+ sigaltstack(nil, &st)
+ if st.ss_flags&_SS_DISABLE != 0 {
+ setg(nil)
+ needm(0)
+ noSignalStack(sig)
+ dropm()
+ }
+ stsp := uintptr(unsafe.Pointer(st.ss_sp))
+ if sp < stsp || sp >= stsp+st.ss_size {
+ setg(nil)
+ needm(0)
+ sigNotOnStack(sig)
+ dropm()
+ }
+ setGsignalStack(&st, gsigStack)
+ return true
+}
+
+// crashing is the number of m's we have waited for when implementing
+// GOTRACEBACK=crash when a signal is received.
+var crashing int32
+
+// testSigtrap and testSigusr1 are used by the runtime tests. If
+// non-nil, they are called on SIGTRAP and SIGUSR1 respectively. If the
+// hook returns true, the normal behavior for that signal is suppressed.
+var testSigtrap func(info *siginfo, ctxt *sigctxt, gp *g) bool
+var testSigusr1 func(gp *g) bool
+
+// sighandler is invoked when a signal occurs. The global g will be
+// set to a gsignal goroutine and we will be running on the alternate
+// signal stack. The parameter g will be the value of the global g
+// when the signal occurred. The sig, info, and ctxt parameters are
+// from the system signal handler: they are the parameters passed when
+// the SA is passed to the sigaction system call.
+//
+// The garbage collector may have stopped the world, so write barriers
+// are not allowed.
+//
+//go:nowritebarrierrec
+func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
+ _g_ := getg()
+ c := &sigctxt{info, ctxt}
+
+ if sig == _SIGPROF {
+ sigprof(c.sigpc(), c.sigsp(), c.siglr(), gp, _g_.m)
+ return
+ }
+
+ if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) {
+ return
+ }
+
+ if sig == _SIGUSR1 && testSigusr1 != nil && testSigusr1(gp) {
+ return
+ }
+
+ if sig == sigPreempt {
+ // Might be a preemption signal.
+ doSigPreempt(gp, c)
+ // Even if this was definitely a preemption signal, it
+ // may have been coalesced with another signal, so we
+ // still let it through to the application.
+ }
+
+ flags := int32(_SigThrow)
+ if sig < uint32(len(sigtable)) {
+ flags = sigtable[sig].flags
+ }
+ if c.sigcode() != _SI_USER && flags&_SigPanic != 0 && gp.throwsplit {
+ // We can't safely sigpanic because it may grow the
+ // stack. Abort in the signal handler instead.
+ flags = _SigThrow
+ }
+ if isAbortPC(c.sigpc()) {
+ // On many architectures, the abort function just
+ // causes a memory fault. Don't turn that into a panic.
+ flags = _SigThrow
+ }
+ if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+ // The signal is going to cause a panic.
+ // Arrange the stack so that it looks like the point
+ // where the signal occurred made a call to the
+ // function sigpanic. Then set the PC to sigpanic.
+
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp.sig = sig
+ gp.sigcode0 = uintptr(c.sigcode())
+ gp.sigcode1 = uintptr(c.fault())
+ gp.sigpc = c.sigpc()
+
+ c.preparePanic(sig, gp)
+ return
+ }
+
+ if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+ if sigsend(sig) {
+ return
+ }
+ }
+
+ if c.sigcode() == _SI_USER && signal_ignored(sig) {
+ return
+ }
+
+ if flags&_SigKill != 0 {
+ dieFromSignal(sig)
+ }
+
+ // _SigThrow means that we should exit now.
+ // If we get here with _SigPanic, it means that the signal
+ // was sent to us by a program (c.sigcode() == _SI_USER);
+ // in that case, if we didn't handle it in sigsend, we exit now.
+ if flags&(_SigThrow|_SigPanic) == 0 {
+ return
+ }
+
+ _g_.m.throwing = 1
+ _g_.m.caughtsig.set(gp)
+
+ if crashing == 0 {
+ startpanic_m()
+ }
+
+ if sig < uint32(len(sigtable)) {
+ print(sigtable[sig].name, "\n")
+ } else {
+ print("Signal ", sig, "\n")
+ }
+
+ print("PC=", hex(c.sigpc()), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
+ if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ print("signal arrived during cgo execution\n")
+ gp = _g_.m.lockedg.ptr()
+ }
+ if sig == _SIGILL {
+ // It would be nice to know how long the instruction is.
+ // Unfortunately, that's complicated to do in general (mostly for x86
+ // and s390x, but other archs have non-standard instruction lengths also).
+ // Opt to print 16 bytes, which covers most instructions.
+ const maxN = 16
+ n := uintptr(maxN)
+ // We have to be careful, though. If we're near the end of
+ // a page and the following page isn't mapped, we could
+ // segfault. So make sure we don't straddle a page (even though
+ // that could lead to printing an incomplete instruction).
+ // We're assuming here we can read at least the page containing the PC.
+ // I suppose it is possible that the page is mapped executable but not readable?
+ pc := c.sigpc()
+ if n > physPageSize-pc%physPageSize {
+ n = physPageSize - pc%physPageSize
+ }
+ print("instruction bytes:")
+ b := (*[maxN]byte)(unsafe.Pointer(pc))
+ for i := uintptr(0); i < n; i++ {
+ print(" ", hex(b[i]))
+ }
+ println()
+ }
+ print("\n")
+
+ level, _, docrash := gotraceback()
+ if level > 0 {
+ goroutineheader(gp)
+ tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
+ if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+ // tracebackothers on original m skipped this one; trace it now.
+ goroutineheader(_g_.m.curg)
+ traceback(^uintptr(0), ^uintptr(0), 0, _g_.m.curg)
+ } else if crashing == 0 {
+ tracebackothers(gp)
+ print("\n")
+ }
+ dumpregs(c)
+ }
+
+ if docrash {
+ crashing++
+ if crashing < mcount()-int32(extraMCount) {
+ // There are other m's that need to dump their stacks.
+ // Relay SIGQUIT to the next m by sending it to the current process.
+ // All m's that have already received SIGQUIT have signal masks blocking
+ // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+ // When the last m receives the SIGQUIT, it will fall through to the call to
+ // crash below. Just in case the relaying gets botched, each m involved in
+ // the relay sleeps for 5 seconds and then does the crash/exit itself.
+ // In expected operation, the last m has received the SIGQUIT and run
+ // crash/exit and the process is gone, all long before any of the
+ // 5-second sleeps have finished.
+ print("\n-----\n\n")
+ raiseproc(_SIGQUIT)
+ usleep(5 * 1000 * 1000)
+ }
+ crash()
+ }
+
+ printDebugLog()
+
+ exit(2)
+}
+
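A worked example of the page-boundary clamp in the SIGILL dump above, with an assumed 4 KiB page and a hypothetical faulting PC six bytes short of a page boundary:

    package main

    import "fmt"

    func main() {
        const physPageSize = 4 << 10 // assumed page size
        const maxN = 16
        pc := uintptr(0x4a1ffa) // hypothetical PC; 0x4a2000 is the next page
        n := uintptr(maxN)
        if n > physPageSize-pc%physPageSize {
            n = physPageSize - pc%physPageSize
        }
        fmt.Println(n) // 6: only the bytes up to the page boundary are printed
    }
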
// sigpanic turns a synchronous signal into a run-time panic.
// If the signal handler sees a synchronous panic, it arranges the
// stack to look like the function where the signal occurred called
@@ -588,11 +914,22 @@
throw("signal received during fork")
}
+var badginsignalMsg = "fatal: bad g in signal handler\n"
+
// This runs on a foreign stack, without an m or a g. No stack split.
//go:nosplit
//go:norace
//go:nowritebarrierrec
func badsignal(sig uintptr, c *sigctxt) {
+ if !iscgo && !cgoHasExtraM {
+ // There is no extra M. needm will not be able to grab
+ // an M. Instead of hanging, just crash.
+ // Cannot call split-stack function as there is no G.
+ s := stringStructOf(&badginsignalMsg)
+ write(2, s.str, int32(s.len))
+ exit(2)
+ *(*uintptr)(unsafe.Pointer(uintptr(123))) = 2
+ }
needm(0)
if !sigsend(uint32(sig)) {
// A foreign thread received the signal sig, and the
@@ -636,6 +973,13 @@
return true
}
+ // This function and its caller sigtrampgo assume SIGPIPE is delivered on the
+ // originating thread. This property does not hold on macOS (golang.org/issue/33384),
+ // so we have no choice but to ignore SIGPIPE.
+ if GOOS == "darwin" && sig == _SIGPIPE {
+ return true
+ }
+
// If there is no handler to forward to, no need to forward.
if fwdFn == _SIG_DFL {
return false
@@ -650,9 +994,10 @@
return false
}
// Determine if the signal occurred inside Go code. We test that:
- // (1) we were in a goroutine (i.e., m.curg != nil), and
- // (2) we weren't in CGO.
- g := getg()
+ // (1) we weren't in VDSO page,
+ // (2) we were in a goroutine (i.e., m.curg != nil), and
+ // (3) we weren't in CGO.
+ g := sigFetchG(c)
if g != nil && g.m != nil && g.m.curg != nil && !g.m.incgo {
return false
}
@@ -724,13 +1069,15 @@
// stack to the gsignal stack. If the alternate signal stack is set
// for the thread (the case when a non-Go thread sets the alternate
// signal stack and then calls a Go function) then set the gsignal
-// stack to the alternate signal stack. Record which choice was made
-// in newSigstack, so that it can be undone in unminit.
+// stack to the alternate signal stack. We also set the alternate
+// signal stack to the gsignal stack if cgo is not used (regardless
+// of whether it is already set). Record which choice was made in
+// newSigstack, so that it can be undone in unminit.
func minitSignalStack() {
_g_ := getg()
var st stackt
sigaltstack(nil, &st)
- if st.ss_flags&_SS_DISABLE != 0 {
+ if st.ss_flags&_SS_DISABLE != 0 || !iscgo {
signalstack(&_g_.m.gsignal.stack)
_g_.m.newSigstack = true
} else {
@@ -845,7 +1192,7 @@
sigaltstack(&st, nil)
}
-// setsigsegv is used on darwin/arm{,64} to fake a segmentation fault.
+// setsigsegv is used on darwin/arm64 to fake a segmentation fault.
//
// This is exported via linkname to assembly in runtime/cgo.
//
diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go
index 3fc1ec5..d123276 100644
--- a/src/runtime/signal_windows.go
+++ b/src/runtime/signal_windows.go
@@ -129,7 +129,14 @@
// make the trace look like a call to runtime·sigpanic instead.
// (Otherwise the trace will end at runtime·sigpanic and we
// won't get to see who faulted.)
- if r.ip() != 0 {
+ // Also don't push a sigpanic frame if the faulting PC
+ // is the entry of asyncPreempt. In this case, we suspended
+ // the thread right between the fault and the exception handler
+ // starting to run, and we have pushed an asyncPreempt call.
+ // The exception is not from asyncPreempt, so don't push a
+ // sigpanic call that would make it look as if it were. Instead,
+ // just overwrite the PC. (See issue #35773.)
+ if r.ip() != 0 && r.ip() != funcPC(asyncPreempt) {
sp := unsafe.Pointer(r.sp())
sp = add(sp, ^(unsafe.Sizeof(uintptr(0)) - 1)) // sp--
r.set_sp(uintptr(sp))
@@ -171,6 +178,12 @@
//
//go:nosplit
func lastcontinuehandler(info *exceptionrecord, r *context, gp *g) int32 {
+ if islibrary || isarchive {
+ // A Go DLL/archive has been loaded in a non-Go program.
+ // If the exception does not originate from Go, the Go runtime
+ // should not take responsibility for crashing the process.
+ return _EXCEPTION_CONTINUE_SEARCH
+ }
if testingWER {
return _EXCEPTION_CONTINUE_SEARCH
}
diff --git a/src/runtime/signal_windows_test.go b/src/runtime/signal_windows_test.go
new file mode 100644
index 0000000..f998571
--- /dev/null
+++ b/src/runtime/signal_windows_test.go
@@ -0,0 +1,152 @@
+// +build windows
+
+package runtime_test
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "internal/testenv"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "syscall"
+ "testing"
+)
+
+func TestVectoredHandlerDontCrashOnLibrary(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+ if runtime.GOARCH != "amd64" {
+ t.Skip("this test can only run on windows/amd64")
+ }
+ testenv.MustHaveGoBuild(t)
+ testenv.MustHaveExecPath(t, "gcc")
+ testprog.Lock()
+ defer testprog.Unlock()
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ // build go dll
+ dll := filepath.Join(dir, "testwinlib.dll")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", dll, "--buildmode", "c-shared", "testdata/testwinlib/main.go")
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build go library: %s\n%s", err, out)
+ }
+
+ // build c program
+ exe := filepath.Join(dir, "test.exe")
+ cmd = exec.Command("gcc", "-L"+dir, "-I"+dir, "-ltestwinlib", "-o", exe, "testdata/testwinlib/main.c")
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build c exe: %s\n%s", err, out)
+ }
+
+ // run test program
+ cmd = exec.Command(exe)
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failure while running executable: %s\n%s", err, out)
+ }
+ expectedOutput := "exceptionCount: 1\ncontinueCount: 1\n"
+ // cleaning output
+ cleanedOut := strings.ReplaceAll(string(out), "\r\n", "\n")
+ if cleanedOut != expectedOutput {
+ t.Errorf("expected output %q, got %q", expectedOutput, cleanedOut)
+ }
+}
+
+func sendCtrlBreak(pid int) error {
+ kernel32, err := syscall.LoadDLL("kernel32.dll")
+ if err != nil {
+ return fmt.Errorf("LoadDLL: %v\n", err)
+ }
+ generateEvent, err := kernel32.FindProc("GenerateConsoleCtrlEvent")
+ if err != nil {
+ return fmt.Errorf("FindProc: %v\n", err)
+ }
+ result, _, err := generateEvent.Call(syscall.CTRL_BREAK_EVENT, uintptr(pid))
+ if result == 0 {
+ return fmt.Errorf("GenerateConsoleCtrlEvent: %v\n", err)
+ }
+ return nil
+}
+
+// TestLibraryCtrlHandler tests that a Go DLL allows the calling program to handle console control events.
+// See https://golang.org/issues/35965.
+func TestLibraryCtrlHandler(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+ if runtime.GOARCH != "amd64" {
+ t.Skip("this test can only run on windows/amd64")
+ }
+ testenv.MustHaveGoBuild(t)
+ testenv.MustHaveExecPath(t, "gcc")
+ testprog.Lock()
+ defer testprog.Unlock()
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ // build go dll
+ dll := filepath.Join(dir, "dummy.dll")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", dll, "--buildmode", "c-shared", "testdata/testwinlibsignal/dummy.go")
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build go library: %s\n%s", err, out)
+ }
+
+ // build c program
+ exe := filepath.Join(dir, "test.exe")
+ cmd = exec.Command("gcc", "-o", exe, "testdata/testwinlibsignal/main.c")
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build c exe: %s\n%s", err, out)
+ }
+
+ // run test program
+ cmd = exec.Command(exe)
+ var stderr bytes.Buffer
+ cmd.Stderr = &stderr
+ outPipe, err := cmd.StdoutPipe()
+ if err != nil {
+ t.Fatalf("Failed to create stdout pipe: %v", err)
+ }
+ outReader := bufio.NewReader(outPipe)
+
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP,
+ }
+ if err := cmd.Start(); err != nil {
+ t.Fatalf("Start failed: %v", err)
+ }
+
+ errCh := make(chan error, 1)
+ go func() {
+ if line, err := outReader.ReadString('\n'); err != nil {
+ errCh <- fmt.Errorf("could not read stdout: %v", err)
+ } else if strings.TrimSpace(line) != "ready" {
+ errCh <- fmt.Errorf("unexpected message: %v", line)
+ } else {
+ errCh <- sendCtrlBreak(cmd.Process.Pid)
+ }
+ }()
+
+ if err := <-errCh; err != nil {
+ t.Fatal(err)
+ }
+ if err := cmd.Wait(); err != nil {
+ t.Fatalf("Program exited with error: %v\n%s", err, &stderr)
+ }
+}
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index b2ebb2b..3bf07cb 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -192,16 +192,13 @@
//go:linkname signal_enable os/signal.signal_enable
func signal_enable(s uint32) {
if !sig.inuse {
- // The first call to signal_enable is for us
- // to use for initialization. It does not pass
- // signal information in m.
+ // This is the first call to signal_enable. Initialize.
sig.inuse = true // enable reception of signals; cannot disable
if GOOS == "darwin" {
sigNoteSetup(&sig.note)
- return
+ } else {
+ noteclear(&sig.note)
}
- noteclear(&sig.note)
- return
}
if s >= uint32(len(sig.wanted)*32) {
diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go
index 934742a..d5fe8f8 100644
--- a/src/runtime/sigqueue_plan9.go
+++ b/src/runtime/sigqueue_plan9.go
@@ -134,12 +134,9 @@
//go:linkname signal_enable os/signal.signal_enable
func signal_enable(s uint32) {
if !sig.inuse {
- // The first call to signal_enable is for us
- // to use for initialization. It does not pass
- // signal information in m.
+ // This is the first call to signal_enable. Initialize.
sig.inuse = true // enable reception of signals; cannot disable
noteclear(&sig.note)
- return
}
}
diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go
index 830055e..736e848 100644
--- a/src/runtime/sizeof_test.go
+++ b/src/runtime/sizeof_test.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !nacl
-
package runtime_test
import (
@@ -23,7 +21,8 @@
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
- {runtime.G{}, 216, 376}, // g, but exported for testing
+ {runtime.G{}, 216, 376}, // g, but exported for testing
+ {runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
}
for _, tt := range tests {
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 79cfc69..0418ace 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -16,7 +16,7 @@
cap int
}
-// An notInHeapSlice is a slice backed by go:notinheap memory.
+// A notInHeapSlice is a slice backed by go:notinheap memory.
type notInHeapSlice struct {
array *notInHeap
len int
@@ -31,6 +31,55 @@
panic(errorString("makeslice: cap out of range"))
}
+// makeslicecopy allocates a slice of "tolen" elements of type "et",
+// then copies "fromlen" elements of type "et" into that new allocation from "from".
+func makeslicecopy(et *_type, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer {
+ var tomem, copymem uintptr
+ if uintptr(tolen) > uintptr(fromlen) {
+ var overflow bool
+ tomem, overflow = math.MulUintptr(et.size, uintptr(tolen))
+ if overflow || tomem > maxAlloc || tolen < 0 {
+ panicmakeslicelen()
+ }
+ copymem = et.size * uintptr(fromlen)
+ } else {
+ // fromlen is a known good length that is equal to or greater than tolen,
+ // which makes tolen a good slice length too, since the from and to slices
+ // have the same element width.
+ tomem = et.size * uintptr(tolen)
+ copymem = tomem
+ }
+
+ var to unsafe.Pointer
+ if et.ptrdata == 0 {
+ to = mallocgc(tomem, nil, false)
+ if copymem < tomem {
+ memclrNoHeapPointers(add(to, copymem), tomem-copymem)
+ }
+ } else {
+ // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
+ to = mallocgc(tomem, et, true)
+ if copymem > 0 && writeBarrier.enabled {
+ // Only shade the pointers in the source (from), since we know the destination
+ // slice only contains nil pointers because it has been cleared during alloc.
+ bulkBarrierPreWriteSrcOnly(uintptr(to), uintptr(from), copymem)
+ }
+ }
+
+ if raceenabled {
+ callerpc := getcallerpc()
+ pc := funcPC(makeslicecopy)
+ racereadrangepc(from, copymem, callerpc, pc)
+ }
+ if msanenabled {
+ msanread(from, copymem)
+ }
+
+ memmove(to, from, copymem)
+
+ return to
+}
+
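makeslicecopy is a target for the compiler rather than something user code calls directly; the intent is to serve the common make-then-copy idiom in one allocate-and-copy step instead of zeroing the new slice first. A sketch of the pattern as it appears in ordinary code:

    package main

    import "fmt"

    // clone is the classic make+copy idiom the new helper is aimed at.
    func clone(b []byte) []byte {
        c := make([]byte, len(b))
        copy(c, b)
        return c
    }

    func main() {
        a := []byte("hello")
        b := clone(a)
        b[0] = 'H'
        fmt.Println(string(a), string(b)) // hello Hello — independent backing arrays
    }
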
func makeslice(et *_type, len, cap int) unsafe.Pointer {
mem, overflow := math.MulUintptr(et.size, uintptr(cap))
if overflow || mem > maxAlloc || len < 0 || len > cap {
@@ -182,7 +231,7 @@
if lenmem > 0 && writeBarrier.enabled {
// Only shade the pointers in old.array since we know the destination slice p
// only contains nil pointers because it has been cleared during alloc.
- bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem)
+ bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem-et.size+et.ptrdata)
}
}
memmove(p, old.array, lenmem)
@@ -194,14 +243,14 @@
return x&(x-1) == 0
}
-func slicecopy(to, fm slice, width uintptr) int {
- if fm.len == 0 || to.len == 0 {
+func slicecopy(toPtr unsafe.Pointer, toLen int, fmPtr unsafe.Pointer, fmLen int, width uintptr) int {
+ if fmLen == 0 || toLen == 0 {
return 0
}
- n := fm.len
- if to.len < n {
- n = to.len
+ n := fmLen
+ if toLen < n {
+ n = toLen
}
if width == 0 {
@@ -211,43 +260,43 @@
if raceenabled {
callerpc := getcallerpc()
pc := funcPC(slicecopy)
- racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
- racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
+ racereadrangepc(fmPtr, uintptr(n*int(width)), callerpc, pc)
+ racewriterangepc(toPtr, uintptr(n*int(width)), callerpc, pc)
}
if msanenabled {
- msanwrite(to.array, uintptr(n*int(width)))
- msanread(fm.array, uintptr(n*int(width)))
+ msanread(fmPtr, uintptr(n*int(width)))
+ msanwrite(toPtr, uintptr(n*int(width)))
}
size := uintptr(n) * width
if size == 1 { // common case worth about 2x to do here
// TODO: is this still worth it with new memmove impl?
- *(*byte)(to.array) = *(*byte)(fm.array) // known to be a byte pointer
+ *(*byte)(toPtr) = *(*byte)(fmPtr) // known to be a byte pointer
} else {
- memmove(to.array, fm.array, size)
+ memmove(toPtr, fmPtr, size)
}
return n
}
-func slicestringcopy(to []byte, fm string) int {
- if len(fm) == 0 || len(to) == 0 {
+func slicestringcopy(toPtr *byte, toLen int, fm string) int {
+ if len(fm) == 0 || toLen == 0 {
return 0
}
n := len(fm)
- if len(to) < n {
- n = len(to)
+ if toLen < n {
+ n = toLen
}
if raceenabled {
callerpc := getcallerpc()
pc := funcPC(slicestringcopy)
- racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
+ racewriterangepc(unsafe.Pointer(toPtr), uintptr(n), callerpc, pc)
}
if msanenabled {
- msanwrite(unsafe.Pointer(&to[0]), uintptr(n))
+ msanwrite(unsafe.Pointer(toPtr), uintptr(n))
}
- memmove(unsafe.Pointer(&to[0]), stringStructOf(&fm).str, uintptr(n))
+ memmove(unsafe.Pointer(toPtr), stringStructOf(&fm).str, uintptr(n))
return n
}
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go
index 0463fc7..e963a43 100644
--- a/src/runtime/slice_test.go
+++ b/src/runtime/slice_test.go
@@ -10,6 +10,84 @@
const N = 20
+func BenchmarkMakeSliceCopy(b *testing.B) {
+ const length = 32
+ var bytes = make([]byte, 8*length)
+ var ints = make([]int, length)
+ var ptrs = make([]*byte, length)
+ b.Run("mallocmove", func(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ var x []byte
+ for i := 0; i < b.N; i++ {
+ x = make([]byte, len(bytes))
+ copy(x, bytes)
+ }
+ })
+ b.Run("Int", func(b *testing.B) {
+ var x []int
+ for i := 0; i < b.N; i++ {
+ x = make([]int, len(ints))
+ copy(x, ints)
+ }
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ var x []*byte
+ for i := 0; i < b.N; i++ {
+ x = make([]*byte, len(ptrs))
+ copy(x, ptrs)
+ }
+
+ })
+ })
+ b.Run("makecopy", func(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ var x []byte
+ for i := 0; i < b.N; i++ {
+ x = make([]byte, 8*length)
+ copy(x, bytes)
+ }
+ })
+ b.Run("Int", func(b *testing.B) {
+ var x []int
+ for i := 0; i < b.N; i++ {
+ x = make([]int, length)
+ copy(x, ints)
+ }
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ var x []*byte
+ for i := 0; i < b.N; i++ {
+ x = make([]*byte, length)
+ copy(x, ptrs)
+ }
+
+ })
+ })
+ b.Run("nilappend", func(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ var x []byte
+ for i := 0; i < b.N; i++ {
+ x = append([]byte(nil), bytes...)
+ _ = x
+ }
+ })
+ b.Run("Int", func(b *testing.B) {
+ var x []int
+ for i := 0; i < b.N; i++ {
+ x = append([]int(nil), ints...)
+ _ = x
+ }
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ var x []*byte
+ for i := 0; i < b.N; i++ {
+ x = append([]*byte(nil), ptrs...)
+ _ = x
+ }
+ })
+ })
+}
+
type (
struct24 struct{ a, b, c int64 }
struct32 struct{ a, b, c, d int64 }
diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go
index 8fde0fe..13bee6c 100644
--- a/src/runtime/softfloat64.go
+++ b/src/runtime/softfloat64.go
@@ -13,7 +13,7 @@
expbits64 uint = 11
bias64 = -1<<(expbits64-1) + 1
- nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1
+ nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1) // quiet NaN, 0 payload
inf64 uint64 = (1<<expbits64 - 1) << mantbits64
neg64 uint64 = 1 << (expbits64 + mantbits64)
@@ -21,7 +21,7 @@
expbits32 uint = 8
bias32 = -1<<(expbits32-1) + 1
- nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1
+ nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1<<(mantbits32-1) // quiet NaN, 0 payload
inf32 uint32 = (1<<expbits32 - 1) << mantbits32
neg32 uint32 = 1 << (expbits32 + mantbits32)
)
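For reference, the new quiet-NaN constants work out to the standard bit patterns; a quick check, with the widths below mirroring the mantbits/expbits values in this file:

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        const (
            mantbits64 = 52
            expbits64  = 11
            mantbits32 = 23
            expbits32  = 8
        )
        nan64 := uint64(1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1)
        nan32 := uint32(1<<expbits32-1)<<mantbits32 + 1<<(mantbits32-1)
        fmt.Printf("nan64 = %#x, IsNaN: %v\n", nan64, math.IsNaN(math.Float64frombits(nan64)))
        fmt.Printf("nan32 = %#x\n", nan32)
        // nan64 = 0x7ff8000000000000, nan32 = 0x7fc00000:
        // quiet NaNs with zero payload (top mantissa bit set).
    }
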
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 7ae3eee..52e5417 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "internal/cpu"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -90,7 +91,7 @@
// The stack guard is a pointer this many bytes above the
// bottom of the stack.
- _StackGuard = 880*sys.StackGuardMultiplier + _StackSystem
+ _StackGuard = 928*sys.StackGuardMultiplier + _StackSystem
// After a stack split check the SP is allowed to be this
// many bytes below the stack guard. This saves an instruction
@@ -137,9 +138,16 @@
// Stacks are assigned an order according to size.
// order = log_2(size/FixedStack)
// There is a free list for each order.
-// TODO: one lock per order?
-var stackpool [_NumStackOrders]mSpanList
-var stackpoolmu mutex
+var stackpool [_NumStackOrders]struct {
+ item stackpoolItem
+ _ [cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize]byte
+}
+
+//go:notinheap
+type stackpoolItem struct {
+ mu mutex
+ span mSpanList
+}
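The same shard-per-lock layout with cache-line padding can be written outside the runtime; internal/cpu is not importable from user code, so the line size in this sketch is an assumption rather than a detected value:

    package main

    import (
        "fmt"
        "sync"
        "unsafe"
    )

    const cacheLineSize = 64 // assumed; a common value on amd64 and arm64

    type shard struct {
        mu    sync.Mutex
        items []int
    }

    // padded rounds each shard up to a cache-line multiple so that locking one
    // shard does not false-share with its neighbours in the array.
    type padded struct {
        shard
        _ [cacheLineSize - unsafe.Sizeof(shard{})%cacheLineSize]byte
    }

    var shards [8]padded

    func main() {
        shards[3].mu.Lock()
        shards[3].items = append(shards[3].items, 42)
        shards[3].mu.Unlock()
        fmt.Println("sizeof(padded) =", unsafe.Sizeof(padded{})) // a multiple of 64
    }
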
// Global pool of large stack spans.
var stackLarge struct {
@@ -152,10 +160,12 @@
throw("cache size must be a multiple of page size")
}
for i := range stackpool {
- stackpool[i].init()
+ stackpool[i].item.span.init()
+ lockInit(&stackpool[i].item.mu, lockRankStackpool)
}
for i := range stackLarge.free {
stackLarge.free[i].init()
+ lockInit(&stackLarge.lock, lockRankStackLarge)
}
}
@@ -170,10 +180,11 @@
}
// Allocates a stack from the free pool. Must be called with
-// stackpoolmu held.
+// stackpool[order].item.mu held.
func stackpoolalloc(order uint8) gclinkptr {
- list := &stackpool[order]
+ list := &stackpool[order].item.span
s := list.first
+ lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if s == nil {
// no free stacks. Allocate another span worth.
s = mheap_.allocManual(_StackCacheSize>>_PageShift, &memstats.stacks_inuse)
@@ -208,15 +219,15 @@
return x
}
-// Adds stack x to the free pool. Must be called with stackpoolmu held.
+// Adds stack x to the free pool. Must be called with stackpool[order].item.mu held.
func stackpoolfree(x gclinkptr, order uint8) {
s := spanOfUnchecked(uintptr(x))
- if s.state != mSpanManual {
+ if s.state.get() != mSpanManual {
throw("freeing stack not in a stack span")
}
if s.manualFreeList.ptr() == nil {
// s will now have a free stack
- stackpool[order].insert(s)
+ stackpool[order].item.span.insert(s)
}
x.ptr().next = s.manualFreeList
s.manualFreeList = x
@@ -237,7 +248,7 @@
// pointer into a free span.
//
// By not freeing, we prevent step #4 until GC is done.
- stackpool[order].remove(s)
+ stackpool[order].item.span.remove(s)
s.manualFreeList = 0
osStackFree(s)
mheap_.freeManual(s, &memstats.stacks_inuse)
@@ -257,14 +268,14 @@
// Grab half of the allowed capacity (to prevent thrashing).
var list gclinkptr
var size uintptr
- lock(&stackpoolmu)
+ lock(&stackpool[order].item.mu)
for size < _StackCacheSize/2 {
x := stackpoolalloc(order)
x.ptr().next = list
list = x
size += _FixedStack << order
}
- unlock(&stackpoolmu)
+ unlock(&stackpool[order].item.mu)
c.stackcache[order].list = list
c.stackcache[order].size = size
}
@@ -276,14 +287,14 @@
}
x := c.stackcache[order].list
size := c.stackcache[order].size
- lock(&stackpoolmu)
+ lock(&stackpool[order].item.mu)
for size > _StackCacheSize/2 {
y := x.ptr().next
stackpoolfree(x, order)
x = y
size -= _FixedStack << order
}
- unlock(&stackpoolmu)
+ unlock(&stackpool[order].item.mu)
c.stackcache[order].list = x
c.stackcache[order].size = size
}
@@ -293,8 +304,8 @@
if stackDebug >= 1 {
print("stackcache clear\n")
}
- lock(&stackpoolmu)
for order := uint8(0); order < _NumStackOrders; order++ {
+ lock(&stackpool[order].item.mu)
x := c.stackcache[order].list
for x.ptr() != nil {
y := x.ptr().next
@@ -303,8 +314,8 @@
}
c.stackcache[order].list = 0
c.stackcache[order].size = 0
+ unlock(&stackpool[order].item.mu)
}
- unlock(&stackpoolmu)
}
// stackalloc allocates an n byte stack.
@@ -329,7 +340,7 @@
}
if debug.efence != 0 || stackFromSystem != 0 {
- n = uint32(round(uintptr(n), physPageSize))
+ n = uint32(alignUp(uintptr(n), physPageSize))
v := sysAlloc(uintptr(n), &memstats.stacks_sys)
if v == nil {
throw("out of memory (stackalloc)")
@@ -349,16 +360,16 @@
n2 >>= 1
}
var x gclinkptr
- c := thisg.m.mcache
- if stackNoCache != 0 || c == nil || thisg.m.preemptoff != "" {
- // c == nil can happen in the guts of exitsyscall or
- // procresize. Just get a stack from the global pool.
+ if stackNoCache != 0 || thisg.m.p == 0 || thisg.m.preemptoff != "" {
+ // thisg.m.p == 0 can happen in the guts of exitsyscall
+ // or procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
- lock(&stackpoolmu)
+ lock(&stackpool[order].item.mu)
x = stackpoolalloc(order)
- unlock(&stackpoolmu)
+ unlock(&stackpool[order].item.mu)
} else {
+ c := thisg.m.p.ptr().mcache
x = c.stackcache[order].list
if x.ptr() == nil {
stackcacherefill(c, order)
@@ -381,6 +392,8 @@
}
unlock(&stackLarge.lock)
+ lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
+
if s == nil {
// Allocate a new stack from the heap.
s = mheap_.allocManual(npage, &memstats.stacks_inuse)
@@ -444,12 +457,12 @@
n2 >>= 1
}
x := gclinkptr(v)
- c := gp.m.mcache
- if stackNoCache != 0 || c == nil || gp.m.preemptoff != "" {
- lock(&stackpoolmu)
+ if stackNoCache != 0 || gp.m.p == 0 || gp.m.preemptoff != "" {
+ lock(&stackpool[order].item.mu)
stackpoolfree(x, order)
- unlock(&stackpoolmu)
+ unlock(&stackpool[order].item.mu)
} else {
+ c := gp.m.p.ptr().mcache
if c.stackcache[order].size >= _StackCacheSize {
stackcacherelease(c, order)
}
@@ -459,7 +472,7 @@
}
} else {
s := spanOfUnchecked(uintptr(v))
- if s.state != mSpanManual {
+ if s.state.get() != mSpanManual {
println(hex(s.base()), v)
throw("bad span state")
}
@@ -619,7 +632,7 @@
print(" adjusting ", funcname(f), " frame=[", hex(frame.sp), ",", hex(frame.fp), "] pc=", hex(frame.pc), " continpc=", hex(frame.continpc), "\n")
}
if f.funcID == funcID_systemstack_switch {
- // A special routine at the bottom of stack of a goroutine that does an systemstack call.
+ // A special routine at the bottom of stack of a goroutine that does a systemstack call.
// We will allow it to be copied even though we don't
// have full GC info for it (because it is written in asm).
return true
@@ -728,6 +741,8 @@
adjustpointer(adjinfo, unsafe.Pointer(&d.sp))
adjustpointer(adjinfo, unsafe.Pointer(&d._panic))
adjustpointer(adjinfo, unsafe.Pointer(&d.link))
+ adjustpointer(adjinfo, unsafe.Pointer(&d.varp))
+ adjustpointer(adjinfo, unsafe.Pointer(&d.fd))
}
// Adjust defer argument blocks the same way we adjust active stack frames.
@@ -776,14 +791,19 @@
}
// Lock channels to prevent concurrent send/receive.
- // It's important that we *only* do this for async
- // copystack; otherwise, gp may be in the middle of
- // putting itself on wait queues and this would
- // self-deadlock.
var lastc *hchan
for sg := gp.waiting; sg != nil; sg = sg.waitlink {
if sg.c != lastc {
- lock(&sg.c.lock)
+ // There is a ranking cycle here between gscan bit and
+ // hchan locks. Normally, we only allow acquiring hchan
+ // locks and then getting a gscan bit. In this case, we
+ // already have the gscan bit. We allow acquiring hchan
+ // locks here as a special case, since a deadlock can't
+ // happen because the G involved must already be
+ // suspended. So, we get a special hchan lock rank here
+ // that is lower than gscan, but doesn't allow acquiring
+ // any locks other than hchan.
+ lockWithRank(&sg.c.lock, lockRankHchanLeaf)
}
lastc = sg.c
}
@@ -816,12 +836,7 @@
// Copies gp's stack to a new stack of a different size.
// Caller must have changed gp status to Gcopystack.
-//
-// If sync is true, this is a self-triggered stack growth and, in
-// particular, no other G may be writing to gp's stack (e.g., via a
-// channel operation). If sync is false, copystack protects against
-// concurrent channel operations.
-func copystack(gp *g, newsize uintptr, sync bool) {
+func copystack(gp *g, newsize uintptr) {
if gp.syscallsp != 0 {
throw("stack growth not allowed in system call")
}
@@ -847,15 +862,16 @@
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
- if sync {
+ if !gp.activeStackChans {
adjustsudogs(gp, &adjinfo)
} else {
- // sudogs can point in to the stack. During concurrent
- // shrinking, these areas may be written to. Find the
- // highest such pointer so we can handle everything
- // there and below carefully. (This shouldn't be far
- // from the bottom of the stack, so there's little
- // cost in handling everything below it carefully.)
+ // sudogs may be pointing in to the stack and gp has
+ // released channel locks, so other goroutines could
+ // be writing to gp's stack. Find the highest such
+ // pointer so we can handle everything there and below
+ // carefully. (This shouldn't be far from the bottom
+ // of the stack, so there's little cost in handling
+ // everything below it carefully.)
adjinfo.sghi = findsghi(gp, old)
// Synchronize with channel ops and copy the part of
@@ -906,7 +922,7 @@
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
-// If the GC is trying to stop this g then it will set preemptscan to true.
+// If the scheduler is trying to stop this g, then it will set preemptStop.
//
// This must be nowritebarrierrec because it can be called as part of
// stack growth from other nowritebarrierrec functions, but the
@@ -973,7 +989,7 @@
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
if preempt {
- if thisg.m.locks != 0 || thisg.m.mallocing != 0 || thisg.m.preemptoff != "" || thisg.m.p.ptr().status != _Prunning {
+ if !canPreemptM(thisg.m) {
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp.stackguard0 = gp.stack.lo + _StackGuard
@@ -1007,42 +1023,39 @@
if thisg.m.p == 0 && thisg.m.locks == 0 {
throw("runtime: g is running but p is not")
}
- // Synchronize with scang.
- casgstatus(gp, _Grunning, _Gwaiting)
- if gp.preemptscan {
- for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) {
- // Likely to be racing with the GC as
- // it sees a _Gwaiting and does the
- // stack scan. If so, gcworkdone will
- // be set and gcphasework will simply
- // return.
- }
- if !gp.gcscandone {
- // gcw is safe because we're on the
- // system stack.
- gcw := &gp.m.p.ptr().gcw
- scanstack(gp, gcw)
- gp.gcscandone = true
- }
- gp.preemptscan = false
- gp.preempt = false
- casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
- // This clears gcscanvalid.
- casgstatus(gp, _Gwaiting, _Grunning)
- gp.stackguard0 = gp.stack.lo + _StackGuard
- gogo(&gp.sched) // never return
+
+ if gp.preemptShrink {
+ // We're at a synchronous safe point now, so
+ // do the pending stack shrink.
+ gp.preemptShrink = false
+ shrinkstack(gp)
+ }
+
+ if gp.preemptStop {
+ preemptPark(gp) // never returns
}
// Act like goroutine called runtime.Gosched.
- casgstatus(gp, _Gwaiting, _Grunning)
gopreempt_m(gp) // never return
}
// Allocate a bigger segment and move the stack.
oldsize := gp.stack.hi - gp.stack.lo
newsize := oldsize * 2
+
+ // Make sure we grow at least as much as needed to fit the new frame.
+ // (This is just an optimization - the caller of morestack will
+ // recheck the bounds on return.)
+ if f := findfunc(gp.sched.pc); f.valid() {
+ max := uintptr(funcMaxSPDelta(f))
+ for newsize-oldsize < max+_StackGuard {
+ newsize *= 2
+ }
+ }
+
if newsize > maxstacksize {
print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n")
+ print("runtime: sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
throw("stack overflow")
}
@@ -1052,7 +1065,7 @@
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
- copystack(gp, newsize, true)
+ copystack(gp, newsize)
if stackDebug >= 1 {
print("stack grow done\n")
}
@@ -1077,16 +1090,46 @@
gostartcall(gobuf, fn, unsafe.Pointer(fv))
}
+// isShrinkStackSafe returns whether it's safe to attempt to shrink
+// gp's stack. Shrinking the stack is only safe when we have precise
+// pointer maps for all frames on the stack.
+func isShrinkStackSafe(gp *g) bool {
+ // We can't copy the stack if we're in a syscall.
+ // The syscall might have pointers into the stack and
+ // often we don't have precise pointer maps for the innermost
+ // frames.
+ //
+ // We also can't copy the stack if we're at an asynchronous
+ // safe-point because we don't have precise pointer maps for
+ // all frames.
+ return gp.syscallsp == 0 && !gp.asyncSafePoint
+}
+
// Maybe shrink the stack being used by gp.
-// Called at garbage collection time.
-// gp must be stopped, but the world need not be.
+//
+// gp must be stopped and we must own its stack. It may be in
+// _Grunning, but only if this is our own user G.
func shrinkstack(gp *g) {
- gstatus := readgstatus(gp)
if gp.stack.lo == 0 {
throw("missing stack in shrinkstack")
}
- if gstatus&_Gscan == 0 {
- throw("bad status in shrinkstack")
+ if s := readgstatus(gp); s&_Gscan == 0 {
+ // We don't own the stack via _Gscan. We could still
+ // own it if this is our own user G and we're on the
+ // system stack.
+ if !(gp == getg().m.curg && getg() != getg().m.curg && s == _Grunning) {
+ // We don't own the stack.
+ throw("bad status in shrinkstack")
+ }
+ }
+ if !isShrinkStackSafe(gp) {
+ throw("shrinkstack at bad time")
+ }
+ // Check for self-shrinks while in a libcall. These may have
+ // pointers into the stack disguised as uintptrs, but these
+ // code paths should all be nosplit.
+ if gp == getg().m.curg && gp.m.libcallsp != 0 {
+ throw("shrinking stack in libcall")
}
if debug.gcshrinkstackoff > 0 {
@@ -1116,29 +1159,20 @@
return
}
- // We can't copy the stack if we're in a syscall.
- // The syscall might have pointers into the stack.
- if gp.syscallsp != 0 {
- return
- }
- if sys.GoosWindows != 0 && gp.m != nil && gp.m.libcallsp != 0 {
- return
- }
-
if stackDebug > 0 {
print("shrinking stack ", oldsize, "->", newsize, "\n")
}
- copystack(gp, newsize, false)
+ copystack(gp, newsize)
}
// freeStackSpans frees unused stack spans at the end of GC.
func freeStackSpans() {
- lock(&stackpoolmu)
// Scan stack pools for empty stack spans.
for order := range stackpool {
- list := &stackpool[order]
+ lock(&stackpool[order].item.mu)
+ list := &stackpool[order].item.span
for s := list.first; s != nil; {
next := s.next
if s.allocCount == 0 {
@@ -1149,10 +1183,9 @@
}
s = next
}
+ unlock(&stackpool[order].item.mu)
}
- unlock(&stackpoolmu)
-
// Free large stack spans.
lock(&stackLarge.lock)
for i := range stackLarge.free {
@@ -1203,29 +1236,8 @@
minsize = sys.MinFrameSize
}
if size > minsize {
- var stkmap *stackmap
stackid := pcdata
- if f.funcID != funcID_debugCallV1 {
- stkmap = (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
- } else {
- // debugCallV1's stack map is the register map
- // at its call site.
- callerPC := frame.lr
- caller := findfunc(callerPC)
- if !caller.valid() {
- println("runtime: debugCallV1 called by unknown caller", hex(callerPC))
- throw("bad debugCallV1")
- }
- stackid = int32(-1)
- if callerPC != caller.entry {
- callerPC--
- stackid = pcdatavalue(caller, _PCDATA_RegMapIndex, callerPC, cache)
- }
- if stackid == -1 {
- stackid = 0 // in prologue
- }
- stkmap = (*stackmap)(funcdata(caller, _FUNCDATA_RegPointerMaps))
- }
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
throw("missing stackmap")
diff --git a/src/runtime/stack_test.go b/src/runtime/stack_test.go
index 143d3a9..adfc653 100644
--- a/src/runtime/stack_test.go
+++ b/src/runtime/stack_test.go
@@ -599,9 +599,6 @@
return pc[:Callers(0, pc)]
}
-// The noinline prevents this function from being inlined
-// into a wrapper. TODO: remove this when issue 28640 is fixed.
-//go:noinline
func (s structWithMethod) stack() string {
buf := make([]byte, 4<<10)
return string(buf[:Stack(buf, false)])
diff --git a/src/runtime/string.go b/src/runtime/string.go
index d198f73..0515b56 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -6,6 +6,7 @@
import (
"internal/bytealg"
+ "runtime/internal/sys"
"unsafe"
)
@@ -70,40 +71,47 @@
return concatstrings(buf, a[:])
}
+// slicebytetostring converts a byte slice to a string.
+// It is inserted by the compiler into generated code.
+// ptr is a pointer to the first element of the slice;
+// n is the length of the slice.
// Buf is a fixed-size buffer for the result,
// it is not nil if the result does not escape.
-func slicebytetostring(buf *tmpBuf, b []byte) (str string) {
- l := len(b)
- if l == 0 {
+func slicebytetostring(buf *tmpBuf, ptr *byte, n int) (str string) {
+ if n == 0 {
// Turns out to be a relatively common case.
// Consider that you want to parse out data between parens in "foo()bar",
// you find the indices and convert the subslice to string.
return ""
}
if raceenabled {
- racereadrangepc(unsafe.Pointer(&b[0]),
- uintptr(l),
+ racereadrangepc(unsafe.Pointer(ptr),
+ uintptr(n),
getcallerpc(),
funcPC(slicebytetostring))
}
if msanenabled {
- msanread(unsafe.Pointer(&b[0]), uintptr(l))
+ msanread(unsafe.Pointer(ptr), uintptr(n))
}
- if l == 1 {
- stringStructOf(&str).str = unsafe.Pointer(&staticbytes[b[0]])
+ if n == 1 {
+ p := unsafe.Pointer(&staticuint64s[*ptr])
+ if sys.BigEndian {
+ p = add(p, 7)
+ }
+ stringStructOf(&str).str = p
stringStructOf(&str).len = 1
return
}
var p unsafe.Pointer
- if buf != nil && len(b) <= len(buf) {
+ if buf != nil && n <= len(buf) {
p = unsafe.Pointer(buf)
} else {
- p = mallocgc(uintptr(len(b)), nil, false)
+ p = mallocgc(uintptr(n), nil, false)
}
stringStructOf(&str).str = p
- stringStructOf(&str).len = len(b)
- memmove(p, (*(*slice)(unsafe.Pointer(&b))).array, uintptr(len(b)))
+ stringStructOf(&str).len = n
+ memmove(p, unsafe.Pointer(ptr), uintptr(n))
return
}
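
The n == 1 fast path above now points the string at an entry of staticuint64s, a table of the uint64 values 0..255, and takes the address of that value's low-order byte; on a big-endian machine that byte sits 7 bytes into the 8-byte word, which is what the add(p, 7) accounts for. A small illustration of the layout using only the standard library:

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    func main() {
        var w [8]byte
        binary.BigEndian.PutUint64(w[:], 'A')    // store 65 as a big-endian uint64
        fmt.Println(w[7])                        // 65: low-order byte at offset 7
        binary.LittleEndian.PutUint64(w[:], 'A') // same value, little-endian
        fmt.Println(w[0])                        // 65: low-order byte at offset 0
    }
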
@@ -118,7 +126,7 @@
func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
if buf != nil && l <= len(buf) {
b = buf[:l]
- s = slicebytetostringtmp(b)
+ s = slicebytetostringtmp(&b[0], len(b))
} else {
s, b = rawstring(l)
}
@@ -139,17 +147,19 @@
// where k is []byte, T1 to Tn is a nesting of struct and array literals.
// - Used for "<"+string(b)+">" concatenation where b is []byte.
// - Used for string(b)=="foo" comparison where b is []byte.
-func slicebytetostringtmp(b []byte) string {
- if raceenabled && len(b) > 0 {
- racereadrangepc(unsafe.Pointer(&b[0]),
- uintptr(len(b)),
+func slicebytetostringtmp(ptr *byte, n int) (str string) {
+ if raceenabled && n > 0 {
+ racereadrangepc(unsafe.Pointer(ptr),
+ uintptr(n),
getcallerpc(),
funcPC(slicebytetostringtmp))
}
- if msanenabled && len(b) > 0 {
- msanread(unsafe.Pointer(&b[0]), uintptr(len(b)))
+ if msanenabled && n > 0 {
+ msanread(unsafe.Pointer(ptr), uintptr(n))
}
- return *(*string)(unsafe.Pointer(&b))
+ stringStructOf(&str).str = unsafe.Pointer(ptr)
+ stringStructOf(&str).len = n
+ return
}
func stringtoslicebyte(buf *tmpBuf, s string) []byte {
@@ -231,16 +241,10 @@
}
func intstring(buf *[4]byte, v int64) (s string) {
- if v >= 0 && v < runeSelf {
- stringStructOf(&s).str = unsafe.Pointer(&staticbytes[v])
- stringStructOf(&s).len = 1
- return
- }
-
var b []byte
if buf != nil {
b = buf[:]
- s = slicebytetostringtmp(b)
+ s = slicebytetostringtmp(&b[0], len(b))
} else {
s, b = rawstring(4)
}
@@ -495,3 +499,37 @@
b[n2] = 0 // for luck
return s[:n2]
}
+
+// parseRelease parses a dot-separated version number. It follows the
+// semver syntax, but allows the minor and patch versions to be
+// elided.
+func parseRelease(rel string) (major, minor, patch int, ok bool) {
+ // Strip anything after a dash or plus.
+ for i := 0; i < len(rel); i++ {
+ if rel[i] == '-' || rel[i] == '+' {
+ rel = rel[:i]
+ break
+ }
+ }
+
+ next := func() (int, bool) {
+ for i := 0; i < len(rel); i++ {
+ if rel[i] == '.' {
+ ver, ok := atoi(rel[:i])
+ rel = rel[i+1:]
+ return ver, ok
+ }
+ }
+ ver, ok := atoi(rel)
+ rel = ""
+ return ver, ok
+ }
+ if major, ok = next(); !ok || rel == "" {
+ return
+ }
+ if minor, ok = next(); !ok || rel == "" {
+ return
+ }
+ patch, ok = next()
+ return
+}
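
parseRelease above is used elsewhere in the runtime for version checks such as parsing a kernel release string, via the runtime-internal atoi visible in the hunk. Its contract can be mirrored with the standard library; the sketch below is only an illustration and may differ from the runtime function on odd corner inputs such as empty components:

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    // parseRelease mirrors the contract shown above: "major[.minor[.patch]]",
    // with anything after '-' or '+' stripped and extra components ignored.
    func parseRelease(rel string) (major, minor, patch int, ok bool) {
        if i := strings.IndexAny(rel, "-+"); i >= 0 {
            rel = rel[:i]
        }
        dst := []*int{&major, &minor, &patch}
        for i, part := range strings.SplitN(rel, ".", 4) {
            if i == len(dst) {
                break // e.g. "5.12.1.0": the fourth component is ignored
            }
            v, err := strconv.Atoi(part)
            if err != nil {
                return 0, 0, 0, false
            }
            *dst[i] = v
        }
        return major, minor, patch, true
    }

    func main() {
        fmt.Println(parseRelease("5.12.1-arch1")) // 5 12 1 true
        fmt.Println(parseRelease("5"))            // 5 0 0 true
        fmt.Println(parseRelease("x"))            // 0 0 0 false
    }
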
diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go
index a1716fa..b9ac667 100644
--- a/src/runtime/string_test.go
+++ b/src/runtime/string_test.go
@@ -282,7 +282,7 @@
func TestIntString(t *testing.T) {
// Non-escaping result of intstring.
s := ""
- for i := 0; i < 4; i++ {
+ for i := rune(0); i < 4; i++ {
s += string(i+'0') + string(i+'0'+1)
}
if want := "01122334"; s != want {
@@ -291,7 +291,7 @@
// Escaping result of intstring.
var a [4]string
- for i := 0; i < 4; i++ {
+ for i := rune(0); i < 4; i++ {
a[i] = string(i + '0')
}
s = a[0] + a[1] + a[2] + a[3]
@@ -454,3 +454,34 @@
}
}
}
+
+type parseReleaseTest struct {
+ in string
+ major, minor, patch int
+}
+
+var parseReleaseTests = []parseReleaseTest{
+ {"", -1, -1, -1},
+ {"x", -1, -1, -1},
+ {"5", 5, 0, 0},
+ {"5.12", 5, 12, 0},
+ {"5.12-x", 5, 12, 0},
+ {"5.12.1", 5, 12, 1},
+ {"5.12.1-x", 5, 12, 1},
+ {"5.12.1.0", 5, 12, 1},
+ {"5.20496382327982653440", -1, -1, -1},
+}
+
+func TestParseRelease(t *testing.T) {
+ for _, test := range parseReleaseTests {
+ major, minor, patch, ok := runtime.ParseRelease(test.in)
+ if !ok {
+ major, minor, patch = -1, -1, -1
+ }
+ if test.major != major || test.minor != minor || test.patch != patch {
+ t.Errorf("parseRelease(%q) = (%v, %v, %v) want (%v, %v, %v)",
+ test.in, major, minor, patch,
+ test.major, test.minor, test.patch)
+ }
+ }
+}
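
The test loops above switch the index from int to rune because converting an integer to string yields the character with that code point, not its decimal text, and a vet check introduced around this release flags the plain-int form. A short example of the distinction:

    package main

    import (
        "fmt"
        "strconv"
    )

    func main() {
        fmt.Println(string(rune(65))) // "A": code point 65
        fmt.Println(strconv.Itoa(65)) // "65": decimal text
    }
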
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 26aaf22..2c6f027 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -83,7 +83,17 @@
}
// memmove copies n bytes from "from" to "to".
-// in memmove_*.s
+//
+// memmove ensures that any pointer in "from" is written to "to" with
+// an indivisible write, so that racy reads cannot observe a
+// half-written pointer. This is necessary to prevent the garbage
+// collector from observing invalid pointers, and differs from memmove
+// in unmanaged languages. However, memmove is only required to do
+// this if "from" and "to" may contain pointers, which can only be the
+// case if "from", "to", and "n" are all be word-aligned.
+//
+// Implementations are in memmove_*.s.
+//
//go:noescape
func memmove(to, from unsafe.Pointer, n uintptr)
@@ -290,11 +300,23 @@
func systemstack_switch()
-// round n up to a multiple of a. a must be a power of 2.
-func round(n, a uintptr) uintptr {
+// alignUp rounds n up to a multiple of a. a must be a power of 2.
+func alignUp(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
}
+// alignDown rounds n down to a multiple of a. a must be a power of 2.
+func alignDown(n, a uintptr) uintptr {
+ return n &^ (a - 1)
+}
+
+// divRoundUp returns ceil(n / a).
+func divRoundUp(n, a uintptr) uintptr {
+ // a is generally a power of two. This will get inlined and
+ // the compiler will optimize the division.
+ return (n + a - 1) / a
+}
+
// checkASM reports whether assembly runtime checks have passed.
func checkASM() bool
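
The helpers above replace the old round with clearer names; their behavior is easy to pin down with a standalone copy (this snippet duplicates the bodies shown above purely for illustration):

    package main

    import "fmt"

    func alignUp(n, a uintptr) uintptr    { return (n + a - 1) &^ (a - 1) } // a must be a power of 2
    func alignDown(n, a uintptr) uintptr  { return n &^ (a - 1) }           // a must be a power of 2
    func divRoundUp(n, a uintptr) uintptr { return (n + a - 1) / a }        // ceil(n / a)

    func main() {
        fmt.Println(alignUp(13, 8))    // 16
        fmt.Println(alignDown(13, 8))  // 8
        fmt.Println(divRoundUp(13, 8)) // 2
    }
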
diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go
index 57134f7..4a1a5cc 100644
--- a/src/runtime/stubs2.go
+++ b/src/runtime/stubs2.go
@@ -5,7 +5,6 @@
// +build !plan9
// +build !solaris
// +build !windows
-// +build !nacl
// +build !js
// +build !darwin
// +build !aix
@@ -14,14 +13,19 @@
import "unsafe"
+// read calls the read system call.
+// It returns a non-negative number of bytes read or a negative errno value.
func read(fd int32, p unsafe.Pointer, n int32) int32
+
func closefd(fd int32) int32
func exit(code int32)
func usleep(usec uint32)
+// write1 calls the write system call.
+// It returns a non-negative number of bytes written or a negative errno value.
//go:noescape
-func write(fd uintptr, p unsafe.Pointer, n int32) int32
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32
//go:noescape
func open(name *byte, mode, perm int32) int32
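
Both comments added above describe the same convention: a non-negative result is a byte count, a negative result is the errno negated. A hedged caller-side sketch (checkRW is an invented helper, not part of the runtime):

    package main

    import "fmt"

    // checkRW splits a read/write1-style result into a byte count and an
    // errno, following the convention documented above.
    func checkRW(n int32) (count, errno int32) {
        if n < 0 {
            return 0, -n
        }
        return n, 0
    }

    func main() {
        fmt.Println(checkRW(42)) // 42 0: 42 bytes transferred
        fmt.Println(checkRW(-9)) // 0 9: errno 9 (EBADF on Linux)
    }
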
diff --git a/src/runtime/stubs3.go b/src/runtime/stubs3.go
index a9ff689..95eecc7 100644
--- a/src/runtime/stubs3.go
+++ b/src/runtime/stubs3.go
@@ -4,12 +4,10 @@
// +build !plan9
// +build !solaris
-// +build !windows
-// +build !nacl
// +build !freebsd
// +build !darwin
// +build !aix
package runtime
-func nanotime() int64
+func nanotime1() int64
diff --git a/src/runtime/stubs32.go b/src/runtime/stubs32.go
index 149560f..a7f52f6 100644
--- a/src/runtime/stubs32.go
+++ b/src/runtime/stubs32.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build 386 arm amd64p32 mips mipsle
+// +build 386 arm mips mipsle
package runtime
diff --git a/src/runtime/stubs_amd64.go b/src/runtime/stubs_amd64.go
new file mode 100644
index 0000000..8c14bc2
--- /dev/null
+++ b/src/runtime/stubs_amd64.go
@@ -0,0 +1,37 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// Called from compiled code; declared for vet; do NOT call from Go.
+func gcWriteBarrierCX()
+func gcWriteBarrierDX()
+func gcWriteBarrierBX()
+func gcWriteBarrierBP()
+func gcWriteBarrierSI()
+func gcWriteBarrierR8()
+func gcWriteBarrierR9()
+
+// stackcheck checks that SP is in range [g->stack.lo, g->stack.hi).
+func stackcheck()
+
+// Called from assembly only; declared for go vet.
+func settls() // argument in DI
+
+// Retpolines, used by -spectre=ret flag in cmd/asm, cmd/compile.
+func retpolineAX()
+func retpolineCX()
+func retpolineDX()
+func retpolineBX()
+func retpolineBP()
+func retpolineSI()
+func retpolineDI()
+func retpolineR8()
+func retpolineR9()
+func retpolineR10()
+func retpolineR11()
+func retpolineR12()
+func retpolineR13()
+func retpolineR14()
+func retpolineR15()
diff --git a/src/runtime/stubs_amd64x.go b/src/runtime/stubs_amd64x.go
deleted file mode 100644
index e7a1be8..0000000
--- a/src/runtime/stubs_amd64x.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64 amd64p32
-
-package runtime
-
-// stackcheck checks that SP is in range [g->stack.lo, g->stack.hi).
-func stackcheck()
-
-// Called from assembly only; declared for go vet.
-func settls() // argument in DI
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index c2f32e0..1e86662 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -148,6 +148,62 @@
return
}
+// runtime_expandFinalInlineFrame expands the final pc in stk to include all
+// "callers" if pc is inline.
+//
+//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr {
+ if len(stk) == 0 {
+ return stk
+ }
+ pc := stk[len(stk)-1]
+ tracepc := pc - 1
+
+ f := findfunc(tracepc)
+ if !f.valid() {
+ // Not a Go function.
+ return stk
+ }
+
+ inldata := funcdata(f, _FUNCDATA_InlTree)
+ if inldata == nil {
+ // Nothing inline in f.
+ return stk
+ }
+
+ // Treat the previous func as normal. We haven't actually checked, but
+ // since this pc was included in the stack, we know it shouldn't be
+ // elided.
+ lastFuncID := funcID_normal
+
+ // Remove pc from stk; we'll re-add it below.
+ stk = stk[:len(stk)-1]
+
+ // See inline expansion in gentraceback.
+ var cache pcvalueCache
+ inltree := (*[1 << 20]inlinedCall)(inldata)
+ for {
+ ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache)
+ if ix < 0 {
+ break
+ }
+ if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) {
+ // ignore wrappers
+ } else {
+ stk = append(stk, pc)
+ }
+ lastFuncID = inltree[ix].funcID
+ // Back up to an instruction in the "caller".
+ tracepc = f.entry + uintptr(inltree[ix].parentPc)
+ pc = tracepc + 1
+ }
+
+ // N.B. we want to keep the last parentPC which is not inline.
+ stk = append(stk, pc)
+
+ return stk
+}
+
// expandCgoFrames expands frame information for pc, known to be
// a non-Go function, using the cgoSymbolizer hook. expandCgoFrames
// returns nil if pc could not be expanded.
@@ -212,19 +268,39 @@
//
// See funcdata.h and ../cmd/internal/objabi/funcdata.go.
const (
- _PCDATA_RegMapIndex = 0
+ _PCDATA_RegMapIndex = 0 // if !go115ReduceLiveness
+ _PCDATA_UnsafePoint = 0 // if go115ReduceLiveness
_PCDATA_StackMapIndex = 1
_PCDATA_InlTreeIndex = 2
- _FUNCDATA_ArgsPointerMaps = 0
- _FUNCDATA_LocalsPointerMaps = 1
- _FUNCDATA_RegPointerMaps = 2
- _FUNCDATA_StackObjects = 3
- _FUNCDATA_InlTree = 4
+ _FUNCDATA_ArgsPointerMaps = 0
+ _FUNCDATA_LocalsPointerMaps = 1
+ _FUNCDATA_RegPointerMaps = 2 // if !go115ReduceLiveness
+ _FUNCDATA_StackObjects = 3
+ _FUNCDATA_InlTree = 4
+ _FUNCDATA_OpenCodedDeferInfo = 5
_ArgsSizeUnknown = -0x80000000
)
+const (
+ // PCDATA_UnsafePoint values.
+ _PCDATA_UnsafePointSafe = -1 // Safe for async preemption
+ _PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption
+
+ // _PCDATA_Restart1(2) apply on a sequence of instructions, within
+ // which if an async preemption happens, we should back off the PC
+ // to the start of the sequence when resuming.
+ // We need two so we can distinguish the start/end of the sequence
+ // in case that two sequences are next to each other.
+ _PCDATA_Restart1 = -3
+ _PCDATA_Restart2 = -4
+
+ // Like _PCDATA_Restart1, but back to function entry if async
+ // preempted.
+ _PCDATA_RestartAtEntry = -5
+)
+
// A FuncID identifies particular functions that need to be treated
// specially by the runtime.
// Note that in some situations involving plugins, there may be multiple
@@ -253,6 +329,8 @@
funcID_debugCallV1
funcID_gopanic
funcID_panicwrap
+ funcID_handleAsyncEvent
+ funcID_asyncPreempt
funcID_wrapper // any autogenerated code (hash/eq algorithms, method wrappers, etc.)
)
@@ -485,8 +563,8 @@
// given program counter address, or else nil.
//
// If pc represents multiple functions because of inlining, it returns
-// the a *Func describing the innermost function, but with an entry
-// of the outermost function.
+// the *Func describing the innermost function, but with an entry of
+// the outermost function.
func FuncForPC(pc uintptr) *Func {
f := findfunc(pc)
if !f.valid() {
@@ -611,7 +689,15 @@
idx++
}
}
- return funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[datap.ftab[idx].funcoff])), datap}
+ funcoff := datap.ftab[idx].funcoff
+ if funcoff == ^uintptr(0) {
+ // With multiple text sections, there may be functions inserted by the external
+ // linker that are not known by Go. This means there may be holes in the PC
+ // range covered by the func table. The invalid funcoff value indicates a hole.
+ // See also cmd/link/internal/ld/pcln.go:pclntab
+ return funcInfo{}
+ }
+ return funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[funcoff])), datap}
}
type pcvalueCache struct {
@@ -634,9 +720,11 @@
return (targetpc / sys.PtrSize) % uintptr(len(pcvalueCache{}.entries))
}
-func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, strict bool) int32 {
+// Returns the PCData value, and the PC where this value starts.
+// TODO: the start PC is returned only when cache is nil.
+func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, strict bool) (int32, uintptr) {
if off == 0 {
- return -1
+ return -1, 0
}
// Check the cache. This speeds up walks of deep stacks, which
@@ -655,7 +743,7 @@
// fail in the first clause.
ent := &cache.entries[x][i]
if ent.off == off && ent.targetpc == targetpc {
- return ent.val
+ return ent.val, 0
}
}
}
@@ -665,11 +753,12 @@
print("runtime: no module data for ", hex(f.entry), "\n")
throw("no module data")
}
- return -1
+ return -1, 0
}
datap := f.datap
p := datap.pclntable[off:]
pc := f.entry
+ prevpc := pc
val := int32(-1)
for {
var ok bool
@@ -696,14 +785,15 @@
}
}
- return val
+ return val, prevpc
}
+ prevpc = pc
}
// If there was a table, it should have covered all program counters.
// If not, something is wrong.
if panicking != 0 || !strict {
- return -1
+ return -1, 0
}
print("runtime: invalid pc-encoded table f=", funcname(f), " pc=", hex(pc), " targetpc=", hex(targetpc), " tab=", p, "\n")
@@ -721,7 +811,7 @@
}
throw("invalid runtime symbol table")
- return -1
+ return -1, 0
}
func cfuncname(f funcInfo) *byte {
@@ -735,13 +825,15 @@
return gostringnocopy(cfuncname(f))
}
-func funcnameFromNameoff(f funcInfo, nameoff int32) string {
- datap := f.datap
+func cfuncnameFromNameoff(f funcInfo, nameoff int32) *byte {
if !f.valid() {
- return ""
+ return nil
}
- cstr := &datap.pclntable[nameoff]
- return gostringnocopy(cstr)
+ return &f.datap.pclntable[nameoff]
+}
+
+func funcnameFromNameoff(f funcInfo, nameoff int32) string {
+ return gostringnocopy(cfuncnameFromNameoff(f, nameoff))
}
func funcfile(f funcInfo, fileno int32) string {
@@ -757,9 +849,9 @@
if !f.valid() {
return "?", 0
}
- fileno := int(pcvalue(f, f.pcfile, targetpc, nil, strict))
- line = pcvalue(f, f.pcln, targetpc, nil, strict)
- if fileno == -1 || line == -1 || fileno >= len(datap.filetab) {
+ fileno, _ := pcvalue(f, f.pcfile, targetpc, nil, strict)
+ line, _ = pcvalue(f, f.pcln, targetpc, nil, strict)
+ if fileno == -1 || line == -1 || int(fileno) >= len(datap.filetab) {
// print("looking for ", hex(targetpc), " in ", funcname(f), " got file=", fileno, " line=", lineno, "\n")
return "?", 0
}
@@ -772,13 +864,32 @@
}
func funcspdelta(f funcInfo, targetpc uintptr, cache *pcvalueCache) int32 {
- x := pcvalue(f, f.pcsp, targetpc, cache, true)
+ x, _ := pcvalue(f, f.pcsp, targetpc, cache, true)
if x&(sys.PtrSize-1) != 0 {
print("invalid spdelta ", funcname(f), " ", hex(f.entry), " ", hex(targetpc), " ", hex(f.pcsp), " ", x, "\n")
}
return x
}
+// funcMaxSPDelta returns the maximum spdelta at any point in f.
+func funcMaxSPDelta(f funcInfo) int32 {
+ datap := f.datap
+ p := datap.pclntable[f.pcsp:]
+ pc := f.entry
+ val := int32(-1)
+ max := int32(0)
+ for {
+ var ok bool
+ p, ok = step(p, &pc, &val, pc == f.entry)
+ if !ok {
+ return max
+ }
+ if val > max {
+ max = val
+ }
+ }
+}
+
func pcdatastart(f funcInfo, table int32) int32 {
return *(*int32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4))
}
@@ -787,14 +898,25 @@
if table < 0 || table >= f.npcdata {
return -1
}
- return pcvalue(f, pcdatastart(f, table), targetpc, cache, true)
+ r, _ := pcvalue(f, pcdatastart(f, table), targetpc, cache, true)
+ return r
}
func pcdatavalue1(f funcInfo, table int32, targetpc uintptr, cache *pcvalueCache, strict bool) int32 {
if table < 0 || table >= f.npcdata {
return -1
}
- return pcvalue(f, pcdatastart(f, table), targetpc, cache, strict)
+ r, _ := pcvalue(f, pcdatastart(f, table), targetpc, cache, strict)
+ return r
+}
+
+// Like pcdatavalue, but also returns the start PC of this PCData value.
+// It doesn't take a cache.
+func pcdatavalue2(f funcInfo, table int32, targetpc uintptr) (int32, uintptr) {
+ if table < 0 || table >= f.npcdata {
+ return -1, 0
+ }
+ return pcvalue(f, pcdatastart(f, table), targetpc, nil, true)
}
func funcdata(f funcInfo, i uint8) unsafe.Pointer {
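
runtime_expandFinalInlineFrame above is internal plumbing for runtime/pprof, but the same inline-aware expansion is visible at the user level through runtime.CallersFrames, which yields one frame per logical call even when several calls were inlined into a single PC. A small example (whether anything is actually inlined here depends on the compiler):

    package main

    import (
        "fmt"
        "runtime"
    )

    func collect() []uintptr {
        pc := make([]uintptr, 32)
        return pc[:runtime.Callers(1, pc)] // skip the runtime.Callers frame itself
    }

    func main() {
        frames := runtime.CallersFrames(collect())
        for {
            f, more := frames.Next()
            fmt.Println(f.Function) // one line per logical frame, inlined or not
            if !more {
                break
            }
        }
    }
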
diff --git a/src/runtime/sys_aix_ppc64.s b/src/runtime/sys_aix_ppc64.s
index 75f4178..a56d043 100644
--- a/src/runtime/sys_aix_ppc64.s
+++ b/src/runtime/sys_aix_ppc64.s
@@ -258,8 +258,8 @@
CSYSCALL()
RET
-// Runs on OS stack, called from runtime·write.
-TEXT runtime·write1(SB),NOSPLIT,$0-28
+// Runs on OS stack, called from runtime·write1.
+TEXT runtime·write2(SB),NOSPLIT,$0-28
MOVD fd+0(FP), R3
MOVD p+8(FP), R4
MOVW n+16(FP), R5
diff --git a/src/runtime/sys_darwin.go b/src/runtime/sys_darwin.go
index 376f76d..28c500a 100644
--- a/src/runtime/sys_darwin.go
+++ b/src/runtime/sys_darwin.go
@@ -60,18 +60,29 @@
//go:nosplit
//go:cgo_unsafe_args
func syscall_syscall(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
+ entersyscall()
libcCall(unsafe.Pointer(funcPC(syscall)), unsafe.Pointer(&fn))
exitsyscall()
return
}
func syscall()
+//go:linkname syscall_syscallX syscall.syscallX
+//go:nosplit
+//go:cgo_unsafe_args
+func syscall_syscallX(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
+ entersyscallblock()
+ libcCall(unsafe.Pointer(funcPC(syscallX)), unsafe.Pointer(&fn))
+ exitsyscall()
+ return
+}
+func syscallX()
+
//go:linkname syscall_syscall6 syscall.syscall6
//go:nosplit
//go:cgo_unsafe_args
func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
+ entersyscall()
libcCall(unsafe.Pointer(funcPC(syscall6)), unsafe.Pointer(&fn))
exitsyscall()
return
@@ -82,7 +93,7 @@
//go:nosplit
//go:cgo_unsafe_args
func syscall_syscall6X(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
+ entersyscall()
libcCall(unsafe.Pointer(funcPC(syscall6X)), unsafe.Pointer(&fn))
exitsyscall()
return
@@ -93,7 +104,7 @@
//go:nosplit
//go:cgo_unsafe_args
func syscall_syscallPtr(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
+ entersyscall()
libcCall(unsafe.Pointer(funcPC(syscallPtr)), unsafe.Pointer(&fn))
exitsyscall()
return
@@ -116,6 +127,19 @@
return
}
+// syscallNoErr is used in crypto/x509 to call into Security.framework and CF.
+
+//go:linkname crypto_x509_syscall crypto/x509/internal/macOS.syscall
+//go:nosplit
+//go:cgo_unsafe_args
+func crypto_x509_syscall(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1 uintptr) {
+ entersyscall()
+ libcCall(unsafe.Pointer(funcPC(syscallNoErr)), unsafe.Pointer(&fn))
+ exitsyscall()
+ return
+}
+func syscallNoErr()
+
// The *_trampoline functions convert from the Go calling convention to the C calling convention
// and then call the underlying libc function. They are defined in sys_darwin_$ARCH.s.
@@ -162,6 +186,18 @@
}
func pthread_self_trampoline()
+//go:nosplit
+//go:cgo_unsafe_args
+func pthread_kill(t pthread, sig uint32) {
+ libcCall(unsafe.Pointer(funcPC(pthread_kill_trampoline)), unsafe.Pointer(&t))
+ return
+}
+func pthread_kill_trampoline()
+
+// mmap is used to do low-level memory allocation via mmap. Don't allow stack
+// splits, since this function (used by sysAlloc) is called in a lot of low-level
+// parts of the runtime and callers often assume it won't acquire any locks.
+//go:nosplit
func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) {
args := struct {
addr unsafe.Pointer
@@ -230,7 +266,7 @@
//go:nosplit
//go:cgo_unsafe_args
-func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32 {
return libcCall(unsafe.Pointer(funcPC(write_trampoline)), unsafe.Pointer(&fd))
}
func write_trampoline()
@@ -244,7 +280,7 @@
//go:nosplit
//go:cgo_unsafe_args
-func nanotime() int64 {
+func nanotime1() int64 {
var r struct {
t int64 // raw timer
numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom.
@@ -266,7 +302,7 @@
//go:nosplit
//go:cgo_unsafe_args
-func walltime() (int64, int32) {
+func walltime1() (int64, int32) {
var t timeval
libcCall(unsafe.Pointer(funcPC(walltime_trampoline)), unsafe.Pointer(&t))
return int64(t.tv_sec), 1000 * t.tv_usec
@@ -415,6 +451,8 @@
//go:cgo_import_dynamic libc_pthread_attr_getstacksize pthread_attr_getstacksize "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_pthread_create pthread_create "/usr/lib/libSystem.B.dylib"
+//go:cgo_import_dynamic libc_pthread_self pthread_self "/usr/lib/libSystem.B.dylib"
+//go:cgo_import_dynamic libc_pthread_kill pthread_kill "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_exit exit "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_raise raise "/usr/lib/libSystem.B.dylib"
@@ -452,6 +490,8 @@
//go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib"
-// Magic incantation to get libSystem actually dynamically linked.
+// Magic incantation to get libSystem and friends actually dynamically linked.
// TODO: Why does the code require this? See cmd/link/internal/ld/go.go
//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib"
+//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/Security.framework/Versions/A/Security"
+//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation"
diff --git a/src/runtime/sys_darwin_32.go b/src/runtime/sys_darwin_32.go
deleted file mode 100644
index f126be8..0000000
--- a/src/runtime/sys_darwin_32.go
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin
-// +build 386 arm
-
-package runtime
-
-import "unsafe"
-
-//go:linkname syscall_syscall9 syscall.syscall9
-//go:nosplit
-//go:cgo_unsafe_args
-func syscall_syscall9(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
- libcCall(unsafe.Pointer(funcPC(syscall9)), unsafe.Pointer(&fn))
- exitsyscall()
- return
-}
-func syscall9()
diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s
deleted file mode 100644
index e653c54..0000000
--- a/src/runtime/sys_darwin_386.s
+++ /dev/null
@@ -1,914 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// System calls and other sys.stuff for 386, Darwin
-// System calls are implemented in libSystem, this file contains
-// trampolines that convert from Go to C calling convention.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "textflag.h"
-
-// Exit the entire program (like C exit)
-TEXT runtime·exit_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP // allocate space for callee args (must be 8 mod 16)
- MOVL 16(SP), CX // arg ptr
- MOVL 0(CX), AX // arg 1 exit status
- MOVL AX, 0(SP)
- CALL libc_exit(SB)
- MOVL $0xf1, 0xf1 // crash
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·open_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 name
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 mode
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 perm
- MOVL AX, 8(SP)
- CALL libc_open(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·close_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 fd
- MOVL AX, 0(SP)
- CALL libc_close(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·read_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 fd
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 buf
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 count
- MOVL AX, 8(SP)
- CALL libc_read(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·write_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 fd
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 buf
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 count
- MOVL AX, 8(SP)
- CALL libc_write(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pipe_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), AX // arg 1 pipefd
- MOVL AX, 0(SP)
- CALL libc_pipe(SB)
- TESTL AX, AX
- JEQ 3(PC)
- CALL libc_error(SB) // return negative errno value
- NEGL AX
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·mmap_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 addr
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 len
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 prot
- MOVL AX, 8(SP)
- MOVL 12(CX), AX // arg 4 flags
- MOVL AX, 12(SP)
- MOVL 16(CX), AX // arg 5 fid
- MOVL AX, 16(SP)
- MOVL 20(CX), AX // arg 6 offset
- MOVL AX, 20(SP)
- CALL libc_mmap(SB)
- XORL DX, DX
- CMPL AX, $-1
- JNE ok
- CALL libc_error(SB)
- MOVL (AX), DX // errno
- XORL AX, AX
-ok:
- MOVL 32(SP), CX
- MOVL AX, 24(CX) // result pointer
- MOVL DX, 28(CX) // errno
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·madvise_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 addr
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 len
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 advice
- MOVL AX, 8(SP)
- CALL libc_madvise(SB)
- // ignore failure - maybe pages are locked
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·munmap_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 addr
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 len
- MOVL AX, 4(SP)
- CALL libc_munmap(SB)
- TESTL AX, AX
- JEQ 2(PC)
- MOVL $0xf1, 0xf1 // crash
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·setitimer_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 mode
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 new
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 old
- MOVL AX, 8(SP)
- CALL libc_setitimer(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·walltime_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), AX
- MOVL AX, 0(SP) // *timeval
- MOVL $0, 4(SP) // no timezone needed
- CALL libc_gettimeofday(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-GLOBL timebase<>(SB),NOPTR,$(machTimebaseInfo__size)
-
-TEXT runtime·nanotime_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8+(machTimebaseInfo__size+15)/16*16, SP
- CALL libc_mach_absolute_time(SB)
- MOVL 16+(machTimebaseInfo__size+15)/16*16(SP), CX
- MOVL AX, 0(CX)
- MOVL DX, 4(CX)
- MOVL timebase<>+machTimebaseInfo_denom(SB), DI // atomic read
- MOVL timebase<>+machTimebaseInfo_numer(SB), SI
- TESTL DI, DI
- JNE initialized
-
- LEAL 4(SP), AX
- MOVL AX, 0(SP)
- CALL libc_mach_timebase_info(SB)
- MOVL 4+machTimebaseInfo_numer(SP), SI
- MOVL 4+machTimebaseInfo_denom(SP), DI
-
- MOVL SI, timebase<>+machTimebaseInfo_numer(SB)
- MOVL DI, AX
- XCHGL AX, timebase<>+machTimebaseInfo_denom(SB) // atomic write
- MOVL 16+(machTimebaseInfo__size+15)/16*16(SP), CX
-
-initialized:
- MOVL SI, 8(CX)
- MOVL DI, 12(CX)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·sigaction_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 sig
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 new
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 old
- MOVL AX, 8(SP)
- CALL libc_sigaction(SB)
- TESTL AX, AX
- JEQ 2(PC)
- MOVL $0xf1, 0xf1 // crash
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·sigprocmask_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 how
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 new
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 old
- MOVL AX, 8(SP)
- CALL libc_pthread_sigmask(SB)
- TESTL AX, AX
- JEQ 2(PC)
- MOVL $0xf1, 0xf1 // crash
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·sigaltstack_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 new
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 old
- MOVL AX, 4(SP)
- CALL libc_sigaltstack(SB)
- TESTL AX, AX
- JEQ 2(PC)
- MOVL $0xf1, 0xf1 // crash
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·raiseproc_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- CALL libc_getpid(SB)
- MOVL AX, 0(SP) // arg 1 pid
- MOVL 16(SP), CX
- MOVL 0(CX), AX
- MOVL AX, 4(SP) // arg 2 signal
- CALL libc_kill(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·sigfwd(SB),NOSPLIT,$0-16
- MOVL fn+0(FP), AX
- MOVL sig+4(FP), BX
- MOVL info+8(FP), CX
- MOVL ctx+12(FP), DX
- MOVL SP, SI
- SUBL $32, SP
- ANDL $~15, SP // align stack: handler might be a C function
- MOVL BX, 0(SP)
- MOVL CX, 4(SP)
- MOVL DX, 8(SP)
- MOVL SI, 12(SP) // save SI: handler might be a Go function
- CALL AX
- MOVL 12(SP), AX
- MOVL AX, SP
- RET
-
-// Sigtramp's job is to call the actual signal handler.
-// It is called with the C calling convention, and calls out
-// to sigtrampgo with the Go calling convention.
-TEXT runtime·sigtramp(SB),NOSPLIT,$0
- SUBL $28, SP
-
- // Save callee-save registers.
- MOVL BP, 12(SP)
- MOVL BX, 16(SP)
- MOVL SI, 20(SP)
- MOVL DI, 24(SP)
-
- MOVL 32(SP), AX
- MOVL AX, 0(SP) // arg 1 signal number
- MOVL 36(SP), AX
- MOVL AX, 4(SP) // arg 2 siginfo
- MOVL 40(SP), AX
- MOVL AX, 8(SP) // arg 3 ctxt
- CALL runtime·sigtrampgo(SB)
-
- // Restore callee-save registers.
- MOVL 12(SP), BP
- MOVL 16(SP), BX
- MOVL 20(SP), SI
- MOVL 24(SP), DI
-
- ADDL $28, SP
- RET
-
-TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
- JMP runtime·sigtramp(SB)
-
-TEXT runtime·usleep_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 usec
- MOVL AX, 0(SP)
- CALL libc_usleep(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-// func setldt(entry int, address int, limit int)
-TEXT runtime·setldt(SB),NOSPLIT,$32
- // Nothing to do on Darwin, pthread already set thread-local storage up.
- RET
-
-TEXT runtime·sysctl_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 mib
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 miblen
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 out
- MOVL AX, 8(SP)
- MOVL 12(CX), AX // arg 4 size
- MOVL AX, 12(SP)
- MOVL 16(CX), AX // arg 5 dst
- MOVL AX, 16(SP)
- MOVL 20(CX), AX // arg 6 ndst
- MOVL AX, 20(SP)
- CALL libc_sysctl(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·kqueue_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- CALL libc_kqueue(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·kevent_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 kq
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 ch
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 nch
- MOVL AX, 8(SP)
- MOVL 12(CX), AX // arg 4 ev
- MOVL AX, 12(SP)
- MOVL 16(CX), AX // arg 5 nev
- MOVL AX, 16(SP)
- MOVL 20(CX), AX // arg 6 ts
- MOVL AX, 20(SP)
- CALL libc_kevent(SB)
- CMPL AX, $-1
- JNE ok
- CALL libc_error(SB)
- MOVL (AX), AX // errno
- NEGL AX // caller wants it as a negative error code
-ok:
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·fcntl_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 fd
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 cmd
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 arg
- MOVL AX, 8(SP)
- CALL libc_fcntl(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-// mstart_stub is the first function executed on a new thread started by pthread_create.
-// It just does some low-level setup and then calls mstart.
-// Note: called with the C calling convention.
-TEXT runtime·mstart_stub(SB),NOSPLIT,$0
- // The value at SP+4 points to the m.
- // We are already on m's g0 stack.
-
- // Save callee-save registers.
- SUBL $16, SP
- MOVL BP, 0(SP)
- MOVL BX, 4(SP)
- MOVL SI, 8(SP)
- MOVL DI, 12(SP)
-
- MOVL SP, AX // hide argument read from vet (vet thinks this function is using the Go calling convention)
- MOVL 20(AX), DI // m
- MOVL m_g0(DI), DX // g
-
- // Initialize TLS entry.
- // See cmd/link/internal/ld/sym.go:computeTLSOffset.
- MOVL DX, 0x18(GS)
-
- // Someday the convention will be D is always cleared.
- CLD
-
- CALL runtime·mstart(SB)
-
- // Restore callee-save registers.
- MOVL 0(SP), BP
- MOVL 4(SP), BX
- MOVL 8(SP), SI
- MOVL 12(SP), DI
-
- // Go is all done with this OS thread.
- // Tell pthread everything is ok (we never join with this thread, so
- // the value here doesn't really matter).
- XORL AX, AX
-
- ADDL $16, SP
- RET
-
-TEXT runtime·pthread_attr_init_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 attr
- MOVL AX, 0(SP)
- CALL libc_pthread_attr_init(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_attr_getstacksize_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 attr
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 size
- MOVL AX, 4(SP)
- CALL libc_pthread_attr_getstacksize(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_attr_setdetachstate_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 attr
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 state
- MOVL AX, 4(SP)
- CALL libc_pthread_attr_setdetachstate(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_create_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- LEAL 16(SP), AX // arg "0" &threadid (which we throw away)
- MOVL AX, 0(SP)
- MOVL 0(CX), AX // arg 1 attr
- MOVL AX, 4(SP)
- MOVL 4(CX), AX // arg 2 start
- MOVL AX, 8(SP)
- MOVL 8(CX), AX // arg 3 arg
- MOVL AX, 12(SP)
- CALL libc_pthread_create(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·raise_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 sig
- MOVL AX, 0(SP)
- CALL libc_raise(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_mutex_init_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 mutex
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 attr
- MOVL AX, 4(SP)
- CALL libc_pthread_mutex_init(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_mutex_lock_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 mutex
- MOVL AX, 0(SP)
- CALL libc_pthread_mutex_lock(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_mutex_unlock_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 mutex
- MOVL AX, 0(SP)
- CALL libc_pthread_mutex_unlock(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_cond_init_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 cond
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 attr
- MOVL AX, 4(SP)
- CALL libc_pthread_cond_init(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_cond_wait_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 cond
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 mutex
- MOVL AX, 4(SP)
- CALL libc_pthread_cond_wait(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_cond_timedwait_relative_np_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL 0(CX), AX // arg 1 cond
- MOVL AX, 0(SP)
- MOVL 4(CX), AX // arg 2 mutex
- MOVL AX, 4(SP)
- MOVL 8(CX), AX // arg 3 timeout
- MOVL AX, 8(SP)
- CALL libc_pthread_cond_timedwait_relative_np(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-TEXT runtime·pthread_cond_signal_trampoline(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $8, SP
- MOVL 16(SP), CX
- MOVL 0(CX), AX // arg 1 cond
- MOVL AX, 0(SP)
- CALL libc_pthread_cond_signal(SB)
- MOVL BP, SP
- POPL BP
- RET
-
-// syscall calls a function in libc on behalf of the syscall package.
-// syscall takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL (0*4)(CX), AX // fn
- MOVL (1*4)(CX), DX // a1
- MOVL DX, 0(SP)
- MOVL (2*4)(CX), DX // a2
- MOVL DX, 4(SP)
- MOVL (3*4)(CX), DX // a3
- MOVL DX, 8(SP)
-
- CALL AX
-
- MOVL 32(SP), CX
- MOVL AX, (4*4)(CX) // r1
- MOVL DX, (5*4)(CX) // r2
-
- // Standard libc functions return -1 on error
- // and set errno.
- CMPL AX, $-1
- JNE ok
-
- // Get error code from libc.
- CALL libc_error(SB)
- MOVL (AX), AX
- MOVL 32(SP), CX
- MOVL AX, (6*4)(CX) // err
-
-ok:
- XORL AX, AX // no error (it's ignored anyway)
- MOVL BP, SP
- POPL BP
- RET
-
-// syscallPtr is like syscall except the libc function reports an
-// error by returning NULL and setting errno.
-TEXT runtime·syscallPtr(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL (0*4)(CX), AX // fn
- MOVL (1*4)(CX), DX // a1
- MOVL DX, 0(SP)
- MOVL (2*4)(CX), DX // a2
- MOVL DX, 4(SP)
- MOVL (3*4)(CX), DX // a3
- MOVL DX, 8(SP)
-
- CALL AX
-
- MOVL 32(SP), CX
- MOVL AX, (4*4)(CX) // r1
- MOVL DX, (5*4)(CX) // r2
-
- // syscallPtr libc functions return NULL on error
- // and set errno.
- TESTL AX, AX
- JNE ok
-
- // Get error code from libc.
- CALL libc_error(SB)
- MOVL (AX), AX
- MOVL 32(SP), CX
- MOVL AX, (6*4)(CX) // err
-
-ok:
- XORL AX, AX // no error (it's ignored anyway)
- MOVL BP, SP
- POPL BP
- RET
-
-// syscall6 calls a function in libc on behalf of the syscall package.
-// syscall6 takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall6 must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall6(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL (0*4)(CX), AX // fn
- MOVL (1*4)(CX), DX // a1
- MOVL DX, 0(SP)
- MOVL (2*4)(CX), DX // a2
- MOVL DX, 4(SP)
- MOVL (3*4)(CX), DX // a3
- MOVL DX, 8(SP)
- MOVL (4*4)(CX), DX // a4
- MOVL DX, 12(SP)
- MOVL (5*4)(CX), DX // a5
- MOVL DX, 16(SP)
- MOVL (6*4)(CX), DX // a6
- MOVL DX, 20(SP)
-
- CALL AX
-
- MOVL 32(SP), CX
- MOVL AX, (7*4)(CX) // r1
- MOVL DX, (8*4)(CX) // r2
-
- // Standard libc functions return -1 on error
- // and set errno.
- CMPL AX, $-1
- JNE ok
-
- // Get error code from libc.
- CALL libc_error(SB)
- MOVL (AX), AX
- MOVL 32(SP), CX
- MOVL AX, (9*4)(CX) // err
-
-ok:
- XORL AX, AX // no error (it's ignored anyway)
- MOVL BP, SP
- POPL BP
- RET
-
-// syscall6X calls a function in libc on behalf of the syscall package.
-// syscall6X takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall6X must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall6X(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $24, SP
- MOVL 32(SP), CX
- MOVL (0*4)(CX), AX // fn
- MOVL (1*4)(CX), DX // a1
- MOVL DX, 0(SP)
- MOVL (2*4)(CX), DX // a2
- MOVL DX, 4(SP)
- MOVL (3*4)(CX), DX // a3
- MOVL DX, 8(SP)
- MOVL (4*4)(CX), DX // a4
- MOVL DX, 12(SP)
- MOVL (5*4)(CX), DX // a5
- MOVL DX, 16(SP)
- MOVL (6*4)(CX), DX // a6
- MOVL DX, 20(SP)
-
- CALL AX
-
- MOVL 32(SP), CX
- MOVL AX, (7*4)(CX) // r1
- MOVL DX, (8*4)(CX) // r2
-
- // Standard libc functions return -1 on error
- // and set errno.
- CMPL AX, $-1
- JNE ok
- CMPL DX, $-1
- JNE ok
-
- // Get error code from libc.
- CALL libc_error(SB)
- MOVL (AX), AX
- MOVL 32(SP), CX
- MOVL AX, (9*4)(CX) // err
-
-ok:
- XORL AX, AX // no error (it's ignored anyway)
- MOVL BP, SP
- POPL BP
- RET
-
-// syscall9 calls a function in libc on behalf of the syscall package.
-// syscall9 takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// a7 uintptr
-// a8 uintptr
-// a9 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall9 must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall9(SB),NOSPLIT,$0
- PUSHL BP
- MOVL SP, BP
- SUBL $40, SP
- MOVL 48(SP), CX
- MOVL (0*4)(CX), AX // fn
- MOVL (1*4)(CX), DX // a1
- MOVL DX, 0(SP)
- MOVL (2*4)(CX), DX // a2
- MOVL DX, 4(SP)
- MOVL (3*4)(CX), DX // a3
- MOVL DX, 8(SP)
- MOVL (4*4)(CX), DX // a4
- MOVL DX, 12(SP)
- MOVL (5*4)(CX), DX // a5
- MOVL DX, 16(SP)
- MOVL (6*4)(CX), DX // a6
- MOVL DX, 20(SP)
- MOVL (7*4)(CX), DX // a7
- MOVL DX, 24(SP)
- MOVL (8*4)(CX), DX // a8
- MOVL DX, 28(SP)
- MOVL (9*4)(CX), DX // a9
- MOVL DX, 32(SP)
-
- CALL AX
-
- MOVL 48(SP), CX
- MOVL AX, (10*4)(CX) // r1
- MOVL DX, (11*4)(CX) // r2
-
- // Standard libc functions return -1 on error
- // and set errno.
- CMPL AX, $-1
- JNE ok
-
- // Get error code from libc.
- CALL libc_error(SB)
- MOVL (AX), AX
- MOVL 48(SP), CX
- MOVL AX, (12*4)(CX) // err
-
-ok:
- XORL AX, AX // no error (it's ignored anyway)
- MOVL BP, SP
- POPL BP
- RET
diff --git a/src/runtime/sys_darwin_64.go b/src/runtime/sys_darwin_64.go
deleted file mode 100644
index 07b0bb5..0000000
--- a/src/runtime/sys_darwin_64.go
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin
-// +build amd64 arm64
-
-package runtime
-
-import "unsafe"
-
-//go:linkname syscall_syscallX syscall.syscallX
-//go:nosplit
-//go:cgo_unsafe_args
-func syscall_syscallX(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
- entersyscallblock()
- libcCall(unsafe.Pointer(funcPC(syscallX)), unsafe.Pointer(&fn))
- exitsyscall()
- return
-}
-func syscallX()
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index 87c8db8..825852d 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -46,6 +46,12 @@
MOVL 16(DI), DX // arg 3 count
MOVL 0(DI), DI // arg 1 fd
CALL libc_read(SB)
+ TESTL AX, AX
+ JGE noerr
+ CALL libc_error(SB)
+ MOVL (AX), AX
+ NEGL AX // caller expects negative errno value
+noerr:
POPQ BP
RET
@@ -56,6 +62,12 @@
MOVL 16(DI), DX // arg 3 count
MOVQ 0(DI), DI // arg 1 fd
CALL libc_write(SB)
+ TESTL AX, AX
+ JGE noerr
+ CALL libc_error(SB)
+ MOVL (AX), AX
+ NEGL AX // caller expects negative errno value
+noerr:
POPQ BP
RET
@@ -554,6 +566,24 @@
POPQ BP
RET
+TEXT runtime·pthread_self_trampoline(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ MOVQ DI, BX // BX is caller-save
+ CALL libc_pthread_self(SB)
+ MOVQ AX, 0(BX) // return value
+ POPQ BP
+ RET
+
+TEXT runtime·pthread_kill_trampoline(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ MOVQ 8(DI), SI // arg 2 sig
+ MOVQ 0(DI), DI // arg 1 thread
+ CALL libc_pthread_kill(SB)
+ POPQ BP
+ RET
+
// syscall calls a function in libc on behalf of the syscall package.
// syscall takes a pointer to a struct like:
// struct {
@@ -795,3 +825,29 @@
MOVQ BP, SP
POPQ BP
RET
+
+// syscallNoErr is like syscall6 but does not check for errors, and
+// only returns one value, for use with standard C ABI library functions.
+TEXT runtime·syscallNoErr(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ SUBQ $16, SP
+ MOVQ (0*8)(DI), R11// fn
+ MOVQ (2*8)(DI), SI // a2
+ MOVQ (3*8)(DI), DX // a3
+ MOVQ (4*8)(DI), CX // a4
+ MOVQ (5*8)(DI), R8 // a5
+ MOVQ (6*8)(DI), R9 // a6
+ MOVQ DI, (SP)
+ MOVQ (1*8)(DI), DI // a1
+ XORL AX, AX // vararg: say "no float args"
+
+ CALL R11
+
+ MOVQ (SP), DI
+ MOVQ AX, (7*8)(DI) // r1
+
+ XORL AX, AX // no error (it's ignored anyway)
+ MOVQ BP, SP
+ POPQ BP
+ RET
diff --git a/src/runtime/sys_darwin_arm.s b/src/runtime/sys_darwin_arm.s
deleted file mode 100644
index 996f802..0000000
--- a/src/runtime/sys_darwin_arm.s
+++ /dev/null
@@ -1,589 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// System calls and other sys.stuff for ARM, Darwin
-// System calls are implemented in libSystem, this file contains
-// trampolines that convert from Go to C calling convention.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "textflag.h"
-
-TEXT notok<>(SB),NOSPLIT,$0
- MOVW $0, R8
- MOVW R8, (R8)
- B 0(PC)
-
-TEXT runtime·open_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 mode
- MOVW 8(R0), R2 // arg 3 perm
- MOVW 0(R0), R0 // arg 1 name
- BL libc_open(SB)
- RET
-
-TEXT runtime·close_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 fd
- BL libc_close(SB)
- RET
-
-TEXT runtime·write_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 buf
- MOVW 8(R0), R2 // arg 3 count
- MOVW 0(R0), R0 // arg 1 fd
- BL libc_write(SB)
- RET
-
-TEXT runtime·read_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 buf
- MOVW 8(R0), R2 // arg 3 count
- MOVW 0(R0), R0 // arg 1 fd
- BL libc_read(SB)
- RET
-
-TEXT runtime·pipe_trampoline(SB),NOSPLIT,$0
- BL libc_pipe(SB) // pointer already in R0
- CMP $0, R0
- BEQ 3(PC)
- BL libc_error(SB) // return negative errno value
- RSB $0, R0, R0
- RET
-
-TEXT runtime·exit_trampoline(SB),NOSPLIT|NOFRAME,$0
- MOVW 0(R0), R0 // arg 0 code
- BL libc_exit(SB)
- MOVW $1234, R0
- MOVW $1002, R1
- MOVW R0, (R1) // fail hard
-
-TEXT runtime·raiseproc_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R8 // signal
- BL libc_getpid(SB)
- // arg 1 pid already in R0 from getpid
- MOVW R8, R1 // arg 2 signal
- BL libc_kill(SB)
- RET
-
-TEXT runtime·mmap_trampoline(SB),NOSPLIT,$0
- MOVW R0, R8
- MOVW 0(R8), R0 // arg 1 addr
- MOVW 4(R8), R1 // arg 2 len
- MOVW 8(R8), R2 // arg 3 prot
- MOVW 12(R8), R3 // arg 4 flags
- MOVW 16(R8), R4 // arg 5 fid
- MOVW 20(R8), R5 // arg 6 offset
- MOVW $0, R6 // off_t is uint64_t
- // Only R0-R3 are used for arguments, the rest
- // go on the stack.
- MOVM.DB.W [R4-R6], (R13)
- BL libc_mmap(SB)
- ADD $12, R13
- MOVW $0, R1
- MOVW $-1, R2
- CMP R0, R2
- BNE ok
- BL libc_error(SB)
- MOVW (R0), R1
- MOVW $0, R0
-ok:
- MOVW R0, 24(R8) // ret 1 addr
- MOVW R1, 28(R8) // ret 2 err
- RET
-
-TEXT runtime·munmap_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 len
- MOVW 0(R0), R0 // arg 1 addr
- BL libc_munmap(SB)
- MOVW $-1, R2
- CMP R0, R2
- BL.EQ notok<>(SB)
- RET
-
-TEXT runtime·madvise_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 len
- MOVW 8(R0), R2 // arg 3 advice
- MOVW 0(R0), R0 // arg 1 addr
- BL libc_madvise(SB)
- MOVW $-1, R2
- CMP R0, R2
- BL.EQ notok<>(SB)
- RET
-
-TEXT runtime·setitimer_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 new
- MOVW 8(R0), R2 // arg 3 old
- MOVW 0(R0), R0 // arg 1 which
- BL libc_setitimer(SB)
- RET
-
-TEXT runtime·walltime_trampoline(SB),NOSPLIT,$0
- // R0 already has *timeval
- MOVW $0, R1 // no timezone needed
- BL libc_gettimeofday(SB)
- RET
-
-GLOBL timebase<>(SB),NOPTR,$(machTimebaseInfo__size)
-
-TEXT runtime·nanotime_trampoline(SB),NOSPLIT,$0
- MOVW R0, R8
- BL libc_mach_absolute_time(SB)
- MOVW R0, 0(R8)
- MOVW R1, 4(R8)
- MOVW timebase<>+machTimebaseInfo_numer(SB), R6
- MOVW $timebase<>+machTimebaseInfo_denom(SB), R5
- MOVW (R5), R7
- DMB MB_ISH // memory barrier for atomic read
- CMP $0, R7
- BNE initialized
-
- SUB $(machTimebaseInfo__size+7)/8*8, R13
- MOVW R13, R0
- BL libc_mach_timebase_info(SB)
- MOVW machTimebaseInfo_numer(R13), R6
- MOVW machTimebaseInfo_denom(R13), R7
- ADD $(machTimebaseInfo__size+7)/8*8, R13
-
- MOVW R6, timebase<>+machTimebaseInfo_numer(SB)
- MOVW $timebase<>+machTimebaseInfo_denom(SB), R5
- DMB MB_ISH // memory barrier for atomic write
- MOVW R7, (R5)
- DMB MB_ISH
-
-initialized:
- MOVW R6, 8(R8)
- MOVW R7, 12(R8)
- RET
-
-TEXT runtime·sigfwd(SB),NOSPLIT,$0-16
- MOVW sig+4(FP), R0
- MOVW info+8(FP), R1
- MOVW ctx+12(FP), R2
- MOVW fn+0(FP), R11
- MOVW R13, R4
- SUB $24, R13
- BIC $0x7, R13 // alignment for ELF ABI
- BL (R11)
- MOVW R4, R13
- RET
-
-TEXT runtime·sigtramp(SB),NOSPLIT,$0
- // Reserve space for callee-save registers and arguments.
- SUB $40, R13
-
- MOVW R4, 16(R13)
- MOVW R5, 20(R13)
- MOVW R6, 24(R13)
- MOVW R7, 28(R13)
- MOVW R8, 32(R13)
- MOVW R11, 36(R13)
-
- // Save arguments.
- MOVW R0, 4(R13) // sig
- MOVW R1, 8(R13) // info
- MOVW R2, 12(R13) // ctx
-
- // this might be called in external code context,
- // where g is not set.
- MOVB runtime·iscgo(SB), R0
- CMP $0, R0
- BL.NE runtime·load_g(SB)
-
- MOVW R13, R6
- CMP $0, g
- BEQ nog
-
- // iOS always use the main stack to run the signal handler.
- // We need to switch to gsignal ourselves.
- MOVW g_m(g), R11
- MOVW m_gsignal(R11), R5
- MOVW (g_stack+stack_hi)(R5), R6
-
-nog:
- // Restore arguments.
- MOVW 4(R13), R0
- MOVW 8(R13), R1
- MOVW 12(R13), R2
-
- // Reserve space for args and the stack pointer on the
- // gsignal stack.
- SUB $24, R6
- // Save stack pointer.
- MOVW R13, R4
- MOVW R4, 16(R6)
- // Switch to gsignal stack.
- MOVW R6, R13
-
- // Call sigtrampgo
- MOVW R0, 4(R13)
- MOVW R1, 8(R13)
- MOVW R2, 12(R13)
- BL runtime·sigtrampgo(SB)
-
- // Switch to old stack.
- MOVW 16(R13), R5
- MOVW R5, R13
-
- // Restore callee-save registers.
- MOVW 16(R13), R4
- MOVW 20(R13), R5
- MOVW 24(R13), R6
- MOVW 28(R13), R7
- MOVW 32(R13), R8
- MOVW 36(R13), R11
-
- ADD $40, R13
-
- RET
-
-TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
- JMP runtime·sigtramp(SB)
-
-TEXT runtime·sigprocmask_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 new
- MOVW 8(R0), R2 // arg 3 old
- MOVW 0(R0), R0 // arg 1 how
- BL libc_pthread_sigmask(SB)
- CMP $0, R0
- BL.NE notok<>(SB)
- RET
-
-TEXT runtime·sigaction_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 new
- MOVW 8(R0), R2 // arg 3 old
- MOVW 0(R0), R0 // arg 1 how
- BL libc_sigaction(SB)
- RET
-
-TEXT runtime·usleep_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 usec
- BL libc_usleep(SB)
- RET
-
-TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
- B runtime·armPublicationBarrier(SB)
-
-TEXT runtime·sysctl_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 miblen
- MOVW 8(R0), R2 // arg 3 out
- MOVW 12(R0), R3 // arg 4 size
- MOVW 16(R0), R4 // arg 5 dst
- MOVW 20(R0), R5 // arg 6 ndst
- MOVW 0(R0), R0 // arg 1 mib
- // Only R0-R3 are used for arguments, the rest
- // go on the stack.
- MOVM.DB.W [R4-R5], (R13)
- BL libc_sysctl(SB)
- ADD $(2*4), R13
- RET
-
-TEXT runtime·kqueue_trampoline(SB),NOSPLIT,$0
- BL libc_kqueue(SB)
- RET
-
-// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int events, Timespec *timeout)
-TEXT runtime·kevent_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 keventss
- MOVW 8(R0), R2 // arg 3 nch
- MOVW 12(R0), R3 // arg 4 ev
- MOVW 16(R0), R4 // arg 5 nev
- MOVW 20(R0), R5 // arg 6 ts
- MOVW 0(R0), R0 // arg 1 kq
- // Only R0-R3 are used for arguments, the rest
- // go on the stack.
- MOVM.DB.W [R4-R5], (R13)
- BL libc_kevent(SB)
- ADD $(2*4), R13
- MOVW $-1, R2
- CMP R0, R2
- BNE ok
- BL libc_error(SB)
- MOVW (R0), R0 // errno
- RSB $0, R0, R0 // caller wants it as a negative error code
-ok:
- RET
-
-TEXT runtime·fcntl_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 cmd
- MOVW 8(R0), R2 // arg 3 arg
- MOVW 0(R0), R0 // arg 1 fd
- BL libc_fcntl(SB)
- RET
-
-// sigaltstack is not supported on iOS, so our sigtramp has
-// to do the stack switch ourselves.
-TEXT runtime·sigaltstack_trampoline(SB),NOSPLIT,$0
- MOVW $43, R0
- BL libc_exit(SB)
- RET
-
-// Thread related functions
-// Note: On darwin/arm, the runtime always use runtime/cgo to
-// create threads, so all thread related functions will just exit with a
-// unique status.
-
-TEXT runtime·mstart_stub(SB),NOSPLIT,$0
- MOVW $44, R0
- BL libc_exit(SB)
- RET
-
-TEXT runtime·pthread_attr_init_trampoline(SB),NOSPLIT,$0
- MOVW $45, R0
- BL libc_exit(SB)
- RET
-
-TEXT runtime·pthread_attr_getstacksize_trampoline(SB),NOSPLIT,$0
- MOVW $46, R0
- BL libc_exit(SB)
- RET
-
-TEXT runtime·pthread_attr_setdetachstate_trampoline(SB),NOSPLIT,$0
- MOVW $47, R0
- BL libc_exit(SB)
- RET
-
-TEXT runtime·pthread_create_trampoline(SB),NOSPLIT,$0
- MOVW $48, R0
- BL libc_exit(SB)
- RET
-
-TEXT runtime·raise_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 sig
- BL libc_raise(SB)
- RET
-
-TEXT runtime·pthread_mutex_init_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 attr
- MOVW 0(R0), R0 // arg 1 mutex
- BL libc_pthread_mutex_init(SB)
- RET
-
-TEXT runtime·pthread_mutex_lock_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 mutex
- BL libc_pthread_mutex_lock(SB)
- RET
-
-TEXT runtime·pthread_mutex_unlock_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 mutex
- BL libc_pthread_mutex_unlock(SB)
- RET
-
-TEXT runtime·pthread_cond_init_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 attr
- MOVW 0(R0), R0 // arg 1 cond
- BL libc_pthread_cond_init(SB)
- RET
-
-TEXT runtime·pthread_cond_wait_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 mutex
- MOVW 0(R0), R0 // arg 1 cond
- BL libc_pthread_cond_wait(SB)
- RET
-
-TEXT runtime·pthread_cond_timedwait_relative_np_trampoline(SB),NOSPLIT,$0
- MOVW 4(R0), R1 // arg 2 mutex
- MOVW 8(R0), R2 // arg 3 timeout
- MOVW 0(R0), R0 // arg 1 cond
- BL libc_pthread_cond_timedwait_relative_np(SB)
- RET
-
-TEXT runtime·pthread_cond_signal_trampoline(SB),NOSPLIT,$0
- MOVW 0(R0), R0 // arg 1 cond
- BL libc_pthread_cond_signal(SB)
- RET
-
-// syscall calls a function in libc on behalf of the syscall package.
-// syscall takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall(SB),NOSPLIT,$0
- MOVW.W R0, -4(R13) // push structure pointer
- MOVW 0(R0), R12 // fn
- MOVW 8(R0), R1 // a2
- MOVW 12(R0), R2 // a3
- MOVW 4(R0), R0 // a1
- BL (R12)
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 16(R2) // save r1
- MOVW R1, 20(R2) // save r2
- MOVW $-1, R3
- CMP R0, R3
- BNE ok
- MOVW.W R2, -4(R13) // push structure pointer
- BL libc_error(SB)
- MOVW (R0), R0
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 24(R2) // save err
-ok:
- RET
-
-// syscallPtr is like syscall except the libc function reports an
-// error by returning NULL and setting errno.
-TEXT runtime·syscallPtr(SB),NOSPLIT,$0
- MOVW.W R0, -4(R13) // push structure pointer
- MOVW 0(R0), R12 // fn
- MOVW 8(R0), R1 // a2
- MOVW 12(R0), R2 // a3
- MOVW 4(R0), R0 // a1
- BL (R12)
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 16(R2) // save r1
- MOVW R1, 20(R2) // save r2
- MOVW $0, R3
- CMP R0, R3
- BNE ok
- MOVW.W R2, -4(R13) // push structure pointer
- BL libc_error(SB)
- MOVW (R0), R0
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 24(R2) // save err
-ok:
- RET
-
-// syscall6 calls a function in libc on behalf of the syscall package.
-// syscall6 takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall6 must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall6(SB),NOSPLIT,$0
- MOVW.W R0, -4(R13) // push structure pointer
- MOVW 0(R0), R12 // fn
- MOVW 24(R0), R1 // a6
- MOVW.W R1, -4(R13)
- MOVW 20(R0), R1 // a5
- MOVW.W R1, -4(R13)
- MOVW 8(R0), R1 // a2
- MOVW 12(R0), R2 // a3
- MOVW 16(R0), R3 // a4
- MOVW 4(R0), R0 // a1
- BL (R12)
- ADD $8, R13
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 28(R2) // save r1
- MOVW R1, 32(R2) // save r2
- MOVW $-1, R3
- CMP R0, R3
- BNE ok
- MOVW.W R2, -4(R13) // push structure pointer
- BL libc_error(SB)
- MOVW (R0), R0
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 36(R2) // save err
-ok:
- RET
-
-// syscall6X calls a function in libc on behalf of the syscall package.
-// syscall6X takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall6X must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall6X(SB),NOSPLIT,$0
- MOVW.W R0, -4(R13) // push structure pointer
- MOVW 0(R0), R12 // fn
- MOVW 24(R0), R1 // a6
- MOVW.W R1, -4(R13)
- MOVW 20(R0), R1 // a5
- MOVW.W R1, -4(R13)
- MOVW 8(R0), R1 // a2
- MOVW 12(R0), R2 // a3
- MOVW 16(R0), R3 // a4
- MOVW 4(R0), R0 // a1
- BL (R12)
- ADD $8, R13
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 28(R2) // save r1
- MOVW R1, 32(R2) // save r2
- MOVW $-1, R3
- CMP R0, R3
- BNE ok
- CMP R1, R3
- BNE ok
- MOVW.W R2, -4(R13) // push structure pointer
- BL libc_error(SB)
- MOVW (R0), R0
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 36(R2) // save err
-ok:
- RET
-
-// syscall9 calls a function in libc on behalf of the syscall package.
-// syscall9 takes a pointer to a struct like:
-// struct {
-// fn uintptr
-// a1 uintptr
-// a2 uintptr
-// a3 uintptr
-// a4 uintptr
-// a5 uintptr
-// a6 uintptr
-// a7 uintptr
-// a8 uintptr
-// a9 uintptr
-// r1 uintptr
-// r2 uintptr
-// err uintptr
-// }
-// syscall9 must be called on the g0 stack with the
-// C calling convention (use libcCall).
-TEXT runtime·syscall9(SB),NOSPLIT,$0
- MOVW.W R0, -4(R13) // push structure pointer
- MOVW 0(R0), R12 // fn
- MOVW 36(R0), R1 // a9
- MOVW.W R1, -4(R13)
- MOVW 32(R0), R1 // a8
- MOVW.W R1, -4(R13)
- MOVW 28(R0), R1 // a7
- MOVW.W R1, -4(R13)
- MOVW 24(R0), R1 // a6
- MOVW.W R1, -4(R13)
- MOVW 20(R0), R1 // a5
- MOVW.W R1, -4(R13)
- MOVW 8(R0), R1 // a2
- MOVW 12(R0), R2 // a3
- MOVW 16(R0), R3 // a4
- MOVW 4(R0), R0 // a1
- BL (R12)
- ADD $20, R13
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 40(R2) // save r1
- MOVW R1, 44(R2) // save r2
- MOVW $-1, R3
- CMP R0, R3
- BNE ok
- MOVW.W R2, -4(R13) // push structure pointer
- BL libc_error(SB)
- MOVW (R0), R0
- MOVW.P 4(R13), R2 // pop structure pointer
- MOVW R0, 48(R2) // save err
-ok:
- RET
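
The struct comments above are what the syscall/syscall6/syscall9 trampolines depend on: on this 32-bit port every field is a 4-byte slot, so fn sits at offset 0, a1..a3 at 4/8/12, and r1/r2/err at 16/20/24, which is exactly where the MOVW loads and stores point. A minimal Go sketch of that layout, using the made-up name libcArgs (the runtime passes an equivalent block through libcCall rather than declaring a type like this):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // libcArgs mirrors the comment block above for the 3-argument case.
    // Illustrative only; these offsets are what the trampoline hard-codes.
    type libcArgs struct {
        fn, a1, a2, a3 uintptr // read before the BL (R12)
        r1, r2, err    uintptr // written back after the libc call returns
    }

    func main() {
        var a libcArgs
        // On a 32-bit platform uintptr is 4 bytes, so this prints 0 4 8 12 16 20 24.
        fmt.Println(unsafe.Offsetof(a.fn), unsafe.Offsetof(a.a1), unsafe.Offsetof(a.a2),
            unsafe.Offsetof(a.a3), unsafe.Offsetof(a.r1), unsafe.Offsetof(a.r2), unsafe.Offsetof(a.err))
    }
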
diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s
index ac3ca74..585d4f2 100644
--- a/src/runtime/sys_darwin_arm64.s
+++ b/src/runtime/sys_darwin_arm64.s
@@ -35,6 +35,13 @@
MOVW 16(R0), R2 // arg 3 count
MOVW 0(R0), R0 // arg 1 fd
BL libc_write(SB)
+ MOVD $-1, R1
+ CMP R0, R1
+ BNE noerr
+ BL libc_error(SB)
+ MOVW (R0), R0
+ NEG R0, R0 // caller expects negative errno value
+noerr:
RET
TEXT runtime·read_trampoline(SB),NOSPLIT,$0
@@ -42,6 +49,13 @@
MOVW 16(R0), R2 // arg 3 count
MOVW 0(R0), R0 // arg 1 fd
BL libc_read(SB)
+ MOVD $-1, R1
+ CMP R0, R1
+ BNE noerr
+ BL libc_error(SB)
+ MOVW (R0), R0
+ NEG R0, R0 // caller expects negative errno value
+noerr:
RET
TEXT runtime·pipe_trampoline(SB),NOSPLIT,$0
@@ -457,6 +471,18 @@
BL libc_pthread_cond_signal(SB)
RET
+TEXT runtime·pthread_self_trampoline(SB),NOSPLIT,$0
+ MOVD R0, R19 // R19 is callee-save
+ BL libc_pthread_self(SB)
+ MOVD R0, 0(R19) // return value
+ RET
+
+TEXT runtime·pthread_kill_trampoline(SB),NOSPLIT,$0
+ MOVD 8(R0), R1 // arg 2 sig
+ MOVD 0(R0), R0 // arg 1 thread
+ BL libc_pthread_kill(SB)
+ RET
+
// syscall calls a function in libc on behalf of the syscall package.
// syscall takes a pointer to a struct like:
// struct {
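
The new tails on the write and read trampolines fold libc's "-1 plus errno" convention into a single return value: on failure the caller now sees -errno rather than a bare -1, matching the convention the other ports use. A user-level sketch of what callers can rely on, assuming nothing about the runtime's own wrappers:

    package main

    import (
        "fmt"
        "syscall"
    )

    // writeRet returns the byte count on success and -errno on failure,
    // the same convention the trampolines above now implement.
    func writeRet(fd int, p []byte) int32 {
        n, err := syscall.Write(fd, p)
        if errno, ok := err.(syscall.Errno); ok {
            return -int32(errno)
        }
        return int32(n)
    }

    func main() {
        fmt.Println(writeRet(-1, []byte("x"))) // -9 (EBADF) on Darwin and Linux
    }
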
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
index b771850..580633a 100644
--- a/src/runtime/sys_dragonfly_amd64.s
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -104,27 +104,46 @@
MOVL $3, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-8
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVL $42, AX
+ SYSCALL
+ JCC pipeok
+ MOVL $-1,r+0(FP)
+ MOVL $-1,w+4(FP)
+ MOVL AX, errno+8(FP)
+ RET
+pipeok:
+ MOVL AX, r+0(FP)
+ MOVL DX, w+4(FP)
+ MOVL $0, errno+8(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-8
MOVQ fd+0(FP), DI // arg 1 fd
MOVQ p+8(FP), SI // arg 2 buf
MOVL n+16(FP), DX // arg 3 count
MOVL $4, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
+TEXT runtime·lwp_gettid(SB),NOSPLIT,$0-4
MOVL $496, AX // lwp_gettid
SYSCALL
- MOVQ $-1, DI // arg 1 - pid
- MOVQ AX, SI // arg 2 - tid
- MOVL sig+0(FP), DX // arg 3 - signum
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·lwp_kill(SB),NOSPLIT,$0-16
+ MOVL pid+0(FP), DI // arg 1 - pid
+ MOVL tid+4(FP), SI // arg 2 - tid
+ MOVQ sig+8(FP), DX // arg 3 - signum
MOVL $497, AX // lwp_kill
SYSCALL
RET
@@ -146,8 +165,8 @@
SYSCALL
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVL $232, AX // clock_gettime
MOVQ $0, DI // CLOCK_REALTIME
LEAQ 8(SP), SI
@@ -160,7 +179,7 @@
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB), NOSPLIT, $32
+TEXT runtime·nanotime1(SB), NOSPLIT, $32
MOVL $232, AX
MOVQ $4, DI // CLOCK_MONOTONIC
LEAQ 8(SP), SI
@@ -371,3 +390,18 @@
MOVL $92, AX // fcntl
SYSCALL
RET
+
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL fd+0(FP), DI // fd
+ MOVQ $3, SI // F_GETFL
+ MOVQ $0, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ MOVL fd+0(FP), DI // fd
+ MOVQ $4, SI // F_SETFL
+ MOVQ $4, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
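
setNonblock is the classic two-step fcntl dance: fetch the current flags with F_GETFL, OR in O_NONBLOCK, and write them back with F_SETFL. The same sequence through the plain syscall package, for reference (the numeric values of F_GETFL, F_SETFL and O_NONBLOCK differ per OS, which is why every assembly file hard-codes its own copies):

    package main

    import (
        "fmt"
        "syscall"
    )

    func setNonblock(fd int) {
        flags, _, _ := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0)
        _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_SETFL,
            flags|syscall.O_NONBLOCK)
        if errno != 0 {
            fmt.Println("fcntl:", errno)
        }
    }

    func main() {
        setNonblock(0) // make stdin non-blocking
    }

Outside the runtime, syscall.SetNonblock(fd, true) wraps this same pair of fcntl calls.
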
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
index 35f357a..c346e71 100644
--- a/src/runtime/sys_freebsd_386.s
+++ b/src/runtime/sys_freebsd_386.s
@@ -93,29 +93,54 @@
MOVL $3, AX
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-4
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$8-12
+ MOVL $42, AX
+ INT $0x80
+ JAE ok
+ MOVL $0, r+0(FP)
+ MOVL $0, w+4(FP)
+ MOVL AX, errno+8(FP)
+ RET
+ok:
+ MOVL AX, r+0(FP)
+ MOVL DX, w+4(FP)
+ MOVL $0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$12-16
+ MOVL $542, AX
+ LEAL r+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL flags+0(FP), BX
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL AX, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-4
MOVL $4, AX
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
- // thr_self(&8(SP))
- LEAL 8(SP), AX
+TEXT runtime·thr_self(SB),NOSPLIT,$8-4
+ // thr_self(&0(FP))
+ LEAL ret+0(FP), AX
MOVL AX, 4(SP)
MOVL $432, AX
INT $0x80
- // thr_kill(self, SIGPIPE)
- MOVL 8(SP), AX
- MOVL AX, 4(SP)
- MOVL sig+0(FP), AX
- MOVL AX, 8(SP)
+ RET
+
+TEXT runtime·thr_kill(SB),NOSPLIT,$-4
+ // thr_kill(tid, sig)
MOVL $433, AX
INT $0x80
RET
@@ -412,6 +437,23 @@
NEGL AX
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$16-4
+ MOVL $92, AX // fcntl
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $3, 8(SP) // F_GETFL
+ MOVL $0, 12(SP)
+ INT $0x80
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $4, 8(SP) // F_SETFL
+ ORL $4, AX // O_NONBLOCK
+ MOVL AX, 12(SP)
+ MOVL $92, AX // fcntl
+ INT $0x80
+ RET
+
// func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
TEXT runtime·cpuset_getaffinity(SB), NOSPLIT, $0-28
MOVL $487, AX
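
raise used to be a single assembly routine that looked up the thread id and signalled it; the FreeBSD ports now expose thr_self and thr_kill separately so the composition can move into Go. A compilable sketch of that shape, with stubs standing in for the assembly primitives (the names and the width of the thread type here are placeholders, not the runtime's actual declarations):

    package main

    type thread int32 // placeholder; the real type is sized per platform

    // Stubs standing in for the new assembly-backed primitives above.
    func thr_self() thread           { return 0 }
    func thr_kill(t thread, sig int) {}

    // raise signals the calling thread, built from the two primitives.
    func raise(sig uint32) {
        thr_kill(thr_self(), int(sig))
    }

    func main() {
        raise(13) // SIGPIPE, for example
    }
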
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
index 55959b3..010b2ec 100644
--- a/src/runtime/sys_freebsd_amd64.s
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -93,29 +93,56 @@
MOVL $3, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-8
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVL $42, AX
+ SYSCALL
+ JCC ok
+ MOVL $0, r+0(FP)
+ MOVL $0, w+4(FP)
+ MOVL AX, errno+8(FP)
+ RET
+ok:
+ MOVL AX, r+0(FP)
+ MOVL DX, w+4(FP)
+ MOVL $0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-20
+ LEAQ r+8(FP), DI
+ MOVL flags+0(FP), SI
+ MOVL $542, AX
+ SYSCALL
+ MOVL AX, errno+16(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-8
MOVQ fd+0(FP), DI // arg 1 fd
MOVQ p+8(FP), SI // arg 2 buf
MOVL n+16(FP), DX // arg 3 count
MOVL $4, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
- // thr_self(&8(SP))
- LEAQ 8(SP), DI // arg 1 &8(SP)
+TEXT runtime·thr_self(SB),NOSPLIT,$0-8
+ // thr_self(&0(FP))
+ LEAQ ret+0(FP), DI // arg 1
MOVL $432, AX
SYSCALL
- // thr_kill(self, SIGPIPE)
- MOVQ 8(SP), DI // arg 1 id
- MOVL sig+0(FP), SI // arg 2
+ RET
+
+TEXT runtime·thr_kill(SB),NOSPLIT,$0-16
+ // thr_kill(tid, sig)
+ MOVQ tid+0(FP), DI // arg 1 id
+ MOVQ sig+8(FP), SI // arg 2 sig
MOVL $433, AX
SYSCALL
RET
@@ -447,6 +474,21 @@
SYSCALL
RET
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL fd+0(FP), DI // fd
+ MOVQ $3, SI // F_GETFL
+ MOVQ $0, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ MOVL fd+0(FP), DI // fd
+ MOVQ $4, SI // F_SETFL
+ MOVQ $4, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
+
// func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
TEXT runtime·cpuset_getaffinity(SB), NOSPLIT, $0-44
MOVQ level+0(FP), DI
diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s
index f347b9f..1e12f9c 100644
--- a/src/runtime/sys_freebsd_arm.s
+++ b/src/runtime/sys_freebsd_arm.s
@@ -20,6 +20,7 @@
#define SYS_close (SYS_BASE + 6)
#define SYS_getpid (SYS_BASE + 20)
#define SYS_kill (SYS_BASE + 37)
+#define SYS_pipe (SYS_BASE + 42)
#define SYS_sigaltstack (SYS_BASE + 53)
#define SYS_munmap (SYS_BASE + 73)
#define SYS_madvise (SYS_BASE + 75)
@@ -40,6 +41,7 @@
#define SYS_thr_new (SYS_BASE + 455)
#define SYS_mmap (SYS_BASE + 477)
#define SYS_cpuset_getaffinity (SYS_BASE + 487)
+#define SYS_pipe2 (SYS_BASE + 542)
TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
MOVW addr+0(FP), R0
@@ -115,17 +117,43 @@
MOVW n+8(FP), R2 // arg 3 count
MOVW $SYS_read, R7
SWI $0
- MOVW.CS $-1, R0
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVW $SYS_pipe, R7
+ SWI $0
+ BCC ok
+ MOVW $0, R1
+ MOVW R1, r+0(FP)
+ MOVW R1, w+4(FP)
+ MOVW R0, errno+8(FP)
+ RET
+ok:
+ MOVW R0, r+0(FP)
+ MOVW R1, w+4(FP)
+ MOVW $0, R1
+ MOVW R1, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVW $r+4(FP), R0
+ MOVW flags+0(FP), R1
+ MOVW $SYS_pipe2, R7
+ SWI $0
+ MOVW R0, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0
MOVW fd+0(FP), R0 // arg 1 fd
MOVW p+4(FP), R1 // arg 2 buf
MOVW n+8(FP), R2 // arg 3 count
MOVW $SYS_write, R7
SWI $0
- MOVW.CS $-1, R0
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
@@ -137,14 +165,17 @@
MOVW R0, ret+4(FP)
RET
-TEXT runtime·raise(SB),NOSPLIT,$8
- // thr_self(&4(R13))
- MOVW $4(R13), R0 // arg 1 &4(R13)
+TEXT runtime·thr_self(SB),NOSPLIT,$0-4
+ // thr_self(&0(FP))
+ MOVW $ret+0(FP), R0 // arg 1
MOVW $SYS_thr_self, R7
SWI $0
- // thr_kill(self, SIGPIPE)
- MOVW 4(R13), R0 // arg 1 id
- MOVW sig+0(FP), R1 // arg 2 - signal
+ RET
+
+TEXT runtime·thr_kill(SB),NOSPLIT,$0-8
+ // thr_kill(tid, sig)
+ MOVW tid+0(FP), R0 // arg 1 id
+ MOVW sig+4(FP), R1 // arg 2 signal
MOVW $SYS_thr_kill, R7
SWI $0
RET
@@ -215,7 +246,11 @@
MOVW R0, ret+12(FP)
RET
-TEXT runtime·sigtramp(SB),NOSPLIT,$12
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ // Reserve space for callee-save registers and arguments.
+ MOVM.DB.W [R4-R11], (R13)
+ SUB $16, R13
+
// this might be called in external code context,
// where g is not set.
// first save R0, because runtime·load_g will clobber it
@@ -227,6 +262,11 @@
MOVW R1, 8(R13)
MOVW R2, 12(R13)
BL runtime·sigtrampgo(SB)
+
+ // Restore callee-save registers.
+ ADD $16, R13
+ MOVM.IA.W (R13), [R4-R11]
+
RET
TEXT runtime·mmap(SB),NOSPLIT,$16
@@ -371,6 +411,20 @@
SWI $0
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R0 // fd
+ MOVW $3, R1 // F_GETFL
+ MOVW $0, R2
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ ORR $0x4, R0, R2 // O_NONBLOCK
+ MOVW fd+0(FP), R0 // fd
+ MOVW $4, R1 // F_SETFL
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ RET
+
// TODO: this is only valid for ARMv7+
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
B runtime·armPublicationBarrier(SB)
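
pipe, pipe2 and setNonblock, together with the existing closeonexec helper, are building blocks for one pattern: the runtime wants a non-blocking, close-on-exec pipe, prefers pipe2, and falls back to pipe plus the two fcntl helpers on kernels without pipe2. A sketch of that control flow with placeholder stubs and constants (the runtime's real helper may be named and structured differently):

    package main

    import "fmt"

    const (
        _O_NONBLOCK = 4        // FreeBSD value, per the defines in these files
        _O_CLOEXEC  = 0x100000 // FreeBSD O_CLOEXEC; not defined in this diff
    )

    // Stubs standing in for the assembly-backed primitives.
    func pipe() (r, w int32, errno int32)             { return 3, 4, 0 }
    func pipe2(flags int32) (r, w int32, errno int32) { return 3, 4, 0 }
    func setNonblock(fd int32)                        {}
    func closeonexec(fd int32)                        {}

    // nonblockingPipe prefers pipe2 and falls back to pipe on older kernels.
    func nonblockingPipe() (r, w int32, errno int32) {
        r, w, errno = pipe2(_O_NONBLOCK | _O_CLOEXEC)
        if errno != 0 { // e.g. ENOSYS on kernels without pipe2: fall back
            r, w, errno = pipe()
            if errno != 0 {
                return -1, -1, errno
            }
            closeonexec(r)
            setNonblock(r)
            closeonexec(w)
            setNonblock(w)
        }
        return r, w, errno
    }

    func main() {
        fmt.Println(nonblockingPipe())
    }
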
diff --git a/src/runtime/sys_freebsd_arm64.s b/src/runtime/sys_freebsd_arm64.s
new file mode 100644
index 0000000..2330f2f
--- /dev/null
+++ b/src/runtime/sys_freebsd_arm64.s
@@ -0,0 +1,538 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for arm64, FreeBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 4
+#define FD_CLOEXEC 1
+#define F_SETFD 2
+#define F_GETFL 3
+#define F_SETFL 4
+#define O_NONBLOCK 4
+
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_getpid 20
+#define SYS_kill 37
+#define SYS_sigaltstack 53
+#define SYS_munmap 73
+#define SYS_madvise 75
+#define SYS_setitimer 83
+#define SYS_fcntl 92
+#define SYS___sysctl 202
+#define SYS_nanosleep 240
+#define SYS_clock_gettime 232
+#define SYS_sched_yield 331
+#define SYS_sigprocmask 340
+#define SYS_kqueue 362
+#define SYS_kevent 363
+#define SYS_sigaction 416
+#define SYS_thr_exit 431
+#define SYS_thr_self 432
+#define SYS_thr_kill 433
+#define SYS__umtx_op 454
+#define SYS_thr_new 455
+#define SYS_mmap 477
+#define SYS_cpuset_getaffinity 487
+#define SYS_pipe2 542
+
+TEXT emptyfunc<>(SB),0,$0-0
+ RET
+
+// func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uintptr, ut *umtx_time) int32
+TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
+ MOVD addr+0(FP), R0
+ MOVW mode+8(FP), R1
+ MOVW val+12(FP), R2
+ MOVD uaddr1+16(FP), R3
+ MOVD ut+24(FP), R4
+ MOVD $SYS__umtx_op, R8
+ SVC
+ MOVW R0, ret+32(FP)
+ RET
+
+// func thr_new(param *thrparam, size int32) int32
+TEXT runtime·thr_new(SB),NOSPLIT,$0
+ MOVD param+0(FP), R0
+ MOVW size+8(FP), R1
+ MOVD $SYS_thr_new, R8
+ SVC
+ MOVW R0, ret+16(FP)
+ RET
+
+// func thr_start()
+TEXT runtime·thr_start(SB),NOSPLIT,$0
+ // set up g
+ MOVD m_g0(R0), g
+ MOVD R0, g_m(g)
+ BL emptyfunc<>(SB) // fault if stack check is wrong
+ BL runtime·mstart(SB)
+
+ MOVD $2, R8 // crash (not reached)
+ MOVD R8, (R8)
+ RET
+
+// func exit(code int32)
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW code+0(FP), R0
+ MOVD $SYS_exit, R8
+ SVC
+ MOVD $0, R0
+ MOVD R0, (R0)
+
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD wait+0(FP), R0
+ // We're done using the stack.
+ MOVW $0, R1
+ STLRW R1, (R0)
+ MOVW $0, R0
+ MOVD $SYS_thr_exit, R8
+ SVC
+ JMP 0(PC)
+
+// func open(name *byte, mode, perm int32) int32
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
+ MOVD name+0(FP), R0
+ MOVW mode+8(FP), R1
+ MOVW perm+12(FP), R2
+ MOVD $SYS_open, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+16(FP)
+ RET
+
+// func closefd(fd int32) int32
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
+ MOVW fd+0(FP), R0
+ MOVD $SYS_close, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+8(FP)
+ RET
+
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVD $r+0(FP), R0
+ MOVW $0, R1
+ MOVD $SYS_pipe2, R8
+ SVC
+ BCC ok
+ NEG R0, R0
+ok:
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOVD $r+8(FP), R0
+ MOVW flags+0(FP), R1
+ MOVD $SYS_pipe2, R8
+ SVC
+ BCC ok
+ NEG R0, R0
+ok:
+ MOVW R0, errno+16(FP)
+ RET
+
+// func write1(fd uintptr, p unsafe.Pointer, n int32) int32
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
+ MOVD fd+0(FP), R0
+ MOVD p+8(FP), R1
+ MOVW n+16(FP), R2
+ MOVD $SYS_write, R8
+ SVC
+ BCC ok
+ NEG R0, R0 // caller expects negative errno
+ok:
+ MOVW R0, ret+24(FP)
+ RET
+
+// func read(fd int32, p unsafe.Pointer, n int32) int32
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
+ MOVW fd+0(FP), R0
+ MOVD p+8(FP), R1
+ MOVW n+16(FP), R2
+ MOVD $SYS_read, R8
+ SVC
+ BCC ok
+ NEG R0, R0 // caller expects negative errno
+ok:
+ MOVW R0, ret+24(FP)
+ RET
+
+// func usleep(usec uint32)
+TEXT runtime·usleep(SB),NOSPLIT,$24-4
+ MOVWU usec+0(FP), R3
+ MOVD R3, R5
+ MOVW $1000000, R4
+ UDIV R4, R3
+ MOVD R3, 8(RSP)
+ MUL R3, R4
+ SUB R4, R5
+ MOVW $1000, R4
+ MUL R4, R5
+ MOVD R5, 16(RSP)
+
+ // nanosleep(&ts, 0)
+ ADD $8, RSP, R0
+ MOVD $0, R1
+ MOVD $SYS_nanosleep, R8
+ SVC
+ RET
+
+// func thr_self() thread
+TEXT runtime·thr_self(SB),NOSPLIT,$8-8
+ MOVD $ptr-8(SP), R0 // arg 1 &8(SP)
+ MOVD $SYS_thr_self, R8
+ SVC
+ MOVD ptr-8(SP), R0
+ MOVD R0, ret+0(FP)
+ RET
+
+// func thr_kill(t thread, sig int)
+TEXT runtime·thr_kill(SB),NOSPLIT,$0-16
+ MOVD tid+0(FP), R0 // arg 1 pid
+ MOVD sig+8(FP), R1 // arg 2 sig
+ MOVD $SYS_thr_kill, R8
+ SVC
+ RET
+
+// func raiseproc(sig uint32)
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
+ MOVD $SYS_getpid, R8
+ SVC
+ MOVW sig+0(FP), R1
+ MOVD $SYS_kill, R8
+ SVC
+ RET
+
+// func setitimer(mode int32, new, old *itimerval)
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
+ MOVW mode+0(FP), R0
+ MOVD new+8(FP), R1
+ MOVD old+16(FP), R2
+ MOVD $SYS_setitimer, R8
+ SVC
+ RET
+
+// func fallback_walltime() (sec int64, nsec int32)
+TEXT runtime·fallback_walltime(SB),NOSPLIT,$24-12
+ MOVW $CLOCK_REALTIME, R0
+ MOVD $8(RSP), R1
+ MOVD $SYS_clock_gettime, R8
+ SVC
+ MOVD 8(RSP), R0 // sec
+ MOVW 16(RSP), R1 // nsec
+ MOVD R0, sec+0(FP)
+ MOVW R1, nsec+8(FP)
+ RET
+
+// func fallback_nanotime() int64
+TEXT runtime·fallback_nanotime(SB),NOSPLIT,$24-8
+ MOVD $CLOCK_MONOTONIC, R0
+ MOVD $8(RSP), R1
+ MOVD $SYS_clock_gettime, R8
+ SVC
+ MOVD 8(RSP), R0 // sec
+ MOVW 16(RSP), R2 // nsec
+
+ // sec is in R0, nsec in R2
+ // return nsec in R2
+ MOVD $1000000000, R3
+ MUL R3, R0
+ ADD R2, R0
+
+ MOVD R0, ret+0(FP)
+ RET
+
+// func asmSigaction(sig uintptr, new, old *sigactiont) int32
+TEXT runtime·asmSigaction(SB),NOSPLIT|NOFRAME,$0
+ MOVD sig+0(FP), R0 // arg 1 sig
+ MOVD new+8(FP), R1 // arg 2 act
+ MOVD old+16(FP), R2 // arg 3 oact
+ MOVD $SYS_sigaction, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+24(FP)
+ RET
+
+// func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
+TEXT runtime·sigfwd(SB),NOSPLIT,$0-32
+ MOVW sig+8(FP), R0
+ MOVD info+16(FP), R1
+ MOVD ctx+24(FP), R2
+ MOVD fn+0(FP), R11
+ BL (R11)
+ RET
+
+// func sigtramp()
+TEXT runtime·sigtramp(SB),NOSPLIT,$192
+ // Save callee-save registers in the case of signal forwarding.
+ // Please refer to https://golang.org/issue/31827 .
+ MOVD R19, 8*4(RSP)
+ MOVD R20, 8*5(RSP)
+ MOVD R21, 8*6(RSP)
+ MOVD R22, 8*7(RSP)
+ MOVD R23, 8*8(RSP)
+ MOVD R24, 8*9(RSP)
+ MOVD R25, 8*10(RSP)
+ MOVD R26, 8*11(RSP)
+ MOVD R27, 8*12(RSP)
+ MOVD g, 8*13(RSP)
+ MOVD R29, 8*14(RSP)
+ FMOVD F8, 8*15(RSP)
+ FMOVD F9, 8*16(RSP)
+ FMOVD F10, 8*17(RSP)
+ FMOVD F11, 8*18(RSP)
+ FMOVD F12, 8*19(RSP)
+ FMOVD F13, 8*20(RSP)
+ FMOVD F14, 8*21(RSP)
+ FMOVD F15, 8*22(RSP)
+
+ // this might be called in external code context,
+ // where g is not set.
+ // first save R0, because runtime·load_g will clobber it
+ MOVW R0, 8(RSP)
+ MOVBU runtime·iscgo(SB), R0
+ CMP $0, R0
+ BEQ 2(PC)
+ BL runtime·load_g(SB)
+
+ MOVD R1, 16(RSP)
+ MOVD R2, 24(RSP)
+ MOVD $runtime·sigtrampgo(SB), R0
+ BL (R0)
+
+ // Restore callee-save registers.
+ MOVD 8*4(RSP), R19
+ MOVD 8*5(RSP), R20
+ MOVD 8*6(RSP), R21
+ MOVD 8*7(RSP), R22
+ MOVD 8*8(RSP), R23
+ MOVD 8*9(RSP), R24
+ MOVD 8*10(RSP), R25
+ MOVD 8*11(RSP), R26
+ MOVD 8*12(RSP), R27
+ MOVD 8*13(RSP), g
+ MOVD 8*14(RSP), R29
+ FMOVD 8*15(RSP), F8
+ FMOVD 8*16(RSP), F9
+ FMOVD 8*17(RSP), F10
+ FMOVD 8*18(RSP), F11
+ FMOVD 8*19(RSP), F12
+ FMOVD 8*20(RSP), F13
+ FMOVD 8*21(RSP), F14
+ FMOVD 8*22(RSP), F15
+
+ RET
+
+// func mmap(addr uintptr, n uintptr, prot int, flags int, fd int, off int64) (ret uintptr, err error)
+TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
+ MOVD addr+0(FP), R0
+ MOVD n+8(FP), R1
+ MOVW prot+16(FP), R2
+ MOVW flags+20(FP), R3
+ MOVW fd+24(FP), R4
+ MOVW off+28(FP), R5
+ MOVD $SYS_mmap, R8
+ SVC
+ BCS fail
+ MOVD R0, p+32(FP)
+ MOVD $0, err+40(FP)
+ RET
+fail:
+ MOVD $0, p+32(FP)
+ MOVD R0, err+40(FP)
+ RET
+
+// func munmap(addr uintptr, n uintptr) (err error)
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
+ MOVD addr+0(FP), R0
+ MOVD n+8(FP), R1
+ MOVD $SYS_munmap, R8
+ SVC
+ BCS fail
+ RET
+fail:
+ MOVD $0, R0
+ MOVD R0, (R0) // crash
+
+// func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
+ MOVD addr+0(FP), R0
+ MOVD n+8(FP), R1
+ MOVW flags+16(FP), R2
+ MOVD $SYS_madvise, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+24(FP)
+ RET
+
+// func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVD mib+0(FP), R0
+ MOVD miblen+8(FP), R1
+ MOVD out+16(FP), R2
+ MOVD size+24(FP), R3
+ MOVD dst+32(FP), R4
+ MOVD ndst+40(FP), R5
+ MOVD $SYS___sysctl, R8
+ SVC
+ BCC ok
+ NEG R0, R0
+ok:
+ MOVW R0, ret+48(FP)
+ RET
+
+// func sigaltstack(new, old *stackt)
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
+ MOVD new+0(FP), R0
+ MOVD old+8(FP), R1
+ MOVD $SYS_sigaltstack, R8
+ SVC
+ BCS fail
+ RET
+fail:
+ MOVD $0, R0
+ MOVD R0, (R0) // crash
+
+// func osyield()
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
+ MOVD $SYS_sched_yield, R8
+ SVC
+ RET
+
+// func sigprocmask(how int32, new, old *sigset)
+TEXT runtime·sigprocmask(SB),NOSPLIT|NOFRAME,$0-24
+ MOVW how+0(FP), R0
+ MOVD new+8(FP), R1
+ MOVD old+16(FP), R2
+ MOVD $SYS_sigprocmask, R8
+ SVC
+ BCS fail
+ RET
+fail:
+ MOVD $0, R0
+ MOVD R0, (R0) // crash
+
+// func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
+TEXT runtime·cpuset_getaffinity(SB),NOSPLIT|NOFRAME,$0-44
+ MOVD level+0(FP), R0
+ MOVD which+8(FP), R1
+ MOVD id+16(FP), R2
+ MOVD size+24(FP), R3
+ MOVD mask+32(FP), R4
+ MOVD $SYS_cpuset_getaffinity, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+40(FP)
+ RET
+
+// func kqueue() int32
+TEXT runtime·kqueue(SB),NOSPLIT|NOFRAME,$0
+ MOVD $SYS_kqueue, R8
+ SVC
+ BCC ok
+ MOVW $-1, R0
+ok:
+ MOVW R0, ret+0(FP)
+ RET
+
+// func kevent(kq int, ch unsafe.Pointer, nch int, ev unsafe.Pointer, nev int, ts *Timespec) (n int, err error)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVW kq+0(FP), R0
+ MOVD ch+8(FP), R1
+ MOVW nch+16(FP), R2
+ MOVD ev+24(FP), R3
+ MOVW nev+32(FP), R4
+ MOVD ts+40(FP), R5
+ MOVD $SYS_kevent, R8
+ SVC
+ BCC ok
+ NEG R0, R0
+ok:
+ MOVW R0, ret+48(FP)
+ RET
+
+// func closeonexec(fd int32)
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
+ MOVW fd+0(FP), R0
+ MOVD $F_SETFD, R1
+ MOVD $FD_CLOEXEC, R2
+ MOVD $SYS_fcntl, R8
+ SVC
+ RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R0
+ MOVD $F_GETFL, R1
+ MOVD $0, R2
+ MOVD $SYS_fcntl, R8
+ SVC
+ ORR $O_NONBLOCK, R0, R2
+ MOVW fd+0(FP), R0
+ MOVW $F_SETFL, R1
+	MOVD	$SYS_fcntl, R8	// syscall number goes in R8 on arm64 (R7 was copied from the arm port)
+ SVC
+ RET
+
+// func getCntxct(physical bool) uint32
+TEXT runtime·getCntxct(SB),NOSPLIT,$0
+ MOVB physical+0(FP), R0
+ CMP $0, R0
+ BEQ 3(PC)
+
+ // get CNTPCT (Physical Count Register) into R0
+ MRS CNTPCT_EL0, R0 // SIGILL
+ B 2(PC)
+
+ // get CNTVCT (Virtual Count Register) into R0
+ MRS CNTVCT_EL0, R0
+
+ MOVW R0, ret+8(FP)
+ RET
+
+// func getisar0() uint64
+TEXT runtime·getisar0(SB),NOSPLIT,$0
+ // get Instruction Set Attributes 0 into R0
+ MRS ID_AA64ISAR0_EL1, R0
+ MOVD R0, ret+0(FP)
+ RET
+
+// func getisar1() uint64
+TEXT runtime·getisar1(SB),NOSPLIT,$0
+ // get Instruction Set Attributes 1 into R0
+ MRS ID_AA64ISAR1_EL1, R0
+ MOVD R0, ret+0(FP)
+ RET
+
+// func getpfr0() uint64
+TEXT runtime·getpfr0(SB),NOSPLIT,$0
+ // get Processor Feature Register 0 into R0
+ MRS ID_AA64PFR0_EL1, R0
+ MOVD R0, ret+0(FP)
+ RET
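
fallback_nanotime above multiplies the seconds by 1e9 and adds the nanoseconds so callers get a single monotonic count, while fallback_walltime hands the two halves back separately. The same arithmetic written out in Go, just to make the register comments concrete:

    package main

    import "fmt"

    // tsToNanos folds a timespec into one nanosecond count, as the
    // MUL/ADD sequence in fallback_nanotime does with R0 and R2.
    func tsToNanos(sec int64, nsec int32) int64 {
        return sec*1_000_000_000 + int64(nsec)
    }

    func main() {
        fmt.Println(tsToNanos(2, 500_000_000)) // 2500000000
    }
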
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 72c43bd..1b28098 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -32,12 +32,15 @@
#define SYS_getpid 20
#define SYS_access 33
#define SYS_kill 37
+#define SYS_pipe 42
#define SYS_brk 45
#define SYS_fcntl 55
#define SYS_munmap 91
#define SYS_socketcall 102
#define SYS_setittimer 104
#define SYS_clone 120
+#define SYS_uname 122
+#define SYS_mlock 150
#define SYS_sched_yield 158
#define SYS_nanosleep 162
#define SYS_rt_sigreturn 173
@@ -58,6 +61,7 @@
#define SYS_clock_gettime 265
#define SYS_tgkill 270
#define SYS_epoll_create1 329
+#define SYS_pipe2 331
TEXT runtime·exit(SB),NOSPLIT,$0
MOVL $SYS_exit_group, AX
@@ -107,15 +111,12 @@
MOVL AX, ret+4(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$0
+TEXT runtime·write1(SB),NOSPLIT,$0
MOVL $SYS_write, AX
MOVL fd+0(FP), BX
MOVL p+4(FP), CX
MOVL n+8(FP), DX
INVOKE_SYSCALL
- CMPL AX, $0xfffff001
- JLS 2(PC)
- MOVL $-1, AX
MOVL AX, ret+12(FP)
RET
@@ -125,12 +126,26 @@
MOVL p+4(FP), CX
MOVL n+8(FP), DX
INVOKE_SYSCALL
- CMPL AX, $0xfffff001
- JLS 2(PC)
- MOVL $-1, AX
MOVL AX, ret+12(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVL $SYS_pipe, AX
+ LEAL r+0(FP), BX
+ INVOKE_SYSCALL
+ MOVL AX, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVL $SYS_pipe2, AX
+ LEAL r+4(FP), BX
+ MOVL flags+0(FP), CX
+ INVOKE_SYSCALL
+ MOVL AX, errno+12(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$8
MOVL $0, DX
MOVL usec+0(FP), AX
@@ -175,6 +190,20 @@
INVOKE_SYSCALL
RET
+TEXT ·getpid(SB),NOSPLIT,$0-4
+ MOVL $SYS_getpid, AX
+ INVOKE_SYSCALL
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT,$0
+ MOVL $SYS_tgkill, AX
+ MOVL tgid+0(FP), BX
+ MOVL tid+4(FP), CX
+ MOVL sig+8(FP), DX
+ INVOKE_SYSCALL
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT,$0-12
MOVL $SYS_setittimer, AX
MOVL mode+0(FP), BX
@@ -192,8 +221,8 @@
MOVL AX, ret+12(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $0-12
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $0-12
// We don't know how much stack space the VDSO code will need,
// so switch to g0.
@@ -204,9 +233,9 @@
MOVL g_m(AX), SI // SI unchanged by C code.
// Set vdsoPC and vdsoSP for SIGPROF traceback.
- MOVL 0(SP), DX
- MOVL DX, m_vdsoPC(SI)
- LEAL sec+0(SP), DX
+ LEAL sec+0(FP), DX
+ MOVL -4(DX), CX
+ MOVL CX, m_vdsoPC(SI)
MOVL DX, m_vdsoSP(SI)
CMPL AX, m_curg(SI) // Only switch if on curg.
@@ -257,7 +286,7 @@
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB), NOSPLIT, $0-8
+TEXT runtime·nanotime1(SB), NOSPLIT, $0-8
// Switch to g0 stack. See comment above in runtime·walltime.
MOVL SP, BP // Save old SP; BP unchanged by C code.
@@ -267,9 +296,9 @@
MOVL g_m(AX), SI // SI unchanged by C code.
// Set vdsoPC and vdsoSP for SIGPROF traceback.
- MOVL 0(SP), DX
- MOVL DX, m_vdsoPC(SI)
- LEAL ret+0(SP), DX
+ LEAL ret+0(FP), DX
+ MOVL -4(DX), CX
+ MOVL CX, m_vdsoPC(SI)
MOVL DX, m_vdsoSP(SI)
CMPL AX, m_curg(SI) // Only switch if on curg.
@@ -695,6 +724,21 @@
INVOKE_SYSCALL
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL $SYS_fcntl, AX
+ MOVL fd+0(FP), BX // fd
+ MOVL $3, CX // F_GETFL
+ MOVL $0, DX
+ INVOKE_SYSCALL
+ MOVL fd+0(FP), BX // fd
+ MOVL $4, CX // F_SETFL
+ MOVL $0x800, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $SYS_fcntl, AX
+ INVOKE_SYSCALL
+ RET
+
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0
MOVL $SYS_access, AX
@@ -734,3 +778,20 @@
INVOKE_SYSCALL
MOVL AX, ret+0(FP)
RET
+
+// func uname(utsname *new_utsname) int
+TEXT ·uname(SB),NOSPLIT,$0-8
+ MOVL $SYS_uname, AX
+ MOVL utsname+0(FP), BX
+ INVOKE_SYSCALL
+ MOVL AX, ret+4(FP)
+ RET
+
+// func mlock(addr, len uintptr) int
+TEXT ·mlock(SB),NOSPLIT,$0-12
+ MOVL $SYS_mlock, AX
+ MOVL addr+0(FP), BX
+ MOVL len+4(FP), CX
+ INVOKE_SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
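
getpid and tgkill arrive as a pair because tgkill needs both the process id and the kernel thread id to aim a signal at one specific thread, which is how the runtime delivers thread-targeted signals such as the preemption signal. The user-level counterparts already exist in the syscall package; a short usage example:

    package main

    import (
        "fmt"
        "runtime"
        "syscall"
    )

    func main() {
        runtime.LockOSThread()
        // Direct a signal at this particular OS thread rather than the whole process.
        err := syscall.Tgkill(syscall.Getpid(), syscall.Gettid(), syscall.SIGURG)
        fmt.Println("tgkill:", err) // <nil> unless the ids are stale
    }
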
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 5c300f5..58d3bc5 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -21,6 +21,7 @@
#define SYS_rt_sigaction 13
#define SYS_rt_sigprocmask 14
#define SYS_rt_sigreturn 15
+#define SYS_pipe 22
#define SYS_sched_yield 24
#define SYS_mincore 27
#define SYS_madvise 28
@@ -32,8 +33,10 @@
#define SYS_clone 56
#define SYS_exit 60
#define SYS_kill 62
+#define SYS_uname 63
#define SYS_fcntl 72
#define SYS_sigaltstack 131
+#define SYS_mlock 149
#define SYS_arch_prctl 158
#define SYS_gettid 186
#define SYS_futex 202
@@ -46,6 +49,7 @@
#define SYS_faccessat 269
#define SYS_epoll_pwait 281
#define SYS_epoll_create1 291
+#define SYS_pipe2 293
TEXT runtime·exit(SB),NOSPLIT,$0-4
MOVL code+0(FP), DI
@@ -89,15 +93,12 @@
MOVL AX, ret+8(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$0-28
+TEXT runtime·write1(SB),NOSPLIT,$0-28
MOVQ fd+0(FP), DI
MOVQ p+8(FP), SI
MOVL n+16(FP), DX
MOVL $SYS_write, AX
SYSCALL
- CMPQ AX, $0xfffffffffffff001
- JLS 2(PC)
- MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
@@ -107,12 +108,26 @@
MOVL n+16(FP), DX
MOVL $SYS_read, AX
SYSCALL
- CMPQ AX, $0xfffffffffffff001
- JLS 2(PC)
- MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ LEAQ r+0(FP), DI
+ MOVL $SYS_pipe, AX
+ SYSCALL
+ MOVL AX, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-20
+ LEAQ r+8(FP), DI
+ MOVL flags+0(FP), SI
+ MOVL $SYS_pipe2, AX
+ SYSCALL
+ MOVL AX, errno+16(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$16
MOVL $0, DX
MOVL usec+0(FP), AX
@@ -158,6 +173,20 @@
SYSCALL
RET
+TEXT ·getpid(SB),NOSPLIT,$0-8
+ MOVL $SYS_getpid, AX
+ SYSCALL
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT,$0
+ MOVQ tgid+0(FP), DI
+ MOVQ tid+8(FP), SI
+ MOVQ sig+16(FP), DX
+ MOVL $SYS_tgkill, AX
+ SYSCALL
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT,$0-24
MOVL mode+0(FP), DI
MOVQ new+8(FP), SI
@@ -175,8 +204,9 @@
MOVL AX, ret+24(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$0-12
+// func walltime1() (sec int64, nsec int32)
+// non-zero frame-size means bp is saved and restored
+TEXT runtime·walltime1(SB),NOSPLIT,$8-12
// We don't know how much stack space the VDSO code will need,
// so switch to g0.
// In particular, a kernel configured with CONFIG_OPTIMIZE_INLINING=n
@@ -191,9 +221,9 @@
MOVQ g_m(AX), BX // BX unchanged by C code.
// Set vdsoPC and vdsoSP for SIGPROF traceback.
- MOVQ 0(SP), DX
- MOVQ DX, m_vdsoPC(BX)
- LEAQ sec+0(SP), DX
+ LEAQ sec+0(FP), DX
+ MOVQ -8(DX), CX
+ MOVQ CX, m_vdsoPC(BX)
MOVQ DX, m_vdsoSP(BX)
CMPQ AX, m_curg(BX) // Only switch if on curg.
@@ -233,7 +263,9 @@
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+// func nanotime1() int64
+// non-zero frame-size means bp is saved and restored
+TEXT runtime·nanotime1(SB),NOSPLIT,$8-8
// Switch to g0 stack. See comment above in runtime·walltime.
MOVQ SP, BP // Save old SP; BP unchanged by C code.
@@ -243,9 +275,9 @@
MOVQ g_m(AX), BX // BX unchanged by C code.
// Set vdsoPC and vdsoSP for SIGPROF traceback.
- MOVQ 0(SP), DX
- MOVQ DX, m_vdsoPC(BX)
- LEAQ ret+0(SP), DX
+ LEAQ ret+0(FP), DX
+ MOVQ -8(DX), CX
+ MOVQ CX, m_vdsoPC(BX)
MOVQ DX, m_vdsoSP(BX)
CMPQ AX, m_curg(BX) // Only switch if on curg.
@@ -682,6 +714,20 @@
SYSCALL
RET
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL fd+0(FP), DI // fd
+ MOVQ $3, SI // F_GETFL
+ MOVQ $0, DX
+ MOVL $SYS_fcntl, AX
+ SYSCALL
+ MOVL fd+0(FP), DI // fd
+ MOVQ $4, SI // F_SETFL
+ MOVQ $0x800, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $SYS_fcntl, AX
+ SYSCALL
+ RET
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0
@@ -723,3 +769,20 @@
SYSCALL
MOVQ AX, ret+0(FP)
RET
+
+// func uname(utsname *new_utsname) int
+TEXT ·uname(SB),NOSPLIT,$0-16
+ MOVQ utsname+0(FP), DI
+ MOVL $SYS_uname, AX
+ SYSCALL
+ MOVQ AX, ret+8(FP)
+ RET
+
+// func mlock(addr, len uintptr) int
+TEXT ·mlock(SB),NOSPLIT,$0-24
+ MOVQ addr+0(FP), DI
+ MOVQ len+8(FP), SI
+ MOVL $SYS_mlock, AX
+ SYSCALL
+ MOVQ AX, ret+16(FP)
+ RET
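
The new pipe and pipe2 wrappers return the two descriptors plus a raw errno rather than a Go error, since they sit below the error-wrapping layers. Outside the runtime the same kernel call is reachable through syscall.Pipe2; a short example of the semantics:

    package main

    import (
        "fmt"
        "syscall"
    )

    func main() {
        var p [2]int
        // O_CLOEXEC here corresponds to the flags argument the assembly
        // passes straight through to SYS_pipe2.
        if err := syscall.Pipe2(p[:], syscall.O_CLOEXEC); err != nil {
            fmt.Println("pipe2 errno:", err)
            return
        }
        fmt.Println("read fd:", p[0], "write fd:", p[1])
    }
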
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index 9c73984..e103da5 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -23,6 +23,7 @@
#define SYS_close (SYS_BASE + 6)
#define SYS_getpid (SYS_BASE + 20)
#define SYS_kill (SYS_BASE + 37)
+#define SYS_pipe (SYS_BASE + 42)
#define SYS_clone (SYS_BASE + 120)
#define SYS_rt_sigreturn (SYS_BASE + 173)
#define SYS_rt_sigaction (SYS_BASE + 174)
@@ -45,6 +46,7 @@
#define SYS_epoll_ctl (SYS_BASE + 251)
#define SYS_epoll_wait (SYS_BASE + 252)
#define SYS_epoll_create1 (SYS_BASE + 357)
+#define SYS_pipe2 (SYS_BASE + 359)
#define SYS_fcntl (SYS_BASE + 55)
#define SYS_access (SYS_BASE + 33)
#define SYS_connect (SYS_BASE + 283)
@@ -75,15 +77,12 @@
MOVW R0, ret+4(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$0
+TEXT runtime·write1(SB),NOSPLIT,$0
MOVW fd+0(FP), R0
MOVW p+4(FP), R1
MOVW n+8(FP), R2
MOVW $SYS_write, R7
SWI $0
- MOVW $0xfffff001, R1
- CMP R1, R0
- MOVW.HI $-1, R0
MOVW R0, ret+12(FP)
RET
@@ -93,12 +92,26 @@
MOVW n+8(FP), R2
MOVW $SYS_read, R7
SWI $0
- MOVW $0xfffff001, R1
- CMP R1, R0
- MOVW.HI $-1, R0
MOVW R0, ret+12(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVW $r+0(FP), R0
+ MOVW $SYS_pipe, R7
+ SWI $0
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVW $r+4(FP), R0
+ MOVW flags+0(FP), R1
+ MOVW $SYS_pipe2, R7
+ SWI $0
+ MOVW R0, errno+12(FP)
+ RET
+
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0
MOVW code+0(FP), R0
MOVW $SYS_exit_group, R7
@@ -159,6 +172,20 @@
SWI $0
RET
+TEXT ·getpid(SB),NOSPLIT,$0-4
+ MOVW $SYS_getpid, R7
+ SWI $0
+ MOVW R0, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT,$0-12
+ MOVW tgid+0(FP), R0
+ MOVW tid+4(FP), R1
+ MOVW sig+8(FP), R2
+ MOVW $SYS_tgkill, R7
+ SWI $0
+ RET
+
TEXT runtime·mmap(SB),NOSPLIT,$0
MOVW addr+0(FP), R0
MOVW n+4(FP), R1
@@ -215,7 +242,7 @@
MOVW R0, ret+12(FP)
RET
-TEXT runtime·walltime(SB),NOSPLIT,$0-12
+TEXT runtime·walltime1(SB),NOSPLIT,$0-12
// We don't know how much stack space the VDSO code will need,
// so switch to g0.
@@ -242,11 +269,38 @@
MOVW $CLOCK_REALTIME, R0
MOVW $8(R13), R1 // timespec
- MOVW runtime·vdsoClockgettimeSym(SB), R11
- CMP $0, R11
+ MOVW runtime·vdsoClockgettimeSym(SB), R2
+ CMP $0, R2
B.EQ fallback
- BL (R11)
+ // Store g on gsignal's stack, so if we receive a signal
+ // during VDSO code we can find the g.
+ // If we don't have a signal stack, we won't receive signal,
+ // so don't bother saving g.
+ // When using cgo, we already saved g on TLS, also don't save
+ // g here.
+ // Also don't save g if we are already on the signal stack.
+ // We won't get a nested signal.
+ MOVB runtime·iscgo(SB), R6
+ CMP $0, R6
+ BNE nosaveg
+ MOVW m_gsignal(R5), R6 // g.m.gsignal
+ CMP $0, R6
+ BEQ nosaveg
+ CMP g, R6
+ BEQ nosaveg
+ MOVW (g_stack+stack_lo)(R6), R6 // g.m.gsignal.stack.lo
+ MOVW g, (R6)
+
+ BL (R2)
+
+ MOVW $0, R1
+ MOVW R1, (R6) // clear g slot, R6 is unchanged by C code
+
+ JMP finish
+
+nosaveg:
+ BL (R2)
JMP finish
fallback:
@@ -266,8 +320,8 @@
MOVW R2, nsec+8(FP)
RET
-// int64 nanotime(void)
-TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+// int64 nanotime1(void)
+TEXT runtime·nanotime1(SB),NOSPLIT,$0-8
// Switch to g0 stack. See comment above in runtime·walltime.
// Save old SP. Use R13 instead of SP to avoid linker rewriting the offsets.
@@ -293,11 +347,38 @@
MOVW $CLOCK_MONOTONIC, R0
MOVW $8(R13), R1 // timespec
- MOVW runtime·vdsoClockgettimeSym(SB), R11
- CMP $0, R11
+ MOVW runtime·vdsoClockgettimeSym(SB), R2
+ CMP $0, R2
B.EQ fallback
- BL (R11)
+ // Store g on gsignal's stack, so if we receive a signal
+ // during VDSO code we can find the g.
+ // If we don't have a signal stack, we won't receive signal,
+ // so don't bother saving g.
+ // When using cgo, we already saved g on TLS, also don't save
+ // g here.
+ // Also don't save g if we are already on the signal stack.
+ // We won't get a nested signal.
+ MOVB runtime·iscgo(SB), R6
+ CMP $0, R6
+ BNE nosaveg
+ MOVW m_gsignal(R5), R6 // g.m.gsignal
+ CMP $0, R6
+ BEQ nosaveg
+ CMP g, R6
+ BEQ nosaveg
+ MOVW (g_stack+stack_lo)(R6), R6 // g.m.gsignal.stack.lo
+ MOVW g, (R6)
+
+ BL (R2)
+
+ MOVW $0, R1
+ MOVW R1, (R6) // clear g slot, R6 is unchanged by C code
+
+ JMP finish
+
+nosaveg:
+ BL (R2)
JMP finish
fallback:
@@ -434,7 +515,11 @@
MOVW R4, R13
RET
-TEXT runtime·sigtramp(SB),NOSPLIT,$12
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ // Reserve space for callee-save registers and arguments.
+ MOVM.DB.W [R4-R11], (R13)
+ SUB $16, R13
+
// this might be called in external code context,
// where g is not set.
// first save R0, because runtime·load_g will clobber it
@@ -447,6 +532,11 @@
MOVW R2, 12(R13)
MOVW $runtime·sigtrampgo(SB), R11
BL (R11)
+
+ // Restore callee-save registers.
+ ADD $16, R13
+ MOVM.IA.W (R13), [R4-R11]
+
RET
TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
@@ -567,6 +657,20 @@
SWI $0
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R0 // fd
+ MOVW $3, R1 // F_GETFL
+ MOVW $0, R2
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ ORR $0x800, R0, R2 // O_NONBLOCK
+ MOVW fd+0(FP), R0 // fd
+ MOVW $4, R1 // F_SETFL
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ RET
+
// b __kuser_get_tls @ 0xffff0fe0
TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
MOVW $0xffff0fe0, R0
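
The walltime1/nanotime1 rewrites all follow one template: if the vDSO clock_gettime symbol was resolved at startup, call it, stashing g on the gsignal stack first so a signal landing inside the C code can still find the current g, otherwise fall back to the real SYS_clock_gettime. Stripped of the register juggling, the control flow looks roughly like this (stub names are placeholders, not runtime API):

    package main

    import (
        "fmt"
        "time"
    )

    var vdsoClockgettime func(clock int32, ts *[2]int64) // nil if the vDSO lookup failed

    // fallbackClockGettime is a stub for the raw SYS_clock_gettime path;
    // it ignores clock and just reports wall time for illustration.
    func fallbackClockGettime(clock int32, ts *[2]int64) {
        now := time.Now()
        ts[0], ts[1] = now.Unix(), int64(now.Nanosecond())
    }

    func clockGettime(clock int32) (sec, nsec int64) {
        var ts [2]int64
        if vdsoClockgettime != nil {
            // Real code: save g where a signal handler can find it,
            // call the vDSO entry point, then clear the slot again.
            vdsoClockgettime(clock, &ts)
        } else {
            fallbackClockGettime(clock, &ts)
        }
        return ts[0], ts[1]
    }

    func main() {
        fmt.Println(clockGettime(1)) // 1 = CLOCK_MONOTONIC, as in the assembly
    }
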
diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index 2835b6c..b23e3b9 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -20,6 +20,7 @@
#define SYS_write 64
#define SYS_openat 56
#define SYS_close 57
+#define SYS_pipe2 59
#define SYS_fcntl 25
#define SYS_nanosleep 101
#define SYS_mmap 222
@@ -91,16 +92,12 @@
MOVW R0, ret+8(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
MOVD fd+0(FP), R0
MOVD p+8(FP), R1
MOVW n+16(FP), R2
MOVD $SYS_write, R8
SVC
- CMN $4095, R0
- BCC done
- MOVW $-1, R0
-done:
MOVW R0, ret+24(FP)
RET
@@ -110,13 +107,27 @@
MOVW n+16(FP), R2
MOVD $SYS_read, R8
SVC
- CMN $4095, R0
- BCC done
- MOVW $-1, R0
-done:
MOVW R0, ret+24(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVD $r+0(FP), R0
+ MOVW $0, R1
+ MOVW $SYS_pipe2, R8
+ SVC
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOVD $r+8(FP), R0
+ MOVW flags+0(FP), R1
+ MOVW $SYS_pipe2, R8
+ SVC
+ MOVW R0, errno+16(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$24-4
MOVWU usec+0(FP), R3
MOVD R3, R5
@@ -164,6 +175,20 @@
SVC
RET
+TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8
+ MOVD $SYS_getpid, R8
+ SVC
+ MOVD R0, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT,$0-24
+ MOVD tgid+0(FP), R0
+ MOVD tid+8(FP), R1
+ MOVD sig+16(FP), R2
+ MOVD $SYS_tgkill, R8
+ SVC
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
MOVW mode+0(FP), R0
MOVD new+8(FP), R1
@@ -181,8 +206,8 @@
MOVW R0, ret+24(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$24-12
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$24-12
MOVD RSP, R20 // R20 is unchanged by C code
MOVD RSP, R1
@@ -207,6 +232,31 @@
MOVW $CLOCK_REALTIME, R0
MOVD runtime·vdsoClockgettimeSym(SB), R2
CBZ R2, fallback
+
+ // Store g on gsignal's stack, so if we receive a signal
+ // during VDSO code we can find the g.
+ // If we don't have a signal stack, we won't receive signal,
+ // so don't bother saving g.
+ // When using cgo, we already saved g on TLS, also don't save
+ // g here.
+ // Also don't save g if we are already on the signal stack.
+ // We won't get a nested signal.
+ MOVBU runtime·iscgo(SB), R22
+ CBNZ R22, nosaveg
+ MOVD m_gsignal(R21), R22 // g.m.gsignal
+ CBZ R22, nosaveg
+ CMP g, R22
+ BEQ nosaveg
+ MOVD (g_stack+stack_lo)(R22), R22 // g.m.gsignal.stack.lo
+ MOVD g, (R22)
+
+ BL (R2)
+
+ MOVD ZR, (R22) // clear g slot, R22 is unchanged by C code
+
+ B finish
+
+nosaveg:
BL (R2)
B finish
@@ -225,7 +275,7 @@
MOVW R5, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$24-8
+TEXT runtime·nanotime1(SB),NOSPLIT,$24-8
MOVD RSP, R20 // R20 is unchanged by C code
MOVD RSP, R1
@@ -250,6 +300,31 @@
MOVW $CLOCK_MONOTONIC, R0
MOVD runtime·vdsoClockgettimeSym(SB), R2
CBZ R2, fallback
+
+ // Store g on gsignal's stack, so if we receive a signal
+ // during VDSO code we can find the g.
+ // If we don't have a signal stack, we won't receive signal,
+ // so don't bother saving g.
+ // When using cgo, we already saved g on TLS, also don't save
+ // g here.
+ // Also don't save g if we are already on the signal stack.
+ // We won't get a nested signal.
+ MOVBU runtime·iscgo(SB), R22
+ CBNZ R22, nosaveg
+ MOVD m_gsignal(R21), R22 // g.m.gsignal
+ CBZ R22, nosaveg
+ CMP g, R22
+ BEQ nosaveg
+ MOVD (g_stack+stack_lo)(R22), R22 // g.m.gsignal.stack.lo
+ MOVD g, (R22)
+
+ BL (R2)
+
+ MOVD ZR, (R22) // clear g slot, R22 is unchanged by C code
+
+ B finish
+
+nosaveg:
BL (R2)
B finish
@@ -344,8 +419,7 @@
// first save R0, because runtime·load_g will clobber it
MOVW R0, 8(RSP)
MOVBU runtime·iscgo(SB), R0
- CMP $0, R0
- BEQ 2(PC)
+ CBZ R0, 2(PC)
BL runtime·load_g(SB)
MOVD R1, 16(RSP)
@@ -605,6 +679,21 @@
SVC
RET
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R0 // fd
+ MOVD $3, R1 // F_GETFL
+ MOVD $0, R2
+ MOVD $SYS_fcntl, R8
+ SVC
+ MOVD $0x800, R2 // O_NONBLOCK
+ ORR R0, R2
+ MOVW fd+0(FP), R0 // fd
+ MOVD $4, R1 // F_SETFL
+ MOVD $SYS_fcntl, R8
+ SVC
+ RET
+
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0-20
MOVD $AT_FDCWD, R0
diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s
index 33ed105..6668a0f 100644
--- a/src/runtime/sys_linux_mips64x.s
+++ b/src/runtime/sys_linux_mips64x.s
@@ -21,7 +21,7 @@
#define SYS_close 5003
#define SYS_getpid 5038
#define SYS_kill 5060
-#define SYS_fcntl 5080
+#define SYS_fcntl 5070
#define SYS_mmap 5009
#define SYS_munmap 5011
#define SYS_setitimer 5036
@@ -46,6 +46,7 @@
#define SYS_clock_gettime 5222
#define SYS_epoll_create1 5285
#define SYS_brk 5012
+#define SYS_pipe2 5287
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
MOVW code+0(FP), R4
@@ -88,14 +89,14 @@
MOVW R2, ret+8(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
MOVV fd+0(FP), R4
MOVV p+8(FP), R5
MOVW n+16(FP), R6
MOVV $SYS_write, R2
SYSCALL
BEQ R7, 2(PC)
- MOVW $-1, R2
+ SUBVU R2, R0, R2 // caller expects negative errno
MOVW R2, ret+24(FP)
RET
@@ -106,10 +107,32 @@
MOVV $SYS_read, R2
SYSCALL
BEQ R7, 2(PC)
- MOVW $-1, R2
+ SUBVU R2, R0, R2 // caller expects negative errno
MOVW R2, ret+24(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVV $r+0(FP), R4
+ MOVV R0, R5
+ MOVV $SYS_pipe2, R2
+ SYSCALL
+ BEQ R7, 2(PC)
+ SUBVU R2, R0, R2 // caller expects negative errno
+ MOVW R2, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOVV $r+8(FP), R4
+ MOVW flags+0(FP), R5
+ MOVV $SYS_pipe2, R2
+ SYSCALL
+ BEQ R7, 2(PC)
+ SUBVU R2, R0, R2 // caller expects negative errno
+ MOVW R2, errno+16(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$16-4
MOVWU usec+0(FP), R3
MOVV R3, R5
@@ -158,6 +181,20 @@
SYSCALL
RET
+TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8
+ MOVV $SYS_getpid, R2
+ SYSCALL
+ MOVV R2, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24
+ MOVV tgid+0(FP), R4
+ MOVV tid+8(FP), R5
+ MOVV sig+16(FP), R6
+ MOVV $SYS_tgkill, R2
+ SYSCALL
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
MOVW mode+0(FP), R4
MOVV new+8(FP), R5
@@ -176,25 +213,90 @@
MOVW R2, ret+24(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$16
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$16
+ MOVV R29, R16 // R16 is unchanged by C code
+ MOVV R29, R1
+
+ MOVV g_m(g), R17 // R17 = m
+
+ // Set vdsoPC and vdsoSP for SIGPROF traceback.
+ MOVV R31, m_vdsoPC(R17)
+ MOVV R29, m_vdsoSP(R17)
+
+ MOVV m_curg(R17), R4
+ MOVV g, R5
+ BNE R4, R5, noswitch
+
+ MOVV m_g0(R17), R4
+ MOVV (g_sched+gobuf_sp)(R4), R1 // Set SP to g0 stack
+
+noswitch:
+ SUBV $16, R1
+ AND $~15, R1 // Align for C code
+ MOVV R1, R29
+
MOVW $0, R4 // CLOCK_REALTIME
MOVV $0(R29), R5
- MOVV $SYS_clock_gettime, R2
- SYSCALL
+
+ MOVV runtime·vdsoClockgettimeSym(SB), R25
+ BEQ R25, fallback
+
+ JAL (R25)
+
+finish:
MOVV 0(R29), R3 // sec
MOVV 8(R29), R5 // nsec
+
+ MOVV R16, R29 // restore SP
+ MOVV R0, m_vdsoSP(R17) // clear vdsoSP
+
MOVV R3, sec+0(FP)
MOVW R5, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$16
- MOVW $1, R4 // CLOCK_MONOTONIC
- MOVV $0(R29), R5
+fallback:
MOVV $SYS_clock_gettime, R2
SYSCALL
+ JMP finish
+
+TEXT runtime·nanotime1(SB),NOSPLIT,$16
+ MOVV R29, R16 // R16 is unchanged by C code
+ MOVV R29, R1
+
+ MOVV g_m(g), R17 // R17 = m
+
+ // Set vdsoPC and vdsoSP for SIGPROF traceback.
+ MOVV R31, m_vdsoPC(R17)
+ MOVV R29, m_vdsoSP(R17)
+
+ MOVV m_curg(R17), R4
+ MOVV g, R5
+ BNE R4, R5, noswitch
+
+ MOVV m_g0(R17), R4
+ MOVV (g_sched+gobuf_sp)(R4), R1 // Set SP to g0 stack
+
+noswitch:
+ SUBV $16, R1
+ AND $~15, R1 // Align for C code
+ MOVV R1, R29
+
+ MOVW $1, R4 // CLOCK_MONOTONIC
+ MOVV $0(R29), R5
+
+ MOVV runtime·vdsoClockgettimeSym(SB), R25
+ BEQ R25, fallback
+
+ JAL (R25)
+
+finish:
MOVV 0(R29), R3 // sec
MOVV 8(R29), R5 // nsec
+
+ MOVV R16, R29 // restore SP
+ MOVV R0, m_vdsoSP(R17) // clear vdsoSP
+
// sec is in R3, nsec in R5
// return nsec in R3
MOVV $1000000000, R4
@@ -204,6 +306,11 @@
MOVV R3, ret+0(FP)
RET
+fallback:
+ MOVV $SYS_clock_gettime, R2
+ SYSCALL
+ JMP finish
+
TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
MOVW how+0(FP), R4
MOVV new+8(FP), R5
@@ -454,6 +561,21 @@
SYSCALL
RET
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R4 // fd
+ MOVV $3, R5 // F_GETFL
+ MOVV $0, R6
+ MOVV $SYS_fcntl, R2
+ SYSCALL
+ MOVW $0x80, R6 // O_NONBLOCK
+ OR R2, R6
+ MOVW fd+0(FP), R4 // fd
+ MOVV $4, R5 // F_SETFL
+ MOVV $SYS_fcntl, R2
+ SYSCALL
+ RET
+
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8
// Implemented as brk(NULL).
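
Before calling into the vDSO, the mips64 code above switches to the g0 stack and rounds the stack pointer down to a 16-byte boundary (SUBV $16 followed by AND $~15), as the C ABI requires. The rounding itself, as a one-liner for reference:

    package main

    import "fmt"

    // alignDown rounds sp down to a multiple of align (a power of two),
    // mirroring the AND $~15 in the noswitch blocks above.
    func alignDown(sp, align uintptr) uintptr {
        return sp &^ (align - 1)
    }

    func main() {
        fmt.Printf("%#x\n", alignDown(0x7fff1234, 16)) // 0x7fff1230
    }
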
diff --git a/src/runtime/sys_linux_mipsx.s b/src/runtime/sys_linux_mipsx.s
index 6e539fb..fab2ab3 100644
--- a/src/runtime/sys_linux_mipsx.s
+++ b/src/runtime/sys_linux_mipsx.s
@@ -20,6 +20,7 @@
#define SYS_close 4006
#define SYS_getpid 4020
#define SYS_kill 4037
+#define SYS_pipe 4042
#define SYS_brk 4045
#define SYS_fcntl 4055
#define SYS_mmap 4090
@@ -44,6 +45,7 @@
#define SYS_clock_gettime 4263
#define SYS_tgkill 4266
#define SYS_epoll_create1 4326
+#define SYS_pipe2 4328
TEXT runtime·exit(SB),NOSPLIT,$0-4
MOVW code+0(FP), R4
@@ -86,14 +88,14 @@
MOVW R2, ret+4(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$0-16
+TEXT runtime·write1(SB),NOSPLIT,$0-16
MOVW fd+0(FP), R4
MOVW p+4(FP), R5
MOVW n+8(FP), R6
MOVW $SYS_write, R2
SYSCALL
BEQ R7, 2(PC)
- MOVW $-1, R2
+ SUBU R2, R0, R2 // caller expects negative errno
MOVW R2, ret+12(FP)
RET
@@ -104,10 +106,38 @@
MOVW $SYS_read, R2
SYSCALL
BEQ R7, 2(PC)
- MOVW $-1, R2
+ SUBU R2, R0, R2 // caller expects negative errno
MOVW R2, ret+12(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVW $SYS_pipe, R2
+ SYSCALL
+ BEQ R7, pipeok
+ MOVW $-1, R1
+ MOVW R1, r+0(FP)
+ MOVW R1, w+4(FP)
+ SUBU R2, R0, R2 // caller expects negative errno
+ MOVW R2, errno+8(FP)
+ RET
+pipeok:
+ MOVW R2, r+0(FP)
+ MOVW R3, w+4(FP)
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVW $r+4(FP), R4
+ MOVW flags+0(FP), R5
+ MOVW $SYS_pipe2, R2
+ SYSCALL
+ BEQ R7, 2(PC)
+ SUBU R2, R0, R2 // caller expects negative errno
+ MOVW R2, errno+12(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$28-4
MOVW usec+0(FP), R3
MOVW R3, R5
@@ -156,6 +186,20 @@
SYSCALL
RET
+TEXT ·getpid(SB),NOSPLIT,$0-4
+ MOVW $SYS_getpid, R2
+ SYSCALL
+ MOVW R2, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT,$0-12
+ MOVW tgid+0(FP), R4
+ MOVW tid+4(FP), R5
+ MOVW sig+8(FP), R6
+ MOVW $SYS_tgkill, R2
+ SYSCALL
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT,$0-12
MOVW mode+0(FP), R4
MOVW new+4(FP), R5
@@ -174,8 +218,8 @@
MOVW R2, ret+12(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$8-12
MOVW $0, R4 // CLOCK_REALTIME
MOVW $4(R29), R5
MOVW $SYS_clock_gettime, R2
@@ -193,7 +237,7 @@
MOVW R5, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$8-8
+TEXT runtime·nanotime1(SB),NOSPLIT,$8-8
MOVW $1, R4 // CLOCK_MONOTONIC
MOVW $4(R29), R5
MOVW $SYS_clock_gettime, R2
@@ -487,6 +531,21 @@
SYSCALL
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R4 // fd
+ MOVW $3, R5 // F_GETFL
+ MOVW $0, R6
+ MOVW $SYS_fcntl, R2
+ SYSCALL
+ MOVW $0x80, R6 // O_NONBLOCK
+ OR R2, R6
+ MOVW fd+0(FP), R4 // fd
+ MOVW $4, R5 // F_SETFL
+ MOVW $SYS_fcntl, R2
+ SYSCALL
+ RET
+
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT,$0-4
// Implemented as brk(NULL).
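
The pipe and pipe2 stubs added to this file (and to the other ports below) back a non-blocking pipe helper on the Go side that prefers pipe2 and falls back to pipe on kernels that predate it. That caller is not part of this hunk; the sketch below shows the same pattern in ordinary user code with the syscall package, so the function name and flag choice are illustrative rather than taken from the patch:

package main

import (
	"fmt"
	"syscall"
)

// nonblockingPipe prefers pipe2(O_CLOEXEC|O_NONBLOCK) and falls back to
// plain pipe(2) plus per-fd fixups on kernels without pipe2.
func nonblockingPipe() (r, w int, err error) {
	var p [2]int
	if err = syscall.Pipe2(p[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err == nil {
		return p[0], p[1], nil
	}
	if err != syscall.ENOSYS {
		return -1, -1, err
	}
	// Old kernel: create the pipe, then set the flags by hand.
	if err = syscall.Pipe(p[:]); err != nil {
		return -1, -1, err
	}
	for _, fd := range p {
		syscall.CloseOnExec(fd)
		if err = syscall.SetNonblock(fd, true); err != nil {
			return -1, -1, err
		}
	}
	return p[0], p[1], nil
}

func main() {
	r, w, err := nonblockingPipe()
	fmt.Println(r, w, err)
}
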
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index 13d2315..8629fe3 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -21,6 +21,7 @@
#define SYS_close 6
#define SYS_getpid 20
#define SYS_kill 37
+#define SYS_pipe 42
#define SYS_brk 45
#define SYS_fcntl 55
#define SYS_mmap 90
@@ -45,6 +46,7 @@
#define SYS_clock_gettime 246
#define SYS_tgkill 250
#define SYS_epoll_create1 315
+#define SYS_pipe2 317
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
MOVW code+0(FP), R3
@@ -80,13 +82,13 @@
MOVW R3, ret+8(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
MOVD fd+0(FP), R3
MOVD p+8(FP), R4
MOVW n+16(FP), R5
SYSCALL $SYS_write
BVC 2(PC)
- MOVW $-1, R3
+ NEG R3 // caller expects negative errno
MOVW R3, ret+24(FP)
RET
@@ -96,10 +98,25 @@
MOVW n+16(FP), R5
SYSCALL $SYS_read
BVC 2(PC)
- MOVW $-1, R3
+ NEG R3 // caller expects negative errno
MOVW R3, ret+24(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ ADD $FIXED_FRAME, R1, R3
+ SYSCALL $SYS_pipe
+ MOVW R3, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ ADD $FIXED_FRAME+8, R1, R3
+ MOVW flags+0(FP), R4
+ SYSCALL $SYS_pipe2
+ MOVW R3, errno+16(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$16-4
MOVW usec+0(FP), R3
MOVD R3, R5
@@ -139,6 +156,18 @@
SYSCALL $SYS_kill
RET
+TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8
+ SYSCALL $SYS_getpid
+ MOVD R3, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24
+ MOVD tgid+0(FP), R3
+ MOVD tid+8(FP), R4
+ MOVD sig+16(FP), R5
+ SYSCALL $SYS_tgkill
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
MOVW mode+0(FP), R3
MOVD new+8(FP), R4
@@ -155,8 +184,8 @@
MOVW R3, ret+24(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$16
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$16
MOVD R1, R15 // R15 is unchanged by C code
MOVD g_m(g), R21 // R21 = m
@@ -203,7 +232,7 @@
MOVD 40(R1), R5
JMP finish
-TEXT runtime·nanotime(SB),NOSPLIT,$16
+TEXT runtime·nanotime1(SB),NOSPLIT,$16
MOVD $1, R3 // CLOCK_MONOTONIC
MOVD R1, R15 // R15 is unchanged by C code
@@ -251,7 +280,7 @@
ADD $32, R1, R4
SYSCALL $SYS_clock_gettime
MOVD 32(R1), R3
- MOVD 48(R1), R5
+ MOVD 40(R1), R5
JMP finish
TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
@@ -612,6 +641,18 @@
SYSCALL $SYS_fcntl
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R3 // fd
+ MOVD $3, R4 // F_GETFL
+ MOVD $0, R5
+ SYSCALL $SYS_fcntl
+ OR $0x800, R3, R5 // O_NONBLOCK
+ MOVW fd+0(FP), R3 // fd
+ MOVD $4, R4 // F_SETFL
+ SYSCALL $SYS_fcntl
+ RET
+
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0
// Implemented as brk(NULL).
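
Throughout this patch the write and read wrappers become write1/read variants that return -errno on failure (NEG R3 here, SUBU R2, R0, R2 on mips) instead of collapsing every error to -1, so callers can tell EAGAIN apart from real failures. A self-contained sketch of that convention on linux/amd64; rawWrite is an illustrative name, not a symbol from the patch:

package main

import (
	"fmt"
	"syscall"
	"unsafe"
)

// rawWrite returns the byte count on success and -errno on failure,
// the same convention the new write1 stubs use.
func rawWrite(fd uintptr, p []byte) int64 {
	r, _, errno := syscall.RawSyscall(syscall.SYS_WRITE, fd, uintptr(unsafe.Pointer(&p[0])), uintptr(len(p)))
	if errno != 0 {
		return -int64(errno)
	}
	return int64(r)
}

func main() {
	if n := rawWrite(1, []byte("hello\n")); n < 0 {
		fmt.Println("write failed:", syscall.Errno(-n))
	}
}
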
diff --git a/src/runtime/sys_linux_riscv64.s b/src/runtime/sys_linux_riscv64.s
new file mode 100644
index 0000000..626ab39
--- /dev/null
+++ b/src/runtime/sys_linux_riscv64.s
@@ -0,0 +1,515 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for riscv64, Linux
+//
+
+#include "textflag.h"
+#include "go_asm.h"
+
+#define AT_FDCWD -100
+
+#define SYS_brk 214
+#define SYS_clock_gettime 113
+#define SYS_clone 220
+#define SYS_close 57
+#define SYS_connect 203
+#define SYS_epoll_create1 20
+#define SYS_epoll_ctl 21
+#define SYS_epoll_pwait 22
+#define SYS_exit 93
+#define SYS_exit_group 94
+#define SYS_faccessat 48
+#define SYS_fcntl 25
+#define SYS_futex 98
+#define SYS_getpid 172
+#define SYS_getrlimit 163
+#define SYS_gettid 178
+#define SYS_gettimeofday 169
+#define SYS_kill 129
+#define SYS_madvise 233
+#define SYS_mincore 232
+#define SYS_mmap 222
+#define SYS_munmap 215
+#define SYS_nanosleep 101
+#define SYS_openat 56
+#define SYS_pipe2 59
+#define SYS_pselect6 72
+#define SYS_read 63
+#define SYS_rt_sigaction 134
+#define SYS_rt_sigprocmask 135
+#define SYS_rt_sigreturn 139
+#define SYS_sched_getaffinity 123
+#define SYS_sched_yield 124
+#define SYS_setitimer 103
+#define SYS_sigaltstack 132
+#define SYS_socket 198
+#define SYS_tgkill 131
+#define SYS_tkill 130
+#define SYS_write 64
+
+// func exit(code int32)
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW code+0(FP), A0
+ MOV $SYS_exit_group, A7
+ ECALL
+ RET
+
+// func exitThread(wait *uint32)
+TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
+ MOV wait+0(FP), A0
+ // We're done using the stack.
+ FENCE
+ MOVW ZERO, (A0)
+ FENCE
+ MOV $0, A0 // exit code
+ MOV $SYS_exit, A7
+ ECALL
+ JMP 0(PC)
+
+// func open(name *byte, mode, perm int32) int32
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
+ MOV $AT_FDCWD, A0
+ MOV name+0(FP), A1
+ MOVW mode+8(FP), A2
+ MOVW perm+12(FP), A3
+ MOV $SYS_openat, A7
+ ECALL
+ MOV $-4096, T0
+ BGEU T0, A0, 2(PC)
+ MOV $-1, A0
+ MOVW A0, ret+16(FP)
+ RET
+
+// func closefd(fd int32) int32
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
+ MOVW fd+0(FP), A0
+ MOV $SYS_close, A7
+ ECALL
+ MOV $-4096, T0
+ BGEU T0, A0, 2(PC)
+ MOV $-1, A0
+ MOVW A0, ret+8(FP)
+ RET
+
+// func write1(fd uintptr, p unsafe.Pointer, n int32) int32
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
+ MOV fd+0(FP), A0
+ MOV p+8(FP), A1
+ MOVW n+16(FP), A2
+ MOV $SYS_write, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func read(fd int32, p unsafe.Pointer, n int32) int32
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
+ MOVW fd+0(FP), A0
+ MOV p+8(FP), A1
+ MOVW n+16(FP), A2
+ MOV $SYS_read, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOV $r+0(FP), A0
+ MOV ZERO, A1
+ MOV $SYS_pipe2, A7
+ ECALL
+ MOVW A0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOV $r+8(FP), A0
+ MOVW flags+0(FP), A1
+ MOV $SYS_pipe2, A7
+ ECALL
+ MOVW A0, errno+16(FP)
+ RET
+
+// func getrlimit(kind int32, limit unsafe.Pointer) int32
+TEXT runtime·getrlimit(SB),NOSPLIT|NOFRAME,$0-20
+ MOVW kind+0(FP), A0
+ MOV limit+8(FP), A1
+ MOV $SYS_getrlimit, A7
+ ECALL
+ MOVW A0, ret+16(FP)
+ RET
+
+// func usleep(usec uint32)
+TEXT runtime·usleep(SB),NOSPLIT,$24-4
+ MOVWU usec+0(FP), A0
+ MOV $1000, A1
+ MUL A1, A0, A0
+ MOV $1000000000, A1
+ DIV A1, A0, A2
+ MOV A2, 8(X2)
+ REM A1, A0, A3
+ MOV A3, 16(X2)
+ ADD $8, X2, A0
+ MOV ZERO, A1
+ MOV $SYS_nanosleep, A7
+ ECALL
+ RET
+
+// func gettid() uint32
+TEXT runtime·gettid(SB),NOSPLIT,$0-4
+ MOV $SYS_gettid, A7
+ ECALL
+ MOVW A0, ret+0(FP)
+ RET
+
+// func raise(sig uint32)
+TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_gettid, A7
+ ECALL
+ // arg 1 tid - already in A0
+ MOVW sig+0(FP), A1 // arg 2
+ MOV $SYS_tkill, A7
+ ECALL
+ RET
+
+// func raiseproc(sig uint32)
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_getpid, A7
+ ECALL
+ // arg 1 pid - already in A0
+ MOVW sig+0(FP), A1 // arg 2
+ MOV $SYS_kill, A7
+ ECALL
+ RET
+
+// func getpid() int
+TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8
+ MOV $SYS_getpid, A7
+ ECALL
+ MOV A0, ret+0(FP)
+ RET
+
+// func tgkill(tgid, tid, sig int)
+TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24
+ MOV tgid+0(FP), A0
+ MOV tid+8(FP), A1
+ MOV sig+16(FP), A2
+ MOV $SYS_tgkill, A7
+ ECALL
+ RET
+
+// func setitimer(mode int32, new, old *itimerval)
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
+ MOVW mode+0(FP), A0
+ MOV new+8(FP), A1
+ MOV old+16(FP), A2
+ MOV $SYS_setitimer, A7
+ ECALL
+ RET
+
+// func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
+TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOV dst+16(FP), A2
+ MOV $SYS_mincore, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$24-12
+ MOV $0, A0 // CLOCK_REALTIME
+ MOV $8(X2), A1
+ MOV $SYS_clock_gettime, A7
+ ECALL
+ MOV 8(X2), T0 // sec
+ MOV 16(X2), T1 // nsec
+ MOV T0, sec+0(FP)
+ MOVW T1, nsec+8(FP)
+ RET
+
+// func nanotime1() int64
+TEXT runtime·nanotime1(SB),NOSPLIT,$24-8
+ MOV $1, A0 // CLOCK_MONOTONIC
+ MOV $8(X2), A1
+ MOV $SYS_clock_gettime, A7
+ ECALL
+ MOV 8(X2), T0 // sec
+ MOV 16(X2), T1 // nsec
+ // sec is in T0, nsec in T1
+ // return nsec in T0
+ MOV $1000000000, T2
+ MUL T2, T0
+ ADD T1, T0
+ MOV T0, ret+0(FP)
+ RET
+
+// func rtsigprocmask(how int32, new, old *sigset, size int32)
+TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
+ MOVW how+0(FP), A0
+ MOV new+8(FP), A1
+ MOV old+16(FP), A2
+ MOVW size+24(FP), A3
+ MOV $SYS_rt_sigprocmask, A7
+ ECALL
+ MOV $-4096, T0
+ BLTU A0, T0, 2(PC)
+ WORD $0 // crash
+ RET
+
+// func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
+TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36
+ MOV sig+0(FP), A0
+ MOV new+8(FP), A1
+ MOV old+16(FP), A2
+ MOV size+24(FP), A3
+ MOV $SYS_rt_sigaction, A7
+ ECALL
+ MOVW A0, ret+32(FP)
+ RET
+
+// func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
+TEXT runtime·sigfwd(SB),NOSPLIT,$0-32
+ MOVW sig+8(FP), A0
+ MOV info+16(FP), A1
+ MOV ctx+24(FP), A2
+ MOV fn+0(FP), T1
+ JALR RA, T1
+ RET
+
+// func sigtramp(signo, ureg, ctxt unsafe.Pointer)
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ MOVW A0, 8(X2)
+ MOV A1, 16(X2)
+ MOV A2, 24(X2)
+
+ // this might be called in external code context,
+ // where g is not set.
+ MOVBU runtime·iscgo(SB), A0
+ BEQ A0, ZERO, 2(PC)
+ CALL runtime·load_g(SB)
+
+ MOV $runtime·sigtrampgo(SB), A0
+ JALR RA, A0
+ RET
+
+// func cgoSigtramp()
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+ MOV $runtime·sigtramp(SB), T1
+ JALR ZERO, T1
+
+// func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int)
+TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOVW prot+16(FP), A2
+ MOVW flags+20(FP), A3
+ MOVW fd+24(FP), A4
+ MOVW off+28(FP), A5
+ MOV $SYS_mmap, A7
+ ECALL
+ MOV $-4096, T0
+ BGEU T0, A0, 5(PC)
+ SUB A0, ZERO, A0
+ MOV ZERO, p+32(FP)
+ MOV A0, err+40(FP)
+ RET
+ok:
+ MOV A0, p+32(FP)
+ MOV ZERO, err+40(FP)
+ RET
+
+// func munmap(addr unsafe.Pointer, n uintptr)
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOV $SYS_munmap, A7
+ ECALL
+ MOV $-4096, T0
+ BLTU A0, T0, 2(PC)
+ WORD $0 // crash
+ RET
+
+// func madvise(addr unsafe.Pointer, n uintptr, flags int32)
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOVW flags+16(FP), A2
+ MOV $SYS_madvise, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
+TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOVW op+8(FP), A1
+ MOVW val+12(FP), A2
+ MOV ts+16(FP), A3
+ MOV addr2+24(FP), A4
+ MOVW val3+32(FP), A5
+ MOV $SYS_futex, A7
+ ECALL
+ MOVW A0, ret+40(FP)
+ RET
+
+// func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
+TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0
+ MOVW flags+0(FP), A0
+ MOV stk+8(FP), A1
+
+ // Copy mp, gp, fn off parent stack for use by child.
+ MOV mp+16(FP), T0
+ MOV gp+24(FP), T1
+ MOV fn+32(FP), T2
+
+ MOV T0, -8(A1)
+ MOV T1, -16(A1)
+ MOV T2, -24(A1)
+ MOV $1234, T0
+ MOV T0, -32(A1)
+
+ MOV $SYS_clone, A7
+ ECALL
+
+ // In parent, return.
+ BEQ ZERO, A0, child
+ MOVW ZERO, ret+40(FP)
+ RET
+
+child:
+ // In child, on new stack.
+ MOV -32(X2), T0
+ MOV $1234, A0
+ BEQ A0, T0, good
+ WORD $0 // crash
+
+good:
+ // Initialize m->procid to Linux tid
+ MOV $SYS_gettid, A7
+ ECALL
+
+ MOV -24(X2), T2 // fn
+ MOV -16(X2), T1 // g
+ MOV -8(X2), T0 // m
+
+ BEQ ZERO, T0, nog
+ BEQ ZERO, T1, nog
+
+ MOV A0, m_procid(T0)
+
+ // In child, set up new stack
+ MOV T0, g_m(T1)
+ MOV T1, g
+
+nog:
+ // Call fn
+ JALR RA, T2
+
+ // It shouldn't return. If it does, exit this thread.
+ MOV $111, A0
+ MOV $SYS_exit, A7
+ ECALL
+ JMP -3(PC) // keep exiting
+
+// func sigaltstack(new, old *stackt)
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
+ MOV new+0(FP), A0
+ MOV old+8(FP), A1
+ MOV $SYS_sigaltstack, A7
+ ECALL
+ MOV $-4096, T0
+ BLTU A0, T0, 2(PC)
+ WORD $0 // crash
+ RET
+
+// func osyield()
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_sched_yield, A7
+ ECALL
+ RET
+
+// func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0
+ MOV pid+0(FP), A0
+ MOV len+8(FP), A1
+ MOV buf+16(FP), A2
+ MOV $SYS_sched_getaffinity, A7
+ ECALL
+ MOV A0, ret+24(FP)
+ RET
+
+// func epollcreate(size int32) int32
+TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
+ MOV $0, A0
+ MOV $SYS_epoll_create1, A7
+ ECALL
+ MOVW A0, ret+8(FP)
+ RET
+
+// func epollcreate1(flags int32) int32
+TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
+ MOVW flags+0(FP), A0
+ MOV $SYS_epoll_create1, A7
+ ECALL
+ MOVW A0, ret+8(FP)
+ RET
+
+// func epollctl(epfd, op, fd int32, ev *epollevent) int32
+TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
+ MOVW epfd+0(FP), A0
+ MOVW op+4(FP), A1
+ MOVW fd+8(FP), A2
+ MOV ev+16(FP), A3
+ MOV $SYS_epoll_ctl, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func epollwait(epfd int32, ev *epollevent, nev, timeout int32) int32
+TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
+ MOVW epfd+0(FP), A0
+ MOV ev+8(FP), A1
+ MOVW nev+16(FP), A2
+ MOVW timeout+20(FP), A3
+ MOV $0, A4
+ MOV $SYS_epoll_pwait, A7
+ ECALL
+ MOVW A0, ret+24(FP)
+ RET
+
+// func closeonexec(int32)
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
+ MOVW fd+0(FP), A0 // fd
+ MOV $2, A1 // F_SETFD
+ MOV $1, A2 // FD_CLOEXEC
+ MOV $SYS_fcntl, A7
+ ECALL
+ RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), A0 // fd
+ MOV $3, A1 // F_GETFL
+ MOV $0, A2
+ MOV $SYS_fcntl, A7
+ ECALL
+ MOV $0x800, A2 // O_NONBLOCK
+ OR A0, A2
+ MOVW fd+0(FP), A0 // fd
+ MOV $4, A1 // F_SETFL
+ MOV $SYS_fcntl, A7
+ ECALL
+ RET
+
+// func sbrk0() uintptr
+TEXT runtime·sbrk0(SB),NOSPLIT,$0-8
+ // Implemented as brk(NULL).
+ MOV $0, A0
+ MOV $SYS_brk, A7
+ ECALL
+ MOVW A0, ret+0(FP)
+ RET
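
Each port also gains a runtime·setNonblock built from two fcntl calls: F_GETFL to read the current flags, then F_SETFL with O_NONBLOCK or'd in. Outside the runtime the one-liner is syscall.SetNonblock(fd, true); the sketch below spells out the two-step sequence on linux/amd64 for comparison (setNonblock here is a local helper, not the runtime symbol):

package main

import "syscall"

// setNonblock performs the F_GETFL / F_SETFL|O_NONBLOCK pair directly,
// matching what the assembly stubs do.
func setNonblock(fd int) error {
	flags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0)
	if errno != 0 {
		return errno
	}
	if _, _, errno = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_SETFL, flags|syscall.O_NONBLOCK); errno != 0 {
		return errno
	}
	return nil
}

func main() {
	_ = setNonblock(0) // same effect as syscall.SetNonblock(0, true)
}
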
diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s
index 58b36df..c15a1d5 100644
--- a/src/runtime/sys_linux_s390x.s
+++ b/src/runtime/sys_linux_s390x.s
@@ -16,6 +16,7 @@
#define SYS_close 6
#define SYS_getpid 20
#define SYS_kill 37
+#define SYS_pipe 42
#define SYS_brk 45
#define SYS_fcntl 55
#define SYS_mmap 90
@@ -39,6 +40,7 @@
#define SYS_epoll_ctl 250
#define SYS_epoll_wait 251
#define SYS_clock_gettime 260
+#define SYS_pipe2 325
#define SYS_epoll_create1 327
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
@@ -80,15 +82,12 @@
MOVW R2, ret+8(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
MOVD fd+0(FP), R2
MOVD p+8(FP), R3
MOVW n+16(FP), R4
MOVW $SYS_write, R1
SYSCALL
- MOVD $-4095, R3
- CMPUBLT R2, R3, 2(PC)
- MOVW $-1, R2
MOVW R2, ret+24(FP)
RET
@@ -98,12 +97,26 @@
MOVW n+16(FP), R4
MOVW $SYS_read, R1
SYSCALL
- MOVD $-4095, R3
- CMPUBLT R2, R3, 2(PC)
- MOVW $-1, R2
MOVW R2, ret+24(FP)
RET
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVD $r+0(FP), R2
+ MOVW $SYS_pipe, R1
+ SYSCALL
+ MOVW R2, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOVD $r+8(FP), R2
+ MOVW flags+0(FP), R3
+ MOVW $SYS_pipe2, R1
+ SYSCALL
+ MOVW R2, errno+16(FP)
+ RET
+
TEXT runtime·usleep(SB),NOSPLIT,$16-4
MOVW usec+0(FP), R2
MOVD R2, R4
@@ -150,6 +163,20 @@
SYSCALL
RET
+TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8
+ MOVW $SYS_getpid, R1
+ SYSCALL
+ MOVD R2, ret+0(FP)
+ RET
+
+TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24
+ MOVD tgid+0(FP), R2
+ MOVD tid+8(FP), R3
+ MOVD sig+16(FP), R4
+ MOVW $SYS_tgkill, R1
+ SYSCALL
+ RET
+
TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
MOVW mode+0(FP), R2
MOVD new+8(FP), R3
@@ -167,8 +194,8 @@
MOVW R2, ret+24(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$16
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$16
MOVW $0, R2 // CLOCK_REALTIME
MOVD $tp-16(SP), R3
MOVW $SYS_clock_gettime, R1
@@ -179,7 +206,7 @@
MOVW R3, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$16
+TEXT runtime·nanotime1(SB),NOSPLIT,$16
MOVW $1, R2 // CLOCK_MONOTONIC
MOVD $tp-16(SP), R3
MOVW $SYS_clock_gettime, R1
@@ -441,6 +468,21 @@
SYSCALL
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R2 // fd
+ MOVD $3, R3 // F_GETFL
+ XOR R4, R4
+ MOVW $SYS_fcntl, R1
+ SYSCALL
+ MOVD $0x800, R4 // O_NONBLOCK
+ OR R2, R4
+ MOVW fd+0(FP), R2 // fd
+ MOVD $4, R3 // F_SETFL
+ MOVW $SYS_fcntl, R1
+ SYSCALL
+ RET
+
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8
// Implemented as brk(NULL).
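
The getpid and tgkill wrappers added to every Linux port (the NetBSD and OpenBSD files below make the matching lwp_kill and getthrid/thrkill changes) let the runtime deliver a signal to one specific thread, which its new asynchronous preemption support uses. A user-space illustration of the same pair; SIGURG's default action is to be ignored, so the program is harmless to run:

package main

import "syscall"

func main() {
	tgid := syscall.Getpid()
	tid := syscall.Gettid()
	// Target this particular thread rather than the whole process,
	// the way the runtime signals a thread it wants to preempt.
	_ = syscall.Tgkill(tgid, tid, syscall.SIGURG)
}
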
diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s
deleted file mode 100644
index 8460aab..0000000
--- a/src/runtime/sys_nacl_386.s
+++ /dev/null
@@ -1,374 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "textflag.h"
-#include "syscall_nacl.h"
-
-#define NACL_SYSCALL(code) \
- MOVL $(0x10000 + ((code)<<5)), AX; CALL AX
-
-TEXT runtime·exit(SB),NOSPLIT,$4
- MOVL code+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_exit)
- JMP 0(PC)
-
-// func exitThread(wait *uint32)
-TEXT runtime·exitThread(SB),NOSPLIT,$4-4
- MOVL wait+0(FP), AX
- // SYS_thread_exit will clear *wait when the stack is free.
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_thread_exit)
- JMP 0(PC)
-
-TEXT runtime·open(SB),NOSPLIT,$12
- MOVL name+0(FP), AX
- MOVL AX, 0(SP)
- MOVL mode+4(FP), AX
- MOVL AX, 4(SP)
- MOVL perm+8(FP), AX
- MOVL AX, 8(SP)
- NACL_SYSCALL(SYS_open)
- MOVL AX, ret+12(FP)
- RET
-
-TEXT runtime·closefd(SB),NOSPLIT,$4
- MOVL fd+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_close)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·read(SB),NOSPLIT,$12
- MOVL fd+0(FP), AX
- MOVL AX, 0(SP)
- MOVL p+4(FP), AX
- MOVL AX, 4(SP)
- MOVL n+8(FP), AX
- MOVL AX, 8(SP)
- NACL_SYSCALL(SYS_read)
- MOVL AX, ret+12(FP)
- RET
-
-TEXT syscall·naclWrite(SB), NOSPLIT, $16-16
- MOVL arg1+0(FP), DI
- MOVL arg2+4(FP), SI
- MOVL arg3+8(FP), DX
- MOVL DI, 0(SP)
- MOVL SI, 4(SP)
- MOVL DX, 8(SP)
- CALL runtime·write(SB)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·write(SB),NOSPLIT,$12
- MOVL fd+0(FP), AX
- MOVL AX, 0(SP)
- MOVL p+4(FP), AX
- MOVL AX, 4(SP)
- MOVL n+8(FP), AX
- MOVL AX, 8(SP)
- NACL_SYSCALL(SYS_write)
- MOVL AX, ret+12(FP)
- RET
-
-TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$8
- MOVL p+0(FP), AX
- MOVL AX, 0(SP)
- MOVL size+4(FP), AX
- MOVL AX, 4(SP)
- NACL_SYSCALL(SYS_exception_stack)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$8
- MOVL fn+0(FP), AX
- MOVL AX, 0(SP)
- MOVL arg+4(FP), AX
- MOVL AX, 4(SP)
- NACL_SYSCALL(SYS_exception_handler)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_sem_create(SB),NOSPLIT,$4
- MOVL flag+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_sem_create)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$4
- MOVL sem+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_sem_wait)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_sem_post(SB),NOSPLIT,$4
- MOVL sem+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_sem_post)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$4
- MOVL flag+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_mutex_create)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$4
- MOVL mutex+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_mutex_lock)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$4
- MOVL mutex+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_mutex_trylock)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$4
- MOVL mutex+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_mutex_unlock)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_create(SB),NOSPLIT,$4
- MOVL flag+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_cond_create)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$8
- MOVL cond+0(FP), AX
- MOVL AX, 0(SP)
- MOVL n+4(FP), AX
- MOVL AX, 4(SP)
- NACL_SYSCALL(SYS_cond_wait)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$4
- MOVL cond+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_cond_signal)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$4
- MOVL cond+0(FP), AX
- MOVL AX, 0(SP)
- NACL_SYSCALL(SYS_cond_broadcast)
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$12
- MOVL cond+0(FP), AX
- MOVL AX, 0(SP)
- MOVL lock+4(FP), AX
- MOVL AX, 4(SP)
- MOVL ts+8(FP), AX
- MOVL AX, 8(SP)
- NACL_SYSCALL(SYS_cond_timed_wait_abs)
- MOVL AX, ret+12(FP)
- RET
-
-TEXT runtime·nacl_thread_create(SB),NOSPLIT,$16
- MOVL fn+0(FP), AX
- MOVL AX, 0(SP)
- MOVL stk+4(FP), AX
- MOVL AX, 4(SP)
- MOVL tls+8(FP), AX
- MOVL AX, 8(SP)
- MOVL xx+12(FP), AX
- MOVL AX, 12(SP)
- NACL_SYSCALL(SYS_thread_create)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
- JMP runtime·mstart(SB)
-
-TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$8
- MOVL ts+0(FP), AX
- MOVL AX, 0(SP)
- MOVL extra+4(FP), AX
- MOVL AX, 4(SP)
- NACL_SYSCALL(SYS_nanosleep)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·osyield(SB),NOSPLIT,$0
- NACL_SYSCALL(SYS_sched_yield)
- RET
-
-TEXT runtime·mmap(SB),NOSPLIT,$32
- MOVL addr+0(FP), AX
- MOVL AX, 0(SP)
- MOVL n+4(FP), AX
- MOVL AX, 4(SP)
- MOVL prot+8(FP), AX
- MOVL AX, 8(SP)
- MOVL flags+12(FP), AX
- MOVL AX, 12(SP)
- MOVL fd+16(FP), AX
- MOVL AX, 16(SP)
- MOVL off+20(FP), AX
- MOVL AX, 24(SP)
- MOVL $0, 28(SP)
- LEAL 24(SP), AX
- MOVL AX, 20(SP)
- NACL_SYSCALL(SYS_mmap)
- CMPL AX, $-4095
- JNA ok
- NEGL AX
- MOVL $0, p+24(FP)
- MOVL AX, err+28(FP)
- RET
-ok:
- MOVL AX, p+24(FP)
- MOVL $0, err+28(FP)
- RET
-
-TEXT runtime·walltime(SB),NOSPLIT,$20
- MOVL $0, 0(SP) // real time clock
- LEAL 8(SP), AX
- MOVL AX, 4(SP) // timespec
- NACL_SYSCALL(SYS_clock_gettime)
- MOVL 8(SP), AX // low 32 sec
- MOVL 12(SP), CX // high 32 sec
- MOVL 16(SP), BX // nsec
-
- // sec is in AX, nsec in BX
- MOVL AX, sec_lo+0(FP)
- MOVL CX, sec_hi+4(FP)
- MOVL BX, nsec+8(FP)
- RET
-
-TEXT syscall·now(SB),NOSPLIT,$0
- JMP runtime·walltime(SB)
-
-TEXT runtime·nanotime(SB),NOSPLIT,$20
- MOVL $0, 0(SP) // real time clock
- LEAL 8(SP), AX
- MOVL AX, 4(SP) // timespec
- NACL_SYSCALL(SYS_clock_gettime)
- MOVL 8(SP), AX // low 32 sec
- MOVL 16(SP), BX // nsec
-
- // sec is in AX, nsec in BX
- // convert to DX:AX nsec
- MOVL $1000000000, CX
- MULL CX
- ADDL BX, AX
- ADCL $0, DX
-
- MOVL AX, ret_lo+0(FP)
- MOVL DX, ret_hi+4(FP)
- RET
-
-TEXT runtime·setldt(SB),NOSPLIT,$8
- MOVL base+4(FP), BX
- ADDL $0x8, BX
- MOVL BX, 0(SP)
- NACL_SYSCALL(SYS_tls_init)
- RET
-
-TEXT runtime·sigtramp(SB),NOSPLIT,$0
- get_tls(CX)
-
- // check that g exists
- MOVL g(CX), DI
- CMPL DI, $0
- JNE 6(PC)
- MOVL $11, BX
- MOVL $0, 0(SP)
- MOVL $runtime·badsignal(SB), AX
- CALL AX
- JMP ret
-
- // save g
- NOP SP // tell vet SP changed - stop checking offsets
- MOVL DI, 20(SP)
-
- // g = m->gsignal
- MOVL g_m(DI), BX
- MOVL m_gsignal(BX), BX
- MOVL BX, g(CX)
-
- // copy arguments for sighandler
- MOVL $11, 0(SP) // signal
- MOVL $0, 4(SP) // siginfo
- LEAL 8(SP), AX
- MOVL AX, 8(SP) // context
- MOVL DI, 12(SP) // g
-
- CALL runtime·sighandler(SB)
-
- // restore g
- get_tls(CX)
- MOVL 20(SP), BX
- MOVL BX, g(CX)
-
-ret:
- // Enable exceptions again.
- NACL_SYSCALL(SYS_exception_clear_flag)
-
- // NaCl has abdicated its traditional operating system responsibility
- // and declined to implement 'sigreturn'. Instead the only way to return
- // to the execution of our program is to restore the registers ourselves.
- // Unfortunately, that is impossible to do with strict fidelity, because
- // there is no way to do the final update of PC that ends the sequence
- // without either (1) jumping to a register, in which case the register ends
- // holding the PC value instead of its intended value or (2) storing the PC
- // on the stack and using RET, which imposes the requirement that SP is
- // valid and that is okay to smash the word below it. The second would
- // normally be the lesser of the two evils, except that on NaCl, the linker
- // must rewrite RET into "POP reg; AND $~31, reg; JMP reg", so either way
- // we are going to lose a register as a result of the incoming signal.
- // Similarly, there is no way to restore EFLAGS; the usual way is to use
- // POPFL, but NaCl rejects that instruction. We could inspect the bits and
- // execute a sequence of instructions designed to recreate those flag
- // settings, but that's a lot of work.
- //
- // Thankfully, Go's signal handlers never try to return directly to the
- // executing code, so all the registers and EFLAGS are dead and can be
- // smashed. The only registers that matter are the ones that are setting
- // up for the simulated call that the signal handler has created.
- // Today those registers are just PC and SP, but in case additional registers
- // are relevant in the future (for example DX is the Go func context register)
- // we restore as many registers as possible.
- //
- // We smash BP, because that's what the linker smashes during RET.
- //
- LEAL 72(SP), BP
- MOVL 0(BP), AX
- MOVL 4(BP), CX
- MOVL 8(BP), DX
- MOVL 12(BP), BX
- MOVL 16(BP), SP
- // 20(BP) is saved BP, never to be seen again
- MOVL 24(BP), SI
- MOVL 28(BP), DI
- // 36(BP) is saved EFLAGS, never to be seen again
- MOVL 32(BP), BP // saved PC
- JMP BP
-
-// func getRandomData([]byte)
-TEXT runtime·getRandomData(SB),NOSPLIT,$8-12
- MOVL arg_base+0(FP), AX
- MOVL AX, 0(SP)
- MOVL arg_len+4(FP), AX
- MOVL AX, 4(SP)
- NACL_SYSCALL(SYS_get_random_bytes)
- RET
diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s
deleted file mode 100644
index 9f4f69c..0000000
--- a/src/runtime/sys_nacl_amd64p32.s
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "textflag.h"
-#include "syscall_nacl.h"
-
-#define NACL_SYSCALL(code) \
- MOVL $(0x10000 + ((code)<<5)), AX; CALL AX
-
-TEXT runtime·settls(SB),NOSPLIT,$0
- MOVL DI, TLS // really BP
- RET
-
-TEXT runtime·exit(SB),NOSPLIT,$0
- MOVL code+0(FP), DI
- NACL_SYSCALL(SYS_exit)
- RET
-
-// func exitThread(wait *uint32)
-TEXT runtime·exitThread(SB),NOSPLIT,$0-4
- MOVL wait+0(FP), DI
- // SYS_thread_exit will clear *wait when the stack is free.
- NACL_SYSCALL(SYS_thread_exit)
- JMP 0(PC)
-
-TEXT runtime·open(SB),NOSPLIT,$0
- MOVL name+0(FP), DI
- MOVL mode+4(FP), SI
- MOVL perm+8(FP), DX
- NACL_SYSCALL(SYS_open)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·closefd(SB),NOSPLIT,$0
- MOVL fd+0(FP), DI
- NACL_SYSCALL(SYS_close)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·read(SB),NOSPLIT,$0
- MOVL fd+0(FP), DI
- MOVL p+4(FP), SI
- MOVL n+8(FP), DX
- NACL_SYSCALL(SYS_read)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT syscall·naclWrite(SB), NOSPLIT, $24-20
- MOVL arg1+0(FP), DI
- MOVL arg2+4(FP), SI
- MOVL arg3+8(FP), DX
- MOVL DI, 0(SP)
- MOVL SI, 4(SP)
- MOVL DX, 8(SP)
- CALL runtime·write(SB)
- MOVL 16(SP), AX
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·write(SB),NOSPLIT,$16-20
- // If using fake time and writing to stdout or stderr,
- // emit playback header before actual data.
- MOVQ runtime·faketime(SB), AX
- CMPQ AX, $0
- JEQ write
- MOVL fd+0(FP), DI
- CMPL DI, $1
- JEQ playback
- CMPL DI, $2
- JEQ playback
-
-write:
- // Ordinary write.
- MOVL fd+0(FP), DI
- MOVL p+4(FP), SI
- MOVL n+8(FP), DX
- NACL_SYSCALL(SYS_write)
- MOVL AX, ret+16(FP)
- RET
-
- // Write with playback header.
- // First, lock to avoid interleaving writes.
-playback:
- MOVL $1, BX
- XCHGL runtime·writelock(SB), BX
- CMPL BX, $0
- JNE playback
-
- MOVQ runtime·lastfaketime(SB), CX
- MOVL runtime·lastfaketimefd(SB), BX
- CMPL DI, BX
- JE samefd
-
- // If the current fd doesn't match the fd of the previous write,
- // ensure that the timestamp is strictly greater. That way, we can
- // recover the original order even if we read the fds separately.
- INCQ CX
- MOVL DI, runtime·lastfaketimefd(SB)
-
-samefd:
- CMPQ AX, CX
- CMOVQLT CX, AX
- MOVQ AX, runtime·lastfaketime(SB)
-
- // Playback header: 0 0 P B <8-byte time> <4-byte data length>
- MOVL $(('B'<<24) | ('P'<<16)), 0(SP)
- BSWAPQ AX
- MOVQ AX, 4(SP)
- MOVL n+8(FP), DX
- BSWAPL DX
- MOVL DX, 12(SP)
- MOVL fd+0(FP), DI
- MOVL SP, SI
- MOVL $16, DX
- NACL_SYSCALL(SYS_write)
-
- // Write actual data.
- MOVL fd+0(FP), DI
- MOVL p+4(FP), SI
- MOVL n+8(FP), DX
- NACL_SYSCALL(SYS_write)
-
- // Unlock.
- MOVL $0, runtime·writelock(SB)
-
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
- MOVL p+0(FP), DI
- MOVL size+4(FP), SI
- NACL_SYSCALL(SYS_exception_stack)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
- MOVL fn+0(FP), DI
- MOVL arg+4(FP), SI
- NACL_SYSCALL(SYS_exception_handler)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
- MOVL flag+0(FP), DI
- NACL_SYSCALL(SYS_sem_create)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
- MOVL sem+0(FP), DI
- NACL_SYSCALL(SYS_sem_wait)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
- MOVL sem+0(FP), DI
- NACL_SYSCALL(SYS_sem_post)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
- MOVL flag+0(FP), DI
- NACL_SYSCALL(SYS_mutex_create)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
- MOVL mutex+0(FP), DI
- NACL_SYSCALL(SYS_mutex_lock)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
- MOVL mutex+0(FP), DI
- NACL_SYSCALL(SYS_mutex_trylock)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
- MOVL mutex+0(FP), DI
- NACL_SYSCALL(SYS_mutex_unlock)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
- MOVL flag+0(FP), DI
- NACL_SYSCALL(SYS_cond_create)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
- MOVL cond+0(FP), DI
- MOVL n+4(FP), SI
- NACL_SYSCALL(SYS_cond_wait)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
- MOVL cond+0(FP), DI
- NACL_SYSCALL(SYS_cond_signal)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
- MOVL cond+0(FP), DI
- NACL_SYSCALL(SYS_cond_broadcast)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
- MOVL cond+0(FP), DI
- MOVL lock+4(FP), SI
- MOVL ts+8(FP), DX
- NACL_SYSCALL(SYS_cond_timed_wait_abs)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
- MOVL fn+0(FP), DI
- MOVL stk+4(FP), SI
- MOVL tls+8(FP), DX
- MOVL xx+12(FP), CX
- NACL_SYSCALL(SYS_thread_create)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
- NACL_SYSCALL(SYS_tls_get)
- SUBL $8, AX
- MOVL AX, TLS
- JMP runtime·mstart(SB)
-
-TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
- MOVL ts+0(FP), DI
- MOVL extra+4(FP), SI
- NACL_SYSCALL(SYS_nanosleep)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·osyield(SB),NOSPLIT,$0
- NACL_SYSCALL(SYS_sched_yield)
- RET
-
-TEXT runtime·mmap(SB),NOSPLIT,$8
- MOVL addr+0(FP), DI
- MOVL n+4(FP), SI
- MOVL prot+8(FP), DX
- MOVL flags+12(FP), CX
- MOVL fd+16(FP), R8
- MOVL off+20(FP), AX
- MOVQ AX, 0(SP)
- MOVL SP, R9
- NACL_SYSCALL(SYS_mmap)
- CMPL AX, $-4095
- JNA ok
- NEGL AX
- MOVL $0, p+24(FP)
- MOVL AX, err+28(FP)
- RET
-ok:
- MOVL AX, p+24(FP)
- MOVL $0, err+28(FP)
- RET
-
-TEXT runtime·walltime(SB),NOSPLIT,$16
- MOVQ runtime·faketime(SB), AX
- CMPQ AX, $0
- JEQ realtime
- MOVQ $0, DX
- MOVQ $1000000000, CX
- DIVQ CX
- MOVQ AX, sec+0(FP)
- MOVL DX, nsec+8(FP)
- RET
-realtime:
- MOVL $0, DI // real time clock
- LEAL 0(SP), AX
- MOVL AX, SI // timespec
- NACL_SYSCALL(SYS_clock_gettime)
- MOVL 0(SP), AX // low 32 sec
- MOVL 4(SP), CX // high 32 sec
- MOVL 8(SP), BX // nsec
-
- // sec is in AX, nsec in BX
- MOVL AX, sec_lo+0(FP)
- MOVL CX, sec_hi+4(FP)
- MOVL BX, nsec+8(FP)
- RET
-
-TEXT syscall·now(SB),NOSPLIT,$0
- JMP runtime·walltime(SB)
-
-TEXT runtime·nanotime(SB),NOSPLIT,$16
- MOVQ runtime·faketime(SB), AX
- CMPQ AX, $0
- JEQ 3(PC)
- MOVQ AX, ret+0(FP)
- RET
- MOVL $0, DI // real time clock
- LEAL 0(SP), AX
- MOVL AX, SI // timespec
- NACL_SYSCALL(SYS_clock_gettime)
- MOVQ 0(SP), AX // sec
- MOVL 8(SP), DX // nsec
-
- // sec is in AX, nsec in DX
- // return nsec in AX
- IMULQ $1000000000, AX
- ADDQ DX, AX
- MOVQ AX, ret+0(FP)
- RET
-
-TEXT runtime·sigtramp(SB),NOSPLIT,$80
- // restore TLS register at time of execution,
- // in case it's been smashed.
- // the TLS register is really BP, but for consistency
- // with non-NaCl systems it is referred to here as TLS.
- // NOTE: Cannot use SYS_tls_get here (like we do in mstart_nacl),
- // because the main thread never calls tls_set.
- LEAL ctxt+0(FP), AX
- MOVL (16*4+5*8)(AX), AX
- MOVL AX, TLS
-
- // check that g exists
- get_tls(CX)
- MOVL g(CX), DI
-
- CMPL DI, $0
- JEQ nog
-
- // save g
- MOVL DI, 20(SP)
-
- // g = m->gsignal
- MOVL g_m(DI), BX
- MOVL m_gsignal(BX), BX
- MOVL BX, g(CX)
-
-//JMP debughandler
-
- // copy arguments for sighandler
- MOVL $11, 0(SP) // signal
- MOVL $0, 4(SP) // siginfo
- LEAL ctxt+0(FP), AX
- MOVL AX, 8(SP) // context
- MOVL DI, 12(SP) // g
-
- CALL runtime·sighandler(SB)
-
- // restore g
- get_tls(CX)
- MOVL 20(SP), BX
- MOVL BX, g(CX)
-
- // Enable exceptions again.
- NACL_SYSCALL(SYS_exception_clear_flag)
-
- // Restore registers as best we can. Impossible to do perfectly.
- // See comment in sys_nacl_386.s for extended rationale.
- LEAL ctxt+0(FP), SI
- ADDL $64, SI
- MOVQ 0(SI), AX
- MOVQ 8(SI), CX
- MOVQ 16(SI), DX
- MOVQ 24(SI), BX
- MOVL 32(SI), SP // MOVL for SP sandboxing
- // 40(SI) is saved BP aka TLS, already restored above
- // 48(SI) is saved SI, never to be seen again
- MOVQ 56(SI), DI
- MOVQ 64(SI), R8
- MOVQ 72(SI), R9
- MOVQ 80(SI), R10
- MOVQ 88(SI), R11
- MOVQ 96(SI), R12
- MOVQ 104(SI), R13
- MOVQ 112(SI), R14
- // 120(SI) is R15, which is owned by Native Client and must not be modified
- MOVQ 128(SI), SI // saved PC
- // 136(SI) is saved EFLAGS, never to be seen again
- JMP SI
-
-//debughandler:
- //// print basic information
- //LEAL ctxt+0(FP), DI
- //MOVL $runtime·sigtrampf(SB), AX
- //MOVL AX, 0(SP)
- //MOVQ (16*4+16*8)(DI), BX // rip
- //MOVQ BX, 8(SP)
- //MOVQ (16*4+0*8)(DI), BX // rax
- //MOVQ BX, 16(SP)
- //MOVQ (16*4+1*8)(DI), BX // rcx
- //MOVQ BX, 24(SP)
- //MOVQ (16*4+2*8)(DI), BX // rdx
- //MOVQ BX, 32(SP)
- //MOVQ (16*4+3*8)(DI), BX // rbx
- //MOVQ BX, 40(SP)
- //MOVQ (16*4+7*8)(DI), BX // rdi
- //MOVQ BX, 48(SP)
- //MOVQ (16*4+15*8)(DI), BX // r15
- //MOVQ BX, 56(SP)
- //MOVQ (16*4+4*8)(DI), BX // rsp
- //MOVQ 0(BX), BX
- //MOVQ BX, 64(SP)
- //CALL runtime·printf(SB)
- //
- //LEAL ctxt+0(FP), DI
- //MOVQ (16*4+16*8)(DI), BX // rip
- //MOVL BX, 0(SP)
- //MOVQ (16*4+4*8)(DI), BX // rsp
- //MOVL BX, 4(SP)
- //MOVL $0, 8(SP) // lr
- //get_tls(CX)
- //MOVL g(CX), BX
- //MOVL BX, 12(SP) // gp
- //CALL runtime·traceback(SB)
-
-notls:
- MOVL 0, AX
- RET
-
-nog:
- MOVL 0, AX
- RET
-
-// cannot do real signal handling yet, because gsignal has not been allocated.
-MOVL $1, DI; NACL_SYSCALL(SYS_exit)
-
-// func getRandomData([]byte)
-TEXT runtime·getRandomData(SB),NOSPLIT,$0-12
- MOVL arg_base+0(FP), DI
- MOVL arg_len+4(FP), SI
- NACL_SYSCALL(SYS_get_random_bytes)
- RET
-
-TEXT runtime·nacl_sysinfo(SB),NOSPLIT,$16
-/*
- MOVL di+0(FP), DI
- LEAL 12(DI), BX
- MOVL 8(DI), AX
- ADDL 4(DI), AX
- ADDL $2, AX
- LEAL (BX)(AX*4), BX
- MOVL BX, runtime·nacl_irt_query(SB)
-auxloop:
- MOVL 0(BX), DX
- CMPL DX, $0
- JNE 2(PC)
- RET
- CMPL DX, $32
- JEQ auxfound
- ADDL $8, BX
- JMP auxloop
-auxfound:
- MOVL 4(BX), BX
- MOVL BX, runtime·nacl_irt_query(SB)
-
- LEAL runtime·nacl_irt_basic_v0_1_str(SB), DI
- LEAL runtime·nacl_irt_basic_v0_1(SB), SI
- MOVL runtime·nacl_irt_basic_v0_1_size(SB), DX
- MOVL runtime·nacl_irt_query(SB), BX
- CALL BX
-
- LEAL runtime·nacl_irt_memory_v0_3_str(SB), DI
- LEAL runtime·nacl_irt_memory_v0_3(SB), SI
- MOVL runtime·nacl_irt_memory_v0_3_size(SB), DX
- MOVL runtime·nacl_irt_query(SB), BX
- CALL BX
-
- LEAL runtime·nacl_irt_thread_v0_1_str(SB), DI
- LEAL runtime·nacl_irt_thread_v0_1(SB), SI
- MOVL runtime·nacl_irt_thread_v0_1_size(SB), DX
- MOVL runtime·nacl_irt_query(SB), BX
- CALL BX
-
- // TODO: Once we have a NaCl SDK with futex syscall support,
- // try switching to futex syscalls and here load the
- // nacl-irt-futex-0.1 table.
-*/
- RET
diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s
deleted file mode 100644
index 9020168..0000000
--- a/src/runtime/sys_nacl_arm.s
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "go_asm.h"
-#include "go_tls.h"
-#include "textflag.h"
-#include "syscall_nacl.h"
-
-#define NACL_SYSCALL(code) \
- MOVW $(0x10000 + ((code)<<5)), R8; BL (R8)
-
-TEXT runtime·exit(SB),NOSPLIT,$0
- MOVW code+0(FP), R0
- NACL_SYSCALL(SYS_exit)
- RET
-
-// func exitThread(wait *uint32)
-TEXT runtime·exitThread(SB),NOSPLIT,$4-4
- MOVW wait+0(FP), R0
- // SYS_thread_exit will clear *wait when the stack is free.
- NACL_SYSCALL(SYS_thread_exit)
- JMP 0(PC)
-
-TEXT runtime·open(SB),NOSPLIT,$0
- MOVW name+0(FP), R0
- MOVW name+0(FP), R1
- MOVW name+0(FP), R2
- NACL_SYSCALL(SYS_open)
- MOVW R0, ret+12(FP)
- RET
-
-TEXT runtime·closefd(SB),NOSPLIT,$0
- MOVW fd+0(FP), R0
- NACL_SYSCALL(SYS_close)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·read(SB),NOSPLIT,$0
- MOVW fd+0(FP), R0
- MOVW p+4(FP), R1
- MOVW n+8(FP), R2
- NACL_SYSCALL(SYS_read)
- MOVW R0, ret+12(FP)
- RET
-
-// func naclWrite(fd int, b []byte) int
-TEXT syscall·naclWrite(SB),NOSPLIT,$0
- MOVW arg1+0(FP), R0
- MOVW arg2+4(FP), R1
- MOVW arg3+8(FP), R2
- NACL_SYSCALL(SYS_write)
- MOVW R0, ret+16(FP)
- RET
-
-TEXT runtime·write(SB),NOSPLIT,$0
- MOVW fd+0(FP), R0
- MOVW p+4(FP), R1
- MOVW n+8(FP), R2
- NACL_SYSCALL(SYS_write)
- MOVW R0, ret+12(FP)
- RET
-
-TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
- MOVW p+0(FP), R0
- MOVW size+4(FP), R1
- NACL_SYSCALL(SYS_exception_stack)
- MOVW R0, ret+8(FP)
- RET
-
-TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
- MOVW fn+0(FP), R0
- MOVW arg+4(FP), R1
- NACL_SYSCALL(SYS_exception_handler)
- MOVW R0, ret+8(FP)
- RET
-
-TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
- MOVW flag+0(FP), R0
- NACL_SYSCALL(SYS_sem_create)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
- MOVW sem+0(FP), R0
- NACL_SYSCALL(SYS_sem_wait)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
- MOVW sem+0(FP), R0
- NACL_SYSCALL(SYS_sem_post)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
- MOVW flag+0(FP), R0
- NACL_SYSCALL(SYS_mutex_create)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
- MOVW mutex+0(FP), R0
- NACL_SYSCALL(SYS_mutex_lock)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
- MOVW mutex+0(FP), R0
- NACL_SYSCALL(SYS_mutex_trylock)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
- MOVW mutex+0(FP), R0
- NACL_SYSCALL(SYS_mutex_unlock)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
- MOVW flag+0(FP), R0
- NACL_SYSCALL(SYS_cond_create)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
- MOVW cond+0(FP), R0
- MOVW n+4(FP), R1
- NACL_SYSCALL(SYS_cond_wait)
- MOVW R0, ret+8(FP)
- RET
-
-TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
- MOVW cond+0(FP), R0
- NACL_SYSCALL(SYS_cond_signal)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
- MOVW cond+0(FP), R0
- NACL_SYSCALL(SYS_cond_broadcast)
- MOVW R0, ret+4(FP)
- RET
-
-TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
- MOVW cond+0(FP), R0
- MOVW lock+4(FP), R1
- MOVW ts+8(FP), R2
- NACL_SYSCALL(SYS_cond_timed_wait_abs)
- MOVW R0, ret+12(FP)
- RET
-
-TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
- MOVW fn+0(FP), R0
- MOVW stk+4(FP), R1
- MOVW tls+8(FP), R2
- MOVW xx+12(FP), R3
- NACL_SYSCALL(SYS_thread_create)
- MOVW R0, ret+16(FP)
- RET
-
-TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
- MOVW 0(R9), R0 // TLS
- MOVW -8(R0), R1 // g
- MOVW -4(R0), R2 // m
- MOVW R2, g_m(R1)
- MOVW R1, g
- B runtime·mstart(SB)
-
-TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
- MOVW ts+0(FP), R0
- MOVW extra+4(FP), R1
- NACL_SYSCALL(SYS_nanosleep)
- MOVW R0, ret+8(FP)
- RET
-
-TEXT runtime·osyield(SB),NOSPLIT,$0
- NACL_SYSCALL(SYS_sched_yield)
- RET
-
-TEXT runtime·mmap(SB),NOSPLIT,$8
- MOVW addr+0(FP), R0
- MOVW n+4(FP), R1
- MOVW prot+8(FP), R2
- MOVW flags+12(FP), R3
- MOVW fd+16(FP), R4
- // arg6:offset should be passed as a pointer (to int64)
- MOVW off+20(FP), R5
- MOVW R5, 4(R13)
- MOVW $0, R6
- MOVW R6, 8(R13)
- MOVW $4(R13), R5
- MOVM.DB.W [R4,R5], (R13) // arg5 and arg6 are passed on stack
- NACL_SYSCALL(SYS_mmap)
- MOVM.IA.W (R13), [R4, R5]
- CMP $-4095, R0
- MOVW $0, R1
- RSB.HI $0, R0
- MOVW.HI R0, R1 // if error, put in R1
- MOVW.HI $0, R0
- MOVW R0, p+24(FP)
- MOVW R1, err+28(FP)
- RET
-
-TEXT runtime·walltime(SB),NOSPLIT,$16
- MOVW $0, R0 // real time clock
- MOVW $4(R13), R1
- NACL_SYSCALL(SYS_clock_gettime)
- MOVW 4(R13), R0 // low 32-bit sec
- MOVW 8(R13), R1 // high 32-bit sec
- MOVW 12(R13), R2 // nsec
- MOVW R0, sec_lo+0(FP)
- MOVW R1, sec_hi+4(FP)
- MOVW R2, nsec+8(FP)
- RET
-
-TEXT syscall·now(SB),NOSPLIT,$0
- B runtime·walltime(SB)
-
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB),NOSPLIT,$16
- MOVW $0, R0 // real time clock
- MOVW $4(R13), R1
- NACL_SYSCALL(SYS_clock_gettime)
- MOVW 4(R13), R0 // low 32-bit sec
- MOVW 8(R13), R1 // high 32-bit sec (ignored for now)
- MOVW 12(R13), R2 // nsec
- MOVW $1000000000, R3
- MULLU R0, R3, (R1, R0)
- MOVW $0, R4
- ADD.S R2, R0
- ADC R4, R1
- MOVW R0, ret_lo+0(FP)
- MOVW R1, ret_hi+4(FP)
- RET
-
-TEXT runtime·sigtramp(SB),NOSPLIT,$80
- // load g from thread context
- MOVW $ctxt+-4(FP), R0
- MOVW (16*4+10*4)(R0), g
-
- // check that g exists
- CMP $0, g
- BNE 4(PC)
- MOVW $runtime·badsignal2(SB), R11
- BL (R11)
- RET
-
- // save g
- MOVW g, R3
- MOVW g, 20(R13)
-
- // g = m->gsignal
- MOVW g_m(g), R8
- MOVW m_gsignal(R8), g
-
- // copy arguments for call to sighandler
- MOVW $11, R0
- MOVW R0, 4(R13) // signal
- MOVW $0, R0
- MOVW R0, 8(R13) // siginfo
- MOVW $ctxt+-4(FP), R0
- MOVW R0, 12(R13) // context
- MOVW R3, 16(R13) // g
-
- BL runtime·sighandler(SB)
-
- // restore g
- MOVW 20(R13), g
-
- // Enable exceptions again.
- NACL_SYSCALL(SYS_exception_clear_flag)
-
- // Restore registers as best we can. Impossible to do perfectly.
- // See comment in sys_nacl_386.s for extended rationale.
- MOVW $ctxt+-4(FP), R1
- ADD $64, R1
- MOVW (0*4)(R1), R0
- MOVW (2*4)(R1), R2
- MOVW (3*4)(R1), R3
- MOVW (4*4)(R1), R4
- MOVW (5*4)(R1), R5
- MOVW (6*4)(R1), R6
- MOVW (7*4)(R1), R7
- MOVW (8*4)(R1), R8
- // cannot write to R9
- MOVW (10*4)(R1), g
- MOVW (11*4)(R1), R11
- MOVW (12*4)(R1), R12
- MOVW (13*4)(R1), R13
- MOVW (14*4)(R1), R14
- MOVW (15*4)(R1), R1
- B (R1)
-
-nog:
- MOVW $0, R0
- RET
-
-// func getRandomData([]byte)
-TEXT runtime·getRandomData(SB),NOSPLIT,$0-12
- MOVW arg_base+0(FP), R0
- MOVW arg_len+4(FP), R1
- NACL_SYSCALL(SYS_get_random_bytes)
- RET
-
-// Likewise, this is only valid for ARMv7+, but that's okay.
-TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
- B runtime·armPublicationBarrier(SB)
-
-TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
- WORD $0xe7fedef0 // NACL_INSTR_ARM_ABORT_NOW (UDF #0xEDE0)
diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s
index c14ecfb..d0c470c 100644
--- a/src/runtime/sys_netbsd_386.s
+++ b/src/runtime/sys_netbsd_386.s
@@ -83,15 +83,41 @@
MOVL $SYS_read, AX
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-4
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVL $42, AX
+ INT $0x80
+ JCC pipeok
+ MOVL $-1, r+0(FP)
+ MOVL $-1, w+4(FP)
+ MOVL AX, errno+8(FP)
+ RET
+pipeok:
+ MOVL AX, r+0(FP)
+ MOVL DX, w+4(FP)
+ MOVL $0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$12-16
+ MOVL $453, AX
+ LEAL r+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL flags+0(FP), BX
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL AX, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-4
MOVL $SYS_write, AX
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
@@ -114,12 +140,11 @@
INT $0x80
RET
-TEXT runtime·raise(SB),NOSPLIT,$12
- MOVL $SYS__lwp_self, AX
- INT $0x80
+TEXT runtime·lwp_kill(SB),NOSPLIT,$12-8
MOVL $0, 0(SP)
+ MOVL tid+0(FP), AX
MOVL AX, 4(SP) // arg 1 - target
- MOVL sig+0(FP), AX
+ MOVL sig+4(FP), AX
MOVL AX, 8(SP) // arg 2 - signo
MOVL $SYS__lwp_kill, AX
INT $0x80
@@ -181,8 +206,8 @@
INT $0x80
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
LEAL 12(SP), BX
MOVL $CLOCK_REALTIME, 4(SP) // arg 1 - clock_id
MOVL BX, 8(SP) // arg 2 - tp
@@ -198,9 +223,9 @@
MOVL BX, nsec+8(FP)
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB),NOSPLIT,$32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB),NOSPLIT,$32
LEAL 12(SP), BX
MOVL $CLOCK_MONOTONIC, 4(SP) // arg 1 - clock_id
MOVL BX, 8(SP) // arg 2 - tp
@@ -455,3 +480,20 @@
JAE 2(PC)
NEGL AX
RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$16-4
+ MOVL $92, AX // fcntl
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $3, 8(SP) // F_GETFL
+ MOVL $0, 12(SP)
+ INT $0x80
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $4, 8(SP) // F_SETFL
+ ORL $4, AX // O_NONBLOCK
+ MOVL AX, 12(SP)
+ MOVL $92, AX // fcntl
+ INT $0x80
+ RET
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
index 5fc47ae..dc9bd12 100644
--- a/src/runtime/sys_netbsd_amd64.s
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -154,18 +154,42 @@
MOVL $SYS_read, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-8
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVL $42, AX
+ SYSCALL
+ JCC pipeok
+ MOVL $-1, r+0(FP)
+ MOVL $-1, w+4(FP)
+ MOVL AX, errno+8(FP)
+ RET
+pipeok:
+ MOVL AX, r+0(FP)
+ MOVL DX, w+4(FP)
+ MOVL $0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-20
+ LEAQ r+8(FP), DI
+ MOVL flags+0(FP), SI
+ MOVL $453, AX
+ SYSCALL
+ MOVL AX, errno+16(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-8
MOVQ fd+0(FP), DI // arg 1 - fd
MOVQ p+8(FP), SI // arg 2 - buf
MOVL n+16(FP), DX // arg 3 - nbyte
MOVL $SYS_write, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
@@ -185,11 +209,9 @@
SYSCALL
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
- MOVL $SYS__lwp_self, AX
- SYSCALL
- MOVQ AX, DI // arg 1 - target
- MOVL sig+0(FP), SI // arg 2 - signo
+TEXT runtime·lwp_kill(SB),NOSPLIT,$0-16
+ MOVL tid+0(FP), DI // arg 1 - target
+ MOVQ sig+8(FP), SI // arg 2 - signo
MOVL $SYS__lwp_kill, AX
SYSCALL
RET
@@ -211,8 +233,8 @@
SYSCALL
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVQ $CLOCK_REALTIME, DI // arg 1 - clock_id
LEAQ 8(SP), SI // arg 2 - tp
MOVL $SYS___clock_gettime50, AX
@@ -225,7 +247,7 @@
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$32
+TEXT runtime·nanotime1(SB),NOSPLIT,$32
MOVQ $CLOCK_MONOTONIC, DI // arg 1 - clock_id
LEAQ 8(SP), SI // arg 2 - tp
MOVL $SYS___clock_gettime50, AX
@@ -429,3 +451,18 @@
MOVL $SYS_fcntl, AX
SYSCALL
RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL fd+0(FP), DI // fd
+ MOVQ $3, SI // F_GETFL
+ MOVQ $0, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ MOVL fd+0(FP), DI // fd
+ MOVQ $4, SI // F_SETFL
+ MOVQ $4, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s
index c32259b..678dea5 100644
--- a/src/runtime/sys_netbsd_arm.s
+++ b/src/runtime/sys_netbsd_arm.s
@@ -92,16 +92,40 @@
MOVW p+4(FP), R1
MOVW n+8(FP), R2
SWI $SYS_read
- MOVW.CS $-1, R0
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ SWI $0xa0002a
+ BCC pipeok
+ MOVW $-1, R2
+ MOVW R2, r+0(FP)
+ MOVW R2, w+4(FP)
+ MOVW R0, errno+8(FP)
+ RET
+pipeok:
+ MOVW $0, R2
+ MOVW R0, r+0(FP)
+ MOVW R1, w+4(FP)
+ MOVW R2, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVW $r+4(FP), R0
+ MOVW flags+0(FP), R1
+ SWI $0xa001c5
+ MOVW R0, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0
MOVW fd+0(FP), R0 // arg 1 - fd
MOVW p+4(FP), R1 // arg 2 - buf
MOVW n+8(FP), R2 // arg 3 - nbyte
SWI $SYS_write
- MOVW.CS $-1, R0
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
@@ -169,9 +193,9 @@
SWI $SYS___nanosleep50
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
- SWI $SYS__lwp_self // the returned R0 is arg 1
- MOVW sig+0(FP), R1 // arg 2 - signal
+TEXT runtime·lwp_kill(SB),NOSPLIT,$0-8
+ MOVW tid+0(FP), R0 // arg 1 - tid
+ MOVW sig+4(FP), R1 // arg 2 - signal
SWI $SYS__lwp_kill
RET
@@ -188,8 +212,8 @@
SWI $SYS___setitimer50
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVW $0, R0 // CLOCK_REALTIME
MOVW $8(R13), R1
SWI $SYS___clock_gettime50
@@ -203,9 +227,9 @@
MOVW R2, nsec+8(FP)
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB), NOSPLIT, $32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB), NOSPLIT, $32
MOVW $3, R0 // CLOCK_MONOTONIC
MOVW $8(R13), R1
SWI $SYS___clock_gettime50
@@ -276,7 +300,11 @@
MOVW R4, R13
RET
-TEXT runtime·sigtramp(SB),NOSPLIT,$12
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ // Reserve space for callee-save registers and arguments.
+ MOVM.DB.W [R4-R11], (R13)
+ SUB $16, R13
+
// this might be called in external code context,
// where g is not set.
// first save R0, because runtime·load_g will clobber it
@@ -288,6 +316,11 @@
MOVW R1, 8(R13)
MOVW R2, 12(R13)
BL runtime·sigtrampgo(SB)
+
+ // Restore callee-save registers.
+ ADD $16, R13
+ MOVM.IA.W (R13), [R4-R11]
+
RET
TEXT runtime·mmap(SB),NOSPLIT,$12
@@ -385,6 +418,18 @@
SWI $SYS_fcntl
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R0 // fd
+ MOVW $3, R1 // F_GETFL
+ MOVW $0, R2
+ SWI $0xa0005c // sys_fcntl
+ ORR $0x4, R0, R2 // O_NONBLOCK
+ MOVW fd+0(FP), R0 // fd
+ MOVW $4, R1 // F_SETFL
+ SWI $0xa0005c // sys_fcntl
+ RET
+
// TODO: this is only valid for ARMv7+
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
B runtime·armPublicationBarrier(SB)
diff --git a/src/runtime/sys_netbsd_arm64.s b/src/runtime/sys_netbsd_arm64.s
index 57ded53..e70be0f 100644
--- a/src/runtime/sys_netbsd_arm64.s
+++ b/src/runtime/sys_netbsd_arm64.s
@@ -14,6 +14,9 @@
#define CLOCK_MONOTONIC 3
#define FD_CLOEXEC 1
#define F_SETFD 2
+#define F_GETFL 3
+#define F_SETFL 4
+#define O_NONBLOCK 4
#define SYS_exit 1
#define SYS_read 3
@@ -43,6 +46,7 @@
#define SYS___clock_gettime50 427
#define SYS___nanosleep50 430
#define SYS___kevent50 435
+#define SYS_pipe2 453
#define SYS_openat 468
#define SYS____lwp_park60 478
@@ -141,18 +145,45 @@
MOVW n+16(FP), R2 // arg 3 - count
SVC $SYS_read
BCC ok
- MOVW $-1, R0
+ NEG R0, R0
ok:
MOVW R0, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-8
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVW $0, R0
+ SVC $SYS_pipe2
+ BCC pipeok
+ MOVW $-1, R1
+ MOVW R1, r+0(FP)
+ MOVW R1, w+4(FP)
+ NEG R0, R0
+ MOVW R0, errno+8(FP)
+ RET
+pipeok:
+ MOVW R0, r+0(FP)
+ MOVW R1, w+4(FP)
+ MOVW ZR, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ ADD $8, RSP, R0
+ MOVW flags+0(FP), R1
+ SVC $SYS_pipe2
+ BCC 2(PC)
+ NEG R0, R0
+ MOVW R0, errno+16(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-8
MOVD fd+0(FP), R0 // arg 1 - fd
MOVD p+8(FP), R1 // arg 2 - buf
MOVW n+16(FP), R2 // arg 3 - nbyte
SVC $SYS_write
BCC ok
- MOVW $-1, R0
+ NEG R0, R0
ok:
MOVW R0, ret+24(FP)
RET
@@ -174,10 +205,9 @@
SVC $SYS___nanosleep50
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
- SVC $SYS__lwp_self
- // arg 1 - target (lwp_self)
- MOVW sig+0(FP), R1 // arg 2 - signo
+TEXT runtime·lwp_kill(SB),NOSPLIT,$0-16
+ MOVW tid+0(FP), R0 // arg 1 - target
+ MOVD sig+8(FP), R1 // arg 2 - signo
SVC $SYS__lwp_kill
RET
@@ -195,8 +225,8 @@
SVC $SYS___setitimer50
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVW $CLOCK_REALTIME, R0 // arg 1 - clock_id
MOVD $8(RSP), R1 // arg 2 - tp
SVC $SYS___clock_gettime50
@@ -209,9 +239,9 @@
MOVW R1, nsec+8(FP)
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB), NOSPLIT, $32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB), NOSPLIT, $32
MOVD $CLOCK_MONOTONIC, R0 // arg 1 - clock_id
MOVD $8(RSP), R1 // arg 2 - tp
SVC $SYS___clock_gettime50
@@ -431,3 +461,16 @@
MOVW $FD_CLOEXEC, R2
SVC $SYS_fcntl
RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R0 // arg 1 - fd
+ MOVD $F_GETFL, R1 // arg 2 - cmd
+ MOVD $0, R2 // arg 3
+ SVC $SYS_fcntl
+ MOVD $O_NONBLOCK, R2
+ EOR R0, R2 // arg 3 - flags
+ MOVW fd+0(FP), R0 // arg 1 - fd
+ MOVD $F_SETFL, R1 // arg 2 - cmd
+ SVC $SYS_fcntl
+ RET
diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s
index 6457e37..24fbfd6 100644
--- a/src/runtime/sys_openbsd_386.s
+++ b/src/runtime/sys_openbsd_386.s
@@ -46,15 +46,35 @@
MOVL $3, AX
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-4
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$8-12
+ MOVL $263, AX
+ LEAL r+0(FP), BX
+ MOVL BX, 4(SP)
+ INT $0x80
+ MOVL AX, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$12-16
+ MOVL $101, AX
+ LEAL r+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL flags+0(FP), BX
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL AX, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-4
MOVL $4, AX // sys_write
INT $0x80
JAE 2(PC)
- MOVL $-1, AX
+ NEGL AX // caller expects negative errno
MOVL AX, ret+12(FP)
RET
@@ -77,12 +97,17 @@
INT $0x80
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
+TEXT runtime·getthrid(SB),NOSPLIT,$0-4
MOVL $299, AX // sys_getthrid
INT $0x80
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·thrkill(SB),NOSPLIT,$16-8
MOVL $0, 0(SP)
+ MOVL tid+0(FP), AX
MOVL AX, 4(SP) // arg 1 - tid
- MOVL sig+0(FP), AX
+ MOVL sig+4(FP), AX
MOVL AX, 8(SP) // arg 2 - signum
MOVL $0, 12(SP) // arg 3 - tcb
MOVL $119, AX // sys_thrkill
@@ -145,8 +170,8 @@
INT $0x80
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
LEAL 12(SP), BX
MOVL $0, 4(SP) // arg 1 - clock_id
MOVL BX, 8(SP) // arg 2 - tp
@@ -162,9 +187,9 @@
MOVL BX, nsec+8(FP)
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB),NOSPLIT,$32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB),NOSPLIT,$32
LEAL 12(SP), BX
MOVL CLOCK_MONOTONIC, 4(SP) // arg 1 - clock_id
MOVL BX, 8(SP) // arg 2 - tp
@@ -416,4 +441,21 @@
NEGL AX
RET
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$16-4
+ MOVL $92, AX // fcntl
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $3, 8(SP) // F_GETFL
+ MOVL $0, 12(SP)
+ INT $0x80
+ MOVL fd+0(FP), BX // fd
+ MOVL BX, 4(SP)
+ MOVL $4, 8(SP) // F_SETFL
+ ORL $4, AX // O_NONBLOCK
+ MOVL AX, 12(SP)
+ MOVL $92, AX // fcntl
+ INT $0x80
+ RET
+
GLOBL runtime·tlsoffset(SB),NOPTR,$4
diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s
index d5c030d..37d70ab 100644
--- a/src/runtime/sys_openbsd_amd64.s
+++ b/src/runtime/sys_openbsd_amd64.s
@@ -123,18 +123,35 @@
MOVL $3, AX
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT,$-8
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ LEAQ r+0(FP), DI
+ MOVL $263, AX
+ SYSCALL
+ MOVL AX, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-20
+ LEAQ r+8(FP), DI
+ MOVL flags+0(FP), SI
+ MOVL $101, AX
+ SYSCALL
+ MOVL AX, errno+16(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT,$-8
MOVQ fd+0(FP), DI // arg 1 - fd
MOVQ p+8(FP), SI // arg 2 - buf
MOVL n+16(FP), DX // arg 3 - nbyte
MOVL $4, AX // sys_write
SYSCALL
JCC 2(PC)
- MOVL $-1, AX
+ NEGQ AX // caller expects negative errno
MOVL AX, ret+24(FP)
RET
@@ -154,11 +171,15 @@
SYSCALL
RET
-TEXT runtime·raise(SB),NOSPLIT,$16
+TEXT runtime·getthrid(SB),NOSPLIT,$0-4
MOVL $299, AX // sys_getthrid
SYSCALL
- MOVQ AX, DI // arg 1 - tid
- MOVL sig+0(FP), SI // arg 2 - signum
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·thrkill(SB),NOSPLIT,$0-16
+ MOVL tid+0(FP), DI // arg 1 - tid
+ MOVQ sig+8(FP), SI // arg 2 - signum
MOVQ $0, DX // arg 3 - tcb
MOVL $119, AX // sys_thrkill
SYSCALL
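
The old raise stub, which could only signal the calling thread, is split into getthrid and thrkill so that the Go side can also signal other threads by id (which asynchronous preemption needs). Presumably the Go side then composes raise from the two primitives, roughly as follows; the declarations mirror the frame layouts used above:

// Declarations for the assembly stubs above.
func getthrid() int32
func thrkill(tid int32, sig int)

// raise signals the calling thread by naming it explicitly; signaling a
// different M works the same way with that M's thread id.
func raise(sig uint32) {
	thrkill(getthrid(), int(sig))
}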
@@ -181,8 +202,8 @@
SYSCALL
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVQ $0, DI // arg 1 - clock_id
LEAQ 8(SP), SI // arg 2 - tp
MOVL $87, AX // sys_clock_gettime
@@ -195,7 +216,7 @@
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$24
+TEXT runtime·nanotime1(SB),NOSPLIT,$24
MOVQ CLOCK_MONOTONIC, DI // arg 1 - clock_id
LEAQ 8(SP), SI // arg 2 - tp
MOVL $87, AX // sys_clock_gettime
@@ -378,3 +399,18 @@
MOVL $92, AX // fcntl
SYSCALL
RET
+
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVL fd+0(FP), DI // fd
+ MOVQ $3, SI // F_GETFL
+ MOVQ $0, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ MOVL fd+0(FP), DI // fd
+ MOVQ $4, SI // F_SETFL
+ MOVQ $4, DX // O_NONBLOCK
+ ORL AX, DX
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
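
Taken together, the new pipe, pipe2 and setNonblock stubs let the runtime create a non-blocking, close-on-exec wakeup pipe even on kernels that predate pipe2. A hedged sketch of how the Go side presumably combines them (constant and helper names are illustrative, not copied from the runtime):

// nonblockingPipe prefers pipe2 and falls back to pipe plus fcntl when
// the kernel does not implement pipe2.
func nonblockingPipe() (r, w int32, errno int32) {
	r, w, errno = pipe2(_O_NONBLOCK | _O_CLOEXEC)
	if errno == -_ENOSYS { // no pipe2 on this kernel
		r, w, errno = pipe()
		if errno != 0 {
			return -1, -1, errno
		}
		closeonexec(r)
		setNonblock(r)
		closeonexec(w)
		setNonblock(w)
	}
	return r, w, errno
}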
diff --git a/src/runtime/sys_openbsd_arm.s b/src/runtime/sys_openbsd_arm.s
index 69c3ded..9e18ce0 100644
--- a/src/runtime/sys_openbsd_arm.s
+++ b/src/runtime/sys_openbsd_arm.s
@@ -13,11 +13,23 @@
#define CLOCK_REALTIME $0
#define CLOCK_MONOTONIC $3
+// With OpenBSD 6.7 onwards, an armv7 syscall returns two instructions
+// after the SWI instruction, to allow for a speculative execution
+// barrier to be placed after the SWI without impacting performance.
+// For now use hardware no-ops as this works with both older and newer
+// kernels. After OpenBSD 6.8 is released this should be changed to
+// speculation barriers.
+#define NOOP MOVW R0, R0
+#define INVOKE_SYSCALL \
+ SWI $0; \
+ NOOP; \
+ NOOP
+
// Exit the entire program (like C exit)
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0
MOVW code+0(FP), R0 // arg 1 - status
MOVW $1, R12 // sys_exit
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $0, R8 // crash on syscall failure
MOVW.CS R8, (R8)
RET
@@ -26,7 +38,7 @@
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVW wait+0(FP), R0 // arg 1 - notdead
MOVW $302, R12 // sys___threxit
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $1, R8 // crash on syscall failure
MOVW.CS R8, (R8)
JMP 0(PC)
@@ -36,7 +48,7 @@
MOVW mode+4(FP), R1 // arg 2 - mode
MOVW perm+8(FP), R2 // arg 3 - perm
MOVW $5, R12 // sys_open
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET
@@ -44,7 +56,7 @@
TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0
MOVW fd+0(FP), R0 // arg 1 - fd
MOVW $6, R12 // sys_close
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $-1, R0
MOVW R0, ret+4(FP)
RET
@@ -54,18 +66,35 @@
MOVW p+4(FP), R1 // arg 2 - buf
MOVW n+8(FP), R2 // arg 3 - nbyte
MOVW $3, R12 // sys_read
- SWI $0
- MOVW.CS $-1, R0
+ INVOKE_SYSCALL
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT,$0-12
+ MOVW $r+0(FP), R0
+ MOVW $263, R12
+ INVOKE_SYSCALL
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT,$0-16
+ MOVW $r+4(FP), R0
+ MOVW flags+0(FP), R1
+ MOVW $101, R12
+ INVOKE_SYSCALL
+ MOVW R0, errno+12(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0
MOVW fd+0(FP), R0 // arg 1 - fd
MOVW p+4(FP), R1 // arg 2 - buf
MOVW n+8(FP), R2 // arg 3 - nbyte
MOVW $4, R12 // sys_write
- SWI $0
- MOVW.CS $-1, R0
+ INVOKE_SYSCALL
+ RSB.CS $0, R0 // caller expects negative errno
MOVW R0, ret+12(FP)
RET
@@ -82,26 +111,30 @@
MOVW $4(R13), R0 // arg 1 - rqtp
MOVW $0, R1 // arg 2 - rmtp
MOVW $91, R12 // sys_nanosleep
- SWI $0
+ INVOKE_SYSCALL
RET
-TEXT runtime·raise(SB),NOSPLIT,$12
+TEXT runtime·getthrid(SB),NOSPLIT,$0-4
MOVW $299, R12 // sys_getthrid
- SWI $0
- // arg 1 - tid, already in R0
- MOVW sig+0(FP), R1 // arg 2 - signum
+ INVOKE_SYSCALL
+ MOVW R0, ret+0(FP)
+ RET
+
+TEXT runtime·thrkill(SB),NOSPLIT,$0-8
+ MOVW tid+0(FP), R0 // arg 1 - tid
+ MOVW sig+4(FP), R1 // arg 2 - signum
MOVW $0, R2 // arg 3 - tcb
MOVW $119, R12 // sys_thrkill
- SWI $0
+ INVOKE_SYSCALL
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$12
- MOVW $20, R12
- SWI $0 // sys_getpid
+ MOVW $20, R12 // sys_getpid
+ INVOKE_SYSCALL
// arg 1 - pid, already in R0
MOVW sig+0(FP), R1 // arg 2 - signum
MOVW $122, R12 // sys_kill
- SWI $0
+ INVOKE_SYSCALL
RET
TEXT runtime·mmap(SB),NOSPLIT,$16
@@ -119,7 +152,7 @@
MOVW R7, 16(R13) // high 32 bits
ADD $4, R13
MOVW $197, R12 // sys_mmap
- SWI $0
+ INVOKE_SYSCALL
SUB $4, R13
MOVW $0, R1
MOVW.CS R0, R1 // if error, move to R1
@@ -132,7 +165,7 @@
MOVW addr+0(FP), R0 // arg 1 - addr
MOVW n+4(FP), R1 // arg 2 - len
MOVW $73, R12 // sys_munmap
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $0, R8 // crash on syscall failure
MOVW.CS R8, (R8)
RET
@@ -142,7 +175,7 @@
MOVW n+4(FP), R1 // arg 2 - len
MOVW flags+8(FP), R2 // arg 2 - flags
MOVW $75, R12 // sys_madvise
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET
@@ -152,15 +185,15 @@
MOVW new+4(FP), R1 // arg 2 - new value
MOVW old+8(FP), R2 // arg 3 - old value
MOVW $69, R12 // sys_setitimer
- SWI $0
+ INVOKE_SYSCALL
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVW CLOCK_REALTIME, R0 // arg 1 - clock_id
MOVW $8(R13), R1 // arg 2 - tp
MOVW $87, R12 // sys_clock_gettime
- SWI $0
+ INVOKE_SYSCALL
MOVW 8(R13), R0 // sec - l32
MOVW 12(R13), R1 // sec - h32
@@ -172,13 +205,13 @@
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB),NOSPLIT,$32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB),NOSPLIT,$32
MOVW CLOCK_MONOTONIC, R0 // arg 1 - clock_id
MOVW $8(R13), R1 // arg 2 - tp
MOVW $87, R12 // sys_clock_gettime
- SWI $0
+ INVOKE_SYSCALL
MOVW 8(R13), R0 // sec - l32
MOVW 12(R13), R4 // sec - h32
@@ -199,7 +232,7 @@
MOVW new+4(FP), R1 // arg 2 - new sigaction
MOVW old+8(FP), R2 // arg 3 - old sigaction
MOVW $46, R12 // sys_sigaction
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $3, R8 // crash on syscall failure
MOVW.CS R8, (R8)
RET
@@ -208,7 +241,7 @@
MOVW how+0(FP), R0 // arg 1 - mode
MOVW new+4(FP), R1 // arg 2 - new
MOVW $48, R12 // sys_sigprocmask
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $3, R8 // crash on syscall failure
MOVW.CS R8, (R8)
MOVW R0, ret+8(FP)
@@ -226,7 +259,11 @@
MOVW R4, R13
RET
-TEXT runtime·sigtramp(SB),NOSPLIT,$12
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ // Reserve space for callee-save registers and arguments.
+ MOVM.DB.W [R4-R11], (R13)
+ SUB $16, R13
+
// If called from an external code context, g will not be set.
// Save R0, since runtime·load_g will clobber it.
MOVW R0, 4(R13) // signum
@@ -237,6 +274,11 @@
MOVW R1, 8(R13)
MOVW R2, 12(R13)
BL runtime·sigtrampgo(SB)
+
+ // Restore callee-save registers.
+ ADD $16, R13
+ MOVM.IA.W (R13), [R4-R11]
+
RET
// int32 tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void));
@@ -250,7 +292,7 @@
MOVW param+0(FP), R0 // arg 1 - param
MOVW psize+4(FP), R1 // arg 2 - psize
MOVW $8, R12 // sys___tfork
- SWI $0
+ INVOKE_SYSCALL
// Return if syscall failed.
B.CC 4(PC)
@@ -283,14 +325,14 @@
MOVW new+0(FP), R0 // arg 1 - new sigaltstack
MOVW old+4(FP), R1 // arg 2 - old sigaltstack
MOVW $288, R12 // sys_sigaltstack
- SWI $0
+ INVOKE_SYSCALL
MOVW.CS $0, R8 // crash on syscall failure
MOVW.CS R8, (R8)
RET
TEXT runtime·osyield(SB),NOSPLIT,$0
MOVW $298, R12 // sys_sched_yield
- SWI $0
+ INVOKE_SYSCALL
RET
TEXT runtime·thrsleep(SB),NOSPLIT,$4
@@ -302,7 +344,7 @@
MOVW R4, 4(R13)
ADD $4, R13
MOVW $94, R12 // sys___thrsleep
- SWI $0
+ INVOKE_SYSCALL
SUB $4, R13
MOVW R0, ret+20(FP)
RET
@@ -311,7 +353,7 @@
MOVW ident+0(FP), R0 // arg 1 - ident
MOVW n+4(FP), R1 // arg 2 - n
MOVW $301, R12 // sys___thrwakeup
- SWI $0
+ INVOKE_SYSCALL
MOVW R0, ret+8(FP)
RET
@@ -326,7 +368,7 @@
MOVW R5, 8(R13)
ADD $4, R13
MOVW $202, R12 // sys___sysctl
- SWI $0
+ INVOKE_SYSCALL
SUB $4, R13
MOVW.CC $0, R0
RSB.CS $0, R0
@@ -336,7 +378,7 @@
// int32 runtime·kqueue(void);
TEXT runtime·kqueue(SB),NOSPLIT,$0
MOVW $269, R12 // sys_kqueue
- SWI $0
+ INVOKE_SYSCALL
RSB.CS $0, R0
MOVW R0, ret+0(FP)
RET
@@ -353,7 +395,7 @@
MOVW R5, 8(R13)
ADD $4, R13
MOVW $72, R12 // sys_kevent
- SWI $0
+ INVOKE_SYSCALL
RSB.CS $0, R0
SUB $4, R13
MOVW R0, ret+24(FP)
@@ -365,7 +407,21 @@
MOVW $2, R1 // arg 2 - cmd (F_SETFD)
MOVW $1, R2 // arg 3 - arg (FD_CLOEXEC)
MOVW $92, R12 // sys_fcntl
- SWI $0
+ INVOKE_SYSCALL
+ RET
+
+// func runtime·setNonblock(fd int32)
+TEXT runtime·setNonblock(SB),NOSPLIT,$0-4
+ MOVW fd+0(FP), R0 // fd
+ MOVW $3, R1 // F_GETFL
+ MOVW $0, R2
+ MOVW $92, R12
+ INVOKE_SYSCALL
+ ORR $0x4, R0, R2 // O_NONBLOCK
+ MOVW fd+0(FP), R0 // fd
+ MOVW $4, R1 // F_SETFL
+ MOVW $92, R12
+ INVOKE_SYSCALL
RET
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
@@ -374,6 +430,6 @@
TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
MOVM.WP [R1, R2, R3, R12], (R13)
MOVW $330, R12 // sys___get_tcb
- SWI $0
+ INVOKE_SYSCALL
MOVM.IAW (R13), [R1, R2, R3, R12]
RET
diff --git a/src/runtime/sys_openbsd_arm64.s b/src/runtime/sys_openbsd_arm64.s
index 52bed4b..621b1b1 100644
--- a/src/runtime/sys_openbsd_arm64.s
+++ b/src/runtime/sys_openbsd_arm64.s
@@ -13,11 +13,22 @@
#define CLOCK_REALTIME $0
#define CLOCK_MONOTONIC $3
+// With OpenBSD 6.7 onwards, an arm64 syscall returns two instructions
+// after the SVC instruction, to allow for a speculative execution
+// barrier to be placed after the SVC without impacting performance.
+// For now use hardware no-ops as this works with both older and newer
+// kernels. After OpenBSD 6.8 is released this should be changed to
+// speculation barriers.
+#define INVOKE_SYSCALL \
+ SVC; \
+ NOOP; \
+ NOOP
+
// Exit the entire program (like C exit)
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0
MOVW code+0(FP), R0 // arg 1 - status
MOVD $1, R8 // sys_exit
- SVC
+ INVOKE_SYSCALL
BCC 3(PC)
MOVD $0, R0 // crash on syscall failure
MOVD R0, (R0)
@@ -27,7 +38,7 @@
TEXT runtime·exitThread(SB),NOSPLIT,$0
MOVD wait+0(FP), R0 // arg 1 - notdead
MOVD $302, R8 // sys___threxit
- SVC
+ INVOKE_SYSCALL
MOVD $0, R0 // crash on syscall failure
MOVD R0, (R0)
JMP 0(PC)
@@ -37,7 +48,7 @@
MOVW mode+8(FP), R1 // arg 2 - mode
MOVW perm+12(FP), R2 // arg 3 - perm
MOVD $5, R8 // sys_open
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
MOVW $-1, R0
MOVW R0, ret+16(FP)
@@ -46,7 +57,7 @@
TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0
MOVW fd+0(FP), R0 // arg 1 - fd
MOVD $6, R8 // sys_close
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
MOVW $-1, R0
MOVW R0, ret+8(FP)
@@ -57,20 +68,42 @@
MOVD p+8(FP), R1 // arg 2 - buf
MOVW n+16(FP), R2 // arg 3 - nbyte
MOVD $3, R8 // sys_read
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
- MOVW $-1, R0
+ NEG R0, R0
MOVW R0, ret+24(FP)
RET
-TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R0 // arg 1 - fd
+// func pipe() (r, w int32, errno int32)
+TEXT runtime·pipe(SB),NOSPLIT|NOFRAME,$0-12
+ MOVD $r+0(FP), R0
+ MOVW $0, R1
+ MOVD $101, R8 // sys_pipe2
+ INVOKE_SYSCALL
+ BCC 2(PC)
+ NEG R0, R0
+ MOVW R0, errno+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOVD $r+8(FP), R0
+ MOVW flags+0(FP), R1
+ MOVD $101, R8 // sys_pipe2
+ INVOKE_SYSCALL
+ BCC 2(PC)
+ NEG R0, R0
+ MOVW R0, errno+16(FP)
+ RET
+
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0
+ MOVD fd+0(FP), R0 // arg 1 - fd
MOVD p+8(FP), R1 // arg 2 - buf
MOVW n+16(FP), R2 // arg 3 - nbyte
MOVD $4, R8 // sys_write
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
- MOVW $-1, R0
+ NEG R0, R0
MOVW R0, ret+24(FP)
RET
@@ -89,26 +122,30 @@
ADD $8, RSP, R0 // arg 1 - rqtp
MOVD $0, R1 // arg 2 - rmtp
MOVD $91, R8 // sys_nanosleep
- SVC
+ INVOKE_SYSCALL
RET
-TEXT runtime·raise(SB),NOSPLIT,$0
+TEXT runtime·getthrid(SB),NOSPLIT,$0-4
MOVD $299, R8 // sys_getthrid
- SVC
- // arg 1 - tid, already in R0
- MOVW sig+0(FP), R1 // arg 2 - signum
+ INVOKE_SYSCALL
+ MOVW R0, ret+0(FP)
+ RET
+
+TEXT runtime·thrkill(SB),NOSPLIT,$0-16
+ MOVW tid+0(FP), R0 // arg 1 - tid
+ MOVD sig+8(FP), R1 // arg 2 - signum
MOVW $0, R2 // arg 3 - tcb
MOVD $119, R8 // sys_thrkill
- SVC
+ INVOKE_SYSCALL
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$0
MOVD $20, R8 // sys_getpid
- SVC
+ INVOKE_SYSCALL
// arg 1 - pid, already in R0
MOVW sig+0(FP), R1 // arg 2 - signum
MOVD $122, R8 // sys_kill
- SVC
+ INVOKE_SYSCALL
RET
TEXT runtime·mmap(SB),NOSPLIT,$0
@@ -120,7 +157,7 @@
MOVW $0, R5 // arg 6 - pad
MOVW off+28(FP), R6 // arg 7 - offset
MOVD $197, R8 // sys_mmap
- SVC
+ INVOKE_SYSCALL
MOVD $0, R1
BCC 3(PC)
MOVD R0, R1 // if error, move to R1
@@ -133,7 +170,7 @@
MOVD addr+0(FP), R0 // arg 1 - addr
MOVD n+8(FP), R1 // arg 2 - len
MOVD $73, R8 // sys_munmap
- SVC
+ INVOKE_SYSCALL
BCC 3(PC)
MOVD $0, R0 // crash on syscall failure
MOVD R0, (R0)
@@ -144,7 +181,7 @@
MOVD n+8(FP), R1 // arg 2 - len
MOVW flags+16(FP), R2 // arg 2 - flags
MOVD $75, R8 // sys_madvise
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
MOVW $-1, R0
MOVW R0, ret+24(FP)
@@ -155,15 +192,15 @@
MOVD new+8(FP), R1 // arg 2 - new value
MOVD old+16(FP), R2 // arg 3 - old value
MOVD $69, R8 // sys_setitimer
- SVC
+ INVOKE_SYSCALL
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $32
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB), NOSPLIT, $32
MOVW CLOCK_REALTIME, R0 // arg 1 - clock_id
MOVD $8(RSP), R1 // arg 2 - tp
MOVD $87, R8 // sys_clock_gettime
- SVC
+ INVOKE_SYSCALL
MOVD 8(RSP), R0 // sec
MOVD 16(RSP), R1 // nsec
@@ -172,13 +209,13 @@
RET
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB),NOSPLIT,$32
+// int64 nanotime1(void) so really
+// void nanotime1(int64 *nsec)
+TEXT runtime·nanotime1(SB),NOSPLIT,$32
MOVW CLOCK_MONOTONIC, R0 // arg 1 - clock_id
MOVD $8(RSP), R1 // arg 2 - tp
MOVD $87, R8 // sys_clock_gettime
- SVC
+ INVOKE_SYSCALL
MOVW 8(RSP), R3 // sec
MOVW 16(RSP), R5 // nsec
@@ -194,7 +231,7 @@
MOVD new+8(FP), R1 // arg 2 - new sigaction
MOVD old+16(FP), R2 // arg 3 - old sigaction
MOVD $46, R8 // sys_sigaction
- SVC
+ INVOKE_SYSCALL
BCC 3(PC)
MOVD $3, R0 // crash on syscall failure
MOVD R0, (R0)
@@ -204,7 +241,7 @@
MOVW how+0(FP), R0 // arg 1 - mode
MOVW new+4(FP), R1 // arg 2 - new
MOVD $48, R8 // sys_sigprocmask
- SVC
+ INVOKE_SYSCALL
BCC 3(PC)
MOVD $3, R8 // crash on syscall failure
MOVD R8, (R8)
@@ -288,7 +325,7 @@
MOVD param+0(FP), R0 // arg 1 - param
MOVD psize+8(FP), R1 // arg 2 - psize
MOVD $8, R8 // sys___tfork
- SVC
+ INVOKE_SYSCALL
// Return if syscall failed.
BCC 4(PC)
@@ -318,7 +355,7 @@
MOVD new+0(FP), R0 // arg 1 - new sigaltstack
MOVD old+8(FP), R1 // arg 2 - old sigaltstack
MOVD $288, R8 // sys_sigaltstack
- SVC
+ INVOKE_SYSCALL
BCC 3(PC)
MOVD $0, R8 // crash on syscall failure
MOVD R8, (R8)
@@ -326,7 +363,7 @@
TEXT runtime·osyield(SB),NOSPLIT,$0
MOVD $298, R8 // sys_sched_yield
- SVC
+ INVOKE_SYSCALL
RET
TEXT runtime·thrsleep(SB),NOSPLIT,$0
@@ -336,7 +373,7 @@
MOVD lock+24(FP), R3 // arg 4 - lock
MOVD abort+32(FP), R4 // arg 5 - abort
MOVD $94, R8 // sys___thrsleep
- SVC
+ INVOKE_SYSCALL
MOVW R0, ret+40(FP)
RET
@@ -344,7 +381,7 @@
MOVD ident+0(FP), R0 // arg 1 - ident
MOVW n+8(FP), R1 // arg 2 - n
MOVD $301, R8 // sys___thrwakeup
- SVC
+ INVOKE_SYSCALL
MOVW R0, ret+16(FP)
RET
@@ -356,7 +393,7 @@
MOVD dst+32(FP), R4 // arg 5 - dest
MOVD ndst+40(FP), R5 // arg 6 - newlen
MOVD $202, R8 // sys___sysctl
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
NEG R0, R0
MOVW R0, ret+48(FP)
@@ -365,7 +402,7 @@
// int32 runtime·kqueue(void);
TEXT runtime·kqueue(SB),NOSPLIT,$0
MOVD $269, R8 // sys_kqueue
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
NEG R0, R0
MOVW R0, ret+0(FP)
@@ -380,7 +417,7 @@
MOVW nev+32(FP), R4 // arg 5 - nevents
MOVD ts+40(FP), R5 // arg 6 - timeout
MOVD $72, R8 // sys_kevent
- SVC
+ INVOKE_SYSCALL
BCC 2(PC)
NEG R0, R0
MOVW R0, ret+48(FP)
@@ -392,5 +429,20 @@
MOVD $2, R1 // arg 2 - cmd (F_SETFD)
MOVD $1, R2 // arg 3 - arg (FD_CLOEXEC)
MOVD $92, R8 // sys_fcntl
- SVC
+ INVOKE_SYSCALL
+ RET
+
+// func runtime·setNonblock(int32 fd)
+TEXT runtime·setNonblock(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW fd+0(FP), R0 // arg 1 - fd
+ MOVD $3, R1 // arg 2 - cmd (F_GETFL)
+ MOVD $0, R2 // arg 3
+ MOVD $92, R8 // sys_fcntl
+ INVOKE_SYSCALL
+ MOVD $4, R2 // O_NONBLOCK
+ ORR R0, R2 // arg 3 - flags
+ MOVW fd+0(FP), R0 // arg 1 - fd
+ MOVD $4, R1 // arg 2 - cmd (F_SETFL)
+ MOVD $92, R8 // sys_fcntl
+ INVOKE_SYSCALL
RET
diff --git a/src/runtime/sys_plan9_386.s b/src/runtime/sys_plan9_386.s
index a7fb9fe..f9969f6 100644
--- a/src/runtime/sys_plan9_386.s
+++ b/src/runtime/sys_plan9_386.s
@@ -102,9 +102,9 @@
MOVL $-1, ret_hi+8(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
- CALL runtime·nanotime(SB)
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$8-12
+ CALL runtime·nanotime1(SB)
MOVL 0(SP), AX
MOVL 4(SP), DX
diff --git a/src/runtime/sys_plan9_amd64.s b/src/runtime/sys_plan9_amd64.s
index a73c33f..383622b 100644
--- a/src/runtime/sys_plan9_amd64.s
+++ b/src/runtime/sys_plan9_amd64.s
@@ -88,9 +88,9 @@
MOVQ AX, ret+8(FP)
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
- CALL runtime·nanotime(SB)
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$8-12
+ CALL runtime·nanotime1(SB)
MOVQ 0(SP), AX
// generated code for
diff --git a/src/runtime/sys_plan9_arm.s b/src/runtime/sys_plan9_arm.s
index b82e6c6..9fbe305 100644
--- a/src/runtime/sys_plan9_arm.s
+++ b/src/runtime/sys_plan9_arm.s
@@ -138,8 +138,8 @@
MOVW R0, ret_hi+8(FP)
RET
-// time.now() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$12-12
+// func walltime1() (sec int64, nsec int32)
+TEXT runtime·walltime1(SB),NOSPLIT,$12-12
// use nsec system call to get current time in nanoseconds
MOVW $sysnsec_lo-8(SP), R0 // destination addr
MOVW R0,res-12(SP)
diff --git a/src/runtime/sys_riscv64.go b/src/runtime/sys_riscv64.go
new file mode 100644
index 0000000..e710840
--- /dev/null
+++ b/src/runtime/sys_riscv64.go
@@ -0,0 +1,18 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate Gosave.
+func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
+ if buf.lr != 0 {
+ throw("invalid use of gostartcall")
+ }
+ buf.lr = buf.pc
+ buf.pc = uintptr(fn)
+ buf.ctxt = ctxt
+}
diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s
index ead8c8d..05fd187 100644
--- a/src/runtime/sys_solaris_amd64.s
+++ b/src/runtime/sys_solaris_amd64.s
@@ -29,26 +29,6 @@
MOVQ AX, (m_mOS+mOS_perrno)(BX)
RET
-// int64 runtime·nanotime1(void);
-//
-// clock_gettime(3c) wrapper because Timespec is too large for
-// runtime·nanotime stack.
-//
-// Called using runtime·sysvicall6 from os_solaris.c:/nanotime.
-// NOT USING GO CALLING CONVENTION.
-TEXT runtime·nanotime1(SB),NOSPLIT,$0
- // need space for the timespec argument.
- SUBQ $64, SP // 16 bytes will do, but who knows in the future?
- MOVQ $3, DI // CLOCK_REALTIME from <sys/time_impl.h>
- MOVQ SP, SI
- LEAQ libc_clock_gettime(SB), AX
- CALL AX
- MOVQ (SP), AX // tv_sec from struct timespec
- IMULQ $1000000000, AX // multiply into nanoseconds
- ADDQ 8(SP), AX // tv_nsec, offset should be stable.
- ADDQ $64, SP
- RET
-
// pipe(3c) wrapper that returns fds in AX, DX.
// NOT USING GO CALLING CONVENTION.
TEXT runtime·pipe1(SB),NOSPLIT,$0
@@ -338,23 +318,3 @@
LEAQ libc_sched_yield(SB), AX
CALL AX
RET
-
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
- CALL runtime·nanotime(SB)
- MOVQ 0(SP), AX
-
- // generated code for
- // func f(x uint64) (uint64, uint64) { return x/1000000000, x%100000000 }
- // adapted to reduce duplication
- MOVQ AX, CX
- MOVQ $1360296554856532783, AX
- MULQ CX
- ADDQ CX, DX
- RCRQ $1, DX
- SHRQ $29, DX
- MOVQ DX, sec+0(FP)
- IMULQ $1000000000, DX
- SUBQ DX, CX
- MOVL CX, nsec+8(FP)
- RET
diff --git a/src/runtime/sys_wasm.s b/src/runtime/sys_wasm.s
index d7bab92..e7a6570 100644
--- a/src/runtime/sys_wasm.s
+++ b/src/runtime/sys_wasm.s
@@ -17,10 +17,9 @@
Get R2
I32Const $1
I32Sub
- Set R2
+ Tee R2
// n == 0
- Get R2
I32Eqz
If
Return
@@ -54,10 +53,9 @@
Get R1
I32Const $1
I32Sub
- Set R1
+ Tee R1
// n == 0
- Get R1
I32Eqz
If
Return
@@ -101,7 +99,7 @@
End
Get R0
- F64Const $9223372036854775807.
+ F64Const $0x7ffffffffffffc00p0 // Maximum truncated representation of 0x7fffffffffffffff
F64Gt
If
I64Const $0x8000000000000000
@@ -109,7 +107,7 @@
End
Get R0
- F64Const $-9223372036854775808.
+ F64Const $-0x7ffffffffffffc00p0 // Minimum truncated representation of -0x8000000000000000
F64Lt
If
I64Const $0x8000000000000000
@@ -130,7 +128,7 @@
End
Get R0
- F64Const $18446744073709551615.
+ F64Const $0xfffffffffffff800p0 // Maximum truncated representation of 0xffffffffffffffff
F64Gt
If
I64Const $0x8000000000000000
@@ -171,6 +169,10 @@
I32Store ret+8(FP)
RET
+TEXT ·resetMemoryDataView(SB), NOSPLIT, $0
+ CallImport
+ RET
+
TEXT ·wasmExit(SB), NOSPLIT, $0
CallImport
RET
@@ -179,11 +181,11 @@
CallImport
RET
-TEXT ·nanotime(SB), NOSPLIT, $0
+TEXT ·nanotime1(SB), NOSPLIT, $0
CallImport
RET
-TEXT ·walltime(SB), NOSPLIT, $0
+TEXT ·walltime1(SB), NOSPLIT, $0
CallImport
RET
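
The new F64Const operands are the largest (respectively smallest) float64 values whose truncation still fits the target integer type; the old decimal literals rounded to 2^63 (or 2^64), which is itself out of range, making the bounds check imprecise at the boundary. The boundary arithmetic can be verified in plain Go, independent of the wasm code:

package main

import (
	"fmt"
	"math"
)

func main() {
	// math.MaxInt64 is not representable in float64; it rounds up to 2^63.
	fmt.Println(float64(math.MaxInt64) == math.Ldexp(1, 63)) // true

	// 0x7ffffffffffffc00 is the largest float64 that truncates into int64 range.
	const maxTrunc = float64(0x7ffffffffffffc00)
	fmt.Println(int64(maxTrunc)) // 9223372036854774784

	// The next representable float64 above it is exactly 2^63.
	fmt.Println(math.Nextafter(maxTrunc, math.Inf(1)) == math.Ldexp(1, 63)) // true
}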
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index 761da8e..9e1f409 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -444,7 +444,7 @@
#define time_hi1 4
#define time_hi2 8
-TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+TEXT runtime·nanotime1(SB),NOSPLIT,$0-8
CMPB runtime·useQPCTime(SB), $0
JNE useQPC
loop:
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index 2aea8ea..6c8eecd 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -62,6 +62,10 @@
// Return result.
POPQ CX
MOVQ AX, libcall_r1(CX)
+ // Floating point return values are returned in XMM0. We set r2 to this
+ // value in case this call returned a floating point value. For details,
+ // see https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+ MOVQ X0, libcall_r2(CX)
// GetLastError().
MOVQ 0x30(GS), DI
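
With XMM0 mirrored into libcall_r2, a DLL export that returns a C float or double can be decoded from the second value reported by syscall.(*Proc).Call, exactly as the TestFloatReturn test added further below does. A Windows-only usage sketch, assuming proc was obtained via syscall.MustLoadDLL(...).MustFindProc(...):

// callDouble invokes a DLL export returning a C double and decodes the
// result from r2, which now carries the raw XMM0 bits.
func callDouble(proc *syscall.Proc, args ...uintptr) float64 {
	_, r2, _ := proc.Call(args...)
	return math.Float64frombits(uint64(r2))
}

For a C float return, only the low 32 bits are meaningful: math.Float32frombits(uint32(r2)).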
@@ -473,7 +477,7 @@
#define time_hi1 4
#define time_hi2 8
-TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+TEXT runtime·nanotime1(SB),NOSPLIT,$0-8
CMPB runtime·useQPCTime(SB), $0
JNE useQPC
MOVQ $_INTERRUPT_TIME, DI
diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s
index 8f8af0a..256b5ff 100644
--- a/src/runtime/sys_windows_arm.s
+++ b/src/runtime/sys_windows_arm.s
@@ -495,7 +495,7 @@
#define time_hi1 4
#define time_hi2 8
-TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+TEXT runtime·nanotime1(SB),NOSPLIT,$0-8
MOVW $0, R0
MOVB runtime·useQPCTime(SB), R0
CMP $0, R0
diff --git a/src/runtime/sys_x86.go b/src/runtime/sys_x86.go
index 2b4ed8b..f917cb8 100644
--- a/src/runtime/sys_x86.go
+++ b/src/runtime/sys_x86.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build amd64 amd64p32 386
+// +build amd64 386
package runtime
diff --git a/src/runtime/syscall_nacl.h b/src/runtime/syscall_nacl.h
deleted file mode 100644
index 5ee75ab..0000000
--- a/src/runtime/syscall_nacl.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Code generated by mknacl.sh; DO NOT EDIT.
-#define SYS_null 1
-#define SYS_nameservice 2
-#define SYS_dup 8
-#define SYS_dup2 9
-#define SYS_open 10
-#define SYS_close 11
-#define SYS_read 12
-#define SYS_write 13
-#define SYS_lseek 14
-#define SYS_stat 16
-#define SYS_fstat 17
-#define SYS_chmod 18
-#define SYS_isatty 19
-#define SYS_brk 20
-#define SYS_mmap 21
-#define SYS_munmap 22
-#define SYS_getdents 23
-#define SYS_mprotect 24
-#define SYS_list_mappings 25
-#define SYS_exit 30
-#define SYS_getpid 31
-#define SYS_sched_yield 32
-#define SYS_sysconf 33
-#define SYS_gettimeofday 40
-#define SYS_clock 41
-#define SYS_nanosleep 42
-#define SYS_clock_getres 43
-#define SYS_clock_gettime 44
-#define SYS_mkdir 45
-#define SYS_rmdir 46
-#define SYS_chdir 47
-#define SYS_getcwd 48
-#define SYS_unlink 49
-#define SYS_imc_makeboundsock 60
-#define SYS_imc_accept 61
-#define SYS_imc_connect 62
-#define SYS_imc_sendmsg 63
-#define SYS_imc_recvmsg 64
-#define SYS_imc_mem_obj_create 65
-#define SYS_imc_socketpair 66
-#define SYS_mutex_create 70
-#define SYS_mutex_lock 71
-#define SYS_mutex_trylock 72
-#define SYS_mutex_unlock 73
-#define SYS_cond_create 74
-#define SYS_cond_wait 75
-#define SYS_cond_signal 76
-#define SYS_cond_broadcast 77
-#define SYS_cond_timed_wait_abs 79
-#define SYS_thread_create 80
-#define SYS_thread_exit 81
-#define SYS_tls_init 82
-#define SYS_thread_nice 83
-#define SYS_tls_get 84
-#define SYS_second_tls_set 85
-#define SYS_second_tls_get 86
-#define SYS_exception_handler 87
-#define SYS_exception_stack 88
-#define SYS_exception_clear_flag 89
-#define SYS_sem_create 100
-#define SYS_sem_wait 101
-#define SYS_sem_post 102
-#define SYS_sem_get_value 103
-#define SYS_dyncode_create 104
-#define SYS_dyncode_modify 105
-#define SYS_dyncode_delete 106
-#define SYS_test_infoleak 109
-#define SYS_test_crash 110
-#define SYS_test_syscall_1 111
-#define SYS_test_syscall_2 112
-#define SYS_futex_wait_abs 120
-#define SYS_futex_wake 121
-#define SYS_pread 130
-#define SYS_pwrite 131
-#define SYS_truncate 140
-#define SYS_lstat 141
-#define SYS_link 142
-#define SYS_rename 143
-#define SYS_symlink 144
-#define SYS_access 145
-#define SYS_readlink 146
-#define SYS_utimes 147
-#define SYS_get_random_bytes 150
diff --git a/src/runtime/syscall_solaris.go b/src/runtime/syscall_solaris.go
index 3538180..0945169 100644
--- a/src/runtime/syscall_solaris.go
+++ b/src/runtime/syscall_solaris.go
@@ -16,7 +16,6 @@
libc_gethostname,
libc_getpid,
libc_ioctl,
- libc_pipe,
libc_setgid,
libc_setgroups,
libc_setsid,
@@ -143,6 +142,9 @@
args: uintptr(unsafe.Pointer(&flags)),
}
asmcgocall(unsafe.Pointer(&asmsysvicall6x), unsafe.Pointer(&call))
+ if int(call.r1) != -1 {
+ call.err = 0
+ }
return call.r1, call.err
}
diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go
index 722a73d..0e2fcfb 100644
--- a/src/runtime/syscall_windows.go
+++ b/src/runtime/syscall_windows.go
@@ -74,16 +74,18 @@
argsize += uintptrSize
}
- lock(&cbs.lock)
- defer unlock(&cbs.lock)
+ lock(&cbs.lock) // We don't unlock this in a defer because this is used from the system stack.
n := cbs.n
for i := 0; i < n; i++ {
if cbs.ctxt[i].gobody == fn.data && cbs.ctxt[i].isCleanstack() == cleanstack {
- return callbackasmAddr(i)
+ r := callbackasmAddr(i)
+ unlock(&cbs.lock)
+ return r
}
}
if n >= cb_max {
+ unlock(&cbs.lock)
throw("too many callback functions")
}
@@ -99,7 +101,9 @@
cbs.ctxt[n] = c
cbs.n++
- return callbackasmAddr(n)
+ r := callbackasmAddr(n)
+ unlock(&cbs.lock)
+ return r
}
const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
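
As the comment notes, compileCallback can run on the system stack, where defer is unavailable, so the lock is now released explicitly on every return path instead. A minimal sketch of that shape, with placeholder names rather than the runtime's:

// lookupOrAdd shows the explicit-unlock pattern: every exit path releases
// the mutex itself rather than deferring the unlock.
func lookupOrAdd(mu *sync.Mutex, table map[string]int, key string) int {
	mu.Lock()
	if v, ok := table[key]; ok {
		mu.Unlock()
		return v
	}
	v := len(table)
	table[key] = v
	mu.Unlock()
	return v
}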
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index 5335c12..2e74546 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -725,6 +725,82 @@
}
}
+func TestFloatReturn(t *testing.T) {
+ if _, err := exec.LookPath("gcc"); err != nil {
+ t.Skip("skipping test: gcc is missing")
+ }
+ if runtime.GOARCH != "amd64" {
+ t.Skipf("skipping test: GOARCH=%s", runtime.GOARCH)
+ }
+
+ const src = `
+#include <stdint.h>
+#include <windows.h>
+
+float cfuncFloat(uintptr_t a, double b, float c, double d) {
+ if (a == 1 && b == 2.2 && c == 3.3f && d == 4.4e44) {
+ return 1.5f;
+ }
+ return 0;
+}
+
+double cfuncDouble(uintptr_t a, double b, float c, double d) {
+ if (a == 1 && b == 2.2 && c == 3.3f && d == 4.4e44) {
+ return 2.5;
+ }
+ return 0;
+}
+`
+ tmpdir, err := ioutil.TempDir("", "TestFloatReturn")
+ if err != nil {
+ t.Fatal("TempDir failed: ", err)
+ }
+ defer os.RemoveAll(tmpdir)
+
+ srcname := "mydll.c"
+ err = ioutil.WriteFile(filepath.Join(tmpdir, srcname), []byte(src), 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ outname := "mydll.dll"
+ cmd := exec.Command("gcc", "-shared", "-s", "-Werror", "-o", outname, srcname)
+ cmd.Dir = tmpdir
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build dll: %v - %v", err, string(out))
+ }
+ dllpath := filepath.Join(tmpdir, outname)
+
+ dll := syscall.MustLoadDLL(dllpath)
+ defer dll.Release()
+
+ proc := dll.MustFindProc("cfuncFloat")
+
+ _, r, err := proc.Call(
+ 1,
+ uintptr(math.Float64bits(2.2)),
+ uintptr(math.Float32bits(3.3)),
+ uintptr(math.Float64bits(4.4e44)),
+ )
+ fr := math.Float32frombits(uint32(r))
+ if fr != 1.5 {
+ t.Errorf("got %f want 1.5 (err=%v)", fr, err)
+ }
+
+ proc = dll.MustFindProc("cfuncDouble")
+
+ _, r, err = proc.Call(
+ 1,
+ uintptr(math.Float64bits(2.2)),
+ uintptr(math.Float32bits(3.3)),
+ uintptr(math.Float64bits(4.4e44)),
+ )
+ dr := math.Float64frombits(uint64(r))
+ if dr != 2.5 {
+ t.Errorf("got %f want 2.5 (err=%v)", dr, err)
+ }
+}
+
func TestTimeBeginPeriod(t *testing.T) {
const TIMERR_NOERROR = 0
if *runtime.TimeBeginPeriodRetValue != TIMERR_NOERROR {
diff --git a/src/runtime/testdata/testfaketime/faketime.go b/src/runtime/testdata/testfaketime/faketime.go
new file mode 100644
index 0000000..1fb15eb
--- /dev/null
+++ b/src/runtime/testdata/testfaketime/faketime.go
@@ -0,0 +1,28 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test faketime support. This is its own test program because we have
+// to build it with custom build tags and hence want to minimize
+// dependencies.
+
+package main
+
+import (
+ "os"
+ "time"
+)
+
+func main() {
+ println("line 1")
+ // Stream switch, increments time
+ os.Stdout.WriteString("line 2\n")
+ os.Stdout.WriteString("line 3\n")
+ // Stream switch, increments time
+ os.Stderr.WriteString("line 4\n")
+ // Time jump
+ time.Sleep(1 * time.Second)
+ os.Stdout.WriteString("line 5\n")
+ // Print the current time.
+ os.Stdout.WriteString(time.Now().UTC().Format(time.RFC3339))
+}
diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go
new file mode 100644
index 0000000..45e6fb1
--- /dev/null
+++ b/src/runtime/testdata/testprog/checkptr.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "unsafe"
+
+func init() {
+ register("CheckPtrAlignmentNoPtr", CheckPtrAlignmentNoPtr)
+ register("CheckPtrAlignmentPtr", CheckPtrAlignmentPtr)
+ register("CheckPtrArithmetic", CheckPtrArithmetic)
+ register("CheckPtrSize", CheckPtrSize)
+ register("CheckPtrSmall", CheckPtrSmall)
+}
+
+func CheckPtrAlignmentNoPtr() {
+ var x [2]int64
+ p := unsafe.Pointer(&x[0])
+ sink2 = (*int64)(unsafe.Pointer(uintptr(p) + 1))
+}
+
+func CheckPtrAlignmentPtr() {
+ var x [2]int64
+ p := unsafe.Pointer(&x[0])
+ sink2 = (**int64)(unsafe.Pointer(uintptr(p) + 1))
+}
+
+func CheckPtrArithmetic() {
+ var x int
+ i := uintptr(unsafe.Pointer(&x))
+ sink2 = (*int)(unsafe.Pointer(i))
+}
+
+func CheckPtrSize() {
+ p := new(int64)
+ sink2 = p
+ sink2 = (*[100]int64)(unsafe.Pointer(p))
+}
+
+func CheckPtrSmall() {
+ sink2 = unsafe.Pointer(uintptr(1))
+}
diff --git a/src/runtime/testdata/testprog/deadlock.go b/src/runtime/testdata/testprog/deadlock.go
index 5f0d120..105d6a5 100644
--- a/src/runtime/testdata/testprog/deadlock.go
+++ b/src/runtime/testdata/testprog/deadlock.go
@@ -22,6 +22,9 @@
register("StackOverflow", StackOverflow)
register("ThreadExhaustion", ThreadExhaustion)
register("RecursivePanic", RecursivePanic)
+ register("RecursivePanic2", RecursivePanic2)
+ register("RecursivePanic3", RecursivePanic3)
+ register("RecursivePanic4", RecursivePanic4)
register("GoexitExit", GoexitExit)
register("GoNil", GoNil)
register("MainGoroutineID", MainGoroutineID)
@@ -29,6 +32,8 @@
register("GoexitInPanic", GoexitInPanic)
register("PanicAfterGoexit", PanicAfterGoexit)
register("RecoveredPanicAfterGoexit", RecoveredPanicAfterGoexit)
+ register("RecoverBeforePanicAfterGoexit", RecoverBeforePanicAfterGoexit)
+ register("RecoverBeforePanicAfterGoexit2", RecoverBeforePanicAfterGoexit2)
register("PanicTraceback", PanicTraceback)
register("GoschedInPanic", GoschedInPanic)
register("SyscallInPanic", SyscallInPanic)
@@ -111,6 +116,50 @@
panic("again")
}
+// Same as RecursivePanic, but do the first recover and the second panic in
+// separate defers, and make sure they are executed in the correct order.
+func RecursivePanic2() {
+ func() {
+ defer func() {
+ fmt.Println(recover())
+ }()
+ var x [8192]byte
+ func(x [8192]byte) {
+ defer func() {
+ panic("second panic")
+ }()
+ defer func() {
+ fmt.Println(recover())
+ }()
+ panic("first panic")
+ }(x)
+ }()
+ panic("third panic")
+}
+
+// Make sure that the first panic finished as a panic, even though the second
+// panic was recovered
+func RecursivePanic3() {
+ defer func() {
+ defer func() {
+ recover()
+ }()
+ panic("second panic")
+ }()
+ panic("first panic")
+}
+
+// Test case where a single defer recovers one panic but starts another panic. If
+// the second panic is never recovered, then the recovered first panic will still
+// appear on the panic stack (labeled '[recovered]') and the runtime stack.
+func RecursivePanic4() {
+ defer func() {
+ recover()
+ panic("second panic")
+ }()
+ panic("first panic")
+}
+
func GoexitExit() {
println("t1")
go func() {
@@ -202,6 +251,50 @@
runtime.Goexit()
}
+func RecoverBeforePanicAfterGoexit() {
+ // 1. defer a function that recovers
+ // 2. defer a function that panics
+ // 3. call goexit
+ // Goexit runs the #2 defer. Its panic
+ // is caught by the #1 defer. For Goexit, we explicitly
+ // resume execution in the Goexit loop, instead of resuming
+ // execution in the caller (which would make the Goexit disappear!)
+ defer func() {
+ r := recover()
+ if r == nil {
+ panic("bad recover")
+ }
+ }()
+ defer func() {
+ panic("hello")
+ }()
+ runtime.Goexit()
+}
+
+func RecoverBeforePanicAfterGoexit2() {
+ for i := 0; i < 2; i++ {
+ defer func() {
+ }()
+ }
+ // 1. defer a function that recovers
+ // 2. defer a function that panics
+ // 3. call goexit
+ // Goexit runs the #2 defer. Its panic
+ // is caught by the #1 defer. For Goexit, we explicitly
+ // resume execution in the Goexit loop, instead of resuming
+ // execution in the caller (which would make the Goexit disappear!)
+ defer func() {
+ r := recover()
+ if r == nil {
+ panic("bad recover")
+ }
+ }()
+ defer func() {
+ panic("hello")
+ }()
+ runtime.Goexit()
+}
+
func PanicTraceback() {
pt1()
}
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index cca9c45..74732cd 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -11,6 +11,7 @@
"runtime/debug"
"sync/atomic"
"time"
+ "unsafe"
)
func init() {
@@ -19,6 +20,7 @@
register("GCSys", GCSys)
register("GCPhys", GCPhys)
register("DeferLiveness", DeferLiveness)
+ register("GCZombie", GCZombie)
}
func GCSys() {
@@ -147,9 +149,24 @@
size = 4 << 20
split = 64 << 10
objects = 2
+
+ // The page cache could hide 64 8-KiB pages from the scavenger today.
+ maxPageCache = (8 << 10) * 64
+
+ // Reduce GOMAXPROCS down to 4 if it's greater. We need to bound the amount
+ // of memory held in the page cache because the scavenger can't reach it.
+ // The page cache will hold at most maxPageCache of memory per-P, so this
+ // bounds the amount of memory hidden from the scavenger to 4*maxPageCache
+ // at most.
+ maxProcs = 4
)
// Set GOGC so that this test operates under consistent assumptions.
debug.SetGCPercent(100)
+ procs := runtime.GOMAXPROCS(-1)
+ if procs > maxProcs {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(maxProcs))
+ procs = runtime.GOMAXPROCS(-1)
+ }
// Save objects which we want to survive, and condemn objects which we don't.
// Note that we condemn objects in this way and release them all at once in
// order to avoid having the GC start freeing up these objects while the loop
@@ -197,10 +214,22 @@
// Since the runtime should scavenge the entirety of the remaining holes,
// theoretically there should be no more free and unscavenged memory. However due
// to other allocations that happen during this test we may still see some physical
- // memory over-use. 10% here is an arbitrary but very conservative threshold which
- // should easily account for any other allocations this test may have done.
+ // memory over-use.
overuse := (float64(heapBacked) - float64(stats.HeapAlloc)) / float64(stats.HeapAlloc)
- if overuse <= 0.10 {
+ // Compute the threshold.
+ //
+ // In theory, this threshold should just be zero, but that's not possible in practice.
+ // Firstly, the runtime's page cache can hide up to maxPageCache of free memory from the
+ // scavenger per P. To account for this, we increase the threshold by the ratio of the
+ // total amount the runtime could hide from the scavenger to the amount of memory we expect
+ // to be able to scavenge here, which is (size-split)*objects. This computation is the reason
+ // for the GOMAXPROCS cap above; if GOMAXPROCS is too high, the threshold just becomes 100%+ since the
+ // amount of memory being allocated is fixed. Then we add 5% to account for noise, such as
+ // other allocations this test may have performed that we don't explicitly account for. The
+ // baseline threshold here is around 11% for GOMAXPROCS=1, capping out at around 30% for
+ // GOMAXPROCS=4.
+ threshold := 0.05 + float64(procs)*maxPageCache/float64((size-split)*objects)
+ if overuse <= threshold {
fmt.Println("OK")
return
}
@@ -210,8 +239,8 @@
// In the context of this test, this indicates a large amount of
// fragmentation with physical pages that are otherwise unused but not
// returned to the OS.
- fmt.Printf("exceeded physical memory overuse threshold of 10%%: %3.2f%%\n"+
- "(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", overuse*100,
+ fmt.Printf("exceeded physical memory overuse threshold of %3.2f%%: %3.2f%%\n"+
+ "(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", threshold*100, overuse*100,
stats.HeapAlloc, stats.NextGC, stats.HeapSys, stats.HeapReleased, len(saved))
runtime.KeepAlive(saved)
}
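
Plugging the constants defined earlier in this test into the new threshold formula reproduces the baselines quoted in the comment:

package main

import "fmt"

func main() {
	const (
		size         = 4 << 20
		split        = 64 << 10
		objects      = 2
		maxPageCache = (8 << 10) * 64
	)
	for _, procs := range []int{1, 4} {
		threshold := 0.05 + float64(procs)*maxPageCache/float64((size-split)*objects)
		fmt.Printf("GOMAXPROCS=%d: %.1f%%\n", procs, threshold*100)
	}
	// Output:
	// GOMAXPROCS=1: 11.3%
	// GOMAXPROCS=4: 30.4%
}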
@@ -237,3 +266,37 @@
func escape(x interface{}) { sink2 = x; sink2 = nil }
var sink2 interface{}
+
+// Test zombie object detection and reporting.
+func GCZombie() {
+ // Allocate several objects of unusual size (so free slots are
+ // unlikely to all be re-allocated by the runtime).
+ const size = 190
+ const count = 8192 / size
+ keep := make([]*byte, 0, (count+1)/2)
+ free := make([]uintptr, 0, (count+1)/2)
+ zombies := make([]*byte, 0, len(free))
+ for i := 0; i < count; i++ {
+ obj := make([]byte, size)
+ p := &obj[0]
+ if i%2 == 0 {
+ keep = append(keep, p)
+ } else {
+ free = append(free, uintptr(unsafe.Pointer(p)))
+ }
+ }
+
+ // Free the unreferenced objects.
+ runtime.GC()
+
+ // Bring the free objects back to life.
+ for _, p := range free {
+ zombies = append(zombies, (*byte)(unsafe.Pointer(p)))
+ }
+
+ // GC should detect the zombie objects.
+ runtime.GC()
+ println("failed")
+ runtime.KeepAlive(keep)
+ runtime.KeepAlive(zombies)
+}
diff --git a/src/runtime/testdata/testprog/lockosthread.go b/src/runtime/testdata/testprog/lockosthread.go
index fd3123e..e9d7fdb 100644
--- a/src/runtime/testdata/testprog/lockosthread.go
+++ b/src/runtime/testdata/testprog/lockosthread.go
@@ -7,6 +7,7 @@
import (
"os"
"runtime"
+ "sync"
"time"
)
@@ -30,6 +31,7 @@
runtime.LockOSThread()
})
register("LockOSThreadAvoidsStatePropagation", LockOSThreadAvoidsStatePropagation)
+ register("LockOSThreadTemplateThreadRace", LockOSThreadTemplateThreadRace)
}
func LockOSThreadMain() {
@@ -195,3 +197,50 @@
runtime.UnlockOSThread()
println("OK")
}
+
+func LockOSThreadTemplateThreadRace() {
+ // This test attempts to reproduce the race described in
+ // golang.org/issue/38931. To do so, we must have a stop-the-world
+ // (achieved via ReadMemStats) racing with two LockOSThread calls.
+ //
+ // While this test attempts to line up the timing, it is only expected
+ // to fail (and thus hang) around 2% of the time if the race is
+ // present.
+
+ // Ensure enough Ps to actually run everything in parallel. Though on
+ // <4 core machines, we are still at the whim of the kernel scheduler.
+ runtime.GOMAXPROCS(4)
+
+ go func() {
+ // Stop the world; race with LockOSThread below.
+ var m runtime.MemStats
+ for {
+ runtime.ReadMemStats(&m)
+ }
+ }()
+
+ // Try to synchronize both LockOSThreads.
+ start := time.Now().Add(10 * time.Millisecond)
+
+ var wg sync.WaitGroup
+ wg.Add(2)
+
+ for i := 0; i < 2; i++ {
+ go func() {
+ for time.Now().Before(start) {
+ }
+
+ // Add work to the local runq to trigger early startm
+ // in handoffp.
+ go func() {}()
+
+ runtime.LockOSThread()
+ runtime.Gosched() // add a preemption point.
+ wg.Done()
+ }()
+ }
+
+ wg.Wait()
+ // If both LockOSThreads completed then we did not hit the race.
+ println("OK")
+}
diff --git a/src/runtime/testdata/testprog/numcpu_freebsd.go b/src/runtime/testdata/testprog/numcpu_freebsd.go
index 42ee154..aff36ec 100644
--- a/src/runtime/testdata/testprog/numcpu_freebsd.go
+++ b/src/runtime/testdata/testprog/numcpu_freebsd.go
@@ -85,7 +85,13 @@
if err != nil {
return nil, fmt.Errorf("fail to execute '%s': %s", cmdline, err)
}
- pos := bytes.IndexRune(output, ':')
+ pos := bytes.IndexRune(output, '\n')
+ if pos == -1 {
+ return nil, fmt.Errorf("invalid output from '%s', '\\n' not found: %s", cmdline, output)
+ }
+ output = output[0:pos]
+
+ pos = bytes.IndexRune(output, ':')
if pos == -1 {
return nil, fmt.Errorf("invalid output from '%s', ':' not found: %s", cmdline, output)
}
diff --git a/src/runtime/testdata/testprog/panicprint.go b/src/runtime/testdata/testprog/panicprint.go
new file mode 100644
index 0000000..c8deabe
--- /dev/null
+++ b/src/runtime/testdata/testprog/panicprint.go
@@ -0,0 +1,111 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+type MyBool bool
+type MyComplex128 complex128
+type MyComplex64 complex64
+type MyFloat32 float32
+type MyFloat64 float64
+type MyInt int
+type MyInt8 int8
+type MyInt16 int16
+type MyInt32 int32
+type MyInt64 int64
+type MyString string
+type MyUint uint
+type MyUint8 uint8
+type MyUint16 uint16
+type MyUint32 uint32
+type MyUint64 uint64
+type MyUintptr uintptr
+
+func panicCustomComplex64() {
+ panic(MyComplex64(0.11 + 3i))
+}
+
+func panicCustomComplex128() {
+ panic(MyComplex128(32.1 + 10i))
+}
+
+func panicCustomString() {
+ panic(MyString("Panic"))
+}
+
+func panicCustomBool() {
+ panic(MyBool(true))
+}
+
+func panicCustomInt() {
+ panic(MyInt(93))
+}
+
+func panicCustomInt8() {
+ panic(MyInt8(93))
+}
+
+func panicCustomInt16() {
+ panic(MyInt16(93))
+}
+
+func panicCustomInt32() {
+ panic(MyInt32(93))
+}
+
+func panicCustomInt64() {
+ panic(MyInt64(93))
+}
+
+func panicCustomUint() {
+ panic(MyUint(93))
+}
+
+func panicCustomUint8() {
+ panic(MyUint8(93))
+}
+
+func panicCustomUint16() {
+ panic(MyUint16(93))
+}
+
+func panicCustomUint32() {
+ panic(MyUint32(93))
+}
+
+func panicCustomUint64() {
+ panic(MyUint64(93))
+}
+
+func panicCustomUintptr() {
+ panic(MyUintptr(93))
+}
+
+func panicCustomFloat64() {
+ panic(MyFloat64(-93.70))
+}
+
+func panicCustomFloat32() {
+ panic(MyFloat32(-93.70))
+}
+
+func init() {
+ register("panicCustomComplex64", panicCustomComplex64)
+ register("panicCustomComplex128", panicCustomComplex128)
+ register("panicCustomBool", panicCustomBool)
+ register("panicCustomFloat32", panicCustomFloat32)
+ register("panicCustomFloat64", panicCustomFloat64)
+ register("panicCustomInt", panicCustomInt)
+ register("panicCustomInt8", panicCustomInt8)
+ register("panicCustomInt16", panicCustomInt16)
+ register("panicCustomInt32", panicCustomInt32)
+ register("panicCustomInt64", panicCustomInt64)
+ register("panicCustomString", panicCustomString)
+ register("panicCustomUint", panicCustomUint)
+ register("panicCustomUint8", panicCustomUint8)
+ register("panicCustomUint16", panicCustomUint16)
+ register("panicCustomUint32", panicCustomUint32)
+ register("panicCustomUint64", panicCustomUint64)
+ register("panicCustomUintptr", panicCustomUintptr)
+}
diff --git a/src/runtime/testdata/testprog/preempt.go b/src/runtime/testdata/testprog/preempt.go
new file mode 100644
index 0000000..1c74d0e
--- /dev/null
+++ b/src/runtime/testdata/testprog/preempt.go
@@ -0,0 +1,71 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "runtime"
+ "runtime/debug"
+ "sync/atomic"
+)
+
+func init() {
+ register("AsyncPreempt", AsyncPreempt)
+}
+
+func AsyncPreempt() {
+ // Run with just 1 GOMAXPROCS so the runtime is required to
+ // use scheduler preemption.
+ runtime.GOMAXPROCS(1)
+ // Disable GC so we have complete control of what we're testing.
+ debug.SetGCPercent(-1)
+
+ // Start a goroutine with no sync safe-points.
+ var ready, ready2 uint32
+ go func() {
+ for {
+ atomic.StoreUint32(&ready, 1)
+ dummy()
+ dummy()
+ }
+ }()
+ // Also start one with a frameless function.
+ // This is an especially interesting case for
+ // LR machines.
+ go func() {
+ atomic.AddUint32(&ready2, 1)
+ frameless()
+ }()
+ // Also test empty infinite loop.
+ go func() {
+ atomic.AddUint32(&ready2, 1)
+ for {
+ }
+ }()
+
+ // Wait for the goroutine to stop passing through sync
+ // safe-points.
+ for atomic.LoadUint32(&ready) == 0 || atomic.LoadUint32(&ready2) < 2 {
+ runtime.Gosched()
+ }
+
+ // Run a GC, which will have to stop the goroutine for STW and
+ // for stack scanning. If this doesn't work, the test will
+ // deadlock and timeout.
+ runtime.GC()
+
+ println("OK")
+}
+
+//go:noinline
+func frameless() {
+ for i := int64(0); i < 1<<62; i++ {
+ out += i * i * i * i * i * 12345
+ }
+}
+
+var out int64
+
+//go:noinline
+func dummy() {}
diff --git a/src/runtime/testdata/testprog/signal.go b/src/runtime/testdata/testprog/signal.go
index 2ccbada..417e105 100644
--- a/src/runtime/testdata/testprog/signal.go
+++ b/src/runtime/testdata/testprog/signal.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !windows,!plan9,!nacl
+// +build !windows,!plan9
package main
diff --git a/src/runtime/testdata/testprog/vdso.go b/src/runtime/testdata/testprog/vdso.go
new file mode 100644
index 0000000..ef92f48
--- /dev/null
+++ b/src/runtime/testdata/testprog/vdso.go
@@ -0,0 +1,55 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Invoke signal handler in the VDSO context (see issue 32912).
+
+package main
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "runtime/pprof"
+ "time"
+)
+
+func init() {
+ register("SignalInVDSO", signalInVDSO)
+}
+
+func signalInVDSO() {
+ f, err := ioutil.TempFile("", "timeprofnow")
+ if err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ if err := pprof.StartCPUProfile(f); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ t0 := time.Now()
+ t1 := t0
+ // We should get a profiling signal 100 times a second,
+ // so running for 1 second should be sufficient.
+ for t1.Sub(t0) < time.Second {
+ t1 = time.Now()
+ }
+
+ pprof.StopCPUProfile()
+
+ name := f.Name()
+ if err := f.Close(); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ if err := os.Remove(name); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ fmt.Println("success")
+}
diff --git a/src/runtime/testdata/testprogcgo/eintr.go b/src/runtime/testdata/testprogcgo/eintr.go
new file mode 100644
index 0000000..791ff1b
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/eintr.go
@@ -0,0 +1,246 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+/*
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+
+static int clearRestart(int sig) {
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof sa);
+ if (sigaction(sig, NULL, &sa) < 0) {
+ return errno;
+ }
+ sa.sa_flags &=~ SA_RESTART;
+ if (sigaction(sig, &sa, NULL) < 0) {
+ return errno;
+ }
+ return 0;
+}
+*/
+import "C"
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "net"
+ "os"
+ "os/exec"
+ "sync"
+ "syscall"
+ "time"
+)
+
+func init() {
+ register("EINTR", EINTR)
+ register("Block", Block)
+}
+
+// Test various operations when a signal handler is installed without
+// the SA_RESTART flag. This tests that the os and net APIs handle EINTR.
+func EINTR() {
+ if errno := C.clearRestart(C.int(syscall.SIGURG)); errno != 0 {
+ log.Fatal(syscall.Errno(errno))
+ }
+ if errno := C.clearRestart(C.int(syscall.SIGWINCH)); errno != 0 {
+ log.Fatal(syscall.Errno(errno))
+ }
+ if errno := C.clearRestart(C.int(syscall.SIGCHLD)); errno != 0 {
+ log.Fatal(syscall.Errno(errno))
+ }
+
+ var wg sync.WaitGroup
+ testPipe(&wg)
+ testNet(&wg)
+ testExec(&wg)
+ wg.Wait()
+ fmt.Println("OK")
+}
+
+// spin does CPU bound spinning and allocating for a millisecond,
+// to get a SIGURG.
+//go:noinline
+func spin() (float64, []byte) {
+ stop := time.Now().Add(time.Millisecond)
+ r1 := 0.0
+ r2 := make([]byte, 200)
+ for time.Now().Before(stop) {
+ for i := 1; i < 1e6; i++ {
+ r1 += r1 / float64(i)
+ r2 = append(r2, bytes.Repeat([]byte{byte(i)}, 100)...)
+ r2 = r2[100:]
+ }
+ }
+ return r1, r2
+}
+
+// winch sends a few SIGWINCH signals to the process.
+func winch() {
+ ticker := time.NewTicker(100 * time.Microsecond)
+ defer ticker.Stop()
+ pid := syscall.Getpid()
+ for n := 10; n > 0; n-- {
+ syscall.Kill(pid, syscall.SIGWINCH)
+ <-ticker.C
+ }
+}
+
+// sendSomeSignals triggers a few SIGURG and SIGWINCH signals.
+func sendSomeSignals() {
+ done := make(chan struct{})
+ go func() {
+ spin()
+ close(done)
+ }()
+ winch()
+ <-done
+}
+
+// testPipe tests pipe operations.
+func testPipe(wg *sync.WaitGroup) {
+ r, w, err := os.Pipe()
+ if err != nil {
+ log.Fatal(err)
+ }
+ if err := syscall.SetNonblock(int(r.Fd()), false); err != nil {
+ log.Fatal(err)
+ }
+ if err := syscall.SetNonblock(int(w.Fd()), false); err != nil {
+ log.Fatal(err)
+ }
+ wg.Add(2)
+ go func() {
+ defer wg.Done()
+ defer w.Close()
+ // Spin before calling Write so that the first ReadFull
+ // in the other goroutine will likely be interrupted
+ // by a signal.
+ sendSomeSignals()
+ // This Write will likely be interrupted by a signal
+ // as the other goroutine spins in the middle of reading.
+ // We write enough data that we should always fill the
+ // pipe buffer and need multiple write system calls.
+ if _, err := w.Write(bytes.Repeat([]byte{0}, 2<<20)); err != nil {
+ log.Fatal(err)
+ }
+ }()
+ go func() {
+ defer wg.Done()
+ defer r.Close()
+ b := make([]byte, 1<<20)
+ // This ReadFull will likely be interrupted by a signal,
+ // as the other goroutine spins before writing anything.
+ if _, err := io.ReadFull(r, b); err != nil {
+ log.Fatal(err)
+ }
+ // Spin after reading half the data so that the Write
+ // in the other goroutine will likely be interrupted
+ // before it completes.
+ sendSomeSignals()
+ if _, err := io.ReadFull(r, b); err != nil {
+ log.Fatal(err)
+ }
+ }()
+}
+
+// testNet tests network operations.
+func testNet(wg *sync.WaitGroup) {
+ ln, err := net.Listen("tcp4", "127.0.0.1:0")
+ if err != nil {
+ if errors.Is(err, syscall.EAFNOSUPPORT) || errors.Is(err, syscall.EPROTONOSUPPORT) {
+ return
+ }
+ log.Fatal(err)
+ }
+ wg.Add(2)
+ go func() {
+ defer wg.Done()
+ defer ln.Close()
+ c, err := ln.Accept()
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer c.Close()
+ cf, err := c.(*net.TCPConn).File()
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer cf.Close()
+ if err := syscall.SetNonblock(int(cf.Fd()), false); err != nil {
+ log.Fatal(err)
+ }
+ // See comments in testPipe.
+ sendSomeSignals()
+ if _, err := cf.Write(bytes.Repeat([]byte{0}, 2<<20)); err != nil {
+ log.Fatal(err)
+ }
+ }()
+ go func() {
+ defer wg.Done()
+ sendSomeSignals()
+ c, err := net.Dial("tcp", ln.Addr().String())
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer c.Close()
+ cf, err := c.(*net.TCPConn).File()
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer cf.Close()
+ if err := syscall.SetNonblock(int(cf.Fd()), false); err != nil {
+ log.Fatal(err)
+ }
+ // See comments in testPipe.
+ b := make([]byte, 1<<20)
+ if _, err := io.ReadFull(cf, b); err != nil {
+ log.Fatal(err)
+ }
+ sendSomeSignals()
+ if _, err := io.ReadFull(cf, b); err != nil {
+ log.Fatal(err)
+ }
+ }()
+}
+
+func testExec(wg *sync.WaitGroup) {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ cmd := exec.Command(os.Args[0], "Block")
+ stdin, err := cmd.StdinPipe()
+ if err != nil {
+ log.Fatal(err)
+ }
+ cmd.Stderr = new(bytes.Buffer)
+ cmd.Stdout = cmd.Stderr
+ if err := cmd.Start(); err != nil {
+ log.Fatal(err)
+ }
+
+ go func() {
+ sendSomeSignals()
+ stdin.Close()
+ }()
+
+ if err := cmd.Wait(); err != nil {
+ log.Fatalf("%v:\n%s", err, cmd.Stdout)
+ }
+ }()
+}
+
+// Block blocks until stdin is closed.
+func Block() {
+ io.Copy(ioutil.Discard, os.Stdin)
+}
diff --git a/src/runtime/testdata/testprogcgo/numgoroutine.go b/src/runtime/testdata/testprogcgo/numgoroutine.go
index 12fda49..5bdfe52 100644
--- a/src/runtime/testdata/testprogcgo/numgoroutine.go
+++ b/src/runtime/testdata/testprogcgo/numgoroutine.go
@@ -41,13 +41,6 @@
// Test that there are just the expected number of goroutines
// running. Specifically, test that the spare M's goroutine
// doesn't show up.
- //
- // On non-Windows platforms there's a signal handling thread
- // started by os/signal.init in addition to the main
- // goroutine.
- if runtime.GOOS != "windows" {
- baseGoroutines = 1
- }
if _, ok := checkNumGoroutine("first", 1+baseGoroutines); !ok {
return
}
diff --git a/src/runtime/testdata/testprogcgo/segv.go b/src/runtime/testdata/testprogcgo/segv.go
new file mode 100644
index 0000000..3237a8c
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/segv.go
@@ -0,0 +1,56 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+// static void nop() {}
+import "C"
+
+import (
+ "syscall"
+ "time"
+)
+
+func init() {
+ register("Segv", Segv)
+ register("SegvInCgo", SegvInCgo)
+}
+
+var Sum int
+
+func Segv() {
+ c := make(chan bool)
+ go func() {
+ close(c)
+ for i := 0; ; i++ {
+ Sum += i
+ }
+ }()
+
+ <-c
+
+ syscall.Kill(syscall.Getpid(), syscall.SIGSEGV)
+
+ // Give the OS time to deliver the signal.
+ time.Sleep(time.Second)
+}
+
+func SegvInCgo() {
+ c := make(chan bool)
+ go func() {
+ close(c)
+ for {
+ C.nop()
+ }
+ }()
+
+ <-c
+
+ syscall.Kill(syscall.Getpid(), syscall.SIGSEGV)
+
+ // Give the OS time to deliver the signal.
+ time.Sleep(time.Second)
+}
diff --git a/src/runtime/testdata/testprognet/signal.go b/src/runtime/testdata/testprognet/signal.go
index a1559fe..4d2de79 100644
--- a/src/runtime/testdata/testprognet/signal.go
+++ b/src/runtime/testdata/testprognet/signal.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !windows,!plan9,!nacl
+// +build !windows,!plan9
// This is in testprognet instead of testprog because testprog
// must not import anything (like net, but also like os/signal)
diff --git a/src/runtime/testdata/testwinlib/main.c b/src/runtime/testdata/testwinlib/main.c
new file mode 100644
index 0000000..e84a32f
--- /dev/null
+++ b/src/runtime/testdata/testwinlib/main.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include <windows.h>
+#include "testwinlib.h"
+
+int exceptionCount;
+int continueCount;
+LONG WINAPI customExceptionHandler(struct _EXCEPTION_POINTERS *ExceptionInfo)
+{
+ if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_BREAKPOINT)
+ {
+ exceptionCount++;
+ // prepare context to resume execution
+ CONTEXT *c = ExceptionInfo->ContextRecord;
+ c->Rip = *(ULONG_PTR *)c->Rsp;
+ c->Rsp += 8;
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+LONG WINAPI customContinueHandler(struct _EXCEPTION_POINTERS *ExceptionInfo)
+{
+ if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_BREAKPOINT)
+ {
+ continueCount++;
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+void throwFromC()
+{
+ DebugBreak();
+}
+int main()
+{
+ // Simulate a "lazily" attached debugger by calling some Go code before attaching the exception/continue handlers.
+ Dummy();
+ exceptionCount = 0;
+ continueCount = 0;
+ void *exceptionHandlerHandle = AddVectoredExceptionHandler(0, customExceptionHandler);
+ if (NULL == exceptionHandlerHandle)
+ {
+ printf("cannot add vectored exception handler\n");
+ return 2;
+ }
+ void *continueHandlerHandle = AddVectoredContinueHandler(0, customContinueHandler);
+ if (NULL == continueHandlerHandle)
+ {
+ printf("cannot add vectored continue handler\n");
+ return 2;
+ }
+ CallMeBack(throwFromC);
+ RemoveVectoredContinueHandler(continueHandlerHandle);
+ RemoveVectoredExceptionHandler(exceptionHandlerHandle);
+ printf("exceptionCount: %d\ncontinueCount: %d\n", exceptionCount, continueCount);
+ return 0;
+}
\ No newline at end of file
diff --git a/src/runtime/testdata/testwinlib/main.go b/src/runtime/testdata/testwinlib/main.go
new file mode 100644
index 0000000..400eaa1
--- /dev/null
+++ b/src/runtime/testdata/testwinlib/main.go
@@ -0,0 +1,28 @@
+// +build windows,cgo
+
+package main
+
+// #include <windows.h>
+// typedef void(*callmeBackFunc)();
+// static void bridgeCallback(callmeBackFunc callback) {
+// callback();
+//}
+import "C"
+
+// CallMeBack calls back into C code.
+//export CallMeBack
+func CallMeBack(callback C.callmeBackFunc) {
+ C.bridgeCallback(callback)
+}
+
+// Dummy is called by the C code before registering the exception/continue handlers, simulating a debugger.
+// This makes sure that the Go runtime's lastcontinuehandler is reached before the C continue handler, and thus
+// validates that it does not crash the program before another handler can take action.
+// The idea is to reproduce what happens when you attach a debugger to a running program.
+// It also simulates the behavior of the .NET debugger, which registers its exception/continue handlers lazily.
+//export Dummy
+func Dummy() int {
+ return 42
+}
+
+func main() {}
diff --git a/src/runtime/testdata/testwinlibsignal/dummy.go b/src/runtime/testdata/testwinlibsignal/dummy.go
new file mode 100644
index 0000000..82dfd91
--- /dev/null
+++ b/src/runtime/testdata/testwinlibsignal/dummy.go
@@ -0,0 +1,10 @@
+// +build windows
+
+package main
+
+//export Dummy
+func Dummy() int {
+ return 42
+}
+
+func main() {}
diff --git a/src/runtime/testdata/testwinlibsignal/main.c b/src/runtime/testdata/testwinlibsignal/main.c
new file mode 100644
index 0000000..1787fef
--- /dev/null
+++ b/src/runtime/testdata/testwinlibsignal/main.c
@@ -0,0 +1,50 @@
+#include <windows.h>
+#include <stdio.h>
+
+HANDLE waitForCtrlBreakEvent;
+
+BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
+{
+ switch (fdwCtrlType)
+ {
+ case CTRL_BREAK_EVENT:
+ SetEvent(waitForCtrlBreakEvent);
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+int main(void)
+{
+ waitForCtrlBreakEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+ if (!waitForCtrlBreakEvent) {
+ fprintf(stderr, "ERROR: Could not create event");
+ return 1;
+ }
+
+ if (!SetConsoleCtrlHandler(CtrlHandler, TRUE))
+ {
+ fprintf(stderr, "ERROR: Could not set control handler");
+ return 1;
+ }
+
+ // The library must be loaded after the SetConsoleCtrlHandler call
+ // so that the library handler registers after the main program.
+ // This way the library handler gets called first.
+ HMODULE dummyDll = LoadLibrary("dummy.dll");
+ if (!dummyDll) {
+ fprintf(stderr, "ERROR: Could not load dummy.dll");
+ return 1;
+ }
+
+ printf("ready\n");
+ fflush(stdout);
+
+ if (WaitForSingleObject(waitForCtrlBreakEvent, 5000) != WAIT_OBJECT_0) {
+ fprintf(stderr, "FAILURE: No signal received");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/src/runtime/time.go b/src/runtime/time.go
index 28a4722..fdb5066 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -7,17 +7,18 @@
package runtime
import (
- "internal/cpu"
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
-// For GOOS=nacl, package syscall knows the layout of this structure.
-// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
type timer struct {
- tb *timersBucket // the bucket the timer lives in
- i int // heap index
+ // If this timer is on a heap, which P's heap it is on.
+ // puintptr rather than *p to match uintptr in the versions
+ // of this struct defined in other packages.
+ pp puintptr
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(arg, now) in the timer goroutine, so f must be
@@ -27,52 +28,141 @@
f func(interface{}, uintptr)
arg interface{}
seq uintptr
+
+ // What to set the when field to in timerModifiedXX status.
+ nextwhen int64
+
+ // The status field holds one of the values below.
+ status uint32
}
-// timersLen is the length of timers array.
+// Code outside this file has to be careful in using a timer value.
//
-// Ideally, this would be set to GOMAXPROCS, but that would require
-// dynamic reallocation
+// The pp, status, and nextwhen fields may only be used by code in this file.
//
-// The current value is a compromise between memory usage and performance
-// that should cover the majority of GOMAXPROCS values used in the wild.
-const timersLen = 64
-
-// timers contains "per-P" timer heaps.
+// Code that creates a new timer value can set the when, period, f,
+// arg, and seq fields.
+// A new timer value may be passed to addtimer (called by time.startTimer).
+// After doing that no fields may be touched.
//
-// Timers are queued into timersBucket associated with the current P,
-// so each P may work with its own timers independently of other P instances.
+// An active timer (one that has been passed to addtimer) may be
+// passed to deltimer (time.stopTimer), after which it is no longer an
+// active timer. It is an inactive timer.
+// In an inactive timer the period, f, arg, and seq fields may be modified,
+// but not the when field.
+// It's OK to just drop an inactive timer and let the GC collect it.
+// It's not OK to pass an inactive timer to addtimer.
+// Only newly allocated timer values may be passed to addtimer.
//
-// Each timersBucket may be associated with multiple P
-// if GOMAXPROCS > timersLen.
-var timers [timersLen]struct {
- timersBucket
+// An active timer may be passed to modtimer. No fields may be touched.
+// It remains an active timer.
+//
+// An inactive timer may be passed to resettimer to turn into an
+// active timer with an updated when field.
+// It's OK to pass a newly allocated timer value to resettimer.
+//
+// Timer operations are addtimer, deltimer, modtimer, resettimer,
+// cleantimers, adjusttimers, and runtimer.
+//
+// We don't permit calling addtimer/deltimer/modtimer/resettimer simultaneously,
+// but adjusttimers and runtimer can be called at the same time as any of those.
+//
+// Active timers live in heaps attached to P, in the timers field.
+// Inactive timers live there too temporarily, until they are removed.
+//
+// addtimer:
+// timerNoStatus -> timerWaiting
+// anything else -> panic: invalid value
+// deltimer:
+// timerWaiting -> timerModifying -> timerDeleted
+// timerModifiedEarlier -> timerModifying -> timerDeleted
+// timerModifiedLater -> timerModifying -> timerDeleted
+// timerNoStatus -> do nothing
+// timerDeleted -> do nothing
+// timerRemoving -> do nothing
+// timerRemoved -> do nothing
+// timerRunning -> wait until status changes
+// timerMoving -> wait until status changes
+// timerModifying -> wait until status changes
+// modtimer:
+// timerWaiting -> timerModifying -> timerModifiedXX
+// timerModifiedXX -> timerModifying -> timerModifiedYY
+// timerNoStatus -> timerModifying -> timerWaiting
+// timerRemoved -> timerModifying -> timerWaiting
+// timerDeleted -> timerModifying -> timerModifiedXX
+// timerRunning -> wait until status changes
+// timerMoving -> wait until status changes
+// timerRemoving -> wait until status changes
+// timerModifying -> wait until status changes
+// cleantimers (looks in P's timer heap):
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerModifiedXX -> timerMoving -> timerWaiting
+// adjusttimers (looks in P's timer heap):
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerModifiedXX -> timerMoving -> timerWaiting
+// runtimer (looks in P's timer heap):
+// timerNoStatus -> panic: uninitialized timer
+// timerWaiting -> timerWaiting or
+// timerWaiting -> timerRunning -> timerNoStatus or
+// timerWaiting -> timerRunning -> timerWaiting
+// timerModifying -> wait until status changes
+// timerModifiedXX -> timerMoving -> timerWaiting
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerRunning -> panic: concurrent runtimer calls
+// timerRemoved -> panic: inconsistent timer heap
+// timerRemoving -> panic: inconsistent timer heap
+// timerMoving -> panic: inconsistent timer heap
- // The padding should eliminate false sharing
- // between timersBucket values.
- pad [cpu.CacheLinePadSize - unsafe.Sizeof(timersBucket{})%cpu.CacheLinePadSize]byte
-}
+// Values for the timer status field.
+const (
+ // Timer has no status set yet.
+ timerNoStatus = iota
-func (t *timer) assignBucket() *timersBucket {
- id := uint8(getg().m.p.ptr().id) % timersLen
- t.tb = &timers[id].timersBucket
- return t.tb
-}
+ // Waiting for timer to fire.
+ // The timer is in some P's heap.
+ timerWaiting
-//go:notinheap
-type timersBucket struct {
- lock mutex
- gp *g
- created bool
- sleeping bool
- rescheduling bool
- sleepUntil int64
- waitnote note
- t []*timer
-}
+ // Running the timer function.
+ // A timer will only have this status briefly.
+ timerRunning
-// nacl fake time support - time in nanoseconds since 1970
-var faketime int64
+ // The timer is deleted and should be removed.
+ // It should not be run, but it is still in some P's heap.
+ timerDeleted
+
+ // The timer is being removed.
+ // The timer will only have this status briefly.
+ timerRemoving
+
+ // The timer has been stopped.
+ // It is not in any P's heap.
+ timerRemoved
+
+ // The timer is being modified.
+ // The timer will only have this status briefly.
+ timerModifying
+
+ // The timer has been modified to an earlier time.
+ // The new when value is in the nextwhen field.
+ // The timer is in some P's heap, possibly in the wrong place.
+ timerModifiedEarlier
+
+ // The timer has been modified to the same or a later time.
+ // The new when value is in the nextwhen field.
+ // The timer is in some P's heap, possibly in the wrong place.
+ timerModifiedLater
+
+ // The timer has been modified and is being moved.
+ // The timer will only have this status briefly.
+ timerMoving
+)
+
+// maxWhen is the maximum value for timer's when field.
+const maxWhen = 1<<63 - 1
+
+// verifyTimers can be set to true to add debugging checks that the
+// timer heaps are valid.
+const verifyTimers = false
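
As a rough illustration of the status discipline described above, here is a standalone sketch in ordinary user-level Go (not runtime code) of a deltimer-style transition, using simplified status values that mirror the constants above:

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	timerNoStatus = iota
	timerWaiting
	timerModifying
	timerDeleted
)

type fakeTimer struct{ status uint32 }

// tryDelete mimics deltimer's timerWaiting -> timerModifying -> timerDeleted
// path: claim the timer with a CAS, then publish the final state.
func tryDelete(t *fakeTimer) bool {
	for {
		switch s := atomic.LoadUint32(&t.status); s {
		case timerWaiting:
			if atomic.CompareAndSwapUint32(&t.status, s, timerModifying) {
				atomic.StoreUint32(&t.status, timerDeleted)
				return true // stopped before it ran
			}
		case timerDeleted, timerNoStatus:
			return false // already stopped or never started
		default:
			// Another goroutine holds timerModifying; retry.
		}
	}
}

func main() {
	t := &fakeTimer{status: timerWaiting}
	fmt.Println(tryDelete(t)) // true
	fmt.Println(tryDelete(t)) // false
}
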
// Package time APIs.
// Godoc uses the comments in package time, not these.
@@ -92,17 +182,20 @@
t = new(timer)
gp.timer = t
}
- *t = timer{}
- t.when = nanotime() + ns
t.f = goroutineReady
t.arg = gp
- tb := t.assignBucket()
- lock(&tb.lock)
- if !tb.addtimerLocked(t) {
- unlock(&tb.lock)
- badTimer()
- }
- goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2)
+ t.nextwhen = nanotime() + ns
+ gopark(resetForSleep, unsafe.Pointer(t), waitReasonSleep, traceEvGoSleep, 1)
+}
+
+// resetForSleep is called after the goroutine is parked for timeSleep.
+// We can't call resettimer in timeSleep itself because if this is a short
+// sleep and there are many goroutines then the P can wind up running the
+// timer function, goroutineReady, before the goroutine has been parked.
+func resetForSleep(gp *g, ut unsafe.Pointer) bool {
+ t := (*timer)(ut)
+ resettimer(t, t.nextwhen)
+ return true
}
// startTimer adds t to the timer heap.
@@ -114,13 +207,29 @@
addtimer(t)
}
-// stopTimer removes t from the timer heap if it is there.
-// It returns true if t was removed, false if t wasn't even there.
+// stopTimer stops a timer.
+// It reports whether t was stopped before being run.
//go:linkname stopTimer time.stopTimer
func stopTimer(t *timer) bool {
return deltimer(t)
}
+// resetTimer resets an inactive timer, adding it to the heap.
+// Reports whether the timer was modified before it was run.
+//go:linkname resetTimer time.resetTimer
+func resetTimer(t *timer, when int64) bool {
+ if raceenabled {
+ racerelease(unsafe.Pointer(t))
+ }
+ return resettimer(t, when)
+}
+
+// modTimer modifies an existing timer.
+//go:linkname modTimer time.modTimer
+func modTimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) {
+ modtimer(t, when, period, f, arg, seq)
+}
+
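
For context, a minimal user-level sketch of how these entry points are reached through package time: time.AfterFunc starts a runtime timer via startTimer/addtimer, and Timer.Stop goes through stopTimer/deltimer.

package main

import (
	"fmt"
	"time"
)

func main() {
	// AfterFunc allocates a runtime timer and starts it (runtime.addtimer).
	t := time.AfterFunc(time.Hour, func() { fmt.Println("fired") })

	// Stop goes through time.stopTimer -> runtime.deltimer, which only
	// marks the timer deleted; the owning P removes it from its heap later.
	fmt.Println(t.Stop()) // true: stopped before it ran
}
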
// Go runtime.
// Ready the goroutine arg.
@@ -128,251 +237,808 @@
goready(arg.(*g), 0)
}
+// addtimer adds a timer to the current P.
+// This should only be called with a newly created timer.
+// That avoids the risk of changing the when field of a timer in some P's heap,
+// which could cause the heap to become unsorted.
func addtimer(t *timer) {
- tb := t.assignBucket()
- lock(&tb.lock)
- ok := tb.addtimerLocked(t)
- unlock(&tb.lock)
- if !ok {
- badTimer()
- }
-}
-
-// Add a timer to the heap and start or kick timerproc if the new timer is
-// earlier than any of the others.
-// Timers are locked.
-// Returns whether all is well: false if the data structure is corrupt
-// due to user-level races.
-func (tb *timersBucket) addtimerLocked(t *timer) bool {
- // when must never be negative; otherwise timerproc will overflow
+ // when must never be negative; otherwise runtimer will overflow
// during its delta calculation and never expire other runtime timers.
if t.when < 0 {
- t.when = 1<<63 - 1
+ t.when = maxWhen
}
- t.i = len(tb.t)
- tb.t = append(tb.t, t)
- if !siftupTimer(tb.t, t.i) {
- return false
+ if t.status != timerNoStatus {
+ throw("addtimer called with initialized timer")
}
- if t.i == 0 {
- // siftup moved to top: new earliest deadline.
- if tb.sleeping && tb.sleepUntil > t.when {
- tb.sleeping = false
- notewakeup(&tb.waitnote)
- }
- if tb.rescheduling {
- tb.rescheduling = false
- goready(tb.gp, 0)
- }
- if !tb.created {
- tb.created = true
- go timerproc(tb)
- }
- }
- return true
+ t.status = timerWaiting
+
+ when := t.when
+
+ pp := getg().m.p.ptr()
+ lock(&pp.timersLock)
+ cleantimers(pp)
+ doaddtimer(pp, t)
+ unlock(&pp.timersLock)
+
+ wakeNetPoller(when)
}
-// Delete timer t from the heap.
-// Do not need to update the timerproc: if it wakes up early, no big deal.
+// doaddtimer adds t to the current P's heap.
+// The caller must have locked the timers for pp.
+func doaddtimer(pp *p, t *timer) {
+ // Timers rely on the network poller, so make sure the poller
+ // has started.
+ if netpollInited == 0 {
+ netpollGenericInit()
+ }
+
+ if t.pp != 0 {
+ throw("doaddtimer: P already set in timer")
+ }
+ t.pp.set(pp)
+ i := len(pp.timers)
+ pp.timers = append(pp.timers, t)
+ siftupTimer(pp.timers, i)
+ if t == pp.timers[0] {
+ atomic.Store64(&pp.timer0When, uint64(t.when))
+ }
+ atomic.Xadd(&pp.numTimers, 1)
+}
+
+// deltimer deletes the timer t. It may be on some other P, so we can't
+// actually remove it from the timers heap. We can only mark it as deleted.
+// It will be removed in due course by the P whose heap it is on.
+// Reports whether the timer was removed before it was run.
func deltimer(t *timer) bool {
- if t.tb == nil {
- // t.tb can be nil if the user created a timer
- // directly, without invoking startTimer e.g
- // time.Ticker{C: c}
- // In this case, return early without any deletion.
- // See Issue 21874.
- return false
- }
-
- tb := t.tb
-
- lock(&tb.lock)
- removed, ok := tb.deltimerLocked(t)
- unlock(&tb.lock)
- if !ok {
- badTimer()
- }
- return removed
-}
-
-func (tb *timersBucket) deltimerLocked(t *timer) (removed, ok bool) {
- // t may not be registered anymore and may have
- // a bogus i (typically 0, if generated by Go).
- // Verify it before proceeding.
- i := t.i
- last := len(tb.t) - 1
- if i < 0 || i > last || tb.t[i] != t {
- return false, true
- }
- if i != last {
- tb.t[i] = tb.t[last]
- tb.t[i].i = i
- }
- tb.t[last] = nil
- tb.t = tb.t[:last]
- ok = true
- if i != last {
- if !siftupTimer(tb.t, i) {
- ok = false
- }
- if !siftdownTimer(tb.t, i) {
- ok = false
- }
- }
- return true, ok
-}
-
-func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) {
- tb := t.tb
-
- lock(&tb.lock)
- _, ok := tb.deltimerLocked(t)
- if ok {
- t.when = when
- t.period = period
- t.f = f
- t.arg = arg
- t.seq = seq
- ok = tb.addtimerLocked(t)
- }
- unlock(&tb.lock)
- if !ok {
- badTimer()
- }
-}
-
-// Timerproc runs the time-driven events.
-// It sleeps until the next event in the tb heap.
-// If addtimer inserts a new earlier event, it wakes timerproc early.
-func timerproc(tb *timersBucket) {
- tb.gp = getg()
for {
- lock(&tb.lock)
- tb.sleeping = false
- now := nanotime()
- delta := int64(-1)
- for {
- if len(tb.t) == 0 {
- delta = -1
- break
- }
- t := tb.t[0]
- delta = t.when - now
- if delta > 0 {
- break
- }
- ok := true
- if t.period > 0 {
- // leave in heap but adjust next time to fire
- t.when += t.period * (1 + -delta/t.period)
- if !siftdownTimer(tb.t, 0) {
- ok = false
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting, timerModifiedLater:
+ // Prevent preemption while the timer is in timerModifying.
+ // This could lead to a self-deadlock. See #38070.
+ mp := acquirem()
+ if atomic.Cas(&t.status, s, timerModifying) {
+ // Must fetch t.pp before changing status,
+ // as cleantimers in another goroutine
+ // can clear t.pp of a timerDeleted timer.
+ tpp := t.pp.ptr()
+ if !atomic.Cas(&t.status, timerModifying, timerDeleted) {
+ badTimer()
}
+ releasem(mp)
+ atomic.Xadd(&tpp.deletedTimers, 1)
+ // Timer was not yet run.
+ return true
} else {
- // remove from heap
- last := len(tb.t) - 1
- if last > 0 {
- tb.t[0] = tb.t[last]
- tb.t[0].i = 0
- }
- tb.t[last] = nil
- tb.t = tb.t[:last]
- if last > 0 {
- if !siftdownTimer(tb.t, 0) {
- ok = false
- }
- }
- t.i = -1 // mark as removed
+ releasem(mp)
}
- f := t.f
- arg := t.arg
- seq := t.seq
- unlock(&tb.lock)
- if !ok {
+ case timerModifiedEarlier:
+ // Prevent preemption while the timer is in timerModifying.
+ // This could lead to a self-deadlock. See #38070.
+ mp := acquirem()
+ if atomic.Cas(&t.status, s, timerModifying) {
+ // Must fetch t.pp before setting status
+ // to timerDeleted.
+ tpp := t.pp.ptr()
+ atomic.Xadd(&tpp.adjustTimers, -1)
+ if !atomic.Cas(&t.status, timerModifying, timerDeleted) {
+ badTimer()
+ }
+ releasem(mp)
+ atomic.Xadd(&tpp.deletedTimers, 1)
+ // Timer was not yet run.
+ return true
+ } else {
+ releasem(mp)
+ }
+ case timerDeleted, timerRemoving, timerRemoved:
+ // Timer was already run.
+ return false
+ case timerRunning, timerMoving:
+ // The timer is being run or moved, by a different P.
+ // Wait for it to complete.
+ osyield()
+ case timerNoStatus:
+ // Removing timer that was never added or
+ // has already been run. Also see issue 21874.
+ return false
+ case timerModifying:
+ // Simultaneous calls to deltimer and modtimer.
+ // Wait for the other call to complete.
+ osyield()
+ default:
+ badTimer()
+ }
+ }
+}
+
+// dodeltimer removes timer i from the current P's heap.
+// We are locked on the P when this is called.
+// The caller must have locked the timers for pp.
+func dodeltimer(pp *p, i int) {
+ if t := pp.timers[i]; t.pp.ptr() != pp {
+ throw("dodeltimer: wrong P")
+ } else {
+ t.pp = 0
+ }
+ last := len(pp.timers) - 1
+ if i != last {
+ pp.timers[i] = pp.timers[last]
+ }
+ pp.timers[last] = nil
+ pp.timers = pp.timers[:last]
+ if i != last {
+ // Moving to i may have moved the last timer to a new parent,
+ // so sift up to preserve the heap guarantee.
+ siftupTimer(pp.timers, i)
+ siftdownTimer(pp.timers, i)
+ }
+ if i == 0 {
+ updateTimer0When(pp)
+ }
+ atomic.Xadd(&pp.numTimers, -1)
+}
+
+// dodeltimer0 removes timer 0 from the current P's heap.
+// We are locked on the P when this is called.
+// The caller must have locked the timers for pp.
+func dodeltimer0(pp *p) {
+ if t := pp.timers[0]; t.pp.ptr() != pp {
+ throw("dodeltimer0: wrong P")
+ } else {
+ t.pp = 0
+ }
+ last := len(pp.timers) - 1
+ if last > 0 {
+ pp.timers[0] = pp.timers[last]
+ }
+ pp.timers[last] = nil
+ pp.timers = pp.timers[:last]
+ if last > 0 {
+ siftdownTimer(pp.timers, 0)
+ }
+ updateTimer0When(pp)
+ atomic.Xadd(&pp.numTimers, -1)
+}
+
+// modtimer modifies an existing timer.
+// This is called by the netpoll code or time.Ticker.Reset.
+// Reports whether the timer was modified before it was run.
+func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool {
+ if when < 0 {
+ when = maxWhen
+ }
+
+ status := uint32(timerNoStatus)
+ wasRemoved := false
+ var pending bool
+ var mp *m
+loop:
+ for {
+ switch status = atomic.Load(&t.status); status {
+ case timerWaiting, timerModifiedEarlier, timerModifiedLater:
+ // Prevent preemption while the timer is in timerModifying.
+ // This could lead to a self-deadlock. See #38070.
+ mp = acquirem()
+ if atomic.Cas(&t.status, status, timerModifying) {
+ pending = true // timer not yet run
+ break loop
+ }
+ releasem(mp)
+ case timerNoStatus, timerRemoved:
+ // Prevent preemption while the timer is in timerModifying.
+ // This could lead to a self-deadlock. See #38070.
+ mp = acquirem()
+
+ // Timer was already run and t is no longer in a heap.
+ // Act like addtimer.
+ if atomic.Cas(&t.status, status, timerModifying) {
+ wasRemoved = true
+ pending = false // timer already run or stopped
+ break loop
+ }
+ releasem(mp)
+ case timerDeleted:
+ // Prevent preemption while the timer is in timerModifying.
+ // This could lead to a self-deadlock. See #38070.
+ mp = acquirem()
+ if atomic.Cas(&t.status, status, timerModifying) {
+ atomic.Xadd(&t.pp.ptr().deletedTimers, -1)
+ pending = false // timer already stopped
+ break loop
+ }
+ releasem(mp)
+ case timerRunning, timerRemoving, timerMoving:
+ // The timer is being run or moved, by a different P.
+ // Wait for it to complete.
+ osyield()
+ case timerModifying:
+ // Multiple simultaneous calls to modtimer.
+ // Wait for the other call to complete.
+ osyield()
+ default:
+ badTimer()
+ }
+ }
+
+ t.period = period
+ t.f = f
+ t.arg = arg
+ t.seq = seq
+
+ if wasRemoved {
+ t.when = when
+ pp := getg().m.p.ptr()
+ lock(&pp.timersLock)
+ doaddtimer(pp, t)
+ unlock(&pp.timersLock)
+ if !atomic.Cas(&t.status, timerModifying, timerWaiting) {
+ badTimer()
+ }
+ releasem(mp)
+ wakeNetPoller(when)
+ } else {
+ // The timer is in some other P's heap, so we can't change
+ // the when field. If we did, the other P's heap would
+ // be out of order. So we put the new when value in the
+ // nextwhen field, and let the other P set the when field
+ // when it is prepared to resort the heap.
+ t.nextwhen = when
+
+ newStatus := uint32(timerModifiedLater)
+ if when < t.when {
+ newStatus = timerModifiedEarlier
+ }
+
+ // Update the adjustTimers field. Subtract one if we
+ // are removing a timerModifiedEarlier, add one if we
+ // are adding a timerModifiedEarlier.
+ adjust := int32(0)
+ if status == timerModifiedEarlier {
+ adjust--
+ }
+ if newStatus == timerModifiedEarlier {
+ adjust++
+ }
+ if adjust != 0 {
+ atomic.Xadd(&t.pp.ptr().adjustTimers, adjust)
+ }
+
+ // Set the new status of the timer.
+ if !atomic.Cas(&t.status, timerModifying, newStatus) {
+ badTimer()
+ }
+ releasem(mp)
+
+ // If the new status is earlier, wake up the poller.
+ if newStatus == timerModifiedEarlier {
+ wakeNetPoller(when)
+ }
+ }
+
+ return pending
+}
+
+// resettimer resets the time when a timer should fire.
+// If used for an inactive timer, the timer will become active.
+// This should be called instead of addtimer if the timer value has been,
+// or may have been, used previously.
+// Reports whether the timer was modified before it was run.
+func resettimer(t *timer, when int64) bool {
+ return modtimer(t, when, t.period, t.f, t.arg, t.seq)
+}
+
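
Similarly, a minimal user-level sketch of the reset path: Timer.Reset on a previously used timer reaches resettimer (and thus modtimer) rather than addtimer, matching the rule above that only new timers may be passed to addtimer.

package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.NewTimer(time.Hour)
	if !t.Stop() {
		<-t.C
	}
	// Reset reuses the same runtime timer value; it ends up in
	// runtime.resettimer -> modtimer rather than addtimer.
	t.Reset(50 * time.Millisecond)
	fmt.Println(<-t.C)
}
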
+// cleantimers cleans up the head of the timer queue. This speeds up
+// programs that create and delete timers; leaving them in the heap
+// slows down addtimer.
+// The caller must have locked the timers for pp.
+func cleantimers(pp *p) {
+ gp := getg()
+ for {
+ if len(pp.timers) == 0 {
+ return
+ }
+
+ // This loop can theoretically run for a while, and because
+ // it is holding timersLock it cannot be preempted.
+ // If someone is trying to preempt us, just return.
+ // We can clean the timers later.
+ if gp.preemptStop {
+ return
+ }
+
+ t := pp.timers[0]
+ if t.pp.ptr() != pp {
+ throw("cleantimers: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoving) {
+ continue
+ }
+ dodeltimer0(pp)
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
badTimer()
}
- if raceenabled {
- raceacquire(unsafe.Pointer(t))
+ atomic.Xadd(&pp.deletedTimers, -1)
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
}
- f(arg, seq)
- lock(&tb.lock)
+ // Now we can change the when field.
+ t.when = t.nextwhen
+ // Move t to the right position.
+ dodeltimer0(pp)
+ doaddtimer(pp, t)
+ if s == timerModifiedEarlier {
+ atomic.Xadd(&pp.adjustTimers, -1)
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+ default:
+ // Head of timers does not need adjustment.
+ return
}
- if delta < 0 || faketime > 0 {
- // No timers left - put goroutine to sleep.
- tb.rescheduling = true
- goparkunlock(&tb.lock, waitReasonTimerGoroutineIdle, traceEvGoBlock, 1)
+ }
+}
+
+// moveTimers moves a slice of timers to pp. The slice has been taken
+// from a different P.
+// This is currently called when the world is stopped, but the caller
+// is expected to have locked the timers for pp.
+func moveTimers(pp *p, timers []*timer) {
+ for _, t := range timers {
+ loop:
+ for {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ t.pp = 0
+ doaddtimer(pp, t)
+ break loop
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
+ }
+ t.when = t.nextwhen
+ t.pp = 0
+ doaddtimer(pp, t)
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+ break loop
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoved) {
+ continue
+ }
+ t.pp = 0
+ // We no longer need this timer in the heap.
+ break loop
+ case timerModifying:
+ // Loop until the modification is complete.
+ osyield()
+ case timerNoStatus, timerRemoved:
+ // We should not see these status values in a timers heap.
+ badTimer()
+ case timerRunning, timerRemoving, timerMoving:
+ // Some other P thinks it owns this timer,
+ // which should not happen.
+ badTimer()
+ default:
+ badTimer()
+ }
+ }
+ }
+}
+
+// adjusttimers looks through the timers in the current P's heap for
+// any timers that have been modified to run earlier, and puts them in
+// the correct place in the heap. While looking for those timers,
+// it also moves timers that have been modified to run later,
+// and removes deleted timers. The caller must have locked the timers for pp.
+func adjusttimers(pp *p) {
+ if len(pp.timers) == 0 {
+ return
+ }
+ if atomic.Load(&pp.adjustTimers) == 0 {
+ if verifyTimers {
+ verifyTimerHeap(pp)
+ }
+ return
+ }
+ var moved []*timer
+loop:
+ for i := 0; i < len(pp.timers); i++ {
+ t := pp.timers[i]
+ if t.pp.ptr() != pp {
+ throw("adjusttimers: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerDeleted:
+ if atomic.Cas(&t.status, s, timerRemoving) {
+ dodeltimer(pp, i)
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ badTimer()
+ }
+ atomic.Xadd(&pp.deletedTimers, -1)
+ // Look at this heap position again.
+ i--
+ }
+ case timerModifiedEarlier, timerModifiedLater:
+ if atomic.Cas(&t.status, s, timerMoving) {
+ // Now we can change the when field.
+ t.when = t.nextwhen
+ // Take t off the heap, and hold onto it.
+ // We don't add it back yet because the
+ // heap manipulation could cause our
+ // loop to skip some other timer.
+ dodeltimer(pp, i)
+ moved = append(moved, t)
+ if s == timerModifiedEarlier {
+ if n := atomic.Xadd(&pp.adjustTimers, -1); int32(n) <= 0 {
+ break loop
+ }
+ }
+ // Look at this heap position again.
+ i--
+ }
+ case timerNoStatus, timerRunning, timerRemoving, timerRemoved, timerMoving:
+ badTimer()
+ case timerWaiting:
+ // OK, nothing to do.
+ case timerModifying:
+ // Check again after modification is complete.
+ osyield()
+ i--
+ default:
+ badTimer()
+ }
+ }
+
+ if len(moved) > 0 {
+ addAdjustedTimers(pp, moved)
+ }
+
+ if verifyTimers {
+ verifyTimerHeap(pp)
+ }
+}
+
+// addAdjustedTimers adds any timers we adjusted in adjusttimers
+// back to the timer heap.
+func addAdjustedTimers(pp *p, moved []*timer) {
+ for _, t := range moved {
+ doaddtimer(pp, t)
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+ }
+}
+
+// nobarrierWakeTime looks at P's timers and returns the time when we
+// should wake up the netpoller. It returns 0 if there are no timers.
+// This function is invoked when dropping a P, and must run without
+// any write barriers. Therefore, if there are any timers that need
+// to be moved earlier, it conservatively returns the current time.
+// The netpoller M will wake up and adjust timers before sleeping again.
+//go:nowritebarrierrec
+func nobarrierWakeTime(pp *p) int64 {
+ if atomic.Load(&pp.adjustTimers) > 0 {
+ return nanotime()
+ } else {
+ return int64(atomic.Load64(&pp.timer0When))
+ }
+}
+
+// runtimer examines the first timer in timers. If it is ready based on now,
+// it runs the timer and removes or updates it.
+// Returns 0 if it ran a timer, -1 if there are no more timers, or the time
+// when the first timer should run.
+// The caller must have locked the timers for pp.
+// If a timer is run, this will temporarily unlock the timers.
+//go:systemstack
+func runtimer(pp *p, now int64) int64 {
+ for {
+ t := pp.timers[0]
+ if t.pp.ptr() != pp {
+ throw("runtimer: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if t.when > now {
+ // Not ready to run.
+ return t.when
+ }
+
+ if !atomic.Cas(&t.status, s, timerRunning) {
+ continue
+ }
+ // Note that runOneTimer may temporarily unlock
+ // pp.timersLock.
+ runOneTimer(pp, t, now)
+ return 0
+
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoving) {
+ continue
+ }
+ dodeltimer0(pp)
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ badTimer()
+ }
+ atomic.Xadd(&pp.deletedTimers, -1)
+ if len(pp.timers) == 0 {
+ return -1
+ }
+
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
+ }
+ t.when = t.nextwhen
+ dodeltimer0(pp)
+ doaddtimer(pp, t)
+ if s == timerModifiedEarlier {
+ atomic.Xadd(&pp.adjustTimers, -1)
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+
+ case timerModifying:
+ // Wait for modification to complete.
+ osyield()
+
+ case timerNoStatus, timerRemoved:
+ // Should not see a new or inactive timer on the heap.
+ badTimer()
+ case timerRunning, timerRemoving, timerMoving:
+ // These should only be set when timers are locked,
+ // and we didn't do it.
+ badTimer()
+ default:
+ badTimer()
+ }
+ }
+}
+
+// runOneTimer runs a single timer.
+// The caller must have locked the timers for pp.
+// This will temporarily unlock the timers while running the timer function.
+//go:systemstack
+func runOneTimer(pp *p, t *timer, now int64) {
+ if raceenabled {
+ ppcur := getg().m.p.ptr()
+ if ppcur.timerRaceCtx == 0 {
+ ppcur.timerRaceCtx = racegostart(funcPC(runtimer) + sys.PCQuantum)
+ }
+ raceacquirectx(ppcur.timerRaceCtx, unsafe.Pointer(t))
+ }
+
+ f := t.f
+ arg := t.arg
+ seq := t.seq
+
+ if t.period > 0 {
+ // Leave in heap but adjust next time to fire.
+ delta := t.when - now
+ t.when += t.period * (1 + -delta/t.period)
+ siftdownTimer(pp.timers, 0)
+ if !atomic.Cas(&t.status, timerRunning, timerWaiting) {
+ badTimer()
+ }
+ updateTimer0When(pp)
+ } else {
+ // Remove from heap.
+ dodeltimer0(pp)
+ if !atomic.Cas(&t.status, timerRunning, timerNoStatus) {
+ badTimer()
+ }
+ }
+
+ if raceenabled {
+ // Temporarily use the current P's racectx for g0.
+ gp := getg()
+ if gp.racectx != 0 {
+ throw("runOneTimer: unexpected racectx")
+ }
+ gp.racectx = gp.m.p.ptr().timerRaceCtx
+ }
+
+ unlock(&pp.timersLock)
+
+ f(arg, seq)
+
+ lock(&pp.timersLock)
+
+ if raceenabled {
+ gp := getg()
+ gp.racectx = 0
+ }
+}
+
+// clearDeletedTimers removes all deleted timers from the P's timer heap.
+// This is used to avoid clogging up the heap if the program
+// starts a lot of long-running timers and then stops them.
+// For example, this can happen via context.WithTimeout.
+//
+// This is the only function that walks through the entire timer heap,
+// other than moveTimers which only runs when the world is stopped.
+//
+// The caller must have locked the timers for pp.
+func clearDeletedTimers(pp *p) {
+ cdel := int32(0)
+ cearlier := int32(0)
+ to := 0
+ changedHeap := false
+ timers := pp.timers
+nextTimer:
+ for _, t := range timers {
+ for {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if changedHeap {
+ timers[to] = t
+ siftupTimer(timers, to)
+ }
+ to++
+ continue nextTimer
+ case timerModifiedEarlier, timerModifiedLater:
+ if atomic.Cas(&t.status, s, timerMoving) {
+ t.when = t.nextwhen
+ timers[to] = t
+ siftupTimer(timers, to)
+ to++
+ changedHeap = true
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+ if s == timerModifiedEarlier {
+ cearlier++
+ }
+ continue nextTimer
+ }
+ case timerDeleted:
+ if atomic.Cas(&t.status, s, timerRemoving) {
+ t.pp = 0
+ cdel++
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ badTimer()
+ }
+ changedHeap = true
+ continue nextTimer
+ }
+ case timerModifying:
+ // Loop until modification complete.
+ osyield()
+ case timerNoStatus, timerRemoved:
+ // We should not see these status values in a timer heap.
+ badTimer()
+ case timerRunning, timerRemoving, timerMoving:
+ // Some other P thinks it owns this timer,
+ // which should not happen.
+ badTimer()
+ default:
+ badTimer()
+ }
+ }
+ }
+
+ // Set remaining slots in timers slice to nil,
+ // so that the timer values can be garbage collected.
+ for i := to; i < len(timers); i++ {
+ timers[i] = nil
+ }
+
+ atomic.Xadd(&pp.deletedTimers, -cdel)
+ atomic.Xadd(&pp.numTimers, -cdel)
+ atomic.Xadd(&pp.adjustTimers, -cearlier)
+
+ timers = timers[:to]
+ pp.timers = timers
+ updateTimer0When(pp)
+
+ if verifyTimers {
+ verifyTimerHeap(pp)
+ }
+}
+
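
A minimal sketch of the usage pattern the comment above refers to (ordinary user code, not runtime code): many context.WithTimeout timers stopped long before they fire, leaving deleted entries for clearDeletedTimers to sweep.

package main

import (
	"context"
	"time"
)

func main() {
	for i := 0; i < 100000; i++ {
		// Each WithTimeout starts a long-running runtime timer...
		ctx, cancel := context.WithTimeout(context.Background(), time.Hour)
		_ = ctx
		// ...and cancel stops it almost immediately, marking it
		// timerDeleted instead of removing it from the P's heap.
		cancel()
	}
	// Give the runtime a moment before exiting; purely illustrative.
	time.Sleep(10 * time.Millisecond)
}
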
+// verifyTimerHeap verifies that the timer heap is in a valid state.
+// This is only for debugging, and is only called if verifyTimers is true.
+// The caller must have locked the timers.
+func verifyTimerHeap(pp *p) {
+ for i, t := range pp.timers {
+ if i == 0 {
+ // First timer has no parent.
continue
}
- // At least one timer pending. Sleep until then.
- tb.sleeping = true
- tb.sleepUntil = now + delta
- noteclear(&tb.waitnote)
- unlock(&tb.lock)
- notetsleepg(&tb.waitnote, delta)
+
+ // The heap is 4-ary. See siftupTimer and siftdownTimer.
+ p := (i - 1) / 4
+ if t.when < pp.timers[p].when {
+ print("bad timer heap at ", i, ": ", p, ": ", pp.timers[p].when, ", ", i, ": ", t.when, "\n")
+ throw("bad timer heap")
+ }
+ }
+ if numTimers := int(atomic.Load(&pp.numTimers)); len(pp.timers) != numTimers {
+ println("timer heap len", len(pp.timers), "!= numTimers", numTimers)
+ throw("bad timer heap len")
}
}
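
A small standalone sketch of the 4-ary heap index arithmetic assumed here: the parent rule (i-1)/4 is taken from the check above, and the matching child rule 4*i+1 .. 4*i+4 is the assumption made by siftupTimer/siftdownTimer.

package main

import "fmt"

func parent(i int) int { return (i - 1) / 4 }

func children(i int) []int { return []int{4*i + 1, 4*i + 2, 4*i + 3, 4*i + 4} }

func main() {
	for i := 1; i <= 8; i++ {
		fmt.Printf("node %d has parent %d\n", i, parent(i))
	}
	fmt.Println("children of 0:", children(0)) // [1 2 3 4]
	fmt.Println("children of 1:", children(1)) // [5 6 7 8]
}
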
-func timejump() *g {
- if faketime == 0 {
- return nil
+// updateTimer0When sets the P's timer0When field.
+// The caller must have locked the timers for pp.
+func updateTimer0When(pp *p) {
+ if len(pp.timers) == 0 {
+ atomic.Store64(&pp.timer0When, 0)
+ } else {
+ atomic.Store64(&pp.timer0When, uint64(pp.timers[0].when))
}
-
- for i := range timers {
- lock(&timers[i].lock)
- }
- gp := timejumpLocked()
- for i := range timers {
- unlock(&timers[i].lock)
- }
-
- return gp
}
-func timejumpLocked() *g {
- // Determine a timer bucket with minimum when.
- var minT *timer
- for i := range timers {
- tb := &timers[i]
- if !tb.created || len(tb.t) == 0 {
+// timeSleepUntil returns the time when the next timer should fire,
+// and the P whose timer heap holds that timer.
+// This is only called by sysmon and checkdead.
+func timeSleepUntil() (int64, *p) {
+ next := int64(maxWhen)
+ var pret *p
+
+ // Prevent allp slice changes. This is like retake.
+ lock(&allpLock)
+ for _, pp := range allp {
+ if pp == nil {
+ // This can happen if procresize has grown
+ // allp but not yet created new Ps.
continue
}
- t := tb.t[0]
- if minT == nil || t.when < minT.when {
- minT = t
+
+ c := atomic.Load(&pp.adjustTimers)
+ if c == 0 {
+ w := int64(atomic.Load64(&pp.timer0When))
+ if w != 0 && w < next {
+ next = w
+ pret = pp
+ }
+ continue
}
- }
- if minT == nil || minT.when <= faketime {
- return nil
- }
- faketime = minT.when
- tb := minT.tb
- if !tb.rescheduling {
- return nil
- }
- tb.rescheduling = false
- return tb.gp
-}
-
-func timeSleepUntil() int64 {
- next := int64(1<<63 - 1)
-
- // Determine minimum sleepUntil across all the timer buckets.
- //
- // The function can not return a precise answer,
- // as another timer may pop in as soon as timers have been unlocked.
- // So lock the timers one by one instead of all at once.
- for i := range timers {
- tb := &timers[i]
-
- lock(&tb.lock)
- if tb.sleeping && tb.sleepUntil < next {
- next = tb.sleepUntil
+ lock(&pp.timersLock)
+ for _, t := range pp.timers {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if t.when < next {
+ next = t.when
+ }
+ case timerModifiedEarlier, timerModifiedLater:
+ if t.nextwhen < next {
+ next = t.nextwhen
+ }
+ if s == timerModifiedEarlier {
+ c--
+ }
+ }
+ // The timers are sorted, so we only have to check
+ // the first timer for each P, unless there are
+ // some timerModifiedEarlier timers. The number
+ // of timerModifiedEarlier timers is in the adjustTimers
+ // field, used to initialize c, above.
+ //
+ // We don't worry about cases like timerModifying.
+ // New timers can show up at any time,
+ // so this function is necessarily imprecise.
+ // Do a signed check here since we aren't
+ // synchronizing the read of pp.adjustTimers
+ // with the check of a timer status.
+ if int32(c) <= 0 {
+ break
+ }
}
- unlock(&tb.lock)
+ unlock(&pp.timersLock)
}
+ unlock(&allpLock)
- return next
+ return next, pret
}
// Heap maintenance algorithms.
@@ -382,13 +1048,10 @@
// it will cause the program to crash with a mysterious
// "panic holding locks" message. Instead, we panic while not
// holding a lock.
-// The races can occur despite the bucket locks because assignBucket
-// itself is called without locks, so racy calls can cause a timer to
-// change buckets while executing these functions.
-func siftupTimer(t []*timer, i int) bool {
+func siftupTimer(t []*timer, i int) {
if i >= len(t) {
- return false
+ badTimer()
}
when := t[i].when
tmp := t[i]
@@ -398,20 +1061,17 @@
break
}
t[i] = t[p]
- t[i].i = i
i = p
}
if tmp != t[i] {
t[i] = tmp
- t[i].i = i
}
- return true
}
-func siftdownTimer(t []*timer, i int) bool {
+func siftdownTimer(t []*timer, i int) {
n := len(t)
if i >= n {
- return false
+ badTimer()
}
when := t[i].when
tmp := t[i]
@@ -441,14 +1101,11 @@
break
}
t[i] = t[c]
- t[i].i = i
i = c
}
if tmp != t[i] {
t[i] = tmp
- t[i].i = i
}
- return true
}
// badTimer is called if the timer data structures have been corrupted,
@@ -456,5 +1113,5 @@
// panicking due to invalid slice access while holding locks.
// See issue #25686.
func badTimer() {
- panic(errorString("racy use of timers"))
+ throw("timer data corruption")
}
diff --git a/src/runtime/time_fake.go b/src/runtime/time_fake.go
new file mode 100644
index 0000000..c64d299
--- /dev/null
+++ b/src/runtime/time_fake.go
@@ -0,0 +1,100 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build faketime
+// +build !windows
+
+// Faketime isn't currently supported on Windows. This would require:
+//
+// 1. Shadowing time_now, which is implemented in assembly on Windows.
+// Since that's exported directly to the time package from runtime
+// assembly, this would involve moving it from sys_windows_*.s into
+// its own assembly files build-tagged with !faketime and using the
+// implementation of time_now from timestub.go in faketime mode.
+//
+// 2. Modifying syscall.Write to call syscall.faketimeWrite,
+// translating the Stdout and Stderr handles into FDs 1 and 2.
+// (See CL 192739 PS 3.)
+
+package runtime
+
+import "unsafe"
+
+// faketime is the simulated time in nanoseconds since 1970 for the
+// playground.
+var faketime int64 = 1257894000000000000
+
+var faketimeState struct {
+ lock mutex
+
+ // lastfaketime is the last faketime value written to fd 1 or 2.
+ lastfaketime int64
+
+ // lastfd is the fd to which lastfaketime was written.
+ //
+ // Subsequent writes to the same fd may use the same
+ // timestamp, but the timestamp must increase if the fd
+ // changes.
+ lastfd uintptr
+}
+
+//go:nosplit
+func nanotime() int64 {
+ return faketime
+}
+
+func walltime() (sec int64, nsec int32) {
+ return faketime / 1000000000, int32(faketime % 1000000000)
+}
+
+func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+ if !(fd == 1 || fd == 2) {
+ // Do an ordinary write.
+ return write1(fd, p, n)
+ }
+
+ // Write with the playback header.
+
+ // First, lock to avoid interleaving writes.
+ lock(&faketimeState.lock)
+
+ // If the current fd doesn't match the fd of the previous write,
+ // ensure that the timestamp is strictly greater. That way, we can
+ // recover the original order even if we read the fds separately.
+ t := faketimeState.lastfaketime
+ if fd != faketimeState.lastfd {
+ t++
+ faketimeState.lastfd = fd
+ }
+ if faketime > t {
+ t = faketime
+ }
+ faketimeState.lastfaketime = t
+
+ // Playback header: 0 0 P B <8-byte time> <4-byte data length> (big endian)
+ var buf [4 + 8 + 4]byte
+ buf[2] = 'P'
+ buf[3] = 'B'
+ tu := uint64(t)
+ buf[4] = byte(tu >> (7 * 8))
+ buf[5] = byte(tu >> (6 * 8))
+ buf[6] = byte(tu >> (5 * 8))
+ buf[7] = byte(tu >> (4 * 8))
+ buf[8] = byte(tu >> (3 * 8))
+ buf[9] = byte(tu >> (2 * 8))
+ buf[10] = byte(tu >> (1 * 8))
+ buf[11] = byte(tu >> (0 * 8))
+ nu := uint32(n)
+ buf[12] = byte(nu >> (3 * 8))
+ buf[13] = byte(nu >> (2 * 8))
+ buf[14] = byte(nu >> (1 * 8))
+ buf[15] = byte(nu >> (0 * 8))
+ write1(fd, unsafe.Pointer(&buf[0]), int32(len(buf)))
+
+ // Write actual data.
+ res := write1(fd, p, n)
+
+ unlock(&faketimeState.lock)
+ return res
+}
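
For comparison, the same playback header built with encoding/binary in ordinary user code (the runtime cannot import that package, hence the manual byte shifts above):

package main

import (
	"encoding/binary"
	"fmt"
)

// playbackHeader mirrors the "0 0 P B <8-byte time> <4-byte data length>" layout.
func playbackHeader(t uint64, n uint32) []byte {
	var buf [4 + 8 + 4]byte
	buf[2], buf[3] = 'P', 'B'
	binary.BigEndian.PutUint64(buf[4:], t)
	binary.BigEndian.PutUint32(buf[12:], n)
	return buf[:]
}

func main() {
	fmt.Printf("% x\n", playbackHeader(1257894000000000000, 7))
}
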
diff --git a/src/runtime/time_nofake.go b/src/runtime/time_nofake.go
new file mode 100644
index 0000000..1912a94
--- /dev/null
+++ b/src/runtime/time_nofake.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !faketime
+
+package runtime
+
+import "unsafe"
+
+// faketime is the simulated time in nanoseconds since 1970 for the
+// playground.
+//
+// Zero means not to use faketime.
+var faketime int64
+
+//go:nosplit
+func nanotime() int64 {
+ return nanotime1()
+}
+
+func walltime() (sec int64, nsec int32) {
+ return walltime1()
+}
+
+// write must be nosplit on Windows (see write1)
+//
+//go:nosplit
+func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+ return write1(fd, p, n)
+}
diff --git a/src/runtime/time_test.go b/src/runtime/time_test.go
new file mode 100644
index 0000000..bf29561
--- /dev/null
+++ b/src/runtime/time_test.go
@@ -0,0 +1,93 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "internal/testenv"
+ "os/exec"
+ "reflect"
+ "runtime"
+ "testing"
+)
+
+func TestFakeTime(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("faketime not supported on windows")
+ }
+
+ t.Parallel()
+
+ exe, err := buildTestProg(t, "testfaketime", "-tags=faketime")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var stdout, stderr bytes.Buffer
+ cmd := exec.Command(exe)
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+
+ err = testenv.CleanCmdEnv(cmd).Run()
+ if err != nil {
+ t.Fatalf("exit status: %v\n%s", err, stderr.String())
+ }
+
+ t.Logf("raw stdout: %q", stdout.String())
+ t.Logf("raw stderr: %q", stdout.String())
+
+ f1, err1 := parseFakeTime(stdout.Bytes())
+ if err1 != nil {
+ t.Fatal(err1)
+ }
+ f2, err2 := parseFakeTime(stderr.Bytes())
+ if err2 != nil {
+ t.Fatal(err2)
+ }
+
+ const time0 = 1257894000000000000
+ got := [][]fakeTimeFrame{f1, f2}
+ var want = [][]fakeTimeFrame{{
+ {time0 + 1, "line 2\n"},
+ {time0 + 1, "line 3\n"},
+ {time0 + 1e9, "line 5\n"},
+ {time0 + 1e9, "2009-11-10T23:00:01Z"},
+ }, {
+ {time0, "line 1\n"},
+ {time0 + 2, "line 4\n"},
+ }}
+ if !reflect.DeepEqual(want, got) {
+ t.Fatalf("want %v, got %v", want, got)
+ }
+}
+
+type fakeTimeFrame struct {
+ time uint64
+ data string
+}
+
+func parseFakeTime(x []byte) ([]fakeTimeFrame, error) {
+ var frames []fakeTimeFrame
+ for len(x) != 0 {
+ if len(x) < 4+8+4 {
+ return nil, errors.New("truncated header")
+ }
+ const magic = "\x00\x00PB"
+ if string(x[:len(magic)]) != magic {
+ return nil, errors.New("bad magic")
+ }
+ x = x[len(magic):]
+ time := binary.BigEndian.Uint64(x)
+ x = x[8:]
+ dlen := binary.BigEndian.Uint32(x)
+ x = x[4:]
+ data := string(x[:dlen])
+ x = x[dlen:]
+ frames = append(frames, fakeTimeFrame{time, data})
+ }
+ return frames, nil
+}
diff --git a/src/runtime/timestub2.go b/src/runtime/timestub2.go
index 00c2c55..6d73aab 100644
--- a/src/runtime/timestub2.go
+++ b/src/runtime/timestub2.go
@@ -6,7 +6,8 @@
// +build !windows
// +build !freebsd
// +build !aix
+// +build !solaris
package runtime
-func walltime() (sec int64, nsec int32)
+func walltime1() (sec int64, nsec int32)
diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s
index 350089a..e42de8d 100644
--- a/src/runtime/tls_arm.s
+++ b/src/runtime/tls_arm.s
@@ -17,14 +17,11 @@
// Note: both functions will clobber R0 and R11 and
// can be called from 5c ABI code.
-// On android and darwin, runtime.tls_g is a normal variable.
+// On android, runtime.tls_g is a normal variable.
// TLS offset is computed in x_cgo_inittls.
#ifdef GOOS_android
#define TLSG_IS_VARIABLE
#endif
-#ifdef GOOS_darwin
-#define TLSG_IS_VARIABLE
-#endif
// save_g saves the g register into pthread-provided
// thread-local memory, so that we can call externally compiled
@@ -33,11 +30,6 @@
// runtime.mcall assumes this function only clobbers R0 and R11.
// Returns with g in R0.
TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
-#ifdef GOOS_nacl
- // nothing to do as nacl/arm does not use TLS at all.
- MOVW g, R0 // preserve R0 across call to setg<>
- RET
-#else
// If the host does not support MRC the linker will replace it with
// a call to runtime.read_tls_fallback which jumps to __kuser_get_tls.
// The replacement function saves LR in R11 over the call to read_tls_fallback.
@@ -48,16 +40,11 @@
MOVW g, 0(R0)
MOVW g, R0 // preserve R0 across call to setg<>
RET
-#endif
// load_g loads the g register from pthread-provided
// thread-local memory, for use after calling externally compiled
// ARM code that overwrote those registers.
TEXT runtime·load_g(SB),NOSPLIT,$0
-#ifdef GOOS_nacl
- // nothing to do as nacl/arm does not use TLS at all.
- RET
-#else
// See save_g
MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer
BIC $3, R0 // Darwin/ARM might return unaligned pointer
@@ -65,7 +52,6 @@
ADD R11, R0
MOVW 0(R0), g
RET
-#endif
// This is called from rt0_go, which runs on the system stack
// using the initial stack allocated by the OS.
@@ -78,7 +64,6 @@
// Declare a dummy word ($4, not $0) to make sure the
// frame is 8 bytes and stays 8-byte-aligned.
TEXT runtime·_initcgo(SB),NOSPLIT,$4
-#ifndef GOOS_nacl
// if there is an _cgo_init, call it.
MOVW _cgo_init(SB), R4
CMP $0, R4
@@ -93,7 +78,6 @@
MOVW $setg_gcc<>(SB), R1 // arg 1: setg
MOVW g, R0 // arg 0: G
BL (R4) // will clobber R0-R3
-#endif
nocgo:
RET
diff --git a/src/runtime/tls_arm64.h b/src/runtime/tls_arm64.h
index 27f517c..f60f4f6 100644
--- a/src/runtime/tls_arm64.h
+++ b/src/runtime/tls_arm64.h
@@ -20,6 +20,11 @@
#define MRS_TPIDR_R0 WORD $0xd53bd060 // MRS TPIDRRO_EL0, R0
#endif
+#ifdef GOOS_freebsd
+#define TPIDR TPIDR_EL0
+#define MRS_TPIDR_R0 WORD $0xd53bd040 // MRS TPIDR_EL0, R0
+#endif
+
#ifdef GOOS_netbsd
#define TPIDR TPIDRRO_EL0
#define MRS_TPIDR_R0 WORD $0xd53bd040 // MRS TPIDRRO_EL0, R0
diff --git a/src/runtime/tls_arm64.s b/src/runtime/tls_arm64.s
index fb8627d..999914d 100644
--- a/src/runtime/tls_arm64.s
+++ b/src/runtime/tls_arm64.s
@@ -10,8 +10,7 @@
TEXT runtime·load_g(SB),NOSPLIT,$0
MOVB runtime·iscgo(SB), R0
- CMP $0, R0
- BEQ nocgo
+ CBZ R0, nocgo
MRS_TPIDR_R0
#ifdef GOOS_darwin
@@ -27,8 +26,7 @@
TEXT runtime·save_g(SB),NOSPLIT,$0
MOVB runtime·iscgo(SB), R0
- CMP $0, R0
- BEQ nocgo
+ CBZ R0, nocgo
MRS_TPIDR_R0
#ifdef GOOS_darwin
diff --git a/src/runtime/tls_riscv64.s b/src/runtime/tls_riscv64.s
new file mode 100644
index 0000000..8386980
--- /dev/null
+++ b/src/runtime/tls_riscv64.s
@@ -0,0 +1,18 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// If !iscgo, this is a no-op.
+//
+// NOTE: mcall() assumes this clobbers only R23 (REGTMP).
+// FIXME: cgo
+TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
+ RET
+
+TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
+ RET
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 08e92d2..169b650 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -54,7 +54,7 @@
traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
traceEvHeapAlloc = 33 // memstats.heap_live change [timestamp, heap_alloc]
traceEvNextGC = 34 // memstats.next_gc change [timestamp, next_gc]
- traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
+ traceEvTimerGoroutine = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
traceEvString = 37 // string dictionary entry [ID, length, string]
traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
@@ -84,7 +84,7 @@
// and ppc64le.
// Tracing won't work reliably for architectures where cputicks is emulated
// by nanotime, so the value doesn't matter for those architectures.
- traceTickDiv = 16 + 48*(sys.Goarch386|sys.GoarchAmd64|sys.GoarchAmd64p32)
+ traceTickDiv = 16 + 48*(sys.Goarch386|sys.GoarchAmd64)
// Maximum number of PCs in a single stack trace.
// Since events contain only stack id rather than whole stack trace,
// we can allow quite large values here.
@@ -180,9 +180,15 @@
// Most clients should use the runtime/trace package or the testing package's
// -test.trace flag instead of calling StartTrace directly.
func StartTrace() error {
- // Stop the world, so that we can take a consistent snapshot
+ // Stop the world so that we can take a consistent snapshot
// of all goroutines at the beginning of the trace.
- stopTheWorld("start tracing")
+ // Do not stop the world during GC so we ensure we always see
+ // a consistent view of GC-related events (e.g. a start is always
+ // paired with an end).
+ stopTheWorldGC("start tracing")
+
+ // Prevent sysmon from running any code that could generate events.
+ lock(&sched.sysmonlock)
// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
// Exitsyscall could check trace.enabled long before and then suddenly wake up
@@ -193,7 +199,8 @@
if trace.enabled || trace.shutdown {
unlock(&trace.bufLock)
- startTheWorld()
+ unlock(&sched.sysmonlock)
+ startTheWorldGC()
return errorString("tracing is already enabled")
}
@@ -264,7 +271,9 @@
unlock(&trace.bufLock)
- startTheWorld()
+ unlock(&sched.sysmonlock)
+
+ startTheWorldGC()
return nil
}
@@ -273,14 +282,18 @@
func StopTrace() {
// Stop the world so that we can collect the trace buffers from all p's below,
// and also to avoid races with traceEvent.
- stopTheWorld("stop tracing")
+ stopTheWorldGC("stop tracing")
+
+ // See the comment in StartTrace.
+ lock(&sched.sysmonlock)
// See the comment in StartTrace.
lock(&trace.bufLock)
if !trace.enabled {
unlock(&trace.bufLock)
- startTheWorld()
+ unlock(&sched.sysmonlock)
+ startTheWorldGC()
return
}
@@ -317,7 +330,9 @@
trace.shutdown = true
unlock(&trace.bufLock)
- startTheWorld()
+ unlock(&sched.sysmonlock)
+
+ startTheWorldGC()
// The world is started but we've set trace.shutdown, so new tracing can't start.
// Wait for the trace reader to flush pending buffers and stop.
@@ -413,13 +428,6 @@
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
- for i := range timers {
- tb := &timers[i]
- if tb.gp != nil {
- data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
- data = traceAppend(data, uint64(tb.gp.goid))
- }
- }
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
trace.stackTab.dump()
@@ -873,6 +881,7 @@
tab.mem.drop()
*tab = traceStackTable{}
+ lockInit(&((*tab).lock), lockRankTraceStackTab)
}
type traceFrame struct {
@@ -929,7 +938,7 @@
// alloc allocates n-byte block.
func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
- n = round(n, sys.PtrSize)
+ n = alignUp(n, sys.PtrSize)
if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
if n > uintptr(len(a.head.ptr().data)) {
throw("trace: alloc too large")
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index 62c06e6..cfc0419 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -233,6 +233,7 @@
}},
{trace.EvGomaxprocs, []frame{
{"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
+ {"runtime.startTheWorldGC", 0},
{"runtime.GOMAXPROCS", 0},
{"runtime/trace_test.TestTraceSymbolize", 0},
{"testing.tRunner", 0},
@@ -251,6 +252,7 @@
{trace.EvGoSysCall, []frame{
{"syscall.read", 0},
{"syscall.Read", 0},
+ {"internal/poll.ignoringEINTR", 0},
{"internal/poll.(*FD).Read", 0},
{"os.(*File).read", 0},
{"os.(*File).Read", 0},
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index ef48c9f..944c847 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -26,8 +26,8 @@
// takes up only 4 bytes on the stack, while on 64-bit systems it takes up 8 bytes.
// Typically this is ptrSize.
//
-// As an exception, amd64p32 has ptrSize == 4 but the CALL instruction still
-// stores an 8-byte return PC onto the stack. To accommodate this, we use regSize
+// As an exception, amd64p32 had ptrSize == 4 but the CALL instruction still
+// stored an 8-byte return PC onto the stack. To accommodate this, we used regSize
// as the size of the architecture-pushed return PC.
//
// usesLR is defined below in terms of minFrameSize, which is defined in
@@ -340,7 +340,20 @@
pc := frame.pc
// backup to CALL instruction to read inlining info (same logic as below)
tracepc := pc
- if (n > 0 || flags&_TraceTrap == 0) && frame.pc > f.entry && !waspanic {
+ // Normally, pc is a return address. In that case, we want to look up
+ // file/line information using pc-1, because that is the pc of the
+ // call instruction (more precisely, the last byte of the call instruction).
+ // Callers expect the pc buffer to contain return addresses and do the
+ // same -1 themselves, so we keep pc unchanged.
+ // When the pc is from a signal (e.g. profiler or segv) then we want
+ // to look up file/line information using pc, and we store pc+1 in the
+ // pc buffer so callers can unconditionally subtract 1 before looking up.
+ // See issue 34123.
+ // The pc can be at function entry when the frame is initialized without
+ // actually running code, like runtime.mstart.
+ if (n == 0 && flags&_TraceTrap != 0) || waspanic || pc == f.entry {
+ pc++
+ } else {
tracepc--
}
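The comment block above pins down the convention that callers of the traceback machinery can always treat a pc-buffer entry as a return address and subtract one before symbolizing, including for signal and entry frames (which get pc+1 stored). runtime.CallersFrames applies that adjustment for you; a small usage sketch:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	pcs := make([]uintptr, 16)
	n := runtime.Callers(1, pcs) // entries are return addresses (or pc+1 for injected frames)
	frames := runtime.CallersFrames(pcs[:n])
	for {
		f, more := frames.Next() // does the pc-1 lookup internally
		fmt.Printf("%s\n\t%s:%d\n", f.Function, f.File, f.Line)
		if !more {
			break
		}
	}
}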
@@ -462,6 +475,7 @@
}
waspanic = f.funcID == funcID_sigpanic
+ injectedCall := waspanic || f.funcID == funcID_asyncPreempt
// Do not unwind past the bottom of the stack.
if !flr.valid() {
@@ -477,8 +491,8 @@
frame.argmap = nil
// On link register architectures, sighandler saves the LR on stack
- // before faking a call to sigpanic.
- if usesLR && waspanic {
+ // before faking a call.
+ if usesLR && injectedCall {
x := *(*uintptr)(unsafe.Pointer(frame.sp))
frame.sp += sys.MinFrameSize
if GOARCH == "arm64" {
@@ -860,6 +874,7 @@
_Gwaiting: "waiting",
_Gdead: "dead",
_Gcopystack: "copystack",
+ _Gpreempted: "preempted",
}
func goroutineheader(gp *g) {
@@ -997,8 +1012,8 @@
// isSystemGoroutine reports whether the goroutine g must be omitted
// in stack dumps and deadlock detector. This is any goroutine that
-// starts at a runtime.* entry point, except for runtime.main and
-// sometimes runtime.runfinq.
+// starts at a runtime.* entry point, except for runtime.main,
+// runtime.handleAsyncEvent (wasm only) and sometimes runtime.runfinq.
//
// If fixed is true, any goroutine that can vary between user and
// system (that is, the finalizer goroutine) is considered a user
@@ -1009,7 +1024,7 @@
if !f.valid() {
return false
}
- if f.funcID == funcID_runtime_main {
+ if f.funcID == funcID_runtime_main || f.funcID == funcID_handleAsyncEvent {
return false
}
if f.funcID == funcID_runfinq {
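With _Gpreempted added to the status table, goroutine dumps can now label goroutines stopped at an asynchronous preemption point as "preempted". A quick way to look at goroutine headers that use this table (the states shown vary from run to run, and "preempted" is transient):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	go func() { select {} }() // parks forever; shows up with its own status line

	buf := make([]byte, 1<<16)
	n := runtime.Stack(buf, true) // true: dump every goroutine, one header per goroutine
	fmt.Printf("%s\n", buf[:n])
}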
diff --git a/src/runtime/treap_test.go b/src/runtime/treap_test.go
deleted file mode 100644
index 110f51c..0000000
--- a/src/runtime/treap_test.go
+++ /dev/null
@@ -1,270 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime_test
-
-import (
- "fmt"
- "runtime"
- "testing"
-)
-
-var spanDesc = map[uintptr]struct {
- pages uintptr
- scav bool
-}{
- 0xc0000000: {2, false},
- 0xc0006000: {1, false},
- 0xc0010000: {8, false},
- 0xc0022000: {7, false},
- 0xc0034000: {4, true},
- 0xc0040000: {5, false},
- 0xc0050000: {5, true},
- 0xc0060000: {5000, false},
-}
-
-// Wrap the Treap one more time because go:notinheap doesn't
-// actually follow a structure across package boundaries.
-//
-//go:notinheap
-type treap struct {
- runtime.Treap
-}
-
-func maskMatchName(mask, match runtime.TreapIterType) string {
- return fmt.Sprintf("%0*b-%0*b", runtime.TreapIterBits, uint8(mask), runtime.TreapIterBits, uint8(match))
-}
-
-func TestTreapFilter(t *testing.T) {
- var iterTypes = [...]struct {
- mask, match runtime.TreapIterType
- filter runtime.TreapIterFilter // expected filter
- }{
- {0, 0, 0xf},
- {runtime.TreapIterScav, 0, 0x5},
- {runtime.TreapIterScav, runtime.TreapIterScav, 0xa},
- {runtime.TreapIterScav | runtime.TreapIterHuge, runtime.TreapIterHuge, 0x4},
- {runtime.TreapIterScav | runtime.TreapIterHuge, 0, 0x1},
- {0, runtime.TreapIterScav, 0x0},
- }
- for _, it := range iterTypes {
- t.Run(maskMatchName(it.mask, it.match), func(t *testing.T) {
- if f := runtime.TreapFilter(it.mask, it.match); f != it.filter {
- t.Fatalf("got %#x, want %#x", f, it.filter)
- }
- })
- }
-}
-
-// This test ensures that the treap implementation in the runtime
-// maintains all stated invariants after different sequences of
-// insert, removeSpan, find, and erase. Invariants specific to the
-// treap data structure are checked implicitly: after each mutating
-// operation, treap-related invariants are checked for the entire
-// treap.
-func TestTreap(t *testing.T) {
- // Set up a bunch of spans allocated into mheap_.
- // Also, derive a set of typeCounts of each type of span
- // according to runtime.TreapIterType so we can verify against
- // them later.
- spans := make([]runtime.Span, 0, len(spanDesc))
- typeCounts := [1 << runtime.TreapIterBits][1 << runtime.TreapIterBits]int{}
- for base, de := range spanDesc {
- s := runtime.AllocSpan(base, de.pages, de.scav)
- defer s.Free()
- spans = append(spans, s)
-
- for i := runtime.TreapIterType(0); i < 1<<runtime.TreapIterBits; i++ {
- for j := runtime.TreapIterType(0); j < 1<<runtime.TreapIterBits; j++ {
- if s.MatchesIter(i, j) {
- typeCounts[i][j]++
- }
- }
- }
- }
- t.Run("TypeCountsSanity", func(t *testing.T) {
- // Just sanity check type counts for a few values.
- check := func(mask, match runtime.TreapIterType, count int) {
- tc := typeCounts[mask][match]
- if tc != count {
- name := maskMatchName(mask, match)
- t.Fatalf("failed a sanity check for mask/match %s counts: got %d, wanted %d", name, tc, count)
- }
- }
- check(0, 0, len(spanDesc))
- check(runtime.TreapIterScav, 0, 6)
- check(runtime.TreapIterScav, runtime.TreapIterScav, 2)
- })
- t.Run("Insert", func(t *testing.T) {
- tr := treap{}
- // Test just a very basic insert/remove for sanity.
- tr.Insert(spans[0])
- tr.RemoveSpan(spans[0])
- })
- t.Run("FindTrivial", func(t *testing.T) {
- tr := treap{}
- // Test just a very basic find operation for sanity.
- tr.Insert(spans[0])
- i := tr.Find(1)
- if i.Span() != spans[0] {
- t.Fatal("found unknown span in treap")
- }
- tr.RemoveSpan(spans[0])
- })
- t.Run("FindFirstFit", func(t *testing.T) {
- // Run this 10 times, recreating the treap each time.
- // Because of the non-deterministic structure of a treap,
- // we'll be able to test different structures this way.
- for i := 0; i < 10; i++ {
- tr := runtime.Treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Find(5)
- if i.Span().Base() != 0xc0010000 {
- t.Fatalf("expected span at lowest address which could fit 5 pages, instead found span at %x", i.Span().Base())
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- }
- })
- t.Run("Iterate", func(t *testing.T) {
- for mask := runtime.TreapIterType(0); mask < 1<<runtime.TreapIterBits; mask++ {
- for match := runtime.TreapIterType(0); match < 1<<runtime.TreapIterBits; match++ {
- iterName := maskMatchName(mask, match)
- t.Run(iterName, func(t *testing.T) {
- t.Run("StartToEnd", func(t *testing.T) {
- // Ensure progressing an iterator actually goes over the whole treap
- // from the start and that it iterates over the elements in order.
- // Furthermore, ensure that it only iterates over the relevant parts
- // of the treap.
- // Finally, ensures that Start returns a valid iterator.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- nspans := 0
- lastBase := uintptr(0)
- for i := tr.Start(mask, match); i.Valid(); i = i.Next() {
- nspans++
- if lastBase > i.Span().Base() {
- t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base())
- }
- lastBase = i.Span().Base()
- if !i.Span().MatchesIter(mask, match) {
- t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base())
- }
- }
- if nspans != typeCounts[mask][match] {
- t.Fatal("failed to iterate forwards over full treap")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("EndToStart", func(t *testing.T) {
- // See StartToEnd tests.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- nspans := 0
- lastBase := ^uintptr(0)
- for i := tr.End(mask, match); i.Valid(); i = i.Prev() {
- nspans++
- if lastBase < i.Span().Base() {
- t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base())
- }
- lastBase = i.Span().Base()
- if !i.Span().MatchesIter(mask, match) {
- t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base())
- }
- }
- if nspans != typeCounts[mask][match] {
- t.Fatal("failed to iterate backwards over full treap")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- })
- }
- }
- t.Run("Prev", func(t *testing.T) {
- // Test the iterator invariant that i.prev().next() == i.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next()
- p := i.Prev()
- if !p.Valid() {
- t.Fatal("i.prev() is invalid")
- }
- if p.Next().Span() != i.Span() {
- t.Fatal("i.prev().next() != i")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("Next", func(t *testing.T) {
- // Test the iterator invariant that i.next().prev() == i.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next()
- n := i.Next()
- if !n.Valid() {
- t.Fatal("i.next() is invalid")
- }
- if n.Prev().Span() != i.Span() {
- t.Fatal("i.next().prev() != i")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- })
- t.Run("EraseOne", func(t *testing.T) {
- // Test that erasing one iterator correctly retains
- // all relationships between elements.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next().Next()
- s := i.Span()
- n := i.Next()
- p := i.Prev()
- tr.Erase(i)
- if n.Prev().Span() != p.Span() {
- t.Fatal("p, n := i.Prev(), i.Next(); n.prev() != p after i was erased")
- }
- if p.Next().Span() != n.Span() {
- t.Fatal("p, n := i.Prev(), i.Next(); p.next() != n after i was erased")
- }
- tr.Insert(s)
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("EraseAll", func(t *testing.T) {
- // Test that erasing iterators actually removes nodes from the treap.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- for i := tr.Start(0, 0); i.Valid(); {
- n := i.Next()
- tr.Erase(i)
- i = n
- }
- if size := tr.Size(); size != 0 {
- t.Fatalf("should have emptied out treap, %d spans left", size)
- }
- })
-}
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 660b45e..52b6cb3 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -14,26 +14,31 @@
// cmd/compile/internal/gc/reflect.go
// cmd/link/internal/ld/decodesym.go
// reflect/type.go
+// internal/reflectlite/type.go
type tflag uint8
const (
- tflagUncommon tflag = 1 << 0
- tflagExtraStar tflag = 1 << 1
- tflagNamed tflag = 1 << 2
+ tflagUncommon tflag = 1 << 0
+ tflagExtraStar tflag = 1 << 1
+ tflagNamed tflag = 1 << 2
+ tflagRegularMemory tflag = 1 << 3 // equal and hash can treat values of this type as a single region of t.size bytes
)
// Needs to be in sync with ../cmd/link/internal/ld/decodesym.go:/^func.commonsize,
// ../cmd/compile/internal/gc/reflect.go:/^func.dcommontype and
// ../reflect/type.go:/^type.rtype.
+// ../internal/reflectlite/type.go:/^type.rtype.
type _type struct {
size uintptr
ptrdata uintptr // size of memory prefix holding all pointers
hash uint32
tflag tflag
align uint8
- fieldalign uint8
+ fieldAlign uint8
kind uint8
- alg *typeAlg
+ // function for comparing objects of this type
+ // (ptr to object A, ptr to object B) -> ==?
+ equal func(unsafe.Pointer, unsafe.Pointer) bool
// gcdata stores the GC type data for the garbage collector.
// If the KindGCProg bit is set in kind, gcdata is a GC program.
// Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
@@ -287,7 +292,7 @@
for i := range md.textsectmap {
sectaddr := md.textsectmap[i].vaddr
sectlen := md.textsectmap[i].length
- if uintptr(off) >= sectaddr && uintptr(off) <= sectaddr+sectlen {
+ if uintptr(off) >= sectaddr && uintptr(off) < sectaddr+sectlen {
res = md.textsectmap[i].baseaddr + uintptr(off) - uintptr(md.textsectmap[i].vaddr)
break
}
@@ -358,10 +363,12 @@
}
type maptype struct {
- typ _type
- key *_type
- elem *_type
- bucket *_type // internal type representing a hash bucket
+ typ _type
+ key *_type
+ elem *_type
+ bucket *_type // internal type representing a hash bucket
+ // function for hashing keys (ptr to key, seed) -> hash
+ hasher func(unsafe.Pointer, uintptr) uintptr
keysize uint8 // size of key slot
elemsize uint8 // size of elem slot
bucketsize uint16 // size of bucket
@@ -497,6 +504,16 @@
return pkgPathName.name()
}
+func (n name) isBlank() bool {
+ if n.bytes == nil {
+ return false
+ }
+ if n.nameLen() != 1 {
+ return false
+ }
+ return *n.data(3) == '_'
+}
+
// typelinksinit scans the types from extra modules and builds the
// moduledata typemap used to de-duplicate type pointers.
func typelinksinit() {
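With the alg table gone, comparability lives directly on _type.equal and map hashing on maptype.hasher; a type whose equal function is nil simply cannot be hashed. Observable behavior for user code is unchanged, e.g. smuggling a non-comparable value into a map key through an interface still fails at run time (small demo, exact panic wording may differ between releases):

package main

import "fmt"

func main() {
	m := map[interface{}]int{}
	m["ok"] = 1 // strings have an equal/hash pair, so this is fine

	defer func() {
		// Expect something like: runtime error: hash of unhashable type []int
		fmt.Println("recovered:", recover())
	}()
	m[[]int{1, 2}] = 2 // slices are not comparable: their equal is nil
}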
diff --git a/src/runtime/utf8.go b/src/runtime/utf8.go
index 6bf5965..52b7576 100644
--- a/src/runtime/utf8.go
+++ b/src/runtime/utf8.go
@@ -7,7 +7,7 @@
// Numbers fundamental to the encoding.
const (
runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
- runeSelf = 0x80 // characters below Runeself are represented as themselves in a single byte.
+ runeSelf = 0x80 // characters below runeSelf are represented as themselves in a single byte.
maxRune = '\U0010FFFF' // Maximum valid Unicode code point.
)
diff --git a/src/runtime/vdso_elf64.go b/src/runtime/vdso_elf64.go
index 7c9bd96..6ded9d6 100644
--- a/src/runtime/vdso_elf64.go
+++ b/src/runtime/vdso_elf64.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// +build linux
-// +build amd64 arm64 ppc64 ppc64le
+// +build amd64 arm64 mips64 mips64le ppc64 ppc64le
package runtime
diff --git a/src/runtime/vdso_freebsd.go b/src/runtime/vdso_freebsd.go
index 4e58919..122cc8b 100644
--- a/src/runtime/vdso_freebsd.go
+++ b/src/runtime/vdso_freebsd.go
@@ -97,7 +97,7 @@
func fallback_walltime() (sec int64, nsec int32)
//go:nosplit
-func nanotime() int64 {
+func nanotime1() int64 {
bt := vdsoClockGettime(_CLOCK_MONOTONIC)
if bt == zeroBintime {
return fallback_nanotime()
@@ -105,7 +105,7 @@
return int64((1e9 * uint64(bt.sec)) + ((1e9 * uint64(bt.frac>>32)) >> 32))
}
-func walltime() (sec int64, nsec int32) {
+func walltime1() (sec int64, nsec int32) {
bt := vdsoClockGettime(_CLOCK_REALTIME)
if bt == zeroBintime {
return fallback_walltime()
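The nanotime1/walltime1 renames follow the runtime's internal timestamp refactor; the bintime arithmetic itself is untouched: frac is a 64-bit binary fraction of a second, so nanoseconds = frac * 1e9 / 2^64, evaluated on the upper 32 bits to stay within uint64. A standalone check of that identity (bintime here is a hypothetical local struct mirroring the expression above):

package main

import "fmt"

type bintime struct {
	sec  uint64
	frac uint64 // units of 1/2^64 seconds
}

func nanos(bt bintime) uint64 {
	// Same shape as the runtime expression: 1e9*sec + (1e9*(frac>>32))>>32.
	return 1e9*bt.sec + (1e9*(bt.frac>>32))>>32
}

func main() {
	half := bintime{frac: 1 << 63} // 0.5 s
	fmt.Println(nanos(half))       // 500000000
}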
diff --git a/src/runtime/vdso_freebsd_arm64.go b/src/runtime/vdso_freebsd_arm64.go
new file mode 100644
index 0000000..7d9f62d
--- /dev/null
+++ b/src/runtime/vdso_freebsd_arm64.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ _VDSO_TH_ALGO_ARM_GENTIM = 1
+)
+
+func getCntxct(physical bool) uint32
+
+//go:nosplit
+func (th *vdsoTimehands) getTimecounter() (uint32, bool) {
+ switch th.algo {
+ case _VDSO_TH_ALGO_ARM_GENTIM:
+ return getCntxct(false), true
+ default:
+ return 0, false
+ }
+}
diff --git a/src/runtime/vdso_in_none.go b/src/runtime/vdso_in_none.go
index f2d6bb5..7f4019c 100644
--- a/src/runtime/vdso_in_none.go
+++ b/src/runtime/vdso_in_none.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build linux,!386,!amd64,!arm,!arm64,!ppc64,!ppc64le !linux
+// +build linux,!386,!amd64,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le !linux
package runtime
diff --git a/src/runtime/vdso_linux.go b/src/runtime/vdso_linux.go
index 71ba4ce..6e29424 100644
--- a/src/runtime/vdso_linux.go
+++ b/src/runtime/vdso_linux.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// +build linux
-// +build 386 amd64 arm arm64 ppc64 ppc64le
+// +build 386 amd64 arm arm64 mips64 mips64le ppc64 ppc64le
package runtime
@@ -281,6 +281,7 @@
}
// vdsoMarker reports whether PC is on the VDSO page.
+//go:nosplit
func inVDSOPage(pc uintptr) bool {
for _, k := range vdsoSymbolKeys {
if *k.ptr != 0 {
diff --git a/src/runtime/vdso_linux_mips64x.go b/src/runtime/vdso_linux_mips64x.go
new file mode 100644
index 0000000..3a0f947
--- /dev/null
+++ b/src/runtime/vdso_linux_mips64x.go
@@ -0,0 +1,28 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build mips64 mips64le
+
+package runtime
+
+const (
+ // vdsoArrayMax is the byte-size of a maximally sized array on this architecture.
+ // See cmd/compile/internal/mips64/galign.go arch.MAXWIDTH initialization.
+ vdsoArrayMax = 1<<50 - 1
+)
+
+// see man 7 vdso : mips
+var vdsoLinuxVersion = vdsoVersionKey{"LINUX_2.6", 0x3ae75f6}
+
+// The symbol name is not __kernel_clock_gettime as suggested by the manpage;
+// according to Linux source code it should be __vdso_clock_gettime instead.
+var vdsoSymbolKeys = []vdsoSymbolKey{
+ {"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &vdsoClockgettimeSym},
+}
+
+// initialize to fall back to syscall
+var (
+ vdsoClockgettimeSym uintptr = 0
+)
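The two numeric fields in vdsoSymbolKey appear to be precomputed symbol hashes: the runtime keeps both a classic SysV ELF hash and a GNU hash so it can walk whichever hash section the vDSO provides. Assuming that is what these constants encode, the SysV variant is the textbook function below; treat it as an illustration rather than the runtime's exact code:

package main

import "fmt"

// elfHash is the classic SysV ELF symbol hash.
func elfHash(name string) uint32 {
	var h uint32
	for i := 0; i < len(name); i++ {
		h = h<<4 + uint32(name[i])
		if g := h & 0xf0000000; g != 0 {
			h ^= g >> 24
			h &^= g
		}
	}
	return h
}

func main() {
	fmt.Printf("%#x\n", elfHash("__vdso_clock_gettime"))
}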
diff --git a/src/runtime/vlop_386.s b/src/runtime/vlop_386.s
index 3387c51..b478ff8 100644
--- a/src/runtime/vlop_386.s
+++ b/src/runtime/vlop_386.s
@@ -1,5 +1,5 @@
// Inferno's libkern/vlop-386.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/vlop-386.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/vlop-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
diff --git a/src/runtime/vlop_arm.s b/src/runtime/vlop_arm.s
index 41d2858..9e19938 100644
--- a/src/runtime/vlop_arm.s
+++ b/src/runtime/vlop_arm.s
@@ -1,5 +1,5 @@
// Inferno's libkern/vlop-arm.s
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/vlop-arm.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/vlop-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -40,9 +40,7 @@
#define Ra R11
// Be careful: Ra == R11 will be used by the linker for synthesized instructions.
-// Note: this function does not have a frame. If it ever needs a frame,
-// the RET instruction will clobber R12 on nacl, and the compiler's register
-// allocator needs to know.
+// Note: this function does not have a frame.
TEXT runtime·udiv(SB),NOSPLIT|NOFRAME,$0
MOVBU internal∕cpu·ARM+const_offsetARMHasIDIVA(SB), Ra
CMP $0, Ra
diff --git a/src/runtime/vlrt.go b/src/runtime/vlrt.go
index f790d3b..38e0b32 100644
--- a/src/runtime/vlrt.go
+++ b/src/runtime/vlrt.go
@@ -1,5 +1,5 @@
// Inferno's libkern/vlrt-arm.c
-// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/vlrt-arm.c
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/vlrt-arm.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
@@ -130,9 +130,6 @@
return r
}
-//go:nosplit
-// nosplit because division is used in syscall context in nanotime on darwin/386
-// and darwin/arm where stack splits are not allowed.
func int64div(n, d int64) int64 {
// Check for 32 bit operands
if int64(int32(n)) == n && int64(int32(d)) == d {