Update linux-x86 Go prebuilts from ab/9878432
https://ci.android.com/builds/branches/aosp-build-tools-release/grid?head=9878432&tail=9878432
Update script: toolchain/go/update-prebuilts.sh
Test: Treehugger presubmit
Change-Id: I07818c960e04b2ef4373ab22161590b088582d39
diff --git a/src/runtime/HACKING.md b/src/runtime/HACKING.md
index 61b5a51..ce0b42a 100644
--- a/src/runtime/HACKING.md
+++ b/src/runtime/HACKING.md
@@ -235,7 +235,7 @@
objects of the same type.
In general, types that are allocated using any of these should be
-marked `//go:notinheap` (see below).
+marked as not in heap by embedding `runtime/internal/sys.NotInHeap`.
Objects that are allocated in unmanaged memory **must not** contain
heap pointers unless the following rules are also obeyed:
@@ -330,37 +330,3 @@
The conversion from pointer to uintptr must appear in the argument list of any
call to this function. This directive is used for some low-level system call
implementations.
-
-go:notinheap
-------------
-
-`go:notinheap` applies to type declarations. It indicates that a type
-must never be allocated from the GC'd heap or on the stack.
-Specifically, pointers to this type must always fail the
-`runtime.inheap` check. The type may be used for global variables, or
-for objects in unmanaged memory (e.g., allocated with `sysAlloc`,
-`persistentalloc`, `fixalloc`, or from a manually-managed span).
-Specifically:
-
-1. `new(T)`, `make([]T)`, `append([]T, ...)` and implicit heap
- allocation of T are disallowed. (Though implicit allocations are
- disallowed in the runtime anyway.)
-
-2. A pointer to a regular type (other than `unsafe.Pointer`) cannot be
- converted to a pointer to a `go:notinheap` type, even if they have
- the same underlying type.
-
-3. Any type that contains a `go:notinheap` type is itself
- `go:notinheap`. Structs and arrays are `go:notinheap` if their
- elements are. Maps and channels of `go:notinheap` types are
- disallowed. To keep things explicit, any type declaration where the
- type is implicitly `go:notinheap` must be explicitly marked
- `go:notinheap` as well.
-
-4. Write barriers on pointers to `go:notinheap` types can be omitted.
-
-The last point is the real benefit of `go:notinheap`. The runtime uses
-it for low-level internal structures to avoid memory barriers in the
-scheduler and the memory allocator where they are illegal or simply
-inefficient. This mechanism is reasonably safe and does not compromise
-the readability of the runtime.
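
The removed `go:notinheap` section is superseded by the marker-type convention that the updated HACKING.md text above points to. A minimal sketch of that convention, assuming only what the new sentence states (the type `node` below is illustrative and not part of this change):

    import "runtime/internal/sys"

    // node must never be allocated on the GC'd heap or the stack. Embedding
    // the marker type as a blank field is what replaces the old pragma; the
    // compiler enforces the same conversion and containment rules as before.
    type node struct {
        _    sys.NotInHeap
        next *node
    }
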
diff --git a/src/runtime/align_runtime_test.go b/src/runtime/align_runtime_test.go
index ec7956d..d78b0b2 100644
--- a/src/runtime/align_runtime_test.go
+++ b/src/runtime/align_runtime_test.go
@@ -14,24 +14,7 @@
// operations (all the *64 operations in runtime/internal/atomic).
var AtomicFields = []uintptr{
unsafe.Offsetof(m{}.procid),
- unsafe.Offsetof(p{}.timer0When),
- unsafe.Offsetof(p{}.timerModifiedEarliest),
unsafe.Offsetof(p{}.gcFractionalMarkTime),
- unsafe.Offsetof(schedt{}.goidgen),
- unsafe.Offsetof(schedt{}.lastpoll),
- unsafe.Offsetof(schedt{}.pollUntil),
- unsafe.Offsetof(schedt{}.timeToRun),
- unsafe.Offsetof(gcControllerState{}.bgScanCredit),
- unsafe.Offsetof(gcControllerState{}.maxStackScan),
- unsafe.Offsetof(gcControllerState{}.heapLive),
- unsafe.Offsetof(gcControllerState{}.heapScan),
- unsafe.Offsetof(gcControllerState{}.dedicatedMarkTime),
- unsafe.Offsetof(gcControllerState{}.dedicatedMarkWorkersNeeded),
- unsafe.Offsetof(gcControllerState{}.fractionalMarkTime),
- unsafe.Offsetof(gcControllerState{}.idleMarkTime),
- unsafe.Offsetof(gcControllerState{}.globalsScan),
- unsafe.Offsetof(gcControllerState{}.lastStackScan),
- unsafe.Offsetof(timeHistogram{}.underflow),
unsafe.Offsetof(profBuf{}.overflow),
unsafe.Offsetof(profBuf{}.overflowTime),
unsafe.Offsetof(heapStatsDelta{}.tinyAllocCount),
@@ -50,10 +33,7 @@
unsafe.Offsetof(lfnode{}.next),
unsafe.Offsetof(mstats{}.last_gc_nanotime),
unsafe.Offsetof(mstats{}.last_gc_unix),
- unsafe.Offsetof(mstats{}.gcPauseDist),
- unsafe.Offsetof(ticksType{}.val),
unsafe.Offsetof(workType{}.bytesMarked),
- unsafe.Offsetof(timeHistogram{}.counts),
}
// AtomicVariables is the set of global variables on which we perform
diff --git a/src/runtime/align_test.go b/src/runtime/align_test.go
index 55cf783..5f225d6 100644
--- a/src/runtime/align_test.go
+++ b/src/runtime/align_test.go
@@ -5,7 +5,6 @@
package runtime_test
import (
- "bytes"
"go/ast"
"go/build"
"go/importer"
@@ -13,6 +12,7 @@
"go/printer"
"go/token"
"go/types"
+ "internal/testenv"
"os"
"regexp"
"runtime"
@@ -23,6 +23,8 @@
// Check that 64-bit fields on which we apply atomic operations
// are aligned to 8 bytes. This can be a problem on 32-bit systems.
func TestAtomicAlignment(t *testing.T) {
+ testenv.MustHaveGoBuild(t) // go command needed to resolve std .a files for importer.Default().
+
// Read the code making the tables above, to see which fields and
// variables we are currently checking.
checked := map[string]bool{}
@@ -180,7 +182,7 @@
}
func (v *Visitor) print(n ast.Node) string {
- var b bytes.Buffer
+ var b strings.Builder
printer.Fprint(&b, v.fset, n)
return b.String()
}
diff --git a/src/runtime/arena.go b/src/runtime/arena.go
new file mode 100644
index 0000000..c338d30
--- /dev/null
+++ b/src/runtime/arena.go
@@ -0,0 +1,1003 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of (safe) user arenas.
+//
+// This file contains the implementation of user arenas wherein Go values can
+// be manually allocated and freed in bulk. The act of manually freeing memory,
+// potentially before a GC cycle, means that a garbage collection cycle can be
+// delayed, improving efficiency by reducing GC cycle frequency. There are other
+// potential efficiency benefits, such as improved locality and access to a more
+// efficient allocation strategy.
+//
+// What makes the arenas here safe is that once they are freed, accessing the
+// arena's memory will cause an explicit program fault, and the arena's address
+// space will not be reused until no more pointers into it are found. There's one
+// exception to this: if an arena allocated memory that isn't exhausted, it's placed
+// back into a pool for reuse. This means that a crash is not always guaranteed.
+//
+// While this may seem unsafe, it still prevents memory corruption, and is in fact
+// necessary in order to make new(T) a valid implementation of arenas. Such a property
+// is desirable to allow for a trivial implementation. (It also avoids complexities
+// that arise from synchronization with the GC when trying to set the arena chunks to
+// fault while the GC is active.)
+//
+// The implementation works in layers. At the bottom, arenas are managed in chunks.
+// Each chunk must be a multiple of the heap arena size, or the heap arena size must
+// be divisible by the arena chunk size. The address space for each chunk, and each
+// corresponding heapArena for that address space, are eternally reserved for use as
+// arena chunks. That is, they can never be used for the general heap. Each chunk
+// is also represented by a single mspan, and is modeled as a single large heap
+// allocation. It must be, because each chunk contains ordinary Go values that may
+// point into the heap, so it must be scanned just like any other object. Any
+// pointer into a chunk will therefore always cause the whole chunk to be scanned
+// while its corresponding arena is still live.
+//
+// Chunks may be allocated either from new memory mapped by the OS on our behalf,
+// or by reusing old freed chunks. When chunks are freed, their underlying memory
+// is returned to the OS, set to fault on access, and may not be reused until the
+// program doesn't point into the chunk anymore (the code refers to this state as
+// "quarantined"), a property checked by the GC.
+//
+// The sweeper handles moving chunks out of this quarantine state to be ready for
+// reuse. When the chunk is placed into the quarantine state, its corresponding
+// span is marked as noscan so that the GC doesn't try to scan memory that would
+// cause a fault.
+//
+// At the next layer are the user arenas themselves. They consist of a single
+// active chunk which new Go values are bump-allocated into and a list of chunks
+// that were exhausted when allocating into the arena. Once the arena is freed,
+// it frees all full chunks it references, and places the active one onto a reuse
+// list for a future arena to use. Each arena keeps its list of referenced chunks
+// explicitly live until it is freed. Each user arena also maps to an object which
+// has a finalizer attached that ensures the arena's chunks are all freed even if
+// the arena itself is never explicitly freed.
+//
+// Pointer-ful memory is bump-allocated from low addresses to high addresses in each
+// chunk, while pointer-free memory is bump-allocated from high addresses to low
+// addresses. The reason for this is to take advantage of a GC optimization wherein
+// the GC will stop scanning an object when there are no more pointers in it, which
+// also allows us to elide clearing the heap bitmap for pointer-free Go values
+// allocated into arenas.
+//
+// Note that arenas are not safe to use concurrently.
+//
+// In summary, there are 2 resources: arenas, and arena chunks. They exist in the
+// following lifecycle:
+//
+// (1) A new arena is created via newArena.
+// (2) Chunks are allocated to hold memory allocated into the arena with new or slice.
+// (a) Chunks are first allocated from the reuse list of partially-used chunks.
+// (b) If there are no such chunks, then chunks on the ready list are taken.
+// (c) Failing all the above, memory for a new chunk is mapped.
+// (3) The arena is freed, or all references to it are dropped, triggering its finalizer.
+// (a) If the GC is not active, exhausted chunks are set to fault and placed on a
+// quarantine list.
+// (b) If the GC is active, exhausted chunks are placed on a fault list and will
+// go through step (a) at a later point in time.
+// (c) Any remaining partially-used chunk is placed on a reuse list.
+// (4) Once no more pointers are found into quarantined arena chunks, the sweeper
+// takes these chunks out of quarantine and places them on the ready list.
+
+package runtime
+
+import (
+ "internal/goarch"
+ "runtime/internal/atomic"
+ "runtime/internal/math"
+ "unsafe"
+)
+
+// Functions starting with arena_ are meant to be exported to downstream users
+// of arenas. They should wrap these functions in a higher-level API.
+//
+// The underlying arena and its resources are managed through an opaque unsafe.Pointer.
+
+// arena_newArena is a wrapper around newUserArena.
+//
+//go:linkname arena_newArena arena.runtime_arena_newArena
+func arena_newArena() unsafe.Pointer {
+ return unsafe.Pointer(newUserArena())
+}
+
+// arena_arena_New is a wrapper around (*userArena).new, except that typ
+// is an any (must be a *_type, still) and typ must be a type descriptor
+// for a pointer to the type to actually be allocated, i.e. pass a *T
+// to allocate a T. This is necessary because this function returns a *T.
+//
+//go:linkname arena_arena_New arena.runtime_arena_arena_New
+func arena_arena_New(arena unsafe.Pointer, typ any) any {
+ t := (*_type)(efaceOf(&typ).data)
+ if t.kind&kindMask != kindPtr {
+ throw("arena_New: non-pointer type")
+ }
+ te := (*ptrtype)(unsafe.Pointer(t)).elem
+ x := ((*userArena)(arena)).new(te)
+ var result any
+ e := efaceOf(&result)
+ e._type = t
+ e.data = x
+ return result
+}
+
+// arena_arena_Slice is a wrapper around (*userArena).slice.
+//
+//go:linkname arena_arena_Slice arena.runtime_arena_arena_Slice
+func arena_arena_Slice(arena unsafe.Pointer, slice any, cap int) {
+ ((*userArena)(arena)).slice(slice, cap)
+}
+
+// arena_arena_Free is a wrapper around (*userArena).free.
+//
+//go:linkname arena_arena_Free arena.runtime_arena_arena_Free
+func arena_arena_Free(arena unsafe.Pointer) {
+ ((*userArena)(arena)).free()
+}
+
+// arena_heapify takes a value that lives in an arena and makes a copy
+// of it on the heap. Values that don't live in an arena are returned unmodified.
+//
+//go:linkname arena_heapify arena.runtime_arena_heapify
+func arena_heapify(s any) any {
+ var v unsafe.Pointer
+ e := efaceOf(&s)
+ t := e._type
+ switch t.kind & kindMask {
+ case kindString:
+ v = stringStructOf((*string)(e.data)).str
+ case kindSlice:
+ v = (*slice)(e.data).array
+ case kindPtr:
+ v = e.data
+ default:
+ panic("arena: Clone only supports pointers, slices, and strings")
+ }
+ span := spanOf(uintptr(v))
+ if span == nil || !span.isUserArenaChunk {
+ // Not stored in a user arena chunk.
+ return s
+ }
+ // Heap-allocate storage for a copy.
+ var x any
+ switch t.kind & kindMask {
+ case kindString:
+ s1 := s.(string)
+ s2, b := rawstring(len(s1))
+ copy(b, s1)
+ x = s2
+ case kindSlice:
+ len := (*slice)(e.data).len
+ et := (*slicetype)(unsafe.Pointer(t)).elem
+ sl := new(slice)
+ *sl = slice{makeslicecopy(et, len, len, (*slice)(e.data).array), len, len}
+ xe := efaceOf(&x)
+ xe._type = t
+ xe.data = unsafe.Pointer(sl)
+ case kindPtr:
+ et := (*ptrtype)(unsafe.Pointer(t)).elem
+ e2 := newobject(et)
+ typedmemmove(et, e2, e.data)
+ xe := efaceOf(&x)
+ xe._type = t
+ xe.data = e2
+ }
+ return x
+}
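
These arena_-prefixed hooks are what the user-facing arena package links against (it ships behind GOEXPERIMENT=arenas). A rough usage sketch of that higher-level API, assuming the Go 1.20 arena package surface (NewArena, New, MakeSlice, Clone, Free); the snippet is illustrative, not part of this change:

    package main

    import "arena" // requires GOEXPERIMENT=arenas

    func main() {
        a := arena.NewArena()                 // runtime_arena_newArena
        p := arena.New[int](a)                // runtime_arena_arena_New, passing a *int descriptor
        s := arena.MakeSlice[byte](a, 64, 64) // runtime_arena_arena_Slice
        *p = 42
        s[0] = byte(*p)
        kept := arena.Clone(p)                // runtime_arena_heapify: copy arena-backed values to the heap
        a.Free()                              // runtime_arena_arena_Free; p and s must not be used after this
        _ = kept                              // kept survives the free because it was heapified
    }
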
+
+const (
+ // userArenaChunkBytes is the size of a user arena chunk.
+ userArenaChunkBytesMax = 8 << 20
+ userArenaChunkBytes = uintptr(int64(userArenaChunkBytesMax-heapArenaBytes)&(int64(userArenaChunkBytesMax-heapArenaBytes)>>63) + heapArenaBytes) // min(userArenaChunkBytesMax, heapArenaBytes)
+
+ // userArenaChunkPages is the number of pages a user arena chunk uses.
+ userArenaChunkPages = userArenaChunkBytes / pageSize
+
+ // userArenaChunkMaxAllocBytes is the maximum size of an object that can
+ // be allocated from an arena. This number is chosen to cap worst-case
+ // fragmentation of user arenas to 25%. Larger allocations are redirected
+ // to the heap.
+ userArenaChunkMaxAllocBytes = userArenaChunkBytes / 4
+)
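
The userArenaChunkBytes expression is a branchless min written out by hand because it has to remain a constant expression. The same identity on ordinary values, as a sketch (branchlessMin is not a runtime function, and it assumes a-b does not overflow):

    // branchlessMin returns min(a, b): d>>63 is all ones exactly when a < b,
    // so d&(d>>63) is either d (when d is negative) or 0.
    func branchlessMin(a, b int64) int64 {
        d := a - b
        return b + d&(d>>63)
    }
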
+
+func init() {
+ if userArenaChunkPages*pageSize != userArenaChunkBytes {
+ throw("user arena chunk size is not a mutliple of the page size")
+ }
+ if userArenaChunkBytes%physPageSize != 0 {
+ throw("user arena chunk size is not a mutliple of the physical page size")
+ }
+ if userArenaChunkBytes < heapArenaBytes {
+ if heapArenaBytes%userArenaChunkBytes != 0 {
+ throw("user arena chunk size is smaller than a heap arena, but doesn't divide it")
+ }
+ } else {
+ if userArenaChunkBytes%heapArenaBytes != 0 {
+ throw("user arena chunks size is larger than a heap arena, but not a multiple")
+ }
+ }
+ lockInit(&userArenaState.lock, lockRankUserArenaState)
+}
+
+type userArena struct {
+ // full is a list of full chunks that no longer have enough free memory left, and
+ // that we'll free once this user arena is freed.
+ //
+ // Can't use mSpanList here because it's not-in-heap.
+ fullList *mspan
+
+ // active is the user arena chunk we're currently allocating into.
+ active *mspan
+
+ // refs is a set of references to the arena chunks so that they're kept alive.
+ //
+ // The last reference in the list always refers to active, while the rest of
+ // them correspond to fullList. Specifically, the head of fullList is the
+ // second-to-last one, fullList.next is the third-to-last, and so on.
+ //
+ // In other words, every time a new chunk becomes active, it's appended to this
+ // list.
+ refs []unsafe.Pointer
+
+ // defunct is true if free has been called on this arena.
+ //
+ // This is just a best-effort way to discover a concurrent allocation
+ // and free. Also used to detect a double-free.
+ defunct atomic.Bool
+}
+
+// newUserArena creates a new userArena ready to be used.
+func newUserArena() *userArena {
+ a := new(userArena)
+ SetFinalizer(a, func(a *userArena) {
+ // If the arena handle is dropped without being freed, then call
+ // free on the arena, since the arena chunks are never reclaimed
+ // by the garbage collector on their own.
+ a.free()
+ })
+ a.refill()
+ return a
+}
+
+// new allocates a new object of the provided type into the arena, and returns
+// its pointer.
+//
+// This operation is not safe to call concurrently with other operations on the
+// same arena.
+func (a *userArena) new(typ *_type) unsafe.Pointer {
+ return a.alloc(typ, -1)
+}
+
+// slice allocates a new slice backing store. slice must be a pointer to a slice
+// (i.e. *[]T), because the slice header will be updated directly.
+//
+// cap determines the capacity of the slice backing store and must be non-negative.
+//
+// This operation is not safe to call concurrently with other operations on the
+// same arena.
+func (a *userArena) slice(sl any, cap int) {
+ if cap < 0 {
+ panic("userArena.slice: negative cap")
+ }
+ i := efaceOf(&sl)
+ typ := i._type
+ if typ.kind&kindMask != kindPtr {
+ panic("slice result of non-ptr type")
+ }
+ typ = (*ptrtype)(unsafe.Pointer(typ)).elem
+ if typ.kind&kindMask != kindSlice {
+ panic("slice of non-ptr-to-slice type")
+ }
+ typ = (*slicetype)(unsafe.Pointer(typ)).elem
+ // typ is now the element type of the slice we want to allocate.
+
+ *((*slice)(i.data)) = slice{a.alloc(typ, cap), cap, cap}
+}
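
A brief usage sketch of new and slice from inside the runtime (arena_test.go drives the same path through exported test wrappers); the names are the unexported ones above, so the snippet only compiles in package runtime and is illustrative only:

    a := newUserArena()
    var buf []byte
    a.slice(&buf, 1<<10) // buf is overwritten: len == cap == 1024, backed by the active chunk
    // ... use buf ...
    a.free()             // buf must not be used after this point
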
+
+// free returns the userArena's chunks back to mheap and marks it as defunct.
+//
+// Must be called at most once for any given arena.
+//
+// This operation is not safe to call concurrently with other operations on the
+// same arena.
+func (a *userArena) free() {
+ // Check for a double-free.
+ if a.defunct.Load() {
+ panic("arena double free")
+ }
+
+ // Mark ourselves as defunct.
+ a.defunct.Store(true)
+ SetFinalizer(a, nil)
+
+ // Free all the full arenas.
+ //
+ // The refs on this list are in reverse order from the second-to-last.
+ s := a.fullList
+ i := len(a.refs) - 2
+ for s != nil {
+ a.fullList = s.next
+ s.next = nil
+ freeUserArenaChunk(s, a.refs[i])
+ s = a.fullList
+ i--
+ }
+ if a.fullList != nil || i >= 0 {
+ // There's still something left on the full list, or we
+ // failed to actually iterate over the entire refs list.
+ throw("full list doesn't match refs list in length")
+ }
+
+ // Put the active chunk onto the reuse list.
+ //
+ // Note that active's reference is always the last reference in refs.
+ s = a.active
+ if s != nil {
+ if raceenabled || msanenabled || asanenabled {
+ // Don't reuse arenas with sanitizers enabled. We want to catch
+ // any use-after-free errors aggressively.
+ freeUserArenaChunk(s, a.refs[len(a.refs)-1])
+ } else {
+ lock(&userArenaState.lock)
+ userArenaState.reuse = append(userArenaState.reuse, liveUserArenaChunk{s, a.refs[len(a.refs)-1]})
+ unlock(&userArenaState.lock)
+ }
+ }
+ // nil out a.active so that a race with freeing will more likely cause a crash.
+ a.active = nil
+ a.refs = nil
+}
+
+// alloc reserves space in the current chunk or calls refill and reserves space
+// in a new chunk. If cap is negative, the type will be taken literally, otherwise
+// it will be considered as an element type for a slice backing store with capacity
+// cap.
+func (a *userArena) alloc(typ *_type, cap int) unsafe.Pointer {
+ s := a.active
+ var x unsafe.Pointer
+ for {
+ x = s.userArenaNextFree(typ, cap)
+ if x != nil {
+ break
+ }
+ s = a.refill()
+ }
+ return x
+}
+
+// refill inserts the current arena chunk onto the full list and obtains a new
+// one, either from the partial list or allocating a new one, both from mheap.
+func (a *userArena) refill() *mspan {
+ // If there's an active chunk, assume it's full.
+ s := a.active
+ if s != nil {
+ if s.userArenaChunkFree.size() > userArenaChunkMaxAllocBytes {
+ // It's difficult to tell when we're actually out of memory
+ // in a chunk because the allocation that failed may still leave
+ // some free space available. However, that amount of free space
+ // should never exceed the maximum allocation size.
+ throw("wasted too much memory in an arena chunk")
+ }
+ s.next = a.fullList
+ a.fullList = s
+ a.active = nil
+ s = nil
+ }
+ var x unsafe.Pointer
+
+ // Check the partially-used list.
+ lock(&userArenaState.lock)
+ if len(userArenaState.reuse) > 0 {
+ // Pick off the last arena chunk from the list.
+ n := len(userArenaState.reuse) - 1
+ x = userArenaState.reuse[n].x
+ s = userArenaState.reuse[n].mspan
+ userArenaState.reuse[n].x = nil
+ userArenaState.reuse[n].mspan = nil
+ userArenaState.reuse = userArenaState.reuse[:n]
+ }
+ unlock(&userArenaState.lock)
+ if s == nil {
+ // Allocate a new one.
+ x, s = newUserArenaChunk()
+ if s == nil {
+ throw("out of memory")
+ }
+ }
+ a.refs = append(a.refs, x)
+ a.active = s
+ return s
+}
+
+type liveUserArenaChunk struct {
+ *mspan // Must represent a user arena chunk.
+
+ // Reference to mspan.base() to keep the chunk alive.
+ x unsafe.Pointer
+}
+
+var userArenaState struct {
+ lock mutex
+
+ // reuse contains a list of partially-used and already-live
+ // user arena chunks that can be quickly reused for another
+ // arena.
+ //
+ // Protected by lock.
+ reuse []liveUserArenaChunk
+
+ // fault contains full user arena chunks that need to be faulted.
+ //
+ // Protected by lock.
+ fault []liveUserArenaChunk
+}
+
+// userArenaNextFree reserves space in the user arena for an item of the specified
+// type. If cap is not -1, this is for an array of cap elements of type t.
+func (s *mspan) userArenaNextFree(typ *_type, cap int) unsafe.Pointer {
+ size := typ.size
+ if cap > 0 {
+ if size > ^uintptr(0)/uintptr(cap) {
+ // Overflow.
+ throw("out of memory")
+ }
+ size *= uintptr(cap)
+ }
+ if size == 0 || cap == 0 {
+ return unsafe.Pointer(&zerobase)
+ }
+ if size > userArenaChunkMaxAllocBytes {
+ // Redirect allocations that don't fit into a chunk well directly
+ // from the heap.
+ if cap >= 0 {
+ return newarray(typ, cap)
+ }
+ return newobject(typ)
+ }
+
+ // Prevent preemption as we set up the space for a new object.
+ //
+ // Act like we're allocating.
+ mp := acquirem()
+ if mp.mallocing != 0 {
+ throw("malloc deadlock")
+ }
+ if mp.gsignal == getg() {
+ throw("malloc during signal")
+ }
+ mp.mallocing = 1
+
+ var ptr unsafe.Pointer
+ if typ.ptrdata == 0 {
+ // Allocate pointer-less objects from the tail end of the chunk.
+ v, ok := s.userArenaChunkFree.takeFromBack(size, typ.align)
+ if ok {
+ ptr = unsafe.Pointer(v)
+ }
+ } else {
+ v, ok := s.userArenaChunkFree.takeFromFront(size, typ.align)
+ if ok {
+ ptr = unsafe.Pointer(v)
+ }
+ }
+ if ptr == nil {
+ // Failed to allocate.
+ mp.mallocing = 0
+ releasem(mp)
+ return nil
+ }
+ if s.needzero != 0 {
+ throw("arena chunk needs zeroing, but should already be zeroed")
+ }
+ // Set up heap bitmap and do extra accounting.
+ if typ.ptrdata != 0 {
+ if cap >= 0 {
+ userArenaHeapBitsSetSliceType(typ, cap, ptr, s.base())
+ } else {
+ userArenaHeapBitsSetType(typ, ptr, s.base())
+ }
+ c := getMCache(mp)
+ if c == nil {
+ throw("mallocgc called without a P or outside bootstrapping")
+ }
+ if cap > 0 {
+ c.scanAlloc += size - (typ.size - typ.ptrdata)
+ } else {
+ c.scanAlloc += typ.ptrdata
+ }
+ }
+
+ // Ensure that the stores above that initialize the memory at ptr
+ // to type-safe memory and set the heap bits occur before
+ // the caller can make ptr observable to the garbage
+ // collector. Otherwise, on weakly ordered machines,
+ // the garbage collector could follow ptr and
+ // see uninitialized memory or stale heap bits.
+ publicationBarrier()
+
+ mp.mallocing = 0
+ releasem(mp)
+
+ return ptr
+}
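
userArenaChunkFree is consumed from both ends: pointer-ful allocations come off the front and pointer-free ones off the back, which is what lets the GC stop scanning early and lets the runtime skip heap-bitmap writes for scalar data. A self-contained sketch of that two-ended bump allocation over a [base, limit) range; the real takeFromFront/takeFromBack live on addrRange in mranges.go, so the type and exact signatures below are assumptions:

    // bumpRange is a simplified stand-in for the runtime's addrRange.
    // align must be a power of two.
    type bumpRange struct{ base, limit uintptr }

    // takeFromFront carves size bytes off the low end, aligning the start up.
    func (r *bumpRange) takeFromFront(size, align uintptr) (uintptr, bool) {
        start := (r.base + align - 1) &^ (align - 1)
        if start+size < start || start+size > r.limit {
            return 0, false
        }
        r.base = start + size
        return start, true
    }

    // takeFromBack carves size bytes off the high end, aligning the start down.
    func (r *bumpRange) takeFromBack(size, align uintptr) (uintptr, bool) {
        if r.limit < size {
            return 0, false
        }
        start := (r.limit - size) &^ (align - 1)
        if start < r.base {
            return 0, false
        }
        r.limit = start
        return start, true
    }
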
+
+// userArenaHeapBitsSetType is the equivalent of heapBitsSetType but for
+// non-slice-backing-store Go values allocated in a user arena chunk. It
+// sets up the heap bitmap for the value with type typ allocated at address ptr.
+// base is the base address of the arena chunk.
+func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, base uintptr) {
+ h := writeHeapBitsForAddr(uintptr(ptr))
+
+ // Our last allocation might have ended right at a noMorePtrs mark,
+ // which we would not have erased. We need to erase that mark here,
+ // because we're going to start adding new heap bitmap bits.
+ // We only need to clear one mark, because below we make sure to
+ // pad out the bits with zeroes and only write one noMorePtrs bit
+ // for each new object.
+ // (This is only necessary at noMorePtrs boundaries, as noMorePtrs
+ // marks within an object allocated with newAt will be erased by
+ // the normal writeHeapBitsForAddr mechanism.)
+ //
+ // Note that we skip this if this is the first allocation in the
+ // arena because there's definitely no previous noMorePtrs mark
+ // (in fact, we *must* do this, because we're going to try to back
+ // up a pointer to fix this up).
+ if uintptr(ptr)%(8*goarch.PtrSize*goarch.PtrSize) == 0 && uintptr(ptr) != base {
+ // Back up one pointer and rewrite that pointer. That will
+ // cause the writeHeapBits implementation to clear the
+ // noMorePtrs bit we need to clear.
+ r := heapBitsForAddr(uintptr(ptr)-goarch.PtrSize, goarch.PtrSize)
+ _, p := r.next()
+ b := uintptr(0)
+ if p == uintptr(ptr)-goarch.PtrSize {
+ b = 1
+ }
+ h = writeHeapBitsForAddr(uintptr(ptr) - goarch.PtrSize)
+ h = h.write(b, 1)
+ }
+
+ p := typ.gcdata // start of 1-bit pointer mask (or GC program)
+ var gcProgBits uintptr
+ if typ.kind&kindGCProg != 0 {
+ // Expand gc program, using the object itself for storage.
+ gcProgBits = runGCProg(addb(p, 4), (*byte)(ptr))
+ p = (*byte)(ptr)
+ }
+ nb := typ.ptrdata / goarch.PtrSize
+
+ for i := uintptr(0); i < nb; i += ptrBits {
+ k := nb - i
+ if k > ptrBits {
+ k = ptrBits
+ }
+ h = h.write(readUintptr(addb(p, i/8)), k)
+ }
+ // Note: we call pad here to ensure we emit explicit 0 bits
+ // for the pointerless tail of the object. This ensures that
+ // there's only a single noMorePtrs mark for the next object
+ // to clear. We don't need to do this to clear stale noMorePtrs
+ // markers from previous uses because arena chunk pointer bitmaps
+ // are always fully cleared when reused.
+ h = h.pad(typ.size - typ.ptrdata)
+ h.flush(uintptr(ptr), typ.size)
+
+ if typ.kind&kindGCProg != 0 {
+ // Zero out temporary ptrmask buffer inside object.
+ memclrNoHeapPointers(ptr, (gcProgBits+7)/8)
+ }
+
+ // Double-check that the bitmap was written out correctly.
+ //
+ // Derived from heapBitsSetType.
+ const doubleCheck = false
+ if doubleCheck {
+ size := typ.size
+ x := uintptr(ptr)
+ h := heapBitsForAddr(x, size)
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
+ // Compute the pointer bit we want at offset i.
+ want := false
+ off := i % typ.size
+ if off < typ.ptrdata {
+ j := off / goarch.PtrSize
+ want = *addb(typ.gcdata, j/8)>>(j%8)&1 != 0
+ }
+ if want {
+ var addr uintptr
+ h, addr = h.next()
+ if addr != x+i {
+ throw("userArenaHeapBitsSetType: pointer entry not correct")
+ }
+ }
+ }
+ if _, addr := h.next(); addr != 0 {
+ throw("userArenaHeapBitsSetType: extra pointer")
+ }
+ }
+}
+
+// userArenaHeapBitsSetSliceType is the equivalent of heapBitsSetType but for
+// Go slice backing store values allocated in a user arena chunk. It sets up the
+// heap bitmap for n consecutive values with type typ allocated at address ptr.
+func userArenaHeapBitsSetSliceType(typ *_type, n int, ptr unsafe.Pointer, base uintptr) {
+ mem, overflow := math.MulUintptr(typ.size, uintptr(n))
+ if overflow || n < 0 || mem > maxAlloc {
+ panic(plainError("runtime: allocation size out of range"))
+ }
+ for i := 0; i < n; i++ {
+ userArenaHeapBitsSetType(typ, add(ptr, uintptr(i)*typ.size), base)
+ }
+}
+
+// newUserArenaChunk allocates a user arena chunk, which maps to a single
+// heap arena and single span. Returns a pointer to the base of the chunk
+// (this is really important: we need to keep the chunk alive) and the span.
+func newUserArenaChunk() (unsafe.Pointer, *mspan) {
+ if gcphase == _GCmarktermination {
+ throw("newUserArenaChunk called with gcphase == _GCmarktermination")
+ }
+
+ // Deduct assist credit. Because user arena chunks are modeled as one
+ // giant heap object which counts toward heapLive, we're obligated to
+ // assist the GC proportionally (and it's worth noting that the arena
+ // does represent additional work for the GC, but we also have no idea
+ // what that looks like until we actually allocate things into the
+ // arena).
+ deductAssistCredit(userArenaChunkBytes)
+
+ // Set mp.mallocing to keep from being preempted by GC.
+ mp := acquirem()
+ if mp.mallocing != 0 {
+ throw("malloc deadlock")
+ }
+ if mp.gsignal == getg() {
+ throw("malloc during signal")
+ }
+ mp.mallocing = 1
+
+ // Allocate a new user arena.
+ var span *mspan
+ systemstack(func() {
+ span = mheap_.allocUserArenaChunk()
+ })
+ if span == nil {
+ throw("out of memory")
+ }
+ x := unsafe.Pointer(span.base())
+
+ // Allocate black during GC.
+ // All slots hold nil so no scanning is needed.
+ // This may be racing with GC so do it atomically if there can be
+ // a race marking the bit.
+ if gcphase != _GCoff {
+ gcmarknewobject(span, span.base(), span.elemsize)
+ }
+
+ if raceenabled {
+ // TODO(mknyszek): Track individual objects.
+ racemalloc(unsafe.Pointer(span.base()), span.elemsize)
+ }
+
+ if msanenabled {
+ // TODO(mknyszek): Track individual objects.
+ msanmalloc(unsafe.Pointer(span.base()), span.elemsize)
+ }
+
+ if asanenabled {
+ // TODO(mknyszek): Track individual objects.
+ rzSize := computeRZlog(span.elemsize)
+ span.elemsize -= rzSize
+ span.limit -= rzSize
+ span.userArenaChunkFree = makeAddrRange(span.base(), span.limit)
+ asanpoison(unsafe.Pointer(span.limit), span.npages*pageSize-span.elemsize)
+ asanunpoison(unsafe.Pointer(span.base()), span.elemsize)
+ }
+
+ if rate := MemProfileRate; rate > 0 {
+ c := getMCache(mp)
+ if c == nil {
+ throw("newUserArenaChunk called without a P or outside bootstrapping")
+ }
+ // Note cache c only valid while m acquired; see #47302
+ if rate != 1 && userArenaChunkBytes < c.nextSample {
+ c.nextSample -= userArenaChunkBytes
+ } else {
+ profilealloc(mp, unsafe.Pointer(span.base()), userArenaChunkBytes)
+ }
+ }
+ mp.mallocing = 0
+ releasem(mp)
+
+ // Again, because this chunk counts toward heapLive, potentially trigger a GC.
+ if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
+ gcStart(t)
+ }
+
+ if debug.malloc {
+ if debug.allocfreetrace != 0 {
+ tracealloc(unsafe.Pointer(span.base()), userArenaChunkBytes, nil)
+ }
+
+ if inittrace.active && inittrace.id == getg().goid {
+ // Init functions are executed sequentially in a single goroutine.
+ inittrace.bytes += uint64(userArenaChunkBytes)
+ }
+ }
+
+ // Double-check it's aligned to the physical page size. Based on the current
+ // implementation this is trivially true, but it need not be in the future.
+ // However, if it's not aligned to the physical page size then we can't properly
+ // set it to fault later.
+ if uintptr(x)%physPageSize != 0 {
+ throw("user arena chunk is not aligned to the physical page size")
+ }
+
+ return x, span
+}
+
+// isUnusedUserArenaChunk indicates that the arena chunk has been set to fault
+// and doesn't contain any scannable memory anymore. However, it might still be
+// mSpanInUse as it sits on the quarantine list, since it needs to be swept.
+//
+// This is not safe to execute unless the caller has ownership of the mspan or
+// the world is stopped (preemption is prevented while the relevant state changes).
+//
+// This is really only meant to be used by accounting tests in the runtime to
+// distinguish when a span shouldn't be counted (since mSpanInUse might not be
+// enough).
+func (s *mspan) isUnusedUserArenaChunk() bool {
+ return s.isUserArenaChunk && s.spanclass == makeSpanClass(0, true)
+}
+
+// setUserArenaChunkToFault sets the address space for the user arena chunk to fault
+// and releases any underlying memory resources.
+//
+// Must be in a non-preemptible state to ensure the consistency of statistics
+// exported to MemStats.
+func (s *mspan) setUserArenaChunkToFault() {
+ if !s.isUserArenaChunk {
+ throw("invalid span in heapArena for user arena")
+ }
+ if s.npages*pageSize != userArenaChunkBytes {
+ throw("span on userArena.faultList has invalid size")
+ }
+
+ // Update the span class to be noscan. What we want to happen is that
+ // any pointer into the span keeps it from getting recycled, so we want
+ // the mark bit to get set, but we're about to set the address space to fault,
+ // so we have to prevent the GC from scanning this memory.
+ //
+ // It's OK to set it here because (1) a GC isn't in progress, so the scanning code
+ // won't make a bad decision, (2) we're currently non-preemptible and in the runtime,
+ // so a GC is blocked from starting. We might race with sweeping, which could
+ // put it on the "wrong" sweep list, but really don't care because the chunk is
+ // treated as a large object span and there's no meaningful difference between scan
+ // and noscan large objects in the sweeper. The STW at the start of the GC acts as a
+ // barrier for this update.
+ s.spanclass = makeSpanClass(0, true)
+
+ // Actually set the arena chunk to fault, so we'll get dangling pointer errors.
+ // sysFault currently uses a method on each OS that forces it to evacuate all
+ // memory backing the chunk.
+ sysFault(unsafe.Pointer(s.base()), s.npages*pageSize)
+
+ // Everything on the list is counted as in-use, however sysFault transitions to
+ // Reserved, not Prepared, so we skip updating heapFree or heapReleased and just
+ // remove the memory from the total altogether; it's just address space now.
+ gcController.heapInUse.add(-int64(s.npages * pageSize))
+
+ // Count this as a free of an object right now as opposed to when
+ // the span gets off the quarantine list. The main reason is so that the
+ // amount of bytes allocated doesn't exceed how much is counted as
+ // "mapped ready," which could cause a deadlock in the pacer.
+ gcController.totalFree.Add(int64(s.npages * pageSize))
+
+ // Update consistent stats to match.
+ //
+ // We're non-preemptible, so it's safe to update consistent stats (our P
+ // won't change out from under us).
+ stats := memstats.heapStats.acquire()
+ atomic.Xaddint64(&stats.committed, -int64(s.npages*pageSize))
+ atomic.Xaddint64(&stats.inHeap, -int64(s.npages*pageSize))
+ atomic.Xadd64(&stats.largeFreeCount, 1)
+ atomic.Xadd64(&stats.largeFree, int64(s.npages*pageSize))
+ memstats.heapStats.release()
+
+ // This counts as a free, so update heapLive.
+ gcController.update(-int64(s.npages*pageSize), 0)
+
+ // Mark it as free for the race detector.
+ if raceenabled {
+ racefree(unsafe.Pointer(s.base()), s.elemsize)
+ }
+
+ systemstack(func() {
+ // Add the user arena to the quarantine list.
+ lock(&mheap_.lock)
+ mheap_.userArena.quarantineList.insert(s)
+ unlock(&mheap_.lock)
+ })
+}
+
+// inUserArenaChunk returns true if p points to a user arena chunk.
+func inUserArenaChunk(p uintptr) bool {
+ s := spanOf(p)
+ if s == nil {
+ return false
+ }
+ return s.isUserArenaChunk
+}
+
+// freeUserArenaChunk releases the user arena represented by s back to the runtime.
+//
+// x must be a live pointer within s.
+//
+// The runtime will set the user arena to fault once it's safe (the GC is no longer running)
+// and then once the user arena is no longer referenced by the application, will allow it to
+// be reused.
+func freeUserArenaChunk(s *mspan, x unsafe.Pointer) {
+ if !s.isUserArenaChunk {
+ throw("span is not for a user arena")
+ }
+ if s.npages*pageSize != userArenaChunkBytes {
+ throw("invalid user arena span size")
+ }
+
+ // Mark the region as free to various sanitizers immediately instead
+ // of handling them at sweep time.
+ if raceenabled {
+ racefree(unsafe.Pointer(s.base()), s.elemsize)
+ }
+ if msanenabled {
+ msanfree(unsafe.Pointer(s.base()), s.elemsize)
+ }
+ if asanenabled {
+ asanpoison(unsafe.Pointer(s.base()), s.elemsize)
+ }
+
+ // Make ourselves non-preemptible as we manipulate state and statistics.
+ //
+ // Also required by setUserArenaChunkToFault.
+ mp := acquirem()
+
+ // We can only set user arenas to fault if we're in the _GCoff phase.
+ if gcphase == _GCoff {
+ lock(&userArenaState.lock)
+ faultList := userArenaState.fault
+ userArenaState.fault = nil
+ unlock(&userArenaState.lock)
+
+ s.setUserArenaChunkToFault()
+ for _, lc := range faultList {
+ lc.mspan.setUserArenaChunkToFault()
+ }
+
+ // Until the chunks are set to fault, keep them alive via the fault list.
+ KeepAlive(x)
+ KeepAlive(faultList)
+ } else {
+ // Put the user arena on the fault list.
+ lock(&userArenaState.lock)
+ userArenaState.fault = append(userArenaState.fault, liveUserArenaChunk{s, x})
+ unlock(&userArenaState.lock)
+ }
+ releasem(mp)
+}
+
+// allocUserArenaChunk attempts to reuse a free user arena chunk represented
+// as a span.
+//
+// Must be in a non-preemptible state to ensure the consistency of statistics
+// exported to MemStats.
+//
+// Acquires the heap lock. Must run on the system stack for that reason.
+//
+//go:systemstack
+func (h *mheap) allocUserArenaChunk() *mspan {
+ var s *mspan
+ var base uintptr
+
+ // First check the free list.
+ lock(&h.lock)
+ if !h.userArena.readyList.isEmpty() {
+ s = h.userArena.readyList.first
+ h.userArena.readyList.remove(s)
+ base = s.base()
+ } else {
+ // Free list was empty, so allocate a new arena.
+ hintList := &h.userArena.arenaHints
+ if raceenabled {
+ // In race mode just use the regular heap hints. We might fragment
+ // the address space, but the race detector requires that the heap
+ // is mapped contiguously.
+ hintList = &h.arenaHints
+ }
+ v, size := h.sysAlloc(userArenaChunkBytes, hintList, false)
+ if size%userArenaChunkBytes != 0 {
+ throw("sysAlloc size is not divisible by userArenaChunkBytes")
+ }
+ if size > userArenaChunkBytes {
+ // We got more than we asked for. This can happen if
+ // heapArenaBytes > userArenaChunkBytes, or if sysAlloc just returns
+ // some extra as a result of trying to find an aligned region.
+ //
+ // Divide it up and put it on the ready list.
+ for i := uintptr(userArenaChunkBytes); i < size; i += userArenaChunkBytes {
+ s := h.allocMSpanLocked()
+ s.init(uintptr(v)+i, userArenaChunkPages)
+ h.userArena.readyList.insertBack(s)
+ }
+ size = userArenaChunkBytes
+ }
+ base = uintptr(v)
+ if base == 0 {
+ // Out of memory.
+ unlock(&h.lock)
+ return nil
+ }
+ s = h.allocMSpanLocked()
+ }
+ unlock(&h.lock)
+
+ // sysAlloc returns Reserved address space, and any span we're
+ // reusing is set to fault (so, also Reserved), so transition
+ // it to Prepared and then Ready.
+ //
+ // Unlike (*mheap).grow, just map in everything that we
+ // asked for. We're likely going to use it all.
+ sysMap(unsafe.Pointer(base), userArenaChunkBytes, &gcController.heapReleased)
+ sysUsed(unsafe.Pointer(base), userArenaChunkBytes, userArenaChunkBytes)
+
+ // Model the user arena as a heap span for a large object.
+ spc := makeSpanClass(0, false)
+ h.initSpan(s, spanAllocHeap, spc, base, userArenaChunkPages)
+ s.isUserArenaChunk = true
+
+ // Account for this new arena chunk memory.
+ gcController.heapInUse.add(int64(userArenaChunkBytes))
+ gcController.heapReleased.add(-int64(userArenaChunkBytes))
+
+ stats := memstats.heapStats.acquire()
+ atomic.Xaddint64(&stats.inHeap, int64(userArenaChunkBytes))
+ atomic.Xaddint64(&stats.committed, int64(userArenaChunkBytes))
+
+ // Model the arena as a single large malloc.
+ atomic.Xadd64(&stats.largeAlloc, int64(userArenaChunkBytes))
+ atomic.Xadd64(&stats.largeAllocCount, 1)
+ memstats.heapStats.release()
+
+ // Count the alloc in inconsistent, internal stats.
+ gcController.totalAlloc.Add(int64(userArenaChunkBytes))
+
+ // Update heapLive.
+ gcController.update(int64(userArenaChunkBytes), 0)
+
+ // Put the large span in the mcentral swept list so that it's
+ // visible to the background sweeper.
+ h.central[spc].mcentral.fullSwept(h.sweepgen).push(s)
+ s.limit = s.base() + userArenaChunkBytes
+ s.freeindex = 1
+ s.allocCount = 1
+
+ // This must clear the entire heap bitmap so that it's safe
+ // to allocate noscan data without writing anything out.
+ s.initHeapBits(true)
+
+ // Clear the span preemptively. It's an arena chunk, so let's assume
+ // everything is going to be used.
+ //
+ // This also seems to make a massive difference as to whether or
+ // not Linux decides to back this memory with transparent huge
+ // pages. There's latency involved in this zeroing, but the hugepage
+ // gains are almost always worth it. Note: it's important that we
+ // clear even if it's freshly mapped and we know there's no point
+ // to zeroing as *that* is the critical signal to use huge pages.
+ memclrNoHeapPointers(unsafe.Pointer(s.base()), s.elemsize)
+ s.needzero = 0
+
+ s.freeIndexForScan = 1
+
+ // Set up the range for allocation.
+ s.userArenaChunkFree = makeAddrRange(base, s.limit)
+ return s
+}
diff --git a/src/runtime/arena_test.go b/src/runtime/arena_test.go
new file mode 100644
index 0000000..7e121ad
--- /dev/null
+++ b/src/runtime/arena_test.go
@@ -0,0 +1,529 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "internal/goarch"
+ "reflect"
+ . "runtime"
+ "runtime/debug"
+ "runtime/internal/atomic"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+type smallScalar struct {
+ X uintptr
+}
+type smallPointer struct {
+ X *smallPointer
+}
+type smallPointerMix struct {
+ A *smallPointer
+ B byte
+ C *smallPointer
+ D [11]byte
+}
+type mediumScalarEven [8192]byte
+type mediumScalarOdd [3321]byte
+type mediumPointerEven [1024]*smallPointer
+type mediumPointerOdd [1023]*smallPointer
+
+type largeScalar [UserArenaChunkBytes + 1]byte
+type largePointer [UserArenaChunkBytes/unsafe.Sizeof(&smallPointer{}) + 1]*smallPointer
+
+func TestUserArena(t *testing.T) {
+ // Set GOMAXPROCS to 2 so we don't run too many of these
+ // tests in parallel.
+ defer GOMAXPROCS(GOMAXPROCS(2))
+
+ // Start a subtest so that we can clean up after any parallel tests within.
+ t.Run("Alloc", func(t *testing.T) {
+ ss := &smallScalar{5}
+ runSubTestUserArenaNew(t, ss, true)
+
+ sp := &smallPointer{new(smallPointer)}
+ runSubTestUserArenaNew(t, sp, true)
+
+ spm := &smallPointerMix{sp, 5, nil, [11]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}
+ runSubTestUserArenaNew(t, spm, true)
+
+ mse := new(mediumScalarEven)
+ for i := range mse {
+ mse[i] = 121
+ }
+ runSubTestUserArenaNew(t, mse, true)
+
+ mso := new(mediumScalarOdd)
+ for i := range mso {
+ mso[i] = 122
+ }
+ runSubTestUserArenaNew(t, mso, true)
+
+ mpe := new(mediumPointerEven)
+ for i := range mpe {
+ mpe[i] = sp
+ }
+ runSubTestUserArenaNew(t, mpe, true)
+
+ mpo := new(mediumPointerOdd)
+ for i := range mpo {
+ mpo[i] = sp
+ }
+ runSubTestUserArenaNew(t, mpo, true)
+
+ ls := new(largeScalar)
+ for i := range ls {
+ ls[i] = 123
+ }
+ // Not in parallel because we don't want to hold this large allocation live.
+ runSubTestUserArenaNew(t, ls, false)
+
+ lp := new(largePointer)
+ for i := range lp {
+ lp[i] = sp
+ }
+ // Not in parallel because we don't want to hold this large allocation live.
+ runSubTestUserArenaNew(t, lp, false)
+
+ sss := make([]smallScalar, 25)
+ for i := range sss {
+ sss[i] = smallScalar{12}
+ }
+ runSubTestUserArenaSlice(t, sss, true)
+
+ mpos := make([]mediumPointerOdd, 5)
+ for i := range mpos {
+ mpos[i] = *mpo
+ }
+ runSubTestUserArenaSlice(t, mpos, true)
+
+ sps := make([]smallPointer, UserArenaChunkBytes/unsafe.Sizeof(smallPointer{})+1)
+ for i := range sps {
+ sps[i] = *sp
+ }
+ // Not in parallel because we don't want to hold this large allocation live.
+ runSubTestUserArenaSlice(t, sps, false)
+
+ // Test zero-sized types.
+ t.Run("struct{}", func(t *testing.T) {
+ arena := NewUserArena()
+ var x any
+ x = (*struct{})(nil)
+ arena.New(&x)
+ if v := unsafe.Pointer(x.(*struct{})); v != ZeroBase {
+ t.Errorf("expected zero-sized type to be allocated as zerobase: got %x, want %x", v, ZeroBase)
+ }
+ arena.Free()
+ })
+ t.Run("[]struct{}", func(t *testing.T) {
+ arena := NewUserArena()
+ var sl []struct{}
+ arena.Slice(&sl, 10)
+ if v := unsafe.Pointer(&sl[0]); v != ZeroBase {
+ t.Errorf("expected zero-sized type to be allocated as zerobase: got %x, want %x", v, ZeroBase)
+ }
+ arena.Free()
+ })
+ t.Run("[]int (cap 0)", func(t *testing.T) {
+ arena := NewUserArena()
+ var sl []int
+ arena.Slice(&sl, 0)
+ if len(sl) != 0 {
+ t.Errorf("expected requested zero-sized slice to still have zero length: got %x, want 0", len(sl))
+ }
+ arena.Free()
+ })
+ })
+
+ // Run a GC cycle to get any arenas off the quarantine list.
+ GC()
+
+ if n := GlobalWaitingArenaChunks(); n != 0 {
+ t.Errorf("expected zero waiting arena chunks, found %d", n)
+ }
+}
+
+func runSubTestUserArenaNew[S comparable](t *testing.T, value *S, parallel bool) {
+ t.Run(reflect.TypeOf(value).Elem().Name(), func(t *testing.T) {
+ if parallel {
+ t.Parallel()
+ }
+
+ // Allocate and write data, enough to exhaust the arena.
+ //
+ // This is an underestimate, likely leaving some space in the arena. That's a good thing,
+ // because it gives us coverage of boundary cases.
+ n := int(UserArenaChunkBytes / unsafe.Sizeof(*value))
+ if n == 0 {
+ n = 1
+ }
+
+ // Create a new arena and do a bunch of operations on it.
+ arena := NewUserArena()
+
+ arenaValues := make([]*S, 0, n)
+ for j := 0; j < n; j++ {
+ var x any
+ x = (*S)(nil)
+ arena.New(&x)
+ s := x.(*S)
+ *s = *value
+ arenaValues = append(arenaValues, s)
+ }
+ // Check integrity of allocated data.
+ for _, s := range arenaValues {
+ if *s != *value {
+ t.Errorf("failed integrity check: got %#v, want %#v", *s, *value)
+ }
+ }
+
+ // Release the arena.
+ arena.Free()
+ })
+}
+
+func runSubTestUserArenaSlice[S comparable](t *testing.T, value []S, parallel bool) {
+ t.Run("[]"+reflect.TypeOf(value).Elem().Name(), func(t *testing.T) {
+ if parallel {
+ t.Parallel()
+ }
+
+ // Allocate and write data, enough to exhaust the arena.
+ //
+ // This is an underestimate, likely leaving some space in the arena. That's a good thing,
+ // because it gives us coverage of boundary cases.
+ n := int(UserArenaChunkBytes / (unsafe.Sizeof(*new(S)) * uintptr(cap(value))))
+ if n == 0 {
+ n = 1
+ }
+
+ // Create a new arena and do a bunch of operations on it.
+ arena := NewUserArena()
+
+ arenaValues := make([][]S, 0, n)
+ for j := 0; j < n; j++ {
+ var sl []S
+ arena.Slice(&sl, cap(value))
+ copy(sl, value)
+ arenaValues = append(arenaValues, sl)
+ }
+ // Check integrity of allocated data.
+ for _, sl := range arenaValues {
+ for i := range sl {
+ got := sl[i]
+ want := value[i]
+ if got != want {
+ t.Errorf("failed integrity check: got %#v, want %#v at index %d", got, want, i)
+ }
+ }
+ }
+
+ // Release the arena.
+ arena.Free()
+ })
+}
+
+func TestUserArenaLiveness(t *testing.T) {
+ t.Run("Free", func(t *testing.T) {
+ testUserArenaLiveness(t, false)
+ })
+ t.Run("Finalizer", func(t *testing.T) {
+ testUserArenaLiveness(t, true)
+ })
+}
+
+func testUserArenaLiveness(t *testing.T, useArenaFinalizer bool) {
+ // Disable the GC so that there's zero chance we try doing anything arena related *during*
+ // a mark phase, since otherwise a bunch of arenas could end up on the fault list.
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
+
+ // Defensively ensure that any full arena chunks leftover from previous tests have been cleared.
+ GC()
+ GC()
+
+ arena := NewUserArena()
+
+ // Allocate a few pointer-ful but uninitialized objects so that later we can
+ // place a reference to a heap object at a more interesting location.
+ for i := 0; i < 3; i++ {
+ var x any
+ x = (*mediumPointerOdd)(nil)
+ arena.New(&x)
+ }
+
+ var x any
+ x = (*smallPointerMix)(nil)
+ arena.New(&x)
+ v := x.(*smallPointerMix)
+
+ var safeToFinalize atomic.Bool
+ var finalized atomic.Bool
+ v.C = new(smallPointer)
+ SetFinalizer(v.C, func(_ *smallPointer) {
+ if !safeToFinalize.Load() {
+ t.Error("finalized arena-referenced object unexpectedly")
+ }
+ finalized.Store(true)
+ })
+
+ // Make sure it stays alive.
+ GC()
+ GC()
+
+ // In order to ensure the object can be freed, we now need to make sure to use
+ // the entire arena. Exhaust the rest of the arena.
+
+ for i := 0; i < int(UserArenaChunkBytes/unsafe.Sizeof(mediumScalarEven{})); i++ {
+ var x any
+ x = (*mediumScalarEven)(nil)
+ arena.New(&x)
+ }
+
+ // Make sure it stays alive again.
+ GC()
+ GC()
+
+ v = nil
+
+ safeToFinalize.Store(true)
+ if useArenaFinalizer {
+ arena = nil
+
+ // Try to queue the arena finalizer.
+ GC()
+ GC()
+
+ // In order for the finalizer we actually want to run to execute,
+ // we need to make sure this one runs first.
+ if !BlockUntilEmptyFinalizerQueue(int64(2 * time.Second)) {
+ t.Fatal("finalizer queue was never emptied")
+ }
+ } else {
+ // Free the arena explicitly.
+ arena.Free()
+ }
+
+ // Try to queue the object's finalizer that we set earlier.
+ GC()
+ GC()
+
+ if !BlockUntilEmptyFinalizerQueue(int64(2 * time.Second)) {
+ t.Fatal("finalizer queue was never emptied")
+ }
+ if !finalized.Load() {
+ t.Error("expected arena-referenced object to be finalized")
+ }
+}
+
+func TestUserArenaClearsPointerBits(t *testing.T) {
+ // This is a regression test for a serious issue wherein if pointer bits
+ // aren't properly cleared, it's possible to allocate scalar data down
+ // into a previously pointer-ful area, causing misinterpretation by the GC.
+
+ // Create a large object, grab a pointer into it, and free it.
+ x := new([8 << 20]byte)
+ xp := uintptr(unsafe.Pointer(&x[124]))
+ var finalized atomic.Bool
+ SetFinalizer(x, func(_ *[8 << 20]byte) {
+ finalized.Store(true)
+ })
+
+ // Write three chunks worth of pointer data. Three gives us a
+ // high likelihood that when we write 2 later, we'll get the behavior
+ // we want.
+ a := NewUserArena()
+ for i := 0; i < int(UserArenaChunkBytes/goarch.PtrSize*3); i++ {
+ var x any
+ x = (*smallPointer)(nil)
+ a.New(&x)
+ }
+ a.Free()
+
+ // Recycle the arena chunks.
+ GC()
+ GC()
+
+ a = NewUserArena()
+ for i := 0; i < int(UserArenaChunkBytes/goarch.PtrSize*2); i++ {
+ var x any
+ x = (*smallScalar)(nil)
+ a.New(&x)
+ v := x.(*smallScalar)
+ // Write a pointer that should not keep x alive.
+ *v = smallScalar{xp}
+ }
+ KeepAlive(x)
+ x = nil
+
+ // Try to free x.
+ GC()
+ GC()
+
+ if !BlockUntilEmptyFinalizerQueue(int64(2 * time.Second)) {
+ t.Fatal("finalizer queue was never emptied")
+ }
+ if !finalized.Load() {
+ t.Fatal("heap allocation kept alive through non-pointer reference")
+ }
+
+ // Clean up the arena.
+ a.Free()
+ GC()
+ GC()
+}
+
+func TestUserArenaCloneString(t *testing.T) {
+ a := NewUserArena()
+
+ // A static string (not on heap or arena)
+ var s = "abcdefghij"
+
+ // Create a byte slice in the arena, initialize it with s
+ var b []byte
+ a.Slice(&b, len(s))
+ copy(b, s)
+
+ // Create a string that uses the same memory as the byte slice, and hence
+ // is in the arena. This could be an arena API, but hasn't really been
+ // needed yet.
+ var as string
+ asHeader := (*reflect.StringHeader)(unsafe.Pointer(&as))
+ asHeader.Data = (*reflect.SliceHeader)(unsafe.Pointer(&b)).Data
+ asHeader.Len = len(b)
+
+ // Clone should make a copy of as, since it is in the arena.
+ asCopy := UserArenaClone(as)
+ if (*reflect.StringHeader)(unsafe.Pointer(&as)).Data == (*reflect.StringHeader)(unsafe.Pointer(&asCopy)).Data {
+ t.Error("Clone did not make a copy")
+ }
+
+ // Clone should make a copy of subAs, since subAs is just part of as and so is in the arena.
+ subAs := as[1:3]
+ subAsCopy := UserArenaClone(subAs)
+ if (*reflect.StringHeader)(unsafe.Pointer(&subAs)).Data == (*reflect.StringHeader)(unsafe.Pointer(&subAsCopy)).Data {
+ t.Error("Clone did not make a copy")
+ }
+ if len(subAs) != len(subAsCopy) {
+ t.Errorf("Clone made an incorrect copy (bad length): %d -> %d", len(subAs), len(subAsCopy))
+ } else {
+ for i := range subAs {
+ if subAs[i] != subAsCopy[i] {
+ t.Errorf("Clone made an incorrect copy (data at index %d): %d -> %d", i, subAs[i], subAs[i])
+ }
+ }
+ }
+
+ // Clone should not make a copy of doubleAs, since doubleAs will be on the heap.
+ doubleAs := as + as
+ doubleAsCopy := UserArenaClone(doubleAs)
+ if (*reflect.StringHeader)(unsafe.Pointer(&doubleAs)).Data != (*reflect.StringHeader)(unsafe.Pointer(&doubleAsCopy)).Data {
+ t.Error("Clone should not have made a copy")
+ }
+
+ // Clone should not make a copy of s, since s is a static string.
+ sCopy := UserArenaClone(s)
+ if (*reflect.StringHeader)(unsafe.Pointer(&s)).Data != (*reflect.StringHeader)(unsafe.Pointer(&sCopy)).Data {
+ t.Error("Clone should not have made a copy")
+ }
+
+ a.Free()
+}
+
+func TestUserArenaClonePointer(t *testing.T) {
+ a := NewUserArena()
+
+ // Clone should not make a copy of a heap-allocated smallScalar.
+ x := Escape(new(smallScalar))
+ xCopy := UserArenaClone(x)
+ if unsafe.Pointer(x) != unsafe.Pointer(xCopy) {
+ t.Errorf("Clone should not have made a copy: %#v -> %#v", x, xCopy)
+ }
+
+ // Clone should make a copy of an arena-allocated smallScalar.
+ var i any
+ i = (*smallScalar)(nil)
+ a.New(&i)
+ xArena := i.(*smallScalar)
+ xArenaCopy := UserArenaClone(xArena)
+ if unsafe.Pointer(xArena) == unsafe.Pointer(xArenaCopy) {
+ t.Errorf("Clone should have made a copy: %#v -> %#v", xArena, xArenaCopy)
+ }
+ if *xArena != *xArenaCopy {
+ t.Errorf("Clone made an incorrect copy copy: %#v -> %#v", *xArena, *xArenaCopy)
+ }
+
+ a.Free()
+}
+
+func TestUserArenaCloneSlice(t *testing.T) {
+ a := NewUserArena()
+
+ // A static string (not on heap or arena)
+ var s = "klmnopqrstuv"
+
+ // Create a byte slice in the arena, initialize it with s
+ var b []byte
+ a.Slice(&b, len(s))
+ copy(b, s)
+
+ // Clone should make a copy of b, since it is in the arena.
+ bCopy := UserArenaClone(b)
+ if unsafe.Pointer(&b[0]) == unsafe.Pointer(&bCopy[0]) {
+ t.Errorf("Clone did not make a copy: %#v -> %#v", b, bCopy)
+ }
+ if len(b) != len(bCopy) {
+ t.Errorf("Clone made an incorrect copy (bad length): %d -> %d", len(b), len(bCopy))
+ } else {
+ for i := range b {
+ if b[i] != bCopy[i] {
+ t.Errorf("Clone made an incorrect copy (data at index %d): %d -> %d", i, b[i], bCopy[i])
+ }
+ }
+ }
+
+ // Clone should make a copy of bSub, since bSub is just part of b and so is in the arena.
+ bSub := b[1:3]
+ bSubCopy := UserArenaClone(bSub)
+ if unsafe.Pointer(&bSub[0]) == unsafe.Pointer(&bSubCopy[0]) {
+ t.Errorf("Clone did not make a copy: %#v -> %#v", bSub, bSubCopy)
+ }
+ if len(bSub) != len(bSubCopy) {
+ t.Errorf("Clone made an incorrect copy (bad length): %d -> %d", len(bSub), len(bSubCopy))
+ } else {
+ for i := range bSub {
+ if bSub[i] != bSubCopy[i] {
+ t.Errorf("Clone made an incorrect copy (data at index %d): %d -> %d", i, bSub[i], bSubCopy[i])
+ }
+ }
+ }
+
+ // Clone should not make a copy of bNotArena, since it will not be in an arena.
+ bNotArena := make([]byte, len(s))
+ copy(bNotArena, s)
+ bNotArenaCopy := UserArenaClone(bNotArena)
+ if unsafe.Pointer(&bNotArena[0]) != unsafe.Pointer(&bNotArenaCopy[0]) {
+ t.Error("Clone should not have made a copy")
+ }
+
+ a.Free()
+}
+
+func TestUserArenaClonePanic(t *testing.T) {
+ var s string
+ func() {
+ x := smallScalar{2}
+ defer func() {
+ if v := recover(); v != nil {
+ s = v.(string)
+ }
+ }()
+ UserArenaClone(x)
+ }()
+ if s == "" {
+ t.Errorf("expected panic from Clone")
+ }
+}
diff --git a/src/runtime/asan/asan.go b/src/runtime/asan/asan.go
index 4359f41..25f15ae 100644
--- a/src/runtime/asan/asan.go
+++ b/src/runtime/asan/asan.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build asan && linux && (arm64 || amd64 || riscv64)
+//go:build asan && linux && (arm64 || amd64 || riscv64 || ppc64le)
package asan
@@ -34,7 +34,7 @@
__asan_poison_memory_region(addr, sz);
}
-// Keep in sync with the defination in compiler-rt
+// Keep in sync with the definition in compiler-rt
// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/asan/asan_interface_internal.h#L41
// This structure is used to describe the source location of
// a place where global was defined.
@@ -44,7 +44,7 @@
int column_no;
};
-// Keep in sync with the defination in compiler-rt
+// Keep in sync with the definition in compiler-rt
// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/asan/asan_interface_internal.h#L48
// So far, the current implementation is only compatible with the ASan library from version v7 to v9.
// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/asan/asan_init_version.h
diff --git a/src/runtime/asan_ppc64le.s b/src/runtime/asan_ppc64le.s
new file mode 100644
index 0000000..d13301a
--- /dev/null
+++ b/src/runtime/asan_ppc64le.s
@@ -0,0 +1,87 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build asan
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define RARG0 R3
+#define RARG1 R4
+#define RARG2 R5
+#define RARG3 R6
+#define FARG R12
+
+// Called from instrumented code.
+// func runtime·doasanread(addr unsafe.Pointer, sz, sp, pc uintptr)
+TEXT runtime·doasanread(SB),NOSPLIT|NOFRAME,$0-32
+ MOVD addr+0(FP), RARG0
+ MOVD sz+8(FP), RARG1
+ MOVD sp+16(FP), RARG2
+ MOVD pc+24(FP), RARG3
+ // void __asan_read_go(void *addr, uintptr_t sz, void *sp, void *pc);
+ MOVD $__asan_read_go(SB), FARG
+ BR asancall<>(SB)
+
+// func runtime·doasanwrite(addr unsafe.Pointer, sz, sp, pc uintptr)
+TEXT runtime·doasanwrite(SB),NOSPLIT|NOFRAME,$0-32
+ MOVD addr+0(FP), RARG0
+ MOVD sz+8(FP), RARG1
+ MOVD sp+16(FP), RARG2
+ MOVD pc+24(FP), RARG3
+ // void __asan_write_go(void *addr, uintptr_t sz, void *sp, void *pc);
+ MOVD $__asan_write_go(SB), FARG
+ BR asancall<>(SB)
+
+// func runtime·asanunpoison(addr unsafe.Pointer, sz uintptr)
+TEXT runtime·asanunpoison(SB),NOSPLIT|NOFRAME,$0-16
+ MOVD addr+0(FP), RARG0
+ MOVD sz+8(FP), RARG1
+ // void __asan_unpoison_go(void *addr, uintptr_t sz);
+ MOVD $__asan_unpoison_go(SB), FARG
+ BR asancall<>(SB)
+
+// func runtime·asanpoison(addr unsafe.Pointer, sz uintptr)
+TEXT runtime·asanpoison(SB),NOSPLIT|NOFRAME,$0-16
+ MOVD addr+0(FP), RARG0
+ MOVD sz+8(FP), RARG1
+ // void __asan_poison_go(void *addr, uintptr_t sz);
+ MOVD $__asan_poison_go(SB), FARG
+ BR asancall<>(SB)
+
+// func runtime·asanregisterglobals(addr unsafe.Pointer, n uintptr)
+TEXT runtime·asanregisterglobals(SB),NOSPLIT|NOFRAME,$0-16
+ MOVD addr+0(FP), RARG0
+ MOVD n+8(FP), RARG1
+ // void __asan_register_globals_go(void *addr, uintptr_t n);
+ MOVD $__asan_register_globals_go(SB), FARG
+ BR asancall<>(SB)
+
+// Switches SP to g0 stack and calls (FARG). Arguments already set.
+TEXT asancall<>(SB), NOSPLIT, $0-0
+ // LR saved in generated prologue
+ // Get info from the current goroutine
+ MOVD runtime·tls_g(SB), R10 // g offset in TLS
+ MOVD 0(R10), g
+ MOVD g_m(g), R7 // m for g
+ MOVD R1, R16 // callee-saved, preserved across C call
+ MOVD m_g0(R7), R10 // g0 for m
+ CMP R10, g // same g0?
+ BEQ call // already on g0
+ MOVD (g_sched+gobuf_sp)(R10), R1 // switch R1
+call:
+ // prepare frame for C ABI
+ SUB $32, R1 // create frame for callee saving LR, CR, R2 etc.
+ RLDCR $0, R1, $~15, R1 // align SP to 16 bytes
+ MOVD FARG, CTR // address of function to be called
+ MOVD R0, 0(R1) // clear back chain pointer
+ BL (CTR)
+ MOVD $0, R0 // C code can clobber R0 set it back to 0
+ MOVD R16, R1 // restore R1;
+ MOVD runtime·tls_g(SB), R10 // find correct g
+ MOVD 0(R10), g
+ RET
+
+// tls_g, g value for each thread in TLS
+GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
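
The stubs above route compiler-inserted ASan checks into the C sanitizer runtime on linux/ppc64le. A hedged illustration of the kind of defect this surfaces when a cgo program is built with "go build -asan"; the C helper is invented for the example:

    package main

    /*
    #include <stdlib.h>

    int use_after_free(void) {
        int *p = malloc(sizeof(int));
        *p = 42;
        free(p);
        return *p; // invalid read, reported by AddressSanitizer at run time
    }
    */
    import "C"

    import "fmt"

    func main() {
        fmt.Println(C.use_after_free())
    }
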
diff --git a/src/runtime/asm_amd64.h b/src/runtime/asm_amd64.h
index 49e0ee2..f7a8896 100644
--- a/src/runtime/asm_amd64.h
+++ b/src/runtime/asm_amd64.h
@@ -5,10 +5,21 @@
// Define features that are guaranteed to be supported by setting the AMD64 variable.
// If a feature is supported, there's no need to check it at runtime every time.
+#ifdef GOAMD64_v2
+#define hasPOPCNT
+#define hasSSE42
+#endif
+
#ifdef GOAMD64_v3
+#define hasAVX
#define hasAVX2
+#define hasPOPCNT
+#define hasSSE42
#endif
#ifdef GOAMD64_v4
+#define hasAVX
#define hasAVX2
+#define hasPOPCNT
+#define hasSSE42
#endif
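
With GOAMD64=v2 and above, POPCNT and SSE4.2 become compile-time guarantees, so assembly guarded by these macros can skip per-call feature checks. For comparison, a hedged sketch of dynamic detection outside the runtime, using golang.org/x/sys/cpu (an assumption of the example, not part of this change):

    package main

    import (
        "fmt"

        "golang.org/x/sys/cpu"
    )

    func main() {
        // With GOAMD64=v3 all three of these are guaranteed at build time.
        fmt.Println("POPCNT:", cpu.X86.HasPOPCNT) // guaranteed from v2
        fmt.Println("SSE4.2:", cpu.X86.HasSSE42)  // guaranteed from v2
        fmt.Println("AVX2:  ", cpu.X86.HasAVX2)   // guaranteed from v3
    }
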
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index d2f7984..13c8de4 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -201,16 +201,16 @@
JZ needtls
// arg 1: g0, already in DI
MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
+ MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
+ MOVQ $0, CX
#ifdef GOOS_android
MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
// Compensate for tls_g (+16).
MOVQ -16(TLS), CX
-#else
- MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
- MOVQ $0, CX
#endif
#ifdef GOOS_windows
+ MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
// Adjust for the Win64 calling convention.
MOVQ CX, R9 // arg 4
MOVQ DX, R8 // arg 3
@@ -251,6 +251,10 @@
JMP ok
#endif
+#ifdef GOOS_windows
+ CALL runtime·wintls(SB)
+#endif
+
LEAQ runtime·m0+m_tls(SB), DI
CALL runtime·settls(SB)
@@ -2026,6 +2030,9 @@
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
+#ifdef GOOS_windows
+GLOBL runtime·tls_g+0(SB), NOPTR, $8
+#endif
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index b47184e..591ef2a 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -387,6 +387,13 @@
RET
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R3), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVW R13, R13
+
MOVW $0, R7
B runtime·morestack(SB)
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 7836ba1..7eb5bcf 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -320,6 +320,13 @@
UNDEF
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R3), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVD RSP, RSP
+
MOVW $0, R26
B runtime·morestack(SB)
diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s
index 3597ebe..1abadb9 100644
--- a/src/runtime/asm_mips64x.s
+++ b/src/runtime/asm_mips64x.s
@@ -258,6 +258,13 @@
UNDEF
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R3), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVV R29, R29
+
MOVV R0, REGCTXT
JMP runtime·morestack(SB)
diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s
index 4a086b8..877c1bb 100644
--- a/src/runtime/asm_mipsx.s
+++ b/src/runtime/asm_mipsx.s
@@ -257,6 +257,13 @@
UNDEF
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R3), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVW R29, R29
+
MOVW R0, REGCTXT
JMP runtime·morestack(SB)
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index c6bcf82..61ff17a 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -334,6 +334,16 @@
UNDEF
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R5), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ // Use OR R0, R1 instead of MOVD R1, R1 as the MOVD instruction
+	// has a special effect on Power8, 9 and 10 by lowering the thread
+	// priority and causing a slowdown in execution time.
+
+ OR R0, R1
MOVD R0, R11
BR runtime·morestack(SB)
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
index 00caa9f..31b81ae 100644
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@@ -158,8 +158,8 @@
*/
// Called during function prolog when more stack is needed.
-// Caller has already loaded:
-// R1: framesize, R2: argsize, R3: LR
+// Called with return address (i.e. caller's PC) in X5 (aka T0),
+// and the LR register contains the caller's LR.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
@@ -209,6 +209,13 @@
// func morestack_noctxt()
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register, and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOV X2, X2
+
MOV ZERO, CTXT
JMP runtime·morestack(SB)
@@ -261,11 +268,7 @@
// func mcall(fn func(*g))
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
-#ifdef GOEXPERIMENT_regabiargs
MOV X10, CTXT
-#else
- MOV fn+0(FP), CTXT
-#endif
// Save caller state in g->sched
MOV X2, (g_sched+gobuf_sp)(g)
@@ -637,7 +640,6 @@
MOV T0, ret+0(FP)
RET
-#ifdef GOEXPERIMENT_regabiargs
// spillArgs stores return values from registers to a *internal/abi.RegArgs in X25.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
MOV X10, (0*8)(X25)
@@ -709,13 +711,6 @@
MOVD (30*8)(X25), F22
MOVD (31*8)(X25), F23
RET
-#else
-TEXT ·spillArgs(SB),NOSPLIT,$0-0
- RET
-
-TEXT ·unspillArgs(SB),NOSPLIT,$0-0
- RET
-#endif
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
@@ -825,157 +820,72 @@
// corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T2, X10
MOV T3, X11
-#else
- MOV T2, x+0(FP)
- MOV T3, y+8(FP)
-#endif
JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T2, X10
MOV T3, X11
-#else
- MOV T2, x+0(FP)
- MOV T3, y+8(FP)
-#endif
JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T2, X10
MOV T3, X11
-#else
- MOV T2, x+0(FP)
- MOV T3, y+8(FP)
-#endif
JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T2, X10
MOV T3, X11
-#else
- MOV T2, x+0(FP)
- MOV T3, y+8(FP)
-#endif
JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T1, X10
MOV T2, X11
-#else
- MOV T1, x+0(FP)
- MOV T2, y+8(FP)
-#endif
JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T0, X10
MOV T1, X11
-#else
- MOV T0, x+0(FP)
- MOV T1, y+8(FP)
-#endif
JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
-#ifdef GOEXPERIMENT_regabiargs
MOV T2, X10
MOV T3, X11
-#else
- MOV T2, x+0(FP)
- MOV T3, y+8(FP)
-#endif
JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
index 9159a67..334e1aa 100644
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@@ -346,6 +346,13 @@
UNDEF
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+	// register (R5), and the unwinder currently doesn't understand it.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVD R15, R15
+
MOVD $0, R12
BR runtime·morestack(SB)
diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s
index d885da6..e075c72 100644
--- a/src/runtime/asm_wasm.s
+++ b/src/runtime/asm_wasm.s
@@ -320,10 +320,8 @@
I64Load stackArgs+16(FP); \
I32WrapI64; \
I64Load stackArgsSize+24(FP); \
- I64Const $3; \
- I64ShrU; \
I32WrapI64; \
- Call runtime·wasmMove(SB); \
+ MemoryCopy; \
End; \
\
MOVD f+8(FP), CTXT; \
diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index b8f0c22..25e0e65 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -35,6 +35,27 @@
atomic.StorepNoWB(noescape(ptr), new)
}
+// atomic_storePointer is the implementation of runtime/internal/atomic.UnsafePointer.Store
+// (like StoreNoWB but with the write barrier).
+//
+//go:nosplit
+//go:linkname atomic_storePointer runtime/internal/atomic.storePointer
+func atomic_storePointer(ptr *unsafe.Pointer, new unsafe.Pointer) {
+ atomicstorep(unsafe.Pointer(ptr), new)
+}
+
+// atomic_casPointer is the implementation of runtime/internal/atomic.UnsafePointer.CompareAndSwap
+// (like CompareAndSwapNoWB but with the write barrier).
+//
+//go:nosplit
+//go:linkname atomic_casPointer runtime/internal/atomic.casPointer
+func atomic_casPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
+ if writeBarrier.enabled {
+ atomicwb(ptr, new)
+ }
+ return atomic.Casp1(ptr, old, new)
+}
+
// Like above, but implement in terms of sync/atomic's uintptr operations.
// We cannot just call the runtime routines, because the race detector expects
// to be able to intercept the sync/atomic forms but not the runtime forms.
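
The new linkname'd helpers give the runtime-internal UnsafePointer type Store and CompareAndSwap operations that respect the GC write barrier. The public analogue is sync/atomic's typed Pointer; the sketch below uses that public API for illustration (it is not the internal one, but it depends on the same write-barrier-aware pointer stores):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type node struct{ v int }

    func main() {
        var p atomic.Pointer[node]

        first := &node{v: 1}
        p.Store(first) // pointer store; the GC write barrier is handled by the runtime

        // The CAS succeeds only if the current value is still first.
        swapped := p.CompareAndSwap(first, &node{v: 2})
        fmt.Println("swapped:", swapped, "now:", p.Load().v)
    }
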
diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go
index 298aa63..b8473e5 100644
--- a/src/runtime/cgo/cgo.go
+++ b/src/runtime/cgo/cgo.go
@@ -23,9 +23,18 @@
#cgo solaris LDFLAGS: -lxnet
#cgo solaris LDFLAGS: -lsocket
-#cgo CFLAGS: -Wall -Werror
+// We use -fno-stack-protector because internal linking won't find
+// the support functions. See issues #52919 and #54313.
+#cgo CFLAGS: -Wall -Werror -fno-stack-protector
#cgo solaris CPPFLAGS: -D_POSIX_PTHREAD_SEMANTICS
*/
import "C"
+
+import "runtime/internal/sys"
+
+// Incomplete is used specifically for the semantics of incomplete C types.
+type Incomplete struct {
+ _ sys.NotInHeap
+}
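
Embedding sys.NotInHeap makes Incomplete a type the garbage collector will never allocate or scan, which is how cgo now models incomplete (opaque) C types. A hedged sketch of where such a type appears from the user's side; the C preamble is invented for illustration:

    package main

    /*
    // An incomplete (opaque) C type: only pointers to it are ever handled.
    struct handle;
    struct handle *new_handle(void) { return 0; }
    */
    import "C"

    import "fmt"

    func main() {
        // h has type *C.struct_handle; its pointee is opaque and not-in-heap,
        // so Go code passes the pointer around but never allocates the pointee.
        h := C.new_handle()
        fmt.Println("nil handle:", h == nil)
    }
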
diff --git a/src/runtime/cgo/gcc_386.S b/src/runtime/cgo/gcc_386.S
index ff55b2c..5e6d715 100644
--- a/src/runtime/cgo/gcc_386.S
+++ b/src/runtime/cgo/gcc_386.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_386.S"
+
/*
* Apple still insists on underscore prefixes for C function names.
*/
diff --git a/src/runtime/cgo/gcc_aix_ppc64.S b/src/runtime/cgo/gcc_aix_ppc64.S
index a00fae2..a77363e 100644
--- a/src/runtime/cgo/gcc_aix_ppc64.S
+++ b/src/runtime/cgo/gcc_aix_ppc64.S
@@ -5,6 +5,8 @@
// +build ppc64
// +build aix
+.file "gcc_aix_ppc64.S"
+
/*
* void crosscall_ppc64(void (*fn)(void), void *g)
*
diff --git a/src/runtime/cgo/gcc_amd64.S b/src/runtime/cgo/gcc_amd64.S
index 46699d1..5a1629e 100644
--- a/src/runtime/cgo/gcc_amd64.S
+++ b/src/runtime/cgo/gcc_amd64.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_amd64.S"
+
/*
* Apple still insists on underscore prefixes for C function names.
*/
diff --git a/src/runtime/cgo/gcc_arm.S b/src/runtime/cgo/gcc_arm.S
index fe1c48b..6e8c14a 100644
--- a/src/runtime/cgo/gcc_arm.S
+++ b/src/runtime/cgo/gcc_arm.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_arm.S"
+
/*
* Apple still insists on underscore prefixes for C function names.
*/
diff --git a/src/runtime/cgo/gcc_arm64.S b/src/runtime/cgo/gcc_arm64.S
index 9154d2a..865f67c 100644
--- a/src/runtime/cgo/gcc_arm64.S
+++ b/src/runtime/cgo/gcc_arm64.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_arm64.S"
+
/*
* Apple still insists on underscore prefixes for C function names.
*/
diff --git a/src/runtime/cgo/gcc_darwin_amd64.c b/src/runtime/cgo/gcc_darwin_amd64.c
index d5b7fd8..955b81d 100644
--- a/src/runtime/cgo/gcc_darwin_amd64.c
+++ b/src/runtime/cgo/gcc_darwin_amd64.c
@@ -14,15 +14,12 @@
void
x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
{
- pthread_attr_t attr;
size_t size;
setg_gcc = setg;
- pthread_attr_init(&attr);
- pthread_attr_getstacksize(&attr, &size);
- g->stacklo = (uintptr)&attr - size + 4096;
- pthread_attr_destroy(&attr);
+ size = pthread_get_stacksize_np(pthread_self());
+ g->stacklo = (uintptr)&size - size + 4096;
}
@@ -38,8 +35,9 @@
sigfillset(&ign);
pthread_sigmask(SIG_SETMASK, &ign, &oset);
+ size = pthread_get_stacksize_np(pthread_self());
pthread_attr_init(&attr);
- pthread_attr_getstacksize(&attr, &size);
+ pthread_attr_setstacksize(&attr, size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;
err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);
diff --git a/src/runtime/cgo/gcc_darwin_arm64.c b/src/runtime/cgo/gcc_darwin_arm64.c
index 24be675..5b77a42 100644
--- a/src/runtime/cgo/gcc_darwin_arm64.c
+++ b/src/runtime/cgo/gcc_darwin_arm64.c
@@ -36,8 +36,9 @@
sigfillset(&ign);
pthread_sigmask(SIG_SETMASK, &ign, &oset);
+ size = pthread_get_stacksize_np(pthread_self());
pthread_attr_init(&attr);
- pthread_attr_getstacksize(&attr, &size);
+ pthread_attr_setstacksize(&attr, size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;
err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);
@@ -126,15 +127,12 @@
void
x_cgo_init(G *g, void (*setg)(void*))
{
- pthread_attr_t attr;
size_t size;
//fprintf(stderr, "x_cgo_init = %p\n", &x_cgo_init); // aid debugging in presence of ASLR
setg_gcc = setg;
- pthread_attr_init(&attr);
- pthread_attr_getstacksize(&attr, &size);
- g->stacklo = (uintptr)&attr - size + 4096;
- pthread_attr_destroy(&attr);
+ size = pthread_get_stacksize_np(pthread_self());
+ g->stacklo = (uintptr)&size - size + 4096;
#if TARGET_OS_IPHONE
darwin_arm_init_mach_exception_handler();
diff --git a/src/runtime/cgo/gcc_freebsd_riscv64.c b/src/runtime/cgo/gcc_freebsd_riscv64.c
new file mode 100644
index 0000000..6ce5e65
--- /dev/null
+++ b/src/runtime/cgo/gcc_freebsd_riscv64.c
@@ -0,0 +1,67 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <errno.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+#include "libcgo_unix.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stacklo = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ // Leave stacklo=0 and set stackhi=size; mstart will do the rest.
+ ts->g->stackhi = size;
+ err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+extern void crosscall1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ crosscall1(ts.fn, setg_gcc, (void*)ts.g);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_linux_ppc64x.S b/src/runtime/cgo/gcc_linux_ppc64x.S
index 595eb38..957ef3a 100644
--- a/src/runtime/cgo/gcc_linux_ppc64x.S
+++ b/src/runtime/cgo/gcc_linux_ppc64x.S
@@ -5,6 +5,8 @@
// +build ppc64 ppc64le
// +build linux
+.file "gcc_linux_ppc64x.S"
+
/*
* Apple still insists on underscore prefixes for C function names.
*/
diff --git a/src/runtime/cgo/gcc_loong64.S b/src/runtime/cgo/gcc_loong64.S
index 100aa33..6b7668f 100644
--- a/src/runtime/cgo/gcc_loong64.S
+++ b/src/runtime/cgo/gcc_loong64.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_loong64.S"
+
/*
* void crosscall1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
*
diff --git a/src/runtime/cgo/gcc_mips64x.S b/src/runtime/cgo/gcc_mips64x.S
index 908dd21..ec24d71 100644
--- a/src/runtime/cgo/gcc_mips64x.S
+++ b/src/runtime/cgo/gcc_mips64x.S
@@ -4,6 +4,8 @@
// +build mips64 mips64le
+.file "gcc_mips64x.S"
+
/*
* void crosscall1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
*
diff --git a/src/runtime/cgo/gcc_mipsx.S b/src/runtime/cgo/gcc_mipsx.S
index 54f4b82..2867f6a 100644
--- a/src/runtime/cgo/gcc_mipsx.S
+++ b/src/runtime/cgo/gcc_mipsx.S
@@ -4,6 +4,8 @@
// +build mips mipsle
+.file "gcc_mipsx.S"
+
/*
* void crosscall1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
*
diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c
index 698a7e3..83d857f 100644
--- a/src/runtime/cgo/gcc_mmap.c
+++ b/src/runtime/cgo/gcc_mmap.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build linux,amd64 linux,arm64 linux,ppc64le
+// +build linux,amd64 linux,arm64 linux,ppc64le freebsd,amd64
#include <errno.h>
#include <stdint.h>
diff --git a/src/runtime/cgo/gcc_riscv64.S b/src/runtime/cgo/gcc_riscv64.S
index f429dc6..8f07649 100644
--- a/src/runtime/cgo/gcc_riscv64.S
+++ b/src/runtime/cgo/gcc_riscv64.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_riscv64.S"
+
/*
* void crosscall1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
*
diff --git a/src/runtime/cgo/gcc_s390x.S b/src/runtime/cgo/gcc_s390x.S
index 614de4b..8bd30fe 100644
--- a/src/runtime/cgo/gcc_s390x.S
+++ b/src/runtime/cgo/gcc_s390x.S
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+.file "gcc_s390x.S"
+
/*
* void crosscall_s390x(void (*fn)(void), void *g)
*
diff --git a/src/runtime/cgo/gcc_windows_amd64.c b/src/runtime/cgo/gcc_windows_amd64.c
index 996947e..3ff3c64 100644
--- a/src/runtime/cgo/gcc_windows_amd64.c
+++ b/src/runtime/cgo/gcc_windows_amd64.c
@@ -13,11 +13,13 @@
static void threadentry(void*);
static void (*setg_gcc)(void*);
+static DWORD *tls_g;
void
x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
{
setg_gcc = setg;
+ tls_g = (DWORD *)tlsg;
}
@@ -41,8 +43,8 @@
* Set specific keys in thread local storage.
*/
asm volatile (
- "movq %0, %%gs:0x28\n" // MOVL tls0, 0x28(GS)
- :: "r"(ts.tls)
+ "movq %0, %%gs:0(%1)\n" // MOVL tls0, 0(tls_g)(GS)
+ :: "r"(ts.tls), "r"(*tls_g)
);
crosscall_amd64(ts.fn, setg_gcc, (void*)ts.g);
diff --git a/src/runtime/cgo/mmap.go b/src/runtime/cgo/mmap.go
index eae0a9e..2f7e83b 100644
--- a/src/runtime/cgo/mmap.go
+++ b/src/runtime/cgo/mmap.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (linux && amd64) || (linux && arm64)
+//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64)
package cgo
diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go
index 4cb3e65..30660f7 100644
--- a/src/runtime/cgo_mmap.go
+++ b/src/runtime/cgo_mmap.go
@@ -4,7 +4,7 @@
// Support for memory sanitizer. See runtime/cgo/mmap.go.
-//go:build (linux && amd64) || (linux && arm64)
+//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64)
package runtime
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index 892654e..9c75280 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -86,7 +86,6 @@
import (
"internal/goarch"
- "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -259,7 +258,7 @@
// We must still stay on the same m.
defer unlockOSThread()
- if gp.m.needextram || atomic.Load(&extraMWaiters) > 0 {
+ if gp.m.needextram || extraMWaiters.Load() > 0 {
gp.m.needextram = false
systemstack(newextram)
}
@@ -347,12 +346,12 @@
}
}
-// called from assembly
+// called from assembly.
func badcgocallback() {
throw("misaligned stack in cgocallback")
}
-// called from (incomplete) assembly
+// called from (incomplete) assembly.
func cgounimpl() {
throw("cgo not implemented")
}
@@ -568,17 +567,16 @@
if base == 0 {
return
}
- hbits := heapBitsForAddr(base)
n := span.elemsize
- for i = uintptr(0); i < n; i += goarch.PtrSize {
- if !hbits.morePointers() {
- // No more possible pointers.
+ hbits := heapBitsForAddr(base, n)
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.next(); addr == 0 {
break
}
- if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
+ if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(addr))) {
panic(errorString(msg))
}
- hbits = hbits.next()
}
return
diff --git a/src/runtime/cgocheck.go b/src/runtime/cgocheck.go
index 74a2ec0..84e7516 100644
--- a/src/runtime/cgocheck.go
+++ b/src/runtime/cgocheck.go
@@ -32,14 +32,14 @@
// If we are running on the system stack then dst might be an
// address on the stack, which is OK.
- g := getg()
- if g == g.m.g0 || g == g.m.gsignal {
+ gp := getg()
+ if gp == gp.m.g0 || gp == gp.m.gsignal {
return
}
// Allocating memory can write to various mfixalloc structs
// that look like they are non-Go memory.
- if g.m.mallocing != 0 {
+ if gp.m.mallocing != 0 {
return
}
@@ -153,16 +153,16 @@
// src must be in the regular heap.
- hbits := heapBitsForAddr(uintptr(src))
- for i := uintptr(0); i < off+size; i += goarch.PtrSize {
- bits := hbits.bits()
- if i >= off && bits&bitPointer != 0 {
- v := *(*unsafe.Pointer)(add(src, i))
- if cgoIsGoPointer(v) {
- throw(cgoWriteBarrierFail)
- }
+ hbits := heapBitsForAddr(uintptr(src), size)
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.next(); addr == 0 {
+ break
}
- hbits = hbits.next()
+ v := *(*unsafe.Pointer)(unsafe.Pointer(addr))
+ if cgoIsGoPointer(v) {
+ throw(cgoWriteBarrierFail)
+ }
}
}
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index ca516ad..6a0ad35 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -138,7 +138,7 @@
return c.qcount == c.dataqsiz
}
-// entry point for c <- x from compiled code
+// entry point for c <- x from compiled code.
//
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
@@ -255,7 +255,7 @@
// to park on a channel. The window between when this G's status
// changes and when we set gp.activeStackChans is not safe for
// stack shrinking.
- atomic.Store8(&gp.parkingOnChan, 1)
+ gp.parkingOnChan.Store(true)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
@@ -435,7 +435,7 @@
return atomic.Loaduint(&c.qcount) == 0
}
-// entry points for <- c from compiled code
+// entry points for <- c from compiled code.
//
//go:nosplit
func chanrecv1(c *hchan, elem unsafe.Pointer) {
@@ -579,7 +579,7 @@
// to park on a channel. The window between when this G's status
// changes and when we set gp.activeStackChans is not safe for
// stack shrinking.
- atomic.Store8(&gp.parkingOnChan, 1)
+ gp.parkingOnChan.Store(true)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
@@ -664,7 +664,7 @@
// Mark that it's safe for stack shrinking to occur now,
// because any thread acquiring this G's stack for shrinking
// is guaranteed to observe activeStackChans after this store.
- atomic.Store8(&gp.parkingOnChan, 0)
+ gp.parkingOnChan.Store(false)
// Make sure we unlock after setting activeStackChans and
// unsetting parkingOnChan. The moment we unlock chanLock
// we risk gp getting readied by a channel operation and
@@ -791,7 +791,7 @@
// We use a flag in the G struct to tell us when someone
// else has won the race to signal this goroutine but the goroutine
// hasn't removed itself from the queue yet.
- if sgp.isSelect && !atomic.Cas(&sgp.g.selectDone, 0, 1) {
+ if sgp.isSelect && !sgp.g.selectDone.CompareAndSwap(0, 1) {
continue
}
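
This hunk replaces raw atomic helpers on g fields with the typed atomics (parkingOnChan becomes an atomic.Bool, selectDone an atomic.Uint32). A hedged sketch of the same pattern using the public sync/atomic types, which mirror the runtime-internal ones:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // waiter stands in for the g fields touched here; the names are illustrative.
    type waiter struct {
        parkingOnChan atomic.Bool
        selectDone    atomic.Uint32
    }

    func main() {
        var w waiter

        w.parkingOnChan.Store(true) // was atomic.Store8(&gp.parkingOnChan, 1)

        // Winner-takes-all race resolution, as in the select wakeup path:
        // was atomic.Cas(&sgp.g.selectDone, 0, 1).
        won := w.selectDone.CompareAndSwap(0, 1)
        fmt.Println("parking:", w.parkingOnChan.Load(), "won race:", won)
    }
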
diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go
index 15011ec..811c0f0 100644
--- a/src/runtime/checkptr_test.go
+++ b/src/runtime/checkptr_test.go
@@ -39,6 +39,8 @@
{"CheckPtrSmall", "fatal error: checkptr: pointer arithmetic computed bad pointer value\n"},
{"CheckPtrSliceOK", ""},
{"CheckPtrSliceFail", "fatal error: checkptr: unsafe.Slice result straddles multiple allocations\n"},
+ {"CheckPtrStringOK", ""},
+ {"CheckPtrStringFail", "fatal error: checkptr: unsafe.String result straddles multiple allocations\n"},
}
for _, tc := range testCases {
diff --git a/src/runtime/coverage/apis.go b/src/runtime/coverage/apis.go
new file mode 100644
index 0000000..7d851f9
--- /dev/null
+++ b/src/runtime/coverage/apis.go
@@ -0,0 +1,178 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import (
+ "fmt"
+ "internal/coverage"
+ "io"
+ "reflect"
+ "sync/atomic"
+ "unsafe"
+)
+
+// WriteMetaDir writes a coverage meta-data file for the currently
+// running program to the directory specified in 'dir'. An error will
+// be returned if the operation can't be completed successfully (for
+// example, if the currently running program was not built with
+// "-cover", or if the directory does not exist).
+func WriteMetaDir(dir string) error {
+ if !finalHashComputed {
+ return fmt.Errorf("error: no meta-data available (binary not built with -cover?)")
+ }
+ return emitMetaDataToDirectory(dir, getCovMetaList())
+}
+
+// WriteMeta writes the meta-data content (the payload that would
+// normally be emitted to a meta-data file) for the currently running
+// program to the writer 'w'. An error will be returned if the
+// operation can't be completed successfully (for example, if the
+// currently running program was not built with "-cover", or if a
+// write fails).
+func WriteMeta(w io.Writer) error {
+ if w == nil {
+ return fmt.Errorf("error: nil writer in WriteMeta")
+ }
+ if !finalHashComputed {
+ return fmt.Errorf("error: no meta-data available (binary not built with -cover?)")
+ }
+ ml := getCovMetaList()
+ return writeMetaData(w, ml, cmode, cgran, finalHash)
+}
+
+// WriteCountersDir writes a coverage counter-data file for the
+// currently running program to the directory specified in 'dir'. An
+// error will be returned if the operation can't be completed
+// successfully (for example, if the currently running program was not
+// built with "-cover", or if the directory does not exist). The
+// counter data written will be a snapshot taken at the point of the
+// call.
+func WriteCountersDir(dir string) error {
+ return emitCounterDataToDirectory(dir)
+}
+
+// WriteCounters writes coverage counter-data content for
+// the currently running program to the writer 'w'. An error will be
+// returned if the operation can't be completed successfully (for
+// example, if the currently running program was not built with
+// "-cover", or if a write fails). The counter data written will be a
+// snapshot taken at the point of the invocation.
+func WriteCounters(w io.Writer) error {
+ if w == nil {
+ return fmt.Errorf("error: nil writer in WriteCounters")
+ }
+ // Ask the runtime for the list of coverage counter symbols.
+ cl := getCovCounterList()
+ if len(cl) == 0 {
+ return fmt.Errorf("program not built with -cover")
+ }
+ if !finalHashComputed {
+ return fmt.Errorf("meta-data not written yet, unable to write counter data")
+ }
+
+ pm := getCovPkgMap()
+ s := &emitState{
+ counterlist: cl,
+ pkgmap: pm,
+ }
+ return s.emitCounterDataToWriter(w)
+}
+
+// ClearCounters clears/resets all coverage counter variables in the
+// currently running program. It returns an error if the program in
+// question was not built with the "-cover" flag. Clearing of coverage
+// counters is also not supported for programs not using atomic
+// counter mode (see more detailed comments below for the rationale
+// here).
+func ClearCounters() error {
+ cl := getCovCounterList()
+ if len(cl) == 0 {
+ return fmt.Errorf("program not built with -cover")
+ }
+ if cmode != coverage.CtrModeAtomic {
+		return fmt.Errorf("ClearCounters invoked for program built with -covermode=%s (please use -covermode=atomic)", cmode.String())
+ }
+
+ // Implementation note: this function would be faster and simpler
+ // if we could just zero out the entire counter array, but for the
+ // moment we go through and zero out just the slots in the array
+ // corresponding to the counter values. We do this to avoid the
+ // following bad scenario: suppose that a user builds their Go
+ // program with "-cover", and that program has a function (call it
+ // main.XYZ) that invokes ClearCounters:
+ //
+ // func XYZ() {
+ // ... do some stuff ...
+ // coverage.ClearCounters()
+ // if someCondition { <<--- HERE
+ // ...
+ // }
+ // }
+ //
+ // At the point where ClearCounters executes, main.XYZ has not yet
+ // finished running, thus as soon as the call returns the line
+ // marked "HERE" above will trigger the writing of a non-zero
+ // value into main.XYZ's counter slab. However since we've just
+ // finished clearing the entire counter segment, we will have lost
+ // the values in the prolog portion of main.XYZ's counter slab
+ // (nctrs, pkgid, funcid). This means that later on at the end of
+ // program execution as we walk through the entire counter array
+ // for the program looking for executed functions, we'll zoom past
+ // main.XYZ's prolog (which was zero'd) and hit the non-zero
+ // counter value corresponding to the "HERE" block, which will
+ // then be interpreted as the start of another live function.
+ // Things will go downhill from there.
+ //
+ // This same scenario is also a potential risk if the program is
+ // running on an architecture that permits reordering of
+ // writes/stores, since the inconsistency described above could
+ // arise here. Example scenario:
+ //
+ // func ABC() {
+ // ... // prolog
+ // if alwaysTrue() {
+ // XYZ() // counter update here
+ // }
+ // }
+ //
+ // In the instrumented version of ABC, the prolog of the function
+ // will contain a series of stores to the initial portion of the
+ // counter array to write number-of-counters, pkgid, funcid. Later
+ // in the function there is also a store to increment a counter
+ // for the block containing the call to XYZ(). If the CPU is
+ // allowed to reorder stores and decides to issue the XYZ store
+ // before the prolog stores, this could be observable as an
+ // inconsistency similar to the one above. Hence the requirement
+ // for atomic counter mode: according to package atomic docs,
+ // "...operations that happen in a specific order on one thread,
+ // will always be observed to happen in exactly that order by
+ // another thread". Thus we can be sure that there will be no
+ // inconsistency when reading the counter array from the thread
+ // running ClearCounters.
+
+ var sd []atomic.Uint32
+
+ bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&sd))
+ for _, c := range cl {
+ bufHdr.Data = uintptr(unsafe.Pointer(c.Counters))
+ bufHdr.Len = int(c.Len)
+ bufHdr.Cap = int(c.Len)
+ for i := 0; i < len(sd); i++ {
+ // Skip ahead until the next non-zero value.
+ sdi := sd[i].Load()
+ if sdi == 0 {
+ continue
+ }
+ // We found a function that was executed; clear its counters.
+ nCtrs := sdi
+ for j := 0; j < int(nCtrs); j++ {
+ sd[i+coverage.FirstCtrOffset+j].Store(0)
+ }
+ // Move to next function.
+ i += coverage.FirstCtrOffset + int(nCtrs) - 1
+ }
+ }
+ return nil
+}
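
A hedged usage sketch for the exported APIs above: a long-running, cover-instrumented program can snapshot its own coverage data without exiting. It assumes the binary was built with -cover and that the target directory already exists.

    package main

    import (
        "log"
        "os"
        "runtime/coverage"
    )

    func snapshotCoverage(dir string) {
        // Meta-data first (it is stable across the run), then a counter snapshot.
        // Both calls fail cleanly if the binary was not built with -cover.
        if err := coverage.WriteMetaDir(dir); err != nil {
            log.Printf("coverage meta-data: %v", err)
            return
        }
        if err := coverage.WriteCountersDir(dir); err != nil {
            log.Printf("coverage counters: %v", err)
        }
    }

    func main() {
        dir := os.Getenv("GOCOVERDIR")
        if dir == "" {
            dir = "."
        }
        snapshotCoverage(dir)
    }
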
diff --git a/src/runtime/coverage/dummy.s b/src/runtime/coverage/dummy.s
new file mode 100644
index 0000000..7592859
--- /dev/null
+++ b/src/runtime/coverage/dummy.s
@@ -0,0 +1,8 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The runtime package uses //go:linkname to push a few functions into this
+// package, but we still need a .s file so the Go tool does not pass -complete
+// to 'go tool compile', which would otherwise complain about Go functions
+// with no bodies.
diff --git a/src/runtime/coverage/emit.go b/src/runtime/coverage/emit.go
new file mode 100644
index 0000000..2aed99c
--- /dev/null
+++ b/src/runtime/coverage/emit.go
@@ -0,0 +1,667 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import (
+ "crypto/md5"
+ "fmt"
+ "internal/coverage"
+ "internal/coverage/encodecounter"
+ "internal/coverage/encodemeta"
+ "internal/coverage/rtcov"
+ "io"
+ "os"
+ "path/filepath"
+ "reflect"
+ "runtime"
+ "sync/atomic"
+ "time"
+ "unsafe"
+)
+
+// This file contains functions that support the writing of data files
+// emitted at the end of code coverage testing runs, from instrumented
+// executables.
+
+// getCovMetaList returns a list of meta-data blobs registered
+// for the currently executing instrumented program. It is defined in the
+// runtime.
+func getCovMetaList() []rtcov.CovMetaBlob
+
+// getCovCounterList returns a list of counter-data blobs registered
+// for the currently executing instrumented program. It is defined in the
+// runtime.
+func getCovCounterList() []rtcov.CovCounterBlob
+
+// getCovPkgMap returns a map storing the remapped package IDs for
+// hard-coded runtime packages (see internal/coverage/pkid.go for
+// more on why hard-coded package IDs are needed). This function
+// is defined in the runtime.
+func getCovPkgMap() map[int]int
+
+// emitState holds useful state information during the emit process.
+//
+// When an instrumented program finishes execution and starts the
+// process of writing out coverage data, it's possible that a
+// meta-data file already exists in the output directory. In this
+// case openOutputFiles() below will leave the 'mf' field
+// as nil. If a new meta-data file is needed, field 'mfname' will be
+// the final desired path of the meta file, 'mftmp' will be a
+// temporary file, and 'mf' will be an open os.File pointer for
+// 'mftmp'. The meta-data file payload will be written to 'mf', the
+// temp file will then be closed and renamed (from 'mftmp' to
+// 'mfname'), so as to ensure that the meta-data file is created
+// atomically; we want this so that things work smoothly in cases
+// where there are several instances of a given instrumented program
+// all terminating at the same time and trying to create meta-data
+// files simultaneously.
+//
+// For counter data files there is less chance of a collision, hence
+// openOutputFiles() stores the counter data file name in 'cfname' and
+// then places the open *os.File in 'cf'.
+type emitState struct {
+ mfname string // path of final meta-data output file
+ mftmp string // path to meta-data temp file (if needed)
+ mf *os.File // open os.File for meta-data temp file
+ cfname string // path of final counter data file
+ cftmp string // path to counter data temp file
+ cf *os.File // open os.File for counter data file
+ outdir string // output directory
+
+ // List of meta-data symbols obtained from the runtime
+ metalist []rtcov.CovMetaBlob
+
+ // List of counter-data symbols obtained from the runtime
+ counterlist []rtcov.CovCounterBlob
+
+ // Table to use for remapping hard-coded pkg ids.
+ pkgmap map[int]int
+
+ // emit debug trace output
+ debug bool
+}
+
+var (
+ // finalHash is computed at init time from the list of meta-data
+ // symbols registered during init. It is used both for writing the
+ // meta-data file and counter-data files.
+ finalHash [16]byte
+ // Set to true when we've computed finalHash + finalMetaLen.
+ finalHashComputed bool
+ // Total meta-data length.
+ finalMetaLen uint64
+ // Records whether we've already attempted to write meta-data.
+ metaDataEmitAttempted bool
+ // Counter mode for this instrumented program run.
+ cmode coverage.CounterMode
+ // Counter granularity for this instrumented program run.
+ cgran coverage.CounterGranularity
+ // Cached value of GOCOVERDIR environment variable.
+ goCoverDir string
+ // Copy of os.Args made at init time, converted into map format.
+ capturedOsArgs map[string]string
+	// Flag used in tests to signal that coverage data has already been written.
+ covProfileAlreadyEmitted bool
+)
+
+// fileType is used to select between counter-data files and
+// meta-data files.
+type fileType int
+
+const (
+ noFile = 1 << iota
+ metaDataFile
+ counterDataFile
+)
+
+// emitMetaData emits the meta-data output file for this coverage run.
+// This entry point is intended to be invoked by the compiler from
+// an instrumented program's main package init func.
+func emitMetaData() {
+ if covProfileAlreadyEmitted {
+ return
+ }
+ ml, err := prepareForMetaEmit()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: coverage meta-data prep failed: %v\n", err)
+ if os.Getenv("GOCOVERDEBUG") != "" {
+ panic("meta-data write failure")
+ }
+ }
+ if len(ml) == 0 {
+ fmt.Fprintf(os.Stderr, "program not built with -cover\n")
+ return
+ }
+
+ goCoverDir = os.Getenv("GOCOVERDIR")
+ if goCoverDir == "" {
+ fmt.Fprintf(os.Stderr, "warning: GOCOVERDIR not set, no coverage data emitted\n")
+ return
+ }
+
+ if err := emitMetaDataToDirectory(goCoverDir, ml); err != nil {
+ fmt.Fprintf(os.Stderr, "error: coverage meta-data emit failed: %v\n", err)
+ if os.Getenv("GOCOVERDEBUG") != "" {
+ panic("meta-data write failure")
+ }
+ }
+}
+
+func modeClash(m coverage.CounterMode) bool {
+ if m == coverage.CtrModeRegOnly || m == coverage.CtrModeTestMain {
+ return false
+ }
+ if cmode == coverage.CtrModeInvalid {
+ cmode = m
+ return false
+ }
+ return cmode != m
+}
+
+func granClash(g coverage.CounterGranularity) bool {
+ if cgran == coverage.CtrGranularityInvalid {
+ cgran = g
+ return false
+ }
+ return cgran != g
+}
+
+// prepareForMetaEmit performs preparatory steps needed prior to
+// emitting a meta-data file, notably computing a final hash of
+// all meta-data blobs and capturing os args.
+func prepareForMetaEmit() ([]rtcov.CovMetaBlob, error) {
+ // Ask the runtime for the list of coverage meta-data symbols.
+ ml := getCovMetaList()
+
+ // In the normal case (go build -o prog.exe ... ; ./prog.exe)
+ // len(ml) will always be non-zero, but we check here since at
+ // some point this function will be reachable via user-callable
+ // APIs (for example, to write out coverage data from a server
+ // program that doesn't ever call os.Exit).
+ if len(ml) == 0 {
+ return nil, nil
+ }
+
+ s := &emitState{
+ metalist: ml,
+ debug: os.Getenv("GOCOVERDEBUG") != "",
+ }
+
+ // Capture os.Args() now so as to avoid issues if args
+ // are rewritten during program execution.
+ capturedOsArgs = captureOsArgs()
+
+ if s.debug {
+ fmt.Fprintf(os.Stderr, "=+= GOCOVERDIR is %s\n", os.Getenv("GOCOVERDIR"))
+ fmt.Fprintf(os.Stderr, "=+= contents of covmetalist:\n")
+ for k, b := range ml {
+ fmt.Fprintf(os.Stderr, "=+= slot: %d path: %s ", k, b.PkgPath)
+ if b.PkgID != -1 {
+ fmt.Fprintf(os.Stderr, " hcid: %d", b.PkgID)
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+ pm := getCovPkgMap()
+ fmt.Fprintf(os.Stderr, "=+= remap table:\n")
+ for from, to := range pm {
+ fmt.Fprintf(os.Stderr, "=+= from %d to %d\n",
+ uint32(from), uint32(to))
+ }
+ }
+
+ h := md5.New()
+ tlen := uint64(unsafe.Sizeof(coverage.MetaFileHeader{}))
+ for _, entry := range ml {
+ if _, err := h.Write(entry.Hash[:]); err != nil {
+ return nil, err
+ }
+ tlen += uint64(entry.Len)
+ ecm := coverage.CounterMode(entry.CounterMode)
+ if modeClash(ecm) {
+ return nil, fmt.Errorf("coverage counter mode clash: package %s uses mode=%d, but package %s uses mode=%s\n", ml[0].PkgPath, cmode, entry.PkgPath, ecm)
+ }
+ ecg := coverage.CounterGranularity(entry.CounterGranularity)
+ if granClash(ecg) {
+ return nil, fmt.Errorf("coverage counter granularity clash: package %s uses gran=%d, but package %s uses gran=%s\n", ml[0].PkgPath, cgran, entry.PkgPath, ecg)
+ }
+ }
+
+ // Hash mode and granularity as well.
+ h.Write([]byte(cmode.String()))
+ h.Write([]byte(cgran.String()))
+
+ // Compute final digest.
+ fh := h.Sum(nil)
+ copy(finalHash[:], fh)
+ finalHashComputed = true
+ finalMetaLen = tlen
+
+ return ml, nil
+}
+
+// emitMetaDataToDirectory emits the meta-data output file to the specified
+// directory, returning an error if something went wrong.
+func emitMetaDataToDirectory(outdir string, ml []rtcov.CovMetaBlob) error {
+ ml, err := prepareForMetaEmit()
+ if err != nil {
+ return err
+ }
+ if len(ml) == 0 {
+ return nil
+ }
+
+ metaDataEmitAttempted = true
+
+ s := &emitState{
+ metalist: ml,
+ debug: os.Getenv("GOCOVERDEBUG") != "",
+ outdir: outdir,
+ }
+
+ // Open output files.
+ if err := s.openOutputFiles(finalHash, finalMetaLen, metaDataFile); err != nil {
+ return err
+ }
+
+ // Emit meta-data file only if needed (may already be present).
+ if s.needMetaDataFile() {
+ if err := s.emitMetaDataFile(finalHash, finalMetaLen); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// emitCounterData emits the counter data output file for this coverage run.
+// This entry point is intended to be invoked by the runtime when an
+// instrumented program is terminating or calling os.Exit().
+func emitCounterData() {
+ if goCoverDir == "" || !finalHashComputed || covProfileAlreadyEmitted {
+ return
+ }
+ if err := emitCounterDataToDirectory(goCoverDir); err != nil {
+ fmt.Fprintf(os.Stderr, "error: coverage counter data emit failed: %v\n", err)
+ if os.Getenv("GOCOVERDEBUG") != "" {
+ panic("counter-data write failure")
+ }
+ }
+}
+
+// emitCounterDataToDirectory emits the counter-data output file for this coverage run.
+func emitCounterDataToDirectory(outdir string) error {
+ // Ask the runtime for the list of coverage counter symbols.
+ cl := getCovCounterList()
+ if len(cl) == 0 {
+ // no work to do here.
+ return nil
+ }
+
+ if !finalHashComputed {
+ return fmt.Errorf("error: meta-data not available (binary not built with -cover?)")
+ }
+
+	// Ask the runtime for the hard-coded package ID remap table.
+ pm := getCovPkgMap()
+ s := &emitState{
+ counterlist: cl,
+ pkgmap: pm,
+ outdir: outdir,
+ debug: os.Getenv("GOCOVERDEBUG") != "",
+ }
+
+ // Open output file.
+ if err := s.openOutputFiles(finalHash, finalMetaLen, counterDataFile); err != nil {
+ return err
+ }
+ if s.cf == nil {
+		return fmt.Errorf("counter data output file open failed (no additional info)")
+ }
+
+ // Emit counter data file.
+ if err := s.emitCounterDataFile(finalHash, s.cf); err != nil {
+ return err
+ }
+ if err := s.cf.Close(); err != nil {
+ return fmt.Errorf("closing counter data file: %v", err)
+ }
+
+ // Counter file has now been closed. Rename the temp to the
+ // final desired path.
+ if err := os.Rename(s.cftmp, s.cfname); err != nil {
+ return fmt.Errorf("writing %s: rename from %s failed: %v\n", s.cfname, s.cftmp, err)
+ }
+
+ return nil
+}
+
+// emitCounterDataToWriter emits counter data for this coverage run to an io.Writer.
+func (s *emitState) emitCounterDataToWriter(w io.Writer) error {
+ if err := s.emitCounterDataFile(finalHash, w); err != nil {
+ return err
+ }
+ return nil
+}
+
+// openMetaFile determines whether we need to emit a meta-data output
+// file, or whether we can reuse the existing file in the coverage out
+// dir. It updates mfname/mftmp/mf fields in 's', returning an error
+// if something went wrong. See the comment on the emitState type
+// definition above for more on how file opening is managed.
+func (s *emitState) openMetaFile(metaHash [16]byte, metaLen uint64) error {
+
+ // Open meta-outfile for reading to see if it exists.
+ fn := fmt.Sprintf("%s.%x", coverage.MetaFilePref, metaHash)
+ s.mfname = filepath.Join(s.outdir, fn)
+ fi, err := os.Stat(s.mfname)
+ if err != nil || fi.Size() != int64(metaLen) {
+ // We need a new meta-file.
+ tname := "tmp." + fn + fmt.Sprintf("%d", time.Now().UnixNano())
+ s.mftmp = filepath.Join(s.outdir, tname)
+ s.mf, err = os.Create(s.mftmp)
+ if err != nil {
+ return fmt.Errorf("creating meta-data file %s: %v", s.mftmp, err)
+ }
+ }
+ return nil
+}
+
+// openCounterFile opens an output file for the counter data portion
+// of a test coverage run. It updates the 'cfname' and 'cf' fields in
+// 's', returning an error if something went wrong.
+func (s *emitState) openCounterFile(metaHash [16]byte) error {
+ processID := os.Getpid()
+ fn := fmt.Sprintf(coverage.CounterFileTempl, coverage.CounterFilePref, metaHash, processID, time.Now().UnixNano())
+ s.cfname = filepath.Join(s.outdir, fn)
+ s.cftmp = filepath.Join(s.outdir, "tmp."+fn)
+ var err error
+ s.cf, err = os.Create(s.cftmp)
+ if err != nil {
+ return fmt.Errorf("creating counter data file %s: %v", s.cftmp, err)
+ }
+ return nil
+}
+
+// openOutputFiles opens output files in preparation for emitting
+// coverage data. In the case of the meta-data file, openOutputFiles
+// may determine that we can reuse an existing meta-data file in the
+// outdir, in which case it will leave the 'mf' field in the state
+// struct as nil. If a new meta-file is needed, the field 'mfname'
+// will be the final desired path of the meta file, 'mftmp' will be a
+// temporary file, and 'mf' will be an open os.File pointer for
+// 'mftmp'. The idea is that the client/caller will write content into
+// 'mf', close it, and then rename 'mftmp' to 'mfname'. This function
+// also opens the counter data output file, setting 'cf' and 'cfname'
+// in the state struct.
+func (s *emitState) openOutputFiles(metaHash [16]byte, metaLen uint64, which fileType) error {
+ fi, err := os.Stat(s.outdir)
+ if err != nil {
+ return fmt.Errorf("output directory %q inaccessible (err: %v); no coverage data written", s.outdir, err)
+ }
+ if !fi.IsDir() {
+ return fmt.Errorf("output directory %q not a directory; no coverage data written", s.outdir)
+ }
+
+ if (which & metaDataFile) != 0 {
+ if err := s.openMetaFile(metaHash, metaLen); err != nil {
+ return err
+ }
+ }
+ if (which & counterDataFile) != 0 {
+ if err := s.openCounterFile(metaHash); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// emitMetaDataFile emits coverage meta-data to a previously opened
+// temporary file (s.mftmp), then renames the generated file to the
+// final path (s.mfname).
+func (s *emitState) emitMetaDataFile(finalHash [16]byte, tlen uint64) error {
+ if err := writeMetaData(s.mf, s.metalist, cmode, cgran, finalHash); err != nil {
+ return fmt.Errorf("writing %s: %v\n", s.mftmp, err)
+ }
+ if err := s.mf.Close(); err != nil {
+ return fmt.Errorf("closing meta data temp file: %v", err)
+ }
+
+ // Temp file has now been flushed and closed. Rename the temp to the
+ // final desired path.
+ if err := os.Rename(s.mftmp, s.mfname); err != nil {
+ return fmt.Errorf("writing %s: rename from %s failed: %v\n", s.mfname, s.mftmp, err)
+ }
+
+ return nil
+}
+
+// needMetaDataFile returns TRUE if we need to emit a meta-data file
+// for this program run. It should be used only after
+// openOutputFiles() has been invoked.
+func (s *emitState) needMetaDataFile() bool {
+ return s.mf != nil
+}
+
+func writeMetaData(w io.Writer, metalist []rtcov.CovMetaBlob, cmode coverage.CounterMode, gran coverage.CounterGranularity, finalHash [16]byte) error {
+ mfw := encodemeta.NewCoverageMetaFileWriter("<io.Writer>", w)
+
+ // Note: "sd" is re-initialized on each iteration of the loop
+	// below, and would normally be declared inside the loop, but is
+	// placed here for escape-analysis reasons, since we capture it in bufHdr.
+ var sd []byte
+ bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&sd))
+
+ var blobs [][]byte
+ for _, e := range metalist {
+ bufHdr.Data = uintptr(unsafe.Pointer(e.P))
+ bufHdr.Len = int(e.Len)
+ bufHdr.Cap = int(e.Len)
+ blobs = append(blobs, sd)
+ }
+ return mfw.Write(finalHash, blobs, cmode, gran)
+}
+
+func (s *emitState) NumFuncs() (int, error) {
+ var sd []atomic.Uint32
+ bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&sd))
+
+ totalFuncs := 0
+ for _, c := range s.counterlist {
+ bufHdr.Data = uintptr(unsafe.Pointer(c.Counters))
+ bufHdr.Len = int(c.Len)
+ bufHdr.Cap = int(c.Len)
+ for i := 0; i < len(sd); i++ {
+ // Skip ahead until the next non-zero value.
+ sdi := sd[i].Load()
+ if sdi == 0 {
+ continue
+ }
+
+ // We found a function that was executed.
+ nCtrs := sdi
+
+ // Check to make sure that we have at least one live
+			// counter. See the implementation note in ClearCounters
+ // for a description of why this is needed.
+ isLive := false
+ st := i + coverage.FirstCtrOffset
+ counters := sd[st : st+int(nCtrs)]
+ for i := 0; i < len(counters); i++ {
+ if counters[i].Load() != 0 {
+ isLive = true
+ break
+ }
+ }
+ if !isLive {
+ // Skip this function.
+ i += coverage.FirstCtrOffset + int(nCtrs) - 1
+ continue
+ }
+
+ totalFuncs++
+
+ // Move to the next function.
+ i += coverage.FirstCtrOffset + int(nCtrs) - 1
+ }
+ }
+ return totalFuncs, nil
+}
+
+func (s *emitState) VisitFuncs(f encodecounter.CounterVisitorFn) error {
+ var sd []atomic.Uint32
+ var tcounters []uint32
+ bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&sd))
+
+ rdCounters := func(actrs []atomic.Uint32, ctrs []uint32) []uint32 {
+ ctrs = ctrs[:0]
+ for i := range actrs {
+ ctrs = append(ctrs, actrs[i].Load())
+ }
+ return ctrs
+ }
+
+ dpkg := uint32(0)
+ for _, c := range s.counterlist {
+ bufHdr.Data = uintptr(unsafe.Pointer(c.Counters))
+ bufHdr.Len = int(c.Len)
+ bufHdr.Cap = int(c.Len)
+ for i := 0; i < len(sd); i++ {
+ // Skip ahead until the next non-zero value.
+ sdi := sd[i].Load()
+ if sdi == 0 {
+ continue
+ }
+
+ // We found a function that was executed.
+ nCtrs := sd[i+coverage.NumCtrsOffset].Load()
+ pkgId := sd[i+coverage.PkgIdOffset].Load()
+ funcId := sd[i+coverage.FuncIdOffset].Load()
+ cst := i + coverage.FirstCtrOffset
+ counters := sd[cst : cst+int(nCtrs)]
+
+ // Check to make sure that we have at least one live
+			// counter. See the implementation note in ClearCounters
+ // for a description of why this is needed.
+ isLive := false
+ for i := 0; i < len(counters); i++ {
+ if counters[i].Load() != 0 {
+ isLive = true
+ break
+ }
+ }
+ if !isLive {
+ // Skip this function.
+ i += coverage.FirstCtrOffset + int(nCtrs) - 1
+ continue
+ }
+
+ if s.debug {
+ if pkgId != dpkg {
+ dpkg = pkgId
+ fmt.Fprintf(os.Stderr, "\n=+= %d: pk=%d visit live fcn",
+ i, pkgId)
+ }
+ fmt.Fprintf(os.Stderr, " {i=%d F%d NC%d}", i, funcId, nCtrs)
+ }
+
+ // Vet and/or fix up package ID. A package ID of zero
+ // indicates that there is some new package X that is a
+ // runtime dependency, and this package has code that
+ // executes before its corresponding init package runs.
+ // This is a fatal error that we should only see during
+ // Go development (e.g. tip).
+ ipk := int32(pkgId)
+ if ipk == 0 {
+ fmt.Fprintf(os.Stderr, "\n")
+ reportErrorInHardcodedList(int32(i), ipk, funcId, nCtrs)
+ } else if ipk < 0 {
+ if newId, ok := s.pkgmap[int(ipk)]; ok {
+ pkgId = uint32(newId)
+ } else {
+ fmt.Fprintf(os.Stderr, "\n")
+ reportErrorInHardcodedList(int32(i), ipk, funcId, nCtrs)
+ }
+ } else {
+ // The package ID value stored in the counter array
+ // has 1 added to it (so as to preclude the
+ // possibility of a zero value; see
+ // runtime.addCovMeta), so subtract off 1 here to form
+ // the real package ID.
+ pkgId--
+ }
+
+ tcounters = rdCounters(counters, tcounters)
+ if err := f(pkgId, funcId, tcounters); err != nil {
+ return err
+ }
+
+ // Skip over this function.
+ i += coverage.FirstCtrOffset + int(nCtrs) - 1
+ }
+ if s.debug {
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+ }
+ return nil
+}
+
+// captureOsArgs converts os.Args into the format we use to store
+// this info in the counter data file (the counter data file "args"
+// section is a generic key-value collection). See the 'args' section
+// in internal/coverage/defs.go for more info. The args map is also
+// used to capture GOOS and GOARCH values.
+func captureOsArgs() map[string]string {
+ m := make(map[string]string)
+ m["argc"] = fmt.Sprintf("%d", len(os.Args))
+ for k, a := range os.Args {
+ m[fmt.Sprintf("argv%d", k)] = a
+ }
+ m["GOOS"] = runtime.GOOS
+ m["GOARCH"] = runtime.GOARCH
+ return m
+}
+
+// emitCounterDataFile emits the counter data portion of a
+// coverage output file (to the file 's.cf').
+func (s *emitState) emitCounterDataFile(finalHash [16]byte, w io.Writer) error {
+ cfw := encodecounter.NewCoverageDataWriter(w, coverage.CtrULeb128)
+ if err := cfw.Write(finalHash, capturedOsArgs, s); err != nil {
+ return err
+ }
+ return nil
+}
+
+// markProfileEmitted signals the runtime/coverage machinery that
+// coverage data output files have already been written out, and there
+// is no need to take any additional action at exit time. This
+// function is called (via linknamed reference) from the
+// coverage-related boilerplate code in _testmain.go emitted for go
+// unit tests.
+func markProfileEmitted(val bool) {
+ covProfileAlreadyEmitted = val
+}
+
+func reportErrorInHardcodedList(slot, pkgID int32, fnID, nCtrs uint32) {
+ metaList := getCovMetaList()
+ pkgMap := getCovPkgMap()
+
+ println("internal error in coverage meta-data tracking:")
+ println("encountered bad pkgID:", pkgID, " at slot:", slot,
+ " fnID:", fnID, " numCtrs:", nCtrs)
+ println("list of hard-coded runtime package IDs needs revising.")
+ println("[see the comment on the 'rtPkgs' var in ")
+ println(" <goroot>/src/internal/coverage/pkid.go]")
+ println("registered list:")
+ for k, b := range metaList {
+ print("slot: ", k, " path='", b.PkgPath, "' ")
+ if b.PkgID != -1 {
+ print(" hard-coded id: ", b.PkgID)
+ }
+ println("")
+ }
+ println("remap table:")
+ for from, to := range pkgMap {
+ println("from ", from, " to ", to)
+ }
+}
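
The NumFuncs and VisitFuncs methods above view a raw counter region (base pointer plus length) as a Go slice by filling in a slice header directly, so no copy is made. Below is a minimal standalone sketch of that aliasing pattern, assuming a made-up local array in place of real counter memory; unsafe.Slice would be the more conventional spelling in ordinary code, but the explicit header lets a single header be re-pointed cheaply on every loop iteration.

	package main

	import (
		"fmt"
		"reflect"
		"unsafe"
	)

	func main() {
		// Pretend this array is a counter region handed to us as a raw
		// pointer + length pair (as rtcov.CovCounterBlob describes one).
		raw := [4]uint32{7, 0, 3, 1}
		p := &raw[0]
		n := len(raw)

		// Alias the region as a []uint32 without copying by writing the
		// slice header fields, mirroring the bufHdr pattern above.
		var sd []uint32
		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&sd))
		hdr.Data = uintptr(unsafe.Pointer(p))
		hdr.Len = n
		hdr.Cap = n

		fmt.Println(sd) // [7 0 3 1]
	}
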
diff --git a/src/runtime/coverage/emitdata_test.go b/src/runtime/coverage/emitdata_test.go
new file mode 100644
index 0000000..3839e44
--- /dev/null
+++ b/src/runtime/coverage/emitdata_test.go
@@ -0,0 +1,451 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import (
+ "fmt"
+ "internal/coverage"
+ "internal/goexperiment"
+ "internal/platform"
+ "internal/testenv"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+)
+
+// Set to true for debugging (linux only).
+const fixedTestDir = false
+
+func TestCoverageApis(t *testing.T) {
+ if testing.Short() {
+ t.Skipf("skipping test: too long for short mode")
+ }
+ if !goexperiment.CoverageRedesign {
+ t.Skipf("skipping new coverage tests (experiment not enabled)")
+ }
+ testenv.MustHaveGoBuild(t)
+ dir := t.TempDir()
+ if fixedTestDir {
+ dir = "/tmp/qqqzzz"
+ os.RemoveAll(dir)
+ mkdir(t, dir)
+ }
+
+ // Build harness.
+ bdir := mkdir(t, filepath.Join(dir, "build"))
+ hargs := []string{"-cover", "-coverpkg=all"}
+ if testing.CoverMode() != "" {
+ hargs = append(hargs, "-covermode="+testing.CoverMode())
+ }
+ harnessPath := buildHarness(t, bdir, hargs)
+
+ t.Logf("harness path is %s", harnessPath)
+
+ // Sub-tests for each API we want to inspect, plus
+ // extras for error testing.
+ t.Run("emitToDir", func(t *testing.T) {
+ t.Parallel()
+ testEmitToDir(t, harnessPath, dir)
+ })
+ t.Run("emitToWriter", func(t *testing.T) {
+ t.Parallel()
+ testEmitToWriter(t, harnessPath, dir)
+ })
+ t.Run("emitToNonexistentDir", func(t *testing.T) {
+ t.Parallel()
+ testEmitToNonexistentDir(t, harnessPath, dir)
+ })
+ t.Run("emitToNilWriter", func(t *testing.T) {
+ t.Parallel()
+ testEmitToNilWriter(t, harnessPath, dir)
+ })
+ t.Run("emitToFailingWriter", func(t *testing.T) {
+ t.Parallel()
+ testEmitToFailingWriter(t, harnessPath, dir)
+ })
+ t.Run("emitWithCounterClear", func(t *testing.T) {
+ t.Parallel()
+ testEmitWithCounterClear(t, harnessPath, dir)
+ })
+
+}
+
+// upmergeCoverData helps improve coverage data for this package
+// itself. If this test itself is being invoked with "-cover", then
+// what we'd like is for package coverage data (that is, coverage for
+// routines in "runtime/coverage") to be incorporated into the test
+// run from the "harness.exe" runs we've just done. We can accomplish
+// this by doing a merge from the harness gocoverdir's to the test
+// gocoverdir.
+func upmergeCoverData(t *testing.T, gocoverdir string) {
+ if testing.CoverMode() == "" {
+ return
+ }
+ testGoCoverDir := os.Getenv("GOCOVERDIR")
+ if testGoCoverDir == "" {
+ return
+ }
+ args := []string{"tool", "covdata", "merge", "-pkg=runtime/coverage",
+ "-o", testGoCoverDir, "-i", gocoverdir}
+ t.Logf("up-merge of covdata from %s to %s", gocoverdir, testGoCoverDir)
+ t.Logf("executing: go %+v", args)
+ cmd := exec.Command(testenv.GoToolPath(t), args...)
+ if b, err := cmd.CombinedOutput(); err != nil {
+ t.Fatalf("covdata merge failed (%v): %s", err, b)
+ }
+}
+
+// buildHarness builds the helper program "harness.exe".
+func buildHarness(t *testing.T, dir string, opts []string) string {
+ harnessPath := filepath.Join(dir, "harness.exe")
+ harnessSrc := filepath.Join("testdata", "harness.go")
+ args := []string{"build", "-o", harnessPath}
+ args = append(args, opts...)
+ args = append(args, harnessSrc)
+ //t.Logf("harness build: go %+v\n", args)
+ cmd := exec.Command(testenv.GoToolPath(t), args...)
+ if b, err := cmd.CombinedOutput(); err != nil {
+ t.Fatalf("build failed (%v): %s", err, b)
+ }
+ return harnessPath
+}
+
+func mkdir(t *testing.T, d string) string {
+ t.Helper()
+ if err := os.Mkdir(d, 0777); err != nil {
+ t.Fatalf("mkdir failed: %v", err)
+ }
+ return d
+}
+
+// updateGoCoverDir updates the specified environment 'env' to set
+// GOCOVERDIR to 'gcd' (if setGoCoverDir is TRUE) or removes
+// GOCOVERDIR from the environment (if setGoCoverDir is false).
+func updateGoCoverDir(env []string, gcd string, setGoCoverDir bool) []string {
+ rv := []string{}
+ found := false
+ for _, v := range env {
+ if strings.HasPrefix(v, "GOCOVERDIR=") {
+ if !setGoCoverDir {
+ continue
+ }
+ v = "GOCOVERDIR=" + gcd
+ found = true
+ }
+ rv = append(rv, v)
+ }
+ if !found && setGoCoverDir {
+ rv = append(rv, "GOCOVERDIR="+gcd)
+ }
+ return rv
+}
+
+func runHarness(t *testing.T, harnessPath string, tp string, setGoCoverDir bool, rdir, edir string) (string, error) {
+ t.Logf("running: %s -tp %s -o %s with rdir=%s and GOCOVERDIR=%v", harnessPath, tp, edir, rdir, setGoCoverDir)
+ cmd := exec.Command(harnessPath, "-tp", tp, "-o", edir)
+ cmd.Dir = rdir
+ cmd.Env = updateGoCoverDir(os.Environ(), rdir, setGoCoverDir)
+ b, err := cmd.CombinedOutput()
+ //t.Logf("harness run output: %s\n", string(b))
+ return string(b), err
+}
+
+func testForSpecificFunctions(t *testing.T, dir string, want []string, avoid []string) string {
+ args := []string{"tool", "covdata", "debugdump",
+ "-live", "-pkg=command-line-arguments", "-i=" + dir}
+ t.Logf("running: go %v\n", args)
+ cmd := exec.Command(testenv.GoToolPath(t), args...)
+ b, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("'go tool covdata failed (%v): %s", err, b)
+ }
+ output := string(b)
+ rval := ""
+ for _, f := range want {
+ wf := "Func: " + f + "\n"
+ if strings.Contains(output, wf) {
+ continue
+ }
+ rval += fmt.Sprintf("error: output should contain %q but does not\n", wf)
+ }
+ for _, f := range avoid {
+ wf := "Func: " + f + "\n"
+ if strings.Contains(output, wf) {
+ rval += fmt.Sprintf("error: output should not contain %q but does\n", wf)
+ }
+ }
+ if rval != "" {
+ t.Logf("=-= begin output:\n" + output + "\n=-= end output\n")
+ }
+ return rval
+}
+
+func withAndWithoutRunner(f func(setit bool, tag string)) {
+ // Run 'f' with and without GOCOVERDIR set.
+ for i := 0; i < 2; i++ {
+ tag := "x"
+ setGoCoverDir := true
+ if i == 0 {
+ setGoCoverDir = false
+ tag = "y"
+ }
+ f(setGoCoverDir, tag)
+ }
+}
+
+func mktestdirs(t *testing.T, tag, tp, dir string) (string, string) {
+ t.Helper()
+ rdir := mkdir(t, filepath.Join(dir, tp+"-rdir-"+tag))
+ edir := mkdir(t, filepath.Join(dir, tp+"-edir-"+tag))
+ return rdir, edir
+}
+
+func testEmitToDir(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ tp := "emitToDir"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+ output, err := runHarness(t, harnessPath, tp,
+ setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp emitDir': %v", err)
+ }
+
+ // Just check to make sure meta-data file and counter data file were
+ // written. Another alternative would be to run "go tool covdata"
+ // or equivalent, but for now, this is what we've got.
+ dents, err := os.ReadDir(edir)
+ if err != nil {
+ t.Fatalf("os.ReadDir(%s) failed: %v", edir, err)
+ }
+ mfc := 0
+ cdc := 0
+ for _, e := range dents {
+ if e.IsDir() {
+ continue
+ }
+ if strings.HasPrefix(e.Name(), coverage.MetaFilePref) {
+ mfc++
+ } else if strings.HasPrefix(e.Name(), coverage.CounterFilePref) {
+ cdc++
+ }
+ }
+ wantmf := 1
+ wantcf := 1
+ if mfc != wantmf {
+ t.Errorf("EmitToDir: want %d meta-data files, got %d\n", wantmf, mfc)
+ }
+ if cdc != wantcf {
+ t.Errorf("EmitToDir: want %d counter-data files, got %d\n", wantcf, cdc)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitToWriter(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ tp := "emitToWriter"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+ output, err := runHarness(t, harnessPath, tp, setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ want := []string{"main", tp}
+ avoid := []string{"final"}
+ if msg := testForSpecificFunctions(t, edir, want, avoid); msg != "" {
+ t.Errorf("coverage data from %q output match failed: %s", tp, msg)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitToNonexistentDir(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ tp := "emitToNonexistentDir"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+ output, err := runHarness(t, harnessPath, tp, setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitToUnwritableDir(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+
+ tp := "emitToUnwritableDir"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+
+ // Make edir unwritable.
+ if err := os.Chmod(edir, 0555); err != nil {
+ t.Fatalf("chmod failed: %v", err)
+ }
+ defer os.Chmod(edir, 0777)
+
+ output, err := runHarness(t, harnessPath, tp, setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitToNilWriter(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ tp := "emitToNilWriter"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+ output, err := runHarness(t, harnessPath, tp, setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitToFailingWriter(t *testing.T, harnessPath string, dir string) {
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ tp := "emitToFailingWriter"
+ rdir, edir := mktestdirs(t, tag, tp, dir)
+ output, err := runHarness(t, harnessPath, tp, setGoCoverDir, rdir, edir)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ upmergeCoverData(t, edir)
+ upmergeCoverData(t, rdir)
+ })
+}
+
+func testEmitWithCounterClear(t *testing.T, harnessPath string, dir string) {
+ // Ensure that we have two versions of the harness: one built with
+ // -covermode=atomic and one built with -covermode=set (we need
+ // both modes to test all of the functionality).
+ var nonatomicHarnessPath, atomicHarnessPath string
+ if testing.CoverMode() != "atomic" {
+ nonatomicHarnessPath = harnessPath
+ bdir2 := mkdir(t, filepath.Join(dir, "build2"))
+ hargs := []string{"-covermode=atomic", "-coverpkg=all"}
+ atomicHarnessPath = buildHarness(t, bdir2, hargs)
+ } else {
+ atomicHarnessPath = harnessPath
+ mode := "set"
+ if testing.CoverMode() != "" && testing.CoverMode() != "atomic" {
+ mode = testing.CoverMode()
+ }
+ // Build a special nonatomic covermode version of the harness
+ // (we need both modes to test all of the functionality).
+ bdir2 := mkdir(t, filepath.Join(dir, "build2"))
+ hargs := []string{"-covermode=" + mode, "-coverpkg=all"}
+ nonatomicHarnessPath = buildHarness(t, bdir2, hargs)
+ }
+
+ withAndWithoutRunner(func(setGoCoverDir bool, tag string) {
+ // First a run with the nonatomic harness path, which we
+ // expect to fail.
+ tp := "emitWithCounterClear"
+ rdir1, edir1 := mktestdirs(t, tag, tp+"1", dir)
+ output, err := runHarness(t, nonatomicHarnessPath, tp,
+ setGoCoverDir, rdir1, edir1)
+ if err == nil {
+ t.Logf("%s", output)
+ t.Fatalf("running '%s -tp %s': unexpected success",
+ nonatomicHarnessPath, tp)
+ }
+
+ // Next a run with the atomic harness path, which we
+ // expect to succeed.
+ rdir2, edir2 := mktestdirs(t, tag, tp+"2", dir)
+ output, err = runHarness(t, atomicHarnessPath, tp,
+ setGoCoverDir, rdir2, edir2)
+ if err != nil {
+ t.Logf("%s", output)
+ t.Fatalf("running 'harness -tp %s': %v", tp, err)
+ }
+ want := []string{tp, "postClear"}
+ avoid := []string{"preClear", "main", "final"}
+ if msg := testForSpecificFunctions(t, edir2, want, avoid); msg != "" {
+ t.Logf("%s", output)
+ t.Errorf("coverage data from %q output match failed: %s", tp, msg)
+ }
+
+ if testing.CoverMode() == "atomic" {
+ upmergeCoverData(t, edir2)
+ upmergeCoverData(t, rdir2)
+ } else {
+ upmergeCoverData(t, edir1)
+ upmergeCoverData(t, rdir1)
+ }
+ })
+}
+
+func TestApisOnNocoverBinary(t *testing.T) {
+ if testing.Short() {
+ t.Skipf("skipping test: too long for short mode")
+ }
+ testenv.MustHaveGoBuild(t)
+ dir := t.TempDir()
+
+ // Build harness with no -cover.
+ bdir := mkdir(t, filepath.Join(dir, "nocover"))
+ edir := mkdir(t, filepath.Join(dir, "emitDirNo"))
+ harnessPath := buildHarness(t, bdir, nil)
+ output, err := runHarness(t, harnessPath, "emitToDir", false, edir, edir)
+ if err == nil {
+ t.Fatalf("expected error on TestApisOnNocoverBinary harness run")
+ }
+ const want = "not built with -cover"
+ if !strings.Contains(output, want) {
+ t.Errorf("error output does not contain %q: %s", want, output)
+ }
+}
+
+func TestIssue56006EmitDataRaceCoverRunningGoroutine(t *testing.T) {
+ if testing.Short() {
+ t.Skipf("skipping test: too long for short mode")
+ }
+ if !goexperiment.CoverageRedesign {
+ t.Skipf("skipping new coverage tests (experiment not enabled)")
+ }
+
+ // This test requires "go test -race -cover", meaning that we need
+ // go build, go run, and "-race" support.
+ testenv.MustHaveGoRun(t)
+ if !platform.RaceDetectorSupported(runtime.GOOS, runtime.GOARCH) ||
+ !testenv.HasCGO() {
+ t.Skip("skipped due to lack of race detector support / CGO")
+ }
+
+ // This will run a program with -cover and -race where we have a
+ // goroutine still running (and updating counters) at the point where
+ // the test runtime is trying to write out counter data.
+ cmd := exec.Command(testenv.GoToolPath(t), "test", "-cover", "-race")
+ cmd.Dir = filepath.Join("testdata", "issue56006")
+ b, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("go test -cover -race failed: %v", err)
+ }
+
+ // Don't want to see any data races in output.
+ avoid := []string{"DATA RACE"}
+ for _, no := range avoid {
+ if strings.Contains(string(b), no) {
+ t.Logf("%s\n", string(b))
+ t.Fatalf("found %s in test output, not permitted", no)
+ }
+ }
+}
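
upmergeCoverData above shells out to the covdata tool to fold a harness run's coverage into the test's own GOCOVERDIR. A hedged standalone sketch of the same invocation outside the test framework (both directory paths are hypothetical):

	package main

	import (
		"log"
		"os/exec"
	)

	func main() {
		// srcDir holds meta/counter files from a harness run; dstDir is
		// the destination GOCOVERDIR. Both paths are placeholders.
		srcDir := "/tmp/harness-covdata"
		dstDir := "/tmp/test-covdata"

		// Same merge that upmergeCoverData performs via the go tool.
		cmd := exec.Command("go", "tool", "covdata", "merge",
			"-pkg=runtime/coverage", "-o", dstDir, "-i", srcDir)
		if out, err := cmd.CombinedOutput(); err != nil {
			log.Fatalf("covdata merge failed (%v): %s", err, out)
		}
	}
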
diff --git a/src/runtime/coverage/hooks.go b/src/runtime/coverage/hooks.go
new file mode 100644
index 0000000..a9fbf9d
--- /dev/null
+++ b/src/runtime/coverage/hooks.go
@@ -0,0 +1,42 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import _ "unsafe"
+
+// initHook is invoked from the main package "init" routine in
+// programs built with "-cover". This function is intended to be
+// called only by the compiler.
+//
+// If 'istest' is false, it indicates we're building a regular program
+// ("go build -cover ..."), in which case we immediately try to write
+// out the meta-data file, and register emitCounterData as an exit
+// hook.
+//
+// If 'istest' is true (indicating that the program in question is a
+// Go test binary), then we tentatively queue up both emitMetaData and
+// emitCounterData as exit hooks. In the normal case (e.g. regular "go
+// test -cover" run) the testmain.go boilerplate will run at the end
+// of the test, write out the coverage percentage, and then invoke
+// markProfileEmitted() to indicate that no more work needs to be
+// done. If however that call is never made, this is a sign that the
+// test binary is being used as a replacement binary for the tool
+// being tested, hence we do want to run exit hooks when the program
+// terminates.
+func initHook(istest bool) {
+ // Note: hooks are run in reverse registration order, so
+ // register the counter data hook before the meta-data hook
+ // (in the case where two hooks are needed).
+ runOnNonZeroExit := true
+ runtime_addExitHook(emitCounterData, runOnNonZeroExit)
+ if istest {
+ runtime_addExitHook(emitMetaData, runOnNonZeroExit)
+ } else {
+ emitMetaData()
+ }
+}
+
+//go:linkname runtime_addExitHook runtime.addExitHook
+func runtime_addExitHook(f func(), runOnNonZeroExit bool)
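
initHook relies on exit hooks running in reverse registration order, which is why the counter-data hook is registered before the meta-data hook when both are needed. A small illustrative sketch of that last-in, first-out behavior using an ordinary slice rather than the runtime's internal hook list:

	package main

	import "fmt"

	var hooks []func()

	func addHook(f func()) { hooks = append(hooks, f) }

	func runHooks() {
		// Run in reverse registration order, like runtime exit hooks.
		for i := len(hooks) - 1; i >= 0; i-- {
			hooks[i]()
		}
	}

	func main() {
		addHook(func() { fmt.Println("emit counter data") }) // registered first, runs last
		addHook(func() { fmt.Println("emit meta-data") })    // registered second, runs first
		runHooks()
	}
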
diff --git a/src/runtime/coverage/testdata/harness.go b/src/runtime/coverage/testdata/harness.go
new file mode 100644
index 0000000..5c87e4c
--- /dev/null
+++ b/src/runtime/coverage/testdata/harness.go
@@ -0,0 +1,259 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "internal/coverage/slicewriter"
+ "io"
+ "io/ioutil"
+ "log"
+ "path/filepath"
+ "runtime/coverage"
+ "strings"
+)
+
+var verbflag = flag.Int("v", 0, "Verbose trace output level")
+var testpointflag = flag.String("tp", "", "Testpoint to run")
+var outdirflag = flag.String("o", "", "Output dir into which to emit")
+
+func emitToWriter() {
+ log.SetPrefix("emitToWriter: ")
+ var slwm slicewriter.WriteSeeker
+ if err := coverage.WriteMeta(&slwm); err != nil {
+ log.Fatalf("error: WriteMeta returns %v", err)
+ }
+ mf := filepath.Join(*outdirflag, "covmeta.0abcdef")
+ if err := ioutil.WriteFile(mf, slwm.BytesWritten(), 0666); err != nil {
+ log.Fatalf("error: writing %s: %v", mf, err)
+ }
+ var slwc slicewriter.WriteSeeker
+ if err := coverage.WriteCounters(&slwc); err != nil {
+ log.Fatalf("error: WriteCounters returns %v", err)
+ }
+ cf := filepath.Join(*outdirflag, "covcounters.0abcdef.99.77")
+ if err := ioutil.WriteFile(cf, slwc.BytesWritten(), 0666); err != nil {
+ log.Fatalf("error: writing %s: %v", cf, err)
+ }
+}
+
+func emitToDir() {
+ log.SetPrefix("emitToDir: ")
+ if err := coverage.WriteMetaDir(*outdirflag); err != nil {
+ log.Fatalf("error: WriteMetaDir returns %v", err)
+ }
+ if err := coverage.WriteCountersDir(*outdirflag); err != nil {
+ log.Fatalf("error: WriteCountersDir returns %v", err)
+ }
+}
+
+func emitToNonexistentDir() {
+ log.SetPrefix("emitToNonexistentDir: ")
+
+ want := []string{
+ "no such file or directory", // linux-ish
+ "system cannot find the file specified", // windows
+ "does not exist", // plan9
+ }
+
+ checkWant := func(which string, got string) {
+ found := false
+ for _, w := range want {
+ if strings.Contains(got, w) {
+ found = true
+ break
+ }
+ }
+ if !found {
+ log.Fatalf("%s emit to bad dir: got error:\n %v\nwanted error with one of:\n %+v", which, got, want)
+ }
+ }
+
+ // Mangle the output directory to produce something nonexistent.
+ mangled := *outdirflag + "_MANGLED"
+ if err := coverage.WriteMetaDir(mangled); err == nil {
+ log.Fatal("expected error from WriteMetaDir to nonexistent dir")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ checkWant("meta data", got)
+ }
+
+ // Now try to emit counter data file to a bad dir.
+ if err := coverage.WriteCountersDir(mangled); err == nil {
+ log.Fatal("expected error emitting counter data to bad dir")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ checkWant("counter data", got)
+ }
+}
+
+func emitToUnwritableDir() {
+ log.SetPrefix("emitToUnwritableDir: ")
+
+ want := "permission denied"
+
+ if err := coverage.WriteMetaDir(*outdirflag); err == nil {
+ log.Fatal("expected error from WriteMetaDir to unwritable dir")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ if !strings.Contains(got, want) {
+ log.Fatalf("meta-data emit to unwritable dir: wanted error containing %q got %q", want, got)
+ }
+ }
+
+ // Similarly with writing counter data.
+ if err := coverage.WriteCountersDir(*outdirflag); err == nil {
+ log.Fatal("expected error emitting counter data to unwritable dir")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ if !strings.Contains(got, want) {
+ log.Fatalf("emitting counter data to unwritable dir: wanted error containing %q got %q", want, got)
+ }
+ }
+}
+
+func emitToNilWriter() {
+ log.SetPrefix("emitToWriter: ")
+ want := "nil writer"
+ var bad io.WriteSeeker
+ if err := coverage.WriteMeta(bad); err == nil {
+ log.Fatal("expected error passing nil writer for meta emit")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ if !strings.Contains(got, want) {
+ log.Fatalf("emitting meta-data passing nil writer: wanted error containing %q got %q", want, got)
+ }
+ }
+
+ if err := coverage.WriteCounters(bad); err == nil {
+ log.Fatal("expected error passing nil writer for counter emit")
+ } else {
+ got := fmt.Sprintf("%v", err)
+ if !strings.Contains(got, want) {
+ log.Fatalf("emitting counter data passing nil writer: wanted error containing %q got %q", want, got)
+ }
+ }
+}
+
+type failingWriter struct {
+ writeCount int
+ writeLimit int
+ slws slicewriter.WriteSeeker
+}
+
+func (f *failingWriter) Write(p []byte) (n int, err error) {
+ c := f.writeCount
+ f.writeCount++
+ if f.writeLimit < 0 || c < f.writeLimit {
+ return f.slws.Write(p)
+ }
+ return 0, fmt.Errorf("manufactured write error")
+}
+
+func (f *failingWriter) Seek(offset int64, whence int) (int64, error) {
+ return f.slws.Seek(offset, whence)
+}
+
+func (f *failingWriter) reset(lim int) {
+ f.writeCount = 0
+ f.writeLimit = lim
+ f.slws = slicewriter.WriteSeeker{}
+}
+
+func writeStressTest(tag string, testf func(testf *failingWriter) error) {
+ // Invoke the function initially without the write limit
+ // set, to capture the number of writes performed.
+ fw := &failingWriter{writeLimit: -1}
+ testf(fw)
+
+ // Now that we know how many writes are going to happen, run the
+ // function repeatedly, each time with a Write operation set to
+ // fail at a new spot. The goal here is to make sure that:
+ // A) an error is reported, and B) nothing crashes.
+ tot := fw.writeCount
+ for i := 0; i < tot; i++ {
+ fw.reset(i)
+ err := testf(fw)
+ if err == nil {
+ log.Fatalf("no error from write %d tag %s", i, tag)
+ }
+ }
+}
+
+func postClear() int {
+ return 42
+}
+
+func preClear() int {
+ return 42
+}
+
+// This test is designed to ensure that write errors are properly
+// handled by the code that writes out coverage data. It repeatedly
+// invokes the 'emit to writer' apis using a specially crafted writer
+// that captures the total number of expected writes, then replays the
+// execution N times with a manufactured write error at the
+// appropriate spot.
+func emitToFailingWriter() {
+ log.SetPrefix("emitToFailingWriter: ")
+
+ writeStressTest("emit-meta", func(f *failingWriter) error {
+ return coverage.WriteMeta(f)
+ })
+ writeStressTest("emit-counter", func(f *failingWriter) error {
+ return coverage.WriteCounters(f)
+ })
+}
+
+func emitWithCounterClear() {
+ log.SetPrefix("emitWitCounterClear: ")
+ preClear()
+ if err := coverage.ClearCounters(); err != nil {
+ log.Fatalf("clear failed: %v", err)
+ }
+ postClear()
+ if err := coverage.WriteMetaDir(*outdirflag); err != nil {
+ log.Fatalf("error: WriteMetaDir returns %v", err)
+ }
+ if err := coverage.WriteCountersDir(*outdirflag); err != nil {
+ log.Fatalf("error: WriteCountersDir returns %v", err)
+ }
+}
+
+func final() int {
+ println("I run last.")
+ return 43
+}
+
+func main() {
+ log.SetFlags(0)
+ flag.Parse()
+ if *testpointflag == "" {
+ log.Fatalf("error: no testpoint (use -tp flag)")
+ }
+ if *outdirflag == "" {
+ log.Fatalf("error: no output dir specified (use -o flag)")
+ }
+ switch *testpointflag {
+ case "emitToDir":
+ emitToDir()
+ case "emitToWriter":
+ emitToWriter()
+ case "emitToNonexistentDir":
+ emitToNonexistentDir()
+ case "emitToUnwritableDir":
+ emitToUnwritableDir()
+ case "emitToNilWriter":
+ emitToNilWriter()
+ case "emitToFailingWriter":
+ emitToFailingWriter()
+ case "emitWithCounterClear":
+ emitWithCounterClear()
+ default:
+ log.Fatalf("error: unknown testpoint %q", *testpointflag)
+ }
+ final()
+}
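
The failingWriter and writeStressTest pair above is a general fault-injection pattern: run the operation once to count its writes, then replay it, forcing a failure at each write position in turn. A self-contained sketch of the same idea against a plain io.Writer; the names and the emit function here are illustrative, not part of the harness.

	package main

	import (
		"bytes"
		"fmt"
		"io"
	)

	// failAfter wraps a writer and starts returning errors after n writes.
	type failAfter struct {
		w io.Writer
		n int
	}

	func (f *failAfter) Write(p []byte) (int, error) {
		if f.n <= 0 {
			return 0, fmt.Errorf("manufactured write error")
		}
		f.n--
		return f.w.Write(p)
	}

	func main() {
		// A stand-in for the "emit" operation under test.
		emit := func(w io.Writer) error {
			for _, s := range []string{"header", "payload", "trailer"} {
				if _, err := io.WriteString(w, s); err != nil {
					return err
				}
			}
			return nil
		}
		// Fail at each of the three write positions; every run should
		// surface an error and none should crash.
		for i := 0; i < 3; i++ {
			fw := &failAfter{w: &bytes.Buffer{}, n: i}
			fmt.Println(i, emit(fw))
		}
	}
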
diff --git a/src/runtime/coverage/testdata/issue56006/repro.go b/src/runtime/coverage/testdata/issue56006/repro.go
new file mode 100644
index 0000000..60a4925
--- /dev/null
+++ b/src/runtime/coverage/testdata/issue56006/repro.go
@@ -0,0 +1,26 @@
+package main
+
+//go:noinline
+func blah(x int) int {
+ if x != 0 {
+ return x + 42
+ }
+ return x - 42
+}
+
+func main() {
+ go infloop()
+ println(blah(1) + blah(0))
+}
+
+var G int
+
+func infloop() {
+ for {
+ G += blah(1)
+ G += blah(0)
+ if G > 10000 {
+ G = 0
+ }
+ }
+}
diff --git a/src/runtime/coverage/testdata/issue56006/repro_test.go b/src/runtime/coverage/testdata/issue56006/repro_test.go
new file mode 100644
index 0000000..674d819
--- /dev/null
+++ b/src/runtime/coverage/testdata/issue56006/repro_test.go
@@ -0,0 +1,8 @@
+package main
+
+import "testing"
+
+func TestSomething(t *testing.T) {
+ go infloop()
+ println(blah(1) + blah(0))
+}
diff --git a/src/runtime/coverage/testsupport.go b/src/runtime/coverage/testsupport.go
new file mode 100644
index 0000000..a481bbb
--- /dev/null
+++ b/src/runtime/coverage/testsupport.go
@@ -0,0 +1,234 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import (
+ "fmt"
+ "internal/coverage"
+ "internal/coverage/calloc"
+ "internal/coverage/cformat"
+ "internal/coverage/cmerge"
+ "internal/coverage/decodecounter"
+ "internal/coverage/decodemeta"
+ "internal/coverage/pods"
+ "io"
+ "os"
+ "strings"
+)
+
+// processCoverTestDir is called (via a linknamed reference) from
+// testmain code when "go test -cover" is in effect. It is not
+// intended to be used other than internally by the Go command's
+// generated code.
+func processCoverTestDir(dir string, cfile string, cm string, cpkg string) error {
+ return processCoverTestDirInternal(dir, cfile, cm, cpkg, os.Stdout)
+}
+
+// processCoverTestDirInternal is an io.Writer version of processCoverTestDir,
+// exposed for unit testing.
+func processCoverTestDirInternal(dir string, cfile string, cm string, cpkg string, w io.Writer) error {
+ cmode := coverage.ParseCounterMode(cm)
+ if cmode == coverage.CtrModeInvalid {
+ return fmt.Errorf("invalid counter mode %q", cm)
+ }
+
+ // Emit meta-data and counter data.
+ ml := getCovMetaList()
+ if len(ml) == 0 {
+ // This corresponds to the case where we have a package that
+ // contains test code but no functions (which is fine). In this
+ // case there is no need to emit anything.
+ } else {
+ if err := emitMetaDataToDirectory(dir, ml); err != nil {
+ return err
+ }
+ if err := emitCounterDataToDirectory(dir); err != nil {
+ return err
+ }
+ }
+
+ // Collect pods from test run. For the majority of cases we would
+ // expect to see a single pod here, but allow for multiple pods in
+ // case the test harness is doing extra work to collect data files
+ // from builds that it kicks off as part of the testing.
+ podlist, err := pods.CollectPods([]string{dir}, false)
+ if err != nil {
+ return fmt.Errorf("reading from %s: %v", dir, err)
+ }
+
+ // Open text output file if appropriate.
+ var tf *os.File
+ var tfClosed bool
+ if cfile != "" {
+ var err error
+ tf, err = os.Create(cfile)
+ if err != nil {
+ return fmt.Errorf("internal error: opening coverage data output file %q: %v", cfile, err)
+ }
+ defer func() {
+ if !tfClosed {
+ tfClosed = true
+ tf.Close()
+ }
+ }()
+ }
+
+ // Read/process the pods.
+ ts := &tstate{
+ cm: &cmerge.Merger{},
+ cf: cformat.NewFormatter(cmode),
+ cmode: cmode,
+ }
+ // Generate the expected hash string based on the final meta-data
+ // hash for this test, then look only for pods that refer to that
+ // hash (just in case there are multiple instrumented executables
+ // in play). See issue #57924 for more on this.
+ hashstring := fmt.Sprintf("%x", finalHash)
+ for _, p := range podlist {
+ if !strings.Contains(p.MetaFile, hashstring) {
+ continue
+ }
+ if err := ts.processPod(p); err != nil {
+ return err
+ }
+ }
+
+ // Emit percent.
+ if err := ts.cf.EmitPercent(w, cpkg, true); err != nil {
+ return err
+ }
+
+ // Emit text output.
+ if tf != nil {
+ if err := ts.cf.EmitTextual(tf); err != nil {
+ return err
+ }
+ tfClosed = true
+ if err := tf.Close(); err != nil {
+ return fmt.Errorf("closing %s: %v", cfile, err)
+ }
+ }
+
+ return nil
+}
+
+type tstate struct {
+ calloc.BatchCounterAlloc
+ cm *cmerge.Merger
+ cf *cformat.Formatter
+ cmode coverage.CounterMode
+}
+
+// processPod reads coverage counter data for a specific pod.
+func (ts *tstate) processPod(p pods.Pod) error {
+ // Open meta-data file
+ f, err := os.Open(p.MetaFile)
+ if err != nil {
+ return fmt.Errorf("unable to open meta-data file %s: %v", p.MetaFile, err)
+ }
+ defer func() {
+ f.Close()
+ }()
+ var mfr *decodemeta.CoverageMetaFileReader
+ mfr, err = decodemeta.NewCoverageMetaFileReader(f, nil)
+ if err != nil {
+ return fmt.Errorf("error reading meta-data file %s: %v", p.MetaFile, err)
+ }
+ newmode := mfr.CounterMode()
+ if newmode != ts.cmode {
+ return fmt.Errorf("internal error: counter mode clash: %q from test harness, %q from data file %s", ts.cmode.String(), newmode.String(), p.MetaFile)
+ }
+ newgran := mfr.CounterGranularity()
+ if err := ts.cm.SetModeAndGranularity(p.MetaFile, cmode, newgran); err != nil {
+ return err
+ }
+
+ // A map to store counter data, indexed by pkgid/fnid tuple.
+ pmm := make(map[pkfunc][]uint32)
+
+ // Helper to read a single counter data file.
+ readcdf := func(cdf string) error {
+ cf, err := os.Open(cdf)
+ if err != nil {
+ return fmt.Errorf("opening counter data file %s: %s", cdf, err)
+ }
+ defer cf.Close()
+ var cdr *decodecounter.CounterDataReader
+ cdr, err = decodecounter.NewCounterDataReader(cdf, cf)
+ if err != nil {
+ return fmt.Errorf("reading counter data file %s: %s", cdf, err)
+ }
+ var data decodecounter.FuncPayload
+ for {
+ ok, err := cdr.NextFunc(&data)
+ if err != nil {
+ return fmt.Errorf("reading counter data file %s: %v", cdf, err)
+ }
+ if !ok {
+ break
+ }
+
+ // NB: sanity check on pkg and func IDs?
+ key := pkfunc{pk: data.PkgIdx, fcn: data.FuncIdx}
+ if prev, found := pmm[key]; found {
+ // Note: no overflow reporting here.
+ if err, _ := ts.cm.MergeCounters(data.Counters, prev); err != nil {
+ return fmt.Errorf("processing counter data file %s: %v", cdf, err)
+ }
+ }
+ c := ts.AllocateCounters(len(data.Counters))
+ copy(c, data.Counters)
+ pmm[key] = c
+ }
+ return nil
+ }
+
+ // Read counter data files.
+ for _, cdf := range p.CounterDataFiles {
+ if err := readcdf(cdf); err != nil {
+ return err
+ }
+ }
+
+ // Visit meta-data file.
+ np := uint32(mfr.NumPackages())
+ payload := []byte{}
+ for pkIdx := uint32(0); pkIdx < np; pkIdx++ {
+ var pd *decodemeta.CoverageMetaDataDecoder
+ pd, payload, err = mfr.GetPackageDecoder(pkIdx, payload)
+ if err != nil {
+ return fmt.Errorf("reading pkg %d from meta-file %s: %s", pkIdx, p.MetaFile, err)
+ }
+ ts.cf.SetPackage(pd.PackagePath())
+ var fd coverage.FuncDesc
+ nf := pd.NumFuncs()
+ for fnIdx := uint32(0); fnIdx < nf; fnIdx++ {
+ if err := pd.ReadFunc(fnIdx, &fd); err != nil {
+ return fmt.Errorf("reading meta-data file %s: %v",
+ p.MetaFile, err)
+ }
+ key := pkfunc{pk: pkIdx, fcn: fnIdx}
+ counters, haveCounters := pmm[key]
+ for i := 0; i < len(fd.Units); i++ {
+ u := fd.Units[i]
+ // Skip units with non-zero parent (no way to represent
+ // these in the existing format).
+ if u.Parent != 0 {
+ continue
+ }
+ count := uint32(0)
+ if haveCounters {
+ count = counters[i]
+ }
+ ts.cf.AddUnit(fd.Srcfile, fd.Funcname, fd.Lit, u, count)
+ }
+ }
+ }
+ return nil
+}
+
+type pkfunc struct {
+ pk, fcn uint32
+}
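
processPod above accumulates counter payloads in a map keyed by (package index, function index) and merges duplicates through the cmerge package. A simplified sketch of that accumulation, assuming a plain element-wise sum as the merge step (the real policy depends on the counter mode, e.g. "set" clamps rather than sums):

	package main

	import "fmt"

	type pkfunc struct{ pk, fcn uint32 }

	func main() {
		// Two payloads for the same function, as might arrive from two
		// counter data files belonging to one pod.
		updates := []struct {
			key  pkfunc
			ctrs []uint32
		}{
			{pkfunc{1, 2}, []uint32{1, 0, 3}},
			{pkfunc{1, 2}, []uint32{2, 1, 0}},
		}

		pmm := make(map[pkfunc][]uint32)
		for _, u := range updates {
			c := append([]uint32(nil), u.ctrs...)
			if prev, found := pmm[u.key]; found {
				for i := range c {
					c[i] += prev[i] // simplistic merge; see cmerge for the real rules
				}
			}
			pmm[u.key] = c
		}
		fmt.Println(pmm[pkfunc{1, 2}]) // [3 1 3]
	}
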
diff --git a/src/runtime/coverage/ts_test.go b/src/runtime/coverage/ts_test.go
new file mode 100644
index 0000000..b826058
--- /dev/null
+++ b/src/runtime/coverage/ts_test.go
@@ -0,0 +1,58 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package coverage
+
+import (
+ "internal/goexperiment"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ _ "unsafe"
+)
+
+//go:linkname testing_testGoCoverDir testing.testGoCoverDir
+func testing_testGoCoverDir() string
+
+// TestTestSupport does a basic verification of the functionality in
+// runtime/coverage.processCoverTestDir (doing this here as opposed to
+// relying on other test paths will provide a better signal when
+// running "go test -cover" for this package).
+func TestTestSupport(t *testing.T) {
+ if !goexperiment.CoverageRedesign {
+ return
+ }
+ if testing.CoverMode() == "" {
+ return
+ }
+ t.Logf("testing.testGoCoverDir() returns %s mode=%s\n",
+ testing_testGoCoverDir(), testing.CoverMode())
+
+ textfile := filepath.Join(t.TempDir(), "file.txt")
+ var sb strings.Builder
+ err := processCoverTestDirInternal(testing_testGoCoverDir(), textfile,
+ testing.CoverMode(), "", &sb)
+ if err != nil {
+ t.Fatalf("bad: %v", err)
+ }
+
+ // Check for existence of text file.
+ if inf, err := os.Open(textfile); err != nil {
+ t.Fatalf("problems opening text file %s: %v", textfile, err)
+ } else {
+ inf.Close()
+ }
+
+ // Check for percent output with expected tokens.
+ strout := sb.String()
+ want1 := "runtime/coverage"
+ want2 := "of statements"
+ if !strings.Contains(strout, want1) ||
+ !strings.Contains(strout, want2) {
+ t.Logf("output from run: %s\n", strout)
+ t.Fatalf("percent output missing key tokens: %q and %q",
+ want1, want2)
+ }
+}
diff --git a/src/runtime/covercounter.go b/src/runtime/covercounter.go
new file mode 100644
index 0000000..72842bd
--- /dev/null
+++ b/src/runtime/covercounter.go
@@ -0,0 +1,26 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/coverage/rtcov"
+ "unsafe"
+)
+
+//go:linkname runtime_coverage_getCovCounterList runtime/coverage.getCovCounterList
+func runtime_coverage_getCovCounterList() []rtcov.CovCounterBlob {
+ res := []rtcov.CovCounterBlob{}
+ u32sz := unsafe.Sizeof(uint32(0))
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ if datap.covctrs == datap.ecovctrs {
+ continue
+ }
+ res = append(res, rtcov.CovCounterBlob{
+ Counters: (*uint32)(unsafe.Pointer(datap.covctrs)),
+ Len: uint64((datap.ecovctrs - datap.covctrs) / u32sz),
+ })
+ }
+ return res
+}
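
The Len computation above turns a byte span (ecovctrs - covctrs) into an element count by dividing by the size of one uint32 counter. A tiny sketch of that arithmetic, with a local array standing in for the linker-provided section bounds:

	package main

	import (
		"fmt"
		"unsafe"
	)

	func main() {
		// Stand-in for a counter region delimited by start/end addresses.
		var ctrs [5]uint32
		start := uintptr(unsafe.Pointer(&ctrs[0]))
		end := start + unsafe.Sizeof(ctrs)

		// Element count = byte span / size of one counter.
		n := (end - start) / unsafe.Sizeof(uint32(0))
		fmt.Println(n) // 5
	}
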
diff --git a/src/runtime/covermeta.go b/src/runtime/covermeta.go
new file mode 100644
index 0000000..54ef42a
--- /dev/null
+++ b/src/runtime/covermeta.go
@@ -0,0 +1,72 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/coverage/rtcov"
+ "unsafe"
+)
+
+// covMeta is the top-level container for bits of state related to
+// code coverage meta-data in the runtime.
+var covMeta struct {
+ // metaList contains the list of currently registered meta-data
+ // blobs for the running program.
+ metaList []rtcov.CovMetaBlob
+
+ // pkgMap records mappings from hard-coded package IDs to
+ // slots in the metaList above.
+ pkgMap map[int]int
+
+ // Set to true if we discover a package mapping glitch.
+ hardCodedListNeedsUpdating bool
+}
+
+// addCovMeta is invoked during package "init" functions by the
+// compiler when compiling for coverage instrumentation; here 'p' is a
+// meta-data blob of length 'dlen' for the package in question, 'hash'
+// is a compiler-computed md5.sum for the blob, 'pkpath' is the
+// package path, 'pkid' is the hard-coded ID that the compiler is
+// using for the package (or -1 if the compiler doesn't think a
+// hard-coded ID is needed), and 'cmode'/'cgran' are the coverage
+// counter mode and granularity requested by the user. Return value is
+// the ID for the package for use by the package code itself.
+func addCovMeta(p unsafe.Pointer, dlen uint32, hash [16]byte, pkpath string, pkid int, cmode uint8, cgran uint8) uint32 {
+ slot := len(covMeta.metaList)
+ covMeta.metaList = append(covMeta.metaList,
+ rtcov.CovMetaBlob{
+ P: (*byte)(p),
+ Len: dlen,
+ Hash: hash,
+ PkgPath: pkpath,
+ PkgID: pkid,
+ CounterMode: cmode,
+ CounterGranularity: cgran,
+ })
+ if pkid != -1 {
+ if covMeta.pkgMap == nil {
+ covMeta.pkgMap = make(map[int]int)
+ }
+ if _, ok := covMeta.pkgMap[pkid]; ok {
+ throw("runtime.addCovMeta: coverage package map collision")
+ }
+ // Record the real slot (position on meta-list) for this
+ // package; we'll use the map to fix things up later on.
+ covMeta.pkgMap[pkid] = slot
+ }
+
+ // ID zero is reserved as invalid.
+ return uint32(slot + 1)
+}
+
+//go:linkname runtime_coverage_getCovMetaList runtime/coverage.getCovMetaList
+func runtime_coverage_getCovMetaList() []rtcov.CovMetaBlob {
+ return covMeta.metaList
+}
+
+//go:linkname runtime_coverage_getCovPkgMap runtime/coverage.getCovPkgMap
+func runtime_coverage_getCovPkgMap() map[int]int {
+ return covMeta.pkgMap
+}
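
addCovMeta returns slot+1 so that a stored package ID of zero always means "invalid", and readers such as VisitFuncs subtract the bias back off before indexing the meta-data list. A tiny sketch of that encode/decode convention; the helper names are made up:

	package main

	import "fmt"

	// encodeID biases a meta-list slot by +1 so that zero can be
	// reserved as "invalid / not yet registered".
	func encodeID(slot int) uint32 { return uint32(slot + 1) }

	// decodeID recovers the slot, mirroring the pkgId-- step in VisitFuncs.
	func decodeID(id uint32) (slot int, ok bool) {
		if id == 0 {
			return 0, false
		}
		return int(id - 1), true
	}

	func main() {
		id := encodeID(0) // first registered package
		slot, ok := decodeID(id)
		fmt.Println(id, slot, ok) // 1 0 true
	}
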
diff --git a/src/runtime/cpuflags_arm64.go b/src/runtime/cpuflags_arm64.go
index 7576bef..a0f1d11 100644
--- a/src/runtime/cpuflags_arm64.go
+++ b/src/runtime/cpuflags_arm64.go
@@ -11,7 +11,7 @@
var arm64UseAlignedLoads bool
func init() {
- if cpu.ARM64.IsNeoverseN1 || cpu.ARM64.IsZeus {
+ if cpu.ARM64.IsNeoverseN1 || cpu.ARM64.IsNeoverseV1 {
arm64UseAlignedLoads = true
}
}
diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go
index 2f7f6b4..6ef374e 100644
--- a/src/runtime/cpuprof.go
+++ b/src/runtime/cpuprof.go
@@ -14,7 +14,6 @@
import (
"internal/abi"
- "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -106,12 +105,12 @@
//go:nowritebarrierrec
func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
// Simple cas-lock to coordinate with setcpuprofilerate.
- for !atomic.Cas(&prof.signalLock, 0, 1) {
+ for !prof.signalLock.CompareAndSwap(0, 1) {
// TODO: Is it safe to osyield here? https://go.dev/issue/52672
osyield()
}
- if prof.hz != 0 { // implies cpuprof.log != nil
+ if prof.hz.Load() != 0 { // implies cpuprof.log != nil
if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
p.addExtra()
}
@@ -123,7 +122,7 @@
cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
}
- atomic.Store(&prof.signalLock, 0)
+ prof.signalLock.Store(0)
}
// addNonGo adds the non-Go stack trace to the profile.
@@ -143,7 +142,7 @@
// process at a time. If not, this lock will serialize those too.
// The use of timer_create(2) on Linux to request process-targeted
// signals may have changed this.)
- for !atomic.Cas(&prof.signalLock, 0, 1) {
+ for !prof.signalLock.CompareAndSwap(0, 1) {
// TODO: Is it safe to osyield here? https://go.dev/issue/52672
osyield()
}
@@ -157,7 +156,7 @@
cpuprof.lostExtra++
}
- atomic.Store(&prof.signalLock, 0)
+ prof.signalLock.Store(0)
}
// addExtra adds the "extra" profiling events,
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
index 5e58712..51d7bb5 100644
--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -8,6 +8,7 @@
import (
"fmt"
+ "internal/goos"
"internal/testenv"
"os"
"os/exec"
@@ -217,7 +218,9 @@
}
func TestCgoPprofCallback(t *testing.T) {
- t.Parallel()
+ if testing.Short() {
+ t.Skip("skipping in short mode") // takes a full second
+ }
switch runtime.GOOS {
case "windows", "plan9":
t.Skipf("skipping cgo pprof callback test on %s", runtime.GOOS)
@@ -603,8 +606,14 @@
t.Skipf("no signals on %s", runtime.GOOS)
}
- for _, test := range []string{"Segv", "SegvInCgo"} {
+ for _, test := range []string{"Segv", "SegvInCgo", "TgkillSegv", "TgkillSegvInCgo"} {
test := test
+
+ // The tgkill variants only run on Linux.
+ if runtime.GOOS != "linux" && strings.HasPrefix(test, "Tgkill") {
+ continue
+ }
+
t.Run(test, func(t *testing.T) {
t.Parallel()
got := runTestProg(t, "testprogcgo", test)
@@ -633,9 +642,14 @@
testenv.SkipFlaky(t, 50979)
}
- nowant := "runtime: "
- if strings.Contains(got, nowant) {
- t.Errorf("unexpectedly saw %q in output", nowant)
+ for _, nowant := range []string{"fatal error: ", "runtime: "} {
+ if strings.Contains(got, nowant) {
+ if runtime.GOOS == "darwin" && strings.Contains(got, "0xb01dfacedebac1e") {
+ // See the comment in signal_darwin_amd64.go.
+ t.Skip("skipping due to Darwin handling of malformed addresses")
+ }
+ t.Errorf("unexpectedly saw %q in output", nowant)
+ }
}
})
}
@@ -710,3 +724,47 @@
t.Fatalf("want %s, got %s\n", want, output)
}
}
+
+func TestCgoTraceParser(t *testing.T) {
+ // Test issue 29707.
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no pthreads on %s", runtime.GOOS)
+ }
+ output := runTestProg(t, "testprogcgo", "CgoTraceParser")
+ want := "OK\n"
+ ErrTimeOrder := "ErrTimeOrder\n"
+ if output == ErrTimeOrder {
+ t.Skipf("skipping due to golang.org/issue/16755: %v", output)
+ } else if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
+
+func TestCgoTraceParserWithOneProc(t *testing.T) {
+ // Test issue 29707.
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no pthreads on %s", runtime.GOOS)
+ }
+ output := runTestProg(t, "testprogcgo", "CgoTraceParser", "GOMAXPROCS=1")
+ want := "OK\n"
+ ErrTimeOrder := "ErrTimeOrder\n"
+ if output == ErrTimeOrder {
+ t.Skipf("skipping due to golang.org/issue/16755: %v", output)
+ } else if output != want {
+ t.Fatalf("GOMAXPROCS=1, want %s, got %s\n", want, output)
+ }
+}
+
+func TestCgoSigfwd(t *testing.T) {
+ t.Parallel()
+ if !goos.IsUnix {
+ t.Skipf("no signals on %s", runtime.GOOS)
+ }
+
+ got := runTestProg(t, "testprogcgo", "CgoSigfwd", "GO_TEST_CGOSIGFWD=1")
+ if want := "OK\n"; got != want {
+ t.Fatalf("expected %q, but got:\n%s", want, got)
+ }
+}
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 01d7cbe..309777d 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -18,6 +18,7 @@
"strings"
"sync"
"testing"
+ "time"
)
var toRemove []string
@@ -58,18 +59,31 @@
}
func runBuiltTestProg(t *testing.T, exe, name string, env ...string) string {
+ t.Helper()
+
if *flagQuick {
t.Skip("-quick")
}
- testenv.MustHaveGoBuild(t)
+ start := time.Now()
- cmd := testenv.CleanCmdEnv(exec.Command(exe, name))
+ cmd := testenv.CleanCmdEnv(testenv.Command(t, exe, name))
cmd.Env = append(cmd.Env, env...)
if testing.Short() {
cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1")
}
- out, _ := testenv.RunWithTimeout(t, cmd)
+ out, err := cmd.CombinedOutput()
+ if err == nil {
+ t.Logf("%v (%v): ok", cmd, time.Since(start))
+ } else {
+ if _, ok := err.(*exec.ExitError); ok {
+ t.Logf("%v: %v", cmd, err)
+ } else if errors.Is(err, exec.ErrWaitDelay) {
+ t.Fatalf("%v: %v", cmd, err)
+ } else {
+ t.Fatalf("%v failed to start: %v", cmd, err)
+ }
+ }
return string(out)
}
@@ -844,3 +858,11 @@
}
}
}
+
+func TestPanicOnUnsafeSlice(t *testing.T) {
+ output := runTestProg(t, "testprog", "panicOnNilAndEleSizeIsZero")
+ want := "panic: runtime error: unsafe.Slice: ptr is nil and len is not zero"
+ if !strings.Contains(output, want) {
+ t.Errorf("output does not contain %q:\n%s", want, output)
+ }
+}
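
TestPanicOnUnsafeSlice above checks the runtime error produced when unsafe.Slice is given a nil pointer and a non-zero length. A small sketch that triggers and recovers the same panic directly:

	package main

	import (
		"fmt"
		"unsafe"
	)

	func main() {
		defer func() {
			// Expected message: "unsafe.Slice: ptr is nil and len is not zero".
			fmt.Println("recovered:", recover())
		}()
		var p *byte
		_ = unsafe.Slice(p, 1) // nil pointer with non-zero length panics
	}
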
diff --git a/src/runtime/create_file_nounix.go b/src/runtime/create_file_nounix.go
new file mode 100644
index 0000000..60f7517
--- /dev/null
+++ b/src/runtime/create_file_nounix.go
@@ -0,0 +1,14 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !unix
+
+package runtime
+
+const canCreateFile = false
+
+func create(name *byte, perm int32) int32 {
+ throw("unimplemented")
+ return -1
+}
diff --git a/src/runtime/create_file_unix.go b/src/runtime/create_file_unix.go
new file mode 100644
index 0000000..7280810
--- /dev/null
+++ b/src/runtime/create_file_unix.go
@@ -0,0 +1,14 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package runtime
+
+const canCreateFile = true
+
+// create returns an fd to a write-only file.
+func create(name *byte, perm int32) int32 {
+ return open(name, _O_CREAT|_O_WRONLY|_O_TRUNC, perm)
+}
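
The unix-only create helper above is the in-runtime counterpart of opening a file with the create, write-only, and truncate flags. A user-space sketch of the equivalent call through the os package (the path is a placeholder):

	package main

	import (
		"log"
		"os"
	)

	func main() {
		// Equivalent user-space flags to _O_CREAT|_O_WRONLY|_O_TRUNC.
		f, err := os.OpenFile("/tmp/example.out", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		f.WriteString("hello\n")
	}
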
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 0ab23e0..669c36f 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -85,13 +85,13 @@
//go:linkname mayMoreStackPreempt
func mayMoreStackPreempt() {
// Don't do anything on the g0 or gsignal stack.
- g := getg()
- if g == g.m.g0 || g == g.m.gsignal {
+ gp := getg()
+ if gp == gp.m.g0 || gp == gp.m.gsignal {
return
}
// Force a preemption, unless the stack is already poisoned.
- if g.stackguard0 < stackPoisonMin {
- g.stackguard0 = stackPreempt
+ if gp.stackguard0 < stackPoisonMin {
+ gp.stackguard0 = stackPreempt
}
}
@@ -104,12 +104,12 @@
//go:linkname mayMoreStackMove
func mayMoreStackMove() {
// Don't do anything on the g0 or gsignal stack.
- g := getg()
- if g == g.m.g0 || g == g.m.gsignal {
+ gp := getg()
+ if gp == gp.m.g0 || gp == gp.m.gsignal {
return
}
// Force stack movement, unless the stack is already poisoned.
- if g.stackguard0 < stackPoisonMin {
- g.stackguard0 = stackForceMove
+ if gp.stackguard0 < stackPoisonMin {
+ gp.stackguard0 = stackForceMove
}
}
diff --git a/src/runtime/debug/mod.go b/src/runtime/debug/mod.go
index 688e258..8b7a423 100644
--- a/src/runtime/debug/mod.go
+++ b/src/runtime/debug/mod.go
@@ -11,7 +11,7 @@
"strings"
)
-// exported from runtime
+// exported from runtime.
func modinfo() string
// ReadBuildInfo returns the build information embedded
@@ -39,14 +39,26 @@
// BuildInfo represents the build information read from a Go binary.
type BuildInfo struct {
- GoVersion string // Version of Go that produced this binary.
- Path string // The main package path
- Main Module // The module containing the main package
- Deps []*Module // Module dependencies
- Settings []BuildSetting // Other information about the build.
+ // GoVersion is the version of the Go toolchain that built the binary
+ // (for example, "go1.19.2").
+ GoVersion string
+
+ // Path is the package path of the main package for the binary
+ // (for example, "golang.org/x/tools/cmd/stringer").
+ Path string
+
+ // Main describes the module that contains the main package for the binary.
+ Main Module
+
+ // Deps describes all the dependency modules, both direct and indirect,
+ // that contributed packages to the build of this binary.
+ Deps []*Module
+
+ // Settings describes the build settings used to build the binary.
+ Settings []BuildSetting
}
-// Module represents a module.
+// A Module describes a single module included in a build.
type Module struct {
Path string // module path
Version string // module version
@@ -54,8 +66,24 @@
Replace *Module // replaced by this module
}
-// BuildSetting describes a setting that may be used to understand how the
-// binary was built. For example, VCS commit and dirty status is stored here.
+// A BuildSetting is a key-value pair describing one setting that influenced a build.
+//
+// Defined keys include:
+//
+// - -buildmode: the buildmode flag used (typically "exe")
+// - -compiler: the compiler toolchain flag used (typically "gc")
+// - CGO_ENABLED: the effective CGO_ENABLED environment variable
+// - CGO_CFLAGS: the effective CGO_CFLAGS environment variable
+// - CGO_CPPFLAGS: the effective CGO_CPPFLAGS environment variable
+// - CGO_CXXFLAGS: the effective CGO_CXXFLAGS environment variable
+// - CGO_LDFLAGS: the effective CGO_LDFLAGS environment variable
+// - GOARCH: the architecture target
+// - GOAMD64/GOARM64/GO386/etc: the architecture feature level for GOARCH
+// - GOOS: the operating system target
+// - vcs: the version control system for the source tree where the build ran
+// - vcs.revision: the revision identifier for the current commit or checkout
+// - vcs.time: the modification time associated with vcs.revision, in RFC3339 format
+// - vcs.modified: true or false indicating whether the source tree had local modifications
type BuildSetting struct {
// Key and Value describe the build setting.
// Key must not contain an equals sign, space, tab, or newline.
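
The expanded BuildSetting documentation above enumerates the keys a binary may carry. A short sketch of reading those settings back at run time with debug.ReadBuildInfo; the exact keys present depend on how the binary was built.

	package main

	import (
		"fmt"
		"runtime/debug"
	)

	func main() {
		info, ok := debug.ReadBuildInfo()
		if !ok {
			fmt.Println("no build info embedded in this binary")
			return
		}
		fmt.Println("built with", info.GoVersion, "main module", info.Main.Path)
		for _, s := range info.Settings {
			// Keys include -buildmode, -compiler, GOOS, GOARCH, vcs.revision, ...
			fmt.Printf("%s=%s\n", s.Key, s.Value)
		}
	}
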
diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go
index 2f164e7..a4393b1 100644
--- a/src/runtime/debugcall.go
+++ b/src/runtime/debugcall.go
@@ -158,11 +158,10 @@
gp.schedlink = 0
// Park the calling goroutine.
- gp.waitreason = waitReasonDebugCall
if trace.enabled {
traceGoPark(traceEvGoBlock, 1)
}
- casgstatus(gp, _Grunning, _Gwaiting)
+ casGToWaiting(gp, _Grunning, waitReasonDebugCall)
dropg()
// Directly execute the new goroutine. The debug
diff --git a/src/runtime/debuglog.go b/src/runtime/debuglog.go
index ca1a791..b18774e 100644
--- a/src/runtime/debuglog.go
+++ b/src/runtime/debuglog.go
@@ -17,6 +17,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -63,7 +64,7 @@
allp := (*uintptr)(unsafe.Pointer(&allDloggers))
all := (*dlogger)(unsafe.Pointer(atomic.Loaduintptr(allp)))
for l1 := all; l1 != nil; l1 = l1.allLink {
- if atomic.Load(&l1.owned) == 0 && atomic.Cas(&l1.owned, 0, 1) {
+ if l1.owned.Load() == 0 && l1.owned.CompareAndSwap(0, 1) {
l = l1
break
}
@@ -79,7 +80,7 @@
throw("failed to allocate debug log")
}
l.w.r.data = &l.w.data
- l.owned = 1
+ l.owned.Store(1)
// Prepend to allDloggers list.
headp := (*uintptr)(unsafe.Pointer(&allDloggers))
@@ -121,9 +122,8 @@
//
// To obtain a dlogger, call dlog(). When done with the dlogger, call
// end().
-//
-//go:notinheap
type dlogger struct {
+ _ sys.NotInHeap
w debugLogWriter
// allLink is the next dlogger in the allDloggers list.
@@ -131,7 +131,7 @@
// owned indicates that this dlogger is owned by an M. This is
// accessed atomically.
- owned uint32
+ owned atomic.Uint32
}
// allDloggers is a list of all dloggers, linked through
@@ -160,7 +160,7 @@
}
// Return the logger to the global pool.
- atomic.Store(&l.owned, 0)
+ l.owned.Store(0)
}
const (
@@ -292,21 +292,24 @@
if !dlogEnabled {
return l
}
- str := stringStructOf(&x)
+
+ strData := unsafe.StringData(x)
datap := &firstmoduledata
- if len(x) > 4 && datap.etext <= uintptr(str.str) && uintptr(str.str) < datap.end {
+ if len(x) > 4 && datap.etext <= uintptr(unsafe.Pointer(strData)) && uintptr(unsafe.Pointer(strData)) < datap.end {
// String constants are in the rodata section, which
// isn't recorded in moduledata. But it has to be
// somewhere between etext and end.
l.w.byte(debugLogConstString)
- l.w.uvarint(uint64(str.len))
- l.w.uvarint(uint64(uintptr(str.str) - datap.etext))
+ l.w.uvarint(uint64(len(x)))
+ l.w.uvarint(uint64(uintptr(unsafe.Pointer(strData)) - datap.etext))
} else {
l.w.byte(debugLogString)
+ // We can't use unsafe.Slice as it may panic, which isn't safe
+ // in this (potentially) nowritebarrier context.
var b []byte
bb := (*slice)(unsafe.Pointer(&b))
- bb.array = str.str
- bb.len, bb.cap = str.len, str.len
+ bb.array = unsafe.Pointer(strData)
+ bb.len, bb.cap = len(x), len(x)
if len(b) > debugLogStringLimit {
b = b[:debugLogStringLimit]
}
@@ -356,9 +359,8 @@
// overwrite old records. Hence, it maintains a reader that consumes
// the log as it gets overwritten. That reader state is where an
// actual log reader would start.
-//
-//go:notinheap
type debugLogWriter struct {
+ _ sys.NotInHeap
write uint64
data debugLogBuf
@@ -376,8 +378,10 @@
buf [10]byte
}
-//go:notinheap
-type debugLogBuf [debugLogBytes]byte
+type debugLogBuf struct {
+ _ sys.NotInHeap
+ b [debugLogBytes]byte
+}
const (
// debugLogHeaderSize is the number of bytes in the framing
@@ -390,7 +394,7 @@
//go:nosplit
func (l *debugLogWriter) ensure(n uint64) {
- for l.write+n >= l.r.begin+uint64(len(l.data)) {
+ for l.write+n >= l.r.begin+uint64(len(l.data.b)) {
// Consume record at begin.
if l.r.skip() == ^uint64(0) {
// Wrapped around within a record.
@@ -406,8 +410,8 @@
//go:nosplit
func (l *debugLogWriter) writeFrameAt(pos, size uint64) bool {
- l.data[pos%uint64(len(l.data))] = uint8(size)
- l.data[(pos+1)%uint64(len(l.data))] = uint8(size >> 8)
+ l.data.b[pos%uint64(len(l.data.b))] = uint8(size)
+ l.data.b[(pos+1)%uint64(len(l.data.b))] = uint8(size >> 8)
return size <= 0xFFFF
}
@@ -441,7 +445,7 @@
l.ensure(1)
pos := l.write
l.write++
- l.data[pos%uint64(len(l.data))] = x
+ l.data.b[pos%uint64(len(l.data.b))] = x
}
//go:nosplit
@@ -450,7 +454,7 @@
pos := l.write
l.write += uint64(len(x))
for len(x) > 0 {
- n := copy(l.data[pos%uint64(len(l.data)):], x)
+ n := copy(l.data.b[pos%uint64(len(l.data.b)):], x)
pos += uint64(n)
x = x[n:]
}
@@ -513,15 +517,15 @@
//go:nosplit
func (r *debugLogReader) readUint16LEAt(pos uint64) uint16 {
- return uint16(r.data[pos%uint64(len(r.data))]) |
- uint16(r.data[(pos+1)%uint64(len(r.data))])<<8
+ return uint16(r.data.b[pos%uint64(len(r.data.b))]) |
+ uint16(r.data.b[(pos+1)%uint64(len(r.data.b))])<<8
}
//go:nosplit
func (r *debugLogReader) readUint64LEAt(pos uint64) uint64 {
var b [8]byte
for i := range b {
- b[i] = r.data[pos%uint64(len(r.data))]
+ b[i] = r.data.b[pos%uint64(len(r.data.b))]
pos++
}
return uint64(b[0]) | uint64(b[1])<<8 |
@@ -557,7 +561,7 @@
pos := r.begin + debugLogHeaderSize
var u uint64
for i := uint(0); ; i += 7 {
- b := r.data[pos%uint64(len(r.data))]
+ b := r.data.b[pos%uint64(len(r.data.b))]
pos++
u |= uint64(b&^0x80) << i
if b&0x80 == 0 {
@@ -588,7 +592,7 @@
func (r *debugLogReader) uvarint() uint64 {
var u uint64
for i := uint(0); ; i += 7 {
- b := r.data[r.begin%uint64(len(r.data))]
+ b := r.data.b[r.begin%uint64(len(r.data.b))]
r.begin++
u |= uint64(b&^0x80) << i
if b&0x80 == 0 {
@@ -610,7 +614,7 @@
}
func (r *debugLogReader) printVal() bool {
- typ := r.data[r.begin%uint64(len(r.data))]
+ typ := r.data.b[r.begin%uint64(len(r.data.b))]
r.begin++
switch typ {
@@ -644,7 +648,7 @@
break
}
for sl > 0 {
- b := r.data[r.begin%uint64(len(r.data)):]
+ b := r.data.b[r.begin%uint64(len(r.data.b)):]
if uint64(len(b)) > sl {
b = b[:sl]
}
@@ -656,6 +660,8 @@
case debugLogConstString:
len, ptr := int(r.uvarint()), uintptr(r.uvarint())
ptr += firstmoduledata.etext
+ // We can't use unsafe.String as it may panic, which isn't safe
+ // in this (potentially) nowritebarrier context.
str := stringStruct{
str: unsafe.Pointer(ptr),
len: len,
diff --git a/src/runtime/debuglog_test.go b/src/runtime/debuglog_test.go
index 2570e35..18c54a8 100644
--- a/src/runtime/debuglog_test.go
+++ b/src/runtime/debuglog_test.go
@@ -23,8 +23,8 @@
package runtime_test
import (
- "bytes"
"fmt"
+ "internal/testenv"
"regexp"
"runtime"
"strings"
@@ -94,7 +94,7 @@
}
wg.Done()
}()
- var want bytes.Buffer
+ var want strings.Builder
for i := 0; i < 1000; i++ {
runtime.Dlog().I(i).End()
fmt.Fprintf(&want, "[] %d\n", i)
@@ -122,7 +122,7 @@
runtime.ResetDebugLog()
var longString = strings.Repeat("a", 128)
- var want bytes.Buffer
+ var want strings.Builder
for i, j := 0, 0; j < 2*runtime.DebugLogBytes; i, j = i+1, j+len(longString) {
runtime.Dlog().I(i).S(longString).End()
fmt.Fprintf(&want, "[] %d %s\n", i, longString)
@@ -156,3 +156,14 @@
t.Fatalf("want %q, got %q", want, got)
}
}
+
+// TestDebugLogBuild verifies that the runtime builds with -tags=debuglog.
+func TestDebugLogBuild(t *testing.T) {
+ testenv.MustHaveGoBuild(t)
+
+	// It doesn't matter which program we build; anything will rebuild the
+	// runtime.
+ if _, err := buildTestProg(t, "testprog", "-tags=debuglog"); err != nil {
+ t.Fatal(err)
+ }
+}
diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go
index b6e47a0..f7fe45b 100644
--- a/src/runtime/defs1_netbsd_386.go
+++ b/src/runtime/defs1_netbsd_386.go
@@ -8,7 +8,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
@@ -20,7 +23,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go
index b8292fa..80908cd 100644
--- a/src/runtime/defs1_netbsd_amd64.go
+++ b/src/runtime/defs1_netbsd_amd64.go
@@ -8,7 +8,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
@@ -20,7 +23,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go
index d2cb486..c63e592 100644
--- a/src/runtime/defs1_netbsd_arm.go
+++ b/src/runtime/defs1_netbsd_arm.go
@@ -8,7 +8,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
@@ -20,7 +23,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs1_netbsd_arm64.go b/src/runtime/defs1_netbsd_arm64.go
index 7776fe1..804b5b0 100644
--- a/src/runtime/defs1_netbsd_arm64.go
+++ b/src/runtime/defs1_netbsd_arm64.go
@@ -8,7 +8,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0
@@ -20,7 +23,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs1_solaris_amd64.go b/src/runtime/defs1_solaris_amd64.go
index 3c13f33..bb53c22 100644
--- a/src/runtime/defs1_solaris_amd64.go
+++ b/src/runtime/defs1_solaris_amd64.go
@@ -23,7 +23,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x8
_SA_RESTART = 0x4
@@ -90,7 +91,10 @@
_MAXHOSTNAMELEN = 0x100
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x80
+ _O_TRUNC = 0x200
+ _O_CREAT = 0x100
_O_CLOEXEC = 0x800000
_FD_CLOEXEC = 0x1
_F_GETFL = 0x3
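
The _O_WRONLY/_O_CREAT/_O_TRUNC and _MADV_DONTNEED constants added across these defs files support two changes documented later in the extern.go hunk: writing trace files from the runtime and letting the madvdontneed GODEBUG setting pick MADV_DONTNEED instead of MADV_FREE on the BSDs and Solaris as well as Linux. The sketch below is a user-space illustration of the same madvise choice using golang.org/x/sys/unix (an external module, not the runtime's internal defs); preferFree is an invented stand-in for the GODEBUG flag, and the relevant runtime code is not part of this diff.

    // Illustrative only: release a mapping back to the OS with either
    // MADV_FREE (lazy reclaim) or MADV_DONTNEED (immediate RSS drop).
    package main

    import (
        "log"

        "golang.org/x/sys/unix"
    )

    func release(buf []byte, preferFree bool) {
        advice := unix.MADV_DONTNEED
        if preferFree {
            advice = unix.MADV_FREE
        }
        if err := unix.Madvise(buf, advice); err != nil {
            log.Printf("madvise: %v", err)
        }
    }

    func main() {
        buf, err := unix.Mmap(-1, 0, 1<<20,
            unix.PROT_READ|unix.PROT_WRITE,
            unix.MAP_ANON|unix.MAP_PRIVATE)
        if err != nil {
            log.Fatal(err)
        }
        buf[0] = 1 // touch a page so there is something to release
        release(buf, false)
    }
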
diff --git a/src/runtime/defs2_linux.go b/src/runtime/defs2_linux.go
index 41ad735..5d6730a 100644
--- a/src/runtime/defs2_linux.go
+++ b/src/runtime/defs2_linux.go
@@ -121,17 +121,6 @@
O_RDONLY = C.O_RDONLY
O_CLOEXEC = C.O_CLOEXEC
-
- EPOLLIN = C.POLLIN
- EPOLLOUT = C.POLLOUT
- EPOLLERR = C.POLLERR
- EPOLLHUP = C.POLLHUP
- EPOLLRDHUP = C.POLLRDHUP
- EPOLLET = C.EPOLLET
- EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
- EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
- EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
- EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
)
type Fpreg C.struct__fpreg
diff --git a/src/runtime/defs_aix.go b/src/runtime/defs_aix.go
index b794cd5..3895989 100644
--- a/src/runtime/defs_aix.go
+++ b/src/runtime/defs_aix.go
@@ -124,7 +124,10 @@
_ITIMER_PROF = C.ITIMER_PROF
_O_RDONLY = C.O_RDONLY
+ _O_WRONLY = C.O_WRONLY
_O_NONBLOCK = C.O_NONBLOCK
+ _O_CREAT = C.O_CREAT
+ _O_TRUNC = C.O_TRUNC
_SS_DISABLE = C.SS_DISABLE
_SI_USER = C.SI_USER
diff --git a/src/runtime/defs_aix_ppc64.go b/src/runtime/defs_aix_ppc64.go
index 4e20c85..2d25b7c 100644
--- a/src/runtime/defs_aix_ppc64.go
+++ b/src/runtime/defs_aix_ppc64.go
@@ -81,7 +81,10 @@
_ITIMER_PROF = 0x2
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x100
+ _O_TRUNC = 0x200
_SS_DISABLE = 0x2
_SI_USER = 0x0
diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go
index 59b81cf..89e4253 100644
--- a/src/runtime/defs_darwin.go
+++ b/src/runtime/defs_darwin.go
@@ -120,7 +120,10 @@
F_SETFL = C.F_SETFL
FD_CLOEXEC = C.FD_CLOEXEC
+ O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
+ O_CREAT = C.O_CREAT
+ O_TRUNC = C.O_TRUNC
)
type StackT C.struct_sigaltstack
diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go
index cbc26bf..84e6f37 100644
--- a/src/runtime/defs_darwin_amd64.go
+++ b/src/runtime/defs_darwin_amd64.go
@@ -99,7 +99,10 @@
_F_SETFL = 0x4
_FD_CLOEXEC = 0x1
- _O_NONBLOCK = 4
+ _O_WRONLY = 0x1
+ _O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
)
type stackt struct {
diff --git a/src/runtime/defs_darwin_arm64.go b/src/runtime/defs_darwin_arm64.go
index 9076e8b..30d7443 100644
--- a/src/runtime/defs_darwin_arm64.go
+++ b/src/runtime/defs_darwin_arm64.go
@@ -101,7 +101,10 @@
_F_SETFL = 0x4
_FD_CLOEXEC = 0x1
- _O_NONBLOCK = 4
+ _O_WRONLY = 0x1
+ _O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
)
type stackt struct {
diff --git a/src/runtime/defs_dragonfly.go b/src/runtime/defs_dragonfly.go
index 952163b..9dcfdf0 100644
--- a/src/runtime/defs_dragonfly.go
+++ b/src/runtime/defs_dragonfly.go
@@ -32,7 +32,10 @@
EBUSY = C.EBUSY
EAGAIN = C.EAGAIN
+ O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
+ O_CREAT = C.O_CREAT
+ O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
@@ -44,7 +47,8 @@
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
- MADV_FREE = C.MADV_FREE
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
SA_SIGINFO = C.SA_SIGINFO
SA_RESTART = C.SA_RESTART
diff --git a/src/runtime/defs_dragonfly_amd64.go b/src/runtime/defs_dragonfly_amd64.go
index 4358c1e..f1a2302 100644
--- a/src/runtime/defs_dragonfly_amd64.go
+++ b/src/runtime/defs_dragonfly_amd64.go
@@ -11,7 +11,10 @@
_EBUSY = 0x10
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x20000
_PROT_NONE = 0x0
@@ -23,7 +26,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go
index 3fbd580..d86ae91 100644
--- a/src/runtime/defs_freebsd.go
+++ b/src/runtime/defs_freebsd.go
@@ -16,10 +16,11 @@
/*
#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
#include <sys/time.h>
#include <signal.h>
#include <errno.h>
-#define _WANT_FREEBSD11_KEVENT 1
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/ucontext.h>
@@ -45,11 +46,15 @@
)
const (
- EINTR = C.EINTR
- EFAULT = C.EFAULT
- EAGAIN = C.EAGAIN
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
+ ETIMEDOUT = C.ETIMEDOUT
+ O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
+ O_CREAT = C.O_CREAT
+ O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
@@ -62,7 +67,8 @@
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
- MADV_FREE = C.MADV_FREE
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
SA_SIGINFO = C.SA_SIGINFO
SA_RESTART = C.SA_RESTART
@@ -154,7 +160,7 @@
type Umtx_time C.struct__umtx_time
-type Kevent C.struct_kevent_freebsd11
+type KeventT C.struct_kevent
type bintime C.struct_bintime
type vdsoTimehands C.struct_vdso_timehands
diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go
index ff4dcfa..ee82741 100644
--- a/src/runtime/defs_freebsd_386.go
+++ b/src/runtime/defs_freebsd_386.go
@@ -1,5 +1,6 @@
-// created by cgo -cdefs and then converted to Go
-// cgo -cdefs defs_freebsd.go
+// Code generated by cgo, then manually converted into appropriate naming and code
+// for the Go runtime.
+// go tool cgo -godefs defs_freebsd.go
package runtime
@@ -18,7 +19,10 @@
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
@@ -31,7 +35,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
@@ -228,8 +233,9 @@
filter int16
flags uint16
fflags uint32
- data int32
+ data int64
udata *byte
+ ext [4]uint64
}
type bintime struct {
diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go
index f537c89..9003f92 100644
--- a/src/runtime/defs_freebsd_amd64.go
+++ b/src/runtime/defs_freebsd_amd64.go
@@ -1,5 +1,6 @@
-// created by cgo -cdefs and then converted to Go
-// cgo -cdefs defs_freebsd.go
+// Code generated by cgo, then manually converted into appropriate naming and code
+// for the Go runtime.
+// go tool cgo -godefs defs_freebsd.go
package runtime
@@ -18,7 +19,10 @@
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
@@ -31,7 +35,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
@@ -241,6 +246,7 @@
fflags uint32
data int64
udata *byte
+ ext [4]uint64
}
type bintime struct {
diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go
index 2e20ae7..68cc1b9 100644
--- a/src/runtime/defs_freebsd_arm.go
+++ b/src/runtime/defs_freebsd_arm.go
@@ -1,5 +1,6 @@
-// created by cgo -cdefs and then converted to Go
-// cgo -cdefs defs_freebsd.go
+// Code generated by cgo, then manually converted into appropriate naming and code
+// for the Go runtime.
+// go tool cgo -godefs defs_freebsd.go
package runtime
@@ -18,7 +19,10 @@
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
@@ -31,7 +35,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
@@ -197,12 +202,15 @@
}
type keventt struct {
- ident uint32
- filter int16
- flags uint16
- fflags uint32
- data int32
- udata *byte
+ ident uint32
+ filter int16
+ flags uint16
+ fflags uint32
+ pad_cgo_0 [4]byte
+ data int64
+ udata *byte
+ pad_cgo_1 [4]byte
+ ext [4]uint64
}
type bintime struct {
diff --git a/src/runtime/defs_freebsd_arm64.go b/src/runtime/defs_freebsd_arm64.go
index 1838108..1d67236 100644
--- a/src/runtime/defs_freebsd_arm64.go
+++ b/src/runtime/defs_freebsd_arm64.go
@@ -1,5 +1,6 @@
-// created by cgo -cdefs and then converted to Go
-// cgo -cdefs defs_freebsd.go
+// Code generated by cgo, then manually converted into appropriate naming and code
+// for the Go runtime.
+// go tool cgo -godefs defs_freebsd.go
package runtime
@@ -18,7 +19,10 @@
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0
@@ -31,7 +35,8 @@
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
- _MADV_FREE = 0x5
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
@@ -225,6 +230,7 @@
fflags uint32
data int64
udata *byte
+ ext [4]uint64
}
type bintime struct {
diff --git a/src/runtime/defs_freebsd_riscv64.go b/src/runtime/defs_freebsd_riscv64.go
new file mode 100644
index 0000000..b977bde
--- /dev/null
+++ b/src/runtime/defs_freebsd_riscv64.go
@@ -0,0 +1,266 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_freebsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+ _NBBY = 0x8
+ _CTL_MAXNAME = 0x18
+ _CPU_LEVEL_WHICH = 0x3
+ _CPU_WHICH_PID = 0x2
+)
+
+const (
+ _EINTR = 0x4
+ _EFAULT = 0xe
+ _EAGAIN = 0x23
+ _ETIMEDOUT = 0x3c
+
+ _O_WRONLY = 0x1
+ _O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
+ _O_CLOEXEC = 0x100000
+
+ _PROT_NONE = 0x0
+ _PROT_READ = 0x1
+ _PROT_WRITE = 0x2
+ _PROT_EXEC = 0x4
+
+ _MAP_ANON = 0x1000
+ _MAP_SHARED = 0x1
+ _MAP_PRIVATE = 0x2
+ _MAP_FIXED = 0x10
+
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x5
+
+ _SA_SIGINFO = 0x40
+ _SA_RESTART = 0x2
+ _SA_ONSTACK = 0x1
+
+ _CLOCK_MONOTONIC = 0x4
+ _CLOCK_REALTIME = 0x0
+
+ _UMTX_OP_WAIT_UINT = 0xb
+ _UMTX_OP_WAIT_UINT_PRIVATE = 0xf
+ _UMTX_OP_WAKE = 0x3
+ _UMTX_OP_WAKE_PRIVATE = 0x10
+
+ _SIGHUP = 0x1
+ _SIGINT = 0x2
+ _SIGQUIT = 0x3
+ _SIGILL = 0x4
+ _SIGTRAP = 0x5
+ _SIGABRT = 0x6
+ _SIGEMT = 0x7
+ _SIGFPE = 0x8
+ _SIGKILL = 0x9
+ _SIGBUS = 0xa
+ _SIGSEGV = 0xb
+ _SIGSYS = 0xc
+ _SIGPIPE = 0xd
+ _SIGALRM = 0xe
+ _SIGTERM = 0xf
+ _SIGURG = 0x10
+ _SIGSTOP = 0x11
+ _SIGTSTP = 0x12
+ _SIGCONT = 0x13
+ _SIGCHLD = 0x14
+ _SIGTTIN = 0x15
+ _SIGTTOU = 0x16
+ _SIGIO = 0x17
+ _SIGXCPU = 0x18
+ _SIGXFSZ = 0x19
+ _SIGVTALRM = 0x1a
+ _SIGPROF = 0x1b
+ _SIGWINCH = 0x1c
+ _SIGINFO = 0x1d
+ _SIGUSR1 = 0x1e
+ _SIGUSR2 = 0x1f
+
+ _FPE_INTDIV = 0x2
+ _FPE_INTOVF = 0x1
+ _FPE_FLTDIV = 0x3
+ _FPE_FLTOVF = 0x4
+ _FPE_FLTUND = 0x5
+ _FPE_FLTRES = 0x6
+ _FPE_FLTINV = 0x7
+ _FPE_FLTSUB = 0x8
+
+ _BUS_ADRALN = 0x1
+ _BUS_ADRERR = 0x2
+ _BUS_OBJERR = 0x3
+
+ _SEGV_MAPERR = 0x1
+ _SEGV_ACCERR = 0x2
+
+ _ITIMER_REAL = 0x0
+ _ITIMER_VIRTUAL = 0x1
+ _ITIMER_PROF = 0x2
+
+ _EV_ADD = 0x1
+ _EV_DELETE = 0x2
+ _EV_CLEAR = 0x20
+ _EV_RECEIPT = 0x40
+ _EV_ERROR = 0x4000
+ _EV_EOF = 0x8000
+ _EVFILT_READ = -0x1
+ _EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+ _type uint16
+ prio uint16
+}
+
+type thrparam struct {
+ start_func uintptr
+ arg unsafe.Pointer
+ stack_base uintptr
+ stack_size uintptr
+ tls_base unsafe.Pointer
+ tls_size uintptr
+ child_tid unsafe.Pointer // *int64
+ parent_tid *int64
+ flags int32
+ pad_cgo_0 [4]byte
+ rtp *rtprio
+ spare [3]uintptr
+}
+
+type thread int64 // long
+
+type sigset struct {
+ __bits [4]uint32
+}
+
+type stackt struct {
+ ss_sp uintptr
+ ss_size uintptr
+ ss_flags int32
+ pad_cgo_0 [4]byte
+}
+
+type siginfo struct {
+ si_signo int32
+ si_errno int32
+ si_code int32
+ si_pid int32
+ si_uid uint32
+ si_status int32
+ si_addr uint64
+ si_value [8]byte
+ _reason [40]byte
+}
+
+type gpregs struct {
+ gp_ra uint64
+ gp_sp uint64
+ gp_gp uint64
+ gp_tp uint64
+ gp_t [7]uint64
+ gp_s [12]uint64
+ gp_a [8]uint64
+ gp_sepc uint64
+ gp_sstatus uint64
+}
+
+type fpregs struct {
+ fp_x [64]uint64 // actually __uint64_t fp_x[32][2]
+ fp_fcsr uint64
+ fp_flags int32
+ pad int32
+}
+
+type mcontext struct {
+ mc_gpregs gpregs
+ mc_fpregs fpregs
+ mc_flags int32
+ mc_pad int32
+ mc_spare [8]uint64
+}
+
+type ucontext struct {
+ uc_sigmask sigset
+ uc_mcontext mcontext
+ uc_link *ucontext
+ uc_stack stackt
+ uc_flags int32
+ __spare__ [4]int32
+ pad_cgo_0 [12]byte
+}
+
+type timespec struct {
+ tv_sec int64
+ tv_nsec int64
+}
+
+//go:nosplit
+func (ts *timespec) setNsec(ns int64) {
+ ts.tv_sec = ns / 1e9
+ ts.tv_nsec = ns % 1e9
+}
+
+type timeval struct {
+ tv_sec int64
+ tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+ tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+ it_interval timeval
+ it_value timeval
+}
+
+type umtx_time struct {
+ _timeout timespec
+ _flags uint32
+ _clockid uint32
+}
+
+type keventt struct {
+ ident uint64
+ filter int16
+ flags uint16
+ fflags uint32
+ data int64
+ udata *byte
+ ext [4]uint64
+}
+
+type bintime struct {
+ sec int64
+ frac uint64
+}
+
+type vdsoTimehands struct {
+ algo uint32
+ gen uint32
+ scale uint64
+ offset_count uint32
+ counter_mask uint32
+ offset bintime
+ boottime bintime
+ physical uint32
+ res [7]uint32
+}
+
+type vdsoTimekeep struct {
+ ver uint32
+ enabled uint32
+ current uint32
+ pad_cgo_0 [4]byte
+}
+
+const (
+ _VDSO_TK_VER_CURR = 0x1
+
+ vdsoTimehandsSize = 0x58
+ vdsoTimekeepSize = 0x10
+)
diff --git a/src/runtime/defs_linux.go b/src/runtime/defs_linux.go
index e55bb6b..296fcb4 100644
--- a/src/runtime/defs_linux.go
+++ b/src/runtime/defs_linux.go
@@ -115,17 +115,6 @@
CLOCK_THREAD_CPUTIME_ID = C.CLOCK_THREAD_CPUTIME_ID
SIGEV_THREAD_ID = C.SIGEV_THREAD_ID
-
- EPOLLIN = C.POLLIN
- EPOLLOUT = C.POLLOUT
- EPOLLERR = C.POLLERR
- EPOLLHUP = C.POLLHUP
- EPOLLRDHUP = C.POLLRDHUP
- EPOLLET = C.EPOLLET
- EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
- EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
- EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
- EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
)
type Sigset C.sigset_t
@@ -136,4 +125,3 @@
type Itimerspec C.struct_itimerspec
type Itimerval C.struct_itimerval
type Sigevent C.struct_sigevent
-type EpollEvent C.struct_epoll_event
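
The EPOLL* constants and the epollevent/EpollEvent declarations are removed from the Linux defs files (here and in the per-GOARCH files that follow) because the netpoller now obtains them from runtime/internal/syscall instead of duplicating them per architecture. For reference, ordinary Go code can still reach the same epoll facilities through the standard Linux-only syscall package; a minimal hedged sketch:

    // Illustrative only: level-triggered epoll registration from user code,
    // using the public syscall package (not the runtime-internal copy).
    package main

    import (
        "log"
        "syscall"
    )

    func main() {
        epfd, err := syscall.EpollCreate1(syscall.EPOLL_CLOEXEC)
        if err != nil {
            log.Fatal(err)
        }
        defer syscall.Close(epfd)

        // Watch stdin (fd 0) for readability.
        ev := syscall.EpollEvent{Events: syscall.EPOLLIN, Fd: 0}
        if err := syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, 0, &ev); err != nil {
            log.Fatal(err)
        }

        events := make([]syscall.EpollEvent, 8)
        n, err := syscall.EpollWait(epfd, events, 1000) // 1s timeout
        if err != nil {
            log.Fatal(err)
        }
        log.Printf("%d fd(s) ready", n)
    }
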
diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go
index 5376bde..72339f4 100644
--- a/src/runtime/defs_linux_386.go
+++ b/src/runtime/defs_linux_386.go
@@ -90,20 +90,12 @@
_SIGEV_THREAD_ID = 0x4
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
-
_AF_UNIX = 0x1
_SOCK_DGRAM = 0x2
)
@@ -254,11 +246,6 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- data [8]byte // to match amd64
-}
-
type sockaddr_un struct {
family uint16
path [108]byte
diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go
index da4d357..298f3eb 100644
--- a/src/runtime/defs_linux_amd64.go
+++ b/src/runtime/defs_linux_amd64.go
@@ -89,17 +89,6 @@
_SIGEV_THREAD_ID = 0x4
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
-
_AF_UNIX = 0x1
_SOCK_DGRAM = 0x2
)
@@ -171,16 +160,14 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- data [8]byte // unaligned uintptr
-}
-
// created by cgo -cdefs and then converted to Go
// cgo -cdefs defs_linux.go defs1_linux.go
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)
diff --git a/src/runtime/defs_linux_arm.go b/src/runtime/defs_linux_arm.go
index 18aa093..6fee57d 100644
--- a/src/runtime/defs_linux_arm.go
+++ b/src/runtime/defs_linux_arm.go
@@ -80,6 +80,9 @@
_ITIMER_PROF = 0x2
_ITIMER_VIRTUAL = 0x1
_O_RDONLY = 0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
@@ -87,17 +90,6 @@
_SIGEV_THREAD_ID = 0x4
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
-
_AF_UNIX = 0x1
_SOCK_DGRAM = 0x2
)
@@ -208,12 +200,6 @@
sa_mask uint64
}
-type epollevent struct {
- events uint32
- _pad uint32
- data [8]byte // to match amd64
-}
-
type sockaddr_un struct {
family uint16
path [108]byte
diff --git a/src/runtime/defs_linux_arm64.go b/src/runtime/defs_linux_arm64.go
index c5d7d7e..0216096 100644
--- a/src/runtime/defs_linux_arm64.go
+++ b/src/runtime/defs_linux_arm64.go
@@ -89,17 +89,6 @@
_SIGEV_THREAD_ID = 0x4
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
-
_AF_UNIX = 0x1
_SOCK_DGRAM = 0x2
)
@@ -171,17 +160,14 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- _pad uint32
- data [8]byte // to match amd64
-}
-
// Created by cgo -cdefs and then converted to Go by hand
// ../cmd/cgo/cgo -cdefs defs_linux.go defs1_linux.go defs2_linux.go
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)
diff --git a/src/runtime/defs_linux_loong64.go b/src/runtime/defs_linux_loong64.go
index dda4009..6eca18b 100644
--- a/src/runtime/defs_linux_loong64.go
+++ b/src/runtime/defs_linux_loong64.go
@@ -89,17 +89,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
type timespec struct {
@@ -146,14 +135,11 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)
diff --git a/src/runtime/defs_linux_mips64x.go b/src/runtime/defs_linux_mips64x.go
index e645248..2e8c405 100644
--- a/src/runtime/defs_linux_mips64x.go
+++ b/src/runtime/defs_linux_mips64x.go
@@ -90,17 +90,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
//struct Sigset {
@@ -178,14 +167,11 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x100
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x80
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
diff --git a/src/runtime/defs_linux_mipsx.go b/src/runtime/defs_linux_mipsx.go
index 5afb6f4..7593600 100644
--- a/src/runtime/defs_linux_mipsx.go
+++ b/src/runtime/defs_linux_mipsx.go
@@ -90,17 +90,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
type timespec struct {
@@ -172,15 +161,12 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data uint64
-}
-
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x80
+ _O_CREAT = 0x100
+ _O_TRUNC = 0x200
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)
diff --git a/src/runtime/defs_linux_ppc64.go b/src/runtime/defs_linux_ppc64.go
index f3e305e..bb3ac01 100644
--- a/src/runtime/defs_linux_ppc64.go
+++ b/src/runtime/defs_linux_ppc64.go
@@ -87,17 +87,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
//struct Sigset {
@@ -172,17 +161,14 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
// created by cgo -cdefs and then converted to Go
// cgo -cdefs defs_linux.go defs3_linux.go
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
diff --git a/src/runtime/defs_linux_ppc64le.go b/src/runtime/defs_linux_ppc64le.go
index f3e305e..bb3ac01 100644
--- a/src/runtime/defs_linux_ppc64le.go
+++ b/src/runtime/defs_linux_ppc64le.go
@@ -87,17 +87,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
//struct Sigset {
@@ -172,17 +161,14 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
// created by cgo -cdefs and then converted to Go
// cgo -cdefs defs_linux.go defs3_linux.go
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
diff --git a/src/runtime/defs_linux_riscv64.go b/src/runtime/defs_linux_riscv64.go
index 29496ac..ce4a7f3 100644
--- a/src/runtime/defs_linux_riscv64.go
+++ b/src/runtime/defs_linux_riscv64.go
@@ -89,17 +89,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
type timespec struct {
@@ -171,14 +160,11 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)
diff --git a/src/runtime/defs_linux_s390x.go b/src/runtime/defs_linux_s390x.go
index 817a29e..36497dd 100644
--- a/src/runtime/defs_linux_s390x.go
+++ b/src/runtime/defs_linux_s390x.go
@@ -88,17 +88,6 @@
_CLOCK_THREAD_CPUTIME_ID = 0x3
_SIGEV_THREAD_ID = 0x4
-
- _EPOLLIN = 0x1
- _EPOLLOUT = 0x4
- _EPOLLERR = 0x8
- _EPOLLHUP = 0x10
- _EPOLLRDHUP = 0x2000
- _EPOLLET = 0x80000000
- _EPOLL_CLOEXEC = 0x80000
- _EPOLL_CTL_ADD = 0x1
- _EPOLL_CTL_DEL = 0x2
- _EPOLL_CTL_MOD = 0x3
)
type timespec struct {
@@ -168,14 +157,11 @@
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
}
-type epollevent struct {
- events uint32
- pad_cgo_0 [4]byte
- data [8]byte // unaligned uintptr
-}
-
const (
_O_RDONLY = 0x0
+ _O_WRONLY = 0x1
+ _O_CREAT = 0x40
+ _O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go
index 6b084c0..43923e3 100644
--- a/src/runtime/defs_netbsd.go
+++ b/src/runtime/defs_netbsd.go
@@ -34,7 +34,10 @@
EFAULT = C.EFAULT
EAGAIN = C.EAGAIN
+ O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
+ O_CREAT = C.O_CREAT
+ O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE
@@ -46,7 +49,8 @@
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
- MADV_FREE = C.MADV_FREE
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
SA_SIGINFO = C.SA_SIGINFO
SA_RESTART = C.SA_RESTART
diff --git a/src/runtime/defs_openbsd.go b/src/runtime/defs_openbsd.go
index cbf53eb..4161e21 100644
--- a/src/runtime/defs_openbsd.go
+++ b/src/runtime/defs_openbsd.go
@@ -48,7 +48,8 @@
MAP_FIXED = C.MAP_FIXED
MAP_STACK = C.MAP_STACK
- MADV_FREE = C.MADV_FREE
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
SA_SIGINFO = C.SA_SIGINFO
SA_RESTART = C.SA_RESTART
diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go
index 35c559b..25524c5 100644
--- a/src/runtime/defs_openbsd_386.go
+++ b/src/runtime/defs_openbsd_386.go
@@ -10,7 +10,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
@@ -23,7 +26,8 @@
_MAP_FIXED = 0x10
_MAP_STACK = 0x4000
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go
index d7432da..a31d03b 100644
--- a/src/runtime/defs_openbsd_amd64.go
+++ b/src/runtime/defs_openbsd_amd64.go
@@ -10,7 +10,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
@@ -23,7 +26,8 @@
_MAP_FIXED = 0x10
_MAP_STACK = 0x4000
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_openbsd_arm.go b/src/runtime/defs_openbsd_arm.go
index 471b306..1d1767b 100644
--- a/src/runtime/defs_openbsd_arm.go
+++ b/src/runtime/defs_openbsd_arm.go
@@ -10,7 +10,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
@@ -23,7 +26,8 @@
_MAP_FIXED = 0x10
_MAP_STACK = 0x4000
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_openbsd_arm64.go b/src/runtime/defs_openbsd_arm64.go
index 5300ab0..745d0d3 100644
--- a/src/runtime/defs_openbsd_arm64.go
+++ b/src/runtime/defs_openbsd_arm64.go
@@ -11,7 +11,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
@@ -24,7 +27,8 @@
_MAP_FIXED = 0x10
_MAP_STACK = 0x4000
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_openbsd_mips64.go b/src/runtime/defs_openbsd_mips64.go
index a8789ef..1e469e4 100644
--- a/src/runtime/defs_openbsd_mips64.go
+++ b/src/runtime/defs_openbsd_mips64.go
@@ -17,7 +17,10 @@
_EFAULT = 0xe
_EAGAIN = 0x23
+ _O_WRONLY = 0x1
_O_NONBLOCK = 0x4
+ _O_CREAT = 0x200
+ _O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0
@@ -30,7 +33,8 @@
_MAP_FIXED = 0x10
_MAP_STACK = 0x4000
- _MADV_FREE = 0x6
+ _MADV_DONTNEED = 0x4
+ _MADV_FREE = 0x6
_SA_SIGINFO = 0x40
_SA_RESTART = 0x2
diff --git a/src/runtime/defs_solaris.go b/src/runtime/defs_solaris.go
index f626498..406304d 100644
--- a/src/runtime/defs_solaris.go
+++ b/src/runtime/defs_solaris.go
@@ -53,7 +53,8 @@
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
- MADV_FREE = C.MADV_FREE
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
SA_SIGINFO = C.SA_SIGINFO
SA_RESTART = C.SA_RESTART
@@ -119,7 +120,10 @@
MAXHOSTNAMELEN = C.MAXHOSTNAMELEN
+ O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
+ O_CREAT = C.O_CREAT
+ O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
FD_CLOEXEC = C.FD_CLOEXEC
F_GETFL = C.F_GETFL
diff --git a/src/runtime/ehooks_test.go b/src/runtime/ehooks_test.go
new file mode 100644
index 0000000..ee286ec
--- /dev/null
+++ b/src/runtime/ehooks_test.go
@@ -0,0 +1,91 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "internal/platform"
+ "internal/testenv"
+ "os/exec"
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func TestExitHooks(t *testing.T) {
+ bmodes := []string{""}
+ if testing.Short() {
+ t.Skip("skipping due to -short")
+ }
+	// Note the HasCGO() test below; this is to prevent the test
+	// from running if CGO_ENABLED=0 is in effect.
+ haverace := platform.RaceDetectorSupported(runtime.GOOS, runtime.GOARCH)
+ if haverace && testenv.HasCGO() {
+ bmodes = append(bmodes, "-race")
+ }
+ for _, bmode := range bmodes {
+ scenarios := []struct {
+ mode string
+ expected string
+ musthave string
+ }{
+ {
+ mode: "simple",
+ expected: "bar foo",
+ musthave: "",
+ },
+ {
+ mode: "goodexit",
+ expected: "orange apple",
+ musthave: "",
+ },
+ {
+ mode: "badexit",
+ expected: "blub blix",
+ musthave: "",
+ },
+ {
+ mode: "panics",
+ expected: "",
+ musthave: "fatal error: internal error: exit hook invoked panic",
+ },
+ {
+ mode: "callsexit",
+ expected: "",
+ musthave: "fatal error: internal error: exit hook invoked exit",
+ },
+ }
+
+ exe, err := buildTestProg(t, "testexithooks", bmode)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ bt := ""
+ if bmode != "" {
+ bt = " bmode: " + bmode
+ }
+ for _, s := range scenarios {
+ cmd := exec.Command(exe, []string{"-mode", s.mode}...)
+ out, _ := cmd.CombinedOutput()
+ outs := strings.ReplaceAll(string(out), "\n", " ")
+ outs = strings.TrimSpace(outs)
+ if s.expected != "" {
+ if s.expected != outs {
+ t.Logf("raw output: %q", outs)
+ t.Errorf("failed%s mode %s: wanted %q got %q", bt,
+ s.mode, s.expected, outs)
+ }
+ } else if s.musthave != "" {
+ if !strings.Contains(outs, s.musthave) {
+ t.Logf("raw output: %q", outs)
+ t.Errorf("failed mode %s: output does not contain %q",
+ s.mode, s.musthave)
+ }
+ } else {
+ panic("badly written scenario")
+ }
+ }
+ }
+}
diff --git a/src/runtime/env_plan9.go b/src/runtime/env_plan9.go
index 65480c8..d206c5d 100644
--- a/src/runtime/env_plan9.go
+++ b/src/runtime/env_plan9.go
@@ -17,7 +17,7 @@
nameOffset = 39
)
-// Goenvs caches the Plan 9 environment variables at start of execution into
+// goenvs caches the Plan 9 environment variables at start of execution into
// string array envs, to supply the initial contents for os.Environ.
// Subsequent calls to os.Setenv will change this cache, without writing back
// to the (possibly shared) Plan 9 environment, so that Setenv and Getenv
@@ -70,7 +70,7 @@
})
}
-// Dofiles reads the directory opened with file descriptor fd, applying function f
+// dofiles reads the directory opened with file descriptor fd, applying function f
// to each filename in it.
//
//go:nosplit
@@ -95,7 +95,7 @@
}
}
-// Gdirname returns the first filename from a buffer of directory entries,
+// gdirname returns the first filename from a buffer of directory entries,
// and a slice containing the remaining directory entries.
// If the buffer doesn't start with a valid directory entry, the returned name is nil.
//
@@ -117,7 +117,7 @@
return
}
-// Gbit16 reads a 16-bit little-endian binary number from b and returns it
+// gbit16 reads a 16-bit little-endian binary number from b and returns it
// with the remaining slice of b.
//
//go:nosplit
diff --git a/src/runtime/env_posix.go b/src/runtime/env_posix.go
index 94a19d8..0eb4f0d 100644
--- a/src/runtime/env_posix.go
+++ b/src/runtime/env_posix.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build unix || (js && wasm) || windows || plan9
-
package runtime
import "unsafe"
@@ -48,10 +46,7 @@
var _cgo_unsetenv unsafe.Pointer // pointer to C function
// Update the C environment if cgo is loaded.
-// Called from syscall.Setenv.
-//
-//go:linkname syscall_setenv_c syscall.setenv_c
-func syscall_setenv_c(k string, v string) {
+func setenv_c(k string, v string) {
if _cgo_setenv == nil {
return
}
@@ -60,10 +55,7 @@
}
// Update the C environment if cgo is loaded.
-// Called from syscall.unsetenv.
-//
-//go:linkname syscall_unsetenv_c syscall.unsetenv_c
-func syscall_unsetenv_c(k string) {
+func unsetenv_c(k string) {
if _cgo_unsetenv == nil {
return
}
diff --git a/src/runtime/error.go b/src/runtime/error.go
index b11473c..a211fbf 100644
--- a/src/runtime/error.go
+++ b/src/runtime/error.go
@@ -151,7 +151,7 @@
boundsSlice3Acap: "slice bounds out of range [::%x] with capacity %y",
boundsSlice3B: "slice bounds out of range [:%x:%y]",
boundsSlice3C: "slice bounds out of range [%x:%y:]",
- boundsConvert: "cannot convert slice with length %y to pointer to array with length %x",
+ boundsConvert: "cannot convert slice with length %y to array or pointer to array with length %x",
}
// boundsNegErrorFmts are overriding formats if x is negative. In this case there's no need to report y.
diff --git a/src/runtime/exithook.go b/src/runtime/exithook.go
new file mode 100644
index 0000000..bb29a94
--- /dev/null
+++ b/src/runtime/exithook.go
@@ -0,0 +1,69 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// addExitHook registers the specified function 'f' to be run at
+// program termination (e.g. when someone invokes os.Exit(), or when
+// main.main returns). Hooks are run in reverse order of registration:
+// first hook added is the last one run.
+//
+// CAREFUL: the expectation is that addExitHook should only be called
+// from a safe context (e.g. not an error/panic path or signal
+// handler, preemption enabled, allocation allowed, write barriers
+// allowed, etc), and that the exit function 'f' will be invoked under
+// similar circumstances. That is to say, we are expecting that 'f'
+// uses normal / high-level Go code as opposed to one of the more
+// restricted dialects used for the trickier parts of the runtime.
+func addExitHook(f func(), runOnNonZeroExit bool) {
+ exitHooks.hooks = append(exitHooks.hooks, exitHook{f: f, runOnNonZeroExit: runOnNonZeroExit})
+}
+
+// exitHook stores a function to be run on program exit, registered
+// by the utility runtime.addExitHook.
+type exitHook struct {
+ f func() // func to run
+ runOnNonZeroExit bool // whether to run on non-zero exit code
+}
+
+// exitHooks stores state related to hook functions registered to
+// run when program execution terminates.
+var exitHooks struct {
+ hooks []exitHook
+ runningExitHooks bool
+}
+
+// runExitHooks runs any registered exit hook functions (funcs
+// previously registered using runtime.addExitHook). Here 'exitCode'
+// is the status code being passed to os.Exit, or zero if the program
+// is terminating normally without calling os.Exit.
+func runExitHooks(exitCode int) {
+ if exitHooks.runningExitHooks {
+ throw("internal error: exit hook invoked exit")
+ }
+ exitHooks.runningExitHooks = true
+
+ runExitHook := func(f func()) (caughtPanic bool) {
+ defer func() {
+ if x := recover(); x != nil {
+ caughtPanic = true
+ }
+ }()
+ f()
+ return
+ }
+
+ finishPageTrace()
+ for i := range exitHooks.hooks {
+ h := exitHooks.hooks[len(exitHooks.hooks)-i-1]
+ if exitCode != 0 && !h.runOnNonZeroExit {
+ continue
+ }
+ if caughtPanic := runExitHook(h.f); caughtPanic {
+ throw("internal error: exit hook invoked panic")
+ }
+ }
+ exitHooks.hooks = nil
+ exitHooks.runningExitHooks = false
+}
diff --git a/src/runtime/export_debug_test.go b/src/runtime/export_debug_test.go
index 09e9779..2d8a133 100644
--- a/src/runtime/export_debug_test.go
+++ b/src/runtime/export_debug_test.go
@@ -109,7 +109,7 @@
// a signal handler. Add the go:nowritebarrierrec annotation and restructure
// this to avoid write barriers.
- switch h.gp.atomicstatus {
+ switch h.gp.atomicstatus.Load() {
case _Grunning:
if getg().m != h.mp {
println("trap on wrong M", getg().m, h.mp)
diff --git a/src/runtime/export_debuglog_test.go b/src/runtime/export_debuglog_test.go
index 1a9074e..c9dfdcb 100644
--- a/src/runtime/export_debuglog_test.go
+++ b/src/runtime/export_debuglog_test.go
@@ -25,11 +25,11 @@
func (l *dlogger) PC(x uintptr) *dlogger { return l.pc(x) }
func DumpDebugLog() string {
- g := getg()
- g.writebuf = make([]byte, 0, 1<<20)
+ gp := getg()
+ gp.writebuf = make([]byte, 0, 1<<20)
printDebugLog()
- buf := g.writebuf
- g.writebuf = nil
+ buf := gp.writebuf
+ gp.writebuf = nil
return string(buf)
}
diff --git a/src/runtime/export_linux_test.go b/src/runtime/export_linux_test.go
index dea94a9..a441c0e 100644
--- a/src/runtime/export_linux_test.go
+++ b/src/runtime/export_linux_test.go
@@ -6,19 +6,17 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/syscall"
+)
const SiginfoMaxSize = _si_max_size
const SigeventMaxSize = _sigev_max_size
+var Closeonexec = syscall.CloseOnExec
var NewOSProc0 = newosproc0
var Mincore = mincore
var Add = add
-type EpollEvent epollevent
type Siginfo siginfo
type Sigevent sigevent
-
-func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 {
- return epollctl(epfd, op, fd, (*epollevent)(ev))
-}
diff --git a/src/runtime/export_openbsd_test.go b/src/runtime/export_openbsd_test.go
new file mode 100644
index 0000000..ef680dc
--- /dev/null
+++ b/src/runtime/export_openbsd_test.go
@@ -0,0 +1,15 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build openbsd && !mips64
+
+package runtime
+
+func Fcntl(fd, cmd, arg uintptr) (uintptr, uintptr) {
+ r := fcntl(int32(fd), int32(cmd), int32(arg))
+ if r < 0 {
+ return ^uintptr(0), uintptr(-r)
+ }
+ return uintptr(r), 0
+}
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 9639946..e7476e6 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -69,6 +69,9 @@
func LFStackPop(head *uint64) *LFNode {
return (*LFNode)(unsafe.Pointer((*lfstack)(head).pop()))
}
+func LFNodeValidate(node *LFNode) {
+ lfnodeValidate((*lfnode)(unsafe.Pointer(node)))
+}
func Netpoll(delta int64) {
systemstack(func() {
@@ -84,23 +87,23 @@
}
func RunSchedLocalQueueTest() {
- _p_ := new(p)
- gs := make([]g, len(_p_.runq))
+ pp := new(p)
+ gs := make([]g, len(pp.runq))
Escape(gs) // Ensure gs doesn't move, since we use guintptrs
- for i := 0; i < len(_p_.runq); i++ {
- if g, _ := runqget(_p_); g != nil {
+ for i := 0; i < len(pp.runq); i++ {
+ if g, _ := runqget(pp); g != nil {
throw("runq is not empty initially")
}
for j := 0; j < i; j++ {
- runqput(_p_, &gs[i], false)
+ runqput(pp, &gs[i], false)
}
for j := 0; j < i; j++ {
- if g, _ := runqget(_p_); g != &gs[i] {
+ if g, _ := runqget(pp); g != &gs[i] {
print("bad element at iter ", i, "/", j, "\n")
throw("bad element")
}
}
- if g, _ := runqget(_p_); g != nil {
+ if g, _ := runqget(pp); g != nil {
throw("runq is not empty afterwards")
}
}
@@ -362,6 +365,9 @@
if s.state.get() != mSpanInUse {
continue
}
+ if s.isUnusedUserArenaChunk() {
+ continue
+ }
if sizeclass := s.spanclass.sizeclass(); sizeclass == 0 {
slow.Mallocs++
slow.Alloc += uint64(s.elemsize)
@@ -460,17 +466,17 @@
}
func LockOSCounts() (external, internal uint32) {
- g := getg()
- if g.m.lockedExt+g.m.lockedInt == 0 {
- if g.lockedm != 0 {
+ gp := getg()
+ if gp.m.lockedExt+gp.m.lockedInt == 0 {
+ if gp.lockedm != 0 {
panic("lockedm on non-locked goroutine")
}
} else {
- if g.lockedm == 0 {
+ if gp.lockedm == 0 {
panic("nil lockedm on locked goroutine")
}
}
- return g.m.lockedExt, g.m.lockedInt
+ return gp.m.lockedExt, gp.m.lockedInt
}
//go:noinline
@@ -500,7 +506,10 @@
// MapNextArenaHint reserves a page at the next arena growth hint,
// preventing the arena from growing there, and returns the range of
// addresses that are no longer viable.
-func MapNextArenaHint() (start, end uintptr) {
+//
+// This may fail to reserve memory. If it fails, it still returns the
+// address range it attempted to reserve.
+func MapNextArenaHint() (start, end uintptr, ok bool) {
hint := mheap_.arenaHints
addr := hint.addr
if hint.down {
@@ -509,7 +518,13 @@
} else {
start, end = addr, addr+heapArenaBytes
}
- sysReserve(unsafe.Pointer(addr), physPageSize)
+ got := sysReserve(unsafe.Pointer(addr), physPageSize)
+ ok = (addr == uintptr(got))
+ if !ok {
+ // We were unable to get the requested reservation.
+ // Release what we did get and fail.
+ sysFreeOS(got, physPageSize)
+ }
return
}
@@ -525,6 +540,12 @@
return getg()
}
+func GIsWaitingOnMutex(gp *G) bool {
+ return readgstatus(gp) == _Gwaiting && gp.waitreason.isMutexWait()
+}
+
+var CasGStatusAlwaysTrack = &casgstatusAlwaysTrack
+
//go:noinline
func PanicForTesting(b []byte, i int) byte {
return unexportedPanicForTesting(b, i)
@@ -1164,7 +1185,7 @@
func SemNwait(addr *uint32) uint32 {
root := semtable.rootFor(addr)
- return atomic.Load(&root.nwait)
+ return root.nwait.Load()
}
const SemTableSize = semTabSize
@@ -1196,8 +1217,6 @@
}
// mspan wrapper for testing.
-//
-//go:notinheap
type MSpan mspan
// Allocate an mspan for testing.
@@ -1230,23 +1249,29 @@
}
const (
- TimeHistSubBucketBits = timeHistSubBucketBits
- TimeHistNumSubBuckets = timeHistNumSubBuckets
- TimeHistNumSuperBuckets = timeHistNumSuperBuckets
+ TimeHistSubBucketBits = timeHistSubBucketBits
+ TimeHistNumSubBuckets = timeHistNumSubBuckets
+ TimeHistNumBuckets = timeHistNumBuckets
+ TimeHistMinBucketBits = timeHistMinBucketBits
+ TimeHistMaxBucketBits = timeHistMaxBucketBits
)
type TimeHistogram timeHistogram
// Counts returns the counts for the given bucket, subBucket indices.
// Returns true if the bucket was valid, otherwise returns the counts
-// for the underflow bucket and false.
-func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) {
+// for the underflow bucket if bucket < 0 or the overflow bucket if
+// bucket is out of range, and false.
+func (th *TimeHistogram) Count(bucket, subBucket int) (uint64, bool) {
t := (*timeHistogram)(th)
- i := bucket*TimeHistNumSubBuckets + subBucket
- if i >= uint(len(t.counts)) {
- return t.underflow, false
+ if bucket < 0 {
+ return t.underflow.Load(), false
}
- return t.counts[i], true
+ i := bucket*TimeHistNumSubBuckets + subBucket
+ if i >= len(t.counts) {
+ return t.overflow.Load(), false
+ }
+ return t.counts[i].Load(), true
}
func (th *TimeHistogram) Record(duration int64) {
@@ -1266,10 +1291,7 @@
}
func FinalizerGAsleep() bool {
- lock(&finlock)
- result := fingwait
- unlock(&finlock)
- return result
+ return fingStatus.Load()&fingWait != 0
}
// For GCTestMoveStackOnNextCall, it's important not to introduce an
@@ -1322,10 +1344,10 @@
if c.heapMarked > trigger {
trigger = c.heapMarked
}
- c.maxStackScan = stackSize
- c.globalsScan = globalsSize
- c.heapLive = trigger
- c.heapScan += uint64(float64(trigger-c.heapMarked) * scannableFrac)
+ c.maxStackScan.Store(stackSize)
+ c.globalsScan.Store(globalsSize)
+ c.heapLive.Store(trigger)
+ c.heapScan.Add(int64(float64(trigger-c.heapMarked) * scannableFrac))
c.startCycle(0, gomaxprocs, gcTrigger{kind: gcTriggerHeap})
}
@@ -1338,7 +1360,7 @@
}
func (c *GCController) HeapLive() uint64 {
- return c.heapLive
+ return c.heapLive.Load()
}
func (c *GCController) HeapMarked() uint64 {
@@ -1358,8 +1380,8 @@
}
func (c *GCController) Revise(d GCControllerReviseDelta) {
- c.heapLive += uint64(d.HeapLive)
- c.heapScan += uint64(d.HeapScan)
+ c.heapLive.Add(d.HeapLive)
+ c.heapScan.Add(d.HeapScan)
c.heapScanWork.Add(d.HeapScanWork)
c.stackScanWork.Add(d.StackScanWork)
c.globalsScanWork.Add(d.GlobalsScanWork)
@@ -1616,3 +1638,83 @@
func (s *ScavengeIndex) Clear(ci ChunkIdx) {
s.i.clear(chunkIdx(ci))
}
+
+const GTrackingPeriod = gTrackingPeriod
+
+var ZeroBase = unsafe.Pointer(&zerobase)
+
+const UserArenaChunkBytes = userArenaChunkBytes
+
+type UserArena struct {
+ arena *userArena
+}
+
+func NewUserArena() *UserArena {
+ return &UserArena{newUserArena()}
+}
+
+func (a *UserArena) New(out *any) {
+ i := efaceOf(out)
+ typ := i._type
+ if typ.kind&kindMask != kindPtr {
+ panic("new result of non-ptr type")
+ }
+ typ = (*ptrtype)(unsafe.Pointer(typ)).elem
+ i.data = a.arena.new(typ)
+}
+
+func (a *UserArena) Slice(sl any, cap int) {
+ a.arena.slice(sl, cap)
+}
+
+func (a *UserArena) Free() {
+ a.arena.free()
+}
+
+func GlobalWaitingArenaChunks() int {
+ n := 0
+ systemstack(func() {
+ lock(&mheap_.lock)
+ for s := mheap_.userArena.quarantineList.first; s != nil; s = s.next {
+ n++
+ }
+ unlock(&mheap_.lock)
+ })
+ return n
+}
+
+func UserArenaClone[T any](s T) T {
+ return arena_heapify(s).(T)
+}
+
+var AlignUp = alignUp
+
+// BlockUntilEmptyFinalizerQueue blocks until either the finalizer
+// queue is emptied (and the finalizers have executed) or the timeout
+// is reached. Returns true if the finalizer queue was emptied.
+func BlockUntilEmptyFinalizerQueue(timeout int64) bool {
+ start := nanotime()
+ for nanotime()-start < timeout {
+ lock(&finlock)
+ // We know the queue has been drained when both finq is nil
+ // and the finalizer g has stopped executing.
+ empty := finq == nil
+ empty = empty && readgstatus(fing) == _Gwaiting && fing.waitreason == waitReasonFinalizerWait
+ unlock(&finlock)
+ if empty {
+ return true
+ }
+ Gosched()
+ }
+ return false
+}
+
+func FrameStartLine(f *Frame) int {
+ return f.startLine
+}
+
+// PersistentAlloc allocates some memory that lives outside the Go heap.
+// This memory will never be freed; use sparingly.
+func PersistentAlloc(n uintptr) unsafe.Pointer {
+ return persistentalloc(n, 0, &memstats.other_sys)
+}
diff --git a/src/runtime/export_unix2_test.go b/src/runtime/export_unix2_test.go
new file mode 100644
index 0000000..360565f
--- /dev/null
+++ b/src/runtime/export_unix2_test.go
@@ -0,0 +1,10 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix && !linux
+
+package runtime
+
+// for linux close-on-exec implemented in runtime/internal/syscall
+var Closeonexec = closeonexec
diff --git a/src/runtime/export_unix_test.go b/src/runtime/export_unix_test.go
index a548cf7..71a55d8 100644
--- a/src/runtime/export_unix_test.go
+++ b/src/runtime/export_unix_test.go
@@ -9,7 +9,6 @@
import "unsafe"
var NonblockingPipe = nonblockingPipe
-var Closeonexec = closeonexec
func sigismember(mask *sigset, i int) bool {
clear := *mask
@@ -90,3 +89,9 @@
func SendSigusr1(mp *M) {
signalM(mp, _SIGUSR1)
}
+
+const (
+ O_WRONLY = _O_WRONLY
+ O_CREAT = _O_CREAT
+ O_TRUNC = _O_TRUNC
+)
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 15c519d..6c41c62 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -42,6 +42,12 @@
clobber the memory content of an object with bad content when it frees
the object.
+ cpu.*: cpu.all=off disables the use of all optional instruction set extensions.
+ cpu.extension=off disables use of instructions from the specified instruction set extension.
+ extension is the lower case name for the instruction set extension such as sse41 or avx
+	as listed in the internal/cpu package. As an example, cpu.avx=off disables runtime detection
+	and thereby the use of AVX instructions.
+
cgocheck: setting cgocheck=0 disables all checks for packages
using cgo to incorrectly pass Go pointers to non-Go code.
Setting cgocheck=1 (the default) enables relatively cheap
@@ -73,7 +79,7 @@
error at each collection, summarizing the amount of memory collected and the
length of the pause. The format of this line is subject to change.
Currently, it is:
- gc # @#s #%: #+#+# ms clock, #+#/#/#+# ms cpu, #->#-># MB, # MB goal, # P
+ gc # @#s #%: #+#+# ms clock, #+#/#/#+# ms cpu, #->#-># MB, # MB goal, # MB stacks, #MB globals, # P
where the fields are as follows:
gc # the GC number, incremented at each GC
@#s time in seconds since program start
@@ -112,12 +118,22 @@
madvdontneed: setting madvdontneed=0 will use MADV_FREE
instead of MADV_DONTNEED on Linux when returning memory to the
kernel. This is more efficient, but means RSS numbers will
- drop only when the OS is under memory pressure.
+ drop only when the OS is under memory pressure. On the BSDs and
+ Illumos/Solaris, setting madvdontneed=1 will use MADV_DONTNEED instead
+ of MADV_FREE. This is less efficient, but causes RSS numbers to drop
+ more quickly.
memprofilerate: setting memprofilerate=X will update the value of runtime.MemProfileRate.
When set to 0 memory profiling is disabled. Refer to the description of
MemProfileRate for the default value.
+ pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
+ that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
+ Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
+ enable this functionality if your program is a setuid binary as it introduces a security
+ risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this
+ option for some applications can produce large traces, so use with care.
+
invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
copier to crash the program if an invalid pointer value (for example, 1)
is found in a pointer-typed location. Setting invalidptr=0 disables this check.
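
The extern.go documentation above adds cpu.*, pagetrace, and the broadened madvdontneed knob to the GODEBUG catalogue. GODEBUG is read at process start, so the usual way to exercise these settings from a harness is to launch a child process with the variable already in its environment; a hedged sketch (the ./myprog binary name is a placeholder):

    // Illustrative only: run a program with several of the GODEBUG settings
    // documented above. GODEBUG must be set before the child's Go runtime
    // initializes, hence the exec approach.
    package main

    import (
        "log"
        "os"
        "os/exec"
    )

    func main() {
        cmd := exec.Command("./myprog") // placeholder binary
        cmd.Env = append(os.Environ(), "GODEBUG=gctrace=1,madvdontneed=1,cpu.avx=off")
        cmd.Stdout = os.Stdout
        cmd.Stderr = os.Stderr
        if err := cmd.Run(); err != nil {
            log.Fatal(err)
        }
    }
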
diff --git a/src/runtime/float.go b/src/runtime/float.go
index c80c8b7..9f281c4 100644
--- a/src/runtime/float.go
+++ b/src/runtime/float.go
@@ -24,12 +24,12 @@
return !isNaN(f) && !isFinite(f)
}
-// Abs returns the absolute value of x.
+// abs returns the absolute value of x.
//
// Special cases are:
//
-// Abs(±Inf) = +Inf
-// Abs(NaN) = NaN
+// abs(±Inf) = +Inf
+// abs(NaN) = NaN
func abs(x float64) float64 {
const sign = 1 << 63
return float64frombits(float64bits(x) &^ sign)
@@ -42,12 +42,12 @@
return float64frombits(float64bits(x)&^sign | float64bits(y)&sign)
}
-// Float64bits returns the IEEE 754 binary representation of f.
+// float64bits returns the IEEE 754 binary representation of f.
func float64bits(f float64) uint64 {
return *(*uint64)(unsafe.Pointer(&f))
}
-// Float64frombits returns the floating point number corresponding
+// float64frombits returns the floating point number corresponding
// the IEEE 754 binary representation b.
func float64frombits(b uint64) float64 {
return *(*float64)(unsafe.Pointer(&b))
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 122818f..0b2c972 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -689,7 +689,7 @@
time.Sleep(100 * time.Millisecond)
start := time.Now()
runtime.ReadMemStats(&ms)
- latencies = append(latencies, time.Now().Sub(start))
+ latencies = append(latencies, time.Since(start))
}
// Make sure to stop the timer before we wait! The load created above
// is very heavy-weight and not easy to stop, so we could end up
diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go
index e726006..d4a2b3f 100644
--- a/src/runtime/hash_test.go
+++ b/src/runtime/hash_test.go
@@ -6,6 +6,7 @@
import (
"fmt"
+ "internal/race"
"math"
"math/rand"
. "runtime"
@@ -125,6 +126,9 @@
// All 0-3 byte strings have distinct hashes.
func TestSmhasherSmallKeys(t *testing.T) {
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
h := newHashSet()
var b [3]byte
for i := 0; i < 256; i++ {
@@ -166,6 +170,9 @@
if testing.Short() {
t.Skip("Skipping in short mode")
}
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
h := newHashSet()
for n := 2; n <= 16; n++ {
twoNonZero(h, n)
@@ -208,6 +215,9 @@
if testing.Short() {
t.Skip("Skipping in short mode")
}
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
r := rand.New(rand.NewSource(1234))
const REPEAT = 8
const N = 1000000
@@ -275,6 +285,9 @@
if testing.Short() {
t.Skip("Skipping in short mode")
}
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
permutation(t, []uint32{0, 1}, 20)
@@ -447,6 +460,9 @@
if testing.Short() {
t.Skip("Skipping in short mode")
}
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
avalancheTest1(t, &BytesKey{make([]byte, 2)})
avalancheTest1(t, &BytesKey{make([]byte, 4)})
avalancheTest1(t, &BytesKey{make([]byte, 8)})
@@ -514,6 +530,9 @@
// All bit rotations of a set of distinct keys
func TestSmhasherWindowed(t *testing.T) {
+ if race.Enabled {
+ t.Skip("Too long for race mode")
+ }
t.Logf("32 bit keys")
windowed(t, &Int32Key{})
t.Logf("64 bit keys")
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index c7f2b7a..f57a1a1 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -120,7 +120,7 @@
var typecache [typeCacheBuckets]typeCacheBucket
-// dump a uint64 in a varint format parseable by encoding/binary
+// dump a uint64 in a varint format parseable by encoding/binary.
func dumpint(v uint64) {
var buf [10]byte
var n int
@@ -142,7 +142,7 @@
}
}
-// dump varint uint64 length followed by memory contents
+// dump varint uint64 length followed by memory contents.
func dumpmemrange(data unsafe.Pointer, len uintptr) {
dumpint(uint64(len))
dwrite(data, len)
@@ -156,11 +156,10 @@
}
func dumpstr(s string) {
- sp := stringStructOf(&s)
- dumpmemrange(sp.str, uintptr(sp.len))
+ dumpmemrange(unsafe.Pointer(unsafe.StringData(s)), uintptr(len(s)))
}
-// dump information for a type
+// dump information for a type.
func dumptype(t *_type) {
if t == nil {
return
@@ -197,19 +196,17 @@
if x := t.uncommon(); x == nil || t.nameOff(x.pkgpath).name() == "" {
dumpstr(t.string())
} else {
- pkgpathstr := t.nameOff(x.pkgpath).name()
- pkgpath := stringStructOf(&pkgpathstr)
- namestr := t.name()
- name := stringStructOf(&namestr)
- dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
- dwrite(pkgpath.str, uintptr(pkgpath.len))
+ pkgpath := t.nameOff(x.pkgpath).name()
+ name := t.name()
+ dumpint(uint64(uintptr(len(pkgpath)) + 1 + uintptr(len(name))))
+ dwrite(unsafe.Pointer(unsafe.StringData(pkgpath)), uintptr(len(pkgpath)))
dwritebyte('.')
- dwrite(name.str, uintptr(name.len))
+ dwrite(unsafe.Pointer(unsafe.StringData(name)), uintptr(len(name)))
}
dumpbool(t.kind&kindDirectIface == 0 || t.ptrdata != 0)
}
-// dump an object
+// dump an object.
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
@@ -242,7 +239,7 @@
depth uintptr // depth in call stack (0 == most recent)
}
-// dump kinds & offsets of interesting fields in bv
+// dump kinds & offsets of interesting fields in bv.
func dumpbv(cbv *bitvector, offset uintptr) {
for i := uintptr(0); i < uintptr(cbv.n); i++ {
if cbv.ptrbit(i) == 1 {
@@ -327,7 +324,7 @@
// Record arg info for parent.
child.argoff = s.argp - s.fp
- child.arglen = s.arglen
+ child.arglen = s.argBytes()
child.sp = (*uint8)(unsafe.Pointer(s.sp))
child.depth++
stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
@@ -354,7 +351,7 @@
dumpint(tagGoroutine)
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(sp))
- dumpint(uint64(gp.goid))
+ dumpint(gp.goid)
dumpint(uint64(gp.gopc))
dumpint(uint64(readgstatus(gp)))
dumpbool(isSystemGoroutine(gp, false))
@@ -693,9 +690,8 @@
func writeheapdump_m(fd uintptr, m *MemStats) {
assertWorldStopped()
- _g_ := getg()
- casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
- _g_.waitreason = waitReasonDumpingHeap
+ gp := getg()
+ casGToWaiting(gp.m.curg, _Grunning, waitReasonDumpingHeap)
// Set dump file.
dumpfd = fd
@@ -710,7 +706,7 @@
tmpbuf = nil
}
- casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
+ casgstatus(gp.m.curg, _Gwaiting, _Grunning)
}
// dumpint() the kind & offset of each field in an object.
@@ -737,16 +733,16 @@
for i := uintptr(0); i < nptr/8+1; i++ {
tmpbuf[i] = 0
}
- i := uintptr(0)
- hbits := heapBitsForAddr(p)
- for ; i < nptr; i++ {
- if !hbits.morePointers() {
- break // end of object
+
+ hbits := heapBitsForAddr(p, size)
+ for {
+ var addr uintptr
+ hbits, addr = hbits.next()
+ if addr == 0 {
+ break
}
- if hbits.isPointer() {
- tmpbuf[i/8] |= 1 << (i % 8)
- }
- hbits = hbits.next()
+ i := (addr - p) / goarch.PtrSize
+ tmpbuf[i/8] |= 1 << (i % 8)
}
- return bitvector{int32(i), &tmpbuf[0]}
+ return bitvector{int32(nptr), &tmpbuf[0]}
}
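
The rewritten loop above walks the new heapBitsForAddr iterator and sets one bit per pointer-sized word of the object. A standalone sketch of just that bit-packing step, with a plain slice of pointer addresses standing in for the runtime's heap-bitmap iterator (offsetsToBitvector is an invented name):

package main

import "fmt"

const ptrSize = 8 // assuming a 64-bit platform

// offsetsToBitvector mirrors the bit packing in the loop above: for each
// pointer-holding word of an object at base, set bit i = (addr-base)/ptrSize
// in a byte buffer holding one bit per word.
func offsetsToBitvector(base, size uintptr, ptrAddrs []uintptr) []byte {
    nptr := size / ptrSize
    buf := make([]byte, nptr/8+1)
    for _, addr := range ptrAddrs {
        i := (addr - base) / ptrSize
        buf[i/8] |= 1 << (i % 8)
    }
    return buf
}

func main() {
    // A 64-byte object at 0x1000 with pointers in words 0 and 3.
    bv := offsetsToBitvector(0x1000, 64, []uintptr{0x1000, 0x1018})
    fmt.Printf("%08b\n", bv[0]) // 00001001
}
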
diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go
index eddfbab..43dfe61 100644
--- a/src/runtime/histogram.go
+++ b/src/runtime/histogram.go
@@ -12,72 +12,88 @@
const (
// For the time histogram type, we use an HDR histogram.
- // Values are placed in super-buckets based solely on the most
- // significant set bit. Thus, super-buckets are power-of-2 sized.
+ // Values are placed in buckets based solely on the most
+ // significant set bit. Thus, buckets are power-of-2 sized.
// Values are then placed into sub-buckets based on the value of
// the next timeHistSubBucketBits most significant bits. Thus,
- // sub-buckets are linear within a super-bucket.
+ // sub-buckets are linear within a bucket.
//
// Therefore, the number of sub-buckets (timeHistNumSubBuckets)
// defines the error. This error may be computed as
// 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets
- // per super-bucket the error is approximately 6%.
+ // per bucket the error is approximately 6%.
//
- // The number of super-buckets (timeHistNumSuperBuckets), on the
- // other hand, defines the range. To reserve room for sub-buckets,
- // bit timeHistSubBucketBits is the first bit considered for
- // super-buckets, so super-bucket indices are adjusted accordingly.
+ // The number of buckets (timeHistNumBuckets), on the
+ // other hand, defines the range. To avoid producing a large number
+ // of buckets that are close together, especially for small numbers
+ // (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets
+ // is defined in terms of the least significant bit (timeHistMinBucketBits)
+ // that needs to be set before we start bucketing and the most
+ // significant bit (timeHistMaxBucketBits) that we bucket before we just
+ // dump it into a catch-all bucket.
//
- // As an example, consider 45 super-buckets with 16 sub-buckets.
+ // As an example, consider the configuration:
//
- // 00110
- // ^----
- // │ ^
- // │ └---- Lowest 4 bits -> sub-bucket 6
- // └------- Bit 4 unset -> super-bucket 0
+ // timeHistMinBucketBits = 9
+ // timeHistMaxBucketBits = 48
+ // timeHistSubBucketBits = 2
//
- // 10110
- // ^----
- // │ ^
- // │ └---- Next 4 bits -> sub-bucket 6
- // └------- Bit 4 set -> super-bucket 1
- // 100010
- // ^----^
- // │ ^ └-- Lower bits ignored
- // │ └---- Next 4 bits -> sub-bucket 1
- // └------- Bit 5 set -> super-bucket 2
+ // Then:
//
- // Following this pattern, super-bucket 44 will have the bit 47 set. We don't
- // have any buckets for higher values, so the highest sub-bucket will
- // contain values of 2^48-1 nanoseconds or approx. 3 days. This range is
- // more than enough to handle durations produced by the runtime.
- timeHistSubBucketBits = 4
- timeHistNumSubBuckets = 1 << timeHistSubBucketBits
- timeHistNumSuperBuckets = 45
- timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1
+ // 011000001
+ // ^--
+ // │ ^
+ // │ └---- Next 2 bits -> sub-bucket 3
+ // └------- Bit 9 unset -> bucket 0
+ //
+ // 110000001
+ // ^--
+ // │ ^
+ // │ └---- Next 2 bits -> sub-bucket 2
+ // └------- Bit 9 set -> bucket 1
+ //
+ // 1000000010
+ // ^-- ^
+ // │ ^ └-- Lower bits ignored
+ // │ └---- Next 2 bits -> sub-bucket 0
+ // └------- Bit 10 set -> bucket 2
+ //
+ // Following this pattern, bucket 38 will have the bit 46 set. We don't
+ // have any buckets for higher values, so we spill the rest into an overflow
+ // bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more.
+ // This range is more than enough to handle durations produced by the runtime.
+ timeHistMinBucketBits = 9
+ timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range.
+ timeHistSubBucketBits = 2
+ timeHistNumSubBuckets = 1 << timeHistSubBucketBits
+ timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1
+ // Two extra buckets, one for underflow, one for overflow.
+ timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2
)
// timeHistogram represents a distribution of durations in
// nanoseconds.
//
// The accuracy and range of the histogram is defined by the
-// timeHistSubBucketBits and timeHistNumSuperBuckets constants.
+// timeHistSubBucketBits and timeHistNumBuckets constants.
//
// It is an HDR histogram with exponentially-distributed
// buckets and linearly distributed sub-buckets.
//
-// Counts in the histogram are updated atomically, so it is safe
-// for concurrent use. It is also safe to read all the values
-// atomically.
+// The histogram is safe for concurrent reads and writes.
type timeHistogram struct {
- counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64
+ counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64
// underflow counts all the times we got a negative duration
// sample. Because of how time works on some platforms, it's
// possible to measure negative durations. We could ignore them,
// but we record them anyway because it's better to have some
// signal that it's happening than just missing samples.
- underflow uint64
+ underflow atomic.Uint64
+
+ // overflow counts all the times we got a duration that exceeded
+ // the range counts represents.
+ overflow atomic.Uint64
}
// record adds the given duration to the distribution.
@@ -87,36 +103,35 @@
//
//go:nosplit
func (h *timeHistogram) record(duration int64) {
+ // If the duration is negative, capture that in underflow.
if duration < 0 {
- atomic.Xadd64(&h.underflow, 1)
+ h.underflow.Add(1)
return
}
- // The index of the exponential bucket is just the index
- // of the highest set bit adjusted for how many bits we
- // use for the subbucket. Note that it's timeHistSubBucketsBits-1
- // because we use the 0th bucket to hold values < timeHistNumSubBuckets.
- var superBucket, subBucket uint
- if duration >= timeHistNumSubBuckets {
- // At this point, we know the duration value will always be
- // at least timeHistSubBucketsBits long.
- superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits
- if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) {
- // The bucket index we got is larger than what we support, so
- // include this count in the highest bucket, which extends to
- // infinity.
- superBucket = timeHistNumSuperBuckets - 1
- subBucket = timeHistNumSubBuckets - 1
- } else {
- // The linear subbucket index is just the timeHistSubBucketsBits
- // bits after the top bit. To extract that value, shift down
- // the duration such that we leave the top bit and the next bits
- // intact, then extract the index.
- subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
- }
+ // bucketBit is the target bit for the bucket: usually the highest set
+ // bit of the duration, but if the duration is below the minimum, it is
+ // the highest set bit of the minimum (a bit that is unset in the duration).
+ //
+ // bucket is the bucket index, which is the bucketBit minus the
+ // highest bit of the minimum, plus one to leave room for the catch-all
+ // bucket for samples lower than the minimum.
+ var bucketBit, bucket uint
+ if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits {
+ bucketBit = timeHistMinBucketBits
+ bucket = 0 // bucketBit - timeHistMinBucketBits
} else {
- subBucket = uint(duration)
+ bucketBit = uint(l)
+ bucket = bucketBit - timeHistMinBucketBits + 1
}
- atomic.Xadd64(&h.counts[superBucket*timeHistNumSubBuckets+subBucket], 1)
+ // If the bucket we computed is greater than the number of buckets,
+ // count that in overflow.
+ if bucket >= timeHistNumBuckets {
+ h.overflow.Add(1)
+ return
+ }
+ // The sub-bucket index is just the next timeHistSubBucketBits bits after the bucketBit.
+ subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
+ h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1)
}
const (
@@ -139,33 +154,37 @@
// not nanoseconds like the timeHistogram represents durations.
func timeHistogramMetricsBuckets() []float64 {
b := make([]float64, timeHistTotalBuckets+1)
+ // Underflow bucket.
b[0] = float64NegInf()
- // Super-bucket 0 has no bits above timeHistSubBucketBits
- // set, so just iterate over each bucket and assign the
- // incrementing bucket.
- for i := 0; i < timeHistNumSubBuckets; i++ {
- bucketNanos := uint64(i)
- b[i+1] = float64(bucketNanos) / 1e9
+
+ for j := 0; j < timeHistNumSubBuckets; j++ {
+ // No bucket bit for the first few buckets. Just sub-bucket bits after the
+ // min bucket bit.
+ bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits)
+ // Convert nanoseconds to seconds via a division.
+ // These values will all be exactly representable by a float64.
+ b[j+1] = float64(bucketNanos) / 1e9
}
- // Generate the rest of the super-buckets. It's easier to reason
- // about if we cut out the 0'th bucket, so subtract one since
- // we just handled that bucket.
- for i := 0; i < timeHistNumSuperBuckets-1; i++ {
+ // Generate the rest of the buckets. It's easier to reason
+ // about if we cut out the 0'th bucket.
+ for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ {
for j := 0; j < timeHistNumSubBuckets; j++ {
- // Set the super-bucket bit.
- bucketNanos := uint64(1) << (i + timeHistSubBucketBits)
+ // Set the bucket bit.
+ bucketNanos := uint64(1) << (i - 1)
// Set the sub-bucket bits.
- bucketNanos |= uint64(j) << i
- // The index for this bucket is going to be the (i+1)'th super bucket
- // (note that we're starting from zero, but handled the first super-bucket
+ bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits)
+ // The index for this bucket is going to be the (i+1)'th bucket
+ // (note that we're starting from zero, but handled the first bucket
// earlier, so we need to compensate), and the j'th sub bucket.
// Add 1 because we left space for -Inf.
- bucketIndex := (i+1)*timeHistNumSubBuckets + j + 1
+ bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1
// Convert nanoseconds to seconds via a division.
// These values will all be exactly representable by a float64.
b[bucketIndex] = float64(bucketNanos) / 1e9
}
}
+ // Overflow bucket.
+ b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9
b[len(b)-1] = float64Inf()
return b
}
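
To make the new bucketing scheme concrete, the following standalone program mirrors record's index math under the constants above (math/bits.Len64 stands in for sys.Len64; atomics and the underflow/overflow paths are omitted). The three inputs reproduce the worked examples in the comment:

package main

import (
    "fmt"
    "math/bits"
)

const (
    minBucketBits = 9 // timeHistMinBucketBits
    subBucketBits = 2 // timeHistSubBucketBits
    numSubBuckets = 1 << subBucketBits
)

// bucketOf mirrors timeHistogram.record's index computation.
func bucketOf(duration int64) (bucket, subBucket uint) {
    var bucketBit uint
    if l := uint(bits.Len64(uint64(duration))); l < minBucketBits {
        bucketBit = minBucketBits
        bucket = 0
    } else {
        bucketBit = l
        bucket = bucketBit - minBucketBits + 1
    }
    subBucket = uint(duration>>(bucketBit-1-subBucketBits)) % numSubBuckets
    return
}

func main() {
    for _, d := range []int64{0b011000001, 0b110000001, 0b1000000010} {
        b, sb := bucketOf(d)
        fmt.Printf("%010b ns -> bucket %d, sub-bucket %d\n", d, b, sb)
    }
    // Output:
    // 0011000001 ns -> bucket 0, sub-bucket 3
    // 0110000001 ns -> bucket 1, sub-bucket 2
    // 1000000010 ns -> bucket 2, sub-bucket 0
}
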
diff --git a/src/runtime/histogram_test.go b/src/runtime/histogram_test.go
index b12b65a..5246e86 100644
--- a/src/runtime/histogram_test.go
+++ b/src/runtime/histogram_test.go
@@ -20,50 +20,54 @@
h := &dummyTimeHistogram
// Record exactly one sample in each bucket.
- for i := 0; i < TimeHistNumSuperBuckets; i++ {
- var base int64
- if i > 0 {
- base = int64(1) << (i + TimeHistSubBucketBits - 1)
- }
- for j := 0; j < TimeHistNumSubBuckets; j++ {
- v := int64(j)
- if i > 0 {
- v <<= i - 1
- }
- h.Record(base + v)
+ for j := 0; j < TimeHistNumSubBuckets; j++ {
+ v := int64(j) << (TimeHistMinBucketBits - 1 - TimeHistSubBucketBits)
+ for k := 0; k < j; k++ {
+ // Record a number of times equal to the bucket index.
+ h.Record(v)
}
}
- // Hit the underflow bucket.
+ for i := TimeHistMinBucketBits; i < TimeHistMaxBucketBits; i++ {
+ base := int64(1) << (i - 1)
+ for j := 0; j < TimeHistNumSubBuckets; j++ {
+ v := int64(j) << (i - 1 - TimeHistSubBucketBits)
+ for k := 0; k < (i+1-TimeHistMinBucketBits)*TimeHistNumSubBuckets+j; k++ {
+ // Record a number of times equal to the bucket index.
+ h.Record(base + v)
+ }
+ }
+ }
+ // Hit the underflow and overflow buckets.
h.Record(int64(-1))
+ h.Record(math.MaxInt64)
+ h.Record(math.MaxInt64)
// Check to make sure there's exactly one count in each
// bucket.
- for i := uint(0); i < TimeHistNumSuperBuckets; i++ {
- for j := uint(0); j < TimeHistNumSubBuckets; j++ {
+ for i := 0; i < TimeHistNumBuckets; i++ {
+ for j := 0; j < TimeHistNumSubBuckets; j++ {
c, ok := h.Count(i, j)
if !ok {
- t.Errorf("hit underflow bucket unexpectedly: (%d, %d)", i, j)
- } else if c != 1 {
- t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c)
+ t.Errorf("unexpected invalid bucket: (%d, %d)", i, j)
+ } else if idx := uint64(i*TimeHistNumSubBuckets + j); c != idx {
+ t.Errorf("bucket (%d, %d) has count that is not %d: %d", i, j, idx, c)
}
}
}
- c, ok := h.Count(TimeHistNumSuperBuckets, 0)
+ c, ok := h.Count(-1, 0)
if ok {
- t.Errorf("expected to hit underflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0)
+ t.Errorf("expected to hit underflow bucket: (%d, %d)", -1, 0)
}
if c != 1 {
- t.Errorf("underflow bucket has count that is not 1: %d", c)
+ t.Errorf("underflow bucket has count that is not 1: %d", c)
}
- // Check overflow behavior.
- // By hitting a high value, we should just be adding into the highest bucket.
- h.Record(math.MaxInt64)
- c, ok = h.Count(TimeHistNumSuperBuckets-1, TimeHistNumSubBuckets-1)
- if !ok {
- t.Error("hit underflow bucket in highest bucket unexpectedly")
- } else if c != 2 {
- t.Errorf("highest has count that is not 2: %d", c)
+ c, ok = h.Count(TimeHistNumBuckets+1, 0)
+ if ok {
+ t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumBuckets+1, 0)
+ }
+ if c != 2 {
+ t.Errorf("overflow bucket has count that is not 2: %d", c)
}
dummyTimeHistogram = TimeHistogram{}
@@ -72,34 +76,32 @@
func TestTimeHistogramMetricsBuckets(t *testing.T) {
buckets := TimeHistogramMetricsBuckets()
- nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumSuperBuckets
- expBucketsLen := nonInfBucketsLen + 2 // Count -Inf and +Inf.
+ nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumBuckets
+ expBucketsLen := nonInfBucketsLen + 3 // Count -Inf, the edge for the overflow bucket, and +Inf.
if len(buckets) != expBucketsLen {
t.Fatalf("unexpected length of buckets: got %d, want %d", len(buckets), expBucketsLen)
}
- // Check the first non-Inf 2*TimeHistNumSubBuckets buckets in order, skipping the
- // first bucket which should be -Inf (checked later).
- //
- // Because of the way this scheme works, the bottom TimeHistNumSubBuckets
- // buckets are fully populated, and then the next TimeHistNumSubBuckets
- // have the TimeHistSubBucketBits'th bit set, while the bottom are once
- // again fully populated.
- for i := 1; i <= 2*TimeHistNumSubBuckets+1; i++ {
- if got, want := buckets[i], float64(i-1)/1e9; got != want {
- t.Errorf("expected bucket %d to have value %e, got %e", i, want, got)
- }
- }
// Check some values.
idxToBucket := map[int]float64{
0: math.Inf(-1),
- 33: float64(0x10<<1) / 1e9,
- 34: float64(0x11<<1) / 1e9,
- 49: float64(0x10<<2) / 1e9,
- 58: float64(0x19<<2) / 1e9,
- 65: float64(0x10<<3) / 1e9,
- 513: float64(0x10<<31) / 1e9,
- 519: float64(0x16<<31) / 1e9,
- expBucketsLen - 2: float64(0x1f<<43) / 1e9,
+ 1: 0.0,
+ 2: float64(0x040) / 1e9,
+ 3: float64(0x080) / 1e9,
+ 4: float64(0x0c0) / 1e9,
+ 5: float64(0x100) / 1e9,
+ 6: float64(0x140) / 1e9,
+ 7: float64(0x180) / 1e9,
+ 8: float64(0x1c0) / 1e9,
+ 9: float64(0x200) / 1e9,
+ 10: float64(0x280) / 1e9,
+ 11: float64(0x300) / 1e9,
+ 12: float64(0x380) / 1e9,
+ 13: float64(0x400) / 1e9,
+ 15: float64(0x600) / 1e9,
+ 81: float64(0x8000000) / 1e9,
+ 82: float64(0xa000000) / 1e9,
+ 108: float64(0x380000000) / 1e9,
+ expBucketsLen - 2: float64(0x1<<47) / 1e9,
expBucketsLen - 1: math.Inf(1),
}
for idx, bucket := range idxToBucket {
diff --git a/src/runtime/internal/atomic/atomic_loong64.go b/src/runtime/internal/atomic/atomic_loong64.go
index 908a7d6..d82a5b8 100644
--- a/src/runtime/internal/atomic/atomic_loong64.go
+++ b/src/runtime/internal/atomic/atomic_loong64.go
@@ -42,6 +42,9 @@
func LoadAcq(ptr *uint32) uint32
//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
func LoadAcquintptr(ptr *uintptr) uintptr
//go:noescape
@@ -80,4 +83,7 @@
func StoreRel(ptr *uint32, val uint32)
//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s
index bfb6c7e..3d802be 100644
--- a/src/runtime/internal/atomic/atomic_loong64.s
+++ b/src/runtime/internal/atomic/atomic_loong64.s
@@ -156,6 +156,9 @@
TEXT ·StoreRel(SB), NOSPLIT, $0-12
JMP ·Store(SB)
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
JMP ·Store64(SB)
@@ -293,6 +296,10 @@
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
JMP atomic·Load(SB)
+// uint64 ·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
+
// uintptr ·LoadAcquintptr(uintptr volatile* ptr)
TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
JMP atomic·Load64(SB)
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index 2ae60b8..2427bfd 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -345,6 +345,36 @@
}
}
+func TestCasRel(t *testing.T) {
+ const _magic = 0x5a5aa5a5
+ var x struct {
+ before uint32
+ i uint32
+ after uint32
+ o uint32
+ n uint32
+ }
+
+ x.before = _magic
+ x.after = _magic
+ for j := 0; j < 32; j += 1 {
+ x.i = (1 << j) + 0
+ x.o = (1 << j) + 0
+ x.n = (1 << j) + 1
+ if !atomic.CasRel(&x.i, x.o, x.n) {
+ t.Fatalf("should have swapped %#x %#x", x.o, x.n)
+ }
+
+ if x.i != x.n {
+ t.Fatalf("wrong x.i after swap: x.i=%#x x.n=%#x", x.i, x.n)
+ }
+
+ if x.before != _magic || x.after != _magic {
+ t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, _magic, _magic)
+ }
+ }
+}
+
func TestStorepNoWB(t *testing.T) {
var p [2]*int
for i := range p {
diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s
index 0cc7fa7..9225df8 100644
--- a/src/runtime/internal/atomic/sys_linux_arm.s
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -15,9 +15,6 @@
// LR = return address
// The function returns with CS true if the swap happened.
// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850
-// On older kernels (before 2.6.24) the function can incorrectly
-// report a conflict, so we have to double-check the compare ourselves
-// and retry if necessary.
//
// https://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5
//
@@ -37,20 +34,13 @@
// because we don't know how to traceback through __kuser_cmpxchg
MOVW (R2), R0
MOVW old+4(FP), R0
-loop:
MOVW new+8(FP), R1
BL cas<>(SB)
- BCC check
+ BCC ret0
MOVW $1, R0
MOVB R0, ret+12(FP)
RET
-check:
- // Kernel lies; double-check.
- MOVW ptr+0(FP), R2
- MOVW old+4(FP), R0
- MOVW 0(R2), R3
- CMP R0, R3
- BEQ loop
+ret0:
MOVW $0, R0
MOVB R0, ret+12(FP)
RET
diff --git a/src/runtime/internal/atomic/types.go b/src/runtime/internal/atomic/types.go
index d346a76..0d75226 100644
--- a/src/runtime/internal/atomic/types.go
+++ b/src/runtime/internal/atomic/types.go
@@ -15,25 +15,32 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (i *Int32) Load() int32 {
return Loadint32(&i.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (i *Int32) Store(value int32) {
Storeint32(&i.value, value)
}
// CompareAndSwap atomically compares i's value with old,
// and if they're equal, swaps i's value with new.
+// It reports whether the swap ran.
//
-// Returns true if the operation succeeded.
+//go:nosplit
func (i *Int32) CompareAndSwap(old, new int32) bool {
return Casint32(&i.value, old, new)
}
// Swap replaces i's value with new, returning
// i's value before the replacement.
+//
+//go:nosplit
func (i *Int32) Swap(new int32) int32 {
return Xchgint32(&i.value, new)
}
@@ -43,6 +50,8 @@
//
// This operation wraps around in the usual
// two's-complement way.
+//
+//go:nosplit
func (i *Int32) Add(delta int32) int32 {
return Xaddint32(&i.value, delta)
}
@@ -59,25 +68,32 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (i *Int64) Load() int64 {
return Loadint64(&i.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (i *Int64) Store(value int64) {
Storeint64(&i.value, value)
}
// CompareAndSwap atomically compares i's value with old,
// and if they're equal, swaps i's value with new.
+// It reports whether the swap ran.
//
-// Returns true if the operation succeeded.
+//go:nosplit
func (i *Int64) CompareAndSwap(old, new int64) bool {
return Casint64(&i.value, old, new)
}
// Swap replaces i's value with new, returning
// i's value before the replacement.
+//
+//go:nosplit
func (i *Int64) Swap(new int64) int64 {
return Xchgint64(&i.value, new)
}
@@ -87,6 +103,8 @@
//
// This operation wraps around in the usual
// two's-complement way.
+//
+//go:nosplit
func (i *Int64) Add(delta int64) int64 {
return Xaddint64(&i.value, delta)
}
@@ -100,11 +118,15 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (u *Uint8) Load() uint8 {
return Load8(&u.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (u *Uint8) Store(value uint8) {
Store8(&u.value, value)
}
@@ -114,6 +136,8 @@
// the result into u.
//
// The full process is performed atomically.
+//
+//go:nosplit
func (u *Uint8) And(value uint8) {
And8(&u.value, value)
}
@@ -123,6 +147,8 @@
// the result into u.
//
// The full process is performed atomically.
+//
+//go:nosplit
func (u *Uint8) Or(value uint8) {
Or8(&u.value, value)
}
@@ -136,11 +162,15 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (b *Bool) Load() bool {
return b.u.Load() != 0
}
// Store updates the value atomically.
+//
+//go:nosplit
func (b *Bool) Store(value bool) {
s := uint8(0)
if value {
@@ -158,6 +188,8 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (u *Uint32) Load() uint32 {
return Load(&u.value)
}
@@ -169,11 +201,15 @@
// on this thread can be observed to occur before it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uint32) LoadAcquire() uint32 {
return LoadAcq(&u.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (u *Uint32) Store(value uint32) {
Store(&u.value, value)
}
@@ -185,14 +221,17 @@
// on this thread can be observed to occur after it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uint32) StoreRelease(value uint32) {
StoreRel(&u.value, value)
}
// CompareAndSwap atomically compares u's value with old,
// and if they're equal, swaps u's value with new.
+// It reports whether the swap ran.
//
-// Returns true if the operation succeeded.
+//go:nosplit
func (u *Uint32) CompareAndSwap(old, new uint32) bool {
return Cas(&u.value, old, new)
}
@@ -202,16 +241,19 @@
// may observe operations that occur after this operation to
// precede it, but no operation that precedes it
// on this thread can be observed to occur after it.
-//
-// Returns true if the operation succeeded.
+// It reports whether the swap ran.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uint32) CompareAndSwapRelease(old, new uint32) bool {
return CasRel(&u.value, old, new)
}
// Swap replaces u's value with new, returning
// u's value before the replacement.
+//
+//go:nosplit
func (u *Uint32) Swap(value uint32) uint32 {
return Xchg(&u.value, value)
}
@@ -221,6 +263,8 @@
// the result into u.
//
// The full process is performed atomically.
+//
+//go:nosplit
func (u *Uint32) And(value uint32) {
And(&u.value, value)
}
@@ -230,6 +274,8 @@
// the result into u.
//
// The full process is performed atomically.
+//
+//go:nosplit
func (u *Uint32) Or(value uint32) {
Or(&u.value, value)
}
@@ -239,6 +285,8 @@
//
// This operation wraps around in the usual
// two's-complement way.
+//
+//go:nosplit
func (u *Uint32) Add(delta int32) uint32 {
return Xadd(&u.value, delta)
}
@@ -255,25 +303,32 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (u *Uint64) Load() uint64 {
return Load64(&u.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (u *Uint64) Store(value uint64) {
Store64(&u.value, value)
}
// CompareAndSwap atomically compares u's value with old,
// and if they're equal, swaps u's value with new.
+// It reports whether the swap ran.
//
-// Returns true if the operation succeeded.
+//go:nosplit
func (u *Uint64) CompareAndSwap(old, new uint64) bool {
return Cas64(&u.value, old, new)
}
// Swap replaces u's value with new, returning
// u's value before the replacement.
+//
+//go:nosplit
func (u *Uint64) Swap(value uint64) uint64 {
return Xchg64(&u.value, value)
}
@@ -283,6 +338,8 @@
//
// This operation wraps around in the usual
// two's-complement way.
+//
+//go:nosplit
func (u *Uint64) Add(delta int64) uint64 {
return Xadd64(&u.value, delta)
}
@@ -296,6 +353,8 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (u *Uintptr) Load() uintptr {
return Loaduintptr(&u.value)
}
@@ -307,11 +366,15 @@
// on this thread can be observed to occur before it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uintptr) LoadAcquire() uintptr {
return LoadAcquintptr(&u.value)
}
// Store updates the value atomically.
+//
+//go:nosplit
func (u *Uintptr) Store(value uintptr) {
Storeuintptr(&u.value, value)
}
@@ -323,20 +386,25 @@
// on this thread can be observed to occur after it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uintptr) StoreRelease(value uintptr) {
StoreReluintptr(&u.value, value)
}
// CompareAndSwap atomically compares u's value with old,
// and if they're equal, swaps u's value with new.
+// It reports whether the swap ran.
//
-// Returns true if the operation succeeded.
+//go:nosplit
func (u *Uintptr) CompareAndSwap(old, new uintptr) bool {
return Casuintptr(&u.value, old, new)
}
// Swap replaces u's value with new, returning
// u's value before the replacement.
+//
+//go:nosplit
func (u *Uintptr) Swap(value uintptr) uintptr {
return Xchguintptr(&u.value, value)
}
@@ -346,6 +414,8 @@
//
// This operation wraps around in the usual
// two's-complement way.
+//
+//go:nosplit
func (u *Uintptr) Add(delta uintptr) uintptr {
return Xadduintptr(&u.value, delta)
}
@@ -361,12 +431,16 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (f *Float64) Load() float64 {
r := f.u.Load()
return *(*float64)(unsafe.Pointer(&r))
}
// Store updates the value atomically.
+//
+//go:nosplit
func (f *Float64) Store(value float64) {
f.u.Store(*(*uint64)(unsafe.Pointer(&value)))
}
@@ -386,6 +460,8 @@
}
// Load accesses and returns the value atomically.
+//
+//go:nosplit
func (u *UnsafePointer) Load() unsafe.Pointer {
return Loadp(unsafe.Pointer(&u.value))
}
@@ -396,24 +472,102 @@
// perform a write barrier on value, and so this operation may
// hide pointers from the GC. Use with care and sparingly.
// It is safe to use with values not found in the Go heap.
+// Prefer Store instead.
+//
+//go:nosplit
func (u *UnsafePointer) StoreNoWB(value unsafe.Pointer) {
StorepNoWB(unsafe.Pointer(&u.value), value)
}
+// Store updates the value atomically.
+func (u *UnsafePointer) Store(value unsafe.Pointer) {
+ storePointer(&u.value, value)
+}
+
+// provided by runtime
+//go:linkname storePointer
+func storePointer(ptr *unsafe.Pointer, new unsafe.Pointer)
+
// CompareAndSwapNoWB atomically (with respect to other methods)
// compares u's value with old, and if they're equal,
// swaps u's value with new.
-//
-// Returns true if the operation succeeded.
+// It reports whether the swap ran.
//
// WARNING: As the name implies this operation does *not*
// perform a write barrier on value, and so this operation may
// hide pointers from the GC. Use with care and sparingly.
// It is safe to use with values not found in the Go heap.
+// Prefer CompareAndSwap instead.
+//
+//go:nosplit
func (u *UnsafePointer) CompareAndSwapNoWB(old, new unsafe.Pointer) bool {
return Casp1(&u.value, old, new)
}
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+// It reports whether the swap ran.
+func (u *UnsafePointer) CompareAndSwap(old, new unsafe.Pointer) bool {
+ return casPointer(&u.value, old, new)
+}
+
+func casPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+
+// Pointer is an atomic pointer of type *T.
+type Pointer[T any] struct {
+ u UnsafePointer
+}
+
+// Load accesses and returns the value atomically.
+//
+//go:nosplit
+func (p *Pointer[T]) Load() *T {
+ return (*T)(p.u.Load())
+}
+
+// StoreNoWB updates the value atomically.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+// Prefer Store instead.
+//
+//go:nosplit
+func (p *Pointer[T]) StoreNoWB(value *T) {
+ p.u.StoreNoWB(unsafe.Pointer(value))
+}
+
+// Store updates the value atomically.
+//go:nosplit
+func (p *Pointer[T]) Store(value *T) {
+ p.u.Store(unsafe.Pointer(value))
+}
+
+// CompareAndSwapNoWB atomically (with respect to other methods)
+// compares u's value with old, and if they're equal,
+// swaps u's value with new.
+// It reports whether the swap ran.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+// Prefer CompareAndSwap instead.
+//
+//go:nosplit
+func (p *Pointer[T]) CompareAndSwapNoWB(old, new *T) bool {
+ return p.u.CompareAndSwapNoWB(unsafe.Pointer(old), unsafe.Pointer(new))
+}
+
+// CompareAndSwap atomically (with respect to other methods)
+// compares u's value with old, and if they're equal,
+// swaps u's value with new.
+// It reports whether the swap ran.
+func (p *Pointer[T]) CompareAndSwap(old, new *T) bool {
+ return p.u.CompareAndSwap(unsafe.Pointer(old), unsafe.Pointer(new))
+}
+
// noCopy may be embedded into structs which must not be copied
// after the first use.
//
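
The new generic Pointer[T] gives runtime code a typed atomic pointer whose Store and CompareAndSwap go through the write-barrier-aware helpers. Since runtime/internal/atomic cannot be imported outside the runtime, the sketch below uses the exported sync/atomic.Pointer[T], which has the same Load/Store/Swap/CompareAndSwap shape, to show the usage pattern:

package main

import (
    "fmt"
    "sync/atomic"
)

type node struct{ v int }

func main() {
    var head atomic.Pointer[node]

    n1 := &node{v: 1}
    head.Store(n1)

    // CompareAndSwap only succeeds if head still points at n1.
    n2 := &node{v: 2}
    if head.CompareAndSwap(n1, n2) {
        fmt.Println("swapped, head.v =", head.Load().v) // swapped, head.v = 2
    }
}
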
diff --git a/src/runtime/internal/atomic/types_64bit.go b/src/runtime/internal/atomic/types_64bit.go
index 43c1ba2..006e83b 100644
--- a/src/runtime/internal/atomic/types_64bit.go
+++ b/src/runtime/internal/atomic/types_64bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
+//go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
package atomic
@@ -13,6 +13,8 @@
// on this thread can be observed to occur before it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uint64) LoadAcquire() uint64 {
return LoadAcq64(&u.value)
}
@@ -24,6 +26,8 @@
// on this thread can be observed to occur after it.
//
// WARNING: Use sparingly and with great care.
+//
+//go:nosplit
func (u *Uint64) StoreRelease(value uint64) {
StoreRel64(&u.value, value)
}
diff --git a/src/runtime/internal/startlinetest/func_amd64.go b/src/runtime/internal/startlinetest/func_amd64.go
new file mode 100644
index 0000000..ab7063d
--- /dev/null
+++ b/src/runtime/internal/startlinetest/func_amd64.go
@@ -0,0 +1,13 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package startlinetest contains helpers for runtime_test.TestStartLineAsm.
+package startlinetest
+
+// Defined in func_amd64.s, this is a trivial assembly function that calls
+// runtime_test.callerStartLine.
+func AsmFunc() int
+
+// Provided by runtime_test.
+var CallerStartLine func(bool) int
diff --git a/src/runtime/internal/startlinetest/func_amd64.s b/src/runtime/internal/startlinetest/func_amd64.s
new file mode 100644
index 0000000..96982be
--- /dev/null
+++ b/src/runtime/internal/startlinetest/func_amd64.s
@@ -0,0 +1,28 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "funcdata.h"
+#include "textflag.h"
+
+// Assembly function for runtime_test.TestStartLineAsm.
+//
+// Note that this file can't be built directly as part of runtime_test, as assembly
+// files can't declare an alternative package. Building it into runtime is
+// possible, but linkshared complicates things:
+//
+// 1. linkshared mode leaves the function around in the final output of
+// non-test builds.
+// 2. Due to (1), the linker can't resolve the callerStartLine relocation
+// (as runtime_test isn't built for non-test builds).
+//
+// Thus it is simpler to just put this in its own package, imported only by
+// runtime_test. We use ABIInternal as no ABI wrapper is generated for
+// callerStartLine since it is in a different package.
+
+TEXT ·AsmFunc<ABIInternal>(SB),NOSPLIT,$8-0
+ NO_LOCAL_POINTERS
+ MOVQ $0, AX // wantInlined
+ MOVQ ·CallerStartLine(SB), DX
+ CALL (DX)
+ RET
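
The package comment above describes a function-variable hand-off: runtime_test assigns its helper to CallerStartLine, and the assembly AsmFunc loads that variable and calls through it with wantInlined = false. A hypothetical pure-Go stand-in for that wiring (only AsmFunc and CallerStartLine exist in the real package; everything else here is invented for illustration):

package main

import "fmt"

// CallerStartLineVar mimics startlinetest.CallerStartLine: the leaf package
// cannot import the test package, so the test injects the helper via a
// package-level variable.
var CallerStartLineVar func(wantInlined bool) int

// asmFuncStandIn plays the role of the assembly AsmFunc, which loads the
// variable and calls it with wantInlined = false.
func asmFuncStandIn() int {
    return CallerStartLineVar(false)
}

func main() {
    // The "test" side wires up the callback before calling in.
    CallerStartLineVar = func(wantInlined bool) int { return 42 }
    fmt.Println(asmFuncStandIn()) // 42
}
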
diff --git a/src/runtime/internal/sys/consts.go b/src/runtime/internal/sys/consts.go
index fffcf81..98c0f09 100644
--- a/src/runtime/internal/sys/consts.go
+++ b/src/runtime/internal/sys/consts.go
@@ -10,7 +10,9 @@
)
// AIX requires a larger stack for syscalls.
-const StackGuardMultiplier = StackGuardMultiplierDefault*(1-goos.IsAix) + 2*goos.IsAix
+// The race build also needs more stack. See issue 54291.
+// This arithmetic must match that in cmd/internal/objabi/stack.go:stackGuardMultiplier.
+const StackGuardMultiplier = 1 + goos.IsAix + isRace
// DefaultPhysPageSize is the default physical page size.
const DefaultPhysPageSize = goarch.DefaultPhysPageSize
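
For reference, the new formula works out as follows per configuration; this is just the arithmetic restated (goos.IsAix and isRace are each 0 or 1), not new API:

package guardsketch

// StackGuardMultiplier = 1 + goos.IsAix + isRace:
const (
    linuxNoRace = 1 + 0 + 0 // 1 (the old StackGuardMultiplierDefault)
    linuxRace   = 1 + 0 + 1 // 2
    aixNoRace   = 1 + 1 + 0 // 2
    aixRace     = 1 + 1 + 1 // 3
)
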
diff --git a/src/runtime/internal/sys/consts_norace.go b/src/runtime/internal/sys/consts_norace.go
new file mode 100644
index 0000000..a9613b8
--- /dev/null
+++ b/src/runtime/internal/sys/consts_norace.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !race
+
+package sys
+
+const isRace = 0
diff --git a/src/runtime/internal/sys/consts_race.go b/src/runtime/internal/sys/consts_race.go
new file mode 100644
index 0000000..f824fb3
--- /dev/null
+++ b/src/runtime/internal/sys/consts_race.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build race
+
+package sys
+
+const isRace = 1
diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
index 5af4901..902d893 100644
--- a/src/runtime/internal/sys/intrinsics.go
+++ b/src/runtime/internal/sys/intrinsics.go
@@ -5,56 +5,75 @@
//go:build !386
// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
-// TODO replace all uses of CtzXX with TrailingZerosXX; they are the same.
package sys
-// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
+// Copied from math/bits to avoid dependence.
-const deBruijn64ctz = 0x0218a392cd3d5dbf
-
-var deBruijnIdx64ctz = [64]byte{
- 0, 1, 2, 7, 3, 13, 8, 19,
- 4, 25, 14, 28, 9, 34, 20, 40,
- 5, 17, 26, 38, 15, 46, 29, 48,
- 10, 31, 35, 54, 21, 50, 41, 57,
- 63, 6, 12, 18, 24, 27, 33, 39,
- 16, 37, 45, 47, 30, 53, 49, 56,
- 62, 11, 23, 32, 36, 44, 52, 55,
- 61, 22, 43, 51, 60, 42, 59, 58,
+var deBruijn32tab = [32]byte{
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
-const deBruijn32ctz = 0x04653adf
+const deBruijn32 = 0x077CB531
-var deBruijnIdx32ctz = [32]byte{
- 0, 1, 2, 6, 3, 11, 7, 16,
- 4, 14, 12, 21, 8, 23, 17, 26,
- 31, 5, 10, 15, 13, 20, 22, 25,
- 30, 9, 19, 24, 29, 18, 28, 27,
+var deBruijn64tab = [64]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
-// Ctz64 counts trailing (low-order) zeroes,
-// and if all are zero, then 64.
-func Ctz64(x uint64) int {
- x &= -x // isolate low-order bit
- y := x * deBruijn64ctz >> 58 // extract part of deBruijn sequence
- i := int(deBruijnIdx64ctz[y]) // convert to bit index
- z := int((x - 1) >> 57 & 64) // adjustment if zero
- return i + z
+const deBruijn64 = 0x03f79d71b4ca8b09
+
+const ntz8tab = "" +
+ "\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+ "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
+
+// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
+func TrailingZeros32(x uint32) int {
+ if x == 0 {
+ return 32
+ }
+ // see comment in TrailingZeros64
+ return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
}
-// Ctz32 counts trailing (low-order) zeroes,
-// and if all are zero, then 32.
-func Ctz32(x uint32) int {
- x &= -x // isolate low-order bit
- y := x * deBruijn32ctz >> 27 // extract part of deBruijn sequence
- i := int(deBruijnIdx32ctz[y]) // convert to bit index
- z := int((x - 1) >> 26 & 32) // adjustment if zero
- return i + z
+// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
+func TrailingZeros64(x uint64) int {
+ if x == 0 {
+ return 64
+ }
+ // If popcount is fast, replace code below with return popcount(^x & (x - 1)).
+ //
+ // x & -x leaves only the right-most bit set in the word. Let k be the
+ // index of that bit. Since only a single bit is set, the value is two
+ // to the power of k. Multiplying by a power of two is equivalent to
+ // left shifting, in this case by k bits. The de Bruijn (64 bit) constant
+ // is such that all six bit, consecutive substrings are distinct.
+ // Therefore, if we have a left shifted version of this constant we can
+ // find by how many bits it was shifted by looking at which six bit
+ // substring ended up at the top of the word.
+ // (Knuth, volume 4, section 7.3.1)
+ return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}
-// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
-func Ctz8(x uint8) int {
+// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func TrailingZeros8(x uint8) int {
return int(ntz8tab[x])
}
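
The de Bruijn comment above is easiest to trust with a quick cross-check. This standalone sketch reimplements the table-driven TrailingZeros64 and compares it against math/bits (which the runtime copy deliberately avoids importing):

package main

import (
    "fmt"
    "math/bits"
)

const deBruijn64 = 0x03f79d71b4ca8b09

var deBruijn64tab = [64]byte{
    0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
    62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
    63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
    54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}

func trailingZeros64(x uint64) int {
    if x == 0 {
        return 64
    }
    // x&-x isolates the lowest set bit; the multiply shifts the de Bruijn
    // constant left by that bit's index, and the top six bits index the table.
    return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}

func main() {
    for i := 0; i < 64; i++ {
        x := uint64(5) << uint(i)
        if got, want := trailingZeros64(x), bits.TrailingZeros64(x); got != want {
            fmt.Printf("mismatch at bit %d: got %d, want %d\n", i, got, want)
        }
    }
    fmt.Println("table-driven TrailingZeros64 agrees with math/bits")
}
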
diff --git a/src/runtime/internal/sys/intrinsics_386.s b/src/runtime/internal/sys/intrinsics_386.s
index 784b246..f33ade0 100644
--- a/src/runtime/internal/sys/intrinsics_386.s
+++ b/src/runtime/internal/sys/intrinsics_386.s
@@ -4,7 +4,7 @@
#include "textflag.h"
-TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
+TEXT runtime∕internal∕sys·TrailingZeros64(SB), NOSPLIT, $0-12
// Try low 32 bits.
MOVL x_lo+0(FP), AX
BSFL AX, AX
@@ -26,7 +26,7 @@
MOVL $64, ret+8(FP)
RET
-TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
+TEXT runtime∕internal∕sys·TrailingZeros32(SB), NOSPLIT, $0-8
MOVL x+0(FP), AX
BSFL AX, AX
JNZ 2(PC)
@@ -34,7 +34,7 @@
MOVL AX, ret+4(FP)
RET
-TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
+TEXT runtime∕internal∕sys·TrailingZeros8(SB), NOSPLIT, $0-8
MOVBLZX x+0(FP), AX
BSFL AX, AX
JNZ 2(PC)
diff --git a/src/runtime/internal/sys/intrinsics_common.go b/src/runtime/internal/sys/intrinsics_common.go
index 48d9759..1461551 100644
--- a/src/runtime/internal/sys/intrinsics_common.go
+++ b/src/runtime/internal/sys/intrinsics_common.go
@@ -6,45 +6,29 @@
// Copied from math/bits to avoid dependence.
-var len8tab = [256]uint8{
- 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
- 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
- 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
- 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-}
+const len8tab = "" +
+ "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
+ "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
+ "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+ "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
-var ntz8tab = [256]uint8{
- 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
-}
-
-// len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+//
+// nosplit because this is used in src/runtime/histogram.go, which may run in sensitive contexts.
+//
+//go:nosplit
func Len64(x uint64) (n int) {
if x >= 1<<32 {
x >>= 32
@@ -98,45 +82,12 @@
return int(x) & (1<<7 - 1)
}
-var deBruijn64tab = [64]byte{
- 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
- 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
- 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
- 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
-}
-
-const deBruijn64 = 0x03f79d71b4ca8b09
-
-// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
-func TrailingZeros64(x uint64) int {
- if x == 0 {
- return 64
- }
- // If popcount is fast, replace code below with return popcount(^x & (x - 1)).
- //
- // x & -x leaves only the right-most bit set in the word. Let k be the
- // index of that bit. Since only a single bit is set, the value is two
- // to the power of k. Multiplying by a power of two is equivalent to
- // left shifting, in this case by k bits. The de Bruijn (64 bit) constant
- // is such that all six bit, consecutive substrings are distinct.
- // Therefore, if we have a left shifted version of this constant we can
- // find by how many bits it was shifted by looking at which six bit
- // substring ended up at the top of the word.
- // (Knuth, volume 4, section 7.3.1)
- return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
-}
-
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
-// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
-func TrailingZeros8(x uint8) int {
- return int(ntz8tab[x])
-}
-
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
func Len8(x uint8) int {
return int(len8tab[x])
diff --git a/src/runtime/internal/sys/intrinsics_stubs.go b/src/runtime/internal/sys/intrinsics_stubs.go
index a020652..66cfcde 100644
--- a/src/runtime/internal/sys/intrinsics_stubs.go
+++ b/src/runtime/internal/sys/intrinsics_stubs.go
@@ -6,8 +6,8 @@
package sys
-func Ctz64(x uint64) int
-func Ctz32(x uint32) int
-func Ctz8(x uint8) int
+func TrailingZeros64(x uint64) int
+func TrailingZeros32(x uint32) int
+func TrailingZeros8(x uint8) int
func Bswap64(x uint64) uint64
func Bswap32(x uint32) uint32
diff --git a/src/runtime/internal/sys/intrinsics_test.go b/src/runtime/internal/sys/intrinsics_test.go
index 0444183..bf75f19 100644
--- a/src/runtime/internal/sys/intrinsics_test.go
+++ b/src/runtime/internal/sys/intrinsics_test.go
@@ -5,19 +5,19 @@
"testing"
)
-func TestCtz64(t *testing.T) {
+func TestTrailingZeros64(t *testing.T) {
for i := 0; i <= 64; i++ {
x := uint64(5) << uint(i)
- if got := sys.Ctz64(x); got != i {
- t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
+ if got := sys.TrailingZeros64(x); got != i {
+ t.Errorf("TrailingZeros64(%d)=%d, want %d", x, got, i)
}
}
}
-func TestCtz32(t *testing.T) {
+func TestTrailingZeros32(t *testing.T) {
for i := 0; i <= 32; i++ {
x := uint32(5) << uint(i)
- if got := sys.Ctz32(x); got != i {
- t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
+ if got := sys.TrailingZeros32(x); got != i {
+ t.Errorf("TrailingZeros32(%d)=%d, want %d", x, got, i)
}
}
}
diff --git a/src/runtime/internal/sys/nih.go b/src/runtime/internal/sys/nih.go
new file mode 100644
index 0000000..17eab67
--- /dev/null
+++ b/src/runtime/internal/sys/nih.go
@@ -0,0 +1,41 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// NOTE: keep in sync with cmd/compile/internal/types.CalcSize
+// to make the compiler recognize this as an intrinsic type.
+type nih struct{}
+
+// NotInHeap is a type that must never be allocated from the GC'd heap or on the stack,
+// and is called not-in-heap.
+//
+// Other types can embed NotInHeap to make themselves not-in-heap. Specifically, pointers
+// to these types must always fail the `runtime.inheap` check. The type may be used
+// for global variables, or for objects in unmanaged memory (e.g., allocated with
+// `sysAlloc`, `persistentalloc`, `fixalloc`, or from a manually-managed span).
+//
+// Specifically:
+//
+// 1. `new(T)`, `make([]T)`, `append([]T, ...)` and implicit heap
+// allocation of T are disallowed. (Though implicit allocations are
+// disallowed in the runtime anyway.)
+//
+// 2. A pointer to a regular type (other than `unsafe.Pointer`) cannot be
+// converted to a pointer to a not-in-heap type, even if they have the
+// same underlying type.
+//
+// 3. Any type that contains a not-in-heap type is itself not-in-heap.
+//
+// - Structs and arrays are not-in-heap if their elements are not-in-heap.
+// - Maps and channels of not-in-heap types are disallowed.
+//
+// 4. Write barriers on pointers to not-in-heap types can be omitted.
+//
+// The last point is the real benefit of NotInHeap. The runtime uses
+// it for low-level internal structures to avoid memory barriers in the
+// scheduler and the memory allocator where they are illegal or simply
+// inefficient. This mechanism is reasonably safe and does not compromise
+// the readability of the runtime.
+type NotInHeap struct{ _ nih }
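
A minimal sketch of the pattern NotInHeap enables is below. It is only meaningful inside the runtime module (runtime/internal/sys is an internal package), and offHeapNode is an invented name:

package sketch

import "runtime/internal/sys"

// offHeapNode contains a sys.NotInHeap field, so the whole struct is
// not-in-heap: new(offHeapNode) is rejected by the compiler, and write
// barriers on *offHeapNode stores are omitted.
type offHeapNode struct {
    _    sys.NotInHeap
    next *offHeapNode // allowed: pointer to another not-in-heap type
    val  uintptr
}
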
diff --git a/src/runtime/internal/sys/zversion.go b/src/runtime/internal/sys/zversion.go
index b058a3d..184c263 100644
--- a/src/runtime/internal/sys/zversion.go
+++ b/src/runtime/internal/sys/zversion.go
@@ -1,5 +1,3 @@
// Code generated by go tool dist; DO NOT EDIT.
package sys
-
-const StackGuardMultiplierDefault = 1
diff --git a/src/runtime/internal/syscall/asm_linux_mips64x.s b/src/runtime/internal/syscall/asm_linux_mips64x.s
index 0e88a2d..6b7c524 100644
--- a/src/runtime/internal/syscall/asm_linux_mips64x.s
+++ b/src/runtime/internal/syscall/asm_linux_mips64x.s
@@ -15,6 +15,7 @@
MOVV a4+32(FP), R7
MOVV a5+40(FP), R8
MOVV a6+48(FP), R9
+ MOVV R0, R3 // reset R3 to 0 as 1-ret SYSCALL keeps it
SYSCALL
BEQ R7, ok
MOVV $-1, R1
diff --git a/src/runtime/internal/syscall/asm_linux_mipsx.s b/src/runtime/internal/syscall/asm_linux_mipsx.s
index 050029e..561310f 100644
--- a/src/runtime/internal/syscall/asm_linux_mipsx.s
+++ b/src/runtime/internal/syscall/asm_linux_mipsx.s
@@ -20,6 +20,7 @@
MOVW a6+24(FP), R9
MOVW R8, 16(R29)
MOVW R9, 20(R29)
+ MOVW R0, R3 // reset R3 to 0 as 1-ret SYSCALL keeps it
SYSCALL
BEQ R7, ok
MOVW $-1, R1
diff --git a/src/runtime/internal/syscall/asm_linux_ppc64x.s b/src/runtime/internal/syscall/asm_linux_ppc64x.s
index 8cf8737..3e985ed 100644
--- a/src/runtime/internal/syscall/asm_linux_ppc64x.s
+++ b/src/runtime/internal/syscall/asm_linux_ppc64x.s
@@ -7,22 +7,17 @@
#include "textflag.h"
// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- MOVD num+0(FP), R9 // syscall entry
- MOVD a1+8(FP), R3
- MOVD a2+16(FP), R4
- MOVD a3+24(FP), R5
- MOVD a4+32(FP), R6
- MOVD a5+40(FP), R7
- MOVD a6+48(FP), R8
- SYSCALL R9
- MOVD R0, r2+64(FP) // r2 is not used. Always set to 0.
- BVC ok
- MOVD $-1, R4
- MOVD R4, r1+56(FP)
- MOVD R3, errno+72(FP)
- RET
-ok:
- MOVD R3, r1+56(FP)
- MOVD R0, errno+72(FP)
+TEXT ·Syscall6<ABIInternal>(SB),NOSPLIT,$0-80
+ MOVD R3, R10 // Move syscall number to R10. SYSCALL will move it to R0, and restore R0.
+ MOVD R4, R3
+ MOVD R5, R4
+ MOVD R6, R5
+ MOVD R7, R6
+ MOVD R8, R7
+ MOVD R9, R8
+ SYSCALL R10
+ MOVD $-1, R6
+ ISEL CR0SO, R3, R0, R5 // errno = (error) ? R3 : 0
+ ISEL CR0SO, R6, R3, R3 // r1 = (error) ? -1 : r1
+ MOVD $0, R4 // r2 is not used on linux/ppc64
RET
diff --git a/src/runtime/internal/syscall/asm_linux_riscv64.s b/src/runtime/internal/syscall/asm_linux_riscv64.s
index a8652fd..15e50ec 100644
--- a/src/runtime/internal/syscall/asm_linux_riscv64.s
+++ b/src/runtime/internal/syscall/asm_linux_riscv64.s
@@ -5,25 +5,39 @@
#include "textflag.h"
// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- MOV num+0(FP), A7 // syscall entry
- MOV a1+8(FP), A0
- MOV a2+16(FP), A1
- MOV a3+24(FP), A2
- MOV a4+32(FP), A3
- MOV a5+40(FP), A4
- MOV a6+48(FP), A5
+//
+// We need to convert to the syscall ABI.
+//
+// arg | ABIInternal | Syscall
+// ---------------------------
+// num | A0 | A7
+// a1 | A1 | A0
+// a2 | A2 | A1
+// a3 | A3 | A2
+// a4 | A4 | A3
+// a5 | A5 | A4
+// a6 | A6 | A5
+//
+// r1 | A0 | A0
+// r2 | A1 | A1
+// err | A2 | part of A0
+TEXT ·Syscall6<ABIInternal>(SB),NOSPLIT,$0-80
+ MOV A0, A7
+ MOV A1, A0
+ MOV A2, A1
+ MOV A3, A2
+ MOV A4, A3
+ MOV A5, A4
+ MOV A6, A5
ECALL
MOV $-4096, T0
BLTU T0, A0, err
- MOV A0, r1+56(FP)
- MOV A1, r2+64(FP)
- MOV ZERO, errno+72(FP)
+ // r1 already in A0
+ // r2 already in A1
+ MOV ZERO, A2 // errno
RET
err:
- MOV $-1, T0
- MOV T0, r1+56(FP)
- MOV ZERO, r2+64(FP)
- SUB A0, ZERO, A0
- MOV A0, errno+72(FP)
+ SUB A0, ZERO, A2 // errno
+ MOV $-1, A0 // r1
+ MOV ZERO, A1 // r2
RET
diff --git a/src/runtime/internal/syscall/defs_linux.go b/src/runtime/internal/syscall/defs_linux.go
new file mode 100644
index 0000000..71f1fa1
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux.go
@@ -0,0 +1,10 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ F_SETFD = 2
+ FD_CLOEXEC = 1
+)
diff --git a/src/runtime/internal/syscall/defs_linux_386.go b/src/runtime/internal/syscall/defs_linux_386.go
new file mode 100644
index 0000000..dc723a6
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_386.go
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_FCNTL = 55
+ SYS_EPOLL_CTL = 255
+ SYS_EPOLL_PWAIT = 319
+ SYS_EPOLL_CREATE1 = 329
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ Data [8]byte // to match amd64
+}
diff --git a/src/runtime/internal/syscall/defs_linux_amd64.go b/src/runtime/internal/syscall/defs_linux_amd64.go
new file mode 100644
index 0000000..886eb5b
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_amd64.go
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_FCNTL = 72
+ SYS_EPOLL_CTL = 233
+ SYS_EPOLL_PWAIT = 281
+ SYS_EPOLL_CREATE1 = 291
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ Data [8]byte // unaligned uintptr
+}
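
The Data field is declared as [8]byte rather than uint64 because the kernel's struct epoll_event is packed on amd64, leaving the 64-bit payload unaligned; using a byte array keeps the Go struct layout matching without imposing 8-byte alignment. A hedged, standalone sketch of stashing a 32-bit value in that payload (the mirror type and helper below are illustrative, not part of this change, and assume a little-endian layout when interpreting kernel-filled data):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // Mirror of the amd64 layout above: 4-byte Events, then an unaligned 8-byte payload.
    type epollEvent struct {
        Events uint32
        Data   [8]byte
    }

    func main() {
        var ev epollEvent
        *(*int32)(unsafe.Pointer(&ev.Data[0])) = 7 // stash an fd in the payload
        fmt.Println(*(*int32)(unsafe.Pointer(&ev.Data[0])))
    }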
diff --git a/src/runtime/internal/syscall/defs_linux_arm.go b/src/runtime/internal/syscall/defs_linux_arm.go
new file mode 100644
index 0000000..8f812a2
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_arm.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_FCNTL = 55
+ SYS_EPOLL_CTL = 251
+ SYS_EPOLL_PWAIT = 346
+ SYS_EPOLL_CREATE1 = 357
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ _pad uint32
+ Data [8]byte // to match amd64
+}
diff --git a/src/runtime/internal/syscall/defs_linux_arm64.go b/src/runtime/internal/syscall/defs_linux_arm64.go
new file mode 100644
index 0000000..48e11b0
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_arm64.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_EPOLL_CREATE1 = 20
+ SYS_EPOLL_CTL = 21
+ SYS_EPOLL_PWAIT = 22
+ SYS_FCNTL = 25
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ _pad uint32
+ Data [8]byte // to match amd64
+}
diff --git a/src/runtime/internal/syscall/defs_linux_loong64.go b/src/runtime/internal/syscall/defs_linux_loong64.go
new file mode 100644
index 0000000..b78ef81
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_loong64.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_EPOLL_CREATE1 = 20
+ SYS_EPOLL_CTL = 21
+ SYS_EPOLL_PWAIT = 22
+ SYS_FCNTL = 25
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data [8]byte // unaligned uintptr
+}
diff --git a/src/runtime/internal/syscall/defs_linux_mips64x.go b/src/runtime/internal/syscall/defs_linux_mips64x.go
new file mode 100644
index 0000000..92b49ca
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_mips64x.go
@@ -0,0 +1,32 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips64 || mips64le)
+
+package syscall
+
+const (
+ SYS_FCNTL = 5070
+ SYS_EPOLL_CTL = 5208
+ SYS_EPOLL_PWAIT = 5272
+ SYS_EPOLL_CREATE1 = 5285
+ SYS_EPOLL_PWAIT2 = 5441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data [8]byte // unaligned uintptr
+}
diff --git a/src/runtime/internal/syscall/defs_linux_mipsx.go b/src/runtime/internal/syscall/defs_linux_mipsx.go
new file mode 100644
index 0000000..e28d09c
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_mipsx.go
@@ -0,0 +1,32 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips || mipsle)
+
+package syscall
+
+const (
+ SYS_FCNTL = 4055
+ SYS_EPOLL_CTL = 4249
+ SYS_EPOLL_PWAIT = 4313
+ SYS_EPOLL_CREATE1 = 4326
+ SYS_EPOLL_PWAIT2 = 4441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data uint64
+}
diff --git a/src/runtime/internal/syscall/defs_linux_ppc64x.go b/src/runtime/internal/syscall/defs_linux_ppc64x.go
new file mode 100644
index 0000000..a74483e
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_ppc64x.go
@@ -0,0 +1,32 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (ppc64 || ppc64le)
+
+package syscall
+
+const (
+ SYS_FCNTL = 55
+ SYS_EPOLL_CTL = 237
+ SYS_EPOLL_PWAIT = 303
+ SYS_EPOLL_CREATE1 = 315
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data [8]byte // unaligned uintptr
+}
diff --git a/src/runtime/internal/syscall/defs_linux_riscv64.go b/src/runtime/internal/syscall/defs_linux_riscv64.go
new file mode 100644
index 0000000..b78ef81
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_riscv64.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_EPOLL_CREATE1 = 20
+ SYS_EPOLL_CTL = 21
+ SYS_EPOLL_PWAIT = 22
+ SYS_FCNTL = 25
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data [8]byte // unaligned uintptr
+}
diff --git a/src/runtime/internal/syscall/defs_linux_s390x.go b/src/runtime/internal/syscall/defs_linux_s390x.go
new file mode 100644
index 0000000..a7bb1ba
--- /dev/null
+++ b/src/runtime/internal/syscall/defs_linux_s390x.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+const (
+ SYS_FCNTL = 55
+ SYS_EPOLL_CTL = 250
+ SYS_EPOLL_PWAIT = 312
+ SYS_EPOLL_CREATE1 = 327
+ SYS_EPOLL_PWAIT2 = 441
+
+ EPOLLIN = 0x1
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDHUP = 0x2000
+ EPOLLET = 0x80000000
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+type EpollEvent struct {
+ Events uint32
+ pad_cgo_0 [4]byte
+ Data [8]byte // unaligned uintptr
+}
diff --git a/src/runtime/internal/syscall/syscall_linux.go b/src/runtime/internal/syscall/syscall_linux.go
index 7f268e8..a103d31 100644
--- a/src/runtime/internal/syscall/syscall_linux.go
+++ b/src/runtime/internal/syscall/syscall_linux.go
@@ -6,7 +6,7 @@
package syscall
import (
- _ "unsafe" // for go:linkname
+ "unsafe"
)
// TODO(https://go.dev/issue/51087): This package is incomplete and currently
@@ -37,3 +37,30 @@
func syscall_RawSyscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr) {
return Syscall6(num, a1, a2, a3, a4, a5, a6)
}
+
+func EpollCreate1(flags int32) (fd int32, errno uintptr) {
+ r1, _, e := Syscall6(SYS_EPOLL_CREATE1, uintptr(flags), 0, 0, 0, 0, 0)
+ return int32(r1), e
+}
+
+var _zero uintptr
+
+func EpollWait(epfd int32, events []EpollEvent, maxev, waitms int32) (n int32, errno uintptr) {
+ var ev unsafe.Pointer
+ if len(events) > 0 {
+ ev = unsafe.Pointer(&events[0])
+ } else {
+ ev = unsafe.Pointer(&_zero)
+ }
+ r1, _, e := Syscall6(SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(ev), uintptr(maxev), uintptr(waitms), 0, 0)
+ return int32(r1), e
+}
+
+func EpollCtl(epfd, op, fd int32, event *EpollEvent) (errno uintptr) {
+ _, _, e := Syscall6(SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0)
+ return e
+}
+
+func CloseOnExec(fd int32) {
+ Syscall6(SYS_FCNTL, uintptr(fd), F_SETFD, FD_CLOEXEC, 0, 0, 0)
+}
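
A hedged sketch of how the new wrappers compose, written as if it sat in the same package (illustrative only; the real consumer is the runtime netpoller, and this function is not part of the change):

    // exampleEpollLoop registers fd with a fresh epoll instance and waits once.
    func exampleEpollLoop(fd int32) uintptr {
        epfd, errno := EpollCreate1(EPOLL_CLOEXEC)
        if errno != 0 {
            return errno
        }
        ev := EpollEvent{Events: EPOLLIN | EPOLLRDHUP | EPOLLET}
        if errno := EpollCtl(epfd, EPOLL_CTL_ADD, fd, &ev); errno != 0 {
            return errno
        }
        var events [128]EpollEvent
        n, errno := EpollWait(epfd, events[:], int32(len(events)), -1)
        _ = n
        return errno
    }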
diff --git a/src/runtime/internal/syscall/syscall_linux_test.go b/src/runtime/internal/syscall/syscall_linux_test.go
new file mode 100644
index 0000000..1976da5
--- /dev/null
+++ b/src/runtime/internal/syscall/syscall_linux_test.go
@@ -0,0 +1,19 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall_test
+
+import (
+ "runtime/internal/syscall"
+ "testing"
+)
+
+func TestEpollctlErrorSign(t *testing.T) {
+ v := syscall.EpollCtl(-1, 1, -1, &syscall.EpollEvent{})
+
+ const EBADF = 0x09
+ if v != EBADF {
+ t.Errorf("epollctl = %v, want %v", v, EBADF)
+ }
+}
diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index 406561a..306a8e8 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -18,8 +18,7 @@
// This stack is intrusive. Nodes must embed lfnode as the first field.
//
// The stack does not keep GC-visible pointers to nodes, so the caller
-// is responsible for ensuring the nodes are not garbage collected
-// (typically by allocating them from manually-managed memory).
+// must ensure the nodes are allocated outside the Go heap.
type lfstack uint64
func (head *lfstack) push(node *lfnode) {
@@ -59,6 +58,9 @@
// lfnodeValidate panics if node is not a valid address for use with
// lfstack.push. This only needs to be called when node is allocated.
func lfnodeValidate(node *lfnode) {
+ if base, _, _ := findObject(uintptr(unsafe.Pointer(node)), 0, 0); base != 0 {
+ throw("lfstack node allocated from the heap")
+ }
if lfstackUnpack(lfstackPack(node, ^uintptr(0))) != node {
printlock()
println("runtime: bad lfnode address", hex(uintptr(unsafe.Pointer(node))))
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
index 154130c..88cbd3b 100644
--- a/src/runtime/lfstack_64bit.go
+++ b/src/runtime/lfstack_64bit.go
@@ -36,12 +36,21 @@
// We use one bit to distinguish between the two ranges.
aixAddrBits = 57
aixCntBits = 64 - aixAddrBits + 3
+
+ // riscv64 SV57 mode gives 56 bits of userspace VA.
+ // lfstack code supports it, but broader support for SV57 mode is incomplete,
+ // and there may be other issues (see #54104).
+ riscv64AddrBits = 56
+ riscv64CntBits = 64 - riscv64AddrBits + 3
)
func lfstackPack(node *lfnode, cnt uintptr) uint64 {
if GOARCH == "ppc64" && GOOS == "aix" {
return uint64(uintptr(unsafe.Pointer(node)))<<(64-aixAddrBits) | uint64(cnt&(1<<aixCntBits-1))
}
+ if GOARCH == "riscv64" {
+ return uint64(uintptr(unsafe.Pointer(node)))<<(64-riscv64AddrBits) | uint64(cnt&(1<<riscv64CntBits-1))
+ }
return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
}
@@ -54,5 +63,8 @@
if GOARCH == "ppc64" && GOOS == "aix" {
return (*lfnode)(unsafe.Pointer(uintptr((val >> aixCntBits << 3) | 0xa<<56)))
}
+ if GOARCH == "riscv64" {
+ return (*lfnode)(unsafe.Pointer(uintptr(val >> riscv64CntBits << 3)))
+ }
return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
}
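
The riscv64 packing above relies on two facts: node addresses fit in 56 bits (so shifting left by 8 loses nothing) and lfnodes are 8-byte aligned (so the low 3 address bits are zero and can be reclaimed for the counter, giving 11 counter bits in total). A standalone sketch of the same arithmetic, for illustration only:

    package main

    import "fmt"

    const (
        addrBits = 56
        cntBits  = 64 - addrBits + 3 // 11 counter bits
    )

    func pack(addr, cnt uint64) uint64 {
        return addr<<(64-addrBits) | cnt&(1<<cntBits-1)
    }

    func unpack(val uint64) (addr, cnt uint64) {
        return val >> cntBits << 3, val & (1<<cntBits - 1)
    }

    func main() {
        const addr = 0x00ff_1234_5678_9ab8 // 56-bit, 8-byte-aligned address
        v := pack(addr, 5)
        a, c := unpack(v)
        fmt.Printf("addr ok=%v cnt=%d\n", a == addr, c) // addr ok=true cnt=5
    }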
diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go
index d0a1b6b..e36297e 100644
--- a/src/runtime/lfstack_test.go
+++ b/src/runtime/lfstack_test.go
@@ -16,6 +16,17 @@
data int
}
+// allocMyNode allocates nodes that are stored in an lfstack
+// outside the Go heap.
+// We require lfstack objects to live outside the heap so that
+// checkptr passes on the unsafe shenanigans used.
+func allocMyNode(data int) *MyNode {
+ n := (*MyNode)(PersistentAlloc(unsafe.Sizeof(MyNode{})))
+ LFNodeValidate(&n.LFNode)
+ n.data = data
+ return n
+}
+
func fromMyNode(node *MyNode) *LFNode {
return (*LFNode)(unsafe.Pointer(node))
}
@@ -30,22 +41,17 @@
stack := new(uint64)
global = stack // force heap allocation
- // Need to keep additional references to nodes, the stack is not all that type-safe.
- var nodes []*MyNode
-
// Check the stack is initially empty.
if LFStackPop(stack) != nil {
t.Fatalf("stack is not empty")
}
// Push one element.
- node := &MyNode{data: 42}
- nodes = append(nodes, node)
+ node := allocMyNode(42)
LFStackPush(stack, fromMyNode(node))
// Push another.
- node = &MyNode{data: 43}
- nodes = append(nodes, node)
+ node = allocMyNode(43)
LFStackPush(stack, fromMyNode(node))
// Pop one element.
@@ -75,8 +81,6 @@
}
}
-var stress []*MyNode
-
func TestLFStackStress(t *testing.T) {
const K = 100
P := 4 * GOMAXPROCS(-1)
@@ -86,15 +90,11 @@
}
// Create 2 stacks.
stacks := [2]*uint64{new(uint64), new(uint64)}
- // Need to keep additional references to nodes,
- // the lock-free stack is not type-safe.
- stress = nil
// Push K elements randomly onto the stacks.
sum := 0
for i := 0; i < K; i++ {
sum += i
- node := &MyNode{data: i}
- stress = append(stress, node)
+ node := allocMyNode(i)
LFStackPush(stacks[i%2], fromMyNode(node))
}
c := make(chan bool, P)
@@ -134,7 +134,4 @@
if sum2 != sum {
t.Fatalf("Wrong sum %d/%d", sum2, sum)
}
-
- // Let nodes be collected now.
- stress = nil
}
diff --git a/src/runtime/libfuzzer.go b/src/runtime/libfuzzer.go
index 6bfaef8..0ece035 100644
--- a/src/runtime/libfuzzer.go
+++ b/src/runtime/libfuzzer.go
@@ -20,49 +20,49 @@
// This may result in these functions having callers that are nosplit. That is why they must be nosplit.
//
//go:nosplit
-func libfuzzerTraceCmp1(arg0, arg1 uint8, fakePC int) {
+func libfuzzerTraceCmp1(arg0, arg1 uint8, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_cmp1, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceCmp2(arg0, arg1 uint16, fakePC int) {
+func libfuzzerTraceCmp2(arg0, arg1 uint16, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_cmp2, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceCmp4(arg0, arg1 uint32, fakePC int) {
+func libfuzzerTraceCmp4(arg0, arg1 uint32, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_cmp4, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceCmp8(arg0, arg1 uint64, fakePC int) {
+func libfuzzerTraceCmp8(arg0, arg1 uint64, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_cmp8, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceConstCmp1(arg0, arg1 uint8, fakePC int) {
+func libfuzzerTraceConstCmp1(arg0, arg1 uint8, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_const_cmp1, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceConstCmp2(arg0, arg1 uint16, fakePC int) {
+func libfuzzerTraceConstCmp2(arg0, arg1 uint16, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_const_cmp2, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceConstCmp4(arg0, arg1 uint32, fakePC int) {
+func libfuzzerTraceConstCmp4(arg0, arg1 uint32, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_const_cmp4, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
//go:nosplit
-func libfuzzerTraceConstCmp8(arg0, arg1 uint64, fakePC int) {
+func libfuzzerTraceConstCmp8(arg0, arg1 uint64, fakePC uint) {
fakePC = fakePC % retSledSize
libfuzzerCallTraceIntCmp(&__sanitizer_cov_trace_const_cmp8, uintptr(arg0), uintptr(arg1), uintptr(fakePC))
}
@@ -148,13 +148,8 @@
//go:cgo_import_static __sanitizer_cov_8bit_counters_init
var __sanitizer_cov_8bit_counters_init byte
-//go:linkname __start___sancov_cntrs __start___sancov_cntrs
-//go:cgo_import_static __start___sancov_cntrs
-var __start___sancov_cntrs byte
-
-//go:linkname __stop___sancov_cntrs __stop___sancov_cntrs
-//go:cgo_import_static __stop___sancov_cntrs
-var __stop___sancov_cntrs byte
+// start, stop markers of counters, set by the linker
+var __start___sancov_cntrs, __stop___sancov_cntrs byte
//go:linkname __sanitizer_cov_pcs_init __sanitizer_cov_pcs_init
//go:cgo_import_static __sanitizer_cov_pcs_init
diff --git a/src/runtime/libfuzzer_amd64.s b/src/runtime/libfuzzer_amd64.s
index 65ac7a3..4355369 100644
--- a/src/runtime/libfuzzer_amd64.s
+++ b/src/runtime/libfuzzer_amd64.s
@@ -52,7 +52,7 @@
// manipulating the return address so that libfuzzer's integer compare hooks
// work
// libFuzzer's compare hooks obtain the caller's address from the compiler
-// builtin __builtin_return_adress. Since we invoke the hooks always
+// builtin __builtin_return_address. Since we invoke the hooks always
// from the same native function, this builtin would always return the same
// value. Internally, the libFuzzer hooks call through to the always inlined
// HandleCmp and thus can't be mimicked without patching libFuzzer.
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index 1578984..cc7d465 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -226,7 +226,7 @@
}
// same as runtime·notetsleep, but called on user g (not g0)
-// calls only nosplit functions between entersyscallblock/exitsyscall
+// calls only nosplit functions between entersyscallblock/exitsyscall.
func notetsleepg(n *note, ns int64) bool {
gp := getg()
if gp == gp.m.g0 {
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index c5e8cfe..e15bbf7 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -284,7 +284,7 @@
}
// same as runtime·notetsleep, but called on user g (not g0)
-// calls only nosplit functions between entersyscallblock/exitsyscall
+// calls only nosplit functions between entersyscallblock/exitsyscall.
func notetsleepg(n *note, ns int64) bool {
gp := getg()
if gp == gp.m.g0 {
diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go
index bb0b189..284a61e 100644
--- a/src/runtime/lockrank.go
+++ b/src/runtime/lockrank.go
@@ -1,183 +1,120 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file records the static ranks of the locks in the runtime. If a lock
-// is not given a rank, then it is assumed to be a leaf lock, which means no other
-// lock can be acquired while it is held. Therefore, leaf locks do not need to be
-// given an explicit rank. We list all of the architecture-independent leaf locks
-// for documentation purposes, but don't list any of the architecture-dependent
-// locks (which are all leaf locks). debugLock is ignored for ranking, since it is used
-// when printing out lock ranking errors.
-//
-// lockInit(l *mutex, rank int) is used to set the rank of lock before it is used.
-// If there is no clear place to initialize a lock, then the rank of a lock can be
-// specified during the lock call itself via lockWithrank(l *mutex, rank int).
-//
-// Besides the static lock ranking (which is a total ordering of the locks), we
-// also represent and enforce the actual partial order among the locks in the
-// arcs[] array below. That is, if it is possible that lock B can be acquired when
-// lock A is the previous acquired lock that is still held, then there should be
-// an entry for A in arcs[B][]. We will currently fail not only if the total order
-// (the lock ranking) is violated, but also if there is a missing entry in the
-// partial order.
+// Code generated by mklockrank.go; DO NOT EDIT.
package runtime
type lockRank int
-// Constants representing the lock rank of the architecture-independent locks in
-// the runtime. Locks with lower rank must be taken before locks with higher
-// rank.
+// Constants representing the ranks of all non-leaf runtime locks, in rank order.
+// Locks with lower rank must be taken before locks with higher rank,
+// in addition to satisfying the partial order in lockPartialOrder.
+// A few ranks allow self-cycles, which are specified in lockPartialOrder.
const (
- lockRankDummy lockRank = iota
+ lockRankUnknown lockRank = iota
- // Locks held above sched
lockRankSysmon
lockRankScavenge
lockRankForcegc
+ lockRankDefer
lockRankSweepWaiters
lockRankAssistQueue
- lockRankCpuprof
lockRankSweep
-
lockRankPollDesc
+ lockRankCpuprof
lockRankSched
- lockRankDeadlock
lockRankAllg
lockRankAllp
-
- lockRankTimers // Multiple timers locked simultaneously in destroy()
+ lockRankTimers
+ lockRankNetpollInit
+ lockRankHchan
+ lockRankNotifyList
+ lockRankSudog
+ lockRankRwmutexW
+ lockRankRwmutexR
+ lockRankRoot
lockRankItab
lockRankReflectOffs
- lockRankHchan // Multiple hchans acquired in lock order in syncadjustsudogs()
+ lockRankUserArenaState
+ // TRACEGLOBAL
lockRankTraceBuf
- lockRankFin
- lockRankNotifyList
lockRankTraceStrings
+ // MALLOC
+ lockRankFin
+ lockRankGcBitsArenas
+ lockRankMheapSpecial
lockRankMspanSpecial
+ lockRankSpanSetSpine
+ // MPROF
lockRankProfInsert
lockRankProfBlock
lockRankProfMemActive
lockRankProfMemFuture
- lockRankGcBitsArenas
- lockRankRoot
- lockRankTrace
- lockRankTraceStackTab
- lockRankNetpollInit
-
- lockRankRwmutexW
- lockRankRwmutexR
-
- lockRankSpanSetSpine
+ // STACKGROW
lockRankGscan
lockRankStackpool
lockRankStackLarge
- lockRankDefer
- lockRankSudog
-
- // Memory-related non-leaf locks
+ lockRankHchanLeaf
+ // WB
lockRankWbufSpans
lockRankMheap
- lockRankMheapSpecial
-
- // Memory-related leaf locks
lockRankGlobalAlloc
- lockRankPageAllocScav
-
- // Other leaf locks
- lockRankGFree
- // Generally, hchan must be acquired before gscan. But in one specific
- // case (in syncadjustsudogs from markroot after the g has been suspended
- // by suspendG), we allow gscan to be acquired, and then an hchan lock. To
- // allow this case, we get this lockRankHchanLeaf rank in
- // syncadjustsudogs(), rather than lockRankHchan. By using this special
- // rank, we don't allow any further locks to be acquired other than more
- // hchan locks.
- lockRankHchanLeaf
+ // TRACE
+ lockRankTrace
+ lockRankTraceStackTab
lockRankPanic
-
- // Leaf locks with no dependencies, so these constants are not actually used anywhere.
- // There are other architecture-dependent leaf locks as well.
- lockRankNewmHandoff
- lockRankDebugPtrmask
- lockRankFaketimeState
- lockRankTicks
- lockRankRaceFini
- lockRankPollCache
- lockRankDebug
+ lockRankDeadlock
)
-// lockRankLeafRank is the rank of lock that does not have a declared rank, and hence is
-// a leaf lock.
+// lockRankLeafRank is the rank of lock that does not have a declared rank,
+// and hence is a leaf lock.
const lockRankLeafRank lockRank = 1000
-// lockNames gives the names associated with each of the above ranks
+// lockNames gives the names associated with each of the above ranks.
var lockNames = []string{
- lockRankDummy: "",
-
- lockRankSysmon: "sysmon",
- lockRankScavenge: "scavenge",
- lockRankForcegc: "forcegc",
- lockRankSweepWaiters: "sweepWaiters",
- lockRankAssistQueue: "assistQueue",
- lockRankCpuprof: "cpuprof",
- lockRankSweep: "sweep",
-
- lockRankPollDesc: "pollDesc",
- lockRankSched: "sched",
- lockRankDeadlock: "deadlock",
- lockRankAllg: "allg",
- lockRankAllp: "allp",
-
- lockRankTimers: "timers",
- lockRankItab: "itab",
- lockRankReflectOffs: "reflectOffs",
-
- lockRankHchan: "hchan",
- lockRankTraceBuf: "traceBuf",
- lockRankFin: "fin",
- lockRankNotifyList: "notifyList",
- lockRankTraceStrings: "traceStrings",
- lockRankMspanSpecial: "mspanSpecial",
- lockRankProfInsert: "profInsert",
- lockRankProfBlock: "profBlock",
- lockRankProfMemActive: "profMemActive",
- lockRankProfMemFuture: "profMemFuture",
- lockRankGcBitsArenas: "gcBitsArenas",
- lockRankRoot: "root",
- lockRankTrace: "trace",
- lockRankTraceStackTab: "traceStackTab",
- lockRankNetpollInit: "netpollInit",
-
- lockRankRwmutexW: "rwmutexW",
- lockRankRwmutexR: "rwmutexR",
-
- lockRankSpanSetSpine: "spanSetSpine",
- lockRankGscan: "gscan",
- lockRankStackpool: "stackpool",
- lockRankStackLarge: "stackLarge",
- lockRankDefer: "defer",
- lockRankSudog: "sudog",
-
- lockRankWbufSpans: "wbufSpans",
- lockRankMheap: "mheap",
- lockRankMheapSpecial: "mheapSpecial",
-
- lockRankGlobalAlloc: "globalAlloc.mutex",
- lockRankPageAllocScav: "pageAlloc.scav.lock",
-
- lockRankGFree: "gFree",
- lockRankHchanLeaf: "hchanLeaf",
- lockRankPanic: "panic",
-
- lockRankNewmHandoff: "newmHandoff.lock",
- lockRankDebugPtrmask: "debugPtrmask.lock",
- lockRankFaketimeState: "faketimeState.lock",
- lockRankTicks: "ticks.lock",
- lockRankRaceFini: "raceFiniLock",
- lockRankPollCache: "pollCache.lock",
- lockRankDebug: "debugLock",
+ lockRankSysmon: "sysmon",
+ lockRankScavenge: "scavenge",
+ lockRankForcegc: "forcegc",
+ lockRankDefer: "defer",
+ lockRankSweepWaiters: "sweepWaiters",
+ lockRankAssistQueue: "assistQueue",
+ lockRankSweep: "sweep",
+ lockRankPollDesc: "pollDesc",
+ lockRankCpuprof: "cpuprof",
+ lockRankSched: "sched",
+ lockRankAllg: "allg",
+ lockRankAllp: "allp",
+ lockRankTimers: "timers",
+ lockRankNetpollInit: "netpollInit",
+ lockRankHchan: "hchan",
+ lockRankNotifyList: "notifyList",
+ lockRankSudog: "sudog",
+ lockRankRwmutexW: "rwmutexW",
+ lockRankRwmutexR: "rwmutexR",
+ lockRankRoot: "root",
+ lockRankItab: "itab",
+ lockRankReflectOffs: "reflectOffs",
+ lockRankUserArenaState: "userArenaState",
+ lockRankTraceBuf: "traceBuf",
+ lockRankTraceStrings: "traceStrings",
+ lockRankFin: "fin",
+ lockRankGcBitsArenas: "gcBitsArenas",
+ lockRankMheapSpecial: "mheapSpecial",
+ lockRankMspanSpecial: "mspanSpecial",
+ lockRankSpanSetSpine: "spanSetSpine",
+ lockRankProfInsert: "profInsert",
+ lockRankProfBlock: "profBlock",
+ lockRankProfMemActive: "profMemActive",
+ lockRankProfMemFuture: "profMemFuture",
+ lockRankGscan: "gscan",
+ lockRankStackpool: "stackpool",
+ lockRankStackLarge: "stackLarge",
+ lockRankHchanLeaf: "hchanLeaf",
+ lockRankWbufSpans: "wbufSpans",
+ lockRankMheap: "mheap",
+ lockRankGlobalAlloc: "globalAlloc",
+ lockRankTrace: "trace",
+ lockRankTraceStackTab: "traceStackTab",
+ lockRankPanic: "panic",
+ lockRankDeadlock: "deadlock",
}
func (rank lockRank) String() string {
@@ -187,74 +124,61 @@
if rank == lockRankLeafRank {
return "LEAF"
}
+ if rank < 0 || int(rank) >= len(lockNames) {
+ return "BAD RANK"
+ }
return lockNames[rank]
}
-// lockPartialOrder is a partial order among the various lock types, listing the
-// immediate ordering that has actually been observed in the runtime. Each entry
-// (which corresponds to a particular lock rank) specifies the list of locks
-// that can already be held immediately "above" it.
+// lockPartialOrder is the transitive closure of the lock rank graph.
+// An entry for rank X lists all of the ranks that can already be held
+// when rank X is acquired.
//
-// So, for example, the lockRankSched entry shows that all the locks preceding
-// it in rank can actually be held. The allp lock shows that only the sysmon or
-// sched lock can be held immediately above it when it is acquired.
+// Lock ranks that allow self-cycles list themselves.
var lockPartialOrder [][]lockRank = [][]lockRank{
- lockRankDummy: {},
- lockRankSysmon: {},
- lockRankScavenge: {lockRankSysmon},
- lockRankForcegc: {lockRankSysmon},
- lockRankSweepWaiters: {},
- lockRankAssistQueue: {},
- lockRankCpuprof: {},
- lockRankSweep: {},
- lockRankPollDesc: {},
- lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc},
- lockRankDeadlock: {lockRankDeadlock},
- lockRankAllg: {lockRankSysmon, lockRankSched},
- lockRankAllp: {lockRankSysmon, lockRankSched},
- lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankPollDesc, lockRankSched, lockRankAllp, lockRankTimers},
- lockRankItab: {},
- lockRankReflectOffs: {lockRankItab},
- lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan},
- lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
- lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf},
- lockRankNotifyList: {},
- lockRankTraceStrings: {lockRankTraceBuf},
- lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankProfInsert: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankProfBlock: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankProfMemActive: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankProfMemFuture: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings, lockRankProfMemActive},
- lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankRoot: {},
- lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot},
- lockRankTraceStackTab: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankRoot, lockRankTrace},
- lockRankNetpollInit: {lockRankTimers},
-
- lockRankRwmutexW: {},
- lockRankRwmutexR: {lockRankSysmon, lockRankRwmutexW},
-
- lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
- lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
- lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
- lockRankDefer: {},
- lockRankSudog: {lockRankHchan, lockRankNotifyList},
- lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankRoot, lockRankTrace, lockRankGscan, lockRankDefer, lockRankSudog},
- lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
- lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
- lockRankGlobalAlloc: {lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
- lockRankPageAllocScav: {lockRankMheap},
-
- lockRankGFree: {lockRankSched},
- lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf},
- lockRankPanic: {lockRankDeadlock}, // plus any other lock held on throw.
-
- lockRankNewmHandoff: {},
- lockRankDebugPtrmask: {},
- lockRankFaketimeState: {},
- lockRankTicks: {},
- lockRankRaceFini: {},
- lockRankPollCache: {},
- lockRankDebug: {},
+ lockRankSysmon: {},
+ lockRankScavenge: {lockRankSysmon},
+ lockRankForcegc: {lockRankSysmon},
+ lockRankDefer: {},
+ lockRankSweepWaiters: {},
+ lockRankAssistQueue: {},
+ lockRankSweep: {},
+ lockRankPollDesc: {},
+ lockRankCpuprof: {},
+ lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof},
+ lockRankAllg: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched},
+ lockRankAllp: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched},
+ lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllp, lockRankTimers},
+ lockRankNetpollInit: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllp, lockRankTimers},
+ lockRankHchan: {lockRankSysmon, lockRankScavenge, lockRankSweep, lockRankHchan},
+ lockRankNotifyList: {},
+ lockRankSudog: {lockRankSysmon, lockRankScavenge, lockRankSweep, lockRankHchan, lockRankNotifyList},
+ lockRankRwmutexW: {},
+ lockRankRwmutexR: {lockRankSysmon, lockRankRwmutexW},
+ lockRankRoot: {},
+ lockRankItab: {},
+ lockRankReflectOffs: {lockRankItab},
+ lockRankUserArenaState: {},
+ lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
+ lockRankTraceStrings: {lockRankSysmon, lockRankScavenge, lockRankTraceBuf},
+ lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankProfInsert: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankProfBlock: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankProfMemActive: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankProfMemFuture: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankHchan, lockRankNotifyList, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankProfMemActive},
+ lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture},
+ lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
+ lockRankStackLarge: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
+ lockRankHchanLeaf: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankHchanLeaf},
+ lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankSudog, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankMspanSpecial, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
+ lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankSudog, lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankMspanSpecial, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans},
+ lockRankGlobalAlloc: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankSudog, lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankMheapSpecial, lockRankMspanSpecial, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
+ lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankSudog, lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankMspanSpecial, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
+ lockRankTraceStackTab: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankNetpollInit, lockRankHchan, lockRankNotifyList, lockRankSudog, lockRankRwmutexW, lockRankRwmutexR, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankGcBitsArenas, lockRankMspanSpecial, lockRankSpanSetSpine, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap, lockRankTrace},
+ lockRankPanic: {},
+ lockRankDeadlock: {lockRankPanic, lockRankDeadlock},
}
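
For readers of the generated table: lockPartialOrder[r] lists every rank that may legally be held at the moment a lock of rank r is acquired, and ranks that allow self-cycles list themselves. A simplified sketch of the kind of check this enables (illustrative only; the real check lives in lockrank_on.go and additionally handles unranked leaf locks and the held-lock stack):

    // rankOK reports whether acquiring a lock of rank acquiring is allowed
    // while a lock of rank held is the most recently acquired lock.
    func rankOK(held, acquiring lockRank) bool {
        for _, r := range lockPartialOrder[acquiring] {
            if r == held {
                return true
            }
        }
        return false
    }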
diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go
index a170569..5dcc79b 100644
--- a/src/runtime/lockrank_on.go
+++ b/src/runtime/lockrank_on.go
@@ -13,7 +13,7 @@
// worldIsStopped is accessed atomically to track world-stops. 1 == world
// stopped.
-var worldIsStopped uint32
+var worldIsStopped atomic.Uint32
// lockRankStruct is embedded in mutex
type lockRankStruct struct {
@@ -24,6 +24,9 @@
pad int
}
+// lockInit(l *mutex, rank int) sets the rank of a lock before it is used.
+// If there is no clear place to initialize a lock, then the rank of a lock can be
+// specified during the lock call itself via lockWithRank(l *mutex, rank int).
func lockInit(l *mutex, rank lockRank) {
l.rank = rank
}
@@ -298,7 +301,7 @@
//
//go:nosplit
func worldStopped() {
- if stopped := atomic.Xadd(&worldIsStopped, 1); stopped != 1 {
+ if stopped := worldIsStopped.Add(1); stopped != 1 {
systemstack(func() {
print("world stop count=", stopped, "\n")
throw("recursive world stop")
@@ -314,7 +317,7 @@
//
//go:nosplit
func worldStarted() {
- if stopped := atomic.Xadd(&worldIsStopped, -1); stopped != 0 {
+ if stopped := worldIsStopped.Add(-1); stopped != 0 {
systemstack(func() {
print("world stop count=", stopped, "\n")
throw("released non-stopped world stop")
@@ -326,7 +329,7 @@
//
//go:nosplit
func checkWorldStopped() bool {
- stopped := atomic.Load(&worldIsStopped)
+ stopped := worldIsStopped.Load()
if stopped > 1 {
systemstack(func() {
print("inconsistent world stop count=", stopped, "\n")
diff --git a/src/runtime/lockrank_test.go b/src/runtime/lockrank_test.go
index 4b2fc0e..a7b1b8d 100644
--- a/src/runtime/lockrank_test.go
+++ b/src/runtime/lockrank_test.go
@@ -1,41 +1,29 @@
-// Copyright 2021 The Go Authors. All rights reserved.
+// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime_test
import (
- . "runtime"
+ "bytes"
+ "internal/testenv"
+ "os"
+ "os/exec"
"testing"
)
-// Check that the partial order in lockPartialOrder fits within the total order
-// determined by the order of the lockRank constants.
-func TestLockRankPartialOrder(t *testing.T) {
- for r, list := range LockPartialOrder {
- rank := LockRank(r)
- for _, e := range list {
- entry := LockRank(e)
- if entry > rank {
- t.Errorf("lockPartialOrder row %v entry %v is inconsistent with total lock ranking order", rank, entry)
- }
- }
+// Test that the generated code for the lock rank graph is up-to-date.
+func TestLockRankGenerated(t *testing.T) {
+ testenv.MustHaveGoRun(t)
+ want, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "run", "mklockrank.go")).CombinedOutput()
+ if err != nil {
+ t.Fatal(err)
}
-}
-
-// Verify that partial order lists are kept sorted. This is a purely cosemetic
-// check to make manual reviews simpler. It does not affect correctness, unlike
-// the above test.
-func TestLockRankPartialOrderSortedEntries(t *testing.T) {
- for r, list := range LockPartialOrder {
- rank := LockRank(r)
- var prev LockRank
- for _, e := range list {
- entry := LockRank(e)
- if entry <= prev {
- t.Errorf("Partial order for rank %v out of order: %v <= %v in %v", rank, entry, prev, list)
- }
- prev = entry
- }
+ got, err := os.ReadFile("lockrank.go")
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !bytes.Equal(want, got) {
+ t.Fatalf("lockrank.go is out of date. Please run go generate.")
}
}
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index eb24fdb..7ff2190 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -247,13 +247,15 @@
// memory.
heapArenaBytes = 1 << logHeapArenaBytes
+ heapArenaWords = heapArenaBytes / goarch.PtrSize
+
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
- // heapArenaBitmapBytes is the size of each heap arena's bitmap.
- heapArenaBitmapBytes = heapArenaBytes / (goarch.PtrSize * 8 / 2)
+ // heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
+ heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)
pagesPerArena = heapArenaBytes / pageSize
@@ -353,10 +355,10 @@
throw("bad TinySizeClass")
}
- if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+ if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
- throw("heapArenaBitmapBytes not a power of 2")
+ throw("heapArenaBitmapWords not a power of 2")
}
// Check physPageSize.
@@ -450,6 +452,14 @@
//
// On AIX, mmaps start at 0x0A00000000000000 for 64-bit
// processes.
+ //
+ // Space mapped for user arenas comes immediately after the range
+ // originally reserved for the regular heap when race mode is not
+ // enabled because user arena chunks can never be used for regular heap
+ // allocations and we want to avoid fragmenting the address space.
+ //
+ // In race mode we have no choice but to just use the same hints because
+ // the race detector requires that the heap be mapped contiguously.
for i := 0x7f; i >= 0; i-- {
var p uintptr
switch {
@@ -475,9 +485,16 @@
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
+ // Switch to generating hints for user arenas if we've gone
+ // through about half the hints. In race mode, take only about
+ // a quarter; we don't have very much space to work with.
+ hintList := &mheap_.arenaHints
+ if (!raceenabled && i > 0x3f) || (raceenabled && i > 0x5f) {
+ hintList = &mheap_.userArena.arenaHints
+ }
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
- hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+ hint.next, *hintList = *hintList, hint
}
} else {
// On a 32-bit machine, we're much more concerned
@@ -545,6 +562,14 @@
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+
+ // Place the hint for user arenas just after the large reservation.
+ //
+ // While this potentially competes with the hint above, in practice we probably
+ // aren't going to be getting this far anyway on 32-bit platforms.
+ userArenaHint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+ userArenaHint.addr = p
+ userArenaHint.next, mheap_.userArena.arenaHints = mheap_.userArena.arenaHints, userArenaHint
}
}
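
Both hunks above push a new arenaHint onto the front of a singly linked list with one tuple assignment. A generic, standalone sketch of that prepend idiom (the hint type and addresses here are illustrative only):

    package main

    import "fmt"

    type hint struct {
        addr uintptr
        next *hint
    }

    func main() {
        var list *hint
        for _, a := range []uintptr{0x10000, 0x20000, 0x30000} {
            h := &hint{addr: a}
            // Prepend in one statement, like hint.next, *hintList = *hintList, hint above.
            h.next, list = list, h
        }
        for h := list; h != nil; h = h.next {
            fmt.Printf("%#x ", h.addr) // 0x30000 0x20000 0x10000
        }
        fmt.Println()
    }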
@@ -554,26 +579,37 @@
// heapArenaBytes. sysAlloc returns nil on failure.
// There is no corresponding free function.
//
+// hintList is a list of hint addresses for where to allocate new
+// heap arenas. It must be non-nil.
+//
+// register indicates whether the heap arena should be registered
+// in allArenas.
+//
// sysAlloc returns a memory region in the Reserved state. This region must
// be transitioned to Prepared and then Ready before use.
//
// h must be locked.
-func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+func (h *mheap) sysAlloc(n uintptr, hintList **arenaHint, register bool) (v unsafe.Pointer, size uintptr) {
assertLockHeld(&h.lock)
n = alignUp(n, heapArenaBytes)
- // First, try the arena pre-reservation.
- // Newly-used mappings are considered released.
- v = h.arena.alloc(n, heapArenaBytes, &gcController.heapReleased)
- if v != nil {
- size = n
- goto mapped
+ if hintList == &h.arenaHints {
+ // First, try the arena pre-reservation.
+ // Newly-used mappings are considered released.
+ //
+ // Only do this if we're using the regular heap arena hints.
+ // This behavior is only for the heap.
+ v = h.arena.alloc(n, heapArenaBytes, &gcController.heapReleased)
+ if v != nil {
+ size = n
+ goto mapped
+ }
}
// Try to grow the heap at a hint address.
- for h.arenaHints != nil {
- hint := h.arenaHints
+ for *hintList != nil {
+ hint := *hintList
p := hint.addr
if hint.down {
p -= n
@@ -605,7 +641,7 @@
if v != nil {
sysFreeOS(v, n)
}
- h.arenaHints = hint.next
+ *hintList = hint.next
h.arenaHintAlloc.free(unsafe.Pointer(hint))
}
@@ -690,26 +726,28 @@
}
}
- // Add the arena to the arenas list.
- if len(h.allArenas) == cap(h.allArenas) {
- size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
- if size == 0 {
- size = physPageSize
+ // Register the arena in allArenas if requested.
+ if register {
+ if len(h.allArenas) == cap(h.allArenas) {
+ size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
+ if size == 0 {
+ size = physPageSize
+ }
+ newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
+ if newArray == nil {
+ throw("out of memory allocating allArenas")
+ }
+ oldSlice := h.allArenas
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
+ copy(h.allArenas, oldSlice)
+ // Do not free the old backing array because
+ // there may be concurrent readers. Since we
+ // double the array each time, this can lead
+ // to at most 2x waste.
}
- newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
- if newArray == nil {
- throw("out of memory allocating allArenas")
- }
- oldSlice := h.allArenas
- *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
- copy(h.allArenas, oldSlice)
- // Do not free the old backing array because
- // there may be concurrent readers. Since we
- // double the array each time, this can lead
- // to at most 2x waste.
+ h.allArenas = h.allArenas[:len(h.allArenas)+1]
+ h.allArenas[len(h.allArenas)-1] = ri
}
- h.allArenas = h.allArenas[:len(h.allArenas)+1]
- h.allArenas[len(h.allArenas)-1] = ri
// Store atomically just in case an object from the
// new heap arena becomes visible before the heap lock
@@ -740,8 +778,6 @@
case p == 0:
return nil, 0
case p&(align-1) == 0:
- // We got lucky and got an aligned region, so we can
- // use the whole thing.
return unsafe.Pointer(p), size + align
case GOOS == "windows":
// On Windows we can't release pieces of a
@@ -780,7 +816,7 @@
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
- theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
+ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?
if theBit < 64 {
result := s.freeindex + uintptr(theBit)
if result < s.nelems {
@@ -847,6 +883,11 @@
if size == 0 {
return unsafe.Pointer(&zerobase)
}
+
+ // It's possible for any malloc to trigger sweeping, which may in
+ // turn queue finalizers. Record this dynamic lock edge.
+ lockRankMayQueueFinalizer()
+
userSize := size
if asanenabled {
// Refer to ASAN runtime library, the malloc() function allocates extra memory,
@@ -888,24 +929,7 @@
// assistG is the G to charge for this allocation, or nil if
// GC is not currently active.
- var assistG *g
- if gcBlackenEnabled != 0 {
- // Charge the current user G for this allocation.
- assistG = getg()
- if assistG.m.curg != nil {
- assistG = assistG.m.curg
- }
- // Charge the allocation against the G. We'll account
- // for internal fragmentation at the end of mallocgc.
- assistG.gcAssistBytes -= int64(size)
-
- if assistG.gcAssistBytes < 0 {
- // This G is in debt. Assist the GC to correct
- // this before allocating. This must happen
- // before disabling preemption.
- gcAssistAlloc(assistG)
- }
- }
+ assistG := deductAssistCredit(size)
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
@@ -1019,7 +1043,7 @@
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
- memclrNoHeapPointers(unsafe.Pointer(v), size)
+ memclrNoHeapPointers(x, size)
}
}
} else {
@@ -1045,8 +1069,8 @@
}
}
- var scanSize uintptr
if !noscan {
+ var scanSize uintptr
heapBitsSetType(uintptr(x), size, dataSize, typ)
if dataSize > typ.size {
// Array allocation. If there are any
@@ -1068,13 +1092,23 @@
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
+ // As x and the heap bits are initialized, update
+ // freeIndexForScan now so x is seen by the GC
+ // (including conservative scan) as an allocated object.
+ // While this pointer can't escape into user code as a
+ // _live_ pointer until we return, conservative scanning
+ // may find a dead pointer that happens to point into this
+ // object. Delaying this update until now ensures that
+ // conservative scanning considers this pointer dead until
+ // this point.
+ span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if gcphase != _GCoff {
- gcmarknewobject(span, uintptr(x), size, scanSize)
+ gcmarknewobject(span, uintptr(x), size)
}
if raceenabled {
@@ -1158,6 +1192,34 @@
return x
}
+// deductAssistCredit reduces the current G's assist credit
+// by size bytes, and assists the GC if necessary.
+//
+// Caller must be preemptible.
+//
+// Returns the G for which the assist credit was accounted.
+func deductAssistCredit(size uintptr) *g {
+ var assistG *g
+ if gcBlackenEnabled != 0 {
+ // Charge the current user G for this allocation.
+ assistG = getg()
+ if assistG.m.curg != nil {
+ assistG = assistG.m.curg
+ }
+ // Charge the allocation against the G. We'll account
+ // for internal fragmentation at the end of mallocgc.
+ assistG.gcAssistBytes -= int64(size)
+
+ if assistG.gcAssistBytes < 0 {
+ // This G is in debt. Assist the GC to correct
+ // this before allocating. This must happen
+ // before disabling preemption.
+ gcAssistAlloc(assistG)
+ }
+ }
+ return assistG
+}
+
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also
@@ -1187,7 +1249,7 @@
// implementation of new builtin
// compiler (both frontend and SSA backend) knows the signature
-// of this function
+// of this function.
func newobject(typ *_type) unsafe.Pointer {
return mallocgc(typ.size, typ, true)
}
@@ -1245,7 +1307,7 @@
}
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
- if g := getg(); g == g.m.gsignal {
+ if gp := getg(); gp == gp.m.gsignal {
return nextSampleNoFP()
}
}
@@ -1323,7 +1385,8 @@
// The returned memory will be zeroed.
// sysStat must be non-nil.
//
-// Consider marking persistentalloc'd types go:notinheap.
+// Consider marking persistentalloc'd types not in heap by embedding
+// runtime/internal/sys.NotInHeap.
func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
var p *notInHeap
systemstack(func() {
@@ -1464,14 +1527,12 @@
// notInHeap is off-heap memory allocated by a lower-level allocator
// like sysAlloc or persistentAlloc.
//
-// In general, it's better to use real types marked as go:notinheap,
-// but this serves as a generic type for situations where that isn't
-// possible (like in the allocators).
+// In general, it's better to use real types which embed
+// runtime/internal/sys.NotInHeap, but this serves as a generic type
+// for situations where that isn't possible (like in the allocators).
//
// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
-//
-//go:notinheap
-type notInHeap struct{}
+type notInHeap struct{ _ sys.NotInHeap }
func (p *notInHeap) add(bytes uintptr) *notInHeap {
return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index cc20076..5b9ce98 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -294,7 +294,11 @@
for i := 0; i < 5; i++ {
// Reserve memory at the next hint so it can't be used
// for the heap.
- start, end := MapNextArenaHint()
+ start, end, ok := MapNextArenaHint()
+ if !ok {
+ t.Skipf("failed to reserve memory at next arena hint [%#x, %#x)", start, end)
+ }
+ t.Logf("reserved [%#x, %#x)", start, end)
disallowed = append(disallowed, [2]uintptr{start, end})
// Allocate until the runtime tries to use the hint we
// just mapped over.
@@ -314,46 +318,36 @@
}
}
-var mallocSink uintptr
-
func BenchmarkMalloc8(b *testing.B) {
- var x uintptr
for i := 0; i < b.N; i++ {
p := new(int64)
- x ^= uintptr(unsafe.Pointer(p))
+ Escape(p)
}
- mallocSink = x
}
func BenchmarkMalloc16(b *testing.B) {
- var x uintptr
for i := 0; i < b.N; i++ {
p := new([2]int64)
- x ^= uintptr(unsafe.Pointer(p))
+ Escape(p)
}
- mallocSink = x
}
func BenchmarkMallocTypeInfo8(b *testing.B) {
- var x uintptr
for i := 0; i < b.N; i++ {
p := new(struct {
p [8 / unsafe.Sizeof(uintptr(0))]*int
})
- x ^= uintptr(unsafe.Pointer(p))
+ Escape(p)
}
- mallocSink = x
}
func BenchmarkMallocTypeInfo16(b *testing.B) {
- var x uintptr
for i := 0; i < b.N; i++ {
p := new(struct {
p [16 / unsafe.Sizeof(uintptr(0))]*int
})
- x ^= uintptr(unsafe.Pointer(p))
+ Escape(p)
}
- mallocSink = x
}
type LargeStruct struct {
@@ -361,12 +355,10 @@
}
func BenchmarkMallocLargeStruct(b *testing.B) {
- var x uintptr
for i := 0; i < b.N; i++ {
p := make([]LargeStruct, 2)
- x ^= uintptr(unsafe.Pointer(&p[0]))
+ Escape(p)
}
- mallocSink = x
}
var n = flag.Int("n", 1000, "number of goroutines")
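These benchmarks previously XOR-ed each pointer into a package-level sink; the new form hands the pointer to a test-only Escape helper. Both exist to stop the compiler from proving the allocation dead and deleting it. Outside the runtime's test exports, the same effect is usually obtained with a noinline sink, roughly as sketched here (sink and escape are hypothetical names, not runtime API):

    package bench

    import "testing"

    // sink and escape are hypothetical stand-ins for the runtime's test-only
    // Escape helper: routing the pointer through a noinline function that
    // stores it globally forces the value to escape, so the allocation under
    // test cannot be optimized away.
    var sink any

    //go:noinline
    func escape(v any) { sink = v }

    func BenchmarkAlloc8(b *testing.B) {
        for i := 0; i < b.N; i++ {
            escape(new(int64))
        }
    }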
diff --git a/src/runtime/map.go b/src/runtime/map.go
index 65be472..f546ce8 100644
--- a/src/runtime/map.go
+++ b/src/runtime/map.go
@@ -514,7 +514,7 @@
return unsafe.Pointer(&zeroVal[0]), false
}
-// returns both key and elem. Used by map iterator
+// returns both key and elem. Used by map iterator.
func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer) {
if h == nil || h.count == 0 {
return nil, nil
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index c3b4541..46ef42f 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -196,7 +196,7 @@
reflect_typedmemmove(typ, dst, src)
}
-// typedmemmovepartial is like typedmemmove but assumes that
+// reflect_typedmemmovepartial is like typedmemmove but assumes that
// dst and src point off bytes into the value and only copies size bytes.
// off must be a multiple of goarch.PtrSize.
//
@@ -311,6 +311,8 @@
// If the caller knows that typ has pointers, it can alternatively
// call memclrHasPointers.
//
+// TODO: A "go:nosplitrec" annotation would be perfect for this.
+//
//go:nosplit
func typedmemclr(typ *_type, ptr unsafe.Pointer) {
if writeBarrier.needed && typ.ptrdata != 0 {
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index a3a6590..088b566 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -14,34 +14,28 @@
//
// Heap bitmap
//
-// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
-// stored in the heapArena metadata backing each heap arena.
-// That is, if ha is the heapArena for the arena starting a start,
-// then ha.bitmap[0] holds the 2-bit entries for the four words start
-// through start+3*ptrSize, ha.bitmap[1] holds the entries for
-// start+4*ptrSize through start+7*ptrSize, and so on.
+// The heap bitmap comprises 1 bit for each pointer-sized word in the heap,
+// recording whether a pointer is stored in that word or not. This bitmap
+// is stored in the heapArena metadata backing each heap arena.
+// That is, if ha is the heapArena for the arena starting at "start",
+// then ha.bitmap[0] holds the 64 bits for the 64 words "start"
+// through start+63*ptrSize, ha.bitmap[1] holds the entries for
+// start+64*ptrSize through start+127*ptrSize, and so on.
+// Bits correspond to words in little-endian order. ha.bitmap[0]&1 represents
+// the word at "start", ha.bitmap[0]>>1&1 represents the word at start+8, etc.
+// (For 32-bit platforms, s/64/32/.)
//
-// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
-// like in the stack/data bitmaps described above. The upper bit
-// indicates scan/dead: a "1" value ("scan") indicates that there may
-// be pointers in later words of the allocation, and a "0" value
-// ("dead") indicates there are no more pointers in the allocation. If
-// the upper bit is 0, the lower bit must also be 0, and this
-// indicates scanning can ignore the rest of the allocation.
+// We also keep a noMorePtrs bitmap which allows us to stop scanning
+// the heap bitmap early in certain situations. If ha.noMorePtrs[i]>>j&1
+// is 1, then the object containing the last word described by ha.bitmap[8*i+j]
+// has no more pointers beyond those described by ha.bitmap[8*i+j].
+// If ha.noMorePtrs[i]>>j&1 is set, the entries in ha.bitmap[8*i+j+1] and
+// beyond must all be zero until the start of the next object.
//
-// The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
-// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
-// keep the pointer bits contiguous, instead of having to space them out.
+// The bitmap for noscan spans is set to all zero at span allocation time.
//
-// The code makes use of the fact that the zero value for a heap
-// bitmap means scalar/dead. This property must be preserved when
-// modifying the encoding.
-//
-// The bitmap for noscan spans is not maintained. Code must ensure
-// that an object is scannable before consulting its bitmap by
-// checking either the noscan bit in the span or by consulting its
-// type's information.
+// The bitmap for unallocated objects in scannable spans is not maintained
+// (can be junk).
package runtime
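To make the new 1-bit layout concrete, the arithmetic a reader performs to find the bitmap word and bit for a heap address looks roughly like the standalone sketch below. The arena size and base address are illustrative stand-ins, and the modulo trick assumes arenas are heapArenaBytes-aligned, as they are in the runtime:

    package main

    import "fmt"

    const (
        ptrSize        = 8                        // 64-bit platform assumed
        heapArenaBytes = 4 << 20                  // illustrative arena size
        heapArenaWords = heapArenaBytes / ptrSize // heap words per arena
        ptrBits        = 64                       // bits per bitmap word
    )

    // bitmapIndex returns which uint64 of ha.bitmap describes addr and which
    // bit within it. The modulo works because arenas are heapArenaBytes-aligned.
    func bitmapIndex(addr uintptr) (idx, off uintptr) {
        word := addr / ptrSize % heapArenaWords
        return word / ptrBits, word % ptrBits
    }

    func main() {
        base := uintptr(0xc000000000)       // an arena-aligned address
        idx, off := bitmapIndex(base + 800) // the 100th word of the arena
        fmt.Println(idx, off)               // 1 36
    }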
@@ -52,18 +46,6 @@
"unsafe"
)
-const (
- bitPointer = 1 << 0
- bitScan = 1 << 4
-
- heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
- wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte
-
- // all scan/pointer bits in a byte
- bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
- bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
-)
-
// addb returns the byte pointer p+n.
//
//go:nowritebarrier
@@ -110,21 +92,6 @@
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
-// heapBits provides access to the bitmap bits for a single heap word.
-// The methods on heapBits take value receivers so that the compiler
-// can more easily inline calls to those methods and registerize the
-// struct fields independently.
-type heapBits struct {
- bitp *uint8
- shift uint32
- arena uint32 // Index of heap arena containing bitp
- last *uint8 // Last byte arena's bitmap
-}
-
-// Make the compiler check that heapBits.arena is large enough to hold
-// the maximum arena frame number.
-var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1}
-
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
@@ -180,7 +147,7 @@
aCache := s.allocCache
- bitIndex := sys.Ctz64(aCache)
+ bitIndex := sys.TrailingZeros64(aCache)
for bitIndex == 64 {
// Move index to start of next cached bits.
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
@@ -192,7 +159,7 @@
// Refill s.allocCache with the next 64 alloc bits.
s.refillAllocCache(whichByte)
aCache = s.allocCache
- bitIndex = sys.Ctz64(aCache)
+ bitIndex = sys.TrailingZeros64(aCache)
// nothing available in cached bits
// grab the next 8 bytes and try again.
}
@@ -224,7 +191,7 @@
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
- if index < s.freeindex {
+ if index < s.freeIndexForScan {
return false
}
bytep, mask := s.allocBits.bitp(index)
@@ -264,7 +231,7 @@
}
func (s *mspan) markBitsForBase() markBits {
- return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0}
+ return markBits{&s.gcmarkBits.x, uint8(1), 0}
}
// isMarked reports whether mark bit m is set.
@@ -313,32 +280,6 @@
m.index++
}
-// heapBitsForAddr returns the heapBits for the address addr.
-// The caller must ensure addr is in an allocated span.
-// In particular, be careful not to point past the end of an object.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//
-//go:nosplit
-func heapBitsForAddr(addr uintptr) (h heapBits) {
- // 2 bits per word, 4 pairs per byte, and a mask is hard coded.
- arena := arenaIndex(addr)
- ha := mheap_.arenas[arena.l1()][arena.l2()]
- // The compiler uses a load for nil checking ha, but in this
- // case we'll almost never hit that cache line again, so it
- // makes more sense to do a value check.
- if ha == nil {
- // addr is not in the heap. Return nil heapBits, which
- // we expect to crash in the caller.
- return
- }
- h.bitp = &ha.bitmap[(addr/(goarch.PtrSize*4))%heapArenaBitmapBytes]
- h.shift = uint32((addr / goarch.PtrSize) & 3)
- h.arena = uint32(arena)
- h.last = &ha.bitmap[len(ha.bitmap)-1]
- return
-}
-
// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))
@@ -423,7 +364,7 @@
return
}
-// verifyNotInHeapPtr reports whether converting the not-in-heap pointer into a unsafe.Pointer is ok.
+// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into a unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
@@ -433,121 +374,134 @@
return spanOf(p) == nil && p != clobberdeadPtr
}
-// next returns the heapBits describing the next pointer-sized word in memory.
-// That is, if h describes address p, h.next() describes p+ptrSize.
+const ptrBits = 8 * goarch.PtrSize
+
+// heapBits provides access to the bitmap bits for a single heap word.
+// The methods on heapBits take value receivers so that the compiler
+// can more easily inline calls to those methods and registerize the
+// struct fields independently.
+type heapBits struct {
+ // heapBits will report on pointers in the range [addr,addr+size).
+ // The low bit of mask contains the pointerness of the word at addr
+ // (assuming valid>0).
+ addr, size uintptr
+
+ // The next few pointer bits representing words starting at addr.
+ // Those bits already returned by next() are zeroed.
+ mask uintptr
+ // Number of bits in mask that are valid. mask is always less than 1<<valid.
+ valid uintptr
+}
+
+// heapBitsForAddr returns the heapBits for the address addr.
+// The caller must ensure [addr,addr+size) is in an allocated span.
+// In particular, be careful not to point past the end of an object.
+//
+// nosplit because it is used during write barriers and must not be preempted.
+//
+//go:nosplit
+func heapBitsForAddr(addr, size uintptr) heapBits {
+ // Find arena
+ ai := arenaIndex(addr)
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+
+ // Word index in arena.
+ word := addr / goarch.PtrSize % heapArenaWords
+
+ // Word index and bit offset in bitmap array.
+ idx := word / ptrBits
+ off := word % ptrBits
+
+ // Grab relevant bits of bitmap.
+ mask := ha.bitmap[idx] >> off
+ valid := ptrBits - off
+
+ // Process depending on where the object ends.
+ nptr := size / goarch.PtrSize
+ if nptr < valid {
+ // Bits for this object end before the end of this bitmap word.
+ // Squash bits for the following objects.
+ mask &= 1<<(nptr&(ptrBits-1)) - 1
+ valid = nptr
+ } else if nptr == valid {
+ // Bits for this object end at exactly the end of this bitmap word.
+ // All good.
+ } else {
+ // Bits for this object extend into the next bitmap word. See if there
+ // may be any pointers recorded there.
+ if uintptr(ha.noMorePtrs[idx/8])>>(idx%8)&1 != 0 {
+ // No more pointers in this object after this bitmap word.
+ // Update size so we know not to look there.
+ size = valid * goarch.PtrSize
+ }
+ }
+
+ return heapBits{addr: addr, size: size, mask: mask, valid: valid}
+}
+
+// Returns the (absolute) address of the next known pointer and
+// a heapBits iterator representing any remaining pointers.
+// If there are no more pointers, returns address 0.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
-func (h heapBits) next() heapBits {
- if h.shift < 3*heapBitsShift {
- h.shift += heapBitsShift
- } else if h.bitp != h.last {
- h.bitp, h.shift = add1(h.bitp), 0
+func (h heapBits) next() (heapBits, uintptr) {
+ for {
+ if h.mask != 0 {
+ var i int
+ if goarch.PtrSize == 8 {
+ i = sys.TrailingZeros64(uint64(h.mask))
+ } else {
+ i = sys.TrailingZeros32(uint32(h.mask))
+ }
+ h.mask ^= uintptr(1) << (i & (ptrBits - 1))
+ return h, h.addr + uintptr(i)*goarch.PtrSize
+ }
+
+ // Skip words that we've already processed.
+ h.addr += h.valid * goarch.PtrSize
+ h.size -= h.valid * goarch.PtrSize
+ if h.size == 0 {
+ return h, 0 // no more pointers
+ }
+
+ // Grab more bits and try again.
+ h = heapBitsForAddr(h.addr, h.size)
+ }
+}
+
+// nextFast is like next, but can return 0 even when there are more pointers
+// to be found. Callers should call next if nextFast returns 0 as its second
+// return value.
+//
+// if h, addr = h.nextFast(); addr == 0 {
+// if h, addr = h.next(); addr == 0 {
+// ... no more pointers ...
+// }
+// }
+// ... process pointer at addr ...
+//
+// nextFast is designed to be inlineable.
+//
+//go:nosplit
+func (h heapBits) nextFast() (heapBits, uintptr) {
+ // TESTQ/JEQ
+ if h.mask == 0 {
+ return h, 0
+ }
+ // BSFQ
+ var i int
+ if goarch.PtrSize == 8 {
+ i = sys.TrailingZeros64(uint64(h.mask))
} else {
- // Move to the next arena.
- return h.nextArena()
+ i = sys.TrailingZeros32(uint32(h.mask))
}
- return h
-}
-
-// nextArena advances h to the beginning of the next heap arena.
-//
-// This is a slow-path helper to next. gc's inliner knows that
-// heapBits.next can be inlined even though it calls this. This is
-// marked noinline so it doesn't get inlined into next and cause next
-// to be too big to inline.
-//
-//go:nosplit
-//go:noinline
-func (h heapBits) nextArena() heapBits {
- h.arena++
- ai := arenaIdx(h.arena)
- l2 := mheap_.arenas[ai.l1()]
- if l2 == nil {
- // We just passed the end of the object, which
- // was also the end of the heap. Poison h. It
- // should never be dereferenced at this point.
- return heapBits{}
- }
- ha := l2[ai.l2()]
- if ha == nil {
- return heapBits{}
- }
- h.bitp, h.shift = &ha.bitmap[0], 0
- h.last = &ha.bitmap[len(ha.bitmap)-1]
- return h
-}
-
-// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
-// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
-// h.forward(1) is equivalent to h.next(), just slower.
-// Note that forward does not modify h. The caller must record the result.
-// bits returns the heap bits for the current word.
-//
-//go:nosplit
-func (h heapBits) forward(n uintptr) heapBits {
- n += uintptr(h.shift) / heapBitsShift
- nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
- h.shift = uint32(n%4) * heapBitsShift
- if nbitp <= uintptr(unsafe.Pointer(h.last)) {
- h.bitp = (*uint8)(unsafe.Pointer(nbitp))
- return h
- }
-
- // We're in a new heap arena.
- past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
- h.arena += 1 + uint32(past/heapArenaBitmapBytes)
- ai := arenaIdx(h.arena)
- if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
- a := l2[ai.l2()]
- h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
- h.last = &a.bitmap[len(a.bitmap)-1]
- } else {
- h.bitp, h.last = nil, nil
- }
- return h
-}
-
-// forwardOrBoundary is like forward, but stops at boundaries between
-// contiguous sections of the bitmap. It returns the number of words
-// advanced over, which will be <= n.
-func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
- maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
- if n > maxn {
- n = maxn
- }
- return h.forward(n), n
-}
-
-// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
-// The result includes in its higher bits the bits for subsequent words
-// described by the same bitmap byte.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//
-//go:nosplit
-func (h heapBits) bits() uint32 {
- // The (shift & 31) eliminates a test and conditional branch
- // from the generated code.
- return uint32(*h.bitp) >> (h.shift & 31)
-}
-
-// morePointers reports whether this word and all remaining words in this object
-// are scalars.
-// h must not describe the second word of the object.
-func (h heapBits) morePointers() bool {
- return h.bits()&bitScan != 0
-}
-
-// isPointer reports whether the heap bits describe a pointer word.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//
-//go:nosplit
-func (h heapBits) isPointer() bool {
- return h.bits()&bitPointer != 0
+ // BTCQ
+ h.mask ^= uintptr(1) << (i & (ptrBits - 1))
+ // LEAQ (XX)(XX*8)
+ return h, h.addr + uintptr(i)*goarch.PtrSize
}
// bulkBarrierPreWrite executes a write barrier
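Callers drive this iterator with a find-lowest-set-bit, clear-it loop, as the write-barrier code below does. A self-contained model of that loop over a single 64-bit mask, using math/bits in place of the runtime internals:

    package main

    import (
        "fmt"
        "math/bits"
    )

    const ptrSize = 8

    // pointerAddrs models the next/nextFast loop: mask holds one bit per
    // pointer-sized word starting at base, and each iteration finds the lowest
    // set bit, clears it (like h.mask ^= 1 << i), and reports that word's address.
    func pointerAddrs(base uintptr, mask uint64) []uintptr {
        var out []uintptr
        for mask != 0 {
            i := bits.TrailingZeros64(mask)
            mask &^= 1 << uint(i)
            out = append(out, base+uintptr(i)*ptrSize)
        }
        return out
    }

    func main() {
        fmt.Println(pointerAddrs(0x1000, 0b1011)) // [4096 4104 4120]
    }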
@@ -611,27 +565,29 @@
}
buf := &getg().m.p.ptr().wbBuf
- h := heapBitsForAddr(dst)
+ h := heapBitsForAddr(dst, size)
if src == 0 {
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- dstx := (*uintptr)(unsafe.Pointer(dst + i))
- if !buf.putFast(*dstx, 0) {
- wbBufFlush(nil, 0)
- }
+ for {
+ var addr uintptr
+ if h, addr = h.next(); addr == 0 {
+ break
}
- h = h.next()
+ dstx := (*uintptr)(unsafe.Pointer(addr))
+ if !buf.putFast(*dstx, 0) {
+ wbBufFlush(nil, 0)
+ }
}
} else {
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- dstx := (*uintptr)(unsafe.Pointer(dst + i))
- srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush(nil, 0)
- }
+ for {
+ var addr uintptr
+ if h, addr = h.next(); addr == 0 {
+ break
}
- h = h.next()
+ dstx := (*uintptr)(unsafe.Pointer(addr))
+ srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
+ if !buf.putFast(*dstx, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
}
}
@@ -654,15 +610,16 @@
return
}
buf := &getg().m.p.ptr().wbBuf
- h := heapBitsForAddr(dst)
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(0, *srcx) {
- wbBufFlush(nil, 0)
- }
+ h := heapBitsForAddr(dst, size)
+ for {
+ var addr uintptr
+ if h, addr = h.next(); addr == 0 {
+ break
}
- h = h.next()
+ srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
+ if !buf.putFast(0, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
}
@@ -759,43 +716,31 @@
}
}
-// The methods operating on spans all require that h has been returned
-// by heapBitsForSpan and that size, n, total are the span layout description
-// returned by the mspan's layout method.
-// If total > size*n, it means that there is extra leftover memory in the span,
-// usually due to rounding.
-//
-// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
-
-// initSpan initializes the heap bitmap for a span.
-// If this is a span of pointer-sized objects, it initializes all
-// words to pointer/scan.
-// Otherwise, it initializes all words to scalar/dead.
-func (h heapBits) initSpan(s *mspan) {
- // Clear bits corresponding to objects.
- nw := (s.npages << _PageShift) / goarch.PtrSize
- if nw%wordsPerBitmapByte != 0 {
- throw("initSpan: unaligned length")
- }
- if h.shift != 0 {
- throw("initSpan: unaligned base")
+// initHeapBits initializes the heap bitmap for a span.
+// If this is a span of single pointer allocations, it initializes all
+// words to pointer. If forceClear is true, clears all bits.
+func (s *mspan) initHeapBits(forceClear bool) {
+ if forceClear || s.spanclass.noscan() {
+ // Set all the pointer bits to zero. We do this once
+ // when the span is allocated so we don't have to do it
+ // for each object allocation.
+ base := s.base()
+ size := s.npages * pageSize
+ h := writeHeapBitsForAddr(base)
+ h.flush(base, size)
+ return
}
isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
- for nw > 0 {
- hNext, anw := h.forwardOrBoundary(nw)
- nbyte := anw / wordsPerBitmapByte
- if isPtrs {
- bitp := h.bitp
- for i := uintptr(0); i < nbyte; i++ {
- *bitp = bitPointerAll | bitScanAll
- bitp = add1(bitp)
- }
- } else {
- memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte)
- }
- h = hNext
- nw -= anw
+ if !isPtrs {
+ return // nothing to do
}
+ h := writeHeapBitsForAddr(s.base())
+ size := s.npages * pageSize
+ nptrs := size / goarch.PtrSize
+ for i := uintptr(0); i < nptrs; i += ptrBits {
+ h = h.write(^uintptr(0), ptrBits)
+ }
+ h.flush(s.base(), size)
}
// countAlloc returns the number of objects allocated in span s by
@@ -818,6 +763,159 @@
return count
}
+type writeHeapBits struct {
+ addr uintptr // address that the low bit of mask represents the pointer state of.
+ mask uintptr // some pointer bits starting at the address addr.
+ valid uintptr // number of bits in mask that are valid (including low)
+ low uintptr // number of low-order bits to not overwrite
+}
+
+func writeHeapBitsForAddr(addr uintptr) (h writeHeapBits) {
+ // We start writing bits maybe in the middle of a heap bitmap word.
+ // Remember how many bits into the word we started, so we can be sure
+ // not to overwrite the previous bits.
+ h.low = addr / goarch.PtrSize % ptrBits
+
+ // round down to heap word that starts the bitmap word.
+ h.addr = addr - h.low*goarch.PtrSize
+
+ // We don't have any bits yet.
+ h.mask = 0
+ h.valid = h.low
+
+ return
+}
+
+// write appends the pointerness of the next valid pointer slots
+// using the low valid bits of bits. 1=pointer, 0=scalar.
+func (h writeHeapBits) write(bits, valid uintptr) writeHeapBits {
+ if h.valid+valid <= ptrBits {
+ // Fast path - just accumulate the bits.
+ h.mask |= bits << h.valid
+ h.valid += valid
+ return h
+ }
+ // Too many bits to fit in this word. Write the current word
+ // out and move on to the next word.
+
+ data := h.mask | bits<<h.valid // mask for this word
+ h.mask = bits >> (ptrBits - h.valid) // leftover for next word
+ h.valid += valid - ptrBits // have h.valid+valid bits, writing ptrBits of them
+
+ // Flush mask to the memory bitmap.
+ // TODO: figure out how to cache arena lookup.
+ ai := arenaIndex(h.addr)
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
+ m := uintptr(1)<<h.low - 1
+ ha.bitmap[idx] = ha.bitmap[idx]&m | data
+ // Note: no synchronization required for this write because
+ // the allocator has exclusive access to the page, and the bitmap
+ // entries are all for a single page. Also, visibility of these
+ // writes is guaranteed by the publication barrier in mallocgc.
+
+ // Clear noMorePtrs bit, since we're going to be writing bits
+ // into the following word.
+ ha.noMorePtrs[idx/8] &^= uint8(1) << (idx % 8)
+ // Note: same as above
+
+ // Move to next word of bitmap.
+ h.addr += ptrBits * goarch.PtrSize
+ h.low = 0
+ return h
+}
+
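The subtle part of write is the spill across a bitmap word boundary: bits accumulate in a single word and the leftover high bits carry into the next word. A standalone model over a plain []uint64, ignoring the low-bit preservation and noMorePtrs bookkeeping (bitWriter is an illustrative type, not runtime code):

    package main

    import "fmt"

    // bitWriter buffers up to 64 pointer bits and spills them to out one word
    // at a time, mirroring the accumulate/spill split in writeHeapBits.write.
    // Each write call must supply n <= 64 bits.
    type bitWriter struct {
        out   []uint64
        idx   int    // next word of out to fill
        mask  uint64 // buffered bits
        valid uint   // number of buffered bits
    }

    func (w *bitWriter) write(bits uint64, n uint) {
        if w.valid+n <= 64 {
            w.mask |= bits << w.valid // fast path: just accumulate
            w.valid += n
            return
        }
        w.out[w.idx] = w.mask | bits<<w.valid // complete the current word
        w.idx++
        w.mask = bits >> (64 - w.valid) // leftover high bits carry over
        w.valid += n - 64
    }

    func (w *bitWriter) flush() {
        if w.valid > 0 {
            w.out[w.idx] = w.mask
        }
    }

    func main() {
        w := &bitWriter{out: make([]uint64, 2)}
        for i := 0; i < 10; i++ {
            w.write(0b1011011, 7) // ten 7-bit groups cross a word boundary
        }
        w.flush()
        fmt.Printf("%064b\n%064b\n", w.out[1], w.out[0])
    }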
+// Add padding of size bytes.
+func (h writeHeapBits) pad(size uintptr) writeHeapBits {
+ if size == 0 {
+ return h
+ }
+ words := size / goarch.PtrSize
+ for words > ptrBits {
+ h = h.write(0, ptrBits)
+ words -= ptrBits
+ }
+ return h.write(0, words)
+}
+
+// Flush the bits that have been written, and add zeros as needed
+// to cover the full object [addr, addr+size).
+func (h writeHeapBits) flush(addr, size uintptr) {
+ // zeros counts the number of bits needed to represent the object minus the
+ // number of bits we've already written. This is the number of 0 bits
+ // that need to be added.
+ zeros := (addr+size-h.addr)/goarch.PtrSize - h.valid
+
+ // Add zero bits up to the bitmap word boundary
+ if zeros > 0 {
+ z := ptrBits - h.valid
+ if z > zeros {
+ z = zeros
+ }
+ h.valid += z
+ zeros -= z
+ }
+
+ // Find word in bitmap that we're going to write.
+ ai := arenaIndex(h.addr)
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
+
+ // Write remaining bits.
+ if h.valid != h.low {
+ m := uintptr(1)<<h.low - 1 // don't clear existing bits below "low"
+ m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
+ ha.bitmap[idx] = ha.bitmap[idx]&m | h.mask
+ }
+ if zeros == 0 {
+ return
+ }
+
+ // Record in the noMorePtrs map that there won't be any more 1 bits,
+ // so readers can stop early.
+ ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
+
+ // Advance to next bitmap word.
+ h.addr += ptrBits * goarch.PtrSize
+
+ // Continue on writing zeros for the rest of the object.
+ // For standard use of the ptr bits this is not required, as
+ // the bits are read from the beginning of the object. Some uses,
+ // like noscan spans, oblets, bulk write barriers, and cgocheck, might
+ // start mid-object, so these writes are still required.
+ for {
+ // Write zero bits.
+ ai := arenaIndex(h.addr)
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
+ if zeros < ptrBits {
+ ha.bitmap[idx] &^= uintptr(1)<<zeros - 1
+ break
+ } else if zeros == ptrBits {
+ ha.bitmap[idx] = 0
+ break
+ } else {
+ ha.bitmap[idx] = 0
+ zeros -= ptrBits
+ }
+ ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
+ h.addr += ptrBits * goarch.PtrSize
+ }
+}
+
+// Read the bytes starting at the aligned pointer p into a uintptr.
+// Read is little-endian.
+func readUintptr(p *byte) uintptr {
+ x := *(*uintptr)(unsafe.Pointer(p))
+ if goarch.BigEndian {
+ if goarch.PtrSize == 8 {
+ return uintptr(sys.Bswap64(uint64(x)))
+ }
+ return uintptr(sys.Bswap32(uint32(x)))
+ }
+ return x
+}
+
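Because of the byte swap on big-endian targets, readUintptr always yields the little-endian interpretation of the bytes at p. Outside the runtime, encoding/binary gives the same value; a small 64-bit-only sketch:

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    func main() {
        // One byte of a 1-bit pointer mask per 8 heap words:
        // bits set for words 0, 1, and 3.
        mask := []byte{0b0000_1011, 0, 0, 0, 0, 0, 0, 0}

        // Equivalent of readUintptr(&mask[0]) on a 64-bit platform: interpret
        // the 8 bytes little-endian regardless of the host's byte order.
        x := binary.LittleEndian.Uint64(mask)
        fmt.Printf("%#x\n", x) // 0xb
    }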
// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
@@ -829,7 +927,7 @@
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
-// and the bitmap for a span always falls on byte boundaries,
+// and the bitmap for a span always falls on word boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
@@ -844,209 +942,61 @@
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
const doubleCheck = false // slow but helpful; enable to test modifications to this code
- const (
- mask1 = bitPointer | bitScan // 00010001
- mask2 = bitPointer | bitScan | mask1<<heapBitsShift // 00110011
- mask3 = bitPointer | bitScan | mask2<<heapBitsShift // 01110111
- )
-
- // dataSize is always size rounded up to the next malloc size class,
- // except in the case of allocating a defer block, in which case
- // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
- // arbitrarily larger.
- //
- // The checks for size == goarch.PtrSize and size == 2*goarch.PtrSize can therefore
- // assume that dataSize == size without checking it explicitly.
+ if doubleCheck && dataSize%typ.size != 0 {
+ throw("heapBitsSetType: dataSize not a multiple of typ.size")
+ }
if goarch.PtrSize == 8 && size == goarch.PtrSize {
// It's one word and it has pointers, it must be a pointer.
// Since all allocated one-word objects are pointers
// (non-pointers are aggregated into tinySize allocations),
- // initSpan sets the pointer bits for us. Nothing to do here.
+ // (*mspan).initHeapBits sets the pointer bits for us.
+ // Nothing to do here.
if doubleCheck {
- h := heapBitsForAddr(x)
- if !h.isPointer() {
+ h, addr := heapBitsForAddr(x, size).next()
+ if addr != x {
throw("heapBitsSetType: pointer bit missing")
}
- if !h.morePointers() {
- throw("heapBitsSetType: scan bit missing")
+ _, addr = h.next()
+ if addr != 0 {
+ throw("heapBitsSetType: second pointer bit found")
}
}
return
}
- h := heapBitsForAddr(x)
- ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
+ h := writeHeapBitsForAddr(x)
- // 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.
- // Therefore, these objects share a heap bitmap byte with the objects next to them.
- // These are called out as a special case primarily so the code below can assume all
- // objects are at least 4 words long and that their bitmaps start either at the beginning
- // of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).
-
- if size == 2*goarch.PtrSize {
- if typ.size == goarch.PtrSize {
- // We're allocating a block big enough to hold two pointers.
- // On 64-bit, that means the actual object must be two pointers,
- // or else we'd have used the one-pointer-sized block.
- // On 32-bit, however, this is the 8-byte block, the smallest one.
- // So it could be that we're allocating one pointer and this was
- // just the smallest block available. Distinguish by checking dataSize.
- // (In general the number of instances of typ being allocated is
- // dataSize/typ.size.)
- if goarch.PtrSize == 4 && dataSize == goarch.PtrSize {
- // 1 pointer object. On 32-bit machines clear the bit for the
- // unused second word.
- *h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
- *h.bitp |= (bitPointer | bitScan) << h.shift
- } else {
- // 2-element array of pointer.
- *h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
- }
- return
- }
- // Otherwise typ.size must be 2*goarch.PtrSize,
- // and typ.kind&kindGCProg == 0.
- if doubleCheck {
- if typ.size != 2*goarch.PtrSize || typ.kind&kindGCProg != 0 {
- print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
- throw("heapBitsSetType")
- }
- }
- b := uint32(*ptrmask)
- hb := b & 3
- hb |= bitScanAll & ((bitScan << (typ.ptrdata / goarch.PtrSize)) - 1)
- // Clear the bits for this object so we can set the
- // appropriate ones.
- *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
- *h.bitp |= uint8(hb << h.shift)
- return
- } else if size == 3*goarch.PtrSize {
- b := uint8(*ptrmask)
- if doubleCheck {
- if b == 0 {
- println("runtime: invalid type ", typ.string())
- throw("heapBitsSetType: called with non-pointer type")
- }
- if goarch.PtrSize != 8 {
- throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
- }
- if typ.kind&kindGCProg != 0 {
- throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
- }
- if typ.size == 2*goarch.PtrSize {
- print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
- throw("heapBitsSetType: inconsistent object sizes")
- }
- }
- if typ.size == goarch.PtrSize {
- // The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
- // Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
- if doubleCheck && *typ.gcdata != 1 {
- print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n")
- throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")
- }
- // 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.
- b = 7
- }
-
- hb := b & 7
- // Set bitScan bits for all pointers.
- hb |= hb << wordsPerBitmapByte
- // First bitScan bit is always set since the type contains pointers.
- hb |= bitScan
- // Second bitScan bit needs to also be set if the third bitScan bit is set.
- hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1
-
- // For h.shift > 1 heap bits cross a byte boundary and need to be written part
- // to h.bitp and part to the next h.bitp.
- switch h.shift {
- case 0:
- *h.bitp &^= mask3 << 0
- *h.bitp |= hb << 0
- case 1:
- *h.bitp &^= mask3 << 1
- *h.bitp |= hb << 1
- case 2:
- *h.bitp &^= mask2 << 2
- *h.bitp |= (hb & mask2) << 2
- // Two words written to the first byte.
- // Advance two words to get to the next byte.
- h = h.next().next()
- *h.bitp &^= mask1
- *h.bitp |= (hb >> 2) & mask1
- case 3:
- *h.bitp &^= mask1 << 3
- *h.bitp |= (hb & mask1) << 3
- // One word written to the first byte.
- // Advance one word to get to the next byte.
- h = h.next()
- *h.bitp &^= mask2
- *h.bitp |= (hb >> 1) & mask2
- }
- return
- }
-
- // Copy from 1-bit ptrmask into 2-bit bitmap.
- // The basic approach is to use a single uintptr as a bit buffer,
- // alternating between reloading the buffer and writing bitmap bytes.
- // In general, one load can supply two bitmap byte writes.
- // This is a lot of lines of code, but it compiles into relatively few
- // machine instructions.
-
- outOfPlace := false
- if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrandn(2) == 0) {
- // This object spans heap arenas, so the bitmap may be
- // discontiguous. Unroll it into the object instead
- // and then copy it out.
- //
- // In doubleCheck mode, we randomly do this anyway to
- // stress test the bitmap copying path.
- outOfPlace = true
- h.bitp = (*uint8)(unsafe.Pointer(x))
- h.last = nil
- }
-
- var (
- // Ptrmask input.
- p *byte // last ptrmask byte read
- b uintptr // ptrmask bits already loaded
- nb uintptr // number of bits in b at next read
- endp *byte // final ptrmask byte to read (then repeat)
- endnb uintptr // number of valid bits in *endp
- pbits uintptr // alternate source of bits
-
- // Heap bitmap output.
- w uintptr // words processed
- nw uintptr // number of words to process
- hbitp *byte // next heap bitmap byte to write
- hb uintptr // bits being prepared for *hbitp
- )
-
- hbitp = h.bitp
-
- // Handle GC program. Delayed until this part of the code
- // so that we can use the same double-checking mechanism
- // as the 1-bit case. Nothing above could have encountered
- // GC programs: the cases were all too small.
+ // Handle GC program.
if typ.kind&kindGCProg != 0 {
- heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
- if doubleCheck {
- // Double-check the heap bits written by GC program
- // by running the GC program to create a 1-bit pointer mask
- // and then jumping to the double-check code below.
- // This doesn't catch bugs shared between the 1-bit and 4-bit
- // GC program execution, but it does catch mistakes specific
- // to just one of those and bugs in heapBitsSetTypeGCProg's
- // implementation of arrays.
- lock(&debugPtrmask.lock)
- if debugPtrmask.data == nil {
- debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
+ // Expand the gc program into the storage we're going to use for the actual object.
+ obj := (*uint8)(unsafe.Pointer(x))
+ n := runGCProg(addb(typ.gcdata, 4), obj)
+ // Use the expanded program to set the heap bits.
+ for i := uintptr(0); true; i += typ.size {
+ // Copy expanded program to heap bitmap.
+ p := obj
+ j := n
+ for j > 8 {
+ h = h.write(uintptr(*p), 8)
+ p = add1(p)
+ j -= 8
}
- ptrmask = debugPtrmask.data
- runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
+ h = h.write(uintptr(*p), j)
+
+ if i+typ.size == dataSize {
+ break // no padding after last element
+ }
+
+ // Pad with zeros to the start of the next element.
+ h = h.pad(typ.size - n*goarch.PtrSize)
}
- goto Phase4
+
+ h.flush(x, size)
+
+ // Erase the expanded GC program.
+ memclrNoHeapPointers(unsafe.Pointer(obj), (n+7)/8)
+ return
}
// Note about sizes:
@@ -1061,424 +1011,98 @@
// to scan the buffer's heap bitmap at all.
// The 1-bit ptrmasks are sized to contain only bits for
// the typ.ptrdata prefix, zero padded out to a full byte
- // of bitmap. This code sets nw (below) so that heap bitmap
- // bits are only written for the typ.ptrdata prefix; if there is
- // more room in the allocated object, the next heap bitmap
- // entry is a 00, indicating that there are no more pointers
- // to scan. So only the ptrmask for the ptrdata bytes is needed.
+ // of bitmap. If there is more room in the allocated object,
+ // that space is pointerless. The noMorePtrs bitmap will prevent
+ // scanning large pointerless tails of an object.
//
// Replicated copies are not as nice: if there is an array of
// objects with scalar tails, all but the last tail does have to
// be initialized, because there is no way to say "skip forward".
- // However, because of the possibility of a repeated type with
- // size not a multiple of 4 pointers (one heap bitmap byte),
- // the code already must handle the last ptrmask byte specially
- // by treating it as containing only the bits for endnb pointers,
- // where endnb <= 4. We represent large scalar tails that must
- // be expanded in the replication by setting endnb larger than 4.
- // This will have the effect of reading many bits out of b,
- // but once the real bits are shifted out, b will supply as many
- // zero bits as we try to read, which is exactly what we need.
- p = ptrmask
- if typ.size < dataSize {
- // Filling in bits for an array of typ.
- // Set up for repetition of ptrmask during main loop.
- // Note that ptrmask describes only a prefix of
- const maxBits = goarch.PtrSize*8 - 7
- if typ.ptrdata/goarch.PtrSize <= maxBits {
- // Entire ptrmask fits in uintptr with room for a byte fragment.
- // Load into pbits and never read from ptrmask again.
- // This is especially important when the ptrmask has
- // fewer than 8 bits in it; otherwise the reload in the middle
- // of the Phase 2 loop would itself need to loop to gather
- // at least 8 bits.
-
- // Accumulate ptrmask into b.
- // ptrmask is sized to describe only typ.ptrdata, but we record
- // it as describing typ.size bytes, since all the high bits are zero.
- nb = typ.ptrdata / goarch.PtrSize
- for i := uintptr(0); i < nb; i += 8 {
- b |= uintptr(*p) << i
- p = add1(p)
- }
- nb = typ.size / goarch.PtrSize
-
- // Replicate ptrmask to fill entire pbits uintptr.
- // Doubling and truncating is fewer steps than
- // iterating by nb each time. (nb could be 1.)
- // Since we loaded typ.ptrdata/goarch.PtrSize bits
- // but are pretending to have typ.size/goarch.PtrSize,
- // there might be no replication necessary/possible.
- pbits = b
- endnb = nb
- if nb+nb <= maxBits {
- for endnb <= goarch.PtrSize*8 {
- pbits |= pbits << endnb
- endnb += endnb
+ ptrs := typ.ptrdata / goarch.PtrSize
+ if typ.size == dataSize { // Single element
+ if ptrs <= ptrBits { // Single small element
+ m := readUintptr(typ.gcdata)
+ h = h.write(m, ptrs)
+ } else { // Single large element
+ p := typ.gcdata
+ for {
+ h = h.write(readUintptr(p), ptrBits)
+ p = addb(p, ptrBits/8)
+ ptrs -= ptrBits
+ if ptrs <= ptrBits {
+ break
}
- // Truncate to a multiple of original ptrmask.
- // Because nb+nb <= maxBits, nb fits in a byte.
- // Byte division is cheaper than uintptr division.
- endnb = uintptr(maxBits/byte(nb)) * nb
- pbits &= 1<<endnb - 1
- b = pbits
- nb = endnb
}
-
- // Clear p and endp as sentinel for using pbits.
- // Checked during Phase 2 loop.
- p = nil
- endp = nil
- } else {
- // Ptrmask is larger. Read it multiple times.
- n := (typ.ptrdata/goarch.PtrSize+7)/8 - 1
- endp = addb(ptrmask, n)
- endnb = typ.size/goarch.PtrSize - n*8
+ m := readUintptr(p)
+ h = h.write(m, ptrs)
}
- }
- if p != nil {
- b = uintptr(*p)
- p = add1(p)
- nb = 8
- }
-
- if typ.size == dataSize {
- // Single entry: can stop once we reach the non-pointer data.
- nw = typ.ptrdata / goarch.PtrSize
- } else {
- // Repeated instances of typ in an array.
- // Have to process first N-1 entries in full, but can stop
- // once we reach the non-pointer data in the final entry.
- nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / goarch.PtrSize
- }
- if nw == 0 {
- // No pointers! Caller was supposed to check.
- println("runtime: invalid type ", typ.string())
- throw("heapBitsSetType: called with non-pointer type")
- return
- }
-
- // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
- // The leading byte is special because it contains the bits for word 1,
- // which does not have the scan bit set.
- // The leading half-byte is special because it's a half a byte,
- // so we have to be careful with the bits already there.
- switch {
- default:
- throw("heapBitsSetType: unexpected shift")
-
- case h.shift == 0:
- // Ptrmask and heap bitmap are aligned.
- //
- // This is a fast path for small objects.
- //
- // The first byte we write out covers the first four
- // words of the object. The scan/dead bit on the first
- // word must be set to scan since there are pointers
- // somewhere in the object.
- // In all following words, we set the scan/dead
- // appropriately to indicate that the object continues
- // to the next 2-bit entry in the bitmap.
- //
- // We set four bits at a time here, but if the object
- // is fewer than four words, phase 3 will clear
- // unnecessary bits.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- goto Phase3
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
- nb -= 4
-
- case h.shift == 2:
- // Ptrmask and heap bitmap are misaligned.
- //
- // On 32 bit architectures only the 6-word object that corresponds
- // to a 24 bytes size class can start with h.shift of 2 here since
- // all other non 16 byte aligned size classes have been handled by
- // special code paths at the beginning of heapBitsSetType on 32 bit.
- //
- // Many size classes are only 16 byte aligned. On 64 bit architectures
- // this results in a heap bitmap position starting with a h.shift of 2.
- //
- // The bits for the first two words are in a byte shared
- // with another object, so we must be careful with the bits
- // already there.
- //
- // We took care of 1-word, 2-word, and 3-word objects above,
- // so this is at least a 6-word object.
- hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
- hb |= bitScan << (2 * heapBitsShift)
- if nw > 1 {
- hb |= bitScan << (3 * heapBitsShift)
- }
- b >>= 2
- nb -= 2
- *hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))
- *hbitp |= uint8(hb)
- hbitp = add1(hbitp)
- if w += 2; w >= nw {
- // We know that there is more data, because we handled 2-word and 3-word objects above.
- // This must be at least a 6-word object. If we're out of pointer words,
- // mark no scan in next bitmap byte and finish.
- hb = 0
- w += 4
- goto Phase3
- }
- }
-
- // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
- // The loop computes the bits for that last write but does not execute the write;
- // it leaves the bits in hb for processing by phase 3.
- // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
- // use in the first half of the loop right now, and then we only adjust nb explicitly
- // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
- nb -= 4
- for {
- // Emit bitmap byte.
- // b has at least nb+4 bits, with one exception:
- // if w+4 >= nw, then b has only nw-w bits,
- // but we'll stop at the break and then truncate
- // appropriately in Phase 3.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- break
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
-
- // Load more bits. b has nb right now.
- if p != endp {
- // Fast path: keep reading from ptrmask.
- // nb unmodified: we just loaded 8 bits,
- // and the next iteration will consume 8 bits,
- // leaving us with the same nb the next time we're here.
- if nb < 8 {
- b |= uintptr(*p) << nb
- p = add1(p)
- } else {
- // Reduce the number of bits in b.
- // This is important if we skipped
- // over a scalar tail, since nb could
- // be larger than the bit width of b.
- nb -= 8
+ } else { // Repeated element
+ words := typ.size / goarch.PtrSize // total words, including scalar tail
+ if words <= ptrBits { // Repeated small element
+ n := dataSize / typ.size
+ m := readUintptr(typ.gcdata)
+ // Make larger unit to repeat
+ for words <= ptrBits/2 {
+ if n&1 != 0 {
+ h = h.write(m, words)
+ }
+ n /= 2
+ m |= m << words
+ ptrs += words
+ words *= 2
+ if n == 1 {
+ break
+ }
}
- } else if p == nil {
- // Almost as fast path: track bit count and refill from pbits.
- // For short repetitions.
- if nb < 8 {
- b |= pbits << nb
- nb += endnb
+ for n > 1 {
+ h = h.write(m, words)
+ n--
}
- nb -= 8 // for next iteration
- } else {
- // Slow path: reached end of ptrmask.
- // Process final partial byte and rewind to start.
- b |= uintptr(*p) << nb
- nb += endnb
- if nb < 8 {
- b |= uintptr(*ptrmask) << nb
- p = add1(ptrmask)
- } else {
- nb -= 8
- p = ptrmask
+ h = h.write(m, ptrs)
+ } else { // Repeated large element
+ for i := uintptr(0); true; i += typ.size {
+ p := typ.gcdata
+ j := ptrs
+ for j > ptrBits {
+ h = h.write(readUintptr(p), ptrBits)
+ p = addb(p, ptrBits/8)
+ j -= ptrBits
+ }
+ m := readUintptr(p)
+ h = h.write(m, j)
+ if i+typ.size == dataSize {
+ break // don't need the trailing nonptr bits on the last element.
+ }
+ // Pad with zeros to the start of the next element.
+ h = h.pad(typ.size - typ.ptrdata)
}
}
-
- // Emit bitmap byte.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- break
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
}
+ h.flush(x, size)
-Phase3:
- // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
- if w > nw {
- // Counting the 4 entries in hb not yet written to memory,
- // there are more entries than possible pointer slots.
- // Discard the excess entries (can't be more than 3).
- mask := uintptr(1)<<(4-(w-nw)) - 1
- hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits
- }
-
- // Change nw from counting possibly-pointer words to total words in allocation.
- nw = size / goarch.PtrSize
-
- // Write whole bitmap bytes.
- // The first is hb, the rest are zero.
- if w <= nw {
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- hb = 0 // for possible final half-byte below
- for w += 4; w <= nw; w += 4 {
- *hbitp = 0
- hbitp = add1(hbitp)
- }
- }
-
- // Write final partial bitmap byte if any.
- // We know w > nw, or else we'd still be in the loop above.
- // It can be bigger only due to the 4 entries in hb that it counts.
- // If w == nw+4 then there's nothing left to do: we wrote all nw entries
- // and can discard the 4 sitting in hb.
- // But if w == nw+2, we need to write first two in hb.
- // The byte is shared with the next object, so be careful with
- // existing bits.
- if w == nw+2 {
- *hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb)
- }
-
-Phase4:
- // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
- if outOfPlace {
- // TODO: We could probably make this faster by
- // handling [x+dataSize, x+size) specially.
- h := heapBitsForAddr(x)
- // cnw is the number of heap words, or bit pairs
- // remaining (like nw above).
- cnw := size / goarch.PtrSize
- src := (*uint8)(unsafe.Pointer(x))
- // We know the first and last byte of the bitmap are
- // not the same, but it's still possible for small
- // objects span arenas, so it may share bitmap bytes
- // with neighboring objects.
- //
- // Handle the first byte specially if it's shared. See
- // Phase 1 for why this is the only special case we need.
- if doubleCheck {
- if !(h.shift == 0 || h.shift == 2) {
- print("x=", x, " size=", size, " cnw=", h.shift, "\n")
- throw("bad start shift")
- }
- }
- if h.shift == 2 {
- *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
- h = h.next().next()
- cnw -= 2
- src = addb(src, 1)
- }
- // We're now byte aligned. Copy out to per-arena
- // bitmaps until the last byte (which may again be
- // partial).
- for cnw >= 4 {
- // This loop processes four words at a time,
- // so round cnw down accordingly.
- hNext, words := h.forwardOrBoundary(cnw / 4 * 4)
-
- // n is the number of bitmap bytes to copy.
- n := words / 4
- memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
- cnw -= words
- h = hNext
- src = addb(src, n)
- }
- if doubleCheck && h.shift != 0 {
- print("cnw=", cnw, " h.shift=", h.shift, "\n")
- throw("bad shift after block copy")
- }
- // Handle the last byte if it's shared.
- if cnw == 2 {
- *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src
- src = addb(src, 1)
- h = h.next().next()
- }
- if doubleCheck {
- if uintptr(unsafe.Pointer(src)) > x+size {
- throw("copy exceeded object size")
- }
- if !(cnw == 0 || cnw == 2) {
- print("x=", x, " size=", size, " cnw=", cnw, "\n")
- throw("bad number of remaining words")
- }
- // Set up hbitp so doubleCheck code below can check it.
- hbitp = h.bitp
- }
- // Zero the object where we wrote the bitmap.
- memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
- }
-
- // Double check the whole bitmap.
if doubleCheck {
- // x+size may not point to the heap, so back up one
- // word and then advance it the way we do above.
- end := heapBitsForAddr(x + size - goarch.PtrSize)
- if outOfPlace {
- // In out-of-place copying, we just advance
- // using next.
- end = end.next()
- } else {
- // Don't use next because that may advance to
- // the next arena and the in-place logic
- // doesn't do that.
- end.shift += heapBitsShift
- if end.shift == 4*heapBitsShift {
- end.bitp, end.shift = add1(end.bitp), 0
+ h := heapBitsForAddr(x, size)
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
+ // Compute the pointer bit we want at offset i.
+ want := false
+ if i < dataSize {
+ off := i % typ.size
+ if off < typ.ptrdata {
+ j := off / goarch.PtrSize
+ want = *addb(typ.gcdata, j/8)>>(j%8)&1 != 0
+ }
+ }
+ if want {
+ var addr uintptr
+ h, addr = h.next()
+ if addr != x+i {
+ throw("heapBitsSetType: pointer entry not correct")
+ }
}
}
- if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
- println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
- print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
- print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
- h0 := heapBitsForAddr(x)
- print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
- print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
- throw("bad heapBitsSetType")
- }
-
- // Double-check that bits to be written were written correctly.
- // Does not check that other bits were not written, unfortunately.
- h := heapBitsForAddr(x)
- nptr := typ.ptrdata / goarch.PtrSize
- ndata := typ.size / goarch.PtrSize
- count := dataSize / typ.size
- totalptr := ((count-1)*typ.size + typ.ptrdata) / goarch.PtrSize
- for i := uintptr(0); i < size/goarch.PtrSize; i++ {
- j := i % ndata
- var have, want uint8
- have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
- if i >= totalptr {
- if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
- // heapBitsSetTypeGCProg always fills
- // in full nibbles of bitScan.
- want = bitScan
- }
- } else {
- if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
- want |= bitPointer
- }
- want |= bitScan
- }
- if have != want {
- println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
- print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
- print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
- print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
- h0 := heapBitsForAddr(x)
- print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
- print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
- print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
- println("at word", i, "offset", i*goarch.PtrSize, "have", hex(have), "want", hex(want))
- if typ.kind&kindGCProg != 0 {
- println("GC program:")
- dumpGCProg(addb(typ.gcdata, 4))
- }
- throw("bad heapBitsSetType")
- }
- h = h.next()
- }
- if ptrmask == debugPtrmask.data {
- unlock(&debugPtrmask.lock)
+ if _, addr := h.next(); addr != 0 {
+ throw("heapBitsSetType: extra pointer")
}
}
}
@@ -1488,92 +1112,6 @@
data *byte
}
-// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
-// progSize is the size of the memory described by the program.
-// elemSize is the size of the element that the GC program describes (a prefix of).
-// dataSize is the total size of the intended data, a multiple of elemSize.
-// allocSize is the total size of the allocated memory.
-//
-// GC programs are only used for large allocations.
-// heapBitsSetType requires that allocSize is a multiple of 4 words,
-// so that the relevant bitmap bytes are not shared with surrounding
-// objects.
-func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
- if goarch.PtrSize == 8 && allocSize%(4*goarch.PtrSize) != 0 {
- // Alignment will be wrong.
- throw("heapBitsSetTypeGCProg: small allocation")
- }
- var totalBits uintptr
- if elemSize == dataSize {
- totalBits = runGCProg(prog, nil, h.bitp, 2)
- if totalBits*goarch.PtrSize != progSize {
- println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
- throw("heapBitsSetTypeGCProg: unexpected bit count")
- }
- } else {
- count := dataSize / elemSize
-
- // Piece together program trailer to run after prog that does:
- // literal(0)
- // repeat(1, elemSize-progSize-1) // zeros to fill element size
- // repeat(elemSize, count-1) // repeat that element for count
- // This zero-pads the data remaining in the first element and then
- // repeats that first element to fill the array.
- var trailer [40]byte // 3 varints (max 10 each) + some bytes
- i := 0
- if n := elemSize/goarch.PtrSize - progSize/goarch.PtrSize; n > 0 {
- // literal(0)
- trailer[i] = 0x01
- i++
- trailer[i] = 0
- i++
- if n > 1 {
- // repeat(1, n-1)
- trailer[i] = 0x81
- i++
- n--
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- }
- }
- // repeat(elemSize/ptrSize, count-1)
- trailer[i] = 0x80
- i++
- n := elemSize / goarch.PtrSize
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- n = count - 1
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- trailer[i] = 0
- i++
-
- runGCProg(prog, &trailer[0], h.bitp, 2)
-
- // Even though we filled in the full array just now,
- // record that we only filled in up to the ptrdata of the
- // last element. This will cause the code below to
- // memclr the dead section of the final array element,
- // so that scanobject can stop early in the final element.
- totalBits = (elemSize*(count-1) + progSize) / goarch.PtrSize
- }
- endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
- endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/goarch.PtrSize/wordsPerBitmapByte))
- memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
-}
-
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
@@ -1581,7 +1119,7 @@
n := (size/goarch.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
x[len(x)-1] = 0xa1 // overflow check sentinel
- n = runGCProg(prog, nil, &x[0], 1)
+ n = runGCProg(prog, &x[0])
if x[len(x)-1] != 0xa1 {
throw("progToPointerMask: overflow")
}
@@ -1602,15 +1140,8 @@
// 10000000 n c: repeat the previous n bits c times; n, c are varints
// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
-// runGCProg executes the GC program prog, and then trailer if non-nil,
-// writing to dst with entries of the given size.
-// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
-// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
-// starting at dst (because the heap bitmap does). In this case, the caller guarantees
-// that only whole bytes in dst need to be written.
-//
-// runGCProg returns the number of 1- or 2-bit entries written to memory.
-func runGCProg(prog, trailer, dst *byte, size int) uintptr {
+// runGCProg returns the number of 1-bit entries written to memory.
+func runGCProg(prog, dst *byte) uintptr {
dstStart := dst
// Bits waiting to be written to memory.
@@ -1623,20 +1154,9 @@
// Flush accumulated full bytes.
// The rest of the loop assumes that nbits <= 7.
for ; nbits >= 8; nbits -= 8 {
- if size == 1 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- } else {
- v := bits&bitPointerAll | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- v = bits&bitPointerAll | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- }
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
}
// Process one instruction.
@@ -1646,32 +1166,16 @@
if inst&0x80 == 0 {
// Literal bits; n == 0 means end of program.
if n == 0 {
- // Program is over; continue in trailer if present.
- if trailer != nil {
- p = trailer
- trailer = nil
- continue
- }
+ // Program is over.
break Run
}
nbyte := n / 8
for i := uintptr(0); i < nbyte; i++ {
bits |= uintptr(*p) << nbits
p = add1(p)
- if size == 1 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- } else {
- v := bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- v = bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- }
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
}
if n %= 8; n > 0 {
bits |= uintptr(*p) << nbits
@@ -1720,22 +1224,12 @@
npattern := nbits
// If we need more bits, fetch them from memory.
- if size == 1 {
+ src = subtract1(src)
+ for npattern < n {
+ pattern <<= 8
+ pattern |= uintptr(*src)
src = subtract1(src)
- for npattern < n {
- pattern <<= 8
- pattern |= uintptr(*src)
- src = subtract1(src)
- npattern += 8
- }
- } else {
- src = subtract1(src)
- for npattern < n {
- pattern <<= 4
- pattern |= uintptr(*src) & 0xf
- src = subtract1(src)
- npattern += 4
- }
+ npattern += 8
}
// We started with the whole bit output buffer,
@@ -1785,20 +1279,11 @@
for ; c >= npattern; c -= npattern {
bits |= pattern << nbits
nbits += npattern
- if size == 1 {
- for nbits >= 8 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- nbits -= 8
- }
- } else {
- for nbits >= 4 {
- *dst = uint8(bits&0xf | bitScanAll)
- dst = add1(dst)
- bits >>= 4
- nbits -= 4
- }
+ for nbits >= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ nbits -= 8
}
}
@@ -1815,75 +1300,38 @@
// Since nbits <= 7, we know the first few bytes of repeated data
// are already written to memory.
off := n - nbits // n > nbits because n > maxBits and nbits <= 7
- if size == 1 {
- // Leading src fragment.
- src = subtractb(src, (off+7)/8)
- if frag := off & 7; frag != 0 {
- bits |= uintptr(*src) >> (8 - frag) << nbits
- src = add1(src)
- nbits += frag
- c -= frag
- }
- // Main loop: load one byte, write another.
- // The bits are rotating through the bit buffer.
- for i := c / 8; i > 0; i-- {
- bits |= uintptr(*src) << nbits
- src = add1(src)
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- }
- // Final src fragment.
- if c %= 8; c > 0 {
- bits |= (uintptr(*src) & (1<<c - 1)) << nbits
- nbits += c
- }
- } else {
- // Leading src fragment.
- src = subtractb(src, (off+3)/4)
- if frag := off & 3; frag != 0 {
- bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
- src = add1(src)
- nbits += frag
- c -= frag
- }
- // Main loop: load one byte, write another.
- // The bits are rotating through the bit buffer.
- for i := c / 4; i > 0; i-- {
- bits |= (uintptr(*src) & 0xf) << nbits
- src = add1(src)
- *dst = uint8(bits&0xf | bitScanAll)
- dst = add1(dst)
- bits >>= 4
- }
- // Final src fragment.
- if c %= 4; c > 0 {
- bits |= (uintptr(*src) & (1<<c - 1)) << nbits
- nbits += c
- }
+ // Leading src fragment.
+ src = subtractb(src, (off+7)/8)
+ if frag := off & 7; frag != 0 {
+ bits |= uintptr(*src) >> (8 - frag) << nbits
+ src = add1(src)
+ nbits += frag
+ c -= frag
}
- }
-
- // Write any final bits out, using full-byte writes, even for the final byte.
- var totalBits uintptr
- if size == 1 {
- totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
- nbits += -nbits & 7
- for ; nbits > 0; nbits -= 8 {
+ // Main loop: load one byte, write another.
+ // The bits are rotating through the bit buffer.
+ for i := c / 8; i > 0; i-- {
+ bits |= uintptr(*src) << nbits
+ src = add1(src)
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
- } else {
- totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits
- nbits += -nbits & 3
- for ; nbits > 0; nbits -= 4 {
- v := bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
+ // Final src fragment.
+ if c %= 8; c > 0 {
+ bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+ nbits += c
}
}
+
+ // Write any final bits out, using full-byte writes, even for the final byte.
+ totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
+ nbits += -nbits & 7
+ for ; nbits > 0; nbits -= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ }
return totalBits
}
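
As a reading aid for the 1-bit bytecode that the simplified runGCProg above now consumes (literal and repeat opcodes, as documented in the comments a few hunks up), here is a rough, hypothetical decoder sketch. decodeGCProg and its varint helper are invented names for illustration; this is not the runtime's implementation and assumes a well-formed program.

package sketch

// decodeGCProg expands a GC program into one 0/1 byte per word.
// Allocation-happy sketch of the documented bytecode format only.
func decodeGCProg(prog []byte) []byte {
	var bits []byte
	readVarint := func() uintptr {
		var v uintptr
		for shift := uint(0); ; shift += 7 {
			b := prog[0]
			prog = prog[1:]
			v |= uintptr(b&0x7f) << shift
			if b&0x80 == 0 {
				return v
			}
		}
	}
	for {
		inst := prog[0]
		prog = prog[1:]
		n := uintptr(inst & 0x7f)
		if inst&0x80 == 0 {
			if n == 0 {
				return bits // 00000000: end of program
			}
			// 0nnnnnnn: n literal bits follow, packed into ceil(n/8) bytes.
			for i := uintptr(0); i < n; i++ {
				bits = append(bits, prog[i/8]>>(i%8)&1)
			}
			prog = prog[(n+7)/8:]
			continue
		}
		if n == 0 {
			n = readVarint() // 10000000 n c: n is a varint
		}
		c := readVarint() // c is always a varint
		// Repeat the previous n bits c more times.
		tail := bits[uintptr(len(bits))-n:]
		for ; c > 0; c-- {
			bits = append(bits, tail...)
		}
	}
}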
@@ -1898,7 +1346,7 @@
// Compute the number of pages needed for bitmapBytes.
pages := divRoundUp(bitmapBytes, pageSize)
s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
- runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
+ runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
return s
}
func dematerializeGCProg(s *mspan) {
@@ -1961,18 +1409,12 @@
return true
}
-// gcbits returns the GC type info for x, for testing.
+// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
- ret := getgcmask(x)
- typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
- nptr := typ.ptrdata / goarch.PtrSize
- for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
- ret = ret[:len(ret)-1]
- }
- return ret
+ return getgcmask(x)
}
// Returns GC type info for the pointer stored in ep for testing.
@@ -2011,30 +1453,33 @@
// heap
if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
- hbits := heapBitsForAddr(base)
+ if s.spanclass.noscan() {
+ return nil
+ }
n := s.elemsize
+ hbits := heapBitsForAddr(base, n)
mask = make([]byte, n/goarch.PtrSize)
- for i := uintptr(0); i < n; i += goarch.PtrSize {
- if hbits.isPointer() {
- mask[i/goarch.PtrSize] = 1
- }
- if !hbits.morePointers() {
- mask = mask[:i/goarch.PtrSize]
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.next(); addr == 0 {
break
}
- hbits = hbits.next()
+ mask[(addr-base)/goarch.PtrSize] = 1
+ }
+ // Callers expect this mask to end at the last pointer.
+ for len(mask) > 0 && mask[len(mask)-1] == 0 {
+ mask = mask[:len(mask)-1]
}
return
}
// stack
- if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
+ if gp := getg(); gp.m.curg.stack.lo <= uintptr(p) && uintptr(p) < gp.m.curg.stack.hi {
var frame stkframe
frame.sp = uintptr(p)
- _g_ := getg()
- gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
+ gentraceback(gp.m.curg.sched.pc, gp.m.curg.sched.sp, 0, gp.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn.valid() {
- locals, _, _ := getStackMap(&frame, nil, false)
+ locals, _, _ := frame.getStackMap(nil, false)
if locals.n == 0 {
return
}
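
As a reading aid for the mask getgcmask builds here (one entry per pointer-sized word, trailing zero entries trimmed down to the last pointer), consider this illustrative fragment; the type T is hypothetical and not part of the patch.

// On a 64-bit platform a value of this type occupies three words:
// pointer, scalar, pointer. getgcmask/reflect_gcbits would report the
// mask 1, 0, 1 for it, with any trailing zero entries trimmed.
type T struct {
	p *int
	n int64
	q *string
}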
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 1f484fb..acfd99b 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -6,6 +6,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -15,9 +16,9 @@
//
// mcaches are allocated from non-GC'd memory, so any heap pointers
// must be specially handled.
-//
-//go:notinheap
type mcache struct {
+ _ sys.NotInHeap
+
// The following members are accessed on every malloc,
// so they are grouped here for better caching.
nextSample uintptr // trigger heap sample after allocating this many bytes
@@ -49,7 +50,7 @@
// was last flushed. If flushGen != mheap_.sweepgen, the spans
 // in this mcache are stale and need to be flushed so they
// can be swept. This is done in acquirep.
- flushGen uint32
+ flushGen atomic.Uint32
}
// A gclink is a node in a linked list of blocks, like mlink,
@@ -86,7 +87,7 @@
systemstack(func() {
lock(&mheap_.lock)
c = (*mcache)(mheap_.cachealloc.alloc())
- c.flushGen = mheap_.sweepgen
+ c.flushGen.Store(mheap_.sweepgen)
unlock(&mheap_.lock)
})
for i := range c.alloc {
@@ -251,7 +252,7 @@
// visible to the background sweeper.
mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
s.limit = s.base() + size
- heapBitsForAddr(s.base()).initSpan(s)
+ s.initHeapBits(false)
return s
}
@@ -317,13 +318,14 @@
// allocate-black. However, with this approach it's difficult
// to avoid spilling mark bits into the *next* GC cycle.
sg := mheap_.sweepgen
- if c.flushGen == sg {
+ flushGen := c.flushGen.Load()
+ if flushGen == sg {
return
- } else if c.flushGen != sg-2 {
- println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg)
+ } else if flushGen != sg-2 {
+ println("bad flushGen", flushGen, "in prepareForSweep; sweepgen", sg)
throw("bad flushGen")
}
c.releaseAll()
stackcache_clear(c)
- atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart
+ c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart
}
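
The flushGen change above follows the patch's broader migration from atomic.Load/Store helpers to typed atomics. Outside the runtime, the equivalent pattern uses the public sync/atomic wrapper types; a minimal sketch follows (flushTracker is an invented name, not part of the patch).

package main

import (
	"fmt"
	"sync/atomic"
)

// flushTracker mirrors the mcache.flushGen pattern with the public
// sync/atomic types: the field's type enforces atomic access, so a
// plain assignment like t.flushGen = 2 no longer compiles.
type flushTracker struct {
	flushGen atomic.Uint32
}

func main() {
	var t flushTracker
	t.flushGen.Store(2) // analogous to c.flushGen.Store(mheap_.sweepgen)
	if gen := t.flushGen.Load(); gen == 2 {
		fmt.Println("cache is current for sweep generation", gen)
	}
}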
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index e4bdf35..3382c54 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -12,12 +12,14 @@
package runtime
-import "runtime/internal/atomic"
+import (
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
+)
// Central list of free objects of a given size.
-//
-//go:notinheap
type mcentral struct {
+ _ sys.NotInHeap
spanclass spanClass
// partial and full contain two mspan sets: one of swept in-use
@@ -250,6 +252,6 @@
// n := (npages << _PageShift) / size
n := s.divideByElemSize(npages << _PageShift)
s.limit = s.base() + size*n
- heapBitsForAddr(s.base()).initSpan(s)
+ s.initHeapBits(false)
return s
}
diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go
index 1dd2858..73c1a10 100644
--- a/src/runtime/mcheckmark.go
+++ b/src/runtime/mcheckmark.go
@@ -15,6 +15,7 @@
import (
"internal/goarch"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -22,9 +23,10 @@
// per-arena bitmap with a bit for every word in the arena. The mark
// is stored on the bit corresponding to the first word of the marked
// allocation.
-//
-//go:notinheap
-type checkmarksMap [heapArenaBytes / goarch.PtrSize / 8]uint8
+type checkmarksMap struct {
+ _ sys.NotInHeap
+ b [heapArenaBytes / goarch.PtrSize / 8]uint8
+}
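
The checkmarksMap rewrite shows the patch's general recipe for retiring //go:notinheap: wrap the raw array in a struct and embed the marker type as a blank field. The schematic below shows only the shape of that change; it uses a placeholder marker with no compiler effect, since runtime/internal/sys is not importable outside the runtime, and the array length is an arbitrary stand-in.

// Before: a bare array type annotated with //go:notinheap.
//
//	//go:notinheap
//	type checkmarksMap [N]uint8
//
// After: a struct embedding the not-in-heap marker; callers index the
// named field (bitmap.b[i]) instead of indexing the type directly.
type notInHeap struct{} // placeholder for runtime/internal/sys.NotInHeap

type checkmarksMapSketch struct {
	_ notInHeap
	b [1024]uint8 // stands in for heapArenaBytes / goarch.PtrSize / 8
}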
// If useCheckmark is true, marking of an object uses the checkmark
// bits instead of the standard mark bits.
@@ -50,8 +52,8 @@
arena.checkmarks = bitmap
} else {
// Otherwise clear the existing bitmap.
- for i := range bitmap {
- bitmap[i] = 0
+ for i := range bitmap.b {
+ bitmap.b[i] = 0
}
}
}
@@ -88,9 +90,9 @@
ai := arenaIndex(obj)
arena := mheap_.arenas[ai.l1()][ai.l2()]
- arenaWord := (obj / heapArenaBytes / 8) % uintptr(len(arena.checkmarks))
+ arenaWord := (obj / heapArenaBytes / 8) % uintptr(len(arena.checkmarks.b))
mask := byte(1 << ((obj / heapArenaBytes) % 8))
- bytep := &arena.checkmarks[arenaWord]
+ bytep := &arena.checkmarks.b[arenaWord]
if atomic.Load8(bytep)&mask != 0 {
// Already checkmarked.
diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go
index 782465a..6c5edb1 100644
--- a/src/runtime/mem_bsd.go
+++ b/src/runtime/mem_bsd.go
@@ -23,7 +23,11 @@
}
func sysUnusedOS(v unsafe.Pointer, n uintptr) {
- madvise(v, n, _MADV_FREE)
+ if debug.madvdontneed != 0 {
+ madvise(v, n, _MADV_DONTNEED)
+ } else {
+ madvise(v, n, _MADV_FREE)
+ }
}
func sysUsedOS(v unsafe.Pointer, n uintptr) {
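
The mem_bsd.go change lets GODEBUG=madvdontneed=1 select MADV_DONTNEED (pages are dropped eagerly, so RSS falls immediately) instead of the default MADV_FREE (pages are reclaimed lazily under memory pressure). A hedged user-space illustration of the same two advice values via golang.org/x/sys/unix; the mapping size is arbitrary and the program assumes a platform where both constants are available.

package main

import (
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	// Map an anonymous region, dirty it, then advise the kernel.
	const size = 1 << 20
	mem, err := unix.Mmap(-1, 0, size, unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_ANON|unix.MAP_PRIVATE)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Munmap(mem)
	for i := range mem {
		mem[i] = 0xff
	}

	// MADV_FREE: pages may be reclaimed lazily, so RSS can stay high.
	if err := unix.Madvise(mem, unix.MADV_FREE); err != nil {
		log.Fatal(err)
	}
	// MADV_DONTNEED: pages are dropped eagerly, like madvdontneed=1.
	if err := unix.Madvise(mem, unix.MADV_DONTNEED); err != nil {
		log.Fatal(err)
	}
}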
diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go
index 0e8bf74..88e7d92 100644
--- a/src/runtime/mem_plan9.go
+++ b/src/runtime/mem_plan9.go
@@ -92,7 +92,7 @@
}
func memCheck() {
- if memDebug == false {
+ if !memDebug {
return
}
for p := memFreelist.ptr(); p != nil && p.next != 0; p = p.next.ptr() {
diff --git a/src/runtime/memclr_riscv64.s b/src/runtime/memclr_riscv64.s
index f0e517a..d12b545 100644
--- a/src/runtime/memclr_riscv64.s
+++ b/src/runtime/memclr_riscv64.s
@@ -8,41 +8,96 @@
// void runtime·memclrNoHeapPointers(void*, uintptr)
TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16
-#ifndef GOEXPERIMENT_regabiargs
- MOV ptr+0(FP), A0
- MOV n+8(FP), A1
-#endif
- ADD A0, A1, T4
+ // X10 = ptr
+ // X11 = n
- // If less than eight bytes, do one byte at a time.
- SLTU $8, A1, T3
- BNE T3, ZERO, outcheck
+ // If less than 8 bytes, do single byte zeroing.
+ MOV $8, X9
+ BLT X11, X9, check4
- // Do one byte at a time until eight-aligned.
- JMP aligncheck
+ // Check alignment
+ AND $3, X10, X5
+ BEQZ X5, aligned
+
+ // Zero one byte at a time until we reach 8 byte alignment.
+ SUB X5, X11, X11
align:
- MOVB ZERO, (A0)
- ADD $1, A0
-aligncheck:
- AND $7, A0, T3
- BNE T3, ZERO, align
+ ADD $-1, X5
+ MOVB ZERO, 0(X10)
+ ADD $1, X10
+ BNEZ X5, align
- // Do eight bytes at a time as long as there is room.
- ADD $-7, T4, T5
- JMP wordscheck
-words:
- MOV ZERO, (A0)
- ADD $8, A0
-wordscheck:
- SLTU T5, A0, T3
- BNE T3, ZERO, words
+aligned:
+ MOV $8, X9
+ BLT X11, X9, check4
+ MOV $16, X9
+ BLT X11, X9, zero8
+ MOV $32, X9
+ BLT X11, X9, zero16
+ MOV $64, X9
+ BLT X11, X9, zero32
+loop64:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ MOV ZERO, 16(X10)
+ MOV ZERO, 24(X10)
+ MOV ZERO, 32(X10)
+ MOV ZERO, 40(X10)
+ MOV ZERO, 48(X10)
+ MOV ZERO, 56(X10)
+ ADD $64, X10
+ ADD $-64, X11
+ BGE X11, X9, loop64
+ BEQZ X11, done
- JMP outcheck
-out:
- MOVB ZERO, (A0)
- ADD $1, A0
-outcheck:
- BNE A0, T4, out
+check32:
+ MOV $32, X9
+ BLT X11, X9, check16
+zero32:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ MOV ZERO, 16(X10)
+ MOV ZERO, 24(X10)
+ ADD $32, X10
+ ADD $-32, X11
+ BEQZ X11, done
+
+check16:
+ MOV $16, X9
+ BLT X11, X9, check8
+zero16:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ ADD $16, X10
+ ADD $-16, X11
+ BEQZ X11, done
+
+check8:
+ MOV $8, X9
+ BLT X11, X9, check4
+zero8:
+ MOV ZERO, 0(X10)
+ ADD $8, X10
+ ADD $-8, X11
+ BEQZ X11, done
+
+check4:
+ MOV $4, X9
+ BLT X11, X9, loop1
+zero4:
+ MOVB ZERO, 0(X10)
+ MOVB ZERO, 1(X10)
+ MOVB ZERO, 2(X10)
+ MOVB ZERO, 3(X10)
+ ADD $4, X10
+ ADD $-4, X11
+
+loop1:
+ BEQZ X11, done
+ MOVB ZERO, 0(X10)
+ ADD $1, X10
+ ADD $-1, X11
+ JMP loop1
done:
RET
diff --git a/src/runtime/memclr_wasm.s b/src/runtime/memclr_wasm.s
index 5a05304..19d08ff 100644
--- a/src/runtime/memclr_wasm.s
+++ b/src/runtime/memclr_wasm.s
@@ -11,29 +11,10 @@
MOVD ptr+0(FP), R0
MOVD n+8(FP), R1
-loop:
- Loop
- Get R1
- I64Eqz
- If
- RET
- End
-
- Get R0
- I32WrapI64
- I64Const $0
- I64Store8 $0
-
- Get R0
- I64Const $1
- I64Add
- Set R0
-
- Get R1
- I64Const $1
- I64Sub
- Set R1
-
- Br loop
- End
- UNDEF
+ Get R0
+ I32WrapI64
+ I32Const $0
+ Get R1
+ I32WrapI64
+ MemoryFill
+ RET
diff --git a/src/runtime/memmove_linux_amd64_test.go b/src/runtime/memmove_linux_amd64_test.go
index b3ccd90..5f90062 100644
--- a/src/runtime/memmove_linux_amd64_test.go
+++ b/src/runtime/memmove_linux_amd64_test.go
@@ -6,7 +6,6 @@
import (
"os"
- "reflect"
"syscall"
"testing"
"unsafe"
@@ -45,11 +44,7 @@
defer syscall.Syscall(syscall.SYS_MUNMAP, base+off, 65536, 0)
}
- var s []byte
- sp := (*reflect.SliceHeader)(unsafe.Pointer(&s))
- sp.Data = base
- sp.Len, sp.Cap = 3<<30, 3<<30
-
+ s := unsafe.Slice((*byte)(unsafe.Pointer(base)), 3<<30)
n := copy(s[1:], s)
if n != 3<<30-1 {
t.Fatalf("copied %d bytes, expected %d", n, 3<<30-1)
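
The test now builds the huge byte slice with unsafe.Slice instead of hand-writing a reflect.SliceHeader. A minimal standalone sketch of that idiom, using an ordinary array as the backing memory purely for illustration:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	var backing [8]byte
	for i := range backing {
		backing[i] = byte(i)
	}

	// unsafe.Slice(ptr, n) builds a []T of length and capacity n whose
	// data pointer is ptr, replacing the old reflect.SliceHeader dance.
	p := (*byte)(unsafe.Pointer(&backing[0]))
	s := unsafe.Slice(p, len(backing))

	fmt.Println(s)       // [0 1 2 3 4 5 6 7]
	s[0] = 42            // writes through to the backing array
	fmt.Println(backing) // [42 1 2 3 4 5 6 7]
}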
diff --git a/src/runtime/memmove_riscv64.s b/src/runtime/memmove_riscv64.s
index 538aee3..ea622ed 100644
--- a/src/runtime/memmove_riscv64.s
+++ b/src/runtime/memmove_riscv64.s
@@ -8,93 +8,311 @@
// void runtime·memmove(void*, void*, uintptr)
TEXT runtime·memmove<ABIInternal>(SB),NOSPLIT,$-0-24
-#ifndef GOEXPERIMENT_regabiargs
- MOV to+0(FP), A0
- MOV from+8(FP), A1
- MOV n+16(FP), A2
-#endif
- ADD A1, A2, T5
+ // X10 = to
+ // X11 = from
+ // X12 = n
+ BEQ X10, X11, done
+ BEQZ X12, done
// If the destination is ahead of the source, start at the end of the
// buffer and go backward.
- BLTU A1, A0, b
+ BGTU X10, X11, backward
- // If less than eight bytes, do one byte at a time.
- SLTU $8, A2, T3
- BNE T3, ZERO, f_outcheck
+ // If less than 8 bytes, do single byte copies.
+ MOV $8, X9
+ BLT X12, X9, f_loop4_check
- // Do one byte at a time until from is eight-aligned.
- JMP f_aligncheck
+ // Check alignment - if alignment differs we have to do one byte at a time.
+ AND $3, X10, X5
+ AND $3, X11, X6
+ BNE X5, X6, f_loop8_unaligned_check
+ BEQZ X5, f_loop_check
+
+ // Move one byte at a time until we reach 8 byte alignment.
+ SUB X5, X12, X12
f_align:
- MOVB (A1), T3
- MOVB T3, (A0)
- ADD $1, A0
- ADD $1, A1
-f_aligncheck:
- AND $7, A1, T3
- BNE T3, ZERO, f_align
+ ADD $-1, X5
+ MOVB 0(X11), X14
+ MOVB X14, 0(X10)
+ ADD $1, X10
+ ADD $1, X11
+ BNEZ X5, f_align
- // Do eight bytes at a time as long as there is room.
- ADD $-7, T5, T6
- JMP f_wordscheck
-f_words:
- MOV (A1), T3
- MOV T3, (A0)
- ADD $8, A0
- ADD $8, A1
-f_wordscheck:
- SLTU T6, A1, T3
- BNE T3, ZERO, f_words
+f_loop_check:
+ MOV $16, X9
+ BLT X12, X9, f_loop8_check
+ MOV $32, X9
+ BLT X12, X9, f_loop16_check
+ MOV $64, X9
+ BLT X12, X9, f_loop32_check
+f_loop64:
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV 16(X11), X16
+ MOV 24(X11), X17
+ MOV 32(X11), X18
+ MOV 40(X11), X19
+ MOV 48(X11), X20
+ MOV 56(X11), X21
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ MOV X16, 16(X10)
+ MOV X17, 24(X10)
+ MOV X18, 32(X10)
+ MOV X19, 40(X10)
+ MOV X20, 48(X10)
+ MOV X21, 56(X10)
+ ADD $64, X10
+ ADD $64, X11
+ ADD $-64, X12
+ BGE X12, X9, f_loop64
+ BEQZ X12, done
- // Finish off the remaining partial word.
- JMP f_outcheck
-f_out:
- MOVB (A1), T3
- MOVB T3, (A0)
- ADD $1, A0
- ADD $1, A1
-f_outcheck:
- BNE A1, T5, f_out
+f_loop32_check:
+ MOV $32, X9
+ BLT X12, X9, f_loop16_check
+f_loop32:
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV 16(X11), X16
+ MOV 24(X11), X17
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ MOV X16, 16(X10)
+ MOV X17, 24(X10)
+ ADD $32, X10
+ ADD $32, X11
+ ADD $-32, X12
+ BGE X12, X9, f_loop32
+ BEQZ X12, done
- RET
+f_loop16_check:
+ MOV $16, X9
+ BLT X12, X9, f_loop8_check
+f_loop16:
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ ADD $16, X10
+ ADD $16, X11
+ ADD $-16, X12
+ BGE X12, X9, f_loop16
+ BEQZ X12, done
-b:
- ADD A0, A2, T4
- // If less than eight bytes, do one byte at a time.
- SLTU $8, A2, T3
- BNE T3, ZERO, b_outcheck
+f_loop8_check:
+ MOV $8, X9
+ BLT X12, X9, f_loop4_check
+f_loop8:
+ MOV 0(X11), X14
+ MOV X14, 0(X10)
+ ADD $8, X10
+ ADD $8, X11
+ ADD $-8, X12
+ BGE X12, X9, f_loop8
+ BEQZ X12, done
+ JMP f_loop4_check
- // Do one byte at a time until from+n is eight-aligned.
- JMP b_aligncheck
+f_loop8_unaligned_check:
+ MOV $8, X9
+ BLT X12, X9, f_loop4_check
+f_loop8_unaligned:
+ MOVB 0(X11), X14
+ MOVB 1(X11), X15
+ MOVB 2(X11), X16
+ MOVB 3(X11), X17
+ MOVB 4(X11), X18
+ MOVB 5(X11), X19
+ MOVB 6(X11), X20
+ MOVB 7(X11), X21
+ MOVB X14, 0(X10)
+ MOVB X15, 1(X10)
+ MOVB X16, 2(X10)
+ MOVB X17, 3(X10)
+ MOVB X18, 4(X10)
+ MOVB X19, 5(X10)
+ MOVB X20, 6(X10)
+ MOVB X21, 7(X10)
+ ADD $8, X10
+ ADD $8, X11
+ ADD $-8, X12
+ BGE X12, X9, f_loop8_unaligned
+
+f_loop4_check:
+ MOV $4, X9
+ BLT X12, X9, f_loop1
+f_loop4:
+ MOVB 0(X11), X14
+ MOVB 1(X11), X15
+ MOVB 2(X11), X16
+ MOVB 3(X11), X17
+ MOVB X14, 0(X10)
+ MOVB X15, 1(X10)
+ MOVB X16, 2(X10)
+ MOVB X17, 3(X10)
+ ADD $4, X10
+ ADD $4, X11
+ ADD $-4, X12
+ BGE X12, X9, f_loop4
+
+f_loop1:
+ BEQZ X12, done
+ MOVB 0(X11), X14
+ MOVB X14, 0(X10)
+ ADD $1, X10
+ ADD $1, X11
+ ADD $-1, X12
+ JMP f_loop1
+
+backward:
+ ADD X10, X12, X10
+ ADD X11, X12, X11
+
+ // If less than 8 bytes, do single byte copies.
+ MOV $8, X9
+ BLT X12, X9, b_loop4_check
+
+ // Check alignment - if alignment differs we have to do one byte at a time.
+ AND $3, X10, X5
+ AND $3, X11, X6
+ BNE X5, X6, b_loop8_unaligned_check
+ BEQZ X5, b_loop_check
+
+ // Move one byte at a time until we reach 8 byte alignment.
+ SUB X5, X12, X12
b_align:
- ADD $-1, T4
- ADD $-1, T5
- MOVB (T5), T3
- MOVB T3, (T4)
-b_aligncheck:
- AND $7, T5, T3
- BNE T3, ZERO, b_align
+ ADD $-1, X5
+ ADD $-1, X10
+ ADD $-1, X11
+ MOVB 0(X11), X14
+ MOVB X14, 0(X10)
+ BNEZ X5, b_align
- // Do eight bytes at a time as long as there is room.
- ADD $7, A1, T6
- JMP b_wordscheck
-b_words:
- ADD $-8, T4
- ADD $-8, T5
- MOV (T5), T3
- MOV T3, (T4)
-b_wordscheck:
- SLTU T5, T6, T3
- BNE T3, ZERO, b_words
+b_loop_check:
+ MOV $16, X9
+ BLT X12, X9, b_loop8_check
+ MOV $32, X9
+ BLT X12, X9, b_loop16_check
+ MOV $64, X9
+ BLT X12, X9, b_loop32_check
+b_loop64:
+ ADD $-64, X10
+ ADD $-64, X11
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV 16(X11), X16
+ MOV 24(X11), X17
+ MOV 32(X11), X18
+ MOV 40(X11), X19
+ MOV 48(X11), X20
+ MOV 56(X11), X21
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ MOV X16, 16(X10)
+ MOV X17, 24(X10)
+ MOV X18, 32(X10)
+ MOV X19, 40(X10)
+ MOV X20, 48(X10)
+ MOV X21, 56(X10)
+ ADD $-64, X12
+ BGE X12, X9, b_loop64
+ BEQZ X12, done
- // Finish off the remaining partial word.
- JMP b_outcheck
-b_out:
- ADD $-1, T4
- ADD $-1, T5
- MOVB (T5), T3
- MOVB T3, (T4)
-b_outcheck:
- BNE T5, A1, b_out
+b_loop32_check:
+ MOV $32, X9
+ BLT X12, X9, b_loop16_check
+b_loop32:
+ ADD $-32, X10
+ ADD $-32, X11
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV 16(X11), X16
+ MOV 24(X11), X17
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ MOV X16, 16(X10)
+ MOV X17, 24(X10)
+ ADD $-32, X12
+ BGE X12, X9, b_loop32
+ BEQZ X12, done
+b_loop16_check:
+ MOV $16, X9
+ BLT X12, X9, b_loop8_check
+b_loop16:
+ ADD $-16, X10
+ ADD $-16, X11
+ MOV 0(X11), X14
+ MOV 8(X11), X15
+ MOV X14, 0(X10)
+ MOV X15, 8(X10)
+ ADD $-16, X12
+ BGE X12, X9, b_loop16
+ BEQZ X12, done
+
+b_loop8_check:
+ MOV $8, X9
+ BLT X12, X9, b_loop4_check
+b_loop8:
+ ADD $-8, X10
+ ADD $-8, X11
+ MOV 0(X11), X14
+ MOV X14, 0(X10)
+ ADD $-8, X12
+ BGE X12, X9, b_loop8
+ BEQZ X12, done
+ JMP b_loop4_check
+
+b_loop8_unaligned_check:
+ MOV $8, X9
+ BLT X12, X9, b_loop4_check
+b_loop8_unaligned:
+ ADD $-8, X10
+ ADD $-8, X11
+ MOVB 0(X11), X14
+ MOVB 1(X11), X15
+ MOVB 2(X11), X16
+ MOVB 3(X11), X17
+ MOVB 4(X11), X18
+ MOVB 5(X11), X19
+ MOVB 6(X11), X20
+ MOVB 7(X11), X21
+ MOVB X14, 0(X10)
+ MOVB X15, 1(X10)
+ MOVB X16, 2(X10)
+ MOVB X17, 3(X10)
+ MOVB X18, 4(X10)
+ MOVB X19, 5(X10)
+ MOVB X20, 6(X10)
+ MOVB X21, 7(X10)
+ ADD $-8, X12
+ BGE X12, X9, b_loop8_unaligned
+
+b_loop4_check:
+ MOV $4, X9
+ BLT X12, X9, b_loop1
+b_loop4:
+ ADD $-4, X10
+ ADD $-4, X11
+ MOVB 0(X11), X14
+ MOVB 1(X11), X15
+ MOVB 2(X11), X16
+ MOVB 3(X11), X17
+ MOVB X14, 0(X10)
+ MOVB X15, 1(X10)
+ MOVB X16, 2(X10)
+ MOVB X17, 3(X10)
+ ADD $-4, X12
+ BGE X12, X9, b_loop4
+
+b_loop1:
+ BEQZ X12, done
+ ADD $-1, X10
+ ADD $-1, X11
+ MOVB 0(X11), X14
+ MOVB X14, 0(X10)
+ ADD $-1, X12
+ JMP b_loop1
+
+done:
RET
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 8887320..f1247f6 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -244,23 +244,23 @@
dst[i] = nil
}
- var ready uint32
+ var ready atomic.Uint32
go func() {
sp := unsafe.Pointer(&src[0])
dp := unsafe.Pointer(&dst[0])
- atomic.StoreUint32(&ready, 1)
+ ready.Store(1)
for i := 0; i < 10000; i++ {
Memmove(dp, sp, sz)
MemclrNoHeapPointers(dp, sz)
}
- atomic.StoreUint32(&ready, 2)
+ ready.Store(2)
}()
- for atomic.LoadUint32(&ready) == 0 {
+ for ready.Load() == 0 {
Gosched()
}
- for atomic.LoadUint32(&ready) != 2 {
+ for ready.Load() != 2 {
for i := range dst {
p := dst[i]
if p != nil && p != &x {
@@ -417,20 +417,20 @@
}
benchSizes := []RunData{
- RunData{[]int{1043, 1078, 1894, 1582, 1044, 1165, 1467, 1100, 1919, 1562, 1932, 1645,
+ {[]int{1043, 1078, 1894, 1582, 1044, 1165, 1467, 1100, 1919, 1562, 1932, 1645,
1412, 1038, 1576, 1200, 1029, 1336, 1095, 1494, 1350, 1025, 1502, 1548, 1316, 1296,
1868, 1639, 1546, 1626, 1642, 1308, 1726, 1665, 1678, 1187, 1515, 1598, 1353, 1237,
1977, 1452, 2012, 1914, 1514, 1136, 1975, 1618, 1536, 1695, 1600, 1733, 1392, 1099,
1358, 1996, 1224, 1783, 1197, 1838, 1460, 1556, 1554, 2020}}, // 1kb-2kb
- RunData{[]int{3964, 5139, 6573, 7775, 6553, 2413, 3466, 5394, 2469, 7336, 7091, 6745,
+ {[]int{3964, 5139, 6573, 7775, 6553, 2413, 3466, 5394, 2469, 7336, 7091, 6745,
4028, 5643, 6164, 3475, 4138, 6908, 7559, 3335, 5660, 4122, 3945, 2082, 7564, 6584,
5111, 2288, 6789, 2797, 4928, 7986, 5163, 5447, 2999, 4968, 3174, 3202, 7908, 8137,
4735, 6161, 4646, 7592, 3083, 5329, 3687, 2754, 3599, 7231, 6455, 2549, 8063, 2189,
7121, 5048, 4277, 6626, 6306, 2815, 7473, 3963, 7549, 7255}}, // 2kb-8kb
- RunData{[]int{16304, 15936, 15760, 4736, 9136, 11184, 10160, 5952, 14560, 15744,
+ {[]int{16304, 15936, 15760, 4736, 9136, 11184, 10160, 5952, 14560, 15744,
6624, 5872, 13088, 14656, 14192, 10304, 4112, 10384, 9344, 4496, 11392, 7024,
5200, 10064, 14784, 5808, 13504, 10480, 8512, 4896, 13264, 5600}}, // 4kb-16kb
- RunData{[]int{164576, 233136, 220224, 183280, 214112, 217248, 228560, 201728}}, // 128kb-256kb
+ {[]int{164576, 233136, 220224, 183280, 214112, 217248, 228560, 201728}}, // 128kb-256kb
}
for _, t := range benchSizes {
@@ -468,160 +468,382 @@
}
}
+func BenchmarkClearFat7(b *testing.B) {
+ p := new([7]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [7]byte{}
+ }
+}
+
func BenchmarkClearFat8(b *testing.B) {
+ p := new([8 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [8 / 4]uint32
- _ = x
+ *p = [8 / 4]uint32{}
}
}
+
+func BenchmarkClearFat11(b *testing.B) {
+ p := new([11]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [11]byte{}
+ }
+}
+
func BenchmarkClearFat12(b *testing.B) {
+ p := new([12 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [12 / 4]uint32
- _ = x
+ *p = [12 / 4]uint32{}
}
}
+
+func BenchmarkClearFat13(b *testing.B) {
+ p := new([13]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [13]byte{}
+ }
+}
+
+func BenchmarkClearFat14(b *testing.B) {
+ p := new([14]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [14]byte{}
+ }
+}
+
+func BenchmarkClearFat15(b *testing.B) {
+ p := new([15]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [15]byte{}
+ }
+}
+
func BenchmarkClearFat16(b *testing.B) {
+ p := new([16 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [16 / 4]uint32
- _ = x
+ *p = [16 / 4]uint32{}
}
}
+
func BenchmarkClearFat24(b *testing.B) {
+ p := new([24 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [24 / 4]uint32
- _ = x
+ *p = [24 / 4]uint32{}
}
}
+
func BenchmarkClearFat32(b *testing.B) {
+ p := new([32 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [32 / 4]uint32
- _ = x
+ *p = [32 / 4]uint32{}
}
}
+
func BenchmarkClearFat40(b *testing.B) {
+ p := new([40 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [40 / 4]uint32
- _ = x
+ *p = [40 / 4]uint32{}
}
}
+
func BenchmarkClearFat48(b *testing.B) {
+ p := new([48 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [48 / 4]uint32
- _ = x
+ *p = [48 / 4]uint32{}
}
}
+
func BenchmarkClearFat56(b *testing.B) {
+ p := new([56 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [56 / 4]uint32
- _ = x
+ *p = [56 / 4]uint32{}
}
}
+
func BenchmarkClearFat64(b *testing.B) {
+ p := new([64 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [64 / 4]uint32
- _ = x
+ *p = [64 / 4]uint32{}
}
}
+
+func BenchmarkClearFat72(b *testing.B) {
+ p := new([72 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [72 / 4]uint32{}
+ }
+}
+
func BenchmarkClearFat128(b *testing.B) {
+ p := new([128 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [128 / 4]uint32
- _ = x
+ *p = [128 / 4]uint32{}
}
}
+
func BenchmarkClearFat256(b *testing.B) {
+ p := new([256 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [256 / 4]uint32
- _ = x
+ *p = [256 / 4]uint32{}
}
}
+
func BenchmarkClearFat512(b *testing.B) {
+ p := new([512 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [512 / 4]uint32
- _ = x
+ *p = [512 / 4]uint32{}
}
}
+
func BenchmarkClearFat1024(b *testing.B) {
+ p := new([1024 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- var x [1024 / 4]uint32
- _ = x
+ *p = [1024 / 4]uint32{}
+ }
+}
+
+func BenchmarkClearFat1032(b *testing.B) {
+ p := new([1032 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [1032 / 4]uint32{}
+ }
+}
+
+func BenchmarkClearFat1040(b *testing.B) {
+ p := new([1040 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = [1040 / 4]uint32{}
+ }
+}
+
+func BenchmarkCopyFat7(b *testing.B) {
+ var x [7]byte
+ p := new([7]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
}
}
func BenchmarkCopyFat8(b *testing.B) {
var x [8 / 4]uint32
+ p := new([8 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
+func BenchmarkCopyFat11(b *testing.B) {
+ var x [11]byte
+ p := new([11]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
func BenchmarkCopyFat12(b *testing.B) {
var x [12 / 4]uint32
+ p := new([12 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
+func BenchmarkCopyFat13(b *testing.B) {
+ var x [13]byte
+ p := new([13]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
+func BenchmarkCopyFat14(b *testing.B) {
+ var x [14]byte
+ p := new([14]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
+func BenchmarkCopyFat15(b *testing.B) {
+ var x [15]byte
+ p := new([15]byte)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
func BenchmarkCopyFat16(b *testing.B) {
var x [16 / 4]uint32
+ p := new([16 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat24(b *testing.B) {
var x [24 / 4]uint32
+ p := new([24 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat32(b *testing.B) {
var x [32 / 4]uint32
+ p := new([32 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat64(b *testing.B) {
var x [64 / 4]uint32
+ p := new([64 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
+func BenchmarkCopyFat72(b *testing.B) {
+ var x [72 / 4]uint32
+ p := new([72 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
func BenchmarkCopyFat128(b *testing.B) {
var x [128 / 4]uint32
+ p := new([128 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat256(b *testing.B) {
var x [256 / 4]uint32
+ p := new([256 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat512(b *testing.B) {
var x [512 / 4]uint32
+ p := new([512 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat520(b *testing.B) {
var x [520 / 4]uint32
+ p := new([520 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
}
}
+
func BenchmarkCopyFat1024(b *testing.B) {
var x [1024 / 4]uint32
+ p := new([1024 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- y := x
- _ = y
+ *p = x
+ }
+}
+
+func BenchmarkCopyFat1032(b *testing.B) {
+ var x [1032 / 4]uint32
+ p := new([1032 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
+ }
+}
+
+func BenchmarkCopyFat1040(b *testing.B) {
+ var x [1040 / 4]uint32
+ p := new([1040 / 4]uint32)
+ Escape(p)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ *p = x
}
}
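
The rewritten ClearFat/CopyFat benchmarks all write through a pointer that has been passed to Escape, so the compiler can no longer prove the stores dead and delete them. The runtime's Escape helper lives in its export_test.go; a standalone benchmark can get a similar effect with a package-level sink, sketched here with the invented names escape and sink.

package fat_test

import "testing"

var sink any

// escape publishes x through a global, so the value is treated as
// reachable and stores into *p below are not eliminated as dead code.
func escape[T any](x T) T {
	sink = x
	return x
}

func BenchmarkClearFat16(b *testing.B) {
	p := escape(new([16 / 4]uint32))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [16 / 4]uint32{}
	}
}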
diff --git a/src/runtime/memmove_wasm.s b/src/runtime/memmove_wasm.s
index 8525fea..1be8487 100644
--- a/src/runtime/memmove_wasm.s
+++ b/src/runtime/memmove_wasm.s
@@ -13,142 +13,10 @@
MOVD n+16(FP), R2
Get R0
+ I32WrapI64
Get R1
- I64LtU
- If // forward
-exit_forward_64:
- Block
-loop_forward_64:
- Loop
- Get R2
- I64Const $8
- I64LtU
- BrIf exit_forward_64
-
- MOVD 0(R1), 0(R0)
-
- Get R0
- I64Const $8
- I64Add
- Set R0
-
- Get R1
- I64Const $8
- I64Add
- Set R1
-
- Get R2
- I64Const $8
- I64Sub
- Set R2
-
- Br loop_forward_64
- End
- End
-
-loop_forward_8:
- Loop
- Get R2
- I64Eqz
- If
- RET
- End
-
- Get R0
- I32WrapI64
- I64Load8U (R1)
- I64Store8 $0
-
- Get R0
- I64Const $1
- I64Add
- Set R0
-
- Get R1
- I64Const $1
- I64Add
- Set R1
-
- Get R2
- I64Const $1
- I64Sub
- Set R2
-
- Br loop_forward_8
- End
-
- Else
- // backward
- Get R0
- Get R2
- I64Add
- Set R0
-
- Get R1
- Get R2
- I64Add
- Set R1
-
-exit_backward_64:
- Block
-loop_backward_64:
- Loop
- Get R2
- I64Const $8
- I64LtU
- BrIf exit_backward_64
-
- Get R0
- I64Const $8
- I64Sub
- Set R0
-
- Get R1
- I64Const $8
- I64Sub
- Set R1
-
- Get R2
- I64Const $8
- I64Sub
- Set R2
-
- MOVD 0(R1), 0(R0)
-
- Br loop_backward_64
- End
- End
-
-loop_backward_8:
- Loop
- Get R2
- I64Eqz
- If
- RET
- End
-
- Get R0
- I64Const $1
- I64Sub
- Set R0
-
- Get R1
- I64Const $1
- I64Sub
- Set R1
-
- Get R2
- I64Const $1
- I64Sub
- Set R2
-
- Get R0
- I32WrapI64
- I64Load8U (R1)
- I64Store8 $0
-
- Br loop_backward_8
- End
- End
-
- UNDEF
+ I32WrapI64
+ Get R2
+ I32WrapI64
+ MemoryCopy
+ RET
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index 986121b..2061dc0 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -7,7 +7,6 @@
// Metrics implementation exported to runtime/metrics.
import (
- "runtime/internal/atomic"
"unsafe"
)
@@ -41,7 +40,7 @@
// Acquire the metricsSema but with handoff. Operations are typically
// expensive enough that queueing up goroutines and handing off between
// them will be noticeably better-behaved.
- semacquire1(&metricsSema, true, 0, 0)
+ semacquire1(&metricsSema, true, 0, 0, waitReasonSemacquire)
if raceenabled {
raceacquire(unsafe.Pointer(&metricsSema))
}
@@ -91,6 +90,83 @@
out.scalar = uint64(NumCgoCall())
},
},
+ "/cpu/classes/gc/mark/assist:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.gcAssistTime))
+ },
+ },
+ "/cpu/classes/gc/mark/dedicated:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.gcDedicatedTime))
+ },
+ },
+ "/cpu/classes/gc/mark/idle:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.gcIdleTime))
+ },
+ },
+ "/cpu/classes/gc/pause:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.gcPauseTime))
+ },
+ },
+ "/cpu/classes/gc/total:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.gcTotalTime))
+ },
+ },
+ "/cpu/classes/idle:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.idleTime))
+ },
+ },
+ "/cpu/classes/scavenge/assist:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.scavengeAssistTime))
+ },
+ },
+ "/cpu/classes/scavenge/background:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.scavengeBgTime))
+ },
+ },
+ "/cpu/classes/scavenge/total:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.scavengeTotalTime))
+ },
+ },
+ "/cpu/classes/total:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.totalTime))
+ },
+ },
+ "/cpu/classes/user:cpu-seconds": {
+ deps: makeStatDepSet(cpuStatsDep),
+ compute: func(in *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(in.cpuStats.userTime))
+ },
+ },
"/gc/cycles/automatic:gc-cycles": {
deps: makeStatDepSet(sysStatsDep),
compute: func(in *statAggregate, out *metricValue) {
@@ -197,10 +273,11 @@
// The bottom-most bucket, containing negative values, is tracked
 // separately as underflow, so fill that in manually and then
// iterate over the rest.
- hist.counts[0] = atomic.Load64(&memstats.gcPauseDist.underflow)
+ hist.counts[0] = memstats.gcPauseDist.underflow.Load()
for i := range memstats.gcPauseDist.counts {
- hist.counts[i+1] = atomic.Load64(&memstats.gcPauseDist.counts[i])
+ hist.counts[i+1] = memstats.gcPauseDist.counts[i].Load()
}
+ hist.counts[len(hist.counts)-1] = memstats.gcPauseDist.overflow.Load()
},
},
"/gc/stack/starting-size:bytes": {
@@ -327,10 +404,17 @@
"/sched/latencies:seconds": {
compute: func(_ *statAggregate, out *metricValue) {
hist := out.float64HistOrInit(timeHistBuckets)
- hist.counts[0] = atomic.Load64(&sched.timeToRun.underflow)
+ hist.counts[0] = sched.timeToRun.underflow.Load()
for i := range sched.timeToRun.counts {
- hist.counts[i+1] = atomic.Load64(&sched.timeToRun.counts[i])
+ hist.counts[i+1] = sched.timeToRun.counts[i].Load()
}
+ hist.counts[len(hist.counts)-1] = sched.timeToRun.overflow.Load()
+ },
+ },
+ "/sync/mutex/wait/total:seconds": {
+ compute: func(_ *statAggregate, out *metricValue) {
+ out.kind = metricKindFloat64
+ out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load()))
},
},
}
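
The histogram fixes above route the underflow bucket into counts[0] and the overflow bucket into the final count. A consumer-side sketch of reading such a histogram through runtime/metrics and estimating a rough median; illustrative only, and note that the first and last bucket boundaries are -Inf and +Inf.

package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	s := []metrics.Sample{{Name: "/sched/latencies:seconds"}}
	metrics.Read(s)
	h := s[0].Value.Float64Histogram()

	var total uint64
	for _, c := range h.Counts {
		total += c
	}
	// Counts[i] covers [Buckets[i], Buckets[i+1]); the first and last
	// counts are the underflow and overflow buckets filled in above.
	var seen uint64
	for i, c := range h.Counts {
		seen += c
		if total > 0 && seen*2 >= total {
			fmt.Printf("~median scheduling latency >= %gs\n", h.Buckets[i])
			return
		}
	}
	fmt.Println("no samples recorded")
}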
@@ -344,6 +428,7 @@
const (
heapStatsDep statDep = iota // corresponds to heapStatsAggregate
sysStatsDep // corresponds to sysStatsAggregate
+ cpuStatsDep // corresponds to cpuStatsAggregate
numStatsDeps
)
@@ -489,6 +574,23 @@
})
}
+// cpuStatsAggregate represents CPU stats obtained from the runtime
+// acquired together to avoid skew and inconsistencies.
+type cpuStatsAggregate struct {
+ cpuStats
+}
+
+// compute populates the cpuStatsAggregate with values from the runtime.
+func (a *cpuStatsAggregate) compute() {
+ a.cpuStats = work.cpuStats
+}
+
+// nsToSec takes a duration in nanoseconds and converts it to seconds as
+// a float64.
+func nsToSec(ns int64) float64 {
+ return float64(ns) / 1e9
+}
+
// statAggregate is the main driver of the metrics implementation.
//
// It contains multiple aggregates of runtime statistics, as well
@@ -498,6 +600,7 @@
ensured statDepSet
heapStats heapStatsAggregate
sysStats sysStatsAggregate
+ cpuStats cpuStatsAggregate
}
// ensure populates statistics aggregates determined by deps if they
@@ -516,12 +619,14 @@
a.heapStats.compute()
case sysStatsDep:
a.sysStats.compute()
+ case cpuStatsDep:
+ a.cpuStats.compute()
}
}
a.ensured = a.ensured.union(missing)
}
-// metricValidKind is a runtime copy of runtime/metrics.ValueKind and
+// metricKind is a runtime copy of runtime/metrics.ValueKind and
// must be kept structurally identical to that type.
type metricKind int
diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go
index ee99d39..dcfe01e 100644
--- a/src/runtime/metrics/description.go
+++ b/src/runtime/metrics/description.go
@@ -58,6 +58,122 @@
Cumulative: true,
},
{
+ Name: "/cpu/classes/gc/mark/assist:cpu-seconds",
+ Description: "Estimated total CPU time goroutines spent performing GC tasks " +
+ "to assist the GC and prevent it from falling behind the application. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/gc/mark/dedicated:cpu-seconds",
+ Description: "Estimated total CPU time spent performing GC tasks on " +
+ "processors (as defined by GOMAXPROCS) dedicated to those tasks. " +
+ "This includes time spent with the world stopped due to the GC. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/gc/mark/idle:cpu-seconds",
+ Description: "Estimated total CPU time spent performing GC tasks on " +
+ "spare CPU resources that the Go scheduler could not otherwise find " +
+ "a use for. This should be subtracted from the total GC CPU time to " +
+ "obtain a measure of compulsory GC CPU time. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/gc/pause:cpu-seconds",
+ Description: "Estimated total CPU time spent with the application paused by " +
+ "the GC. Even if only one thread is running during the pause, this is " +
+ "computed as GOMAXPROCS times the pause latency because nothing else " +
+ "can be executing. This is the exact sum of samples in /gc/pause:seconds " +
+ "if each sample is multiplied by GOMAXPROCS at the time it is taken. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/gc/total:cpu-seconds",
+ Description: "Estimated total CPU time spent performing GC tasks. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics. Sum of all metrics in /cpu/classes/gc.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/idle:cpu-seconds",
+ Description: "Estimated total available CPU time not spent executing any Go or Go runtime code. " +
+ "In other words, the part of /cpu/classes/total:cpu-seconds that was unused. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/scavenge/assist:cpu-seconds",
+ Description: "Estimated total CPU time spent returning unused memory to the " +
+		"underlying platform eagerly in response to memory pressure. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/scavenge/background:cpu-seconds",
+ Description: "Estimated total CPU time spent performing background tasks " +
+ "to return unused memory to the underlying platform. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/scavenge/total:cpu-seconds",
+ Description: "Estimated total CPU time spent performing tasks that return " +
+ "unused memory to the underlying platform. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics. Sum of all metrics in /cpu/classes/scavenge.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/total:cpu-seconds",
+ Description: "Estimated total available CPU time for user Go code " +
+ "or the Go runtime, as defined by GOMAXPROCS. In other words, GOMAXPROCS " +
+ "integrated over the wall-clock duration this process has been executing for. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics. Sum of all metrics in /cpu/classes.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
+ Name: "/cpu/classes/user:cpu-seconds",
+ Description: "Estimated total CPU time spent running user Go code. This may " +
+ "also include some small amount of time spent in the Go runtime. " +
+ "This metric is an overestimate, and not directly comparable to " +
+ "system CPU time measurements. Compare only with other /cpu/classes " +
+ "metrics.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
+ {
Name: "/gc/cycles/automatic:gc-cycles",
Description: "Count of completed GC cycles generated by the Go runtime.",
Kind: KindUint64,
@@ -250,6 +366,12 @@
Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.",
Kind: KindFloat64Histogram,
},
+ {
+ Name: "/sync/mutex/wait/total:seconds",
+ Description: "Approximate cumulative time goroutines have spent blocked on a sync.Mutex or sync.RWMutex. This metric is useful for identifying global changes in lock contention. Collect a mutex or block profile using the runtime/pprof package for more detailed contention data.",
+ Kind: KindFloat64,
+ Cumulative: true,
+ },
}
 // All returns a slice containing metric descriptions for all supported metrics.
diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go
index 28c9f6a..b593d8d 100644
--- a/src/runtime/metrics/doc.go
+++ b/src/runtime/metrics/doc.go
@@ -54,6 +54,90 @@
/cgo/go-to-c-calls:calls
Count of calls made from Go to C by the current process.
+ /cpu/classes/gc/mark/assist:cpu-seconds
+ Estimated total CPU time goroutines spent performing GC tasks
+ to assist the GC and prevent it from falling behind the application.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/gc/mark/dedicated:cpu-seconds
+ Estimated total CPU time spent performing GC tasks on
+ processors (as defined by GOMAXPROCS) dedicated to those tasks.
+ This includes time spent with the world stopped due to the GC.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/gc/mark/idle:cpu-seconds
+ Estimated total CPU time spent performing GC tasks on
+ spare CPU resources that the Go scheduler could not otherwise find
+ a use for. This should be subtracted from the total GC CPU time to
+ obtain a measure of compulsory GC CPU time.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/gc/pause:cpu-seconds
+ Estimated total CPU time spent with the application paused by
+ the GC. Even if only one thread is running during the pause, this is
+ computed as GOMAXPROCS times the pause latency because nothing else
+ can be executing. This is the exact sum of samples in /gc/pause:seconds
+ if each sample is multiplied by GOMAXPROCS at the time it is taken.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/gc/total:cpu-seconds
+ Estimated total CPU time spent performing GC tasks.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics. Sum of all metrics in /cpu/classes/gc.
+
+ /cpu/classes/idle:cpu-seconds
+ Estimated total available CPU time not spent executing any Go or Go
+ runtime code. In other words, the part of /cpu/classes/total:cpu-seconds
+ that was unused.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/scavenge/assist:cpu-seconds
+ Estimated total CPU time spent returning unused memory to the
+	underlying platform eagerly in response to memory pressure.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/scavenge/background:cpu-seconds
+ Estimated total CPU time spent performing background tasks
+ to return unused memory to the underlying platform.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
+ /cpu/classes/scavenge/total:cpu-seconds
+ Estimated total CPU time spent performing tasks that return
+ unused memory to the underlying platform.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics. Sum of all metrics in /cpu/classes/scavenge.
+
+ /cpu/classes/total:cpu-seconds
+ Estimated total available CPU time for user Go code or the Go runtime, as
+ defined by GOMAXPROCS. In other words, GOMAXPROCS integrated over the
+ wall-clock duration this process has been executing for.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics. Sum of all metrics in /cpu/classes.
+
+ /cpu/classes/user:cpu-seconds
+ Estimated total CPU time spent running user Go code. This may
+ also include some small amount of time spent in the Go runtime.
+ This metric is an overestimate, and not directly comparable to
+ system CPU time measurements. Compare only with other /cpu/classes
+ metrics.
+
/gc/cycles/automatic:gc-cycles
Count of completed GC cycles generated by the Go runtime.
@@ -188,5 +272,12 @@
/sched/latencies:seconds
Distribution of the time goroutines have spent in the scheduler
in a runnable state before actually running.
+
+ /sync/mutex/wait/total:seconds
+ Approximate cumulative time goroutines have spent blocked on a
+ sync.Mutex or sync.RWMutex. This metric is useful for identifying
+ global changes in lock contention. Collect a mutex or block
+ profile using the runtime/pprof package for more detailed
+ contention data.
*/
package metrics
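
The new /cpu/classes/* and /sync/mutex/wait metrics documented above are cumulative KindFloat64 values, so the useful quantity is the difference between two samples. A small usage sketch, assuming a Go runtime that exports these metrics; the sleep stands in for real work.

package main

import (
	"fmt"
	"runtime/metrics"
	"time"
)

func main() {
	names := []string{
		"/cpu/classes/gc/total:cpu-seconds",
		"/cpu/classes/user:cpu-seconds",
		"/sync/mutex/wait/total:seconds",
	}
	read := func() []metrics.Sample {
		s := make([]metrics.Sample, len(names))
		for i, n := range names {
			s[i].Name = n
		}
		metrics.Read(s)
		return s
	}

	before := read()
	time.Sleep(100 * time.Millisecond) // stand-in for real work
	after := read()

	// Cumulative metrics: report the delta over the measured interval.
	for i, n := range names {
		delta := after[i].Value.Float64() - before[i].Value.Float64()
		fmt.Printf("%-40s %+.6fs\n", n, delta)
	}
}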
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index 8baf020..d981c8e 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -5,6 +5,7 @@
package runtime_test
import (
+ "reflect"
"runtime"
"runtime/metrics"
"sort"
@@ -156,13 +157,19 @@
// Tests whether readMetrics produces consistent, sensible values.
// The values are read concurrently with the runtime doing other
 // things (e.g. allocating) so what we read can't reasonably be compared
- // to runtime values.
+ // to other runtime values (e.g. MemStats).
// Run a few GC cycles to get some of the stats to be non-zero.
runtime.GC()
runtime.GC()
runtime.GC()
+ // Set GOMAXPROCS high then sleep briefly to ensure we generate
+ // some idle time.
+ oldmaxprocs := runtime.GOMAXPROCS(10)
+ time.Sleep(time.Millisecond)
+ runtime.GOMAXPROCS(oldmaxprocs)
+
// Read all the supported metrics through the metrics package.
descs, samples := prepareAllMetricsSamples()
metrics.Read(samples)
@@ -181,6 +188,22 @@
numGC uint64
pauses uint64
}
+ var cpu struct {
+ gcAssist float64
+ gcDedicated float64
+ gcIdle float64
+ gcPause float64
+ gcTotal float64
+
+ idle float64
+ user float64
+
+ scavengeAssist float64
+ scavengeBg float64
+ scavengeTotal float64
+
+ total float64
+ }
for i := range samples {
kind := samples[i].Value.Kind()
if want := descs[samples[i].Name].Kind; kind != want {
@@ -199,6 +222,28 @@
}
}
switch samples[i].Name {
+ case "/cpu/classes/gc/mark/assist:cpu-seconds":
+ cpu.gcAssist = samples[i].Value.Float64()
+ case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
+ cpu.gcDedicated = samples[i].Value.Float64()
+ case "/cpu/classes/gc/mark/idle:cpu-seconds":
+ cpu.gcIdle = samples[i].Value.Float64()
+ case "/cpu/classes/gc/pause:cpu-seconds":
+ cpu.gcPause = samples[i].Value.Float64()
+ case "/cpu/classes/gc/total:cpu-seconds":
+ cpu.gcTotal = samples[i].Value.Float64()
+ case "/cpu/classes/idle:cpu-seconds":
+ cpu.idle = samples[i].Value.Float64()
+ case "/cpu/classes/scavenge/assist:cpu-seconds":
+ cpu.scavengeAssist = samples[i].Value.Float64()
+ case "/cpu/classes/scavenge/background:cpu-seconds":
+ cpu.scavengeBg = samples[i].Value.Float64()
+ case "/cpu/classes/scavenge/total:cpu-seconds":
+ cpu.scavengeTotal = samples[i].Value.Float64()
+ case "/cpu/classes/total:cpu-seconds":
+ cpu.total = samples[i].Value.Float64()
+ case "/cpu/classes/user:cpu-seconds":
+ cpu.user = samples[i].Value.Float64()
case "/memory/classes/total:bytes":
totalVirtual.got = samples[i].Value.Uint64()
case "/memory/classes/heap/objects:bytes":
@@ -235,6 +280,33 @@
}
}
}
+ // Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
+ if runtime.GOOS == "linux" {
+ if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
+ t.Errorf("found no time spent on GC work: %#v", cpu)
+ }
+ if cpu.gcPause <= 0 {
+ t.Errorf("found no GC pauses: %f", cpu.gcPause)
+ }
+ if cpu.idle <= 0 {
+ t.Errorf("found no idle time: %f", cpu.idle)
+ }
+ if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) {
+ t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal)
+ }
+ if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) {
+ t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal)
+ }
+ if cpu.total <= 0 {
+ t.Errorf("found no total CPU time passed")
+ }
+ if cpu.user <= 0 {
+ t.Errorf("found no user time passed")
+ }
+ if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) {
+ t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total)
+ }
+ }
if totalVirtual.got != totalVirtual.want {
t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
}
@@ -303,7 +375,7 @@
for i := 0; i < b.N; i++ {
start := time.Now()
metrics.Read(samples)
- latencies = append(latencies, time.Now().Sub(start))
+ latencies = append(latencies, time.Since(start))
}
// Make sure to stop the timer before we wait! The load created above
// is very heavy-weight and not easy to stop, so we could end up
@@ -411,3 +483,131 @@
wg.Wait()
}
+
+func withinEpsilon(v1, v2, e float64) bool {
+ return v2-v2*e <= v1 && v1 <= v2+v2*e
+}
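
withinEpsilon(v1, v2, e) treats e as a relative tolerance around v2, which is how the 1% and 2% bounds in the CPU cross-checks above are expressed. A tiny illustrative check with hypothetical values (not part of the test):

	// 0.995 lies within 1% of 1.0; 1.02 does not.
	if !withinEpsilon(0.995, 1.0, 0.01) || withinEpsilon(1.02, 1.0, 0.01) {
		panic("unexpected withinEpsilon result")
	}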
+
+func TestMutexWaitTimeMetric(t *testing.T) {
+ var sample [1]metrics.Sample
+ sample[0].Name = "/sync/mutex/wait/total:seconds"
+
+ locks := []locker2{
+ new(mutex),
+ new(rwmutexWrite),
+ new(rwmutexReadWrite),
+ new(rwmutexWriteRead),
+ }
+ for _, lock := range locks {
+ t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
+ metrics.Read(sample[:])
+ before := time.Duration(sample[0].Value.Float64() * 1e9)
+
+ minMutexWaitTime := generateMutexWaitTime(lock)
+
+ metrics.Read(sample[:])
+ after := time.Duration(sample[0].Value.Float64() * 1e9)
+
+ if wt := after - before; wt < minMutexWaitTime {
+ t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
+ }
+ })
+ }
+}
+
+// locker2 represents an API surface of two concurrent goroutines
+// locking the same resource, but through different APIs. It's intended
+// to abstract over the relationship of two Lock calls or an RLock
+// and a Lock call.
+type locker2 interface {
+ Lock1()
+ Unlock1()
+ Lock2()
+ Unlock2()
+}
+
+type mutex struct {
+ mu sync.Mutex
+}
+
+func (m *mutex) Lock1() { m.mu.Lock() }
+func (m *mutex) Unlock1() { m.mu.Unlock() }
+func (m *mutex) Lock2() { m.mu.Lock() }
+func (m *mutex) Unlock2() { m.mu.Unlock() }
+
+type rwmutexWrite struct {
+ mu sync.RWMutex
+}
+
+func (m *rwmutexWrite) Lock1() { m.mu.Lock() }
+func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
+func (m *rwmutexWrite) Lock2() { m.mu.Lock() }
+func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
+
+type rwmutexReadWrite struct {
+ mu sync.RWMutex
+}
+
+func (m *rwmutexReadWrite) Lock1() { m.mu.RLock() }
+func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
+func (m *rwmutexReadWrite) Lock2() { m.mu.Lock() }
+func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
+
+type rwmutexWriteRead struct {
+ mu sync.RWMutex
+}
+
+func (m *rwmutexWriteRead) Lock1() { m.mu.Lock() }
+func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
+func (m *rwmutexWriteRead) Lock2() { m.mu.RLock() }
+func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
+
+// generateMutexWaitTime causes a couple of goroutines
+// to block a whole bunch of times on a sync.Mutex, returning
+// the minimum amount of time that should be visible in the
+// /sync/mutex/wait/total:seconds metric.
+func generateMutexWaitTime(mu locker2) time.Duration {
+ // Set up the runtime to always track casgstatus transitions for metrics.
+ *runtime.CasGStatusAlwaysTrack = true
+
+ mu.Lock1()
+
+ // Start up a goroutine to wait on the lock.
+ gc := make(chan *runtime.G)
+ done := make(chan bool)
+ go func() {
+ gc <- runtime.Getg()
+
+ for {
+ mu.Lock2()
+ mu.Unlock2()
+ if <-done {
+ return
+ }
+ }
+ }()
+ gp := <-gc
+
+ // Set the block time high enough so that it will always show up, even
+ // on systems with coarse timer granularity.
+ const blockTime = 100 * time.Millisecond
+
+ // Make sure the goroutine spawned above actually blocks on the lock.
+ for {
+ if runtime.GIsWaitingOnMutex(gp) {
+ break
+ }
+ runtime.Gosched()
+ }
+
+ // Let some amount of time pass.
+ time.Sleep(blockTime)
+
+ // Let the other goroutine acquire the lock.
+ mu.Unlock1()
+ done <- true
+
+ // Reset flag.
+ *runtime.CasGStatusAlwaysTrack = false
+ return blockTime
+}
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index f3f3a79..d4d4f1f 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -10,6 +10,7 @@
"internal/abi"
"internal/goarch"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -19,9 +20,8 @@
// finblock is allocated from non-GC'd memory, so any heap pointers
// must be specially handled. GC currently assumes that the finalizer
// queue does not grow during marking (but it can shrink).
-//
-//go:notinheap
type finblock struct {
+ _ sys.NotInHeap
alllink *finblock
next *finblock
cnt uint32
@@ -29,13 +29,23 @@
fin [(_FinBlockSize - 2*goarch.PtrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
}
+var fingStatus atomic.Uint32
+
+// finalizer goroutine status.
+const (
+ fingUninitialized uint32 = iota
+ fingCreated uint32 = 1 << (iota - 1)
+ fingRunningFinalizer
+ fingWait
+ fingWake
+)
+
var finlock mutex // protects the following variables
var fing *g // goroutine that runs finalizers
var finq *finblock // list of finalizers that are to be executed
var finc *finblock // cache of free blocks
var finptrmask [_FinBlockSize / goarch.PtrSize / 8]byte
-var fingwait bool
-var fingwake bool
+
var allfin *finblock // list of all blocks
// NOTE: Layout known to queuefinalizer.
@@ -75,6 +85,12 @@
0<<0 | 1<<1 | 1<<2 | 1<<3 | 1<<4 | 0<<5 | 1<<6 | 1<<7,
}
+// lockRankMayQueueFinalizer records the lock ranking effects of a
+// function that may call queuefinalizer.
+func lockRankMayQueueFinalizer() {
+ lockWithRankMayAcquire(&finlock, getLockRank(&finlock))
+}
+
func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) {
if gcphase != _GCoff {
// Currently we assume that the finalizer queue won't
@@ -120,8 +136,8 @@
f.fint = fint
f.ot = ot
f.arg = p
- fingwake = true
unlock(&finlock)
+ fingStatus.Or(fingWake)
}
//go:nowritebarrier
@@ -135,30 +151,28 @@
}
func wakefing() *g {
- var res *g
- lock(&finlock)
- if fingwait && fingwake {
- fingwait = false
- fingwake = false
- res = fing
+ if ok := fingStatus.CompareAndSwap(fingCreated|fingWait|fingWake, fingCreated); ok {
+ return fing
}
- unlock(&finlock)
- return res
+ return nil
}
-var (
- fingCreate uint32
- fingRunning bool
-)
-
func createfing() {
// start the finalizer goroutine exactly once
- if fingCreate == 0 && atomic.Cas(&fingCreate, 0, 1) {
+ if fingStatus.Load() == fingUninitialized && fingStatus.CompareAndSwap(fingUninitialized, fingCreated) {
go runfinq()
}
}
-// This is the goroutine that runs all of the finalizers
+func finalizercommit(gp *g, lock unsafe.Pointer) bool {
+ unlock((*mutex)(lock))
+ // fingStatus should be modified after fing is put into a waiting state
+ // to avoid waking fing in running state, even if it is about to be parked.
+ fingStatus.Or(fingWait)
+ return true
+}
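
The fingStatus word replaces the old fingwait/fingwake booleans with a single atomic bit set: fingCreated, fingRunningFinalizer, fingWait, and fingWake occupy bits 0 through 3, so wakefing's CompareAndSwap(fingCreated|fingWait|fingWake, fingCreated) succeeds only when the finalizer goroutine exists, is parked, and has been asked to wake, and it clears the wait/wake bits in the same atomic step. A standalone sketch of the same pattern using sync/atomic (illustrative names only; the runtime itself uses runtime/internal/atomic, whose Uint32 provides Or and And helpers):

	package main

	import "sync/atomic"

	// Hypothetical flags mirroring the shape of the fingStatus bits
	// (created=1, running=2, wait=4, wake=8).
	const (
		created uint32 = 1 << iota
		running
		wait
		wake
	)

	var status atomic.Uint32

	// orBits ORs bits into status via a CAS loop.
	func orBits(bits uint32) {
		for {
			old := status.Load()
			if status.CompareAndSwap(old, old|bits) {
				return
			}
		}
	}

	// tryWake mirrors wakefing: it succeeds only when the goroutine exists,
	// is parked (wait), and has a pending wake, clearing wait|wake atomically.
	func tryWake() bool {
		return status.CompareAndSwap(created|wait|wake, created)
	}

	func main() {
		orBits(created)
		orBits(wait)
		orBits(wake)
		println(tryWake()) // true
		println(tryWake()) // false: wait and wake were already cleared
	}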
+
+// This is the goroutine that runs all of the finalizers.
func runfinq() {
var (
frame unsafe.Pointer
@@ -176,8 +190,7 @@
fb := finq
finq = nil
if fb == nil {
- fingwait = true
- goparkunlock(&finlock, waitReasonFinalizerWait, traceEvGoBlock, 1)
+ gopark(finalizercommit, unsafe.Pointer(&finlock), waitReasonFinalizerWait, traceEvGoBlock, 1)
continue
}
argRegs = intArgRegs
@@ -238,9 +251,9 @@
default:
throw("bad kind in runfinq")
}
- fingRunning = true
+ fingStatus.Or(fingRunningFinalizer)
reflectcall(nil, unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz), uint32(framesz), &regs)
- fingRunning = false
+ fingStatus.And(^fingRunningFinalizer)
// Drop finalizer queue heap references
// before hiding them from markroot.
@@ -299,12 +312,21 @@
// bufio.Writer, because the buffer would not be flushed at program exit.
//
// It is not guaranteed that a finalizer will run if the size of *obj is
-// zero bytes.
+// zero bytes, because it may share the same address with other zero-size
+// objects in memory. See https://go.dev/ref/spec#Size_and_alignment_guarantees.
//
// It is not guaranteed that a finalizer will run for objects allocated
// in initializers for package-level variables. Such objects may be
// linker-allocated, not heap-allocated.
//
+// Note that because finalizers may execute arbitrarily far into the future
+// after an object is no longer referenced, the runtime is allowed to perform
+// a space-saving optimization that batches objects together in a single
+// allocation slot. The finalizer for an unreferenced object in such an
+// allocation may never run if it always exists in the same batch as a
+// referenced object. Typically, this batching only happens for tiny
+// (on the order of 16 bytes or less) and pointer-free objects.
+//
// A finalizer may run as soon as an object becomes unreachable.
// In order to use finalizers correctly, the program must ensure that
// the object is reachable until it is no longer required.
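
To make the batching caveat concrete: a pointer-free value of roughly 16 bytes or less can be placed by the tiny allocator into a slot shared with other tiny objects, so its finalizer only becomes eligible once everything in that slot is unreachable. Giving the object a pointer field, or making it larger, keeps it out of the tiny-allocation path; the runtime's own tests use the same trick ("allocate struct with pointer to avoid hitting tinyalloc"). A hedged sketch, not part of the patch, with hypothetical type names:

	package main

	import "runtime"

	type tiny struct{ a, b int32 }       // 8 bytes, pointer-free: may share a tiny-allocator slot
	type notTiny struct{ p *int; n int } // has a pointer field: never tiny-allocated

	func main() {
		x := &notTiny{}
		runtime.SetFinalizer(x, func(*notTiny) { println("finalized") })
		runtime.KeepAlive(x) // x is reachable until at least this point
		runtime.GC()
		// The finalizer for x is eligible once x is unreachable. A *tiny value,
		// by contrast, may have its finalizer delayed indefinitely if it shares
		// an allocation slot with a still-referenced tiny object.
	}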
@@ -357,6 +379,11 @@
throw("nil elem type!")
}
+ if inUserArenaChunk(uintptr(e.data)) {
+ // Arena-allocated objects are not eligible for finalizers.
+ throw("runtime.SetFinalizer: first argument was allocated into an arena")
+ }
+
// find the containing object
base, _, _ := findObject(uintptr(e.data), 0, 0)
diff --git a/src/runtime/mfinal_test.go b/src/runtime/mfinal_test.go
index 902ccc5..61d625a 100644
--- a/src/runtime/mfinal_test.go
+++ b/src/runtime/mfinal_test.go
@@ -53,7 +53,7 @@
}},
}
- for i, tt := range finalizerTests {
+ for _, tt := range finalizerTests {
done := make(chan bool, 1)
go func() {
// allocate struct with pointer to avoid hitting tinyalloc.
@@ -71,11 +71,7 @@
}()
<-done
runtime.GC()
- select {
- case <-ch:
- case <-time.After(time.Second * 4):
- t.Errorf("#%d: finalizer for type %T didn't run", i, tt.finalizer)
- }
+ <-ch
}
}
@@ -109,11 +105,7 @@
}()
<-done
runtime.GC()
- select {
- case <-ch:
- case <-time.After(4 * time.Second):
- t.Errorf("finalizer for type *bigValue didn't run")
- }
+ <-ch
}
func fin(v *int) {
@@ -188,11 +180,7 @@
fin := make(chan bool, 1)
runtime.SetFinalizer(y, func(z *objtype) { fin <- true })
runtime.GC()
- select {
- case <-fin:
- case <-time.After(4 * time.Second):
- t.Errorf("finalizer of next object in memory didn't run")
- }
+ <-fin
xsglobal = xs // keep empty slice alive until here
}
@@ -220,11 +208,7 @@
// set finalizer on string contents of y
runtime.SetFinalizer(y, func(z *objtype) { fin <- true })
runtime.GC()
- select {
- case <-fin:
- case <-time.After(4 * time.Second):
- t.Errorf("finalizer of next string in memory didn't run")
- }
+ <-fin
ssglobal = ss // keep 0-length string live until here
}
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
index b701a09..8788d95 100644
--- a/src/runtime/mfixalloc.go
+++ b/src/runtime/mfixalloc.go
@@ -8,7 +8,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
// FixAlloc is a simple free-list allocator for fixed size objects.
// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
@@ -23,7 +26,8 @@
// Callers can keep state in the object but the first word is
// smashed by freeing and reallocating.
//
-// Consider marking fixalloc'd types go:notinheap.
+// Consider marking fixalloc'd types not in heap by embedding
+// runtime/internal/sys.NotInHeap.
type fixalloc struct {
size uintptr
first func(arg, p unsafe.Pointer) // called first time p is returned
@@ -42,9 +46,8 @@
// this cannot be used by some of the internal GC structures. For example when
// the sweeper is placing an unmarked object on the free list it does not want the
// write barrier to be called since that could result in the object being reachable.
-//
-//go:notinheap
type mlink struct {
+ _ sys.NotInHeap
next *mlink
}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 63e0463..1b05707 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -366,10 +366,6 @@
// explicit user call.
userForced bool
- // totaltime is the CPU nanoseconds spent in GC since the
- // program started if debug.gctrace > 0.
- totaltime int64
-
// initialHeapLive is the value of gcController.heapLive at the
// beginning of this GC cycle.
initialHeapLive uint64
@@ -393,7 +389,7 @@
// cycle is sweep termination, mark, mark termination, and
// sweep. This differs from memstats.numgc, which is
// incremented at mark termination.
- cycles uint32
+ cycles atomic.Uint32
// Timing/utilization stats for this cycle.
stwprocs, maxprocs int32
@@ -404,6 +400,9 @@
// debug.gctrace heap sizes for this cycle.
heap0, heap1, heap2 uint64
+
+ // Cumulative estimated CPU usage.
+ cpuStats
}
// GC runs a garbage collection and blocks the caller until the
@@ -436,7 +435,7 @@
// Wait until the current sweep termination, mark, and mark
// termination complete.
- n := atomic.Load(&work.cycles)
+ n := work.cycles.Load()
gcWaitOnMark(n)
// We're now in sweep N or later. Trigger GC cycle N+1, which
@@ -451,7 +450,7 @@
// complete the cycle and because runtime.GC() is often used
// as part of tests and benchmarks to get the system into a
// relatively stable and isolated state.
- for atomic.Load(&work.cycles) == n+1 && sweepone() != ^uintptr(0) {
+ for work.cycles.Load() == n+1 && sweepone() != ^uintptr(0) {
sweep.nbgsweep++
Gosched()
}
@@ -467,7 +466,7 @@
// First, wait for sweeping to finish. (We know there are no
// more spans on the sweep queue, but we may be concurrently
// sweeping spans, so we have to wait.)
- for atomic.Load(&work.cycles) == n+1 && !isSweepDone() {
+ for work.cycles.Load() == n+1 && !isSweepDone() {
Gosched()
}
@@ -475,7 +474,7 @@
// stable heap profile. Only do this if we haven't already hit
// another mark termination.
mp := acquirem()
- cycle := atomic.Load(&work.cycles)
+ cycle := work.cycles.Load()
if cycle == n+1 || (gcphase == _GCmark && cycle == n+2) {
mProf_PostSweep()
}
@@ -488,7 +487,7 @@
for {
// Disable phase transitions.
lock(&work.sweepWaiters.lock)
- nMarks := atomic.Load(&work.cycles)
+ nMarks := work.cycles.Load()
if gcphase != _GCmark {
// We've already completed this cycle's mark.
nMarks++
@@ -546,7 +545,7 @@
// that the exit condition for the _GCoff phase has been met. The exit
// condition should be tested when allocating.
func (t gcTrigger) test() bool {
- if !memstats.enablegc || panicking != 0 || gcphase != _GCoff {
+ if !memstats.enablegc || panicking.Load() != 0 || gcphase != _GCoff {
return false
}
switch t.kind {
@@ -556,7 +555,7 @@
// atomically wrote gcController.heapLive anyway and we'll see our
// own write.
trigger, _ := gcController.trigger()
- return atomic.Load64(&gcController.heapLive) >= trigger
+ return gcController.heapLive.Load() >= trigger
case gcTriggerTime:
if gcController.gcPercent.Load() < 0 {
return false
@@ -565,7 +564,7 @@
return lastgc != 0 && t.now-lastgc > forcegcperiod
case gcTriggerCycle:
// t.n > work.cycles, but accounting for wraparound.
- return int32(t.n-work.cycles) > 0
+ return int32(t.n-work.cycles.Load()) > 0
}
return true
}
@@ -612,9 +611,6 @@
return
}
- // For stats, check if this GC was forced by the user.
- work.userForced = trigger.kind == gcTriggerCycle
-
// In gcstoptheworld debug mode, upgrade the mode accordingly.
// We do this after re-checking the transition condition so
// that multiple goroutines that detect the heap trigger don't
@@ -630,13 +626,17 @@
semacquire(&gcsema)
semacquire(&worldsema)
+ // For stats, check if this GC was forced by the user.
+ // Update it under gcsema to avoid gctrace getting wrong values.
+ work.userForced = trigger.kind == gcTriggerCycle
+
if trace.enabled {
traceGCStart()
}
// Check that all Ps have finished deferred mcache flushes.
for _, p := range allp {
- if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen {
+ if fg := p.mcache.flushGen.Load(); fg != mheap_.sweepgen {
println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
throw("p mcache not flushed")
}
@@ -652,7 +652,7 @@
// so it can't be more than ncpu, even if GOMAXPROCS is.
work.stwprocs = ncpu
}
- work.heap0 = atomic.Load64(&gcController.heapLive)
+ work.heap0 = gcController.heapLive.Load()
work.pauseNS = 0
work.mode = mode
@@ -672,7 +672,7 @@
// reclaimed until the next GC cycle.
clearpools()
- work.cycles++
+ work.cycles.Add(1)
// Assists and workers can start the moment we start
// the world.
@@ -810,22 +810,22 @@
// Otherwise, our attempt to force all P's to a safepoint could
// result in a deadlock as we attempt to preempt a worker that's
// trying to preempt us (e.g. for a stack scan).
- casgstatus(gp, _Grunning, _Gwaiting)
- forEachP(func(_p_ *p) {
+ casGToWaiting(gp, _Grunning, waitReasonGCMarkTermination)
+ forEachP(func(pp *p) {
// Flush the write barrier buffer, since this may add
// work to the gcWork.
- wbBufFlush1(_p_)
+ wbBufFlush1(pp)
// Flush the gcWork, since this may create global work
// and set the flushedWork flag.
//
// TODO(austin): Break up these workbufs to
// better distribute work.
- _p_.gcw.dispose()
+ pp.gcw.dispose()
// Collect the flushedWork flag.
- if _p_.gcw.flushedWork {
+ if pp.gcw.flushedWork {
atomic.Xadd(&gcMarkDoneFlushed, 1)
- _p_.gcw.flushedWork = false
+ pp.gcw.flushedWork = false
}
})
casgstatus(gp, _Gwaiting, _Grunning)
@@ -879,7 +879,7 @@
if restart {
getg().m.preemptoff = ""
systemstack(func() {
- now := startTheWorldWithSema(true)
+ now := startTheWorldWithSema(trace.enabled)
work.pauseNS += now - work.pauseStart
memstats.gcPauseDist.record(now - work.pauseStart)
})
@@ -924,16 +924,14 @@
// Start marktermination (write barrier remains enabled for now).
setGCPhase(_GCmarktermination)
- work.heap1 = gcController.heapLive
+ work.heap1 = gcController.heapLive.Load()
startTime := nanotime()
mp := acquirem()
mp.preemptoff = "gcing"
- _g_ := getg()
- _g_.m.traceback = 2
- gp := _g_.m.curg
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = waitReasonGarbageCollection
+ mp.traceback = 2
+ curgp := mp.curg
+ casGToWaiting(curgp, _Grunning, waitReasonGarbageCollection)
// Run gc on the g0 stack. We do this so that the g stack
// we're currently running on will no longer change. Cuts
@@ -972,8 +970,8 @@
gcSweep(work.mode)
})
- _g_.m.traceback = 0
- casgstatus(gp, _Gwaiting, _Grunning)
+ mp.traceback = 0
+ casgstatus(curgp, _Gwaiting, _Grunning)
if trace.enabled {
traceGCDone()
@@ -1006,24 +1004,57 @@
memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow)
memstats.pause_total_ns += uint64(work.pauseNS)
- // Update work.totaltime.
sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
// We report idle marking time below, but omit it from the
// overall utilization here since it's "free".
- markCpu := gcController.assistTime.Load() + gcController.dedicatedMarkTime + gcController.fractionalMarkTime
+ markAssistCpu := gcController.assistTime.Load()
+ markDedicatedCpu := gcController.dedicatedMarkTime.Load()
+ markFractionalCpu := gcController.fractionalMarkTime.Load()
+ markIdleCpu := gcController.idleMarkTime.Load()
markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm)
- cycleCpu := sweepTermCpu + markCpu + markTermCpu
- work.totaltime += cycleCpu
+ scavAssistCpu := scavenge.assistTime.Load()
+ scavBgCpu := scavenge.backgroundTime.Load()
+
+ // Update cumulative GC CPU stats.
+ work.cpuStats.gcAssistTime += markAssistCpu
+ work.cpuStats.gcDedicatedTime += markDedicatedCpu + markFractionalCpu
+ work.cpuStats.gcIdleTime += markIdleCpu
+ work.cpuStats.gcPauseTime += sweepTermCpu + markTermCpu
+ work.cpuStats.gcTotalTime += sweepTermCpu + markAssistCpu + markDedicatedCpu + markFractionalCpu + markIdleCpu + markTermCpu
+
+ // Update cumulative scavenge CPU stats.
+ work.cpuStats.scavengeAssistTime += scavAssistCpu
+ work.cpuStats.scavengeBgTime += scavBgCpu
+ work.cpuStats.scavengeTotalTime += scavAssistCpu + scavBgCpu
+
+ // Update total CPU.
+ work.cpuStats.totalTime = sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
+ work.cpuStats.idleTime += sched.idleTime.Load()
+
+ // Compute userTime. We compute this indirectly as everything that's not the above.
+ //
+ // Since time spent in _Pgcstop is covered by gcPauseTime, and time spent in _Pidle
+ // is covered by idleTime, what we're left with is time spent in _Prunning and _Psyscall,
+ // the latter of which is fine because the P will either go idle or get used for something
+ // else via sysmon. Meanwhile if we subtract GC time from whatever's left, we get non-GC
+ // _Prunning time. Note that this still leaves time spent in sweeping and in the scheduler,
+ // but that's fine. The overwhelming majority of this time will be actual user time.
+ work.cpuStats.userTime = work.cpuStats.totalTime - (work.cpuStats.gcTotalTime +
+ work.cpuStats.scavengeTotalTime + work.cpuStats.idleTime)
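
As a worked instance of the identity described above (all numbers hypothetical): with totalTime = 40s of GOMAXPROCS-scaled wall clock, gcTotalTime = 3s, scavengeTotalTime = 0.5s, and idleTime = 6.5s,

	userTime = totalTime - (gcTotalTime + scavengeTotalTime + idleTime)
	         = 40s - (3s + 0.5s + 6.5s)
	         = 30s

which is the same relationship the metrics test exercises when it compares /cpu/classes/total against the sum of its component classes.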
// Compute overall GC CPU utilization.
- totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
- memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu)
+ // Omit idle marking time from the overall utilization here since it's "free".
+ memstats.gc_cpu_fraction = float64(work.cpuStats.gcTotalTime-work.cpuStats.gcIdleTime) / float64(work.cpuStats.totalTime)
- // Reset assist time stat.
+ // Reset assist time and background time stats.
//
// Do this now, instead of at the start of the next GC cycle, because
// these two may keep accumulating even if the GC is not active.
- mheap_.pages.scav.assistTime.Store(0)
+ scavenge.assistTime.Store(0)
+ scavenge.backgroundTime.Store(0)
+
+ // Reset idle time stat.
+ sched.idleTime.Store(0)
// Reset sweep state.
sweep.nbgsweep = 0
@@ -1056,7 +1087,7 @@
throw("failed to set sweep barrier")
}
- systemstack(func() { startTheWorldWithSema(true) })
+ systemstack(func() { startTheWorldWithSema(trace.enabled) })
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
@@ -1075,8 +1106,8 @@
// is necessary to sweep all spans, we need to ensure all
// mcaches are flushed before we start the next GC cycle.
systemstack(func() {
- forEachP(func(_p_ *p) {
- _p_.mcache.prepareForSweep()
+ forEachP(func(pp *p) {
+ pp.mcache.prepareForSweep()
})
})
// Now that we've swept stale spans in mcaches, they don't
@@ -1106,8 +1137,8 @@
for i, ns := range []int64{
sweepTermCpu,
gcController.assistTime.Load(),
- gcController.dedicatedMarkTime + gcController.fractionalMarkTime,
- gcController.idleMarkTime,
+ gcController.dedicatedMarkTime.Load() + gcController.fractionalMarkTime.Load(),
+ gcController.idleMarkTime.Load(),
markTermCpu,
} {
if i == 2 || i == 3 {
@@ -1121,8 +1152,8 @@
print(" ms cpu, ",
work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ",
gcController.lastHeapGoal>>20, " MB goal, ",
- atomic.Load64(&gcController.maxStackScan)>>20, " MB stacks, ",
- gcController.globalsScan>>20, " MB globals, ",
+ gcController.lastStackScan.Load()>>20, " MB stacks, ",
+ gcController.globalsScan.Load()>>20, " MB globals, ",
work.maxprocs, " P")
if work.userForced {
print(" (forced)")
@@ -1131,6 +1162,15 @@
printunlock()
}
+ // Set any arena chunks that were deferred to fault.
+ lock(&userArenaState.lock)
+ faultList := userArenaState.fault
+ userArenaState.fault = nil
+ unlock(&userArenaState.lock)
+ for _, lc := range faultList {
+ lc.mspan.setUserArenaChunkToFault()
+ }
+
semrelease(&worldsema)
semrelease(&gcsema)
// Careful: another GC cycle may start now.
@@ -1183,7 +1223,7 @@
work.nwait = ^uint32(0)
}
-// gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single
+// gcBgMarkWorkerNode is an entry in the gcBgMarkWorkerPool. It points to a single
// gcBgMarkWorker goroutine.
type gcBgMarkWorkerNode struct {
// Unused workers are managed in a lock-free stack. This field must be first.
@@ -1300,7 +1340,7 @@
// the G stack. However, stack shrinking is
// disabled for mark workers, so it is safe to
// read from the G stack.
- casgstatus(gp, _Grunning, _Gwaiting)
+ casGToWaiting(gp, _Grunning, waitReasonGCWorkerActive)
switch pp.gcMarkWorkerMode {
default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
@@ -1566,7 +1606,7 @@
}
work.bytesMarked = 0
- work.initialHeapLive = atomic.Load64(&gcController.heapLive)
+ work.initialHeapLive = gcController.heapLive.Load()
}
// Hooks for other packages
diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go
index d94e471..bcbe7f8 100644
--- a/src/runtime/mgclimit.go
+++ b/src/runtime/mgclimit.go
@@ -55,8 +55,6 @@
// the mark and sweep phases.
transitioning bool
- _ uint32 // Align assistTimePool and lastUpdate on 32-bit platforms.
-
// assistTimePool is the accumulated assist time since the last update.
assistTimePool atomic.Int64
@@ -339,7 +337,7 @@
l.unlock()
}
-// limiterEventType indicates the type of an event occuring on some P.
+// limiterEventType indicates the type of an event occurring on some P.
//
// These events represent the full set of events that the GC CPU limiter tracks
// to execute its function.
@@ -471,9 +469,10 @@
// Account for the event.
switch typ {
case limiterEventIdleMarkWork:
- fallthrough
+ gcCPULimiter.addIdleTime(duration)
case limiterEventIdle:
gcCPULimiter.addIdleTime(duration)
+ sched.idleTime.Add(duration)
case limiterEventMarkAssist:
fallthrough
case limiterEventScavengeAssist:
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 7463707..cfda706 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -218,8 +218,7 @@
userG := getg().m.curg
selfScan := gp == userG && readgstatus(userG) == _Grunning
if selfScan {
- casgstatus(userG, _Grunning, _Gwaiting)
- userG.waitreason = waitReasonGarbageCollectionScan
+ casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
}
// TODO: suspendG blocks (and spins) until gp
@@ -387,7 +386,9 @@
// Mark everything that can be reached from
// the object (but *not* the object itself or
// we'll never collect it).
- scanobject(p, gcw)
+ if !s.spanclass.noscan() {
+ scanobject(p, gcw)
+ }
// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
@@ -440,7 +441,7 @@
// will just cause steals to fail until credit is accumulated
// again, so in the long run it doesn't really matter, but we
// do have to handle the negative credit case.
- bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
+ bgScanCredit := gcController.bgScanCredit.Load()
stolen := int64(0)
if bgScanCredit > 0 {
if bgScanCredit < scanWork {
@@ -450,7 +451,7 @@
stolen = scanWork
gp.gcAssistBytes += debtBytes
}
- atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
+ gcController.bgScanCredit.Add(-stolen)
scanWork -= stolen
@@ -558,8 +559,7 @@
}
// gcDrainN requires the caller to be preemptible.
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = waitReasonGCAssistMarking
+ casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
// drain own cached work first in the hopes that it
// will be more cache friendly.
@@ -595,15 +595,15 @@
}
now := nanotime()
duration := now - startTime
- _p_ := gp.m.p.ptr()
- _p_.gcAssistTime += duration
+ pp := gp.m.p.ptr()
+ pp.gcAssistTime += duration
if trackLimiterEvent {
- _p_.limiterEvent.stop(limiterEventMarkAssist, now)
+ pp.limiterEvent.stop(limiterEventMarkAssist, now)
}
- if _p_.gcAssistTime > gcAssistTimeSlack {
- gcController.assistTime.Add(_p_.gcAssistTime)
+ if pp.gcAssistTime > gcAssistTimeSlack {
+ gcController.assistTime.Add(pp.gcAssistTime)
gcCPULimiter.update(now)
- _p_.gcAssistTime = 0
+ pp.gcAssistTime = 0
}
}
@@ -639,7 +639,7 @@
// the queue, but can still back out. This avoids a
// race in case background marking has flushed more
// credit since we checked above.
- if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
+ if gcController.bgScanCredit.Load() > 0 {
work.assistQueue.q = oldList
if oldList.tail != 0 {
oldList.tail.ptr().schedlink.set(nil)
@@ -668,7 +668,7 @@
// small window here where an assist may add itself to
// the blocked queue and park. If that happens, we'll
// just get it on the next flush.
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
return
}
@@ -708,7 +708,7 @@
// Convert from scan bytes back to work.
assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanWork = int64(float64(scanBytes) * assistWorkPerByte)
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
}
unlock(&work.assistQueue.lock)
}
@@ -943,10 +943,10 @@
}
// Scan arguments to this frame.
- if frame.arglen != 0 {
+ if n := frame.argBytes(); n != 0 {
// TODO: We could pass the entry argument map
// to narrow this down further.
- scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+ scanConservative(frame.argp, n, nil, gcw, state)
}
if isAsyncPreempt || isDebugCall {
@@ -964,7 +964,7 @@
return
}
- locals, args, objs := getStackMap(frame, &state.cache, false)
+ locals, args, objs := frame.getStackMap(&state.cache, false)
// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
@@ -1061,7 +1061,7 @@
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job >= work.markrootJobs {
break
@@ -1075,7 +1075,7 @@
// Drain heap marking jobs.
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
@@ -1265,28 +1265,21 @@
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
- hbits := heapBitsForAddr(b)
s := spanOfUnchecked(b)
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
+ if s.spanclass.noscan() {
+ // Correctness-wise this is ok, but it's inefficient
+ // if noscan objects reach here.
+ throw("scanobject of a noscan object")
+ }
if n > maxObletBytes {
// Large object. Break into oblets for better
// parallelism and lower latency.
if b == s.base() {
- // It's possible this is a noscan object (not
- // from greyobject, but from other code
- // paths), in which case we must *not* enqueue
- // oblets since their bitmaps will be
- // uninitialized.
- if s.spanclass.noscan() {
- // Bypass the whole scan.
- gcw.bytesMarked += uint64(n)
- return
- }
-
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
@@ -1308,20 +1301,24 @@
}
}
- var i uintptr
- for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
- // Load bits once. See CL 22712 and issue 16973 for discussion.
- bits := hbits.bits()
- if bits&bitScan == 0 {
- break // no more pointers in this object
+ hbits := heapBitsForAddr(b, n)
+ var scanSize uintptr
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.nextFast(); addr == 0 {
+ if hbits, addr = hbits.next(); addr == 0 {
+ break
+ }
}
- if bits&bitPointer == 0 {
- continue // not a pointer
- }
+
+ // Keep track of farthest pointer we found, so we can
+ // update heapScanWork. TODO: is there a better metric,
+ // now that we can skip scalar portions pretty efficiently?
+ scanSize = addr - b + goarch.PtrSize
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
- obj := *(*uintptr)(unsafe.Pointer(b + i))
+ obj := *(*uintptr)(unsafe.Pointer(addr))
// At this point we have extracted the next potential pointer.
// Quickly filter out nil and pointers back to the current object.
@@ -1335,13 +1332,13 @@
// heap. In this case, we know the object was
// just allocated and hence will be marked by
// allocation itself.
- if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
- greyobject(obj, b, i, span, gcw, objIndex)
+ if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+ greyobject(obj, b, addr-b, span, gcw, objIndex)
}
}
}
gcw.bytesMarked += uint64(n)
- gcw.heapScanWork += int64(i)
+ gcw.heapScanWork += int64(scanSize)
}
// scanConservative scans block [b, b+n) conservatively, treating any
@@ -1564,7 +1561,7 @@
//
//go:nowritebarrier
//go:nosplit
-func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size uintptr) {
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
index 2d9fd27..9d9840e 100644
--- a/src/runtime/mgcpacer.go
+++ b/src/runtime/mgcpacer.go
@@ -8,7 +8,7 @@
"internal/cpu"
"internal/goexperiment"
"runtime/internal/atomic"
- "unsafe"
+ _ "unsafe" // for go:linkname
)
// go119MemoryLimitSupport is a feature flag for a number of changes
@@ -74,13 +74,6 @@
memoryLimitHeapGoalHeadroom = 1 << 20
)
-func init() {
- if offset := unsafe.Offsetof(gcController.heapLive); offset%8 != 0 {
- println(offset)
- throw("gcController.heapLive not aligned to 8 bytes")
- }
-}
-
// gcController implements the GC pacing controller that determines
// when to trigger concurrent garbage collection and how much marking
// work to do in mutator assists and background marking.
@@ -99,8 +92,6 @@
// Initialized from GOGC. GOGC=off means no GC.
gcPercent atomic.Int32
- _ uint32 // padding so following 64-bit values are 8-byte aligned
-
// memoryLimit is the soft memory limit in bytes.
//
// Initialized from GOMEMLIMIT. GOMEMLIMIT=off is equivalent to MaxInt64
@@ -145,14 +136,10 @@
// Updated at the end of each GC cycle, in endCycle.
consMark float64
- // consMarkController holds the state for the mark-cons ratio
- // estimation over time.
- //
- // Its purpose is to smooth out noisiness in the computation of
- // consMark; see consMark for details.
- consMarkController piController
-
- _ uint32 // Padding for atomics on 32-bit platforms.
+ // lastConsMark is the computed cons/mark value for the previous GC
+ // cycle. Note that this is *not* the last value of cons/mark, but the
+ // actual computed value. See endCycle for details.
+ lastConsMark float64
// gcPercentHeapGoal is the goal heapLive for when next GC ends derived
// from gcPercent.
@@ -193,32 +180,27 @@
// hence goes up as we allocate and down as we sweep) while heapLive
// excludes these objects (and hence only goes up between GCs).
//
- // This is updated atomically without locking. To reduce
- // contention, this is updated only when obtaining a span from
- // an mcentral and at this point it counts all of the
- // unallocated slots in that span (which will be allocated
- // before that mcache obtains another span from that
- // mcentral). Hence, it slightly overestimates the "true" live
- // heap size. It's better to overestimate than to
- // underestimate because 1) this triggers the GC earlier than
- // necessary rather than potentially too late and 2) this
- // leads to a conservative GC rate rather than a GC rate that
- // is potentially too low.
- //
- // Reads should likewise be atomic (or during STW).
+ // To reduce contention, this is updated only when obtaining a span
+ // from an mcentral and at this point it counts all of the unallocated
+ // slots in that span (which will be allocated before that mcache
+ // obtains another span from that mcentral). Hence, it slightly
+ // overestimates the "true" live heap size. It's better to overestimate
+ // than to underestimate because 1) this triggers the GC earlier than
+ // necessary rather than potentially too late and 2) this leads to a
+ // conservative GC rate rather than a GC rate that is potentially too
+ // low.
//
// Whenever this is updated, call traceHeapAlloc() and
// this gcControllerState's revise() method.
- heapLive uint64
+ heapLive atomic.Uint64
- // heapScan is the number of bytes of "scannable" heap. This
- // is the live heap (as counted by heapLive), but omitting
- // no-scan objects and no-scan tails of objects.
+ // heapScan is the number of bytes of "scannable" heap. This is the
+ // live heap (as counted by heapLive), but omitting no-scan objects and
+ // no-scan tails of objects.
//
- // This value is fixed at the start of a GC cycle, so during a
- // GC cycle it is safe to read without atomics, and it represents
- // the maximum scannable heap.
- heapScan uint64
+ // This value is fixed at the start of a GC cycle. It represents the
+ // maximum scannable heap.
+ heapScan atomic.Uint64
// lastHeapScan is the number of bytes of heap that were scanned
// last GC cycle. It is the same as heapMarked, but only
@@ -229,7 +211,7 @@
// lastStackScan is the number of bytes of stack that were scanned
// last GC cycle.
- lastStackScan uint64
+ lastStackScan atomic.Uint64
// maxStackScan is the amount of allocated goroutine stack space in
// use by goroutines.
@@ -239,15 +221,11 @@
// goroutine stack space is much harder to measure cheaply. By using
// allocated space, we make an overestimate; this is OK, it's better
// to conservatively overcount than undercount.
- //
- // Read and updated atomically.
- maxStackScan uint64
+ maxStackScan atomic.Uint64
// globalsScan is the total amount of global variable space
// that is scannable.
- //
- // Read and updated atomically.
- globalsScan uint64
+ globalsScan atomic.Uint64
// heapMarked is the number of bytes marked by the previous
// GC. After mark termination, heapLive == heapMarked, but
@@ -273,12 +251,11 @@
stackScanWork atomic.Int64
globalsScanWork atomic.Int64
- // bgScanCredit is the scan work credit accumulated by the
- // concurrent background scan. This credit is accumulated by
- // the background scan and stolen by mutator assists. This is
- // updated atomically. Updates occur in bounded batches, since
- // it is both written and read throughout the cycle.
- bgScanCredit int64
+ // bgScanCredit is the scan work credit accumulated by the concurrent
+ // background scan. This credit is accumulated by the background scan
+ // and stolen by mutator assists. Updates occur in bounded batches,
+ // since it is both written and read throughout the cycle.
+ bgScanCredit atomic.Int64
// assistTime is the nanoseconds spent in mutator assists
// during this cycle. This is updated atomically, and must also
@@ -287,31 +264,29 @@
// written and read throughout the cycle.
assistTime atomic.Int64
- // dedicatedMarkTime is the nanoseconds spent in dedicated
- // mark workers during this cycle. This is updated atomically
- // at the end of the concurrent mark phase.
- dedicatedMarkTime int64
+ // dedicatedMarkTime is the nanoseconds spent in dedicated mark workers
+ // during this cycle. This is updated at the end of the concurrent mark
+ // phase.
+ dedicatedMarkTime atomic.Int64
- // fractionalMarkTime is the nanoseconds spent in the
- // fractional mark worker during this cycle. This is updated
- // atomically throughout the cycle and will be up-to-date if
- // the fractional mark worker is not currently running.
- fractionalMarkTime int64
+ // fractionalMarkTime is the nanoseconds spent in the fractional mark
+ // worker during this cycle. This is updated throughout the cycle and
+ // will be up-to-date if the fractional mark worker is not currently
+ // running.
+ fractionalMarkTime atomic.Int64
- // idleMarkTime is the nanoseconds spent in idle marking
- // during this cycle. This is updated atomically throughout
- // the cycle.
- idleMarkTime int64
+ // idleMarkTime is the nanoseconds spent in idle marking during this
+ // cycle. This is updated throughout the cycle.
+ idleMarkTime atomic.Int64
// markStartTime is the absolute start time in nanoseconds
// that assists and background mark workers started.
markStartTime int64
- // dedicatedMarkWorkersNeeded is the number of dedicated mark
- // workers that need to be started. This is computed at the
- // beginning of each cycle and decremented atomically as
- // dedicated mark workers get started.
- dedicatedMarkWorkersNeeded int64
+ // dedicatedMarkWorkersNeeded is the number of dedicated mark workers
+ // that need to be started. This is computed at the beginning of each
+ // cycle and decremented as dedicated mark workers get started.
+ dedicatedMarkWorkersNeeded atomic.Int64
// idleMarkWorkers is two packed int32 values in a single uint64.
// These two values are always updated simultaneously.
@@ -395,28 +370,6 @@
func (c *gcControllerState) init(gcPercent int32, memoryLimit int64) {
c.heapMinimum = defaultHeapMinimum
c.triggered = ^uint64(0)
-
- c.consMarkController = piController{
- // Tuned first via the Ziegler-Nichols process in simulation,
- // then the integral time was manually tuned against real-world
- // applications to deal with noisiness in the measured cons/mark
- // ratio.
- kp: 0.9,
- ti: 4.0,
-
- // Set a high reset time in GC cycles.
- // This is inversely proportional to the rate at which we
- // accumulate error from clipping. By making this very high
- // we make the accumulation slow. In general, clipping is
- // OK in our situation, hence the choice.
- //
- // Tune this if we get unintended effects from clipping for
- // a long time.
- tt: 1000,
- min: -1000,
- max: 1000,
- }
-
c.setGCPercent(gcPercent)
c.setMemoryLimit(memoryLimit)
c.commit(true) // No sweep phase in the first GC cycle.
@@ -433,32 +386,13 @@
c.heapScanWork.Store(0)
c.stackScanWork.Store(0)
c.globalsScanWork.Store(0)
- c.bgScanCredit = 0
+ c.bgScanCredit.Store(0)
c.assistTime.Store(0)
- c.dedicatedMarkTime = 0
- c.fractionalMarkTime = 0
- c.idleMarkTime = 0
+ c.dedicatedMarkTime.Store(0)
+ c.fractionalMarkTime.Store(0)
+ c.idleMarkTime.Store(0)
c.markStartTime = markStartTime
-
- // TODO(mknyszek): This is supposed to be the actual trigger point for the heap, but
- // causes regressions in memory use. The cause is that the PI controller used to smooth
- // the cons/mark ratio measurements tends to flail when using the less accurate precomputed
- // trigger for the cons/mark calculation, and this results in the controller being more
- // conservative about steady-states it tries to find in the future.
- //
- // This conservatism is transient, but these transient states tend to matter for short-lived
- // programs, especially because the PI controller is overdamped, partially because it is
- // configured with a relatively large time constant.
- //
- // Ultimately, I think this is just two mistakes piled on one another: the choice of a swingy
- // smoothing function that recalls a fairly long history (due to its overdamped time constant)
- // coupled with an inaccurate cons/mark calculation. It just so happens this works better
- // today, and it makes it harder to change things in the future.
- //
- // This is described in #53738. Fix this for #53892 by changing back to the actual trigger
- // point and simplifying the smoothing function.
- heapTrigger, heapGoal := c.trigger()
- c.triggered = heapTrigger
+ c.triggered = c.heapLive.Load()
// Compute the background mark utilization goal. In general,
// this may not come out exactly. We round the number of
@@ -466,26 +400,26 @@
// 25%. For small GOMAXPROCS, this would introduce too much
// error, so we add fractional workers in that case.
totalUtilizationGoal := float64(procs) * gcBackgroundUtilization
- c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5)
- utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
+ dedicatedMarkWorkersNeeded := int64(totalUtilizationGoal + 0.5)
+ utilError := float64(dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
const maxUtilError = 0.3
if utilError < -maxUtilError || utilError > maxUtilError {
// Rounding put us more than 30% off our goal. With
// gcBackgroundUtilization of 25%, this happens for
// GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional
// workers to compensate.
- if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
+ if float64(dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
// Too many dedicated workers.
- c.dedicatedMarkWorkersNeeded--
+ dedicatedMarkWorkersNeeded--
}
- c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(procs)
+ c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(dedicatedMarkWorkersNeeded)) / float64(procs)
} else {
c.fractionalUtilizationGoal = 0
}
// In STW mode, we just want dedicated workers.
if debug.gcstoptheworld > 0 {
- c.dedicatedMarkWorkersNeeded = int64(procs)
+ dedicatedMarkWorkersNeeded = int64(procs)
c.fractionalUtilizationGoal = 0
}
@@ -500,7 +434,7 @@
// required. However, we need at least one dedicated mark worker or
// idle GC worker to ensure GC progress in some scenarios (see comment
// on maxIdleMarkWorkers).
- if c.dedicatedMarkWorkersNeeded > 0 {
+ if dedicatedMarkWorkersNeeded > 0 {
c.setMaxIdleMarkWorkers(0)
} else {
// TODO(mknyszek): The fundamental reason why we need this is because
@@ -510,22 +444,24 @@
c.setMaxIdleMarkWorkers(1)
}
} else {
- // N.B. gomaxprocs and dedicatedMarkWorkersNeeded is guaranteed not to
+ // N.B. gomaxprocs and dedicatedMarkWorkersNeeded are guaranteed not to
// change during a GC cycle.
- c.setMaxIdleMarkWorkers(int32(procs) - int32(c.dedicatedMarkWorkersNeeded))
+ c.setMaxIdleMarkWorkers(int32(procs) - int32(dedicatedMarkWorkersNeeded))
}
// Compute initial values for controls that are updated
// throughout the cycle.
+ c.dedicatedMarkWorkersNeeded.Store(dedicatedMarkWorkersNeeded)
c.revise()
if debug.gcpacertrace > 0 {
+ heapGoal := c.heapGoal()
assistRatio := c.assistWorkPerByte.Load()
print("pacer: assist ratio=", assistRatio,
- " (scan ", gcController.heapScan>>20, " MB in ",
+ " (scan ", gcController.heapScan.Load()>>20, " MB in ",
work.initialHeapLive>>20, "->",
heapGoal>>20, " MB)",
- " workers=", c.dedicatedMarkWorkersNeeded,
+ " workers=", dedicatedMarkWorkersNeeded,
"+", c.fractionalUtilizationGoal, "\n")
}
}
@@ -559,8 +495,8 @@
// act like GOGC is huge for the below calculations.
gcPercent = 100000
}
- live := atomic.Load64(&c.heapLive)
- scan := atomic.Load64(&c.heapScan)
+ live := c.heapLive.Load()
+ scan := c.heapScan.Load()
work := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
// Assume we're under the soft goal. Pace GC to complete at
@@ -569,14 +505,14 @@
// The expected scan work is computed as the amount of bytes scanned last
// GC cycle (both heap and stack), plus our estimate of globals work for this cycle.
- scanWorkExpected := int64(c.lastHeapScan + c.lastStackScan + c.globalsScan)
+ scanWorkExpected := int64(c.lastHeapScan + c.lastStackScan.Load() + c.globalsScan.Load())
// maxScanWork is a worst-case estimate of the amount of scan work that
// needs to be performed in this GC cycle. Specifically, it represents
// the case where *all* scannable memory turns out to be live, and
// *all* allocated stack space is scannable.
- maxStackScan := atomic.Load64(&c.maxStackScan)
- maxScanWork := int64(scan + maxStackScan + c.globalsScan)
+ maxStackScan := c.maxStackScan.Load()
+ maxScanWork := int64(scan + maxStackScan + c.globalsScan.Load())
if work > scanWorkExpected {
// We've already done more scan work than expected. Because our expectation
// is based on a steady-state scannable heap size, we assume this means our
@@ -675,7 +611,7 @@
utilization += float64(c.assistTime.Load()) / float64(assistDuration*int64(procs))
}
- if c.heapLive <= c.triggered {
+ if c.heapLive.Load() <= c.triggered {
// Shouldn't happen, but let's be very safe about this in case the
// GC is somehow extremely short.
//
@@ -688,7 +624,7 @@
}
idleUtilization := 0.0
if assistDuration > 0 {
- idleUtilization = float64(c.idleMarkTime) / float64(assistDuration*int64(procs))
+ idleUtilization = float64(c.idleMarkTime.Load()) / float64(assistDuration*int64(procs))
}
// Determine the cons/mark ratio.
//
@@ -706,7 +642,7 @@
//
// assistDuration * procs * (utilization + idleUtilization)
//
- // In this case, we *include* idle utilization, because that is additional CPU time that the
+ // In this case, we *include* idle utilization, because that is additional CPU time that
// the GC had available to it.
//
// In effect, idle GC time is sort of double-counted here, but it's very weird compared
@@ -719,44 +655,23 @@
//
// Note that because we only care about the ratio, assistDuration and procs cancel out.
scanWork := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
- currentConsMark := (float64(c.heapLive-c.triggered) * (utilization + idleUtilization)) /
+ currentConsMark := (float64(c.heapLive.Load()-c.triggered) * (utilization + idleUtilization)) /
(float64(scanWork) * (1 - utilization))
- // Update cons/mark controller. The time period for this is 1 GC cycle.
- //
- // This use of a PI controller might seem strange. So, here's an explanation:
- //
- // currentConsMark represents the consMark we *should've* had to be perfectly
- // on-target for this cycle. Given that we assume the next GC will be like this
- // one in the steady-state, it stands to reason that we should just pick that
- // as our next consMark. In practice, however, currentConsMark is too noisy:
- // we're going to be wildly off-target in each GC cycle if we do that.
- //
- // What we do instead is make a long-term assumption: there is some steady-state
- // consMark value, but it's obscured by noise. By constantly shooting for this
- // noisy-but-perfect consMark value, the controller will bounce around a bit,
- // but its average behavior, in aggregate, should be less noisy and closer to
- // the true long-term consMark value, provided its tuned to be slightly overdamped.
- var ok bool
+ // Update our cons/mark estimate. This is the raw value above, but averaged over 2 GC cycles
+ // because it tends to be jittery, even in the steady-state. The smoothing helps the GC to
+ // maintain much more stable cycle-by-cycle behavior.
oldConsMark := c.consMark
- c.consMark, ok = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
- if !ok {
- // The error spiraled out of control. This is incredibly unlikely seeing
- // as this controller is essentially just a smoothing function, but it might
- // mean that something went very wrong with how currentConsMark was calculated.
- // Just reset consMark and keep going.
- c.consMark = 0
- }
+ c.consMark = (currentConsMark + c.lastConsMark) / 2
+ c.lastConsMark = currentConsMark
if debug.gcpacertrace > 0 {
printlock()
goal := gcGoalUtilization * 100
print("pacer: ", int(utilization*100), "% CPU (", int(goal), " exp.) for ")
- print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.lastStackScan+c.globalsScan, " B exp.) ")
- print("in ", c.triggered, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.lastHeapGoal), ", cons/mark ", oldConsMark, ")")
- if !ok {
- print("[controller reset]")
- }
+ print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.lastStackScan.Load()+c.globalsScan.Load(), " B exp.) ")
+ live := c.heapLive.Load()
+ print("in ", c.triggered, " B -> ", live, " B (∆goal ", int64(live)-int64(c.lastHeapGoal), ", cons/mark ", oldConsMark, ")")
println()
printunlock()
}
@@ -771,14 +686,14 @@
// If there are idle Ps, wake one so it will run an idle worker.
// NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112.
//
- // if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
+ // if sched.npidle.Load() != 0 && sched.nmspinning.Load() == 0 {
// wakep()
// return
// }
// There are no idle Ps. If we need more dedicated workers,
// try to preempt a running P so it will switch to a worker.
- if c.dedicatedMarkWorkersNeeded <= 0 {
+ if c.dedicatedMarkWorkersNeeded.Load() <= 0 {
return
}
// Pick a random other P to preempt.
@@ -805,9 +720,9 @@
}
}
-// findRunnableGCWorker returns a background mark worker for _p_ if it
+// findRunnableGCWorker returns a background mark worker for pp if it
// should be run. This must only be called when gcBlackenEnabled != 0.
-func (c *gcControllerState) findRunnableGCWorker(_p_ *p, now int64) (*g, int64) {
+func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
if gcBlackenEnabled == 0 {
throw("gcControllerState.findRunnable: blackening not enabled")
}
@@ -823,7 +738,7 @@
gcCPULimiter.update(now)
}
- if !gcMarkWorkAvailable(_p_) {
+ if !gcMarkWorkAvailable(pp) {
// No work to be done right now. This can happen at
// the end of the mark phase when there are still
// assists tapering off. Don't bother running a worker
@@ -848,14 +763,14 @@
return nil, now
}
- decIfPositive := func(ptr *int64) bool {
+ decIfPositive := func(val *atomic.Int64) bool {
for {
- v := atomic.Loadint64(ptr)
+ v := val.Load()
if v <= 0 {
return false
}
- if atomic.Casint64(ptr, v, v-1) {
+ if val.CompareAndSwap(v, v-1) {
return true
}
}
@@ -864,7 +779,7 @@
if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
// This P is now dedicated to marking until the end of
// the concurrent mark phase.
- _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
+ pp.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
} else if c.fractionalUtilizationGoal == 0 {
// No need for fractional workers.
gcBgMarkWorkerPool.push(&node.node)
@@ -875,13 +790,13 @@
//
// This should be kept in sync with pollFractionalWorkerExit.
delta := now - c.markStartTime
- if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
+ if delta > 0 && float64(pp.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
// Nope. No need to run a fractional worker.
gcBgMarkWorkerPool.push(&node.node)
return nil, now
}
// Run a fractional worker.
- _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
+ pp.gcMarkWorkerMode = gcMarkWorkerFractionalMode
}
// Run the background mark worker.
@@ -900,15 +815,15 @@
// The world must be stopped.
func (c *gcControllerState) resetLive(bytesMarked uint64) {
c.heapMarked = bytesMarked
- c.heapLive = bytesMarked
- c.heapScan = uint64(c.heapScanWork.Load())
+ c.heapLive.Store(bytesMarked)
+ c.heapScan.Store(uint64(c.heapScanWork.Load()))
c.lastHeapScan = uint64(c.heapScanWork.Load())
- c.lastStackScan = uint64(c.stackScanWork.Load())
+ c.lastStackScan.Store(uint64(c.stackScanWork.Load()))
c.triggered = ^uint64(0) // Reset triggered.
// heapLive was updated, so emit a trace event.
if trace.enabled {
- traceHeapAlloc()
+ traceHeapAlloc(bytesMarked)
}
}
@@ -921,12 +836,12 @@
func (c *gcControllerState) markWorkerStop(mode gcMarkWorkerMode, duration int64) {
switch mode {
case gcMarkWorkerDedicatedMode:
- atomic.Xaddint64(&c.dedicatedMarkTime, duration)
- atomic.Xaddint64(&c.dedicatedMarkWorkersNeeded, 1)
+ c.dedicatedMarkTime.Add(duration)
+ c.dedicatedMarkWorkersNeeded.Add(1)
case gcMarkWorkerFractionalMode:
- atomic.Xaddint64(&c.fractionalMarkTime, duration)
+ c.fractionalMarkTime.Add(duration)
case gcMarkWorkerIdleMode:
- atomic.Xaddint64(&c.idleMarkTime, duration)
+ c.idleMarkTime.Add(duration)
c.removeIdleMarkWorker()
default:
throw("markWorkerStop: unknown mark worker mode")
@@ -935,17 +850,17 @@
func (c *gcControllerState) update(dHeapLive, dHeapScan int64) {
if dHeapLive != 0 {
- atomic.Xadd64(&gcController.heapLive, dHeapLive)
+ live := gcController.heapLive.Add(dHeapLive)
if trace.enabled {
// gcController.heapLive changed.
- traceHeapAlloc()
+ traceHeapAlloc(live)
}
}
if gcBlackenEnabled == 0 {
// Update heapScan when we're not in a current GC. It is fixed
// at the beginning of a cycle.
if dHeapScan != 0 {
- atomic.Xadd64(&gcController.heapScan, dHeapScan)
+ gcController.heapScan.Add(dHeapScan)
}
} else {
// gcController.heapLive changed.
@@ -955,18 +870,18 @@
func (c *gcControllerState) addScannableStack(pp *p, amount int64) {
if pp == nil {
- atomic.Xadd64(&c.maxStackScan, amount)
+ c.maxStackScan.Add(amount)
return
}
pp.maxStackScanDelta += amount
if pp.maxStackScanDelta >= maxStackScanSlack || pp.maxStackScanDelta <= -maxStackScanSlack {
- atomic.Xadd64(&c.maxStackScan, pp.maxStackScanDelta)
+ c.maxStackScan.Add(pp.maxStackScanDelta)
pp.maxStackScanDelta = 0
}
}
func (c *gcControllerState) addGlobals(amount int64) {
- atomic.Xadd64(&c.globalsScan, amount)
+ c.globalsScan.Add(amount)
}
// heapGoal returns the current heap goal.
@@ -1260,7 +1175,7 @@
// Concurrent sweep happens in the heap growth
// from gcController.heapLive to trigger. Make sure we
// give the sweeper some runway if it doesn't have enough.
- c.sweepDistMinTrigger.Store(atomic.Load64(&c.heapLive) + sweepMinHeapDistance)
+ c.sweepDistMinTrigger.Store(c.heapLive.Load() + sweepMinHeapDistance)
}
// Compute the next GC goal, which is when the allocated heap
@@ -1268,7 +1183,7 @@
// plus additional runway for non-heap sources of GC work.
gcPercentHeapGoal := ^uint64(0)
if gcPercent := c.gcPercent.Load(); gcPercent >= 0 {
- gcPercentHeapGoal = c.heapMarked + (c.heapMarked+atomic.Load64(&c.lastStackScan)+atomic.Load64(&c.globalsScan))*uint64(gcPercent)/100
+ gcPercentHeapGoal = c.heapMarked + (c.heapMarked+c.lastStackScan.Load()+c.globalsScan.Load())*uint64(gcPercent)/100
}
// Apply the minimum heap size here. It's defined in terms of gcPercent
// and is only updated by functions that call commit.
@@ -1300,7 +1215,7 @@
// Furthermore, by setting the runway so that CPU resources are divided
// this way, assuming that the cons/mark ratio is correct, we make that
// division a reality.
- c.runway.Store(uint64((c.consMark * (1 - gcGoalUtilization) / (gcGoalUtilization)) * float64(c.lastHeapScan+c.lastStackScan+c.globalsScan)))
+ c.runway.Store(uint64((c.consMark * (1 - gcGoalUtilization) / (gcGoalUtilization)) * float64(c.lastHeapScan+c.lastStackScan.Load()+c.globalsScan.Load())))
}
// setGCPercent updates gcPercent. commit must be called after.
@@ -1335,7 +1250,7 @@
// If we just disabled GC, wait for any concurrent GC mark to
// finish so we always return with no GC running.
if in < 0 {
- gcWaitOnMark(atomic.Load(&work.cycles))
+ gcWaitOnMark(work.cycles.Load())
}
return out
@@ -1400,74 +1315,6 @@
return n
}
-type piController struct {
- kp float64 // Proportional constant.
- ti float64 // Integral time constant.
- tt float64 // Reset time.
-
- min, max float64 // Output boundaries.
-
- // PI controller state.
-
- errIntegral float64 // Integral of the error from t=0 to now.
-
- // Error flags.
- errOverflow bool // Set if errIntegral ever overflowed.
- inputOverflow bool // Set if an operation with the input overflowed.
-}
-
-// next provides a new sample to the controller.
-//
-// input is the sample, setpoint is the desired point, and period is how much
-// time (in whatever unit makes the most sense) has passed since the last sample.
-//
-// Returns a new value for the variable it's controlling, and whether the operation
-// completed successfully. One reason this might fail is if error has been growing
-// in an unbounded manner, to the point of overflow.
-//
-// In the specific case of an error overflow occurs, the errOverflow field will be
-// set and the rest of the controller's internal state will be fully reset.
-func (c *piController) next(input, setpoint, period float64) (float64, bool) {
- // Compute the raw output value.
- prop := c.kp * (setpoint - input)
- rawOutput := prop + c.errIntegral
-
- // Clamp rawOutput into output.
- output := rawOutput
- if isInf(output) || isNaN(output) {
- // The input had a large enough magnitude that either it was already
- // overflowed, or some operation with it overflowed.
- // Set a flag and reset. That's the safest thing to do.
- c.reset()
- c.inputOverflow = true
- return c.min, false
- }
- if output < c.min {
- output = c.min
- } else if output > c.max {
- output = c.max
- }
-
- // Update the controller's state.
- if c.ti != 0 && c.tt != 0 {
- c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
- if isInf(c.errIntegral) || isNaN(c.errIntegral) {
- // So much error has accumulated that we managed to overflow.
- // The assumptions around the controller have likely broken down.
- // Set a flag and reset. That's the safest thing to do.
- c.reset()
- c.errOverflow = true
- return c.min, false
- }
- }
- return output, true
-}
-
-// reset resets the controller state, except for controller error flags.
-func (c *piController) reset() {
- c.errIntegral = 0
-}
-
// addIdleMarkWorker attempts to add a new idle mark worker.
//
// If this returns true, the caller must become an idle mark worker unless
diff --git a/src/runtime/mgcpacer_test.go b/src/runtime/mgcpacer_test.go
index 12d885d..e373e32 100644
--- a/src/runtime/mgcpacer_test.go
+++ b/src/runtime/mgcpacer_test.go
@@ -1019,51 +1019,6 @@
}
}
-func FuzzPIController(f *testing.F) {
- isNormal := func(x float64) bool {
- return !math.IsInf(x, 0) && !math.IsNaN(x)
- }
- isPositive := func(x float64) bool {
- return isNormal(x) && x > 0
- }
- // Seed with constants from controllers in the runtime.
- // It's not critical that we keep these in sync, they're just
- // reasonable seed inputs.
- f.Add(0.3375, 3.2e6, 1e9, 0.001, 1000.0, 0.01)
- f.Add(0.9, 4.0, 1000.0, -1000.0, 1000.0, 0.84)
- f.Fuzz(func(t *testing.T, kp, ti, tt, min, max, setPoint float64) {
- // Ignore uninteresting invalid parameters. These parameters
- // are constant, so in practice surprising values will be documented
- // or will be other otherwise immediately visible.
- //
- // We just want to make sure that given a non-Inf, non-NaN input,
- // we always get a non-Inf, non-NaN output.
- if !isPositive(kp) || !isPositive(ti) || !isPositive(tt) {
- return
- }
- if !isNormal(min) || !isNormal(max) || min > max {
- return
- }
- // Use a random source, but make it deterministic.
- rs := rand.New(rand.NewSource(800))
- randFloat64 := func() float64 {
- return math.Float64frombits(rs.Uint64())
- }
- p := NewPIController(kp, ti, tt, min, max)
- state := float64(0)
- for i := 0; i < 100; i++ {
- input := randFloat64()
- // Ignore the "ok" parameter. We're just trying to break it.
- // state is intentionally completely uncorrelated with the input.
- var ok bool
- state, ok = p.Next(input, setPoint, 1.0)
- if !isNormal(state) {
- t.Fatalf("got NaN or Inf result from controller: %f %v", state, ok)
- }
- }
- })
-}
-
func TestIdleMarkWorkerCount(t *testing.T) {
const workers = 10
c := NewGCController(100, math.MaxInt64)
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index bf38f87..e59340e 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -221,6 +221,16 @@
// gcController.memoryLimit by choosing to target the memory limit or
// some lower target to keep the scavenger working.
memoryLimitGoal atomic.Uint64
+
+ // assistTime is the time spent by the allocator scavenging in the last GC cycle.
+ //
+ // This is reset once a GC cycle ends.
+ assistTime atomic.Int64
+
+ // backgroundTime is the time spent by the background scavenger in the last GC cycle.
+ //
+ // This is reset once a GC cycle ends.
+ backgroundTime atomic.Int64
}
const (
@@ -361,6 +371,7 @@
if start >= end {
return r, 0
}
+ scavenge.backgroundTime.Add(end - start)
return r, end - start
}
}
@@ -718,7 +729,7 @@
if p.summary[len(p.summary)-1][ci].max() >= uint(minPages) {
// We only bother looking for a candidate if there at least
// minPages free pages at all.
- base, npages := p.chunkOf(ci).findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
+ base, npages := p.chunkOf(ci).findScavengeCandidate(searchIdx, minPages, maxPages)
// If we found something, scavenge it and return!
if npages != 0 {
@@ -736,6 +747,8 @@
unlock(p.mheapLock)
if !p.test {
+ pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
+
// Only perform the actual scavenging if we're not in a test.
// It's dangerous to do so otherwise.
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
@@ -1103,3 +1116,71 @@
func (s *scavengeIndex) clear(ci chunkIdx) {
s.chunks[ci/8].And(^uint8(1 << (ci % 8)))
}
+
+type piController struct {
+ kp float64 // Proportional constant.
+ ti float64 // Integral time constant.
+ tt float64 // Reset time.
+
+ min, max float64 // Output boundaries.
+
+ // PI controller state.
+
+ errIntegral float64 // Integral of the error from t=0 to now.
+
+ // Error flags.
+ errOverflow bool // Set if errIntegral ever overflowed.
+ inputOverflow bool // Set if an operation with the input overflowed.
+}
+
+// next provides a new sample to the controller.
+//
+// input is the sample, setpoint is the desired point, and period is how much
+// time (in whatever unit makes the most sense) has passed since the last sample.
+//
+// Returns a new value for the variable it's controlling, and whether the operation
+// completed successfully. One reason this might fail is if error has been growing
+// in an unbounded manner, to the point of overflow.
+//
+// In the specific case where an error overflow occurs, the errOverflow field will be
+// set and the rest of the controller's internal state will be fully reset.
+func (c *piController) next(input, setpoint, period float64) (float64, bool) {
+ // Compute the raw output value.
+ prop := c.kp * (setpoint - input)
+ rawOutput := prop + c.errIntegral
+
+ // Clamp rawOutput into output.
+ output := rawOutput
+ if isInf(output) || isNaN(output) {
+ // The input had a large enough magnitude that either it was already
+ // overflowed, or some operation with it overflowed.
+ // Set a flag and reset. That's the safest thing to do.
+ c.reset()
+ c.inputOverflow = true
+ return c.min, false
+ }
+ if output < c.min {
+ output = c.min
+ } else if output > c.max {
+ output = c.max
+ }
+
+ // Update the controller's state.
+ if c.ti != 0 && c.tt != 0 {
+ c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
+ if isInf(c.errIntegral) || isNaN(c.errIntegral) {
+ // So much error has accumulated that we managed to overflow.
+ // The assumptions around the controller have likely broken down.
+ // Set a flag and reset. That's the safest thing to do.
+ c.reset()
+ c.errOverflow = true
+ return c.min, false
+ }
+ }
+ return output, true
+}
+
+// reset resets the controller state, except for controller error flags.
+func (c *piController) reset() {
+ c.errIntegral = 0
+}
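The piController moved into this file is a clamped proportional-integral controller with anti-windup feedback on the integral term. One worked step of that update, using invented constants (kp=0.5, ti=2, tt=10, output bounds [0, 1]) rather than the runtime's real tuning:

package main

import "fmt"

func main() {
    kp, ti, tt := 0.5, 2.0, 10.0
    lo, hi := 0.0, 1.0
    errIntegral := 0.0
    setpoint, input, period := 0.8, 0.2, 1.0

    prop := kp * (setpoint - input) // 0.5 * 0.6 = 0.3
    rawOutput := prop + errIntegral // 0.3
    output := rawOutput
    if output < lo {
        output = lo
    } else if output > hi {
        output = hi
    }
    // Integral update; the (period/tt)*(output-rawOutput) term is the
    // anti-windup feedback and is zero here because nothing was clamped.
    errIntegral += (kp*period/ti)*(setpoint-input) + (period/tt)*(output-rawOutput)
    fmt.Println(output, errIntegral) // 0.3 0.15
}

When clamping does kick in, output-rawOutput becomes nonzero and bleeds the accumulated error back toward the feasible range, which is what keeps errIntegral from winding up indefinitely.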
diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go
index 620392f..c436ff0 100644
--- a/src/runtime/mgcscavenge_test.go
+++ b/src/runtime/mgcscavenge_test.go
@@ -7,6 +7,7 @@
import (
"fmt"
"internal/goos"
+ "math"
"math/rand"
. "runtime"
"runtime/internal/atomic"
@@ -707,3 +708,48 @@
find(0, 0)
})
}
+
+func FuzzPIController(f *testing.F) {
+ isNormal := func(x float64) bool {
+ return !math.IsInf(x, 0) && !math.IsNaN(x)
+ }
+ isPositive := func(x float64) bool {
+ return isNormal(x) && x > 0
+ }
+ // Seed with constants from controllers in the runtime.
+ // It's not critical that we keep these in sync, they're just
+ // reasonable seed inputs.
+ f.Add(0.3375, 3.2e6, 1e9, 0.001, 1000.0, 0.01)
+ f.Add(0.9, 4.0, 1000.0, -1000.0, 1000.0, 0.84)
+ f.Fuzz(func(t *testing.T, kp, ti, tt, min, max, setPoint float64) {
+ // Ignore uninteresting invalid parameters. These parameters
+ // are constant, so in practice surprising values will be documented
+ // or will be otherwise immediately visible.
+ //
+ // We just want to make sure that given a non-Inf, non-NaN input,
+ // we always get a non-Inf, non-NaN output.
+ if !isPositive(kp) || !isPositive(ti) || !isPositive(tt) {
+ return
+ }
+ if !isNormal(min) || !isNormal(max) || min > max {
+ return
+ }
+ // Use a random source, but make it deterministic.
+ rs := rand.New(rand.NewSource(800))
+ randFloat64 := func() float64 {
+ return math.Float64frombits(rs.Uint64())
+ }
+ p := NewPIController(kp, ti, tt, min, max)
+ state := float64(0)
+ for i := 0; i < 100; i++ {
+ input := randFloat64()
+ // Ignore the "ok" parameter. We're just trying to break it.
+ // state is intentionally completely uncorrelated with the input.
+ var ok bool
+ state, ok = p.Next(input, setPoint, 1.0)
+ if !isNormal(state) {
+ t.Fatalf("got NaN or Inf result from controller: %f %v", state, ok)
+ }
+ }
+ })
+}
diff --git a/src/runtime/mgcstack.go b/src/runtime/mgcstack.go
index 472c61a..6b55220 100644
--- a/src/runtime/mgcstack.go
+++ b/src/runtime/mgcstack.go
@@ -96,6 +96,7 @@
import (
"internal/goarch"
+ "runtime/internal/sys"
"unsafe"
)
@@ -103,17 +104,15 @@
// Buffer for pointers found during stack tracing.
// Must be smaller than or equal to workbuf.
-//
-//go:notinheap
type stackWorkBuf struct {
+ _ sys.NotInHeap
stackWorkBufHdr
obj [(_WorkbufSize - unsafe.Sizeof(stackWorkBufHdr{})) / goarch.PtrSize]uintptr
}
// Header declaration must come after the buf declaration above, because of issue #14620.
-//
-//go:notinheap
type stackWorkBufHdr struct {
+ _ sys.NotInHeap
workbufhdr
next *stackWorkBuf // linked list of workbufs
// Note: we could theoretically repurpose lfnode.next as this next pointer.
@@ -123,15 +122,14 @@
// Buffer for stack objects found on a goroutine stack.
// Must be smaller than or equal to workbuf.
-//
-//go:notinheap
type stackObjectBuf struct {
+ _ sys.NotInHeap
stackObjectBufHdr
obj [(_WorkbufSize - unsafe.Sizeof(stackObjectBufHdr{})) / unsafe.Sizeof(stackObject{})]stackObject
}
-//go:notinheap
type stackObjectBufHdr struct {
+ _ sys.NotInHeap
workbufhdr
next *stackObjectBuf
}
@@ -147,9 +145,8 @@
// A stackObject represents a variable on the stack that has had
// its address taken.
-//
-//go:notinheap
type stackObject struct {
+ _ sys.NotInHeap
off uint32 // offset above stack.lo
size uint32 // size of object
r *stackObjectRecord // info of the object (for ptr/nonptr bits). nil if object has been scanned.
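The pattern throughout this file (and the rest of the diff) replaces the //go:notinheap pragma with an embedded zero-size marker field. A hedged sketch of the shape of that change, using an invented stand-in marker because runtime/internal/sys cannot be imported outside the runtime:

package main

import (
    "fmt"
    "unsafe"
)

// notInHeap stands in for runtime/internal/sys.NotInHeap: a zero-size
// marker type. Inside the runtime the toolchain treats any type that embeds
// the real marker as not-in-heap; here it is just an ordinary empty struct,
// shown only to illustrate the shape of the declaration change.
type notInHeap struct{}

// Before: a //go:notinheap directive above the type declaration.
// After: the marker embedded as the first (blank) field.
type workbufLike struct {
    _    notInHeap
    next *workbufLike
    nobj int
}

func main() {
    // The zero-size marker at the front does not change the layout.
    plain := struct {
        next *workbufLike
        nobj int
    }{}
    fmt.Println(unsafe.Sizeof(workbufLike{}) == unsafe.Sizeof(plain)) // true
}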
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index de57f18..6ccf090 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -33,10 +33,9 @@
// State of background sweep.
type sweepdata struct {
- lock mutex
- g *g
- parked bool
- started bool
+ lock mutex
+ g *g
+ parked bool
nbgsweep uint32
npausesweep uint32
@@ -177,7 +176,8 @@
return
}
if debug.gcpacertrace > 0 {
- print("pacer: sweep done at heap size ", gcController.heapLive>>20, "MB; allocated ", (gcController.heapLive-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept.Load(), " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
+ live := gcController.heapLive.Load()
+ print("pacer: sweep done at heap size ", live>>20, "MB; allocated ", (live-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept.Load(), " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
}
return
}
@@ -278,12 +278,34 @@
goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1)
for {
+ // bgsweep attempts to be a "low priority" goroutine by intentionally
+ // yielding time. It's OK if it doesn't run, because goroutines allocating
+ // memory will sweep and ensure that all spans are swept before the next
+ // GC cycle. We really only want to run when we're idle.
+ //
+ // However, calling Gosched after each span swept produces a tremendous
+ // amount of tracing events, sometimes up to 50% of events in a trace. It's
+ // also inefficient to call into the scheduler so much because sweeping a
+ // single span is in general a very fast operation, taking as little as 30 ns
+ // on modern hardware. (See #54767.)
+ //
+ // As a result, bgsweep sweeps in batches, and only calls into the scheduler
+ // at the end of every batch. Furthermore, it only yields its time if there
+ // isn't spare idle time available on other cores. If there's available idle
+ // time, helping to sweep can reduce allocation latencies by getting ahead of
+ // the proportional sweeper and having spans ready to go for allocation.
+ const sweepBatchSize = 10
+ nSwept := 0
for sweepone() != ^uintptr(0) {
sweep.nbgsweep++
- Gosched()
+ nSwept++
+ if nSwept%sweepBatchSize == 0 {
+ goschedIfBusy()
+ }
}
for freeSomeWbufs(true) {
- Gosched()
+ // N.B. freeSomeWbufs is already batched internally.
+ goschedIfBusy()
}
lock(&sweep.lock)
if !isSweepDone() {
@@ -431,8 +453,8 @@
// Caller must disable preemption.
// Otherwise when this function returns the span can become unswept again
// (if GC is triggered on another goroutine).
- _g_ := getg()
- if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+ gp := getg()
+ if gp.m.locks == 0 && gp.m.mallocing == 0 && gp != gp.m.g0 {
throw("mspan.ensureSwept: m is not locked")
}
@@ -470,8 +492,8 @@
func (sl *sweepLocked) sweep(preserve bool) bool {
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
- _g_ := getg()
- if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+ gp := getg()
+ if gp.m.locks == 0 && gp.m.mallocing == 0 && gp != gp.m.g0 {
throw("mspan.sweep: m is not locked")
}
@@ -579,13 +601,14 @@
if debug.clobberfree != 0 {
clobberfree(unsafe.Pointer(x), size)
}
- if raceenabled {
+ // User arenas are handled on explicit free.
+ if raceenabled && !s.isUserArenaChunk {
racefree(unsafe.Pointer(x), size)
}
- if msanenabled {
+ if msanenabled && !s.isUserArenaChunk {
msanfree(unsafe.Pointer(x), size)
}
- if asanenabled {
+ if asanenabled && !s.isUserArenaChunk {
asanpoison(unsafe.Pointer(x), size)
}
}
@@ -625,6 +648,7 @@
s.allocCount = nalloc
s.freeindex = 0 // reset allocation index to start of span.
+ s.freeIndexForScan = 0
if trace.enabled {
getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize
}
@@ -659,6 +683,41 @@
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
+ if s.isUserArenaChunk {
+ if preserve {
+ // This is a case that should never be handled by a sweeper that
+ // preserves the span for reuse.
+ throw("sweep: tried to preserve a user arena span")
+ }
+ if nalloc > 0 {
+ // There still exist pointers into the span or the span hasn't been
+ // freed yet. It's not ready to be reused. Put it back on the
+ // full swept list for the next cycle.
+ mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s)
+ return false
+ }
+
+ // It's only at this point that the sweeper doesn't actually need to look
+ // at this arena anymore, so subtract from pagesInUse now.
+ mheap_.pagesInUse.Add(-s.npages)
+ s.state.set(mSpanDead)
+
+ // The arena is ready to be recycled. Remove it from the quarantine list
+ // and place it on the ready list. Don't add it back to any sweep lists.
+ systemstack(func() {
+ // It's the arena code's responsibility to get the chunk on the quarantine
+ // list by the time all references to the chunk are gone.
+ if s.list != &mheap_.userArena.quarantineList {
+ throw("user arena span is on the wrong list")
+ }
+ lock(&mheap_.lock)
+ mheap_.userArena.quarantineList.remove(s)
+ mheap_.userArena.readyList.insert(s)
+ unlock(&mheap_.lock)
+ })
+ return false
+ }
+
if spc.sizeclass() != 0 {
// Handle spans for small objects.
if nfreed > 0 {
@@ -814,11 +873,30 @@
traceGCSweepStart()
}
+ // Fix debt if necessary.
retry:
sweptBasis := mheap_.pagesSweptBasis.Load()
-
- // Fix debt if necessary.
- newHeapLive := uintptr(atomic.Load64(&gcController.heapLive)-mheap_.sweepHeapLiveBasis) + spanBytes
+ live := gcController.heapLive.Load()
+ liveBasis := mheap_.sweepHeapLiveBasis
+ newHeapLive := spanBytes
+ if liveBasis < live {
+ // Only do this subtraction when we don't overflow. Otherwise, pagesTarget
+ // might be computed as something really huge, causing us to get stuck
+ // sweeping here until the next mark phase.
+ //
+ // Overflow can happen here if gcPaceSweeper is called concurrently with
+ // sweeping (i.e. not during a STW, like it usually is) because this code
+ // is intentionally racy. A concurrent call to gcPaceSweeper can happen
+ // if a GC tuning parameter is modified and we read an older value of
+ // heapLive than what was used to set the basis.
+ //
+ // This state should be transient, so it's fine to just let newHeapLive
+ // be a relatively small number. We'll probably just skip this attempt to
+ // sweep.
+ //
+ // See issue #57523.
+ newHeapLive += uintptr(live - liveBasis)
+ }
pagesTarget := int64(mheap_.sweepPagesPerByte*float64(newHeapLive)) - int64(callerSweepPages)
for pagesTarget > int64(mheap_.pagesSwept.Load()-sweptBasis) {
if sweepone() == ^uintptr(0) {
@@ -862,7 +940,7 @@
// trigger. Compute the ratio of in-use pages to sweep
// per byte allocated, accounting for the fact that
// some might already be swept.
- heapLiveBasis := atomic.Load64(&gcController.heapLive)
+ heapLiveBasis := gcController.heapLive.Load()
heapDistance := int64(trigger) - int64(heapLiveBasis)
// Add a little margin so rounding errors and
// concurrent sweep are less likely to leave pages
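The guarded subtraction added above (see issue #57523) matters because heapLive and the sweep basis are unsigned: if a racy read observes a basis larger than the live value, a naive subtraction wraps around to an enormous sweep target. A small demonstration with invented values:

package main

import "fmt"

func main() {
    var live, liveBasis uint64 = 1 << 20, 2 << 20 // basis read "ahead of" live

    naive := live - liveBasis
    fmt.Println(naive) // wraps to roughly 1.8e19: a bogus, huge allocation delta

    var guarded uint64
    if liveBasis < live {
        guarded = live - liveBasis
    }
    fmt.Println(guarded) // 0: the sweep attempt is effectively skipped instead
}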
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index 424de2f..7ab8975 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -7,6 +7,7 @@
import (
"internal/goarch"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -320,8 +321,8 @@
nobj int
}
-//go:notinheap
type workbuf struct {
+ _ sys.NotInHeap
workbufhdr
// account for the above fields
obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / goarch.PtrSize]uintptr
@@ -420,7 +421,7 @@
}
// trygetfull tries to get a full or partially empty workbuffer.
-// If one is not immediately available return nil
+// If one is not immediately available return nil.
//
//go:nowritebarrier
func trygetfull() *workbuf {
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index b19a2ff..1401e92 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -12,6 +12,7 @@
"internal/cpu"
"internal/goarch"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -57,15 +58,13 @@
//
// mheap must not be heap-allocated because it contains mSpanLists,
// which must not be heap-allocated.
-//
-//go:notinheap
type mheap struct {
+ _ sys.NotInHeap
+
// lock must only be acquired on the system stack, otherwise a g
// could self-deadlock if its stack grows with the lock held.
lock mutex
- _ uint32 // 8-byte align pages so its alignment is consistent with tests.
-
pages pageAlloc // page allocation data structure
sweepgen uint32 // sweep generation, see comment in mspan; written during STW
@@ -83,8 +82,6 @@
// access (since that may free the backing store).
allspans []*mspan // all spans out there
- // _ uint32 // align uint64 fields on 32-bit for atomics
-
// Proportional sweep
//
// These parameters represent a linear function from gcController.heapLive
@@ -103,13 +100,11 @@
// accounting for current progress. If we could only adjust
// the slope, it would create a discontinuity in debt if any
// progress has already been made.
- pagesInUse atomic.Uint64 // pages of spans in stats mSpanInUse
- pagesSwept atomic.Uint64 // pages swept this cycle
- pagesSweptBasis atomic.Uint64 // pagesSwept to use as the origin of the sweep ratio
- sweepHeapLiveBasis uint64 // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
- sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
- // TODO(austin): pagesInUse should be a uintptr, but the 386
- // compiler can't 8-byte align fields.
+ pagesInUse atomic.Uintptr // pages of spans in stats mSpanInUse
+ pagesSwept atomic.Uint64 // pages swept this cycle
+ pagesSweptBasis atomic.Uint64 // pagesSwept to use as the origin of the sweep ratio
+ sweepHeapLiveBasis uint64 // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
+ sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
// Page reclaimer state
@@ -190,8 +185,6 @@
base, end uintptr
}
- _ uint32 // ensure 64-bit alignment of central
-
// central free lists for small size classes.
// the padding makes sure that the mcentrals are
// spaced CacheLinePadSize bytes apart, so that each mcentral.lock
@@ -199,7 +192,7 @@
// central is indexed by spanClass.
central [numSpanClasses]struct {
mcentral mcentral
- pad [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
+ pad [(cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
}
spanalloc fixalloc // allocator for span*
@@ -210,6 +203,25 @@
speciallock mutex // lock for special record allocators.
arenaHintAlloc fixalloc // allocator for arenaHints
+ // User arena state.
+ //
+ // Protected by mheap_.lock.
+ userArena struct {
+ // arenaHints is a list of addresses at which to attempt to
+ // add more heap arenas for user arena chunks. This is initially
+ // populated with a set of general hint addresses, and grown with
+ // the bounds of actual heap arena ranges.
+ arenaHints *arenaHint
+
+ // quarantineList is a list of user arena spans that have been set to fault, but
+ // are waiting for all pointers into them to go away. Sweeping handles
+ // identifying when this is true, and moves the span to the ready list.
+ quarantineList mSpanList
+
+ // readyList is a list of empty user arena spans that are ready for reuse.
+ readyList mSpanList
+ }
+
unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
}
@@ -217,13 +229,26 @@
// A heapArena stores metadata for a heap arena. heapArenas are stored
// outside of the Go heap and accessed via the mheap_.arenas index.
-//
-//go:notinheap
type heapArena struct {
+ _ sys.NotInHeap
+
// bitmap stores the pointer/scalar bitmap for the words in
- // this arena. See mbitmap.go for a description. Use the
- // heapBits type to access this.
- bitmap [heapArenaBitmapBytes]byte
+ // this arena. See mbitmap.go for a description.
+ // This array uses 1 bit per word of heap, or 1.6% of the heap size (for 64-bit).
+ bitmap [heapArenaBitmapWords]uintptr
+
+ // If the ith bit of noMorePtrs is true, then there are no more
+ // pointers for the object containing the word described by the
+ // high bit of bitmap[i].
+ // In that case, bitmap[i+1], ... must be zero until the start
+ // of the next object.
+ // We never operate on these entries using bit-parallel techniques,
+ // so it is ok if they are small. Also, they can't be bigger than
+ // uint16 because at that size a single noMorePtrs entry
+ // represents 8K of memory, the minimum size of a span. Any larger
+ // and we'd have to worry about concurrent updates.
+ // This array uses 1 bit per word of bitmap, or .024% of the heap size (for 64-bit).
+ noMorePtrs [heapArenaBitmapWords / 8]uint8
// spans maps from virtual address page ID within this arena to *mspan.
// For allocated spans, their pages map to the span itself.
@@ -290,9 +315,8 @@
// arenaHint is a hint for where to grow the heap arenas. See
// mheap_.arenaHints.
-//
-//go:notinheap
type arenaHint struct {
+ _ sys.NotInHeap
addr uintptr
down bool
next *arenaHint
@@ -347,34 +371,39 @@
"mSpanDead",
"mSpanInUse",
"mSpanManual",
- "mSpanFree",
}
-// mSpanStateBox holds an mSpanState and provides atomic operations on
-// it. This is a separate type to disallow accidental comparison or
-// assignment with mSpanState.
+// mSpanStateBox holds an atomic.Uint8 to provide atomic operations on
+// an mSpanState. This is a separate type to disallow accidental comparison
+// or assignment with mSpanState.
type mSpanStateBox struct {
- s mSpanState
+ s atomic.Uint8
}
+// It is nosplit to match get, below.
+
+//go:nosplit
func (b *mSpanStateBox) set(s mSpanState) {
- atomic.Store8((*uint8)(&b.s), uint8(s))
+ b.s.Store(uint8(s))
}
+// It is nosplit because it's called indirectly by typedmemclr,
+// which must not be preempted.
+
+//go:nosplit
func (b *mSpanStateBox) get() mSpanState {
- return mSpanState(atomic.Load8((*uint8)(&b.s)))
+ return mSpanState(b.s.Load())
}
// mSpanList heads a linked list of spans.
-//
-//go:notinheap
type mSpanList struct {
+ _ sys.NotInHeap
first *mspan // first span in list, or nil if none
last *mspan // last span in list, or nil if none
}
-//go:notinheap
type mspan struct {
+ _ sys.NotInHeap
next *mspan // next span in list, or nil if none
prev *mspan // previous span in list, or nil if none
list *mSpanList // For debugging. TODO: Remove.
@@ -451,11 +480,21 @@
spanclass spanClass // size class and noscan (uint8)
state mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
needzero uint8 // needs to be zeroed before allocation
+ isUserArenaChunk bool // whether or not this span represents a user arena
allocCountBeforeCache uint16 // a copy of allocCount that is stored just before this span is cached
elemsize uintptr // computed from sizeclass or from npages
limit uintptr // end of data in span
speciallock mutex // guards specials list
specials *special // linked list of special records sorted by offset.
+ userArenaChunkFree addrRange // interval for managing chunk allocation
+
+ // freeIndexForScan is like freeindex, except that freeindex is
+ // used by the allocator whereas freeIndexForScan is used by the
+ // GC scanner. They are two fields so that the GC sees the object
+ // is allocated only when the object and the heap bits are
+ // initialized (see also the assignment of freeIndexForScan in
+ // mallocgc, and issue 54596).
+ freeIndexForScan uintptr
}
func (s *mspan) base() uintptr {
@@ -565,6 +604,12 @@
type arenaIdx uint
+// l1 returns the "l1" portion of an arenaIdx.
+//
+// Marked nosplit because it's called by spanOf and other nosplit
+// functions.
+//
+//go:nosplit
func (i arenaIdx) l1() uint {
if arenaL1Bits == 0 {
// Let the compiler optimize this away if there's no
@@ -575,6 +620,12 @@
}
}
+// l2 returns the "l2" portion of an arenaIdx.
+//
+// Marked nosplit because it's called by spanOf and other nosplit
+// functions.
+//
+//go:nosplit
func (i arenaIdx) l2() uint {
if arenaL1Bits == 0 {
return uint(i)
@@ -1183,6 +1234,7 @@
base = alignUp(base, physPageSize)
scav = h.pages.allocRange(base, npages)
}
+
if base == 0 {
// Try to acquire a base address.
base, scav = h.pages.alloc(npages)
@@ -1207,56 +1259,6 @@
unlock(&h.lock)
HaveSpan:
- // At this point, both s != nil and base != 0, and the heap
- // lock is no longer held. Initialize the span.
- s.init(base, npages)
- if h.allocNeedsZero(base, npages) {
- s.needzero = 1
- }
- nbytes := npages * pageSize
- if typ.manual() {
- s.manualFreeList = 0
- s.nelems = 0
- s.limit = s.base() + s.npages*pageSize
- s.state.set(mSpanManual)
- } else {
- // We must set span properties before the span is published anywhere
- // since we're not holding the heap lock.
- s.spanclass = spanclass
- if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
- s.elemsize = nbytes
- s.nelems = 1
- s.divMul = 0
- } else {
- s.elemsize = uintptr(class_to_size[sizeclass])
- s.nelems = nbytes / s.elemsize
- s.divMul = class_to_divmagic[sizeclass]
- }
-
- // Initialize mark and allocation structures.
- s.freeindex = 0
- s.allocCache = ^uint64(0) // all 1s indicating all free.
- s.gcmarkBits = newMarkBits(s.nelems)
- s.allocBits = newAllocBits(s.nelems)
-
- // It's safe to access h.sweepgen without the heap lock because it's
- // only ever updated with the world stopped and we run on the
- // systemstack which blocks a STW transition.
- atomic.Store(&s.sweepgen, h.sweepgen)
-
- // Now that the span is filled in, set its state. This
- // is a publication barrier for the other fields in
- // the span. While valid pointers into this span
- // should never be visible until the span is returned,
- // if the garbage collector finds an invalid pointer,
- // access to the span may race with initialization of
- // the span. We resolve this race by atomically
- // setting the state after the span is fully
- // initialized, and atomically checking the state in
- // any situation where a pointer is suspect.
- s.state.set(mSpanInUse)
- }
-
// Decide if we need to scavenge in response to what we just allocated.
// Specifically, we track the maximum amount of memory to scavenge of all
// the alternatives below, assuming that the maximum satisfies *all*
@@ -1304,6 +1306,7 @@
// There are a few very limited circumstances where we won't have a P here.
// It's OK to simply skip scavenging in these cases. Something else will notice
// and pick up the tab.
+ var now int64
if pp != nil && bytesToScavenge > 0 {
// Measure how long we spent scavenging and add that measurement to the assist
// time so we can track it for the GC CPU limiter.
@@ -1319,14 +1322,18 @@
})
// Finish up accounting.
- now := nanotime()
+ now = nanotime()
if track {
pp.limiterEvent.stop(limiterEventScavengeAssist, now)
}
- h.pages.scav.assistTime.Add(now - start)
+ scavenge.assistTime.Add(now - start)
}
+ // Initialize the span.
+ h.initSpan(s, typ, spanclass, base, npages)
+
// Commit and account for any scavenged memory that the span now owns.
+ nbytes := npages * pageSize
if scav != 0 {
// sysUsed all the pages that are actually available
// in the span since some of them might be scavenged.
@@ -1354,6 +1361,64 @@
}
memstats.heapStats.release()
+ pageTraceAlloc(pp, now, base, npages)
+ return s
+}
+
+// initSpan initializes a blank span s which will represent the range
+// [base, base+npages*pageSize). typ is the type of span being allocated.
+func (h *mheap) initSpan(s *mspan, typ spanAllocType, spanclass spanClass, base, npages uintptr) {
+ // At this point, both s != nil and base != 0, and the heap
+ // lock is no longer held. Initialize the span.
+ s.init(base, npages)
+ if h.allocNeedsZero(base, npages) {
+ s.needzero = 1
+ }
+ nbytes := npages * pageSize
+ if typ.manual() {
+ s.manualFreeList = 0
+ s.nelems = 0
+ s.limit = s.base() + s.npages*pageSize
+ s.state.set(mSpanManual)
+ } else {
+ // We must set span properties before the span is published anywhere
+ // since we're not holding the heap lock.
+ s.spanclass = spanclass
+ if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
+ s.elemsize = nbytes
+ s.nelems = 1
+ s.divMul = 0
+ } else {
+ s.elemsize = uintptr(class_to_size[sizeclass])
+ s.nelems = nbytes / s.elemsize
+ s.divMul = class_to_divmagic[sizeclass]
+ }
+
+ // Initialize mark and allocation structures.
+ s.freeindex = 0
+ s.freeIndexForScan = 0
+ s.allocCache = ^uint64(0) // all 1s indicating all free.
+ s.gcmarkBits = newMarkBits(s.nelems)
+ s.allocBits = newAllocBits(s.nelems)
+
+ // It's safe to access h.sweepgen without the heap lock because it's
+ // only ever updated with the world stopped and we run on the
+ // systemstack which blocks a STW transition.
+ atomic.Store(&s.sweepgen, h.sweepgen)
+
+ // Now that the span is filled in, set its state. This
+ // is a publication barrier for the other fields in
+ // the span. While valid pointers into this span
+ // should never be visible until the span is returned,
+ // if the garbage collector finds an invalid pointer,
+ // access to the span may race with initialization of
+ // the span. We resolve this race by atomically
+ // setting the state after the span is fully
+ // initialized, and atomically checking the state in
+ // any situation where a pointer is suspect.
+ s.state.set(mSpanInUse)
+ }
+
// Publish the span in various locations.
// This is safe to call without the lock held because the slots
@@ -1373,14 +1438,12 @@
atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
// Update related page sweeper stats.
- h.pagesInUse.Add(int64(npages))
+ h.pagesInUse.Add(npages)
}
// Make sure the newly allocated span will be observed
// by the GC before pointers into the span are published.
publicationBarrier()
-
- return s
}
// Try to add at least npage pages of memory to the heap,
@@ -1406,7 +1469,7 @@
// Not enough room in the current arena. Allocate more
// arena space. This may not be contiguous with the
// current arena, so we have to request the full ask.
- av, asize := h.sysAlloc(ask)
+ av, asize := h.sysAlloc(ask, &h.arenaHints, true)
if av == nil {
inUse := gcController.heapFree.load() + gcController.heapReleased.load() + gcController.heapInUse.load()
print("runtime: out of memory: cannot allocate ", ask, "-byte block (", inUse, " in use)\n")
@@ -1474,6 +1537,8 @@
// Free the span back into the heap.
func (h *mheap) freeSpan(s *mspan) {
systemstack(func() {
+ pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
+
lock(&h.lock)
if msanenabled {
// Tell msan that this entire span is no longer in use.
@@ -1504,6 +1569,8 @@
//
//go:systemstack
func (h *mheap) freeManual(s *mspan, typ spanAllocType) {
+ pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
+
s.needzero = 1
lock(&h.lock)
h.freeSpanLocked(s, typ)
@@ -1519,11 +1586,14 @@
throw("mheap.freeSpanLocked - invalid stack free")
}
case mSpanInUse:
+ if s.isUserArenaChunk {
+ throw("mheap.freeSpanLocked - invalid free of user arena chunk")
+ }
if s.allocCount != 0 || s.sweepgen != h.sweepgen {
print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
throw("mheap.freeSpanLocked - invalid free")
}
- h.pagesInUse.Add(-int64(s.npages))
+ h.pagesInUse.Add(-s.npages)
// Clear in-use bit in arena page bitmap.
arena, pageIdx, pageMask := pageIndexOf(s.base())
@@ -1602,6 +1672,7 @@
span.specials = nil
span.needzero = 0
span.freeindex = 0
+ span.freeIndexForScan = 0
span.allocBits = nil
span.gcmarkBits = nil
span.state.set(mSpanDead)
@@ -1715,8 +1786,8 @@
// if that happens.
)
-//go:notinheap
type special struct {
+ _ sys.NotInHeap
next *special // linked list in span
offset uint16 // span offset of object
kind byte // kind of special
@@ -1836,9 +1907,8 @@
//
// specialfinalizer is allocated from non-GC'd memory, so any heap
// pointers must be specially handled.
-//
-//go:notinheap
type specialfinalizer struct {
+ _ sys.NotInHeap
special special
fn *funcval // May be a heap pointer.
nret uintptr
@@ -1862,12 +1932,14 @@
// situation where it's possible that markrootSpans
// has already run but mark termination hasn't yet.
if gcphase != _GCoff {
- base, _, _ := findObject(uintptr(p), 0, 0)
+ base, span, _ := findObject(uintptr(p), 0, 0)
mp := acquirem()
gcw := &mp.p.ptr().gcw
// Mark everything reachable from the object
// so it's retained for the finalizer.
- scanobject(base, gcw)
+ if !span.spanclass.noscan() {
+ scanobject(base, gcw)
+ }
// Mark the finalizer itself, since the
// special isn't part of the GC'd heap.
scanblock(uintptr(unsafe.Pointer(&s.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
@@ -1895,9 +1967,8 @@
}
// The described object is being heap profiled.
-//
-//go:notinheap
type specialprofile struct {
+ _ sys.NotInHeap
special special
b *bucket
}
@@ -1976,14 +2047,15 @@
}
}
-// gcBits is an alloc/mark bitmap. This is always used as *gcBits.
-//
-//go:notinheap
-type gcBits uint8
+// gcBits is an alloc/mark bitmap. This is always used as gcBits.x.
+type gcBits struct {
+ _ sys.NotInHeap
+ x uint8
+}
// bytep returns a pointer to the n'th byte of b.
func (b *gcBits) bytep(n uintptr) *uint8 {
- return addb((*uint8)(b), n)
+ return addb(&b.x, n)
}
// bitp returns a pointer to the byte containing bit n and a mask for
@@ -2000,8 +2072,8 @@
next uintptr // *gcBits triggers recursive type bug. (issue 14620)
}
-//go:notinheap
type gcBitsArena struct {
+ _ sys.NotInHeap
// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
free uintptr // free is the index into bits of the next free byte; read/write atomically
next *gcBitsArena
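One easy-to-miss change above is the mcentral padding expression, which gained an outer modulo so that a struct whose size is already a multiple of the cache line gets zero bytes of padding instead of a wasted full line. A quick sketch with invented sizes:

package main

import "fmt"

// padBytes mirrors the new expression: (line - size%line) % line.
func padBytes(size, line uintptr) uintptr {
    return (line - size%line) % line
}

func main() {
    const cacheLine = 64
    for _, size := range []uintptr{40, 64, 100, 128} {
        pad := padBytes(size, cacheLine)
        fmt.Printf("size %3d -> pad %2d (padded total %d)\n", size, pad, size+pad)
    }
    // size  40 -> pad 24 (padded total 64)
    // size  64 -> pad  0 (padded total 64)   <- the old formula would pad by a full 64
    // size 100 -> pad 28 (padded total 128)
    // size 128 -> pad  0 (padded total 128)
}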
diff --git a/src/runtime/mklockrank.go b/src/runtime/mklockrank.go
new file mode 100644
index 0000000..bc15e57
--- /dev/null
+++ b/src/runtime/mklockrank.go
@@ -0,0 +1,366 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// mklockrank records the static rank graph of the locks in the
+// runtime and generates the rank checking structures in lockrank.go.
+package main
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "go/format"
+ "internal/dag"
+ "io"
+ "log"
+ "os"
+ "strings"
+)
+
+// ranks describes the lock rank graph. See "go doc internal/dag" for
+// the syntax.
+//
+// "a < b" means a must be acquired before b if both are held
+// (or, if b is held, a cannot be acquired).
+//
+// "NONE < a" means no locks may be held when a is acquired.
+//
+// If a lock is not given a rank, then it is assumed to be a leaf
+// lock, which means no other lock can be acquired while it is held.
+// Therefore, leaf locks do not need to be given an explicit rank.
+//
+// Ranks in all caps are pseudo-nodes that help define order, but do
+// not actually define a rank.
+//
+// TODO: It's often hard to correlate rank names to locks. Change
+// these to be more consistent with the locks they label.
+const ranks = `
+# Sysmon
+NONE
+< sysmon
+< scavenge, forcegc;
+
+# Defer
+NONE < defer;
+
+# GC
+NONE <
+ sweepWaiters,
+ assistQueue,
+ sweep;
+
+# Scheduler, timers, netpoll
+NONE < pollDesc, cpuprof;
+assistQueue,
+ cpuprof,
+ forcegc,
+ pollDesc, # pollDesc can interact with timers, which can lock sched.
+ scavenge,
+ sweep,
+ sweepWaiters
+< sched;
+sched < allg, allp;
+allp < timers;
+timers < netpollInit;
+
+# Channels
+scavenge, sweep < hchan;
+NONE < notifyList;
+hchan, notifyList < sudog;
+
+# RWMutex
+NONE < rwmutexW;
+rwmutexW, sysmon < rwmutexR;
+
+# Semaphores
+NONE < root;
+
+# Itabs
+NONE
+< itab
+< reflectOffs;
+
+# User arena state
+NONE < userArenaState;
+
+# Tracing without a P uses a global trace buffer.
+scavenge
+# Above TRACEGLOBAL can emit a trace event without a P.
+< TRACEGLOBAL
+# Below TRACEGLOBAL manages the global tracing buffer.
+# Note that traceBuf eventually chains to MALLOC, but we never get that far
+# in the situation where there's no P.
+< traceBuf;
+# Starting/stopping tracing traces strings.
+traceBuf < traceStrings;
+
+# Malloc
+allg,
+ hchan,
+ notifyList,
+ reflectOffs,
+ timers,
+ traceStrings,
+ userArenaState
+# Above MALLOC are things that can allocate memory.
+< MALLOC
+# Below MALLOC is the malloc implementation.
+< fin,
+ gcBitsArenas,
+ mheapSpecial,
+ mspanSpecial,
+ spanSetSpine,
+ MPROF;
+
+# Memory profiling
+MPROF < profInsert, profBlock, profMemActive;
+profMemActive < profMemFuture;
+
+# Stack allocation and copying
+gcBitsArenas,
+ netpollInit,
+ profBlock,
+ profInsert,
+ profMemFuture,
+ spanSetSpine,
+ fin,
+ root
+# Anything that can grow the stack can acquire STACKGROW.
+# (Most higher layers imply STACKGROW, like MALLOC.)
+< STACKGROW
+# Below STACKGROW is the stack allocator/copying implementation.
+< gscan;
+gscan, rwmutexR < stackpool;
+gscan < stackLarge;
+# Generally, hchan must be acquired before gscan. But in one case,
+# where we suspend a G and then shrink its stack, syncadjustsudogs
+# can acquire hchan locks while holding gscan. To allow this case,
+# we use hchanLeaf instead of hchan.
+gscan < hchanLeaf;
+
+# Write barrier
+defer,
+ gscan,
+ mspanSpecial,
+ sudog
+# Anything that can have write barriers can acquire WB.
+# Above WB, we can have write barriers.
+< WB
+# Below WB is the write barrier implementation.
+< wbufSpans;
+
+# Span allocator
+stackLarge,
+ stackpool,
+ wbufSpans
+# Above mheap is anything that can call the span allocator.
+< mheap;
+# Below mheap is the span allocator implementation.
+mheap, mheapSpecial < globalAlloc;
+
+# Execution tracer events (with a P)
+hchan,
+ mheap,
+ root,
+ sched,
+ traceStrings,
+ notifyList,
+ fin
+# Above TRACE is anything that can create a trace event
+< TRACE
+< trace
+< traceStackTab;
+
+# panic is handled specially. It is implicitly below all other locks.
+NONE < panic;
+# deadlock is not acquired while holding panic, but it also needs to be
+# below all other locks.
+panic < deadlock;
+`
+
+// cyclicRanks lists lock ranks that allow multiple locks of the same
+// rank to be acquired simultaneously. The runtime enforces ordering
+// within these ranks using a separate mechanism.
+var cyclicRanks = map[string]bool{
+ // Multiple timers are locked simultaneously in destroy().
+ "timers": true,
+ // Multiple hchans are acquired in hchan.sortkey() order in
+ // select.
+ "hchan": true,
+ // Multiple hchanLeafs are acquired in hchan.sortkey() order in
+ // syncadjustsudogs().
+ "hchanLeaf": true,
+ // The point of the deadlock lock is to deadlock.
+ "deadlock": true,
+}
+
+func main() {
+ flagO := flag.String("o", "", "write to `file` instead of stdout")
+ flagDot := flag.Bool("dot", false, "emit graphviz output instead of Go")
+ flag.Parse()
+ if flag.NArg() != 0 {
+ fmt.Fprintf(os.Stderr, "too many arguments")
+ os.Exit(2)
+ }
+
+ g, err := dag.Parse(ranks)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ var out []byte
+ if *flagDot {
+ var b bytes.Buffer
+ g.TransitiveReduction()
+ // Add cyclic edges for visualization.
+ for k := range cyclicRanks {
+ g.AddEdge(k, k)
+ }
+ // Reverse the graph. It's much easier to read this as
+ // a "<" partial order than a ">" partial order. This
+ // way, locks are acquired from the top going down
+ // and time moves forward over the edges instead of
+ // backward.
+ g.Transpose()
+ generateDot(&b, g)
+ out = b.Bytes()
+ } else {
+ var b bytes.Buffer
+ generateGo(&b, g)
+ out, err = format.Source(b.Bytes())
+ if err != nil {
+ log.Fatal(err)
+ }
+ }
+
+ if *flagO != "" {
+ err = os.WriteFile(*flagO, out, 0666)
+ } else {
+ _, err = os.Stdout.Write(out)
+ }
+ if err != nil {
+ log.Fatal(err)
+ }
+}
+
+func generateGo(w io.Writer, g *dag.Graph) {
+ fmt.Fprintf(w, `// Code generated by mklockrank.go; DO NOT EDIT.
+
+package runtime
+
+type lockRank int
+
+`)
+
+ // Create numeric ranks.
+ topo := g.Topo()
+ for i, j := 0, len(topo)-1; i < j; i, j = i+1, j-1 {
+ topo[i], topo[j] = topo[j], topo[i]
+ }
+ fmt.Fprintf(w, `
+// Constants representing the ranks of all non-leaf runtime locks, in rank order.
+// Locks with lower rank must be taken before locks with higher rank,
+// in addition to satisfying the partial order in lockPartialOrder.
+// A few ranks allow self-cycles, which are specified in lockPartialOrder.
+const (
+ lockRankUnknown lockRank = iota
+
+`)
+ for _, rank := range topo {
+ if isPseudo(rank) {
+ fmt.Fprintf(w, "\t// %s\n", rank)
+ } else {
+ fmt.Fprintf(w, "\t%s\n", cname(rank))
+ }
+ }
+ fmt.Fprintf(w, `)
+
+// lockRankLeafRank is the rank of a lock that does not have a declared rank,
+// and hence is a leaf lock.
+const lockRankLeafRank lockRank = 1000
+`)
+
+ // Create string table.
+ fmt.Fprintf(w, `
+// lockNames gives the names associated with each of the above ranks.
+var lockNames = []string{
+`)
+ for _, rank := range topo {
+ if !isPseudo(rank) {
+ fmt.Fprintf(w, "\t%s: %q,\n", cname(rank), rank)
+ }
+ }
+ fmt.Fprintf(w, `}
+
+func (rank lockRank) String() string {
+ if rank == 0 {
+ return "UNKNOWN"
+ }
+ if rank == lockRankLeafRank {
+ return "LEAF"
+ }
+ if rank < 0 || int(rank) >= len(lockNames) {
+ return "BAD RANK"
+ }
+ return lockNames[rank]
+}
+`)
+
+ // Create partial order structure.
+ fmt.Fprintf(w, `
+// lockPartialOrder is the transitive closure of the lock rank graph.
+// An entry for rank X lists all of the ranks that can already be held
+// when rank X is acquired.
+//
+// Lock ranks that allow self-cycles list themselves.
+var lockPartialOrder [][]lockRank = [][]lockRank{
+`)
+ for _, rank := range topo {
+ if isPseudo(rank) {
+ continue
+ }
+ list := []string{}
+ for _, before := range g.Edges(rank) {
+ if !isPseudo(before) {
+ list = append(list, cname(before))
+ }
+ }
+ if cyclicRanks[rank] {
+ list = append(list, cname(rank))
+ }
+
+ fmt.Fprintf(w, "\t%s: {%s},\n", cname(rank), strings.Join(list, ", "))
+ }
+ fmt.Fprintf(w, "}\n")
+}
+
+// cname returns the Go const name for the given lock rank label.
+func cname(label string) string {
+ return "lockRank" + strings.ToUpper(label[:1]) + label[1:]
+}
+
+func isPseudo(label string) bool {
+ return strings.ToUpper(label) == label
+}
+
+// generateDot emits a Graphviz dot representation of g to w.
+func generateDot(w io.Writer, g *dag.Graph) {
+ fmt.Fprintf(w, "digraph g {\n")
+
+ // Define all nodes.
+ for _, node := range g.Nodes {
+ fmt.Fprintf(w, "%q;\n", node)
+ }
+
+ // Create edges.
+ for _, node := range g.Nodes {
+ for _, to := range g.Edges(node) {
+ fmt.Fprintf(w, "%q -> %q;\n", node, to)
+ }
+ }
+
+ fmt.Fprintf(w, "}\n")
+}
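mklockrank.go compiles the ranks constant above into a numeric rank per lock plus a lockPartialOrder table listing, for each rank, the ranks that may already be held when it is acquired. A rough illustration of how such a table gets consulted; the ranks, names, and edges below are invented, not the generated ones:

package main

import "fmt"

type lockRank int

const (
    rankSched lockRank = iota
    rankAllp
    rankTimers
)

var lockNames = []string{"sched", "allp", "timers"}

// lockPartialOrder[r] lists the ranks allowed to be held while acquiring r.
var lockPartialOrder = [][]lockRank{
    rankSched:  {},
    rankAllp:   {rankSched},
    rankTimers: {rankSched, rankAllp},
}

// checkAcquire reports whether acquiring next is legal while held is held.
func checkAcquire(held, next lockRank) bool {
    for _, ok := range lockPartialOrder[next] {
        if ok == held {
            return true
        }
    }
    return false
}

func main() {
    fmt.Printf("acquire %s holding %s: %v\n",
        lockNames[rankTimers], lockNames[rankSched], checkAcquire(rankSched, rankTimers)) // true
    fmt.Printf("acquire %s holding %s: %v\n",
        lockNames[rankSched], lockNames[rankTimers], checkAcquire(rankTimers, rankSched)) // false
}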
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 28befcb..61d2d02 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -126,6 +126,9 @@
fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
}
fmt.Fprintf(out, "#include \"go_asm.h\"\n")
+ if arch == "amd64" {
+ fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
+ }
fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
}
@@ -267,8 +270,10 @@
// Clear the upper bits to get to a clean state. See issue #37174.
// It is safe here as Go code don't use the upper bits of Y registers.
p("#ifdef GOOS_darwin")
+ p("#ifndef hasAVX")
p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
p("JE 2(PC)")
+ p("#endif")
p("VZEROUPPER")
p("#endif")
diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go
index 3280a62..f0183f6 100644
--- a/src/runtime/mmap.go
+++ b/src/runtime/mmap.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !aix && !darwin && !js && (!linux || !amd64) && (!linux || !arm64) && !openbsd && !plan9 && !solaris && !windows
+//go:build !aix && !darwin && !js && (!linux || !amd64) && (!linux || !arm64) && (!freebsd || !amd64) && !openbsd && !plan9 && !solaris && !windows
package runtime
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 5de25cf..35b2a01 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -48,7 +48,6 @@
package runtime
import (
- "runtime/internal/atomic"
"unsafe"
)
@@ -107,7 +106,7 @@
return chunkIdx((p - arenaBaseOffset) / pallocChunkBytes)
}
-// chunkIndex returns the base address of the palloc chunk at index ci.
+// chunkBase returns the base address of the palloc chunk at index ci.
func chunkBase(ci chunkIdx) uintptr {
return uintptr(ci)*pallocChunkBytes + arenaBaseOffset
}
@@ -267,25 +266,16 @@
// All access is protected by the mheapLock.
inUse addrRanges
- _ uint32 // Align scav so it's easier to reason about alignment within scav.
-
// scav stores the scavenger state.
scav struct {
// index is an efficient index of chunks that have pages available to
// scavenge.
index scavengeIndex
- // released is the amount of memory released this generation.
+ // released is the amount of memory released this scavenge cycle.
//
// Updated atomically.
released uintptr
-
- _ uint32 // Align assistTime for atomics on 32-bit platforms.
-
- // scavengeAssistTime is the time spent scavenging in the last GC cycle.
- //
- // This is reset once a GC cycle ends.
- assistTime atomic.Int64
}
// mheap_.lock. This level of indirection makes it possible
@@ -395,14 +385,13 @@
for c := chunkIndex(base); c < chunkIndex(limit); c++ {
if p.chunks[c.l1()] == nil {
// Create the necessary l2 entry.
- //
- // Store it atomically to avoid races with readers which
- // don't acquire the heap lock.
r := sysAlloc(unsafe.Sizeof(*p.chunks[0]), p.sysStat)
if r == nil {
throw("pageAlloc: out of memory")
}
- atomic.StorepNoWB(unsafe.Pointer(&p.chunks[c.l1()]), r)
+ // Store the new chunk block but avoid a write barrier.
+ // grow is used in call chains that disallow write barriers.
+ *(*uintptr)(unsafe.Pointer(&p.chunks[c.l1()])) = uintptr(r)
}
p.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
}
@@ -678,7 +667,7 @@
// Determine j0, the first index we should start iterating from.
// The searchAddr may help us eliminate iterations if we followed the
- // searchAddr on the previous level or we're on the root leve, in which
+ // searchAddr on the previous level or we're on the root level, in which
// case the searchAddr should be the same as i after levelShift.
j0 := 0
if searchIdx := offAddrToLevelIndex(l, p.searchAddr); searchIdx&^(entriesPerBlock-1) == i {
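On the chunkBase doc fix above: chunkIndex and chunkBase are inverses of one another, mapping an address to its chunk index and an index back to the chunk's first address. A small sketch with invented constants (the real arenaBaseOffset and pallocChunkBytes differ):

package main

import "fmt"

const (
    arenaBaseOffset  uintptr = 0x10000000
    pallocChunkBytes uintptr = 4 << 20 // pretend chunks are 4 MiB
)

func chunkIndex(p uintptr) uintptr { return (p - arenaBaseOffset) / pallocChunkBytes }
func chunkBase(ci uintptr) uintptr { return ci*pallocChunkBytes + arenaBaseOffset }

func main() {
    p := arenaBaseOffset + 9*pallocChunkBytes + 12345 // somewhere inside chunk 9
    ci := chunkIndex(p)
    fmt.Printf("chunk %d, base %#x, offset in chunk %#x\n", ci, chunkBase(ci), p-chunkBase(ci))
    // chunk 9, base 0x12400000, offset in chunk 0x3039
}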
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go
index 5bad4f7..5bc9c84 100644
--- a/src/runtime/mpagecache.go
+++ b/src/runtime/mpagecache.go
@@ -21,8 +21,7 @@
scav uint64 // 64-bit bitmap representing scavenged pages (1 means scavenged)
}
-// empty returns true if the pageCache has any free pages, and false
-// otherwise.
+// empty reports whether the page cache has no free pages.
func (c *pageCache) empty() bool {
return c.cache == 0
}
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 99a67b9..24f8889 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -10,6 +10,7 @@
import (
"internal/abi"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -57,9 +58,8 @@
// creation, including its next and allnext links.
//
// No heap pointers.
-//
-//go:notinheap
type bucket struct {
+ _ sys.NotInHeap
next *bucket
allnext *bucket
typ bucketType // memBucket or blockBucket (includes mutexProfile)
@@ -510,10 +510,18 @@
bp := b.bp()
lock(&profBlockLock)
+ // We want to up-scale the count and cycles according to the
+ // probability that the event was sampled. For block profile events,
+ // the sample probability is 1 if cycles >= rate, and cycles / rate
+ // otherwise. For mutex profile events, the sample probability is 1 / rate.
+ // We scale the events by 1 / (probability the event was sampled).
if which == blockProfile && cycles < rate {
// Remove sampling bias, see discussion on http://golang.org/cl/299991.
bp.count += float64(rate) / float64(cycles)
bp.cycles += rate
+ } else if which == mutexProfile {
+ bp.count += float64(rate)
+ bp.cycles += rate * cycles
} else {
bp.count++
bp.cycles += cycles
@@ -584,17 +592,7 @@
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
-var MemProfileRate int = defaultMemProfileRate(512 * 1024)
-
-// defaultMemProfileRate returns 0 if disableMemoryProfiling is set.
-// It exists primarily for the godoc rendering of MemProfileRate
-// above.
-func defaultMemProfileRate(v int) int {
- if disableMemoryProfiling {
- return 0
- }
- return v
-}
+var MemProfileRate int = 512 * 1024
// disableMemoryProfiling is set by the linker if runtime.MemProfile
// is not used and the link type guarantees nobody else could use it
@@ -917,7 +915,7 @@
// doesn't change during the collection. So, check the finalizer goroutine
// in particular.
n = int(gcount())
- if fingRunning {
+ if fingStatus.Load()&fingRunningFinalizer != 0 {
n++
}
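The new mutexProfile branch above scales each sampled contention event by 1/(sampling probability), i.e. by the profiling rate, so the reported totals estimate all events rather than just the sampled ones. A tiny worked example with invented numbers:

package main

import "fmt"

func main() {
    const rate = 5               // e.g. runtime.SetMutexProfileFraction(5): sample about 1 in 5 events
    sampledCycles := int64(1200) // cycles observed on the one sampled event

    var count, cycles float64
    count += float64(rate)                  // the sample stands for ~5 events
    cycles += float64(rate * sampledCycles) // and ~5x the observed delay

    fmt.Printf("estimated events: %.0f, estimated cycles: %.0f\n", count, cycles)
    // estimated events: 5, estimated cycles: 6000
}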
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
index 9cf83cc..4388d26 100644
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -70,6 +70,30 @@
return a
}
+// takeFromFront takes len bytes from the front of the address range, aligning
+// the base to align first. On success, returns the aligned start of the region
+// taken and true.
+func (a *addrRange) takeFromFront(len uintptr, align uint8) (uintptr, bool) {
+ base := alignUp(a.base.addr(), uintptr(align)) + len
+ if base > a.limit.addr() {
+ return 0, false
+ }
+ a.base = offAddr{base}
+ return base - len, true
+}
+
+// takeFromBack takes len bytes from the end of the address range, aligning
+// the limit to align after subtracting len. On success, returns the aligned
+// start of the region taken and true.
+func (a *addrRange) takeFromBack(len uintptr, align uint8) (uintptr, bool) {
+ limit := alignDown(a.limit.addr()-len, uintptr(align))
+ if a.base.addr() > limit {
+ return 0, false
+ }
+ a.limit = offAddr{limit}
+ return limit, true
+}
+
// removeGreaterEqual removes all addresses in a greater than or equal
// to addr and returns the new range.
func (a addrRange) removeGreaterEqual(addr uintptr) addrRange {
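takeFromFront above carves an aligned region of len bytes off the front of an addrRange and advances the range's base past it. The same arithmetic on plain integers, with invented addresses (alignUp mirrors the runtime helper of that name, for power-of-two alignments):

package main

import "fmt"

func alignUp(x, a uintptr) uintptr { return (x + a - 1) &^ (a - 1) }

func main() {
    base, limit := uintptr(0x1003), uintptr(0x2000) // the range [base, limit)
    const size, align = 0x100, 0x10

    start := alignUp(base, align) // 0x1010: aligned start of the taken region
    newBase := start + size       // 0x1110: what the range's base becomes
    fmt.Printf("took [%#x, %#x), remaining range [%#x, %#x)\n", start, newBase, newBase, limit)
    // took [0x1010, 0x1110), remaining range [0x1110, 0x2000)
}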
diff --git a/src/runtime/msan.go b/src/runtime/msan.go
index c485216..5e2aae1 100644
--- a/src/runtime/msan.go
+++ b/src/runtime/msan.go
@@ -31,8 +31,8 @@
//
//go:nosplit
func msanread(addr unsafe.Pointer, sz uintptr) {
- g := getg()
- if g == nil || g.m == nil || g == g.m.g0 || g == g.m.gsignal {
+ gp := getg()
+ if gp == nil || gp.m == nil || gp == gp.m.g0 || gp == gp.m.gsignal {
return
}
domsanread(addr, sz)
diff --git a/src/runtime/msan/msan.go b/src/runtime/msan/msan.go
index f1bf4e1..4e41f85 100644
--- a/src/runtime/msan/msan.go
+++ b/src/runtime/msan/msan.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build msan && linux && (amd64 || arm64)
+//go:build msan && ((linux && (amd64 || arm64)) || (freebsd && amd64))
package msan
diff --git a/src/runtime/mspanset.go b/src/runtime/mspanset.go
index 4158495..abbd450 100644
--- a/src/runtime/mspanset.go
+++ b/src/runtime/mspanset.go
@@ -33,9 +33,9 @@
// anyway. (In principle, we could do this during STW.)
spineLock mutex
- spine unsafe.Pointer // *[N]*spanSetBlock, accessed atomically
- spineLen uintptr // Spine array length, accessed atomically
- spineCap uintptr // Spine array cap, accessed under lock
+ spine atomicSpanSetSpinePointer // *[N]atomic.Pointer[spanSetBlock]
+ spineLen atomic.Uintptr // Spine array length
+ spineCap uintptr // Spine array cap, accessed under spineLock
// index is the head and tail of the spanSet in a single field.
// The head and the tail both represent an index into the logical
@@ -48,7 +48,7 @@
// span in the heap were stored in this set, and each span were
// the minimum size (1 runtime page, 8 KiB), then roughly the
// smallest heap which would be unrepresentable is 32 TiB in size.
- index headTailIndex
+ index atomicHeadTailIndex
}
const (
@@ -63,10 +63,10 @@
// popped is the number of pop operations that have occurred on
// this block. This number is used to help determine when a block
// may be safely recycled.
- popped uint32
+ popped atomic.Uint32
// spans is the set of spans in this block.
- spans [spanSetBlockEntries]*mspan
+ spans [spanSetBlockEntries]atomicMSpanPointer
}
// push adds span s to buffer b. push is safe to call concurrently
@@ -77,25 +77,24 @@
top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries
// Do we need to add a block?
- spineLen := atomic.Loaduintptr(&b.spineLen)
+ spineLen := b.spineLen.Load()
var block *spanSetBlock
retry:
if top < spineLen {
- spine := atomic.Loadp(unsafe.Pointer(&b.spine))
- blockp := add(spine, goarch.PtrSize*top)
- block = (*spanSetBlock)(atomic.Loadp(blockp))
+ block = b.spine.Load().lookup(top).Load()
} else {
// Add a new block to the spine, potentially growing
// the spine.
lock(&b.spineLock)
// spineLen cannot change until we release the lock,
// but may have changed while we were waiting.
- spineLen = atomic.Loaduintptr(&b.spineLen)
+ spineLen = b.spineLen.Load()
if top < spineLen {
unlock(&b.spineLock)
goto retry
}
+ spine := b.spine.Load()
if spineLen == b.spineCap {
// Grow the spine.
newCap := b.spineCap * 2
@@ -106,10 +105,12 @@
if b.spineCap != 0 {
// Blocks are allocated off-heap, so
// no write barriers.
- memmove(newSpine, b.spine, b.spineCap*goarch.PtrSize)
+ memmove(newSpine, spine.p, b.spineCap*goarch.PtrSize)
}
+ spine = spanSetSpinePointer{newSpine}
+
// Spine is allocated off-heap, so no write barrier.
- atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
+ b.spine.StoreNoWB(spine)
b.spineCap = newCap
// We can't immediately free the old spine
// since a concurrent push with a lower index
@@ -124,16 +125,15 @@
block = spanSetBlockPool.alloc()
// Add it to the spine.
- blockp := add(b.spine, goarch.PtrSize*top)
// Blocks are allocated off-heap, so no write barrier.
- atomic.StorepNoWB(blockp, unsafe.Pointer(block))
- atomic.Storeuintptr(&b.spineLen, spineLen+1)
+ spine.lookup(top).StoreNoWB(block)
+ b.spineLen.Store(spineLen + 1)
unlock(&b.spineLock)
}
// We have a block. Insert the span atomically, since there may be
// concurrent readers via the block API.
- atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
+ block.spans[bottom].StoreNoWB(s)
}
// pop removes and returns a span from buffer b, or nil if b is empty.
@@ -150,7 +150,7 @@
}
// Check if the head position we want to claim is actually
// backed by a block.
- spineLen := atomic.Loaduintptr(&b.spineLen)
+ spineLen := b.spineLen.Load()
if spineLen <= uintptr(head)/spanSetBlockEntries {
// We're racing with a spine growth and the allocation of
// a new block (and maybe a new spine!), and trying to grab
@@ -180,24 +180,23 @@
// We may be reading a stale spine pointer, but because the length
// grows monotonically and we've already verified it, we'll definitely
// be reading from a valid block.
- spine := atomic.Loadp(unsafe.Pointer(&b.spine))
- blockp := add(spine, goarch.PtrSize*uintptr(top))
+ blockp := b.spine.Load().lookup(uintptr(top))
// Given that the spine length is correct, we know we will never
// see a nil block here, since the length is always updated after
// the block is set.
- block := (*spanSetBlock)(atomic.Loadp(blockp))
- s := (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
+ block := blockp.Load()
+ s := block.spans[bottom].Load()
for s == nil {
// We raced with the span actually being set, but given that we
// know a block for this span exists, the race window here is
// extremely small. Try again.
- s = (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
+ s = block.spans[bottom].Load()
}
// Clear the pointer. This isn't strictly necessary, but defensively
// avoids accidentally re-using blocks which could lead to memory
// corruption. This way, we'll get a nil pointer access instead.
- atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), nil)
+ block.spans[bottom].StoreNoWB(nil)
// Increase the popped count. If we are the last possible popper
// in the block (note that bottom need not equal spanSetBlockEntries-1
@@ -211,9 +210,9 @@
// pushers (there can't be any). Note that we may not be the popper
// which claimed the last slot in the block, we're just the last one
// to finish popping.
- if atomic.Xadd(&block.popped, 1) == spanSetBlockEntries {
+ if block.popped.Add(1) == spanSetBlockEntries {
// Clear the block's pointer.
- atomic.StorepNoWB(blockp, nil)
+ blockp.StoreNoWB(nil)
// Return the block to the block pool.
spanSetBlockPool.free(block)
@@ -235,23 +234,23 @@
throw("attempt to clear non-empty span set")
}
top := head / spanSetBlockEntries
- if uintptr(top) < b.spineLen {
+ if uintptr(top) < b.spineLen.Load() {
// If the head catches up to the tail and the set is empty,
// we may not clean up the block containing the head and tail
// since it may be pushed into again. In order to avoid leaking
// memory since we're going to reset the head and tail, clean
// up such a block now, if it exists.
- blockp := (**spanSetBlock)(add(b.spine, goarch.PtrSize*uintptr(top)))
- block := *blockp
+ blockp := b.spine.Load().lookup(uintptr(top))
+ block := blockp.Load()
if block != nil {
- // Sanity check the popped value.
- if block.popped == 0 {
+ // Check the popped value.
+ if block.popped.Load() == 0 {
// popped should never be zero because that means we have
// pushed at least one value but not yet popped if this
// block pointer is not nil.
throw("span set block with unpopped elements found in reset")
}
- if block.popped == spanSetBlockEntries {
+ if block.popped.Load() == spanSetBlockEntries {
// popped should also never be equal to spanSetBlockEntries
// because the last popper should have made the block pointer
// in this slot nil.
@@ -259,14 +258,45 @@
}
// Clear the pointer to the block.
- atomic.StorepNoWB(unsafe.Pointer(blockp), nil)
+ blockp.StoreNoWB(nil)
// Return the block to the block pool.
spanSetBlockPool.free(block)
}
}
b.index.reset()
- atomic.Storeuintptr(&b.spineLen, 0)
+ b.spineLen.Store(0)
+}
+
+// atomicSpanSetSpinePointer is an atomically-accessed spanSetSpinePointer.
+//
+// It has the same semantics as atomic.UnsafePointer.
+type atomicSpanSetSpinePointer struct {
+ a atomic.UnsafePointer
+}
+
+// Load loads the spanSetSpinePointer and returns it.
+//
+// It has the same semantics as atomic.UnsafePointer.
+func (s *atomicSpanSetSpinePointer) Load() spanSetSpinePointer {
+ return spanSetSpinePointer{s.a.Load()}
+}
+
+// StoreNoWB stores the spanSetSpinePointer without a write barrier.
+//
+// It has the same semantics as atomic.UnsafePointer.
+func (s *atomicSpanSetSpinePointer) StoreNoWB(p spanSetSpinePointer) {
+ s.a.StoreNoWB(p.p)
+}
+
+// spanSetSpinePointer represents a pointer to a contiguous block of atomic.Pointer[spanSetBlock].
+type spanSetSpinePointer struct {
+ p unsafe.Pointer
+}
+
+// lookup returns &s[idx].
+func (s spanSetSpinePointer) lookup(idx uintptr) *atomic.Pointer[spanSetBlock] {
+ return (*atomic.Pointer[spanSetBlock])(add(unsafe.Pointer(s.p), goarch.PtrSize*idx))
}
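
A user-space sketch of the lookup pattern above, using sync/atomic.Pointer and unsafe.Add in place of the runtime's internal helpers (illustrative only):

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type block struct{ id int }

// lookup mimics spanSetSpinePointer.lookup: treat base as a contiguous array
// of atomic.Pointer[block] and return the address of slot idx.
func lookup(base unsafe.Pointer, idx uintptr) *atomic.Pointer[block] {
	// Each slot is pointer-sized, matching the goarch.PtrSize stride above.
	return (*atomic.Pointer[block])(unsafe.Add(base, unsafe.Sizeof(uintptr(0))*idx))
}

func main() {
	spine := make([]atomic.Pointer[block], 4)
	lookup(unsafe.Pointer(&spine[0]), 2).Store(&block{id: 7})
	fmt.Println(spine[2].Load().id) // 7
}
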
// spanSetBlockPool is a global pool of spanSetBlocks.
@@ -288,7 +318,7 @@
// free returns a spanSetBlock back to the pool.
func (p *spanSetBlockAlloc) free(block *spanSetBlock) {
- atomic.Store(&block.popped, 0)
+ block.popped.Store(0)
p.stack.push(&block.lfnode)
}
@@ -317,29 +347,34 @@
return h.head(), h.tail()
}
+// atomicHeadTailIndex is an atomically-accessed headTailIndex.
+type atomicHeadTailIndex struct {
+ u atomic.Uint64
+}
+
// load atomically reads a headTailIndex value.
-func (h *headTailIndex) load() headTailIndex {
- return headTailIndex(atomic.Load64((*uint64)(h)))
+func (h *atomicHeadTailIndex) load() headTailIndex {
+ return headTailIndex(h.u.Load())
}
// cas atomically compares-and-swaps a headTailIndex value.
-func (h *headTailIndex) cas(old, new headTailIndex) bool {
- return atomic.Cas64((*uint64)(h), uint64(old), uint64(new))
+func (h *atomicHeadTailIndex) cas(old, new headTailIndex) bool {
+ return h.u.CompareAndSwap(uint64(old), uint64(new))
}
// incHead atomically increments the head of a headTailIndex.
-func (h *headTailIndex) incHead() headTailIndex {
- return headTailIndex(atomic.Xadd64((*uint64)(h), (1 << 32)))
+func (h *atomicHeadTailIndex) incHead() headTailIndex {
+ return headTailIndex(h.u.Add(1 << 32))
}
// decHead atomically decrements the head of a headTailIndex.
-func (h *headTailIndex) decHead() headTailIndex {
- return headTailIndex(atomic.Xadd64((*uint64)(h), -(1 << 32)))
+func (h *atomicHeadTailIndex) decHead() headTailIndex {
+ return headTailIndex(h.u.Add(-(1 << 32)))
}
// incTail atomically increments the tail of a headTailIndex.
-func (h *headTailIndex) incTail() headTailIndex {
- ht := headTailIndex(atomic.Xadd64((*uint64)(h), +1))
+func (h *atomicHeadTailIndex) incTail() headTailIndex {
+ ht := headTailIndex(h.u.Add(1))
// Check for overflow.
if ht.tail() == 0 {
print("runtime: head = ", ht.head(), ", tail = ", ht.tail(), "\n")
@@ -349,6 +384,21 @@
}
// reset clears the headTailIndex to (0, 0).
-func (h *headTailIndex) reset() {
- atomic.Store64((*uint64)(h), 0)
+func (h *atomicHeadTailIndex) reset() {
+ h.u.Store(0)
+}
+
+// atomicMSpanPointer is an atomic.Pointer[mspan]. Can't use generics because it's NotInHeap.
+type atomicMSpanPointer struct {
+ p atomic.UnsafePointer
+}
+
+// Load returns the *mspan.
+func (p *atomicMSpanPointer) Load() *mspan {
+ return (*mspan)(p.p.Load())
+}
+
+// StoreNoWB stores an *mspan without a write barrier.
+func (p *atomicMSpanPointer) StoreNoWB(s *mspan) {
+ p.p.StoreNoWB(unsafe.Pointer(s))
}
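
The head/tail packing that incHead (adds 1<<32) and incTail (adds 1) rely on, shown in plain Go (a sketch with the atomics omitted, not copied from the runtime):

package main

import "fmt"

// headTailIndex packs a 32-bit head (high bits) and a 32-bit tail (low bits)
// into one uint64, so both halves can be read or bumped in a single operation.
type headTailIndex uint64

func (h headTailIndex) head() uint32 { return uint32(h >> 32) }
func (h headTailIndex) tail() uint32 { return uint32(h) }

func main() {
	var h headTailIndex
	h += 1 << 32 // incHead: bump the high half
	h += 1       // incTail: bump the low half
	h += 1
	fmt.Println(h.head(), h.tail()) // 1 2
}
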
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 0029ea9..3a5273f 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -45,8 +45,6 @@
enablegc bool
- _ uint32 // ensure gcPauseDist is aligned.
-
// gcPauseDist represents the distribution of all GC-related
// application pauses in the runtime.
//
@@ -334,10 +332,6 @@
println(offset)
throw("memstats.heapStats not aligned to 8 bytes")
}
- if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
- println(offset)
- throw("memstats.gcPauseDist not aligned to 8 bytes")
- }
// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
// [3]heapStatsDelta) to be 8-byte aligned.
if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
@@ -733,8 +727,7 @@
// gen represents the current index into which writers
// are writing, and can take on the value of 0, 1, or 2.
- // This value is updated atomically.
- gen uint32
+ gen atomic.Uint32
// noPLock is intended to provide mutual exclusion for updating
// stats when no P is available. It does not block other writers
@@ -763,7 +756,7 @@
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
if pp := getg().m.p.ptr(); pp != nil {
- seq := atomic.Xadd(&pp.statsSeq, 1)
+ seq := pp.statsSeq.Add(1)
if seq%2 == 0 {
// Should have been incremented to odd.
print("runtime: seq=", seq, "\n")
@@ -772,7 +765,7 @@
} else {
lock(&m.noPLock)
}
- gen := atomic.Load(&m.gen) % 3
+ gen := m.gen.Load() % 3
return &m.stats[gen]
}
@@ -792,7 +785,7 @@
//go:nosplit
func (m *consistentHeapStats) release() {
if pp := getg().m.p.ptr(); pp != nil {
- seq := atomic.Xadd(&pp.statsSeq, 1)
+ seq := pp.statsSeq.Add(1)
if seq%2 != 0 {
// Should have been incremented to even.
print("runtime: seq=", seq, "\n")
@@ -843,7 +836,7 @@
// Get the current generation. We can be confident that this
// will not change since read is serialized and is the only
// one that modifies currGen.
- currGen := atomic.Load(&m.gen)
+ currGen := m.gen.Load()
prevGen := currGen - 1
if currGen == 0 {
prevGen = 2
@@ -858,7 +851,7 @@
//
// This exchange is safe to do because we won't race
// with anyone else trying to update this value.
- atomic.Xchg(&m.gen, (currGen+1)%3)
+ m.gen.Swap((currGen + 1) % 3)
// Allow P-less writers to continue. They'll be writing to the
// next generation now.
@@ -866,7 +859,7 @@
for _, p := range allp {
// Spin until there are no more writers.
- for atomic.Load(&p.statsSeq)%2 != 0 {
+ for p.statsSeq.Load()%2 != 0 {
}
}
@@ -886,3 +879,25 @@
releasem(mp)
}
+
+type cpuStats struct {
+ // All fields are CPU time in nanoseconds computed by comparing
+ // calls of nanotime. This means they're all overestimates, because
+ // they don't accurately compute on-CPU time (so some of the time
+ // could be spent scheduled away by the OS).
+
+ gcAssistTime int64 // GC assists
+ gcDedicatedTime int64 // GC dedicated mark workers + pauses
+ gcIdleTime int64 // GC idle mark workers
+ gcPauseTime int64 // GC pauses (all GOMAXPROCS, even if just 1 is running)
+ gcTotalTime int64
+
+ scavengeAssistTime int64 // scavenge assists
+ scavengeBgTime int64 // background scavenger
+ scavengeTotalTime int64
+
+ idleTime int64 // Time Ps spent in _Pidle.
+ userTime int64 // Time Ps spent in _Prunning or _Psyscall that's not any of the above.
+
+ totalTime int64 // GOMAXPROCS * (monotonic wall clock time elapsed)
+}
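
A toy version of the odd/even statsSeq protocol used by acquire and release above (names are made up for illustration):

package main

import (
	"fmt"
	"sync/atomic"
)

// writerSeq mimics p.statsSeq: the sequence is odd while a writer holds the
// stats and even when it is done, so a reader can spin until no writer is active.
type writerSeq struct{ seq atomic.Uint32 }

func (w *writerSeq) acquire() {
	if w.seq.Add(1)%2 == 0 {
		panic("expected odd sequence while writing")
	}
}

func (w *writerSeq) release() {
	if w.seq.Add(1)%2 != 0 {
		panic("expected even sequence after writing")
	}
}

func (w *writerSeq) active() bool { return w.seq.Load()%2 != 0 }

func main() {
	var w writerSeq
	w.acquire()
	fmt.Println(w.active()) // true
	w.release()
	fmt.Println(w.active()) // false
}
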
diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go
index 39ce0b4..3b7cbf8 100644
--- a/src/runtime/mwbbuf.go
+++ b/src/runtime/mwbbuf.go
@@ -212,22 +212,22 @@
//
//go:nowritebarrierrec
//go:systemstack
-func wbBufFlush1(_p_ *p) {
+func wbBufFlush1(pp *p) {
// Get the buffered pointers.
- start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0]))
- n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0])
- ptrs := _p_.wbBuf.buf[:n]
+ start := uintptr(unsafe.Pointer(&pp.wbBuf.buf[0]))
+ n := (pp.wbBuf.next - start) / unsafe.Sizeof(pp.wbBuf.buf[0])
+ ptrs := pp.wbBuf.buf[:n]
// Poison the buffer to make extra sure nothing is enqueued
// while we're processing the buffer.
- _p_.wbBuf.next = 0
+ pp.wbBuf.next = 0
if useCheckmark {
// Slow path for checkmark mode.
for _, ptr := range ptrs {
shade(ptr)
}
- _p_.wbBuf.reset()
+ pp.wbBuf.reset()
return
}
@@ -245,7 +245,7 @@
// could track whether any un-shaded goroutine has used the
// buffer, or just track globally whether there are any
// un-shaded stacks and flush after each stack scan.
- gcw := &_p_.gcw
+ gcw := &pp.gcw
pos := 0
for _, ptr := range ptrs {
if ptr < minLegalPointer {
@@ -286,5 +286,5 @@
// Enqueue the greyed objects.
gcw.putBatch(ptrs[:pos])
- _p_.wbBuf.reset()
+ pp.wbBuf.reset()
}
diff --git a/src/runtime/nbpipe_fcntl_libc_test.go b/src/runtime/nbpipe_fcntl_libc_test.go
index a9c8987..170245d 100644
--- a/src/runtime/nbpipe_fcntl_libc_test.go
+++ b/src/runtime/nbpipe_fcntl_libc_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build aix || darwin || solaris
+//go:build aix || darwin || (openbsd && !mips64) || solaris
package runtime_test
diff --git a/src/runtime/nbpipe_fcntl_unix_test.go b/src/runtime/nbpipe_fcntl_unix_test.go
index 97607fa..b7252ea 100644
--- a/src/runtime/nbpipe_fcntl_unix_test.go
+++ b/src/runtime/nbpipe_fcntl_unix_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build dragonfly || freebsd || linux || netbsd || openbsd
+//go:build dragonfly || freebsd || linux || netbsd || (openbsd && mips64)
package runtime_test
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index ac6bc89..5ac1f37 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -8,6 +8,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -49,16 +50,17 @@
// goroutines respectively. The semaphore can be in the following states:
//
// pdReady - io readiness notification is pending;
-// a goroutine consumes the notification by changing the state to nil.
+// a goroutine consumes the notification by changing the state to pdNil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
// the goroutine commits to park by changing the state to G pointer,
// or, alternatively, concurrent io notification changes the state to pdReady,
-// or, alternatively, concurrent timeout/close changes the state to nil.
+// or, alternatively, concurrent timeout/close changes the state to pdNil.
// G pointer - the goroutine is blocked on the semaphore;
-// io notification or timeout/close changes the state to pdReady or nil respectively
+// io notification or timeout/close changes the state to pdReady or pdNil respectively
// and unparks the goroutine.
-// nil - none of the above.
+// pdNil - none of the above.
const (
+ pdNil uintptr = 0
pdReady uintptr = 1
pdWait uintptr = 2
)
@@ -68,9 +70,8 @@
// Network poller descriptor.
//
// No heap pointers.
-//
-//go:notinheap
type pollDesc struct {
+ _ sys.NotInHeap
link *pollDesc // in pollcache, protected by pollcache.lock
fd uintptr // constant for pollDesc usage lifetime
@@ -93,8 +94,8 @@
// rg, wg are accessed atomically and hold g pointers.
// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
- rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
- wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil
+ rg atomic.Uintptr // pdReady, pdWait, G waiting for read or pdNil
+ wg atomic.Uintptr // pdReady, pdWait, G waiting for write or pdNil
lock mutex // protects the following fields
closing bool
@@ -177,10 +178,10 @@
var (
netpollInitLock mutex
- netpollInited uint32
+ netpollInited atomic.Uint32
pollcache pollCache
- netpollWaiters uint32
+ netpollWaiters atomic.Uint32
)
//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
@@ -189,19 +190,19 @@
}
func netpollGenericInit() {
- if atomic.Load(&netpollInited) == 0 {
+ if netpollInited.Load() == 0 {
lockInit(&netpollInitLock, lockRankNetpollInit)
lock(&netpollInitLock)
- if netpollInited == 0 {
+ if netpollInited.Load() == 0 {
netpollinit()
- atomic.Store(&netpollInited, 1)
+ netpollInited.Store(1)
}
unlock(&netpollInitLock)
}
}
func netpollinited() bool {
- return atomic.Load(&netpollInited) != 0
+ return netpollInited.Load() != 0
}
//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor
@@ -217,21 +218,21 @@
pd := pollcache.alloc()
lock(&pd.lock)
wg := pd.wg.Load()
- if wg != 0 && wg != pdReady {
+ if wg != pdNil && wg != pdReady {
throw("runtime: blocked write on free polldesc")
}
rg := pd.rg.Load()
- if rg != 0 && rg != pdReady {
+ if rg != pdNil && rg != pdReady {
throw("runtime: blocked read on free polldesc")
}
pd.fd = fd
pd.closing = false
pd.setEventErr(false)
pd.rseq++
- pd.rg.Store(0)
+ pd.rg.Store(pdNil)
pd.rd = 0
pd.wseq++
- pd.wg.Store(0)
+ pd.wg.Store(pdNil)
pd.wd = 0
pd.self = pd
pd.publishInfo()
@@ -251,11 +252,11 @@
throw("runtime: close polldesc w/o unblock")
}
wg := pd.wg.Load()
- if wg != 0 && wg != pdReady {
+ if wg != pdNil && wg != pdReady {
throw("runtime: blocked write on closing polldesc")
}
rg := pd.rg.Load()
- if rg != 0 && rg != pdReady {
+ if rg != pdNil && rg != pdReady {
throw("runtime: blocked read on closing polldesc")
}
netpollclose(pd.fd)
@@ -280,9 +281,9 @@
return errcode
}
if mode == 'r' {
- pd.rg.Store(0)
+ pd.rg.Store(pdNil)
} else if mode == 'w' {
- pd.wg.Store(0)
+ pd.wg.Store(pdNil)
}
return pollNoError
}
@@ -482,17 +483,17 @@
// Bump the count of goroutines waiting for the poller.
// The scheduler uses this to decide whether to block
// waiting for the poller if there is nothing else to do.
- atomic.Xadd(&netpollWaiters, 1)
+ netpollWaiters.Add(1)
}
return r
}
func netpollgoready(gp *g, traceskip int) {
- atomic.Xadd(&netpollWaiters, -1)
+ netpollWaiters.Add(-1)
goready(gp, traceskip+1)
}
-// returns true if IO is ready, or false if timedout or closed
+// returns true if IO is ready, or false if timed out or closed
// waitio - wait only for completed IO, ignore errors
// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
// can hold only a single waiting goroutine for each mode.
@@ -505,16 +506,16 @@
// set the gpp semaphore to pdWait
for {
// Consume notification if already ready.
- if gpp.CompareAndSwap(pdReady, 0) {
+ if gpp.CompareAndSwap(pdReady, pdNil) {
return true
}
- if gpp.CompareAndSwap(0, pdWait) {
+ if gpp.CompareAndSwap(pdNil, pdWait) {
break
}
// Double check that this isn't corrupt; otherwise we'd loop
// forever.
- if v := gpp.Load(); v != pdReady && v != 0 {
+ if v := gpp.Load(); v != pdReady && v != pdNil {
throw("runtime: double wait")
}
}
@@ -526,7 +527,7 @@
gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
}
// be careful to not lose concurrent pdReady notification
- old := gpp.Swap(0)
+ old := gpp.Swap(pdNil)
if old > pdWait {
throw("runtime: corrupted polldesc")
}
@@ -544,7 +545,7 @@
if old == pdReady {
return nil
}
- if old == 0 && !ioready {
+ if old == pdNil && !ioready {
// Only set pdReady for ioready. runtime_pollWait
// will check for timeout/cancel before waiting.
return nil
@@ -555,7 +556,7 @@
}
if gpp.CompareAndSwap(old, new) {
if old == pdWait {
- old = 0
+ old = pdNil
}
return (*g)(unsafe.Pointer(old))
}
@@ -641,8 +642,8 @@
// makeArg converts pd to an interface{}.
// makeArg does not do any allocation. Normally, such
// a conversion requires an allocation because pointers to
-// go:notinheap types (which pollDesc is) must be stored
-// in interfaces indirectly. See issue 42076.
+// types which embed runtime/internal/sys.NotInHeap (which pollDesc is)
+// must be stored in interfaces indirectly. See issue 42076.
func (pd *pollDesc) makeArg() (i any) {
x := (*eface)(unsafe.Pointer(&i))
x._type = pdType
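
The pdNil/pdReady/pdWait semaphore protocol documented above can be exercised in isolation; this sketches the consume-or-arm CAS loop of netpollblock, not the runtime code itself:

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	pdNil   uintptr = 0
	pdReady uintptr = 1
	pdWait  uintptr = 2
)

// consumeOrArm either consumes a pending pdReady notification (returning true)
// or arms the semaphore by moving pdNil -> pdWait (returning false), retrying
// if a concurrent notifier races with us. Any other state is corruption
// (the runtime throws "double wait").
func consumeOrArm(sem *atomic.Uintptr) bool {
	for {
		if sem.CompareAndSwap(pdReady, pdNil) {
			return true
		}
		if sem.CompareAndSwap(pdNil, pdWait) {
			return false
		}
		if v := sem.Load(); v != pdReady && v != pdNil {
			panic("corrupted state")
		}
	}
}

func main() {
	var sem atomic.Uintptr
	sem.Store(pdReady)
	fmt.Println(consumeOrArm(&sem)) // true: notification consumed
	fmt.Println(consumeOrArm(&sem)) // false: semaphore armed (pdWait)
}
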
diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go
index 22cc513..5184aad 100644
--- a/src/runtime/netpoll_aix.go
+++ b/src/runtime/netpoll_aix.go
@@ -45,7 +45,7 @@
wrwake int32
pendingUpdates int32
- netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+ netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
@@ -135,10 +135,13 @@
// netpollBreak interrupts a poll.
func netpollBreak() {
- if atomic.Cas(&netpollWakeSig, 0, 1) {
- b := [1]byte{0}
- write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1)
+ // Failing to cas indicates there is an in-flight wakeup, so we're done here.
+ if !netpollWakeSig.CompareAndSwap(0, 1) {
+ return
}
+
+ b := [1]byte{0}
+ write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1)
}
// netpoll checks for ready network connections.
@@ -193,7 +196,7 @@
var b [1]byte
for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 {
}
- atomic.Store(&netpollWakeSig, 0)
+ netpollWakeSig.Store(0)
}
// Still look at the other fds even if the mode may have
// changed, as netpollBreak might have been called.
diff --git a/src/runtime/netpoll_epoll.go b/src/runtime/netpoll_epoll.go
index b7d6199..7164a59 100644
--- a/src/runtime/netpoll_epoll.go
+++ b/src/runtime/netpoll_epoll.go
@@ -8,49 +8,37 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/syscall"
"unsafe"
)
-func epollcreate(size int32) int32
-func epollcreate1(flags int32) int32
-
-//go:noescape
-func epollctl(epfd, op, fd int32, ev *epollevent) int32
-
-//go:noescape
-func epollwait(epfd int32, ev *epollevent, nev, timeout int32) int32
-func closeonexec(fd int32)
-
var (
epfd int32 = -1 // epoll descriptor
netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
- netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+ netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
- epfd = epollcreate1(_EPOLL_CLOEXEC)
- if epfd < 0 {
- epfd = epollcreate(1024)
- if epfd < 0 {
- println("runtime: epollcreate failed with", -epfd)
- throw("runtime: netpollinit failed")
- }
- closeonexec(epfd)
- }
- r, w, errno := nonblockingPipe()
+ var errno uintptr
+ epfd, errno = syscall.EpollCreate1(syscall.EPOLL_CLOEXEC)
if errno != 0 {
- println("runtime: pipe failed with", -errno)
+ println("runtime: epollcreate failed with", errno)
+ throw("runtime: netpollinit failed")
+ }
+ r, w, errpipe := nonblockingPipe()
+ if errpipe != 0 {
+ println("runtime: pipe failed with", -errpipe)
throw("runtime: pipe failed")
}
- ev := epollevent{
- events: _EPOLLIN,
+ ev := syscall.EpollEvent{
+ Events: syscall.EPOLLIN,
}
- *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
- errno = epollctl(epfd, _EPOLL_CTL_ADD, r, &ev)
+ *(**uintptr)(unsafe.Pointer(&ev.Data)) = &netpollBreakRd
+ errno = syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, r, &ev)
if errno != 0 {
- println("runtime: epollctl failed with", -errno)
+ println("runtime: epollctl failed with", errno)
throw("runtime: epollctl failed")
}
netpollBreakRd = uintptr(r)
@@ -61,16 +49,16 @@
return fd == uintptr(epfd) || fd == netpollBreakRd || fd == netpollBreakWr
}
-func netpollopen(fd uintptr, pd *pollDesc) int32 {
- var ev epollevent
- ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
- *(**pollDesc)(unsafe.Pointer(&ev.data)) = pd
- return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
+func netpollopen(fd uintptr, pd *pollDesc) uintptr {
+ var ev syscall.EpollEvent
+ ev.Events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLET
+ *(**pollDesc)(unsafe.Pointer(&ev.Data)) = pd
+ return syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, int32(fd), &ev)
}
-func netpollclose(fd uintptr) int32 {
- var ev epollevent
- return -epollctl(epfd, _EPOLL_CTL_DEL, int32(fd), &ev)
+func netpollclose(fd uintptr) uintptr {
+ var ev syscall.EpollEvent
+ return syscall.EpollCtl(epfd, syscall.EPOLL_CTL_DEL, int32(fd), &ev)
}
func netpollarm(pd *pollDesc, mode int) {
@@ -79,22 +67,25 @@
// netpollBreak interrupts an epollwait.
func netpollBreak() {
- if atomic.Cas(&netpollWakeSig, 0, 1) {
- for {
- var b byte
- n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
- if n == 1 {
- break
- }
- if n == -_EINTR {
- continue
- }
- if n == -_EAGAIN {
- return
- }
- println("runtime: netpollBreak write failed with", -n)
- throw("runtime: netpollBreak write failed")
+ // Failing to cas indicates there is an in-flight wakeup, so we're done here.
+ if !netpollWakeSig.CompareAndSwap(0, 1) {
+ return
+ }
+
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 {
+ break
}
+ if n == -_EINTR {
+ continue
+ }
+ if n == -_EAGAIN {
+ return
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
}
}
@@ -121,12 +112,12 @@
// 1e9 ms == ~11.5 days.
waitms = 1e9
}
- var events [128]epollevent
+ var events [128]syscall.EpollEvent
retry:
- n := epollwait(epfd, &events[0], int32(len(events)), waitms)
- if n < 0 {
- if n != -_EINTR {
- println("runtime: epollwait on fd", epfd, "failed with", -n)
+ n, errno := syscall.EpollWait(epfd, events[:], int32(len(events)), waitms)
+ if errno != 0 {
+ if errno != _EINTR {
+ println("runtime: epollwait on fd", epfd, "failed with", errno)
throw("runtime: netpoll failed")
}
// If a timed sleep was interrupted, just return to
@@ -138,14 +129,14 @@
}
var toRun gList
for i := int32(0); i < n; i++ {
- ev := &events[i]
- if ev.events == 0 {
+ ev := events[i]
+ if ev.Events == 0 {
continue
}
- if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
- if ev.events != _EPOLLIN {
- println("runtime: netpoll: break fd ready for", ev.events)
+ if *(**uintptr)(unsafe.Pointer(&ev.Data)) == &netpollBreakRd {
+ if ev.Events != syscall.EPOLLIN {
+ println("runtime: netpoll: break fd ready for", ev.Events)
throw("runtime: netpoll: break fd ready for something unexpected")
}
if delay != 0 {
@@ -154,21 +145,21 @@
// if blocking.
var tmp [16]byte
read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
- atomic.Store(&netpollWakeSig, 0)
+ netpollWakeSig.Store(0)
}
continue
}
var mode int32
- if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
+ if ev.Events&(syscall.EPOLLIN|syscall.EPOLLRDHUP|syscall.EPOLLHUP|syscall.EPOLLERR) != 0 {
mode += 'r'
}
- if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
+ if ev.Events&(syscall.EPOLLOUT|syscall.EPOLLHUP|syscall.EPOLLERR) != 0 {
mode += 'w'
}
if mode != 0 {
- pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
- pd.setEventErr(ev.events == _EPOLLERR)
+ pd := *(**pollDesc)(unsafe.Pointer(&ev.Data))
+ pd.setEventErr(ev.Events == syscall.EPOLLERR)
netpollready(&toRun, pd, mode)
}
}
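
Each poller's netpollBreak (AIX and epoll above, kqueue, Solaris, and Windows below) now uses the same CAS-once deduplication; a standalone sketch of the pattern with illustrative names:

package main

import (
	"fmt"
	"sync/atomic"
)

var wakeSig atomic.Uint32 // 1 while a wakeup is in flight

// requestWakeup performs the (possibly expensive) wakeup only for the caller
// that wins the 0->1 CAS; later callers return immediately, as in netpollBreak.
func requestWakeup() bool {
	if !wakeSig.CompareAndSwap(0, 1) {
		return false // a wakeup is already in flight
	}
	// ... write to the wakeup pipe / post a completion here ...
	return true
}

// drainWakeup is what the poller does after consuming the wakeup.
func drainWakeup() { wakeSig.Store(0) }

func main() {
	fmt.Println(requestWakeup(), requestWakeup()) // true false
	drainWakeup()
	fmt.Println(requestWakeup()) // true
}
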
diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go
index 1694753..5ae77b5 100644
--- a/src/runtime/netpoll_kqueue.go
+++ b/src/runtime/netpoll_kqueue.go
@@ -18,7 +18,7 @@
netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
- netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+ netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
@@ -83,19 +83,22 @@
// netpollBreak interrupts a kevent.
func netpollBreak() {
- if atomic.Cas(&netpollWakeSig, 0, 1) {
- for {
- var b byte
- n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
- if n == 1 || n == -_EAGAIN {
- break
- }
- if n == -_EINTR {
- continue
- }
- println("runtime: netpollBreak write failed with", -n)
- throw("runtime: netpollBreak write failed")
+ // Failing to cas indicates there is an in-flight wakeup, so we're done here.
+ if !netpollWakeSig.CompareAndSwap(0, 1) {
+ return
+ }
+
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 || n == -_EAGAIN {
+ break
}
+ if n == -_EINTR {
+ continue
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
}
}
@@ -152,7 +155,7 @@
// if blocking.
var tmp [16]byte
read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
- atomic.Store(&netpollWakeSig, 0)
+ netpollWakeSig.Store(0)
}
continue
}
diff --git a/src/runtime/netpoll_solaris.go b/src/runtime/netpoll_solaris.go
index 6e545b3..d835cd9 100644
--- a/src/runtime/netpoll_solaris.go
+++ b/src/runtime/netpoll_solaris.go
@@ -88,7 +88,7 @@
libc_port_dissociate,
libc_port_getn,
libc_port_alert libcFunc
- netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+ netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
)
func errno() int32 {
@@ -191,17 +191,20 @@
// netpollBreak interrupts a port_getn wait.
func netpollBreak() {
- if atomic.Cas(&netpollWakeSig, 0, 1) {
- // Use port_alert to put portfd into alert mode.
- // This will wake up all threads sleeping in port_getn on portfd,
- // and cause their calls to port_getn to return immediately.
- // Further, until portfd is taken out of alert mode,
- // all calls to port_getn will return immediately.
- if port_alert(portfd, _PORT_ALERT_UPDATE, _POLLHUP, uintptr(unsafe.Pointer(&portfd))) < 0 {
- if e := errno(); e != _EBUSY {
- println("runtime: port_alert failed with", e)
- throw("runtime: netpoll: port_alert failed")
- }
+ // Failing to cas indicates there is an in-flight wakeup, so we're done here.
+ if !netpollWakeSig.CompareAndSwap(0, 1) {
+ return
+ }
+
+ // Use port_alert to put portfd into alert mode.
+ // This will wake up all threads sleeping in port_getn on portfd,
+ // and cause their calls to port_getn to return immediately.
+ // Further, until portfd is taken out of alert mode,
+ // all calls to port_getn will return immediately.
+ if port_alert(portfd, _PORT_ALERT_UPDATE, _POLLHUP, uintptr(unsafe.Pointer(&portfd))) < 0 {
+ if e := errno(); e != _EBUSY {
+ println("runtime: port_alert failed with", e)
+ throw("runtime: netpoll: port_alert failed")
}
}
}
@@ -274,7 +277,7 @@
println("runtime: port_alert failed with", e)
throw("runtime: netpoll: port_alert failed")
}
- atomic.Store(&netpollWakeSig, 0)
+ netpollWakeSig.Store(0)
}
continue
}
diff --git a/src/runtime/netpoll_stub.go b/src/runtime/netpoll_stub.go
index d0a63bc..14cf0c3 100644
--- a/src/runtime/netpoll_stub.go
+++ b/src/runtime/netpoll_stub.go
@@ -8,8 +8,8 @@
import "runtime/internal/atomic"
-var netpollInited uint32
-var netpollWaiters uint32
+var netpollInited atomic.Uint32
+var netpollWaiters atomic.Uint32
var netpollStubLock mutex
var netpollNote note
@@ -19,7 +19,7 @@
var netpollBroken bool
func netpollGenericInit() {
- atomic.Store(&netpollInited, 1)
+ netpollInited.Store(1)
}
func netpollBreak() {
@@ -57,5 +57,5 @@
}
func netpollinited() bool {
- return atomic.Load(&netpollInited) != 0
+ return netpollInited.Load() != 0
}
diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go
index 4c1cd26..796bf1d 100644
--- a/src/runtime/netpoll_windows.go
+++ b/src/runtime/netpoll_windows.go
@@ -35,7 +35,7 @@
var (
iocphandle uintptr = _INVALID_HANDLE_VALUE // completion port io handle
- netpollWakeSig uint32 // used to avoid duplicate calls of netpollBreak
+ netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
)
func netpollinit() {
@@ -67,11 +67,14 @@
}
func netpollBreak() {
- if atomic.Cas(&netpollWakeSig, 0, 1) {
- if stdcall4(_PostQueuedCompletionStatus, iocphandle, 0, 0, 0) == 0 {
- println("runtime: netpoll: PostQueuedCompletionStatus failed (errno=", getlasterror(), ")")
- throw("runtime: netpoll: PostQueuedCompletionStatus failed")
- }
+ // Failing to cas indicates there is an in-flight wakeup, so we're done here.
+ if !netpollWakeSig.CompareAndSwap(0, 1) {
+ return
+ }
+
+ if stdcall4(_PostQueuedCompletionStatus, iocphandle, 0, 0, 0) == 0 {
+ println("runtime: netpoll: PostQueuedCompletionStatus failed (errno=", getlasterror(), ")")
+ throw("runtime: netpoll: PostQueuedCompletionStatus failed")
}
}
@@ -133,7 +136,7 @@
}
handlecompletion(&toRun, op, errno, qty)
} else {
- atomic.Store(&netpollWakeSig, 0)
+ netpollWakeSig.Store(0)
if delay == 0 {
// Forward the notification to the
// blocked poller.
diff --git a/src/runtime/os2_aix.go b/src/runtime/os2_aix.go
index 9ad1caa..2efc565 100644
--- a/src/runtime/os2_aix.go
+++ b/src/runtime/os2_aix.go
@@ -388,11 +388,11 @@
//go:nosplit
func exit(code int32) {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
syscall1(&libc_exit, uintptr(code))
return
}
@@ -403,11 +403,11 @@
//go:nosplit
func write1(fd uintptr, p unsafe.Pointer, n int32) int32 {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
r, errno := syscall3(&libc_write, uintptr(fd), uintptr(p), uintptr(n))
if int32(r) < 0 {
return -int32(errno)
@@ -493,11 +493,11 @@
//go:nosplit
func sigaction(sig uintptr, new, old *sigactiont) {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// runtime.libpreinit.
- if _g_ != nil {
+ if gp != nil {
r, err := syscall3(&libc_sigaction, sig, uintptr(unsafe.Pointer(new)), uintptr(unsafe.Pointer(old)))
if int32(r) == -1 {
println("Sigaction failed for sig: ", sig, " with error:", hex(err))
@@ -645,11 +645,11 @@
//go:nosplit
func pthread_attr_init(attr *pthread_attr) int32 {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
r, _ := syscall1(&libpthread_attr_init, uintptr(unsafe.Pointer(attr)))
return int32(r)
}
@@ -661,11 +661,11 @@
//go:nosplit
func pthread_attr_setdetachstate(attr *pthread_attr, state int32) int32 {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
r, _ := syscall2(&libpthread_attr_setdetachstate, uintptr(unsafe.Pointer(attr)), uintptr(state))
return int32(r)
}
@@ -689,11 +689,11 @@
//go:nosplit
func pthread_attr_setstacksize(attr *pthread_attr, size uint64) int32 {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
r, _ := syscall2(&libpthread_attr_setstacksize, uintptr(unsafe.Pointer(attr)), uintptr(size))
return int32(r)
}
@@ -705,11 +705,11 @@
//go:nosplit
func pthread_create(tid *pthread, attr *pthread_attr, fn *funcDescriptor, arg unsafe.Pointer) int32 {
- _g_ := getg()
+ gp := getg()
// Check the validity of g because without a g during
// newosproc0.
- if _g_ != nil {
+ if gp != nil {
r, _ := syscall4(&libpthread_create, uintptr(unsafe.Pointer(tid)), uintptr(unsafe.Pointer(attr)), uintptr(unsafe.Pointer(fn)), uintptr(arg))
return int32(r)
}
@@ -723,11 +723,11 @@
//go:nosplit
func sigprocmask(how int32, new, old *sigset) {
- _g_ := getg()
+ gp := getg()
// Check the validity of m because it might be called during a cgo
// callback early enough where m isn't available yet.
- if _g_ != nil && _g_.m != nil {
+ if gp != nil && gp.m != nil {
r, err := syscall3(&libpthread_sigthreadmask, uintptr(how), uintptr(unsafe.Pointer(new)), uintptr(unsafe.Pointer(old)))
if int32(r) != 0 {
println("syscall sigthreadmask failed: ", hex(err))
diff --git a/src/runtime/os3_plan9.go b/src/runtime/os3_plan9.go
index e901b3e..8c9cbe2 100644
--- a/src/runtime/os3_plan9.go
+++ b/src/runtime/os3_plan9.go
@@ -14,7 +14,9 @@
//
//go:nowritebarrierrec
func sighandler(_ureg *ureg, note *byte, gp *g) int {
- _g_ := getg()
+ gsignal := getg()
+ mp := gsignal.m
+
var t sigTabT
var docrash bool
var sig int
@@ -61,7 +63,7 @@
if flags&_SigPanic != 0 {
// Copy the error string from sigtramp's stack into m->notesig so
// we can reliably access it from the panic routines.
- memmove(unsafe.Pointer(_g_.m.notesig), unsafe.Pointer(note), uintptr(len(notestr)+1))
+ memmove(unsafe.Pointer(mp.notesig), unsafe.Pointer(note), uintptr(len(notestr)+1))
gp.sig = uint32(sig)
gp.sigpc = c.pc()
@@ -120,8 +122,8 @@
return _NCONT
}
Throw:
- _g_.m.throwing = throwTypeRuntime
- _g_.m.caughtsig.set(gp)
+ mp.throwing = throwTypeRuntime
+ mp.caughtsig.set(gp)
startpanic_m()
print(notestr, "\n")
print("PC=", hex(c.pc()), "\n")
diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go
index 8c85b71..ffac4b6 100644
--- a/src/runtime/os3_solaris.go
+++ b/src/runtime/os3_solaris.go
@@ -7,6 +7,7 @@
import (
"internal/abi"
"internal/goarch"
+ "runtime/internal/atomic"
"unsafe"
)
@@ -171,18 +172,20 @@
// Disable signals during create, so that the new thread starts
// with signals disabled. It will enable them in minit.
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- ret = pthread_create(&tid, &attr, abi.FuncPCABI0(tstart_sysvicall), unsafe.Pointer(mp))
+ ret = retryOnEAGAIN(func() int32 {
+ return pthread_create(&tid, &attr, abi.FuncPCABI0(tstart_sysvicall), unsafe.Pointer(mp))
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
if ret != 0 {
print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
- if ret == -_EAGAIN {
+ if ret == _EAGAIN {
println("runtime: may need to increase max user processes (ulimit -u)")
}
throw("newosproc")
}
}
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
// We should never reach exitThread on Solaris because we let
// libc clean up threads.
throw("exitThread")
@@ -267,7 +270,7 @@
return *((*uintptr)(unsafe.Pointer(&sa._funcptr)))
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
@@ -308,18 +311,17 @@
}
var sem *semt
- _g_ := getg()
// Call libc's malloc rather than malloc. This will
// allocate space on the C heap. We can't call malloc
// here because it could cause a deadlock.
- _g_.m.libcall.fn = uintptr(unsafe.Pointer(&libc_malloc))
- _g_.m.libcall.n = 1
- _g_.m.scratch = mscratch{}
- _g_.m.scratch.v[0] = unsafe.Sizeof(*sem)
- _g_.m.libcall.args = uintptr(unsafe.Pointer(&_g_.m.scratch))
- asmcgocall(unsafe.Pointer(&asmsysvicall6x), unsafe.Pointer(&_g_.m.libcall))
- sem = (*semt)(unsafe.Pointer(_g_.m.libcall.r1))
+ mp.libcall.fn = uintptr(unsafe.Pointer(&libc_malloc))
+ mp.libcall.n = 1
+ mp.scratch = mscratch{}
+ mp.scratch.v[0] = unsafe.Sizeof(*sem)
+ mp.libcall.args = uintptr(unsafe.Pointer(&mp.scratch))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6x), unsafe.Pointer(&mp.libcall))
+ sem = (*semt)(unsafe.Pointer(mp.libcall.r1))
if sem_init(sem, 0, 0) != 0 {
throw("sem_init")
}
diff --git a/src/runtime/os_aix.go b/src/runtime/os_aix.go
index 15e4929..e07c7f1 100644
--- a/src/runtime/os_aix.go
+++ b/src/runtime/os_aix.go
@@ -8,6 +8,7 @@
import (
"internal/abi"
+ "runtime/internal/atomic"
"unsafe"
)
@@ -110,17 +111,17 @@
)
if pthread_attr_init(&attr) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
if pthread_attr_setstacksize(&attr, threadStackSize) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
@@ -139,14 +140,12 @@
}
sigprocmask(_SIG_SETMASK, &oset, nil)
if ret != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
}
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
// Called to do synchronous initialization of Go code built with
// -buildmode=c-archive or -buildmode=c-shared.
// None of the Go runtime is initialized.
@@ -164,7 +163,7 @@
}
// errno address must be retrieved by calling _Errno libc function.
-// This will return a pointer to errno
+// This will return a pointer to errno.
func miniterrno() {
mp := getg().m
r, _ := syscall0(&libc__Errno)
@@ -212,16 +211,9 @@
// Disable signals during create, so that the new thread starts
// with signals disabled. It will enable them in minit.
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- var ret int32
- for tries := 0; tries < 20; tries++ {
- // pthread_create can fail with EAGAIN for no reasons
- // but it will be ok if it retries.
- ret = pthread_create(&tid, &attr, &tstart, unsafe.Pointer(mp))
- if ret != _EAGAIN {
- break
- }
- usleep(uint32(tries+1) * 1000) // Milliseconds.
- }
+ ret := retryOnEAGAIN(func() int32 {
+ return pthread_create(&tid, &attr, &tstart, unsafe.Pointer(mp))
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
if ret != 0 {
print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
@@ -233,7 +225,7 @@
}
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
// We should never reach exitThread on AIX because we let
// libc clean up threads.
throw("exitThread")
@@ -296,7 +288,7 @@
return sa.sa_handler
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
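
retryOnEAGAIN itself is defined elsewhere in this change; judging from the hand-written AIX loop removed above, a plausible user-space sketch of the pattern looks like this (assumed shape, not the runtime's definition):

package main

import (
	"fmt"
	"syscall"
	"time"
)

// retryOnEAGAIN calls fn, retrying a bounded number of times while it reports
// EAGAIN and sleeping a little longer before each retry — the same shape as
// the loop the diff removes from newosproc on AIX.
func retryOnEAGAIN(fn func() error) error {
	var err error
	for tries := 0; tries < 20; tries++ {
		err = fn()
		if err != syscall.EAGAIN {
			return err
		}
		time.Sleep(time.Duration(tries+1) * time.Millisecond)
	}
	return err
}

func main() {
	calls := 0
	err := retryOnEAGAIN(func() error {
		calls++
		if calls < 3 {
			return syscall.EAGAIN
		}
		return nil
	})
	fmt.Println(calls, err) // 3 <nil>
}
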
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
index 8562d7d..c4f3bb6 100644
--- a/src/runtime/os_darwin.go
+++ b/src/runtime/os_darwin.go
@@ -136,6 +136,8 @@
ncpu = getncpu()
physPageSize = getPageSize()
+
+ osinit_hack()
}
func sysctlbynameInt32(name []byte) (int32, int32) {
@@ -208,21 +210,21 @@
var err int32
err = pthread_attr_init(&attr)
if err != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
// Find out OS stack size for our own stack guard.
var stacksize uintptr
if pthread_attr_getstacksize(&attr, &stacksize) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
mp.g0.stack.hi = stacksize // for mstart
// Tell the pthread library we won't join with this thread.
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
@@ -230,10 +232,12 @@
// setup and then calls mstart.
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- err = pthread_create(&attr, abi.FuncPCABI0(mstart_stub), unsafe.Pointer(mp))
+ err = retryOnEAGAIN(func() int32 {
+ return pthread_create(&attr, abi.FuncPCABI0(mstart_stub), unsafe.Pointer(mp))
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
if err != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
}
@@ -253,7 +257,7 @@
var err int32
err = pthread_attr_init(&attr)
if err != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
@@ -263,7 +267,7 @@
// we use the OS default stack size instead of the suggestion.
// Find out that stack size for our own stack guard.
if pthread_attr_getstacksize(&attr, &stacksize) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
g0.stack.hi = stacksize // for mstart
@@ -271,7 +275,7 @@
// Tell the pthread library we won't join with this thread.
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
@@ -282,14 +286,11 @@
err = pthread_create(&attr, fn, nil)
sigprocmask(_SIG_SETMASK, &oset, nil)
if err != 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
}
-var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
// Called to do synchronous initialization of Go code built with
// -buildmode=c-archive or -buildmode=c-shared.
// None of the Go runtime is initialized.
@@ -412,7 +413,7 @@
return *(*uintptr)(unsafe.Pointer(&sa.__sigaction_u))
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index 8347814..e467578 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -162,7 +162,10 @@
}
// TODO: Check for error.
- lwp_create(¶ms)
+ retryOnEAGAIN(func() int32 {
+ lwp_create(¶ms)
+ return 0
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
}
@@ -248,7 +251,7 @@
return sa.sa_sigaction
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index 23efd1a..f53cb11 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -213,10 +213,14 @@
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- ret := thr_new(¶m, int32(unsafe.Sizeof(param)))
+ ret := retryOnEAGAIN(func() int32 {
+ errno := thr_new(¶m, int32(unsafe.Sizeof(param)))
+ // thr_new returns negative errno
+ return -errno
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
- if ret < 0 {
- print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
+ if ret != 0 {
+ print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
throw("newosproc")
}
}
@@ -227,7 +231,7 @@
func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
stack := sysAlloc(stacksize, &memstats.stacks_sys)
if stack == nil {
- write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
+ writeErrStr(failallocatestack)
exit(1)
}
// This code "knows" it's being called once from the library
@@ -252,14 +256,11 @@
ret := thr_new(¶m, int32(unsafe.Sizeof(param)))
sigprocmask(_SIG_SETMASK, &oset, nil)
if ret < 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
}
-var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
// Called to do synchronous initialization of Go code built with
// -buildmode=c-archive or -buildmode=c-shared.
// None of the Go runtime is initialized.
@@ -362,7 +363,7 @@
return sa.sa_handler
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
diff --git a/src/runtime/os_freebsd_riscv64.go b/src/runtime/os_freebsd_riscv64.go
new file mode 100644
index 0000000..0f2ed50
--- /dev/null
+++ b/src/runtime/os_freebsd_riscv64.go
@@ -0,0 +1,7 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func osArchInit() {}
diff --git a/src/runtime/os_js.go b/src/runtime/os_js.go
index 34cc027..7481fb9 100644
--- a/src/runtime/os_js.go
+++ b/src/runtime/os_js.go
@@ -7,6 +7,7 @@
package runtime
import (
+ "runtime/internal/atomic"
"unsafe"
)
@@ -35,7 +36,7 @@
usleep(usec)
}
-func exitThread(wait *uint32)
+func exitThread(wait *atomic.Uint32)
type mOS struct{}
@@ -49,13 +50,13 @@
const _SIGSEGV = 0xb
func sigpanic() {
- g := getg()
- if !canpanic(g) {
+ gp := getg()
+ if !canpanic() {
throw("unexpected signal during runtime execution")
}
// js only invokes the exception handler for memory faults.
- g.sig = _SIGSEGV
+ gp.sig = _SIGSEGV
panicmem()
}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 25aea65..3ad1e3b 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -21,12 +21,12 @@
// profileTimer holds the ID of the POSIX interval timer for profiling CPU
// usage on this thread.
//
- // It is valid when the profileTimerValid field is non-zero. A thread
+ // It is valid when the profileTimerValid field is true. A thread
// creates and manages its own timer, and these fields are read and written
// only by this thread. But because some of the reads on profileTimerValid
- // are in signal handling code, access to that field uses atomic operations.
+ // are in signal handling code, this field uses an atomic type.
profileTimer int32
- profileTimerValid uint32
+ profileTimerValid atomic.Bool
// needPerThreadSyscall indicates that a per-thread syscall is required
// for doAllThreadsSyscall.
@@ -176,12 +176,20 @@
// with signals disabled. It will enable them in minit.
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- ret := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
+ ret := retryOnEAGAIN(func() int32 {
+ r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
+ // clone returns positive TID, negative errno.
+ // We don't care about the TID.
+ if r >= 0 {
+ return 0
+ }
+ return -r
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
- if ret < 0 {
- print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
- if ret == -_EAGAIN {
+ if ret != 0 {
+ print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
+ if ret == _EAGAIN {
println("runtime: may need to increase max user processes (ulimit -u)")
}
throw("newosproc")
@@ -194,19 +202,16 @@
func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
stack := sysAlloc(stacksize, &memstats.stacks_sys)
if stack == nil {
- write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
+ writeErrStr(failallocatestack)
exit(1)
}
ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
if ret < 0 {
- write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
}
-var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
const (
_AT_NULL = 0 // End of vector
_AT_PAGESZ = 6 // System physical page size
@@ -504,7 +509,7 @@
return sa.sa_handler
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
@@ -593,7 +598,7 @@
// Having an M means the thread interacts with the Go scheduler, and we can
// check whether there's an active per-thread timer for this thread.
- if atomic.Load(&mp.profileTimerValid) != 0 {
+ if mp.profileTimerValid.Load() {
// If this M has its own per-thread CPU profiling interval timer, we
// should track the SIGPROF signals that come from that timer (for
// accurate reporting of its CPU usage; see issue 35057) and ignore any
@@ -619,9 +624,9 @@
}
// destroy any active timer
- if atomic.Load(&mp.profileTimerValid) != 0 {
+ if mp.profileTimerValid.Load() {
timerid := mp.profileTimer
- atomic.Store(&mp.profileTimerValid, 0)
+ mp.profileTimerValid.Store(false)
mp.profileTimer = 0
ret := timer_delete(timerid)
@@ -681,7 +686,7 @@
}
mp.profileTimer = timerid
- atomic.Store(&mp.profileTimerValid, 1)
+ mp.profileTimerValid.Store(true)
}
// perThreadSyscallArgs contains the system call number, arguments, and
@@ -880,9 +885,23 @@
}
if errno != 0 || r1 != args.r1 || r2 != args.r2 {
print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
- print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0\n")
+ print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
}
gp.m.needPerThreadSyscall.Store(0)
}
+
+const (
+ _SI_USER = 0
+ _SI_TKILL = -6
+)
+
+// sigFromUser reports whether the signal was sent because of a call
+// to kill or tgkill.
+//
+//go:nosplit
+func (c *sigctxt) sigFromUser() bool {
+ code := int32(c.sigcode())
+ return code == _SI_USER || code == _SI_TKILL
+}
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
index b590da7..bd3ab44 100644
--- a/src/runtime/os_linux_arm.go
+++ b/src/runtime/os_linux_arm.go
@@ -11,6 +11,8 @@
_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
)
+func vdsoCall()
+
func checkgoarm() {
// On Android, /proc/self/auxv might be unreadable and hwcap won't
// reflect the CPU capabilities. Assume that every Android arm device
diff --git a/src/runtime/os_linux_be64.go b/src/runtime/os_linux_be64.go
index 537515f..d8d4ac2 100644
--- a/src/runtime/os_linux_be64.go
+++ b/src/runtime/os_linux_be64.go
@@ -11,7 +11,6 @@
const (
_SS_DISABLE = 2
_NSIG = 65
- _SI_USER = 0
_SIG_BLOCK = 0
_SIG_UNBLOCK = 1
_SIG_SETMASK = 2
diff --git a/src/runtime/os_linux_generic.go b/src/runtime/os_linux_generic.go
index bed9e66..15fafc1 100644
--- a/src/runtime/os_linux_generic.go
+++ b/src/runtime/os_linux_generic.go
@@ -9,7 +9,6 @@
const (
_SS_DISABLE = 2
_NSIG = 65
- _SI_USER = 0
_SIG_BLOCK = 0
_SIG_UNBLOCK = 1
_SIG_SETMASK = 2
diff --git a/src/runtime/os_linux_mips64x.go b/src/runtime/os_linux_mips64x.go
index 188db01..11d35bc 100644
--- a/src/runtime/os_linux_mips64x.go
+++ b/src/runtime/os_linux_mips64x.go
@@ -27,7 +27,6 @@
const (
_SS_DISABLE = 2
_NSIG = 129
- _SI_USER = 0
_SIG_BLOCK = 1
_SIG_UNBLOCK = 2
_SIG_SETMASK = 3
diff --git a/src/runtime/os_linux_mipsx.go b/src/runtime/os_linux_mipsx.go
index 73016f8..cdf83ff 100644
--- a/src/runtime/os_linux_mipsx.go
+++ b/src/runtime/os_linux_mipsx.go
@@ -21,7 +21,6 @@
const (
_SS_DISABLE = 2
_NSIG = 128 + 1
- _SI_USER = 0
_SIG_BLOCK = 1
_SIG_UNBLOCK = 2
_SIG_SETMASK = 3
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index 3cbace3..ce59618 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -152,16 +152,16 @@
//go:nosplit
func semasleep(ns int64) int32 {
- _g_ := getg()
+ gp := getg()
var deadline int64
if ns >= 0 {
deadline = nanotime() + ns
}
for {
- v := atomic.Load(&_g_.m.waitsemacount)
+ v := atomic.Load(&gp.m.waitsemacount)
if v > 0 {
- if atomic.Cas(&_g_.m.waitsemacount, v, v-1) {
+ if atomic.Cas(&gp.m.waitsemacount, v, v-1) {
return 0 // semaphore acquired
}
continue
@@ -178,7 +178,7 @@
ts.setNsec(wait)
tsp = &ts
}
- ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
+ ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&gp.m.waitsemacount), nil)
if ret == _ETIMEDOUT {
return -1
}
@@ -227,11 +227,15 @@
lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp.g0, abi.FuncPCABI0(netbsdMstart))
- ret := lwp_create(unsafe.Pointer(&uc), _LWP_DETACHED, unsafe.Pointer(&mp.procid))
+ ret := retryOnEAGAIN(func() int32 {
+ errno := lwp_create(unsafe.Pointer(&uc), _LWP_DETACHED, unsafe.Pointer(&mp.procid))
+ // lwp_create returns negative errno
+ return -errno
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
- if ret < 0 {
- print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
- if ret == -_EAGAIN {
+ if ret != 0 {
+ print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", ret, ")\n")
+ if ret == _EAGAIN {
println("runtime: may need to increase max user processes (ulimit -p)")
}
throw("runtime.newosproc")
@@ -289,8 +293,8 @@
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
- _g_ := getg()
- _g_.m.procid = uint64(lwp_self())
+ gp := getg()
+ gp.m.procid = uint64(lwp_self())
// On NetBSD a thread created by pthread_create inherits the
// signal stack of the creating thread. We always create a
@@ -299,8 +303,8 @@
// created in C that calls sigaltstack and then calls a Go
// function, because we will lose track of the C code's
// sigaltstack, but it's the best we can do.
- signalstack(&_g_.m.gsignal.stack)
- _g_.m.newSigstack = true
+ signalstack(&gp.m.gsignal.stack)
+ gp.m.newSigstack = true
minitSignalMask()
}
@@ -352,7 +356,7 @@
return sa.sa_sigaction
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
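
Thread creation on NetBSD (and, below, on OpenBSD) is now wrapped in retryOnEAGAIN so a transient EAGAIN from the kernel does not immediately abort the process. A minimal sketch of that pattern, assuming a fixed retry budget and a callback that returns an errno-style value; the real helper lives elsewhere in the runtime and its exact budget and backoff may differ:

package main

import (
    "fmt"
    "syscall"
)

// retryOnEAGAINSketch retries fn while it reports EAGAIN, up to a small
// fixed budget, and returns the last errno-style result.
func retryOnEAGAINSketch(fn func() int32) int32 {
    for tries := 0; tries < 20; tries++ {
        errno := fn()
        if errno != int32(syscall.EAGAIN) {
            return errno
        }
        // The real runtime helper sleeps briefly between attempts.
    }
    return int32(syscall.EAGAIN)
}

func main() {
    attempts := 0
    errno := retryOnEAGAINSketch(func() int32 {
        attempts++
        if attempts < 3 {
            return int32(syscall.EAGAIN) // simulate a transient thread-creation failure
        }
        return 0
    })
    fmt.Println("attempts:", attempts, "errno:", errno)
}
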
diff --git a/src/runtime/os_openbsd.go b/src/runtime/os_openbsd.go
index 2383dc8..500286a 100644
--- a/src/runtime/os_openbsd.go
+++ b/src/runtime/os_openbsd.go
@@ -51,6 +51,21 @@
return out, true
}
+func sysctlUint64(mib []uint32) (uint64, bool) {
+ var out uint64
+ nout := unsafe.Sizeof(out)
+ ret := sysctl(&mib[0], uint32(len(mib)), (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+ if ret < 0 {
+ return 0, false
+ }
+ return out, true
+}
+
+//go:linkname internal_cpu_sysctlUint64 internal/cpu.sysctlUint64
+func internal_cpu_sysctlUint64(mib []uint32) (uint64, bool) {
+ return sysctlUint64(mib)
+}
+
func getncpu() int32 {
// Try hw.ncpuonline first because hw.ncpu would report a number twice as
// high as the actual CPUs running on OpenBSD 6.4 with hyperthreading
@@ -84,7 +99,7 @@
//go:nosplit
func semasleep(ns int64) int32 {
- _g_ := getg()
+ gp := getg()
// Compute sleep deadline.
var tsp *timespec
@@ -95,9 +110,9 @@
}
for {
- v := atomic.Load(&_g_.m.waitsemacount)
+ v := atomic.Load(&gp.m.waitsemacount)
if v > 0 {
- if atomic.Cas(&_g_.m.waitsemacount, v, v-1) {
+ if atomic.Cas(&gp.m.waitsemacount, v, v-1) {
return 0 // semaphore acquired
}
continue
@@ -110,7 +125,7 @@
// be examined [...] immediately before blocking. If that int
// is non-zero then __thrsleep() will immediately return EINTR
// without blocking."
- ret := thrsleep(uintptr(unsafe.Pointer(&_g_.m.waitsemacount)), _CLOCK_MONOTONIC, tsp, 0, &_g_.m.waitsemacount)
+ ret := thrsleep(uintptr(unsafe.Pointer(&gp.m.waitsemacount)), _CLOCK_MONOTONIC, tsp, 0, &gp.m.waitsemacount)
if ret == _EWOULDBLOCK {
return -1
}
@@ -214,7 +229,7 @@
return sa.sa_sigaction
}
-// setSignaltstackSP sets the ss_sp field of a stackt.
+// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
diff --git a/src/runtime/os_openbsd_libc.go b/src/runtime/os_openbsd_libc.go
index 4ad2a06..201f162 100644
--- a/src/runtime/os_openbsd_libc.go
+++ b/src/runtime/os_openbsd_libc.go
@@ -11,8 +11,6 @@
"unsafe"
)
-var failThreadCreate = []byte("runtime: failed to create new OS thread\n")
-
// mstart_stub provides glue code to call mstart from pthread_create.
func mstart_stub()
@@ -27,21 +25,21 @@
// Initialize an attribute object.
var attr pthreadattr
if err := pthread_attr_init(&attr); err != 0 {
- write(2, unsafe.Pointer(&failThreadCreate[0]), int32(len(failThreadCreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
// Find out OS stack size for our own stack guard.
var stacksize uintptr
if pthread_attr_getstacksize(&attr, &stacksize) != 0 {
- write(2, unsafe.Pointer(&failThreadCreate[0]), int32(len(failThreadCreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
mp.g0.stack.hi = stacksize // for mstart
// Tell the pthread library we won't join with this thread.
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
- write(2, unsafe.Pointer(&failThreadCreate[0]), int32(len(failThreadCreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
@@ -49,10 +47,12 @@
// setup and then calls mstart.
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- err := pthread_create(&attr, abi.FuncPCABI0(mstart_stub), unsafe.Pointer(mp))
+ err := retryOnEAGAIN(func() int32 {
+ return pthread_create(&attr, abi.FuncPCABI0(mstart_stub), unsafe.Pointer(mp))
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
if err != 0 {
- write(2, unsafe.Pointer(&failThreadCreate[0]), int32(len(failThreadCreate)))
+ writeErrStr(failthreadcreate)
exit(1)
}
diff --git a/src/runtime/os_openbsd_syscall.go b/src/runtime/os_openbsd_syscall.go
index 9d67a7e..d784f76 100644
--- a/src/runtime/os_openbsd_syscall.go
+++ b/src/runtime/os_openbsd_syscall.go
@@ -34,12 +34,16 @@
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
- ret := tfork(¶m, unsafe.Sizeof(param), mp, mp.g0, abi.FuncPCABI0(mstart))
+ ret := retryOnEAGAIN(func() int32 {
+ errno := tfork(¶m, unsafe.Sizeof(param), mp, mp.g0, abi.FuncPCABI0(mstart))
+ // tfork returns negative errno
+ return -errno
+ })
sigprocmask(_SIG_SETMASK, &oset, nil)
- if ret < 0 {
- print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
- if ret == -_EAGAIN {
+ if ret != 0 {
+ print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", ret, ")\n")
+ if ret == _EAGAIN {
println("runtime: may need to increase max user processes (ulimit -p)")
}
throw("runtime.newosproc")
diff --git a/src/runtime/os_openbsd_syscall2.go b/src/runtime/os_openbsd_syscall2.go
index e4c9d2f..ebf478b 100644
--- a/src/runtime/os_openbsd_syscall2.go
+++ b/src/runtime/os_openbsd_syscall2.go
@@ -7,6 +7,7 @@
package runtime
import (
+ "runtime/internal/atomic"
"unsafe"
)
@@ -37,7 +38,7 @@
usleep(usec)
}
-// write calls the write system call.
+// write1 calls the write system call.
// It returns a non-negative number of bytes written or a negative errno value.
//
//go:noescape
@@ -46,14 +47,14 @@
//go:noescape
func open(name *byte, mode, perm int32) int32
-// return value is only set on linux to be used in osinit()
+// return value is only set on linux to be used in osinit().
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
-// exitThread terminates the current thread, writing *wait = 0 when
+// exitThread terminates the current thread, writing *wait = freeMStack when
// the stack is safe to reclaim.
//
//go:noescape
-func exitThread(wait *uint32)
+func exitThread(wait *atomic.Uint32)
//go:noescape
func obsdsigprocmask(how int32, new sigset) sigset
diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
index f0e7c6a..5e5a63d 100644
--- a/src/runtime/os_plan9.go
+++ b/src/runtime/os_plan9.go
@@ -75,13 +75,13 @@
}
func sigpanic() {
- g := getg()
- if !canpanic(g) {
+ gp := getg()
+ if !canpanic() {
throw("unexpected signal during runtime execution")
}
- note := gostringnocopy((*byte)(unsafe.Pointer(g.m.notesig)))
- switch g.sig {
+ note := gostringnocopy((*byte)(unsafe.Pointer(gp.m.notesig)))
+ switch gp.sig {
case _SIGRFAULT, _SIGWFAULT:
i := indexNoFloat(note, "addr=")
if i >= 0 {
@@ -92,17 +92,24 @@
panicmem()
}
addr := note[i:]
- g.sigcode1 = uintptr(atolwhex(addr))
- if g.sigcode1 < 0x1000 {
+ gp.sigcode1 = uintptr(atolwhex(addr))
+ if gp.sigcode1 < 0x1000 {
panicmem()
}
- if g.paniconfault {
- panicmemAddr(g.sigcode1)
+ if gp.paniconfault {
+ panicmemAddr(gp.sigcode1)
}
- print("unexpected fault address ", hex(g.sigcode1), "\n")
+ if inUserArenaChunk(gp.sigcode1) {
+ // We could check that the arena chunk is explicitly set to fault,
+ // but the fact that we faulted on accessing it is enough to prove
+ // that it is.
+ print("accessed data from freed user arena ", hex(gp.sigcode1), "\n")
+ } else {
+ print("unexpected fault address ", hex(gp.sigcode1), "\n")
+ }
throw("fault")
case _SIGTRAP:
- if g.paniconfault {
+ if gp.paniconfault {
panicmem()
}
throw(note)
@@ -461,7 +468,7 @@
}
}
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
// We should never reach exitThread on Plan 9 because we let
// the OS clean up threads.
throw("exitThread")
@@ -473,19 +480,19 @@
//go:nosplit
func semasleep(ns int64) int {
- _g_ := getg()
+ gp := getg()
if ns >= 0 {
ms := timediv(ns, 1000000, nil)
if ms == 0 {
ms = 1
}
- ret := plan9_tsemacquire(&_g_.m.waitsemacount, ms)
+ ret := plan9_tsemacquire(&gp.m.waitsemacount, ms)
if ret == 1 {
return 0 // success
}
return -1 // timeout or interrupted
}
- for plan9_semacquire(&_g_.m.waitsemacount, 1) < 0 {
+ for plan9_semacquire(&gp.m.waitsemacount, 1) < 0 {
// interrupted; try again (c.f. lock_sema.go)
}
return 0 // success
diff --git a/src/runtime/os_unix_nonlinux.go b/src/runtime/os_unix_nonlinux.go
new file mode 100644
index 0000000..b98753b
--- /dev/null
+++ b/src/runtime/os_unix_nonlinux.go
@@ -0,0 +1,15 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix && !linux
+
+package runtime
+
+// sigFromUser reports whether the signal was sent because of a call
+// to kill.
+//
+//go:nosplit
+func (c *sigctxt) sigFromUser() bool {
+ return c.sigcode() == _SI_USER
+}
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 2f6ec75..44718f1 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -941,7 +941,7 @@
throw("bad newosproc0")
}
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
// We should never reach exitThread on Windows because we let
// the OS clean up threads.
throw("exitThread")
@@ -1326,7 +1326,7 @@
if !atomic.Cas(&mp.preemptExtLock, 0, 1) {
// External code is running. Fail the preemption
// attempt.
- atomic.Xadd(&mp.preemptGen, 1)
+ mp.preemptGen.Add(1)
return
}
@@ -1336,7 +1336,7 @@
// The M hasn't been minit'd yet (or was just unminit'd).
unlock(&mp.threadLock)
atomic.Store(&mp.preemptExtLock, 0)
- atomic.Xadd(&mp.preemptGen, 1)
+ mp.preemptGen.Add(1)
return
}
var thread uintptr
@@ -1366,7 +1366,7 @@
atomic.Store(&mp.preemptExtLock, 0)
// The thread no longer exists. This shouldn't be
// possible, but just acknowledge the request.
- atomic.Xadd(&mp.preemptGen, 1)
+ mp.preemptGen.Add(1)
return
}
@@ -1431,7 +1431,7 @@
atomic.Store(&mp.preemptExtLock, 0)
// Acknowledge the preemption.
- atomic.Xadd(&mp.preemptGen, 1)
+ mp.preemptGen.Add(1)
stdcall1(_ResumeThread, thread)
stdcall1(_CloseHandle, thread)
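
The preemptGen and profileTimerValid changes in this and the surrounding files follow one pattern: fields that used to be plain integers poked with runtime/internal/atomic free functions become typed atomic values with methods. The same idea in terms of the public sync/atomic package:

package main

import (
    "fmt"
    "sync/atomic"
)

var genOld uint32        // old style: plain word plus free functions; nothing prevents a stray non-atomic access
var genNew atomic.Uint32 // new style: the type itself documents and enforces atomic access

func main() {
    atomic.AddUint32(&genOld, 1)
    genNew.Add(1)
    fmt.Println(atomic.LoadUint32(&genOld), genNew.Load())
}
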
diff --git a/src/runtime/pagetrace_off.go b/src/runtime/pagetrace_off.go
new file mode 100644
index 0000000..10b44d4
--- /dev/null
+++ b/src/runtime/pagetrace_off.go
@@ -0,0 +1,28 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.pagetrace
+
+package runtime
+
+//go:systemstack
+func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
+}
+
+//go:systemstack
+func pageTraceFree(pp *p, now int64, base, npages uintptr) {
+}
+
+//go:systemstack
+func pageTraceScav(pp *p, now int64, base, npages uintptr) {
+}
+
+type pageTraceBuf struct {
+}
+
+func initPageTrace(env string) {
+}
+
+func finishPageTrace() {
+}
diff --git a/src/runtime/pagetrace_on.go b/src/runtime/pagetrace_on.go
new file mode 100644
index 0000000..0e621cb
--- /dev/null
+++ b/src/runtime/pagetrace_on.go
@@ -0,0 +1,358 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.pagetrace
+
+// Page tracer.
+//
+// This file contains an implementation of page trace instrumentation for tracking
+// the way the Go runtime manages pages of memory. The trace may be enabled at program
+// startup with the GODEBUG option pagetrace.
+//
+// Each page trace event is either 8 or 16 bytes wide. The first
+// 8 bytes follow this format for non-sync events:
+//
+// [16 timestamp delta][35 base address][10 npages][1 isLarge][2 pageTraceEventType]
+//
+// If the "large" bit is set then the event is 16 bytes wide with the second 8 byte word
+// containing the full npages value (the npages bitfield is 0).
+//
+// The base address's bottom pageShift bits are always zero hence why we can pack other
+// data in there. We ignore the top 16 bits, assuming a 48 bit address space for the
+// heap.
+//
+// The timestamp delta is computed from the difference between the current nanotime
+// timestamp and the last sync event's timestamp. The bottom pageTraceTimeLostBits of
+// this delta are removed and only the next pageTraceTimeDeltaBits are kept.
+//
+// A sync event is emitted at the beginning of each trace buffer and whenever the
+// timestamp delta would not fit in an event.
+//
+// Sync events have the following structure:
+//
+// [61 timestamp or P ID][1 isPID][2 pageTraceSyncEvent]
+//
+// In essence, the "large" bit repurposed to indicate whether it's a timestamp or a P ID
+// (these are typically uint32). Note that we only have 61 bits for the 64-bit timestamp,
+// but like for the delta we drop the bottom pageTraceTimeLostBits here as well.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// pageTraceAlloc records a page trace allocation event.
+// pp may be nil. Call only if debug.pagetracefd != 0.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
+ if pageTrace.enabled {
+ if now == 0 {
+ now = nanotime()
+ }
+ pageTraceEmit(pp, now, base, npages, pageTraceAllocEvent)
+ }
+}
+
+// pageTraceFree records a page trace free event.
+// pp may be nil. Call only if debug.pagetracefd != 0.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func pageTraceFree(pp *p, now int64, base, npages uintptr) {
+ if pageTrace.enabled {
+ if now == 0 {
+ now = nanotime()
+ }
+ pageTraceEmit(pp, now, base, npages, pageTraceFreeEvent)
+ }
+}
+
+// pageTraceScav records a page trace scavenge event.
+// pp may be nil. Call only if debug.pagetracefd != 0.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func pageTraceScav(pp *p, now int64, base, npages uintptr) {
+ if pageTrace.enabled {
+ if now == 0 {
+ now = nanotime()
+ }
+ pageTraceEmit(pp, now, base, npages, pageTraceScavEvent)
+ }
+}
+
+// pageTraceEventType is a page trace event type.
+type pageTraceEventType uint8
+
+const (
+ pageTraceSyncEvent pageTraceEventType = iota // Timestamp emission.
+ pageTraceAllocEvent // Allocation of pages.
+ pageTraceFreeEvent // Freeing pages.
+ pageTraceScavEvent // Scavenging pages.
+)
+
+// pageTraceEmit emits a page trace event.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func pageTraceEmit(pp *p, now int64, base, npages uintptr, typ pageTraceEventType) {
+ // Get a buffer.
+ var tbp *pageTraceBuf
+ pid := int32(-1)
+ if pp == nil {
+ // We have no P, so take the global buffer.
+ lock(&pageTrace.lock)
+ tbp = &pageTrace.buf
+ } else {
+ tbp = &pp.pageTraceBuf
+ pid = pp.id
+ }
+
+ // Initialize the buffer if necessary.
+ tb := *tbp
+ if tb.buf == nil {
+ tb.buf = (*pageTraceEvents)(sysAlloc(pageTraceBufSize, &memstats.other_sys))
+ tb = tb.writePid(pid)
+ }
+
+ // Handle timestamp and emit a sync event if necessary.
+ if now < tb.timeBase {
+ now = tb.timeBase
+ }
+ if now-tb.timeBase >= pageTraceTimeMaxDelta {
+ tb.timeBase = now
+ tb = tb.writeSync(pid)
+ }
+
+ // Emit the event.
+ tb = tb.writeEvent(pid, now, base, npages, typ)
+
+ // Write back the buffer.
+ *tbp = tb
+ if pp == nil {
+ unlock(&pageTrace.lock)
+ }
+}
+
+const (
+ pageTraceBufSize = 32 << 10
+
+ // These constants describe the per-event timestamp delta encoding.
+ pageTraceTimeLostBits = 7 // How many bits of precision we lose in the delta.
+ pageTraceTimeDeltaBits = 16 // Size of the delta in bits.
+ pageTraceTimeMaxDelta = 1 << (pageTraceTimeLostBits + pageTraceTimeDeltaBits)
+)
+
+// pageTraceEvents is the low-level buffer containing the trace data.
+type pageTraceEvents struct {
+ _ sys.NotInHeap
+ events [pageTraceBufSize / 8]uint64
+}
+
+// pageTraceBuf is a wrapper around pageTraceEvents that knows how to write events
+// to the buffer. It tracks state necessary to do so.
+type pageTraceBuf struct {
+ buf *pageTraceEvents
+ len int // How many events have been written so far.
+ timeBase int64 // The current timestamp base from which deltas are produced.
+ finished bool // Whether this trace buf should no longer flush anything out.
+}
+
+// writePid writes a P ID event indicating which P we're running on.
+//
+// Assumes there's always space in the buffer since this is only called at the
+// beginning of a new buffer.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func (tb pageTraceBuf) writePid(pid int32) pageTraceBuf {
+ e := uint64(int64(pid))<<3 | 0b100 | uint64(pageTraceSyncEvent)
+ tb.buf.events[tb.len] = e
+ tb.len++
+ return tb
+}
+
+// writeSync writes a sync event, which is just a timestamp. Handles flushing.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func (tb pageTraceBuf) writeSync(pid int32) pageTraceBuf {
+ if tb.len+1 > len(tb.buf.events) {
+ // N.B. flush will writeSync again.
+ return tb.flush(pid, tb.timeBase)
+ }
+ e := ((uint64(tb.timeBase) >> pageTraceTimeLostBits) << 3) | uint64(pageTraceSyncEvent)
+ tb.buf.events[tb.len] = e
+ tb.len++
+ return tb
+}
+
+// writeEvent handles writing all non-sync and non-pid events. Handles flushing if necessary.
+//
+// pid indicates the P we're currently running on. Necessary in case we need to flush.
+// now is the current nanotime timestamp.
+// base is the base address of whatever group of pages this event is happening to.
+// npages is the length of the group of pages this event is happening to.
+// typ is the event that's happening to these pages.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func (tb pageTraceBuf) writeEvent(pid int32, now int64, base, npages uintptr, typ pageTraceEventType) pageTraceBuf {
+ large := 0
+ np := npages
+ if npages >= 1024 {
+ large = 1
+ np = 0
+ }
+ if tb.len+1+large > len(tb.buf.events) {
+ tb = tb.flush(pid, now)
+ }
+ if base%pageSize != 0 {
+ throw("base address not page aligned")
+ }
+ e := uint64(base)
+ // The pageShift low-order bits are zero.
+ e |= uint64(typ) // 2 bits
+ e |= uint64(large) << 2 // 1 bit
+ e |= uint64(np) << 3 // 10 bits
+ // Write the timestamp delta in the upper pageTraceTimeDeltaBits.
+ e |= uint64((now-tb.timeBase)>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits)
+ tb.buf.events[tb.len] = e
+ if large != 0 {
+ // npages doesn't fit in 10 bits, so write an additional word with that data.
+ tb.buf.events[tb.len+1] = uint64(npages)
+ }
+ tb.len += 1 + large
+ return tb
+}
+
+// flush writes out the contents of the buffer to pageTrace.fd and resets the buffer.
+// It then writes out a P ID event and the first sync event for the new buffer.
+//
+// Must run on the system stack as a crude way to prevent preemption.
+//
+//go:systemstack
+func (tb pageTraceBuf) flush(pid int32, now int64) pageTraceBuf {
+ if !tb.finished {
+ lock(&pageTrace.fdLock)
+ writeFull(uintptr(pageTrace.fd), (*byte)(unsafe.Pointer(&tb.buf.events[0])), tb.len*8)
+ unlock(&pageTrace.fdLock)
+ }
+ tb.len = 0
+ tb.timeBase = now
+ return tb.writePid(pid).writeSync(pid)
+}
+
+var pageTrace struct {
+ // enabled indicates whether tracing is enabled. If true, fd >= 0.
+ //
+ // Safe to read without synchronization because it's only set once
+ // at program initialization.
+ enabled bool
+
+ // buf is the page trace buffer used if there is no P.
+ //
+ // lock protects buf.
+ lock mutex
+ buf pageTraceBuf
+
+ // fdLock protects writing to fd.
+ //
+ // fd is the file to write the page trace to.
+ fdLock mutex
+ fd int32
+}
+
+// initPageTrace initializes the page tracing infrastructure from GODEBUG.
+//
+// env must be the value of the GODEBUG environment variable.
+func initPageTrace(env string) {
+ var value string
+ for env != "" {
+ elt, rest := env, ""
+ for i := 0; i < len(env); i++ {
+ if env[i] == ',' {
+ elt, rest = env[:i], env[i+1:]
+ break
+ }
+ }
+ env = rest
+ if hasPrefix(elt, "pagetrace=") {
+ value = elt[len("pagetrace="):]
+ break
+ }
+ }
+ pageTrace.fd = -1
+ if canCreateFile && value != "" {
+ var tmp [4096]byte
+ if len(value) != 0 && len(value) < 4096 {
+ copy(tmp[:], value)
+ pageTrace.fd = create(&tmp[0], 0o664)
+ }
+ }
+ pageTrace.enabled = pageTrace.fd >= 0
+}
+
+// finishPageTrace flushes all P's trace buffers and disables page tracing.
+func finishPageTrace() {
+ if !pageTrace.enabled {
+ return
+ }
+ // Grab worldsema as we're about to execute a ragged barrier.
+ semacquire(&worldsema)
+ systemstack(func() {
+ // Disable tracing. This isn't strictly necessary and it's best-effort.
+ pageTrace.enabled = false
+
+ // Execute a ragged barrier, flushing each trace buffer.
+ forEachP(func(pp *p) {
+ if pp.pageTraceBuf.buf != nil {
+ pp.pageTraceBuf = pp.pageTraceBuf.flush(pp.id, nanotime())
+ }
+ pp.pageTraceBuf.finished = true
+ })
+
+ // Write the global have-no-P buffer.
+ lock(&pageTrace.lock)
+ if pageTrace.buf.buf != nil {
+ pageTrace.buf = pageTrace.buf.flush(-1, nanotime())
+ }
+ pageTrace.buf.finished = true
+ unlock(&pageTrace.lock)
+
+ // Safely close the file as nothing else should be allowed to write to the fd.
+ lock(&pageTrace.fdLock)
+ closefd(pageTrace.fd)
+ pageTrace.fd = -1
+ unlock(&pageTrace.fdLock)
+ })
+ semrelease(&worldsema)
+}
+
+// writeFull ensures that a complete write of bn bytes from b is made to fd.
+func writeFull(fd uintptr, b *byte, bn int) {
+ for bn > 0 {
+ n := write(fd, unsafe.Pointer(b), int32(bn))
+ if n == -_EINTR || n == -_EAGAIN {
+ continue
+ }
+ if n < 0 {
+ print("errno=", -n, "\n")
+ throw("writeBytes: bad write")
+ }
+ bn -= int(n)
+ b = addb(b, uintptr(n))
+ }
+}
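
The event encoding documented at the top of pagetrace_on.go can be sanity-checked with a short standalone calculation. This sketch packs a hypothetical small allocation event the way writeEvent does; the address, page count, and timestamp delta are made up, and the field widths are copied from the comment above:

package main

import "fmt"

const (
    timeLostBits         = 7
    timeDeltaBits        = 16
    allocEvent    uint64 = 1 // corresponds to pageTraceAllocEvent
)

func main() {
    var (
        base   uint64 = 0xc000100000 // page-aligned heap address (made up)
        npages uint64 = 4
        delta  int64  = 1 << 20 // nanoseconds since the last sync event (made up)
    )
    e := base                                                // the low pageShift bits are zero
    e |= allocEvent                                          // 2 bits: event type
    e |= 0 << 2                                              // 1 bit: "large" flag; npages fits in 10 bits
    e |= npages << 3                                         // 10 bits: page count
    e |= uint64(delta>>timeLostBits) << (64 - timeDeltaBits) // 16 coarse timestamp-delta bits
    fmt.Printf("event word: %#016x\n", e)
}
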
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 121f202..26618db 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -197,9 +197,9 @@
panic(boundsError{x: int64(x), signed: false, y: y, code: boundsSlice3C})
}
-// failures in the conversion (*[x]T)s, 0 <= x <= y, x == cap(s)
+// failures in the conversion ([x]T)(s) or (*[x]T)(s), 0 <= x <= y, y == len(s)
func goPanicSliceConvert(x int, y int) {
- panicCheck1(getcallerpc(), "slice length too short to convert to pointer to array")
+ panicCheck1(getcallerpc(), "slice length too short to convert to array or pointer to array")
panic(boundsError{x: int64(x), signed: true, y: y, code: boundsConvert})
}
@@ -457,7 +457,7 @@
return
}
if d.openDefer {
- done := runOpenDeferFrame(gp, d)
+ done := runOpenDeferFrame(d)
if !done {
throw("unfinished open-coded defers in deferreturn")
}
@@ -519,7 +519,7 @@
d.started = true
d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
if d.openDefer {
- done := runOpenDeferFrame(gp, d)
+ done := runOpenDeferFrame(d)
if !done {
// We should always run all defers in the frame,
// since there is no panic associated with this
@@ -744,7 +744,7 @@
// d. It normally processes all active defers in the frame, but stops immediately
// if a defer does a successful recover. It returns true if there are no
// remaining defers to run in the frame.
-func runOpenDeferFrame(gp *g, d *_defer) bool {
+func runOpenDeferFrame(d *_defer) bool {
done := true
fd := d.fd
@@ -837,7 +837,7 @@
p.link = gp._panic
gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
- atomic.Xadd(&runningPanicDefers, 1)
+ runningPanicDefers.Add(1)
// By calculating getcallerpc/getcallersp here, we avoid scanning the
// gopanic frame (stack scanning is slow...)
@@ -881,7 +881,7 @@
done := true
if d.openDefer {
- done = runOpenDeferFrame(gp, d)
+ done = runOpenDeferFrame(d)
if done && !d._panic.recovered {
addOneOpenDeferFrame(gp, 0, nil)
}
@@ -917,7 +917,7 @@
mcall(recovery)
throw("bypassed recovery failed") // mcall should not return
}
- atomic.Xadd(&runningPanicDefers, -1)
+ runningPanicDefers.Add(-1)
// After a recover, remove any remaining non-started,
// open-coded defer entries, since the corresponding defers
@@ -1067,13 +1067,11 @@
}
// runningPanicDefers is non-zero while running deferred functions for panic.
-// runningPanicDefers is incremented and decremented atomically.
// This is used to try hard to get a panic stack trace out when exiting.
-var runningPanicDefers uint32
+var runningPanicDefers atomic.Uint32
// panicking is non-zero when crashing the program for an unrecovered panic.
-// panicking is incremented and decremented atomically.
-var panicking uint32
+var panicking atomic.Uint32
// paniclk is held while printing the panic information and stack trace,
// so that two concurrent panics don't overlap their output.
@@ -1155,7 +1153,7 @@
// startpanic_m set panicking, which will
// block main from exiting, so now OK to
// decrement runningPanicDefers.
- atomic.Xadd(&runningPanicDefers, -1)
+ runningPanicDefers.Add(-1)
printpanics(msgs)
}
@@ -1190,7 +1188,7 @@
//
//go:nowritebarrierrec
func startpanic_m() bool {
- _g_ := getg()
+ gp := getg()
if mheap_.cachealloc.size == 0 { // very early
print("runtime: panic before malloc heap initialized\n")
}
@@ -1198,19 +1196,19 @@
// could happen in a signal handler, or in a throw, or inside
// malloc itself. We want to catch if an allocation ever does
// happen (even if we're not in one of these situations).
- _g_.m.mallocing++
+ gp.m.mallocing++
// If we're dying because of a bad lock count, set it to a
// good lock count so we don't recursively panic below.
- if _g_.m.locks < 0 {
- _g_.m.locks = 1
+ if gp.m.locks < 0 {
+ gp.m.locks = 1
}
- switch _g_.m.dying {
+ switch gp.m.dying {
case 0:
// Setting dying >0 has the side-effect of disabling this G's writebuf.
- _g_.m.dying = 1
- atomic.Xadd(&panicking, 1)
+ gp.m.dying = 1
+ panicking.Add(1)
lock(&paniclk)
if debug.schedtrace > 0 || debug.scheddetail > 0 {
schedtrace(true)
@@ -1220,13 +1218,13 @@
case 1:
// Something failed while panicking.
// Just print a stack trace and exit.
- _g_.m.dying = 2
+ gp.m.dying = 2
print("panic during panic\n")
return false
case 2:
// This is a genuine bug in the runtime, we couldn't even
// print the stack trace successfully.
- _g_.m.dying = 3
+ gp.m.dying = 3
print("stack trace unavailable\n")
exit(4)
fallthrough
@@ -1240,6 +1238,8 @@
var didothers bool
var deadlock mutex
+// gp is the crashing g running on this M, but may be a user G, while getg() is
+// always g0.
func dopanic_m(gp *g, pc, sp uintptr) bool {
if gp.sig != 0 {
signame := signame(gp.sig)
@@ -1252,7 +1252,6 @@
}
level, all, docrash := gotraceback()
- _g_ := getg()
if level > 0 {
if gp != gp.m.curg {
all = true
@@ -1261,7 +1260,7 @@
print("\n")
goroutineheader(gp)
traceback(pc, sp, 0, gp)
- } else if level >= 2 || _g_.m.throwing >= throwTypeRuntime {
+ } else if level >= 2 || gp.m.throwing >= throwTypeRuntime {
print("\nruntime stack:\n")
traceback(pc, sp, 0, gp)
}
@@ -1272,7 +1271,7 @@
}
unlock(&paniclk)
- if atomic.Xadd(&panicking, -1) != 0 {
+ if panicking.Add(-1) != 0 {
// Some other m is panicking too.
// Let it print what it needs to print.
// Wait forever without chewing up cpu.
@@ -1290,29 +1289,32 @@
// panicking.
//
//go:nosplit
-func canpanic(gp *g) bool {
- // Note that g is m->gsignal, different from gp.
- // Note also that g->m can change at preemption, so m can go stale
- // if this function ever makes a function call.
- _g_ := getg()
- mp := _g_.m
+func canpanic() bool {
+ gp := getg()
+ mp := acquirem()
// Is it okay for gp to panic instead of crashing the program?
// Yes, as long as it is running Go code, not runtime code,
// and not stuck in a system call.
- if gp == nil || gp != mp.curg {
+ if gp != mp.curg {
+ releasem(mp)
return false
}
- if mp.locks != 0 || mp.mallocing != 0 || mp.throwing != throwTypeNone || mp.preemptoff != "" || mp.dying != 0 {
+ // N.B. mp.locks != 1 instead of 0 to account for acquirem.
+ if mp.locks != 1 || mp.mallocing != 0 || mp.throwing != throwTypeNone || mp.preemptoff != "" || mp.dying != 0 {
+ releasem(mp)
return false
}
status := readgstatus(gp)
if status&^_Gscan != _Grunning || gp.syscallsp != 0 {
+ releasem(mp)
return false
}
if GOOS == "windows" && mp.libcallsp != 0 {
+ releasem(mp)
return false
}
+ releasem(mp)
return true
}
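
The reworked canpanic pins the M with acquirem before inspecting it, which is why the lock-count check becomes mp.locks != 1 rather than != 0. A toy model of that shape (not runtime code) shows how the pin itself shifts the invariant:

package main

import "fmt"

// m is a toy stand-in for the runtime's M with a lock-depth counter.
type m struct{ locks int32 }

// acquire pins the caller to mp by bumping the lock count, the way acquirem does.
func acquire(mp *m) *m { mp.locks++; return mp }
func release(mp *m)    { mp.locks-- }

// canDoSomething mirrors the shape of the reworked canpanic: because it pins
// via acquire, the "no locks held" invariant becomes locks == 1, not locks == 0.
func canDoSomething(mp *m) bool {
    mp = acquire(mp)
    defer release(mp)
    return mp.locks == 1
}

func main() {
    mp := &m{}
    fmt.Println(canDoSomething(mp)) // true: only the pin itself is held

    mp.locks++                      // simulate an outer lock being held
    fmt.Println(canDoSomething(mp)) // false
}
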
diff --git a/src/runtime/pprof/label.go b/src/runtime/pprof/label.go
index 0c58a7a..d39e0ad 100644
--- a/src/runtime/pprof/label.go
+++ b/src/runtime/pprof/label.go
@@ -57,8 +57,8 @@
// WithLabels returns a new context.Context with the given labels added.
// A label overwrites a prior label with the same key.
func WithLabels(ctx context.Context, labels LabelSet) context.Context {
- childLabels := make(labelMap)
parentLabels := labelValue(ctx)
+ childLabels := make(labelMap, len(parentLabels))
// TODO(matloob): replace the map implementation with something
// more efficient so creating a child context WithLabels doesn't need
// to clone the map.
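
The WithLabels change above is a small allocation tweak: sizing the child map to the parent's length avoids incremental map growth while the parent's labels are copied in. The same idea in a generic form:

package main

import "fmt"

// copyWithHint clones src into a map pre-sized to len(src), so the copy
// does not trigger repeated rehash/grow steps for large parents.
func copyWithHint(src map[string]string) map[string]string {
    dst := make(map[string]string, len(src))
    for k, v := range src {
        dst[k] = v
    }
    return dst
}

func main() {
    parent := map[string]string{"region": "us", "service": "api"}
    child := copyWithHint(parent)
    child["handler"] = "login" // extends the child without touching the parent
    fmt.Println(len(parent), len(child))
}
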
diff --git a/src/runtime/pprof/pe.go b/src/runtime/pprof/pe.go
new file mode 100644
index 0000000..4105458
--- /dev/null
+++ b/src/runtime/pprof/pe.go
@@ -0,0 +1,19 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pprof
+
+import "os"
+
+// peBuildID returns a best effort unique ID for the named executable.
+//
+// It would be wasteful to calculate the hash of the whole file;
+// instead, use the binary name and the last modified time for the buildid.
+func peBuildID(file string) string {
+ s, err := os.Stat(file)
+ if err != nil {
+ return file
+ }
+ return file + s.ModTime().String()
+}
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index f0b25c1..17a490e 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -74,7 +74,6 @@
import (
"bufio"
- "bytes"
"fmt"
"internal/abi"
"io"
@@ -372,8 +371,7 @@
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block, contention profiles.
-// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile.
-func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler func(int64, float64) (int64, float64), records []runtime.BlockProfileRecord) error {
+func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error {
// Output profile in protobuf form.
b := newProfileBuilder(w)
b.pbValueType(tagProfile_PeriodType, countName, "count")
@@ -386,9 +384,8 @@
values := []int64{0, 0}
var locs []uint64
for _, r := range records {
- count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
- values[0] = count
- values[1] = int64(nanosec)
+ values[0] = r.Count
+ values[1] = int64(float64(r.Cycles) / cpuGHz)
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
locs = b.appendLocsForStack(locs[:0], r.Stack())
@@ -402,7 +399,7 @@
// The profile will be in compressed proto format unless debug is nonzero.
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
// Build count of each stack.
- var buf bytes.Buffer
+ var buf strings.Builder
key := func(stk []uintptr, lbls *labelMap) string {
buf.Reset()
fmt.Fprintf(&buf, "@")
@@ -593,10 +590,24 @@
// Technically the rate is MemProfileRate not 2*MemProfileRate,
// but early versions of the C++ heap profiler reported 2*MemProfileRate,
// so that's what pprof has come to expect.
+ rate := 2 * runtime.MemProfileRate
+
+ // pprof reads a profile with alloc == inuse as being a "2-column" profile
+ // (objects and bytes, not distinguishing alloc from inuse),
+ // but then such a profile can't be merged using pprof *.prof with
+ // other 4-column profiles where alloc != inuse.
+ // The easiest way to avoid this bug is to adjust allocBytes so it's never == inuseBytes.
+ // pprof doesn't use these header values anymore except for checking equality.
+ inUseBytes := total.InUseBytes()
+ allocBytes := total.AllocBytes
+ if inUseBytes == allocBytes {
+ allocBytes++
+ }
+
fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
- total.InUseObjects(), total.InUseBytes(),
- total.AllocObjects, total.AllocBytes,
- 2*runtime.MemProfileRate)
+ total.InUseObjects(), inUseBytes,
+ total.AllocObjects, allocBytes,
+ rate)
for i := range p {
r := &p[i]
@@ -842,24 +853,16 @@
// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
- return writeProfileInternal(w, debug, "contention", runtime.BlockProfile, scaleBlockProfile)
-}
-
-func scaleBlockProfile(cnt int64, ns float64) (int64, float64) {
- // Do nothing.
- // The current way of block profile sampling makes it
- // hard to compute the unsampled number. The legacy block
- // profile parse doesn't attempt to scale or unsample.
- return cnt, ns
+ return writeProfileInternal(w, debug, "contention", runtime.BlockProfile)
}
// writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error {
- return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile, scaleMutexProfile)
+ return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile)
}
-// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters
-func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool), scaleProfile func(int64, float64) (int64, float64)) error {
+// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters.
+func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)) error {
var p []runtime.BlockProfileRecord
n, ok := runtimeProfile(nil)
for {
@@ -874,7 +877,7 @@
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
if debug <= 0 {
- return printCountCycleProfile(w, "contentions", "delay", scaleProfile, p)
+ return printCountCycleProfile(w, "contentions", "delay", p)
}
b := bufio.NewWriter(w)
@@ -904,9 +907,4 @@
return b.Flush()
}
-func scaleMutexProfile(cnt int64, ns float64) (int64, float64) {
- period := runtime.SetMutexProfileFraction(-1)
- return cnt * int64(period), ns * float64(period)
-}
-
func runtime_cyclesPerSecond() int64
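
With the scaler indirection gone, printCountCycleProfile converts cycle counts to nanoseconds inline using cpuGHz (cycles per second divided by 1e9). A small sketch of that arithmetic with an assumed 3 GHz counter:

package main

import (
    "fmt"
    "time"
)

// cyclesToDuration converts a CPU-cycle count to wall time given the sampled
// cycles-per-second rate; cpuGHz in the profile writer is cyclesPerSecond / 1e9.
func cyclesToDuration(cycles, cyclesPerSecond int64) time.Duration {
    ghz := float64(cyclesPerSecond) / 1e9
    return time.Duration(float64(cycles) / ghz) // nanoseconds
}

func main() {
    const cps = 3_000_000_000 // assume a 3 GHz cycle counter
    fmt.Println(cyclesToDuration(6_000_000, cps)) // 2ms
}
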
diff --git a/src/runtime/pprof/pprof_norusage.go b/src/runtime/pprof/pprof_norusage.go
index 3d60525..8de3808 100644
--- a/src/runtime/pprof/pprof_norusage.go
+++ b/src/runtime/pprof/pprof_norusage.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris
+//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows
package pprof
diff --git a/src/runtime/pprof/pprof_rusage.go b/src/runtime/pprof/pprof_rusage.go
index 984a32e..aa429fb 100644
--- a/src/runtime/pprof/pprof_rusage.go
+++ b/src/runtime/pprof/pprof_rusage.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
+//go:build unix
package pprof
@@ -28,6 +28,8 @@
}
var rusage syscall.Rusage
- syscall.Getrusage(syscall.RUSAGE_SELF, &rusage)
- fmt.Fprintf(w, "# MaxRSS = %d\n", uintptr(rusage.Maxrss)*rssToBytes)
+ err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage)
+ if err == nil {
+ fmt.Fprintf(w, "# MaxRSS = %d\n", uintptr(rusage.Maxrss)*rssToBytes)
+ }
}
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index aabc180..53688ad 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -12,6 +12,7 @@
"fmt"
"internal/abi"
"internal/profile"
+ "internal/syscall/unix"
"internal/testenv"
"io"
"math"
@@ -116,11 +117,8 @@
// Linux [5.9,5.16) has a kernel bug that can break CPU timers on newly
// created threads, breaking our CPU accounting.
- major, minor, patch, err := linuxKernelVersion()
- if err != nil {
- t.Errorf("Error determining kernel version: %v", err)
- }
- t.Logf("Running on Linux %d.%d.%d", major, minor, patch)
+ major, minor := unix.KernelVersion()
+ t.Logf("Running on Linux %d.%d", major, minor)
defer func() {
if t.Failed() {
t.Logf("Failure of this test may indicate that your system suffers from a known Linux kernel bug fixed on newer kernels. See https://golang.org/issue/49065.")
@@ -530,7 +528,7 @@
ok = true
var samples uintptr
- var buf bytes.Buffer
+ var buf strings.Builder
p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
@@ -609,7 +607,7 @@
var total uintptr
for i, name := range need {
total += have[i]
- t.Logf("%s: %d\n", name, have[i])
+ t.Logf("found %d samples in expected function %s\n", have[i], name)
}
if total == 0 {
t.Logf("no samples in expected functions")
@@ -720,7 +718,7 @@
// The place we'd see it would be the inner most frame.
name := stk[0].Line[0].Function.Name
if name == "gogo" {
- var buf bytes.Buffer
+ var buf strings.Builder
fprintStack(&buf, stk)
t.Fatalf("found profile entry for gogo:\n%s", buf.String())
}
@@ -729,6 +727,9 @@
}
func fprintStack(w io.Writer, stk []*profile.Location) {
+ if len(stk) == 0 {
+ fmt.Fprintf(w, " (stack empty)")
+ }
for _, loc := range stk {
fmt.Fprintf(w, " %#x", loc.Address)
fmt.Fprintf(w, " (")
@@ -924,7 +925,7 @@
}
t.Run("debug=1", func(t *testing.T) {
- var w bytes.Buffer
+ var w strings.Builder
Lookup("block").WriteTo(&w, 1)
prof := w.String()
@@ -1091,7 +1092,7 @@
var mu sync.Mutex
mu.Lock()
go func() {
- awaitBlockedGoroutine(t, "semacquire", "blockMutex")
+ awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex")
mu.Unlock()
}()
// Note: Unlock releases mu before recording the mutex event,
@@ -1196,7 +1197,7 @@
blockMutex(t)
t.Run("debug=1", func(t *testing.T) {
- var w bytes.Buffer
+ var w strings.Builder
Lookup("mutex").WriteTo(&w, 1)
prof := w.String()
t.Logf("received profile: %v", prof)
@@ -1248,6 +1249,50 @@
})
}
+func TestMutexProfileRateAdjust(t *testing.T) {
+ old := runtime.SetMutexProfileFraction(1)
+ defer runtime.SetMutexProfileFraction(old)
+ if old != 0 {
+ t.Fatalf("need MutexProfileRate 0, got %d", old)
+ }
+
+ readProfile := func() (contentions int64, delay int64) {
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 0)
+ p, err := profile.Parse(&w)
+ if err != nil {
+ t.Fatalf("failed to parse profile: %v", err)
+ }
+ t.Logf("parsed proto: %s", p)
+ if err := p.CheckValid(); err != nil {
+ t.Fatalf("invalid profile: %v", err)
+ }
+
+ for _, s := range p.Sample {
+ for _, l := range s.Location {
+ for _, line := range l.Line {
+ if line.Function.Name == "runtime/pprof.blockMutex.func1" {
+ contentions += s.Value[0]
+ delay += s.Value[1]
+ }
+ }
+ }
+ }
+ return
+ }
+
+ blockMutex(t)
+ contentions, delay := readProfile()
+ if contentions == 0 || delay == 0 {
+ t.Fatal("did not see expected function in profile")
+ }
+ runtime.SetMutexProfileFraction(0)
+ newContentions, newDelay := readProfile()
+ if newContentions != contentions || newDelay != delay {
+ t.Fatalf("sample value changed: got [%d, %d], want [%d, %d]", newContentions, newDelay, contentions, delay)
+ }
+}
+
func func1(c chan int) { <-c }
func func2(c chan int) { <-c }
func func3(c chan int) { <-c }
@@ -1319,13 +1364,13 @@
t.Errorf("protobuf profile is invalid: %v", err)
}
expectedLabels := map[int64]map[string]string{
- 50: map[string]string{},
- 44: map[string]string{"label": "value"},
- 40: map[string]string{},
- 36: map[string]string{"label": "value"},
- 10: map[string]string{},
- 9: map[string]string{"label": "value"},
- 1: map[string]string{},
+ 50: {},
+ 44: {"label": "value"},
+ 40: {},
+ 36: {"label": "value"},
+ 10: {},
+ 9: {"label": "value"},
+ 1: {},
}
if !containsCountsLabels(p, expectedLabels) {
t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v",
@@ -1419,7 +1464,7 @@
go func() {
defer wg.Done()
for ctx.Err() == nil {
- var w bytes.Buffer
+ var w strings.Builder
goroutineProf.WriteTo(&w, 1)
prof := w.String()
count := profilerCalls(prof)
@@ -1437,7 +1482,7 @@
// The finalizer goroutine should not show up in most profiles, since it's
// marked as a system goroutine when idle.
t.Run("finalizer not present", func(t *testing.T) {
- var w bytes.Buffer
+ var w strings.Builder
goroutineProf.WriteTo(&w, 1)
prof := w.String()
if includesFinalizer(prof) {
@@ -1465,7 +1510,7 @@
runtime.GC()
}
}
- var w bytes.Buffer
+ var w strings.Builder
goroutineProf.WriteTo(&w, 1)
prof := w.String()
if !includesFinalizer(prof) {
@@ -1679,7 +1724,7 @@
emptyCallStackTestRun++
t.Parallel()
- var buf bytes.Buffer
+ var buf strings.Builder
p := NewProfile(name)
p.Add("foo", 47674)
@@ -1759,7 +1804,7 @@
go func() {
goroutineProf := Lookup("goroutine")
for ctx.Err() == nil {
- var w bytes.Buffer
+ var w strings.Builder
goroutineProf.WriteTo(&w, 1)
prof := w.String()
if strings.Contains(prof, "loop-i") {
@@ -1825,14 +1870,14 @@
isLabeled := s.Label != nil && contains(s.Label["key"], "value")
var (
mayBeLabeled bool
- mustBeLabeled bool
- mustNotBeLabeled bool
+ mustBeLabeled string
+ mustNotBeLabeled string
)
for _, loc := range s.Location {
for _, l := range loc.Line {
switch l.Function.Name {
case "runtime/pprof.labelHog", "runtime/pprof.parallelLabelHog", "runtime/pprof.parallelLabelHog.func1":
- mustBeLabeled = true
+ mustBeLabeled = l.Function.Name
case "runtime/pprof.Do":
// Do sets the labels, so samples may
// or may not be labeled depending on
@@ -1844,7 +1889,7 @@
// (such as those identified by
// runtime.isSystemGoroutine). These
// should never be labeled.
- mustNotBeLabeled = true
+ mustNotBeLabeled = l.Function.Name
case "gogo", "gosave_systemstack_switch", "racecall":
// These are context switch/race
// critical that we can't do a full
@@ -1866,25 +1911,28 @@
}
}
}
- if mustNotBeLabeled {
- // If this must not be labeled, then mayBeLabeled hints
- // are not relevant.
+ errorStack := func(f string, args ...any) {
+ var buf strings.Builder
+ fprintStack(&buf, s.Location)
+ t.Errorf("%s: %s", fmt.Sprintf(f, args...), buf.String())
+ }
+ if mustBeLabeled != "" && mustNotBeLabeled != "" {
+ errorStack("sample contains both %s, which must be labeled, and %s, which must not be labeled", mustBeLabeled, mustNotBeLabeled)
+ continue
+ }
+ if mustBeLabeled != "" || mustNotBeLabeled != "" {
+ // We found a definitive frame, so mayBeLabeled hints are not relevant.
mayBeLabeled = false
}
- if mustBeLabeled && !isLabeled {
- var buf bytes.Buffer
- fprintStack(&buf, s.Location)
- t.Errorf("Sample labeled got false want true: %s", buf.String())
+ if mayBeLabeled {
+ // This sample may or may not be labeled, so there's nothing we can check.
+ continue
}
- if mustNotBeLabeled && isLabeled {
- var buf bytes.Buffer
- fprintStack(&buf, s.Location)
- t.Errorf("Sample labeled got true want false: %s", buf.String())
+ if mustBeLabeled != "" && !isLabeled {
+ errorStack("sample must be labeled because of %s, but is not", mustBeLabeled)
}
- if isLabeled && !(mayBeLabeled || mustBeLabeled) {
- var buf bytes.Buffer
- fprintStack(&buf, s.Location)
- t.Errorf("Sample labeled got true want false: %s", buf.String())
+ if mustNotBeLabeled != "" && isLabeled {
+ errorStack("sample must not be labeled because of %s, but is", mustNotBeLabeled)
}
}
}
diff --git a/src/runtime/pprof/pprof_windows.go b/src/runtime/pprof/pprof_windows.go
new file mode 100644
index 0000000..23ef2f8
--- /dev/null
+++ b/src/runtime/pprof/pprof_windows.go
@@ -0,0 +1,22 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pprof
+
+import (
+ "fmt"
+ "internal/syscall/windows"
+ "io"
+ "syscall"
+ "unsafe"
+)
+
+func addMaxRSS(w io.Writer) {
+ var m windows.PROCESS_MEMORY_COUNTERS
+ p, _ := syscall.GetCurrentProcess()
+ err := windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m)))
+ if err == nil {
+ fmt.Fprintf(w, "# MaxRSS = %d\n", m.PeakWorkingSetSize)
+ }
+}
diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go
index 085027c..b68f30d 100644
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@@ -10,7 +10,6 @@
"fmt"
"internal/abi"
"io"
- "os"
"runtime"
"strconv"
"strings"
@@ -46,10 +45,11 @@
type memMap struct {
// initialized as reading mapping
- start uintptr
- end uintptr
- offset uint64
- file, buildID string
+ start uintptr // Address at which the binary (or DLL) is loaded into memory.
+ end uintptr // The limit of the address range occupied by this mapping.
+ offset uint64 // Offset in the binary that corresponds to the first mapped address.
+ file string // The object this entry is loaded from.
+ buildID string // A string that uniquely identifies a particular program version with high probability.
funcs symbolizeFlag
fake bool // map entry was faked; /proc/self/maps wasn't available
@@ -230,7 +230,7 @@
frame.PC = addr - 1
}
ret := []runtime.Frame{frame}
- for frame.Function != "runtime.goexit" && more == true {
+ for frame.Function != "runtime.goexit" && more {
frame, more = frames.Next()
ret = append(ret, frame)
}
@@ -395,6 +395,10 @@
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
+// It may return an empty slice even if stk is non-empty, for example if stk consists
+// solely of runtime.goexit. We still count these empty stacks in profiles in order to
+// get the right cumulative sample count.
+//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
@@ -590,6 +594,7 @@
type newFunc struct {
id uint64
name, file string
+ startLine int64
}
newFuncs := make([]newFunc, 0, 8)
@@ -610,7 +615,12 @@
if funcID == 0 {
funcID = uint64(len(b.funcs)) + 1
b.funcs[frame.Function] = int(funcID)
- newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+ newFuncs = append(newFuncs, newFunc{
+ id: funcID,
+ name: frame.Function,
+ file: frame.File,
+ startLine: int64(runtime_FrameStartLine(&frame)),
+ })
}
b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
}
@@ -633,6 +643,7 @@
b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+ b.pb.int64Opt(tagFunction_StartLine, fn.startLine)
b.pb.endMessage(tagProfile_Function, start)
}
@@ -640,20 +651,6 @@
return id
}
-// readMapping reads /proc/self/maps and writes mappings to b.pb.
-// It saves the address ranges of the mappings in b.mem for use
-// when emitting locations.
-func (b *profileBuilder) readMapping() {
- data, _ := os.ReadFile("/proc/self/maps")
- parseProcSelfMaps(data, b.addMapping)
- if len(b.mem) == 0 { // pprof expects a map entry, so fake one.
- b.addMappingEntry(0, 0, 0, "", "", true)
- // TODO(hyangah): make addMapping return *memMap or
- // take a memMap struct, and get rid of addMappingEntry
- // that takes a bunch of positional arguments.
- }
-}
-
var space = []byte(" ")
var newline = []byte("\n")
@@ -735,13 +732,12 @@
continue
}
- // TODO: pprof's remapMappingIDs makes two adjustments:
+ // TODO: pprof's remapMappingIDs makes one adjustment:
// 1. If there is an /anon_hugepage mapping first and it is
// consecutive to a next mapping, drop the /anon_hugepage.
- // 2. If start-offset = 0x400000, change start to 0x400000 and offset to 0.
- // There's no indication why either of these is needed.
- // Let's try not doing these and see what breaks.
- // If we do need them, they would go here, before we
+ // There's no indication why this is needed.
+ // Let's try not doing this and see what breaks.
+ // If we do need it, it would go here, before we
// enter the mappings into b.mem in the first place.
buildID, _ := elfBuildID(file)
diff --git a/src/runtime/pprof/proto_other.go b/src/runtime/pprof/proto_other.go
new file mode 100644
index 0000000..4a7fe79
--- /dev/null
+++ b/src/runtime/pprof/proto_other.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+
+package pprof
+
+import (
+ "errors"
+ "os"
+)
+
+// readMapping reads /proc/self/maps and writes mappings to b.pb.
+// It saves the address ranges of the mappings in b.mem for use
+// when emitting locations.
+func (b *profileBuilder) readMapping() {
+ data, _ := os.ReadFile("/proc/self/maps")
+ parseProcSelfMaps(data, b.addMapping)
+ if len(b.mem) == 0 { // pprof expects a map entry, so fake one.
+ b.addMappingEntry(0, 0, 0, "", "", true)
+ // TODO(hyangah): make addMapping return *memMap or
+ // take a memMap struct, and get rid of addMappingEntry
+ // that takes a bunch of positional arguments.
+ }
+}
+
+func readMainModuleMapping() (start, end uint64, err error) {
+ return 0, 0, errors.New("not implemented")
+}
diff --git a/src/runtime/pprof/proto_test.go b/src/runtime/pprof/proto_test.go
index 84a051a..780b481 100644
--- a/src/runtime/pprof/proto_test.go
+++ b/src/runtime/pprof/proto_test.go
@@ -101,6 +101,36 @@
addr2 = mprof.Mapping[1].Start
map2 = mprof.Mapping[1]
map2.BuildID, _ = elfBuildID(map2.File)
+ case "windows":
+ addr1 = uint64(abi.FuncPCABIInternal(f1))
+ addr2 = uint64(abi.FuncPCABIInternal(f2))
+
+ exe, err := os.Executable()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ start, end, err := readMainModuleMapping()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ map1 = &profile.Mapping{
+ ID: 1,
+ Start: start,
+ Limit: end,
+ File: exe,
+ BuildID: peBuildID(exe),
+ HasFunctions: true,
+ }
+ map2 = &profile.Mapping{
+ ID: 1,
+ Start: start,
+ Limit: end,
+ File: exe,
+ BuildID: peBuildID(exe),
+ HasFunctions: true,
+ }
case "js":
addr1 = uint64(abi.FuncPCABIInternal(f1))
addr2 = uint64(abi.FuncPCABIInternal(f2))
@@ -285,7 +315,7 @@
if len(out) > 0 && out[len(out)-1] != '\n' {
out += "\n"
}
- var buf bytes.Buffer
+ var buf strings.Builder
parseProcSelfMaps([]byte(in), func(lo, hi, offset uint64, file, buildID string) {
fmt.Fprintf(&buf, "%08x %08x %08x %s\n", lo, hi, offset, file)
})
diff --git a/src/runtime/pprof/proto_windows.go b/src/runtime/pprof/proto_windows.go
new file mode 100644
index 0000000..d5ae4a5
--- /dev/null
+++ b/src/runtime/pprof/proto_windows.go
@@ -0,0 +1,73 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pprof
+
+import (
+ "errors"
+ "internal/syscall/windows"
+ "syscall"
+)
+
+// readMapping adds memory mapping information to the profile.
+func (b *profileBuilder) readMapping() {
+ snap, err := createModuleSnapshot()
+ if err != nil {
+ // pprof expects a map entry, so fake one, when we haven't added anything yet.
+ b.addMappingEntry(0, 0, 0, "", "", true)
+ return
+ }
+ defer func() { _ = syscall.CloseHandle(snap) }()
+
+ var module windows.ModuleEntry32
+ module.Size = uint32(windows.SizeofModuleEntry32)
+ err = windows.Module32First(snap, &module)
+ if err != nil {
+ // pprof expects a map entry, so fake one, when we haven't added anything yet.
+ b.addMappingEntry(0, 0, 0, "", "", true)
+ return
+ }
+ for err == nil {
+ exe := syscall.UTF16ToString(module.ExePath[:])
+ b.addMappingEntry(
+ uint64(module.ModBaseAddr),
+ uint64(module.ModBaseAddr)+uint64(module.ModBaseSize),
+ 0,
+ exe,
+ peBuildID(exe),
+ false,
+ )
+ err = windows.Module32Next(snap, &module)
+ }
+}
+
+func readMainModuleMapping() (start, end uint64, err error) {
+ snap, err := createModuleSnapshot()
+ if err != nil {
+ return 0, 0, err
+ }
+ defer func() { _ = syscall.CloseHandle(snap) }()
+
+ var module windows.ModuleEntry32
+ module.Size = uint32(windows.SizeofModuleEntry32)
+ err = windows.Module32First(snap, &module)
+ if err != nil {
+ return 0, 0, err
+ }
+
+ return uint64(module.ModBaseAddr), uint64(module.ModBaseAddr) + uint64(module.ModBaseSize), nil
+}
+
+func createModuleSnapshot() (syscall.Handle, error) {
+ for {
+ snap, err := syscall.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE|windows.TH32CS_SNAPMODULE32, uint32(syscall.Getpid()))
+ var errno syscall.Errno
+ if err != nil && errors.As(err, &errno) && errno == windows.ERROR_BAD_LENGTH {
+ // When CreateToolhelp32Snapshot(SNAPMODULE|SNAPMODULE32, ...) fails
+ // with ERROR_BAD_LENGTH then it should be retried until it succeeds.
+ continue
+ }
+ return snap, err
+ }
+}
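
createModuleSnapshot retries CreateToolhelp32Snapshot for as long as it fails with ERROR_BAD_LENGTH, which the API documents as a retryable condition. Stripped of the Windows specifics, the loop is just retry-while-sentinel; a generic sketch:

package main

import (
    "errors"
    "fmt"
)

// errTransient stands in for ERROR_BAD_LENGTH: a failure the API says to retry.
var errTransient = errors.New("transient snapshot failure")

// retryWhile keeps calling fn as long as it fails with the given sentinel,
// mirroring the createModuleSnapshot loop above.
func retryWhile(sentinel error, fn func() (int, error)) (int, error) {
    for {
        v, err := fn()
        if err != nil && errors.Is(err, sentinel) {
            continue
        }
        return v, err
    }
}

func main() {
    calls := 0
    v, err := retryWhile(errTransient, func() (int, error) {
        calls++
        if calls < 3 {
            return 0, errTransient
        }
        return 42, nil
    })
    fmt.Println(calls, v, err)
}
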
diff --git a/src/runtime/pprof/protobuf.go b/src/runtime/pprof/protobuf.go
index 7b99095..f7ec1ac 100644
--- a/src/runtime/pprof/protobuf.go
+++ b/src/runtime/pprof/protobuf.go
@@ -116,7 +116,7 @@
}
func (b *protobuf) boolOpt(tag int, x bool) {
- if x == false {
+ if !x {
return
}
b.bool(tag, x)
diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go
index dd2545b..57e9ca4 100644
--- a/src/runtime/pprof/runtime.go
+++ b/src/runtime/pprof/runtime.go
@@ -6,9 +6,13 @@
import (
"context"
+ "runtime"
"unsafe"
)
+// runtime_FrameStartLine is defined in runtime/symtab.go.
+func runtime_FrameStartLine(f *runtime.Frame) int
+
// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
diff --git a/src/runtime/pprof/rusage_test.go b/src/runtime/pprof/rusage_test.go
index b82b1af..8039510 100644
--- a/src/runtime/pprof/rusage_test.go
+++ b/src/runtime/pprof/rusage_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
+//go:build unix
package pprof
diff --git a/src/runtime/pprof/uname_linux_test.go b/src/runtime/pprof/uname_linux_test.go
deleted file mode 100644
index 8374c83..0000000
--- a/src/runtime/pprof/uname_linux_test.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux
-
-package pprof
-
-import (
- "fmt"
- "regexp"
- "strconv"
- "syscall"
-)
-
-var versionRe = regexp.MustCompile(`^(\d+)(?:\.(\d+)(?:\.(\d+))).*$`)
-
-func linuxKernelVersion() (major, minor, patch int, err error) {
- var uname syscall.Utsname
- if err := syscall.Uname(&uname); err != nil {
- return 0, 0, 0, err
- }
-
- buf := make([]byte, 0, len(uname.Release))
- for _, b := range uname.Release {
- if b == 0 {
- break
- }
- buf = append(buf, byte(b))
- }
- rl := string(buf)
-
- m := versionRe.FindStringSubmatch(rl)
- if m == nil {
- return 0, 0, 0, fmt.Errorf("error matching version number in %q", rl)
- }
-
- v, err := strconv.ParseInt(m[1], 10, 64)
- if err != nil {
- return 0, 0, 0, fmt.Errorf("error parsing major version %q in %s: %w", m[1], rl, err)
- }
- major = int(v)
-
- if len(m) >= 3 {
- v, err := strconv.ParseInt(m[2], 10, 64)
- if err != nil {
- return 0, 0, 0, fmt.Errorf("error parsing minor version %q in %s: %w", m[2], rl, err)
- }
- minor = int(v)
- }
-
- if len(m) >= 4 {
- v, err := strconv.ParseInt(m[3], 10, 64)
- if err != nil {
- return 0, 0, 0, fmt.Errorf("error parsing patch version %q in %s: %w", m[3], rl, err)
- }
- patch = int(v)
- }
-
- return
-}
diff --git a/src/runtime/pprof/uname_other_test.go b/src/runtime/pprof/uname_other_test.go
deleted file mode 100644
index 3276407..0000000
--- a/src/runtime/pprof/uname_other_test.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux
-
-package pprof
-
-import (
- "errors"
-)
-
-func linuxKernelVersion() (major, minor, patch int, err error) {
- return 0, 0, 0, errors.New("not running on linux")
-}
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
index da24f50..4f62fc6 100644
--- a/src/runtime/preempt.go
+++ b/src/runtime/preempt.go
@@ -55,7 +55,6 @@
import (
"internal/abi"
"internal/goarch"
- "runtime/internal/atomic"
)
type suspendGState struct {
@@ -192,7 +191,7 @@
case _Grunning:
// Optimization: if there is already a pending preemption request
// (from the previous loop iteration), don't bother with the atomics.
- if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
+ if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
break
}
@@ -208,7 +207,7 @@
// Prepare for asynchronous preemption.
asyncM2 := gp.m
- asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+ asyncGen2 := asyncM2.preemptGen.Load()
needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
asyncM = asyncM2
asyncGen = asyncGen2
@@ -419,7 +418,7 @@
inltree := (*[1 << 20]inlinedCall)(inldata)
ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
if ix >= 0 {
- name = funcnameFromNameoff(f, inltree[ix].func_)
+ name = funcnameFromNameOff(f, inltree[ix].nameOff)
}
}
if hasPrefix(name, "runtime.") ||
diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s
index 31f7c8b..94a84fb 100644
--- a/src/runtime/preempt_amd64.s
+++ b/src/runtime/preempt_amd64.s
@@ -1,6 +1,7 @@
// Code generated by mkpreempt.go; DO NOT EDIT.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
@@ -27,8 +28,10 @@
MOVQ R14, 96(SP)
MOVQ R15, 104(SP)
#ifdef GOOS_darwin
+ #ifndef hasAVX
CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
JE 2(PC)
+ #endif
VZEROUPPER
#endif
MOVUPS X0, 112(SP)
diff --git a/src/runtime/print.go b/src/runtime/print.go
index b2a642b..a1e0b8e 100644
--- a/src/runtime/print.go
+++ b/src/runtime/print.go
@@ -6,7 +6,6 @@
import (
"internal/goarch"
- "runtime/internal/atomic"
"unsafe"
)
@@ -40,7 +39,7 @@
func recordForPanic(b []byte) {
printlock()
- if atomic.Load(&panicking) == 0 {
+ if panicking.Load() == 0 {
// Not actively crashing: maintain circular buffer of print output.
for i := 0; i < len(b); {
n := copy(printBacklog[printBacklogIndex:], b[i:])
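
For context, the pattern in this hunk recurs throughout the proc.go changes that follow: free-standing atomic helpers over plain integers (atomic.Load(&x), atomic.Store(&x, v)) are replaced by typed atomics whose methods carry the atomicity. A minimal sketch using the public sync/atomic types, which mirror the runtime-internal ones; the variable name is illustrative:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // panickingSketch stands in for a runtime flag such as panicking. With a
    // typed atomic, every access is atomic by construction, so call sites read
    // x.Load() and x.Store(v) instead of atomic.Load(&x) and atomic.Store(&x, v).
    var panickingSketch atomic.Uint32

    func main() {
        panickingSketch.Store(1)
        fmt.Println(panickingSketch.Load() == 0) // false: "actively crashing"
    }
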
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 3991a48..554a60d 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -73,7 +73,7 @@
// If there is at least one spinning thread (sched.nmspinning>1), we don't
// unpark new threads when submitting work. To compensate for that, if the last
// spinning thread finds work and stops spinning, it must unpark a new spinning
-// thread. This approach smooths out unjustified spikes of thread unparking,
+// thread. This approach smooths out unjustified spikes of thread unparking,
// but at the same time guarantees eventual maximal CPU parallelism
// utilization.
//
@@ -143,11 +143,11 @@
// The main goroutine.
func main() {
- g := getg()
+ mp := getg().m
// Racectx of m0->g0 is used only as the parent of the main goroutine.
// It must not be used for anything else.
- g.m.g0.racectx = 0
+ mp.g0.racectx = 0
// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
// Using decimal instead of binary GB and MB because
@@ -180,7 +180,7 @@
// to preserve the lock.
lockOSThread()
- if g.m != &m0 {
+ if mp != &m0 {
throw("runtime.main not on m0")
}
@@ -249,6 +249,7 @@
fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
fn()
if raceenabled {
+ runExitHooks(0) // run hooks now, since racefini does not return
racefini()
}
@@ -256,18 +257,19 @@
// another goroutine at the same time as main returns,
// let the other goroutine finish printing the panic trace.
// Once it does, it will exit. See issues 3934 and 20018.
- if atomic.Load(&runningPanicDefers) != 0 {
+ if runningPanicDefers.Load() != 0 {
// Running deferred functions should not take long.
for c := 0; c < 1000; c++ {
- if atomic.Load(&runningPanicDefers) == 0 {
+ if runningPanicDefers.Load() == 0 {
break
}
Gosched()
}
}
- if atomic.Load(&panicking) != 0 {
+ if panicking.Load() != 0 {
gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1)
}
+ runExitHooks(0)
exit(0)
for {
@@ -279,8 +281,9 @@
// os_beforeExit is called from os.Exit(0).
//
//go:linkname os_beforeExit os.runtime_beforeExit
-func os_beforeExit() {
- if raceenabled {
+func os_beforeExit(exitCode int) {
+ runExitHooks(exitCode)
+ if exitCode == 0 && raceenabled {
racefini()
}
}
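
For context, runExitHooks drives hooks registered elsewhere in the runtime; the hunk above runs them on the normal return from main (before racefini, which does not return) and now also from os.Exit with the exit code. A purely hypothetical sketch of such a registry, with names that are not the runtime's:

    package main

    import "fmt"

    // exitHook pairs a callback with whether it may also run on a nonzero
    // exit code. All identifiers here are illustrative.
    type exitHook struct {
        f            func()
        runOnFailure bool
    }

    var exitHooks []exitHook

    func addExitHook(f func(), runOnFailure bool) {
        exitHooks = append(exitHooks, exitHook{f, runOnFailure})
    }

    // runExitHooksSketch runs hooks in reverse registration order, skipping
    // failure-intolerant hooks when the exit code is nonzero.
    func runExitHooksSketch(code int) {
        for i := len(exitHooks) - 1; i >= 0; i-- {
            if h := exitHooks[i]; code == 0 || h.runOnFailure {
                h.f()
            }
        }
    }

    func main() {
        addExitHook(func() { fmt.Println("flush profile data") }, true)
        runExitHooksSketch(0)
    }
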
@@ -295,10 +298,10 @@
lockInit(&forcegc.lock, lockRankForcegc)
for {
lock(&forcegc.lock)
- if forcegc.idle != 0 {
+ if forcegc.idle.Load() {
throw("forcegc: phase error")
}
- atomic.Store(&forcegc.idle, 1)
+ forcegc.idle.Store(true)
goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceEvGoBlock, 1)
// this goroutine is explicitly resumed by sysmon
if debug.gctrace > 0 {
@@ -326,6 +329,21 @@
mcall(goschedguarded_m)
}
+// goschedIfBusy yields the processor like gosched, but only does so if
+// there are no idle Ps or if we're on the only P and there's nothing in
+// the run queue. In both cases, there is freely available idle time.
+//
+//go:nosplit
+func goschedIfBusy() {
+ gp := getg()
+ // Call gosched if gp.preempt is set; we may be in a tight loop that
+ // doesn't otherwise yield.
+ if !gp.preempt && sched.npidle.Load() > 0 {
+ return
+ }
+ mcall(gosched_m)
+}
+
// Puts the current goroutine into a waiting state and calls unlockf on the
// system stack.
//
@@ -463,7 +481,7 @@
releasem(mp)
}
-// called from assembly
+// called from assembly.
func badmcall(fn func(*g)) {
throw("runtime: mcall called on m->g0 stack")
}
@@ -476,22 +494,16 @@
panic(plainError("arg size to reflect.call more than 1GB"))
}
-var badmorestackg0Msg = "fatal: morestack on g0\n"
-
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
- sp := stringStructOf(&badmorestackg0Msg)
- write(2, sp.str, int32(sp.len))
+ writeErrStr("fatal: morestack on g0\n")
}
-var badmorestackgsignalMsg = "fatal: morestack on gsignal\n"
-
//go:nosplit
//go:nowritebarrierrec
func badmorestackgsignal() {
- sp := stringStructOf(&badmorestackgsignalMsg)
- write(2, sp.str, int32(sp.len))
+ writeErrStr("fatal: morestack on gsignal\n")
}
//go:nosplit
@@ -600,35 +612,13 @@
_GoidCacheBatch = 16
)
-// cpuinit extracts the environment variable GODEBUG from the environment on
-// Unix-like operating systems and calls internal/cpu.Initialize.
-func cpuinit() {
- const prefix = "GODEBUG="
- var env string
-
+// cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete
+// value of the GODEBUG environment variable.
+func cpuinit(env string) {
switch GOOS {
case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
cpu.DebugOptions = true
-
- // Similar to goenv_unix but extracts the environment value for
- // GODEBUG directly.
- // TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
- n := int32(0)
- for argv_index(argv, argc+1+n) != nil {
- n++
- }
-
- for i := int32(0); i < n; i++ {
- p := argv_index(argv, argc+1+i)
- s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)}))
-
- if hasPrefix(s, prefix) {
- env = gostring(p)[len(prefix):]
- break
- }
- }
}
-
cpu.Initialize(env)
// Support cpu feature variables are used in code generated by the compiler
@@ -647,6 +637,35 @@
}
}
+// getGodebugEarly extracts the environment variable GODEBUG from the environment on
+// Unix-like operating systems and returns it. This function exists to extract GODEBUG
+// early before much of the runtime is initialized.
+func getGodebugEarly() string {
+ const prefix = "GODEBUG="
+ var env string
+ switch GOOS {
+ case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
+ // Similar to goenv_unix but extracts the environment value for
+ // GODEBUG directly.
+ // TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
+ n := int32(0)
+ for argv_index(argv, argc+1+n) != nil {
+ n++
+ }
+
+ for i := int32(0); i < n; i++ {
+ p := argv_index(argv, argc+1+i)
+ s := unsafe.String(p, findnull(p))
+
+ if hasPrefix(s, prefix) {
+ env = gostring(p)[len(prefix):]
+ break
+ }
+ }
+ }
+ return env
+}
+
// The bootstrap sequence is:
//
// call osinit
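
For context, getGodebugEarly above replaces the hand-built stringStruct cast with unsafe.String, public API new in Go 1.20 that forms a string from a byte pointer and a length without copying. A minimal sketch:

    package main

    import (
        "fmt"
        "unsafe"
    )

    func main() {
        // View existing bytes as a string without copying them, the same way
        // the hunk above turns an argv entry into a string.
        buf := []byte("GODEBUG=gctrace=1")
        s := unsafe.String(&buf[0], len(buf))
        fmt.Println(s) // GODEBUG=gctrace=1
    }
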
@@ -678,9 +697,9 @@
// raceinit must be the first call to race detector.
// In particular, it must be done before mallocinit below calls racemapshadow.
- _g_ := getg()
+ gp := getg()
if raceenabled {
- _g_.racectx, raceprocctx0 = raceinit()
+ gp.racectx, raceprocctx0 = raceinit()
}
sched.maxmcount = 10000
@@ -691,30 +710,35 @@
moduledataverify()
stackinit()
mallocinit()
- cpuinit() // must run before alginit
- alginit() // maps, hash, fastrand must not be used before this call
- fastrandinit() // must run before mcommoninit
- mcommoninit(_g_.m, -1)
+ godebug := getGodebugEarly()
+ initPageTrace(godebug) // must run after mallocinit but before anything allocates
+ cpuinit(godebug) // must run before alginit
+ alginit() // maps, hash, fastrand must not be used before this call
+ fastrandinit() // must run before mcommoninit
+ mcommoninit(gp.m, -1)
modulesinit() // provides activeModules
typelinksinit() // uses maps, activeModules
itabsinit() // uses activeModules
stkobjinit() // must run before GC starts
- sigsave(&_g_.m.sigmask)
- initSigmask = _g_.m.sigmask
-
- if offset := unsafe.Offsetof(sched.timeToRun); offset%8 != 0 {
- println(offset)
- throw("sched.timeToRun not aligned to 8 bytes")
- }
+ sigsave(&gp.m.sigmask)
+ initSigmask = gp.m.sigmask
goargs()
goenvs()
parsedebugvars()
gcinit()
+ // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile.
+ // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is
+ // set to true by the linker, it means that nothing is consuming the profile, it is
+ // safe to set MemProfileRate to 0.
+ if disableMemoryProfiling {
+ MemProfileRate = 0
+ }
+
lock(&sched.lock)
- sched.lastpoll = uint64(nanotime())
+ sched.lastpoll.Store(nanotime())
procs := ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
@@ -733,8 +757,8 @@
if debug.cgocheck > 1 {
writeBarrier.cgo = true
writeBarrier.enabled = true
- for _, p := range allp {
- p.wbBuf.reset()
+ for _, pp := range allp {
+ pp.wbBuf.reset()
}
}
@@ -751,9 +775,9 @@
}
func dumpgstatus(gp *g) {
- _g_ := getg()
- print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- print("runtime: g: g=", _g_, ", goid=", _g_.goid, ", g->atomicstatus=", readgstatus(_g_), "\n")
+ thisg := getg()
+ print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+ print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n")
}
// sched.lock must be held.
@@ -784,10 +808,10 @@
// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
func mcommoninit(mp *m, id int64) {
- _g_ := getg()
+ gp := getg()
// g0 stack won't make sense for user (and is not necessary unwindable).
- if _g_ != _g_.m.g0 {
+ if gp != gp.m.g0 {
callers(1, mp.createstack[:])
}
@@ -832,6 +856,12 @@
}
}
+func (mp *m) becomeSpinning() {
+ mp.spinning = true
+ sched.nmspinning.Add(1)
+ sched.needspinning.Store(0)
+}
+
var fastrandseed uintptr
func fastrandinit() {
@@ -848,7 +878,6 @@
status := readgstatus(gp)
// Mark runnable.
- _g_ := getg()
mp := acquirem() // disable preemption because it can be holding p in a local var
if status&^_Gscan != _Gwaiting {
dumpgstatus(gp)
@@ -857,7 +886,7 @@
// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
casgstatus(gp, _Gwaiting, _Grunnable)
- runqput(_g_.m.p.ptr(), gp, next)
+ runqput(mp.p.ptr(), gp, next)
wakep()
releasem(mp)
}
@@ -868,20 +897,20 @@
// freezing is set to non-zero if the runtime is trying to freeze the
// world.
-var freezing uint32
+var freezing atomic.Bool
// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
- atomic.Store(&freezing, 1)
+ freezing.Store(true)
// stopwait and preemption requests can be lost
// due to races with concurrently executing threads,
// so try several times
for i := 0; i < 5; i++ {
// this should tell the scheduler to not start any new goroutines
sched.stopwait = freezeStopWait
- atomic.Store(&sched.gcwaiting, 1)
+ sched.gcwaiting.Store(true)
// this should stop running goroutines
if !preemptall() {
break // no running goroutines
@@ -899,7 +928,7 @@
//
//go:nosplit
func readgstatus(gp *g) uint32 {
- return atomic.Load(&gp.atomicstatus)
+ return gp.atomicstatus.Load()
}
// The Gscanstatuses are acting like locks and this releases them.
@@ -921,7 +950,7 @@
_Gscansyscall,
_Gscanpreempted:
if newval == oldval&^_Gscan {
- success = atomic.Cas(&gp.atomicstatus, oldval, newval)
+ success = gp.atomicstatus.CompareAndSwap(oldval, newval)
}
}
if !success {
@@ -941,7 +970,7 @@
_Gwaiting,
_Gsyscall:
if newval == oldval|_Gscan {
- r := atomic.Cas(&gp.atomicstatus, oldval, newval)
+ r := gp.atomicstatus.CompareAndSwap(oldval, newval)
if r {
acquireLockRank(lockRankGscan)
}
@@ -954,6 +983,10 @@
panic("not reached")
}
+// casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track
+// various latencies on every transition instead of sampling them.
+var casgstatusAlwaysTrack = false
+
// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
// and casfrom_Gscanstatus instead.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
@@ -977,15 +1010,15 @@
// loop if gp->atomicstatus is in a scan state giving
// GC time to finish and change the state to oldval.
- for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
- if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
+ for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ {
+ if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable {
throw("casgstatus: waiting for Gwaiting but is Grunnable")
}
if i == 0 {
nextYield = nanotime() + yieldDelay
}
if nanotime() < nextYield {
- for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
+ for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ {
procyield(1)
}
} else {
@@ -994,37 +1027,75 @@
}
}
- // Handle tracking for scheduling latencies.
if oldval == _Grunning {
- // Track every 8th time a goroutine transitions out of running.
- if gp.trackingSeq%gTrackingPeriod == 0 {
+ // Track every gTrackingPeriod time a goroutine transitions out of running.
+ if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
gp.tracking = true
}
gp.trackingSeq++
}
- if gp.tracking {
- if oldval == _Grunnable {
- // We transitioned out of runnable, so measure how much
- // time we spent in this state and add it to
- // runnableTime.
- now := nanotime()
- gp.runnableTime += now - gp.runnableStamp
- gp.runnableStamp = 0
- }
- if newval == _Grunnable {
- // We just transitioned into runnable, so record what
- // time that happened.
- now := nanotime()
- gp.runnableStamp = now
- } else if newval == _Grunning {
- // We're transitioning into running, so turn off
- // tracking and record how much time we spent in
- // runnable.
- gp.tracking = false
- sched.timeToRun.record(gp.runnableTime)
- gp.runnableTime = 0
- }
+ if !gp.tracking {
+ return
}
+
+ // Handle various kinds of tracking.
+ //
+ // Currently:
+ // - Time spent in runnable.
+ // - Time spent blocked on a sync.Mutex or sync.RWMutex.
+ switch oldval {
+ case _Grunnable:
+ // We transitioned out of runnable, so measure how much
+ // time we spent in this state and add it to
+ // runnableTime.
+ now := nanotime()
+ gp.runnableTime += now - gp.trackingStamp
+ gp.trackingStamp = 0
+ case _Gwaiting:
+ if !gp.waitreason.isMutexWait() {
+ // Not blocking on a lock.
+ break
+ }
+ // Blocking on a lock, measure it. Note that because we're
+ // sampling, we have to multiply by our sampling period to get
+ // a more representative estimate of the absolute value.
+ // gTrackingPeriod also represents an accurate sampling period
+ // because we can only enter this state from _Grunning.
+ now := nanotime()
+ sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod)
+ gp.trackingStamp = 0
+ }
+ switch newval {
+ case _Gwaiting:
+ if !gp.waitreason.isMutexWait() {
+ // Not blocking on a lock.
+ break
+ }
+ // Blocking on a lock. Write down the timestamp.
+ now := nanotime()
+ gp.trackingStamp = now
+ case _Grunnable:
+ // We just transitioned into runnable, so record what
+ // time that happened.
+ now := nanotime()
+ gp.trackingStamp = now
+ case _Grunning:
+ // We're transitioning into running, so turn off
+ // tracking and record how much time we spent in
+ // runnable.
+ gp.tracking = false
+ sched.timeToRun.record(gp.runnableTime)
+ gp.runnableTime = 0
+ }
+}
+
+// casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason.
+//
+// Use this over casgstatus when possible to ensure that a waitreason is set.
+func casGToWaiting(gp *g, old uint32, reason waitReason) {
+ // Set the wait reason before calling casgstatus, because casgstatus will use it.
+ gp.waitreason = reason
+ casgstatus(gp, old, _Gwaiting)
}
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
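
For context on the scaling in the mutex-wait branch above: only one of every gTrackingPeriod (8) transitions out of _Grunning is tracked, so a sampled block duration is multiplied back up to estimate the total across all goroutines. A minimal sketch of that estimator, with illustrative names:

    package main

    import "fmt"

    // samplePeriod mirrors the role of gTrackingPeriod: one of every
    // samplePeriod transitions is tracked, so totals are estimated by scaling.
    const samplePeriod = 8

    func estimateTotalWait(sampledNanos int64) int64 {
        return sampledNanos * samplePeriod
    }

    func main() {
        // If tracked goroutines were observed blocked for 2µs in total,
        // the estimate for all goroutines is 8 * 2µs = 16µs.
        fmt.Println(estimateTotalWait(2000)) // 16000
    }
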
@@ -1040,7 +1111,7 @@
if oldstatus != _Gwaiting && oldstatus != _Grunnable {
throw("copystack: bad status, not Gwaiting or Grunnable")
}
- if atomic.Cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
+ if gp.atomicstatus.CompareAndSwap(oldstatus, _Gcopystack) {
return oldstatus
}
}
@@ -1055,7 +1126,7 @@
throw("bad g transition")
}
acquireLockRank(lockRankGscan)
- for !atomic.Cas(&gp.atomicstatus, _Grunning, _Gscan|_Gpreempted) {
+ for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) {
}
}
@@ -1066,7 +1137,8 @@
if old != _Gpreempted || new != _Gwaiting {
throw("bad g transition")
}
- return atomic.Cas(&gp.atomicstatus, _Gpreempted, _Gwaiting)
+ gp.waitreason = waitReasonPreempted
+ return gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting)
}
// stopTheWorld stops all P's from executing goroutines, interrupting
@@ -1098,7 +1170,8 @@
// must have preempted all goroutines, including any attempting
// to scan our stack, in which case, any stack shrinking will
// have already completed by the time we exit.
- casgstatus(gp, _Grunning, _Gwaiting)
+ // Don't provide a wait reason because we're still executing.
+ casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld)
stopTheWorldWithSema()
casgstatus(gp, _Gwaiting, _Grunning)
})
@@ -1177,41 +1250,41 @@
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
func stopTheWorldWithSema() {
- _g_ := getg()
+ gp := getg()
// If we hold a lock, then we won't be able to stop another M
// that is blocked trying to acquire the lock.
- if _g_.m.locks > 0 {
+ if gp.m.locks > 0 {
throw("stopTheWorld: holding locks")
}
lock(&sched.lock)
sched.stopwait = gomaxprocs
- atomic.Store(&sched.gcwaiting, 1)
+ sched.gcwaiting.Store(true)
preemptall()
// stop current P
- _g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
+ gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
sched.stopwait--
// try to retake all P's in Psyscall status
- for _, p := range allp {
- s := p.status
- if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
+ for _, pp := range allp {
+ s := pp.status
+ if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
if trace.enabled {
- traceGoSysBlock(p)
- traceProcStop(p)
+ traceGoSysBlock(pp)
+ traceProcStop(pp)
}
- p.syscalltick++
+ pp.syscalltick++
sched.stopwait--
}
}
// stop idle P's
now := nanotime()
for {
- p, _ := pidleget(now)
- if p == nil {
+ pp, _ := pidleget(now)
+ if pp == nil {
break
}
- p.status = _Pgcstop
+ pp.status = _Pgcstop
sched.stopwait--
}
wait := sched.stopwait > 0
@@ -1234,13 +1307,13 @@
if sched.stopwait != 0 {
bad = "stopTheWorld: not stopped (stopwait != 0)"
} else {
- for _, p := range allp {
- if p.status != _Pgcstop {
+ for _, pp := range allp {
+ if pp.status != _Pgcstop {
bad = "stopTheWorld: not stopped (status != _Pgcstop)"
}
}
}
- if atomic.Load(&freezing) != 0 {
+ if freezing.Load() {
// Some other thread is panicking. This can cause the
// sanity checks above to fail if the panic happens in
// the signal handler on a stopped thread. Either way,
@@ -1271,9 +1344,9 @@
newprocs = 0
}
p1 := procresize(procs)
- sched.gcwaiting = 0
- if sched.sysmonwait != 0 {
- sched.sysmonwait = 0
+ sched.gcwaiting.Store(false)
+ if sched.sysmonwait.Load() {
+ sched.sysmonwait.Store(false)
notewakeup(&sched.sysmonnote)
}
unlock(&sched.lock)
@@ -1354,9 +1427,9 @@
//go:nosplit
//go:nowritebarrierrec
func mstart0() {
- _g_ := getg()
+ gp := getg()
- osStack := _g_.stack.lo == 0
+ osStack := gp.stack.lo == 0
if osStack {
// Initialize stack bounds from system stack.
// Cgo may have left stack size in stack.hi.
@@ -1366,25 +1439,25 @@
// We set hi to &size, but there are things above
// it. The 1024 is supposed to compensate this,
// but is somewhat arbitrary.
- size := _g_.stack.hi
+ size := gp.stack.hi
if size == 0 {
size = 8192 * sys.StackGuardMultiplier
}
- _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
- _g_.stack.lo = _g_.stack.hi - size + 1024
+ gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
+ gp.stack.lo = gp.stack.hi - size + 1024
}
// Initialize stack guard so that we can start calling regular
// Go code.
- _g_.stackguard0 = _g_.stack.lo + _StackGuard
+ gp.stackguard0 = gp.stack.lo + _StackGuard
// This is the g0, so we can also call go:systemstack
// functions, which check stackguard1.
- _g_.stackguard1 = _g_.stackguard0
+ gp.stackguard1 = gp.stackguard0
mstart1()
// Exit this thread.
if mStackIsSystemAllocated() {
// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
- // the stack, but put it in _g_.stack before mstart,
+ // the stack, but put it in gp.stack before mstart,
// so the logic above hasn't set osStack yet.
osStack = true
}
@@ -1396,9 +1469,9 @@
//
//go:noinline
func mstart1() {
- _g_ := getg()
+ gp := getg()
- if _g_ != _g_.m.g0 {
+ if gp != gp.m.g0 {
throw("bad runtime·mstart")
}
@@ -1408,26 +1481,26 @@
// so other calls can reuse the current frame.
// And goexit0 does a gogo that needs to return from mstart1
// and let mstart0 exit the thread.
- _g_.sched.g = guintptr(unsafe.Pointer(_g_))
- _g_.sched.pc = getcallerpc()
- _g_.sched.sp = getcallersp()
+ gp.sched.g = guintptr(unsafe.Pointer(gp))
+ gp.sched.pc = getcallerpc()
+ gp.sched.sp = getcallersp()
asminit()
minit()
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
- if _g_.m == &m0 {
+ if gp.m == &m0 {
mstartm0()
}
- if fn := _g_.m.mstartfn; fn != nil {
+ if fn := gp.m.mstartfn; fn != nil {
fn()
}
- if _g_.m != &m0 {
- acquirep(_g_.m.nextp.ptr())
- _g_.m.nextp = 0
+ if gp.m != &m0 {
+ acquirep(gp.m.nextp.ptr())
+ gp.m.nextp = 0
}
schedule()
}
@@ -1461,7 +1534,7 @@
// mexit tears down and exits the current thread.
//
// Don't call this directly to exit the thread, since it must run at
-// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to
+// the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to
// unwind the stack to the point that exits the thread.
//
// It is entered with m.p != nil, so write barriers are allowed. It
@@ -1469,10 +1542,9 @@
//
//go:yeswritebarrierrec
func mexit(osStack bool) {
- g := getg()
- m := g.m
+ mp := getg().m
- if m == &m0 {
+ if mp == &m0 {
// This is the main thread. Just wedge it.
//
// On Linux, exiting the main thread puts the process
@@ -1497,41 +1569,40 @@
unminit()
// Free the gsignal stack.
- if m.gsignal != nil {
- stackfree(m.gsignal.stack)
+ if mp.gsignal != nil {
+ stackfree(mp.gsignal.stack)
// On some platforms, when calling into VDSO (e.g. nanotime)
// we store our g on the gsignal stack, if there is one.
// Now the stack is freed, unlink it from the m, so we
// won't write to it when calling VDSO code.
- m.gsignal = nil
+ mp.gsignal = nil
}
// Remove m from allm.
lock(&sched.lock)
for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
- if *pprev == m {
- *pprev = m.alllink
+ if *pprev == mp {
+ *pprev = mp.alllink
goto found
}
}
throw("m not found in allm")
found:
- if !osStack {
- // Delay reaping m until it's done with the stack.
- //
- // If this is using an OS stack, the OS will free it
- // so there's no need for reaping.
- atomic.Store(&m.freeWait, 1)
- // Put m on the free list, though it will not be reaped until
- // freeWait is 0. Note that the free list must not be linked
- // through alllink because some functions walk allm without
- // locking, so may be using alllink.
- m.freelink = sched.freem
- sched.freem = m
- }
+ // Delay reaping m until it's done with the stack.
+ //
+ // Put mp on the free list, though it will not be reaped while freeWait
+ // is freeMWait. mp is no longer reachable via allm, so even if it is
+ // on an OS stack, we must keep a reference to mp alive so that the GC
+ // doesn't free mp while we are still using it.
+ //
+ // Note that the free list must not be linked through alllink because
+ // some functions walk allm without locking, so may be using alllink.
+ mp.freeWait.Store(freeMWait)
+ mp.freelink = sched.freem
+ sched.freem = mp
unlock(&sched.lock)
- atomic.Xadd64(&ncgocall, int64(m.ncgocall))
+ atomic.Xadd64(&ncgocall, int64(mp.ncgocall))
// Release the P.
handoffp(releasep())
@@ -1548,16 +1619,19 @@
if GOOS == "darwin" || GOOS == "ios" {
// Make sure pendingPreemptSignals is correct when an M exits.
// For #41702.
- if atomic.Load(&m.signalPending) != 0 {
- atomic.Xadd(&pendingPreemptSignals, -1)
+ if mp.signalPending.Load() != 0 {
+ pendingPreemptSignals.Add(-1)
}
}
// Destroy all allocated resources. After this is called, we may no
// longer take any locks.
- mdestroy(m)
+ mdestroy(mp)
if osStack {
+ // No more uses of mp, so it is safe to drop the reference.
+ mp.freeWait.Store(freeMRef)
+
// Return from mstart and let the system thread
// library free the g0 stack and terminate the thread.
return
@@ -1567,7 +1641,7 @@
// return to. Exit the thread directly. exitThread will clear
// m.freeWait when it's done with the stack and the m can be
// reaped.
- exitThread(&m.freeWait)
+ exitThread(&mp.freeWait)
}
// forEachP calls fn(p) for every P p when p reaches a GC safe point.
@@ -1583,7 +1657,7 @@
//go:systemstack
func forEachP(fn func(*p)) {
mp := acquirem()
- _p_ := getg().m.p.ptr()
+ pp := getg().m.p.ptr()
lock(&sched.lock)
if sched.safePointWait != 0 {
@@ -1593,9 +1667,9 @@
sched.safePointFn = fn
// Ask all Ps to run the safe point function.
- for _, p := range allp {
- if p != _p_ {
- atomic.Store(&p.runSafePointFn, 1)
+ for _, p2 := range allp {
+ if p2 != pp {
+ atomic.Store(&p2.runSafePointFn, 1)
}
}
preemptall()
@@ -1617,19 +1691,19 @@
unlock(&sched.lock)
// Run fn for the current P.
- fn(_p_)
+ fn(pp)
// Force Ps currently in _Psyscall into _Pidle and hand them
// off to induce safe point function execution.
- for _, p := range allp {
- s := p.status
- if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
+ for _, p2 := range allp {
+ s := p2.status
+ if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
if trace.enabled {
- traceGoSysBlock(p)
- traceProcStop(p)
+ traceGoSysBlock(p2)
+ traceProcStop(p2)
}
- p.syscalltick++
- handoffp(p)
+ p2.syscalltick++
+ handoffp(p2)
}
}
@@ -1650,8 +1724,8 @@
if sched.safePointWait != 0 {
throw("forEachP: not done")
}
- for _, p := range allp {
- if p.runSafePointFn != 0 {
+ for _, p2 := range allp {
+ if p2.runSafePointFn != 0 {
throw("forEachP: P did not run fn")
}
}
@@ -1707,20 +1781,20 @@
// id is optional pre-allocated m ID. Omit by passing -1.
//
// This function is allowed to have write barriers even if the caller
-// isn't because it borrows _p_.
+// isn't because it borrows pp.
//
//go:yeswritebarrierrec
-func allocm(_p_ *p, fn func(), id int64) *m {
+func allocm(pp *p, fn func(), id int64) *m {
allocmLock.rlock()
- // The caller owns _p_, but we may borrow (i.e., acquirep) it. We must
+ // The caller owns pp, but we may borrow (i.e., acquirep) it. We must
// disable preemption to ensure it is not stolen, which would make the
// caller lose ownership.
acquirem()
- _g_ := getg()
- if _g_.m.p == 0 {
- acquirep(_p_) // temporarily borrow p for mallocs in this function
+ gp := getg()
+ if gp.m.p == 0 {
+ acquirep(pp) // temporarily borrow p for mallocs in this function
}
// Release the free M list. We need to do this somewhere and
@@ -1729,19 +1803,25 @@
lock(&sched.lock)
var newList *m
for freem := sched.freem; freem != nil; {
- if freem.freeWait != 0 {
+ wait := freem.freeWait.Load()
+ if wait == freeMWait {
next := freem.freelink
freem.freelink = newList
newList = freem
freem = next
continue
}
- // stackfree must be on the system stack, but allocm is
- // reachable off the system stack transitively from
- // startm.
- systemstack(func() {
- stackfree(freem.g0.stack)
- })
+ // Free the stack if needed. For freeMRef, there is
+ // nothing to do except drop freem from the sched.freem
+ // list.
+ if wait == freeMStack {
+ // stackfree must be on the system stack, but allocm is
+ // reachable off the system stack transitively from
+ // startm.
+ systemstack(func() {
+ stackfree(freem.g0.stack)
+ })
+ }
freem = freem.freelink
}
sched.freem = newList
@@ -1761,11 +1841,11 @@
}
mp.g0.m = mp
- if _p_ == _g_.m.p.ptr() {
+ if pp == gp.m.p.ptr() {
releasep()
}
- releasem(_g_.m)
+ releasem(gp.m)
allocmLock.runlock()
return mp
}
@@ -1813,7 +1893,7 @@
// for details.
//
// Can not throw, because scheduler is not initialized yet.
- write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
+ writeErrStr("fatal error: cgo callback before cgo call\n")
exit(1)
}
@@ -1859,10 +1939,10 @@
// scheduling stack is, but we assume there's at least 32 kB,
// which is more than enough for us.
setg(mp.g0)
- _g_ := getg()
- _g_.stack.hi = getcallersp() + 1024
- _g_.stack.lo = getcallersp() - 32*1024
- _g_.stackguard0 = _g_.stack.lo + _StackGuard
+ gp := getg()
+ gp.stack.hi = getcallersp() + 1024
+ gp.stack.lo = getcallersp() - 32*1024
+ gp.stackguard0 = gp.stack.lo + _StackGuard
// Initialize this thread to use the m.
asminit()
@@ -1870,16 +1950,14 @@
// mp.curg is now a real goroutine.
casgstatus(mp.curg, _Gdead, _Gsyscall)
- atomic.Xadd(&sched.ngsys, -1)
+ sched.ngsys.Add(-1)
}
-var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
-
// newextram allocates m's and puts them on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
func newextram() {
- c := atomic.Xchg(&extraMWaiters, 0)
+ c := extraMWaiters.Swap(0)
if c > 0 {
for i := uint32(0); i < c; i++ {
oneNewExtraM()
@@ -1918,13 +1996,23 @@
casgstatus(gp, _Gidle, _Gdead)
gp.m = mp
mp.curg = gp
+ mp.isextra = true
mp.lockedInt++
mp.lockedg.set(gp)
gp.lockedm.set(mp)
- gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
+ gp.goid = sched.goidgen.Add(1)
+ gp.sysblocktraced = true
if raceenabled {
gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
}
+ if trace.enabled {
+ // Trigger two trace events for the locked g in the extra m,
+ // since the next event of the g will be traceEvGoSysExit in exitsyscall,
+ // while calling from C thread to Go.
+ traceGoCreate(gp, 0) // no start pc
+ gp.traceseq++
+ traceEvent(traceEvGoInSyscall, -1, gp.goid)
+ }
// put on allg for garbage collector
allgadd(gp)
@@ -1932,7 +2020,7 @@
// counted by gcount. It would be more "proper" to increment
// sched.ngfree, but that requires locking. Incrementing ngsys
// has the same effect.
- atomic.Xadd(&sched.ngsys, +1)
+ sched.ngsys.Add(1)
// Add m to the extra list.
mnext := lockextra(true)
@@ -1973,7 +2061,7 @@
// Return mp.curg to dead state.
casgstatus(mp.curg, _Gsyscall, _Gdead)
mp.curg.preemptStop = false
- atomic.Xadd(&sched.ngsys, +1)
+ sched.ngsys.Add(1)
// Block signals before unminit.
// Unminit unregisters the signal handling stack (but needs g on some systems).
@@ -2000,9 +2088,9 @@
return uintptr(unsafe.Pointer(getg().m))
}
-var extram uintptr
+var extram atomic.Uintptr
var extraMCount uint32 // Protected by lockextra
-var extraMWaiters uint32
+var extraMWaiters atomic.Uint32
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
@@ -2016,7 +2104,7 @@
incr := false
for {
- old := atomic.Loaduintptr(&extram)
+ old := extram.Load()
if old == locked {
osyield_no_g()
continue
@@ -2026,13 +2114,13 @@
// Add 1 to the number of threads
// waiting for an M.
// This is cleared by newextram.
- atomic.Xadd(&extraMWaiters, 1)
+ extraMWaiters.Add(1)
incr = true
}
usleep_no_g(1)
continue
}
- if atomic.Casuintptr(&extram, old, locked) {
+ if extram.CompareAndSwap(old, locked) {
return (*m)(unsafe.Pointer(old))
}
osyield_no_g()
@@ -2042,7 +2130,7 @@
//go:nosplit
func unlockextra(mp *m) {
- atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
+ extram.Store(uintptr(unsafe.Pointer(mp)))
}
var (
@@ -2057,6 +2145,13 @@
execLock rwmutex
)
+// These errors are reported (via writeErrStr) by some OS-specific
+// versions of newosproc and newosproc0.
+const (
+ failthreadcreate = "runtime: failed to create new OS thread\n"
+ failallocatestack = "runtime: failed to allocate stack for the new OS thread\n"
+)
+
// newmHandoff contains a list of m structures that need new OS threads.
// This is used by newm in situations where newm itself can't safely
// start an OS thread.
@@ -2085,7 +2180,7 @@
// id is optional pre-allocated m ID. Omit by passing -1.
//
//go:nowritebarrierrec
-func newm(fn func(), _p_ *p, id int64) {
+func newm(fn func(), pp *p, id int64) {
// allocm adds a new M to allm, but they do not start until created by
// the OS in newm1 or the template thread.
//
@@ -2098,8 +2193,8 @@
// start.
acquirem()
- mp := allocm(_p_, fn, id)
- mp.nextp.set(_p_)
+ mp := allocm(pp, fn, id)
+ mp.nextp.set(pp)
mp.sigmask = initSigmask
if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
// We're on a locked M or a thread that may have been
@@ -2221,24 +2316,24 @@
// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
- _g_ := getg()
+ gp := getg()
- if _g_.m.locks != 0 {
+ if gp.m.locks != 0 {
throw("stopm holding locks")
}
- if _g_.m.p != 0 {
+ if gp.m.p != 0 {
throw("stopm holding p")
}
- if _g_.m.spinning {
+ if gp.m.spinning {
throw("stopm spinning")
}
lock(&sched.lock)
- mput(_g_.m)
+ mput(gp.m)
unlock(&sched.lock)
mPark()
- acquirep(_g_.m.nextp.ptr())
- _g_.m.nextp = 0
+ acquirep(gp.m.nextp.ptr())
+ gp.m.nextp = 0
}
func mspinning() {
@@ -2249,8 +2344,8 @@
// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
// May run with m.p==nil, so write barriers are not allowed.
-// If spinning is set, the caller has incremented nmspinning and startm will
-// either decrement nmspinning or set m.spinning in the newly started M.
+// If spinning is set, the caller has incremented nmspinning and must provide a
+// P. startm will set m.spinning in the newly started M.
//
// Callers passing a non-nil P must call from a non-preemptible context. See
// comment on acquirem below.
@@ -2258,7 +2353,7 @@
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
-func startm(_p_ *p, spinning bool) {
+func startm(pp *p, spinning bool) {
// Disable preemption.
//
// Every owned P must have an owner that will eventually stop it in the
@@ -2277,17 +2372,16 @@
// disable preemption before acquiring a P from pidleget below.
mp := acquirem()
lock(&sched.lock)
- if _p_ == nil {
- _p_, _ = pidleget(0)
- if _p_ == nil {
+ if pp == nil {
+ if spinning {
+ // TODO(prattmic): All remaining calls to this function
+ // with _p_ == nil could be cleaned up to find a P
+ // before calling startm.
+ throw("startm: P required for spinning=true")
+ }
+ pp, _ = pidleget(0)
+ if pp == nil {
unlock(&sched.lock)
- if spinning {
- // The caller incremented nmspinning, but there are no idle Ps,
- // so it's okay to just undo the increment and give up.
- if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
- throw("startm: negative nmspinning")
- }
- }
releasem(mp)
return
}
@@ -2314,8 +2408,8 @@
// The caller incremented nmspinning, so set m.spinning in the new M.
fn = mspinning
}
- newm(fn, _p_, id)
- // Ownership transfer of _p_ committed by start in newm.
+ newm(fn, pp, id)
+ // Ownership transfer of pp committed by start in newm.
// Preemption is now safe.
releasem(mp)
return
@@ -2327,14 +2421,14 @@
if nmp.nextp != 0 {
throw("startm: m has p")
}
- if spinning && !runqempty(_p_) {
+ if spinning && !runqempty(pp) {
throw("startm: p has runnable gs")
}
// The caller incremented nmspinning, so set m.spinning in the new M.
nmp.spinning = spinning
- nmp.nextp.set(_p_)
+ nmp.nextp.set(pp)
notewakeup(&nmp.park)
- // Ownership transfer of _p_ committed by wakeup. Preemption is now
+ // Ownership transfer of pp committed by wakeup. Preemption is now
// safe.
releasem(mp)
}
@@ -2343,34 +2437,35 @@
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
-func handoffp(_p_ *p) {
+func handoffp(pp *p) {
// handoffp must start an M in any situation where
- // findrunnable would return a G to run on _p_.
+ // findrunnable would return a G to run on pp.
// if it has local work, start it straight away
- if !runqempty(_p_) || sched.runqsize != 0 {
- startm(_p_, false)
+ if !runqempty(pp) || sched.runqsize != 0 {
+ startm(pp, false)
return
}
// if there's trace work to do, start it straight away
- if (trace.enabled || trace.shutdown) && traceReaderAvailable() {
- startm(_p_, false)
+ if (trace.enabled || trace.shutdown) && traceReaderAvailable() != nil {
+ startm(pp, false)
return
}
// if it has GC work, start it straight away
- if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
- startm(_p_, false)
+ if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {
+ startm(pp, false)
return
}
// no local work, check that there are no spinning/idle M's,
// otherwise our help is not required
- if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
- startm(_p_, true)
+ if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
+ sched.needspinning.Store(0)
+ startm(pp, true)
return
}
lock(&sched.lock)
- if sched.gcwaiting != 0 {
- _p_.status = _Pgcstop
+ if sched.gcwaiting.Load() {
+ pp.status = _Pgcstop
sched.stopwait--
if sched.stopwait == 0 {
notewakeup(&sched.stopnote)
@@ -2378,8 +2473,8 @@
unlock(&sched.lock)
return
}
- if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
- sched.safePointFn(_p_)
+ if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) {
+ sched.safePointFn(pp)
sched.safePointWait--
if sched.safePointWait == 0 {
notewakeup(&sched.safePointNote)
@@ -2387,21 +2482,21 @@
}
if sched.runqsize != 0 {
unlock(&sched.lock)
- startm(_p_, false)
+ startm(pp, false)
return
}
// If this is the last running P and nobody is polling network,
// need to wakeup another M to poll network.
- if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
+ if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 {
unlock(&sched.lock)
- startm(_p_, false)
+ startm(pp, false)
return
}
// The scheduler lock cannot be held when calling wakeNetPoller below
// because wakeNetPoller may call wakep which may call startm.
- when := nobarrierWakeTime(_p_)
- pidleput(_p_, 0)
+ when := nobarrierWakeTime(pp)
+ pidleput(pp, 0)
unlock(&sched.lock)
if when != 0 {
@@ -2411,41 +2506,67 @@
// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
+// Must be called with a P.
func wakep() {
- if atomic.Load(&sched.npidle) == 0 {
+ // Be conservative about spinning threads, only start one if none exist
+ // already.
+ if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) {
return
}
- // be conservative about spinning threads
- if atomic.Load(&sched.nmspinning) != 0 || !atomic.Cas(&sched.nmspinning, 0, 1) {
+
+ // Disable preemption until ownership of pp transfers to the next M in
+ // startm. Otherwise preemption here would leave pp stuck waiting to
+ // enter _Pgcstop.
+ //
+ // See preemption comment on acquirem in startm for more details.
+ mp := acquirem()
+
+ var pp *p
+ lock(&sched.lock)
+ pp, _ = pidlegetSpinning(0)
+ if pp == nil {
+ if sched.nmspinning.Add(-1) < 0 {
+ throw("wakep: negative nmspinning")
+ }
+ unlock(&sched.lock)
+ releasem(mp)
return
}
- startm(nil, true)
+ // Since we always have a P, the race in the "No M is available"
+ // comment in startm doesn't apply during the small window between the
+ // unlock here and lock in startm. A checkdead in between will always
+ // see at least one running M (ours).
+ unlock(&sched.lock)
+
+ startm(pp, true)
+
+ releasem(mp)
}
// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.
func stoplockedm() {
- _g_ := getg()
+ gp := getg()
- if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
+ if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m {
throw("stoplockedm: inconsistent locking")
}
- if _g_.m.p != 0 {
+ if gp.m.p != 0 {
// Schedule another M to run this p.
- _p_ := releasep()
- handoffp(_p_)
+ pp := releasep()
+ handoffp(pp)
}
incidlelocked(1)
// Wait until another thread schedules lockedg again.
mPark()
- status := readgstatus(_g_.m.lockedg.ptr())
+ status := readgstatus(gp.m.lockedg.ptr())
if status&^_Gscan != _Grunnable {
print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n")
- dumpgstatus(_g_.m.lockedg.ptr())
+ dumpgstatus(gp.m.lockedg.ptr())
throw("stoplockedm: not runnable")
}
- acquirep(_g_.m.nextp.ptr())
- _g_.m.nextp = 0
+ acquirep(gp.m.nextp.ptr())
+ gp.m.nextp = 0
}
// Schedules the locked m to run the locked gp.
@@ -2453,10 +2574,8 @@
//
//go:nowritebarrierrec
func startlockedm(gp *g) {
- _g_ := getg()
-
mp := gp.lockedm.ptr()
- if mp == _g_.m {
+ if mp == getg().m {
throw("startlockedm: locked to me")
}
if mp.nextp != 0 {
@@ -2464,8 +2583,8 @@
}
// directly handoff current P to the locked m
incidlelocked(-1)
- _p_ := releasep()
- mp.nextp.set(_p_)
+ pp := releasep()
+ mp.nextp.set(pp)
notewakeup(&mp.park)
stopm()
}
@@ -2473,22 +2592,22 @@
// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
func gcstopm() {
- _g_ := getg()
+ gp := getg()
- if sched.gcwaiting == 0 {
+ if !sched.gcwaiting.Load() {
throw("gcstopm: not waiting for gc")
}
- if _g_.m.spinning {
- _g_.m.spinning = false
+ if gp.m.spinning {
+ gp.m.spinning = false
// OK to just drop nmspinning here,
// startTheWorld will unpark threads as necessary.
- if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
+ if sched.nmspinning.Add(-1) < 0 {
throw("gcstopm: negative nmspinning")
}
}
- _p_ := releasep()
+ pp := releasep()
lock(&sched.lock)
- _p_.status = _Pgcstop
+ pp.status = _Pgcstop
sched.stopwait--
if sched.stopwait == 0 {
notewakeup(&sched.stopnote)
@@ -2507,7 +2626,7 @@
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
- _g_ := getg()
+ mp := getg().m
if goroutineProfile.active {
// Make sure that gp has had its stack written out to the goroutine
@@ -2518,19 +2637,19 @@
// Assign gp.m before entering _Grunning so running Gs have an
// M.
- _g_.m.curg = gp
- gp.m = _g_.m
+ mp.curg = gp
+ gp.m = mp
casgstatus(gp, _Grunnable, _Grunning)
gp.waitsince = 0
gp.preempt = false
gp.stackguard0 = gp.stack.lo + _StackGuard
if !inheritTime {
- _g_.m.p.ptr().schedtick++
+ mp.p.ptr().schedtick++
}
// Check whether the profiler needs to be turned on or off.
hz := sched.profilehz
- if _g_.m.profilehz != hz {
+ if mp.profilehz != hz {
setThreadCPUProfiler(hz)
}
@@ -2551,19 +2670,19 @@
// tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
// reader) so the caller should try to wake a P.
func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
- _g_ := getg()
+ mp := getg().m
// The conditions here and in handoffp must agree: if
// findrunnable would return a G to run, handoffp must start
// an M.
top:
- _p_ := _g_.m.p.ptr()
- if sched.gcwaiting != 0 {
+ pp := mp.p.ptr()
+ if sched.gcwaiting.Load() {
gcstopm()
goto top
}
- if _p_.runSafePointFn != 0 {
+ if pp.runSafePointFn != 0 {
runSafePointFn()
}
@@ -2571,11 +2690,11 @@
// which may steal timers. It's important that between now
// and then, nothing blocks, so these numbers remain mostly
// relevant.
- now, pollUntil, _ := checkTimers(_p_, 0)
+ now, pollUntil, _ := checkTimers(pp, 0)
// Try to schedule the trace reader.
if trace.enabled || trace.shutdown {
- gp = traceReader()
+ gp := traceReader()
if gp != nil {
casgstatus(gp, _Gwaiting, _Grunnable)
traceGoUnpark(gp, 0)
@@ -2585,18 +2704,19 @@
// Try to schedule a GC worker.
if gcBlackenEnabled != 0 {
- gp, now = gcController.findRunnableGCWorker(_p_, now)
+ gp, tnow := gcController.findRunnableGCWorker(pp, now)
if gp != nil {
return gp, false, true
}
+ now = tnow
}
// Check the global runnable queue once in a while to ensure fairness.
// Otherwise two goroutines can completely occupy the local runqueue
// by constantly respawning each other.
- if _p_.schedtick%61 == 0 && sched.runqsize > 0 {
+ if pp.schedtick%61 == 0 && sched.runqsize > 0 {
lock(&sched.lock)
- gp = globrunqget(_p_, 1)
+ gp := globrunqget(pp, 1)
unlock(&sched.lock)
if gp != nil {
return gp, false, false
@@ -2604,7 +2724,7 @@
}
// Wake up the finalizer G.
- if fingwait && fingwake {
+ if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
if gp := wakefing(); gp != nil {
ready(gp, 0, true)
}
@@ -2614,14 +2734,14 @@
}
// local runq
- if gp, inheritTime := runqget(_p_); gp != nil {
+ if gp, inheritTime := runqget(pp); gp != nil {
return gp, inheritTime, false
}
// global runq
if sched.runqsize != 0 {
lock(&sched.lock)
- gp := globrunqget(_p_, 0)
+ gp := globrunqget(pp, 0)
unlock(&sched.lock)
if gp != nil {
return gp, false, false
@@ -2635,7 +2755,7 @@
// blocked thread (e.g. it has already returned from netpoll, but does
// not set lastpoll yet), this thread will do blocking netpoll below
// anyway.
- if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
+ if netpollinited() && netpollWaiters.Load() > 0 && sched.lastpoll.Load() != 0 {
if list := netpoll(0); !list.empty() { // non-blocking
gp := list.pop()
injectglist(&list)
@@ -2652,15 +2772,12 @@
// Limit the number of spinning Ms to half the number of busy Ps.
// This is necessary to prevent excessive CPU consumption when
// GOMAXPROCS>>1 but the program parallelism is low.
- procs := uint32(gomaxprocs)
- if _g_.m.spinning || 2*atomic.Load(&sched.nmspinning) < procs-atomic.Load(&sched.npidle) {
- if !_g_.m.spinning {
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
+ if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
+ if !mp.spinning {
+ mp.becomeSpinning()
}
gp, inheritTime, tnow, w, newWork := stealWork(now)
- now = tnow
if gp != nil {
// Successfully stole.
return gp, inheritTime, false
@@ -2670,6 +2787,8 @@
// discover.
goto top
}
+
+ now = tnow
if w != 0 && (pollUntil == 0 || w < pollUntil) {
// Earlier timer to wait for.
pollUntil = w
@@ -2680,10 +2799,10 @@
//
// If we're in the GC mark phase, can safely scan and blacken objects,
// and have work to do, run idle-time marking rather than give up the P.
- if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) && gcController.addIdleMarkWorker() {
+ if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() {
node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
if node != nil {
- _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
+ pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
gp := node.gp.ptr()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.enabled {
@@ -2722,19 +2841,25 @@
// return P and block
lock(&sched.lock)
- if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
+ if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
unlock(&sched.lock)
goto top
}
if sched.runqsize != 0 {
- gp := globrunqget(_p_, 0)
+ gp := globrunqget(pp, 0)
unlock(&sched.lock)
return gp, false, false
}
- if releasep() != _p_ {
+ if !mp.spinning && sched.needspinning.Load() == 1 {
+ // See "Delicate dance" comment below.
+ mp.becomeSpinning()
+ unlock(&sched.lock)
+ goto top
+ }
+ if releasep() != pp {
throw("findrunnable: wrong p")
}
- now = pidleput(_p_, now)
+ now = pidleput(pp, now)
unlock(&sched.lock)
// Delicate dance: thread transitions from spinning to non-spinning
@@ -2751,43 +2876,60 @@
// * New/modified-earlier timers on a per-P timer heap.
// * Idle-priority GC work (barring golang.org/issue/19112).
//
- // If we discover new work below, we need to restore m.spinning as a signal
- // for resetspinning to unpark a new worker thread (because there can be more
- // than one starving goroutine). However, if after discovering new work
- // we also observe no idle Ps it is OK to skip unparking a new worker
- // thread: the system is fully loaded so no spinning threads are required.
- // Also see "Worker thread parking/unparking" comment at the top of the file.
- wasSpinning := _g_.m.spinning
- if _g_.m.spinning {
- _g_.m.spinning = false
- if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
+ // If we discover new work below, we need to restore m.spinning as a
+ // signal for resetspinning to unpark a new worker thread (because
+ // there can be more than one starving goroutine).
+ //
+ // However, if after discovering new work we also observe no idle Ps
+ // (either here or in resetspinning), we have a problem. We may be
+ // racing with a non-spinning M in the block above, having found no
+ // work and preparing to release its P and park. Allowing that P to go
+ // idle will result in loss of work conservation (idle P while there is
+ // runnable work). This could result in complete deadlock in the
+ // unlikely event that we discover new work (from netpoll) right as we
+ // are racing with _all_ other Ps going idle.
+ //
+ // We use sched.needspinning to synchronize with non-spinning Ms going
+ // idle. If needspinning is set when they are about to drop their P,
+ // they abort the drop and instead become a new spinning M on our
+ // behalf. If we are not racing and the system is truly fully loaded
+ // then no spinning threads are required, and the next thread to
+ // naturally become spinning will clear the flag.
+ //
+ // Also see "Worker thread parking/unparking" comment at the top of the
+ // file.
+ wasSpinning := mp.spinning
+ if mp.spinning {
+ mp.spinning = false
+ if sched.nmspinning.Add(-1) < 0 {
throw("findrunnable: negative nmspinning")
}
// Note the for correctness, only the last M transitioning from
// spinning to non-spinning must perform these rechecks to
- // ensure no missed work. We are performing it on every M that
- // transitions as a conservative change to monitor effects on
- // latency. See golang.org/issue/43997.
+ // ensure no missed work. However, the runtime has some cases
+ // of transient increments of nmspinning that are decremented
+ // without going through this path, so we must be conservative
+ // and perform the check on all spinning Ms.
+ //
+ // See https://go.dev/issue/43997.
// Check all runqueues once again.
- _p_ = checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
- if _p_ != nil {
- acquirep(_p_)
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
+ pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
+ if pp != nil {
+ acquirep(pp)
+ mp.becomeSpinning()
goto top
}
// Check for idle-priority GC work again.
- _p_, gp = checkIdleGCNoP()
- if _p_ != nil {
- acquirep(_p_)
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
+ pp, gp := checkIdleGCNoP()
+ if pp != nil {
+ acquirep(pp)
+ mp.becomeSpinning()
// Run the idle worker.
- _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
+ pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.enabled {
traceGoUnpark(gp, 0)
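
For context, the needspinning handshake described in the long comment above boils down to a pair of atomic operations: a spinning M that finds work but no idle P sets the flag, and an M that is about to release its P and park consumes the flag and becomes the new spinning M instead. A conceptual sketch with illustrative names, not runtime code:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // needSpinning is a stand-in for sched.needspinning.
    var needSpinning atomic.Uint32

    // requestSpinner is what a spinning M does when it hands work off but
    // cannot find an idle P to take over spinning.
    func requestSpinner() { needSpinning.Store(1) }

    // maybeBecomeSpinner is what a non-spinning M checks just before dropping
    // its P to park: if the flag is set, consume it and keep running as the
    // new spinning M instead of going idle.
    func maybeBecomeSpinner() bool {
        return needSpinning.CompareAndSwap(1, 0)
    }

    func main() {
        requestSpinner()
        fmt.Println(maybeBecomeSpinner()) // true: this M keeps its P and spins
        fmt.Println(maybeBecomeSpinner()) // false: the request was already consumed
    }
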
@@ -2805,12 +2947,12 @@
}
// Poll network until next timer.
- if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
- atomic.Store64(&sched.pollUntil, uint64(pollUntil))
- if _g_.m.p != 0 {
+ if netpollinited() && (netpollWaiters.Load() > 0 || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
+ sched.pollUntil.Store(pollUntil)
+ if mp.p != 0 {
throw("findrunnable: netpoll with p")
}
- if _g_.m.spinning {
+ if mp.spinning {
throw("findrunnable: netpoll with spinning")
}
// Refresh now.
@@ -2827,8 +2969,8 @@
delay = 0
}
list := netpoll(delay) // block until new work is available
- atomic.Store64(&sched.pollUntil, 0)
- atomic.Store64(&sched.lastpoll, uint64(now))
+ sched.pollUntil.Store(0)
+ sched.lastpoll.Store(now)
if faketime != 0 && list.empty() {
// Using fake time and nothing is ready; stop M.
// When all M's stop, checkdead will call timejump.
@@ -2836,12 +2978,12 @@
goto top
}
lock(&sched.lock)
- _p_, _ = pidleget(now)
+ pp, _ := pidleget(now)
unlock(&sched.lock)
- if _p_ == nil {
+ if pp == nil {
injectglist(&list)
} else {
- acquirep(_p_)
+ acquirep(pp)
if !list.empty() {
gp := list.pop()
injectglist(&list)
@@ -2852,13 +2994,12 @@
return gp, false, false
}
if wasSpinning {
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
+ mp.becomeSpinning()
}
goto top
}
} else if pollUntil != 0 && netpollinited() {
- pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ pollerPollUntil := sched.pollUntil.Load()
if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
netpollBreak()
}
@@ -2879,7 +3020,7 @@
if !runqempty(p) {
return true
}
- if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
+ if netpollinited() && netpollWaiters.Load() > 0 && sched.lastpoll.Load() != 0 {
if list := netpoll(0); !list.empty() {
injectglist(&list)
return true
@@ -2904,7 +3045,7 @@
stealTimersOrRunNextG := i == stealTries-1
for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
- if sched.gcwaiting != 0 {
+ if sched.gcwaiting.Load() {
// GC work may be available.
return nil, false, now, pollUntil, true
}
@@ -2972,17 +3113,18 @@
for id, p2 := range allpSnapshot {
if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
lock(&sched.lock)
- pp, _ := pidleget(0)
- unlock(&sched.lock)
- if pp != nil {
- return pp
+ pp, _ := pidlegetSpinning(0)
+ if pp == nil {
+ // Can't get a P, don't bother checking remaining Ps.
+ unlock(&sched.lock)
+ return nil
}
-
- // Can't get a P, don't bother checking remaining Ps.
- break
+ unlock(&sched.lock)
+ return pp
}
}
+ // No work available.
return nil
}
@@ -3038,7 +3180,7 @@
// the assumption in gcControllerState.findRunnableGCWorker that an
// empty gcBgMarkWorkerPool is only possible if gcMarkDone is running.
lock(&sched.lock)
- pp, now := pidleget(0)
+ pp, now := pidlegetSpinning(0)
if pp == nil {
unlock(&sched.lock)
return nil, nil
@@ -3068,12 +3210,12 @@
// going to wake up before the when argument; or it wakes an idle P to service
// timers and the network poller if there isn't one already.
func wakeNetPoller(when int64) {
- if atomic.Load64(&sched.lastpoll) == 0 {
+ if sched.lastpoll.Load() == 0 {
// In findrunnable we ensure that when polling the pollUntil
// field is either zero or the time to which the current
// poll is expected to run. This can have a spurious wakeup
// but should never miss a wakeup.
- pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ pollerPollUntil := sched.pollUntil.Load()
if pollerPollUntil == 0 || pollerPollUntil > when {
netpollBreak()
}
@@ -3087,13 +3229,13 @@
}
func resetspinning() {
- _g_ := getg()
- if !_g_.m.spinning {
+ gp := getg()
+ if !gp.m.spinning {
throw("resetspinning: not a spinning m")
}
- _g_.m.spinning = false
- nmspinning := atomic.Xadd(&sched.nmspinning, -1)
- if int32(nmspinning) < 0 {
+ gp.m.spinning = false
+ nmspinning := sched.nmspinning.Add(-1)
+ if nmspinning < 0 {
throw("findrunnable: negative nmspinning")
}
// M wakeup policy is deliberately somewhat conservative, so check if we
@@ -3138,8 +3280,20 @@
*glist = gList{}
startIdle := func(n int) {
- for ; n != 0 && sched.npidle != 0; n-- {
- startm(nil, false)
+ for i := 0; i < n; i++ {
+ mp := acquirem() // See comment in startm.
+ lock(&sched.lock)
+
+ pp, _ := pidlegetSpinning(0)
+ if pp == nil {
+ unlock(&sched.lock)
+ releasem(mp)
+ break
+ }
+
+ unlock(&sched.lock)
+ startm(pp, false)
+ releasem(mp)
}
}
@@ -3152,7 +3306,7 @@
return
}
- npidle := int(atomic.Load(&sched.npidle))
+ npidle := int(sched.npidle.Load())
var globq gQueue
var n int
for n = 0; n < npidle && !q.empty(); n++ {
@@ -3175,31 +3329,31 @@
// One round of scheduler: find a runnable goroutine and execute it.
// Never returns.
func schedule() {
- _g_ := getg()
+ mp := getg().m
- if _g_.m.locks != 0 {
+ if mp.locks != 0 {
throw("schedule: holding locks")
}
- if _g_.m.lockedg != 0 {
+ if mp.lockedg != 0 {
stoplockedm()
- execute(_g_.m.lockedg.ptr(), false) // Never returns.
+ execute(mp.lockedg.ptr(), false) // Never returns.
}
// We should not schedule away from a g that is executing a cgo call,
// since the cgo call is using the m's g0 stack.
- if _g_.m.incgo {
+ if mp.incgo {
throw("schedule: in cgo")
}
top:
- pp := _g_.m.p.ptr()
+ pp := mp.p.ptr()
pp.preempt = false
// Safety check: if we are spinning, the run queue should be empty.
// Check this before calling checkTimers, as that might call
// goready to put a ready goroutine on the local run queue.
- if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
+ if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
throw("schedule: spinning with local work")
}
@@ -3208,7 +3362,7 @@
// This thread is going to run a goroutine and is not spinning anymore,
// so if it was marked as spinning we need to reset it now and potentially
// start a new spinning M.
- if _g_.m.spinning {
+ if mp.spinning {
resetspinning()
}
@@ -3252,10 +3406,10 @@
// readied later, the caller can do other work but eventually should
// call schedule to restart the scheduling of goroutines on this m.
func dropg() {
- _g_ := getg()
+ gp := getg()
- setMNoWB(&_g_.m.curg.m, nil)
- setGNoWB(&_g_.m.curg, nil)
+ setMNoWB(&gp.m.curg.m, nil)
+ setGNoWB(&gp.m.curg, nil)
}
// checkTimers runs any timers for the P that are ready.
@@ -3271,8 +3425,8 @@
func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
// If it's not yet time for the first timer, or the first adjusted
// timer, then there is nothing to do.
- next := int64(atomic.Load64(&pp.timer0When))
- nextAdj := int64(atomic.Load64(&pp.timerModifiedEarliest))
+ next := pp.timer0When.Load()
+ nextAdj := pp.timerModifiedEarliest.Load()
if next == 0 || (nextAdj != 0 && nextAdj < next) {
next = nextAdj
}
@@ -3290,7 +3444,7 @@
// if we would clear deleted timers.
// This corresponds to the condition below where
// we decide whether to call clearDeletedTimers.
- if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) {
+ if pp != getg().m.p.ptr() || int(pp.deletedTimers.Load()) <= int(pp.numTimers.Load()/4) {
return now, next, false
}
}
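
checkTimers returns early when neither deadline has come due: timer0When is the first timer's when, timerModifiedEarliest is the earliest timer that was modified to an earlier time, and zero means "none". The merge of the two can be read in isolation as a small helper (the name is hypothetical, not from the runtime):

// earliestTimer mirrors the next/nextAdj merge in checkTimers: it returns the
// earlier of the two deadlines, treating 0 as "no timer pending".
func earliestTimer(timer0When, timerModifiedEarliest int64) int64 {
	next := timer0When
	if adj := timerModifiedEarliest; adj != 0 && (next == 0 || adj < next) {
		next = adj
	}
	return next // 0 means the P has no timers at all
}

If the result is 0 or still in the future, checkTimers can skip taking timersLock entirely, unless the P also owes a sweep of its deleted timers.
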
@@ -3315,7 +3469,7 @@
// If this is the local P, and there are a lot of deleted timers,
// clear them out. We only do this for the local P to reduce
// lock contention on timersLock.
- if pp == getg().m.p.ptr() && int(atomic.Load(&pp.deletedTimers)) > len(pp.timers)/4 {
+ if pp == getg().m.p.ptr() && int(pp.deletedTimers.Load()) > len(pp.timers)/4 {
clearDeletedTimers(pp)
}
@@ -3331,19 +3485,21 @@
// park continuation on g0.
func park_m(gp *g) {
- _g_ := getg()
+ mp := getg().m
if trace.enabled {
- traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
+ traceGoPark(mp.waittraceev, mp.waittraceskip)
}
+ // N.B. Not using casGToWaiting here because the waitreason is
+ // set by park_m's caller.
casgstatus(gp, _Grunning, _Gwaiting)
dropg()
- if fn := _g_.m.waitunlockf; fn != nil {
- ok := fn(gp, _g_.m.waitlock)
- _g_.m.waitunlockf = nil
- _g_.m.waitlock = nil
+ if fn := mp.waitunlockf; fn != nil {
+ ok := fn(gp, mp.waitlock)
+ mp.waitunlockf = nil
+ mp.waitlock = nil
if !ok {
if trace.enabled {
traceGoUnpark(gp, 2)
@@ -3378,7 +3534,7 @@
goschedImpl(gp)
}
-// goschedguarded is a forbidden-states-avoided version of gosched_m
+// goschedguarded is a forbidden-states-avoided version of gosched_m.
func goschedguarded_m(gp *g) {
if !canPreemptM(gp.m) {
@@ -3410,7 +3566,6 @@
dumpgstatus(gp)
throw("bad g status")
}
- gp.waitreason = waitReasonPreempted
if gp.asyncSafePoint {
// Double-check that async preemption does not
@@ -3470,24 +3625,24 @@
// goexit continuation on g0.
func goexit0(gp *g) {
- _g_ := getg()
- _p_ := _g_.m.p.ptr()
+ mp := getg().m
+ pp := mp.p.ptr()
casgstatus(gp, _Grunning, _Gdead)
- gcController.addScannableStack(_p_, -int64(gp.stack.hi-gp.stack.lo))
+ gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo))
if isSystemGoroutine(gp, false) {
- atomic.Xadd(&sched.ngsys, -1)
+ sched.ngsys.Add(-1)
}
gp.m = nil
locked := gp.lockedm != 0
gp.lockedm = 0
- _g_.m.lockedg = 0
+ mp.lockedg = 0
gp.preemptStop = false
gp.paniconfault = false
gp._defer = nil // should be true already but just in case.
gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
gp.writebuf = nil
- gp.waitreason = 0
+ gp.waitreason = waitReasonZero
gp.param = nil
gp.labels = nil
gp.timer = nil
@@ -3498,22 +3653,22 @@
// rapidly creating and exiting goroutines.
assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
- atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
+ gcController.bgScanCredit.Add(scanCredit)
gp.gcAssistBytes = 0
}
dropg()
if GOARCH == "wasm" { // no threads yet on wasm
- gfput(_p_, gp)
+ gfput(pp, gp)
schedule() // never returns
}
- if _g_.m.lockedInt != 0 {
- print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
+ if mp.lockedInt != 0 {
+ print("invalid m->lockedInt = ", mp.lockedInt, "\n")
throw("internal lockOSThread error")
}
- gfput(_p_, gp)
+ gfput(pp, gp)
if locked {
// The goroutine may have locked this thread because
// it put it in an unusual kernel state. Kill it
@@ -3522,11 +3677,11 @@
// Return to mstart, which will release the P and exit
// the thread.
if GOOS != "plan9" { // See golang.org/issue/22227.
- gogo(&_g_.m.g0.sched)
+ gogo(&mp.g0.sched)
} else {
// Clear lockedExt on plan9 since we may end up re-using
// this thread.
- _g_.m.lockedExt = 0
+ mp.lockedExt = 0
}
}
schedule()
@@ -3541,9 +3696,9 @@
//go:nosplit
//go:nowritebarrierrec
func save(pc, sp uintptr) {
- _g_ := getg()
+ gp := getg()
- if _g_ == _g_.m.g0 || _g_ == _g_.m.gsignal {
+ if gp == gp.m.g0 || gp == gp.m.gsignal {
// m.g0.sched is special and must describe the context
// for exiting the thread. mstart1 writes to it directly.
// m.gsignal.sched should not be used at all.
@@ -3552,14 +3707,14 @@
throw("save on system g not allowed")
}
- _g_.sched.pc = pc
- _g_.sched.sp = sp
- _g_.sched.lr = 0
- _g_.sched.ret = 0
+ gp.sched.pc = pc
+ gp.sched.sp = sp
+ gp.sched.lr = 0
+ gp.sched.ret = 0
// We need to ensure ctxt is zero, but can't have a write
// barrier here. However, it should always already be zero.
// Assert that.
- if _g_.sched.ctxt != nil {
+ if gp.sched.ctxt != nil {
badctxt()
}
}
@@ -3594,7 +3749,7 @@
// when syscall returns we emit traceGoSysExit and when the goroutine starts running
// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
-// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
+// we remember current value of syscalltick in m (gp.m.syscalltick = gp.m.p.ptr().syscalltick),
// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
// and we wait for the increment before emitting traceGoSysExit.
// Note that the increment is done even if tracing is not enabled,
@@ -3602,27 +3757,27 @@
//
//go:nosplit
func reentersyscall(pc, sp uintptr) {
- _g_ := getg()
+ gp := getg()
// Disable preemption because during this function g is in Gsyscall status,
// but can have inconsistent g->sched, do not let GC observe it.
- _g_.m.locks++
+ gp.m.locks++
// Entersyscall must not call any function that might split/grow the stack.
// (See details in comment above.)
// Catch calls that might, by replacing the stack guard with something that
// will trip any stack check and leaving a flag to tell newstack to die.
- _g_.stackguard0 = stackPreempt
- _g_.throwsplit = true
+ gp.stackguard0 = stackPreempt
+ gp.throwsplit = true
// Leave SP around for GC and traceback.
save(pc, sp)
- _g_.syscallsp = sp
- _g_.syscallpc = pc
- casgstatus(_g_, _Grunning, _Gsyscall)
- if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+ gp.syscallsp = sp
+ gp.syscallpc = pc
+ casgstatus(gp, _Grunning, _Gsyscall)
+ if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
systemstack(func() {
- print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
throw("entersyscall")
})
}
@@ -3635,30 +3790,30 @@
save(pc, sp)
}
- if atomic.Load(&sched.sysmonwait) != 0 {
+ if sched.sysmonwait.Load() {
systemstack(entersyscall_sysmon)
save(pc, sp)
}
- if _g_.m.p.ptr().runSafePointFn != 0 {
+ if gp.m.p.ptr().runSafePointFn != 0 {
// runSafePointFn may stack split if run on this stack
systemstack(runSafePointFn)
save(pc, sp)
}
- _g_.m.syscalltick = _g_.m.p.ptr().syscalltick
- _g_.sysblocktraced = true
- pp := _g_.m.p.ptr()
+ gp.m.syscalltick = gp.m.p.ptr().syscalltick
+ gp.sysblocktraced = true
+ pp := gp.m.p.ptr()
pp.m = 0
- _g_.m.oldp.set(pp)
- _g_.m.p = 0
+ gp.m.oldp.set(pp)
+ gp.m.p = 0
atomic.Store(&pp.status, _Psyscall)
- if sched.gcwaiting != 0 {
+ if sched.gcwaiting.Load() {
systemstack(entersyscall_gcwait)
save(pc, sp)
}
- _g_.m.locks--
+ gp.m.locks--
}
// Standard syscall entry used by the go syscall library and normal cgo calls.
@@ -3673,24 +3828,24 @@
func entersyscall_sysmon() {
lock(&sched.lock)
- if atomic.Load(&sched.sysmonwait) != 0 {
- atomic.Store(&sched.sysmonwait, 0)
+ if sched.sysmonwait.Load() {
+ sched.sysmonwait.Store(false)
notewakeup(&sched.sysmonnote)
}
unlock(&sched.lock)
}
func entersyscall_gcwait() {
- _g_ := getg()
- _p_ := _g_.m.oldp.ptr()
+ gp := getg()
+ pp := gp.m.oldp.ptr()
lock(&sched.lock)
- if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
+ if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
if trace.enabled {
- traceGoSysBlock(_p_)
- traceProcStop(_p_)
+ traceGoSysBlock(pp)
+ traceProcStop(pp)
}
- _p_.syscalltick++
+ pp.syscalltick++
if sched.stopwait--; sched.stopwait == 0 {
notewakeup(&sched.stopnote)
}
@@ -3702,34 +3857,34 @@
//
//go:nosplit
func entersyscallblock() {
- _g_ := getg()
+ gp := getg()
- _g_.m.locks++ // see comment in entersyscall
- _g_.throwsplit = true
- _g_.stackguard0 = stackPreempt // see comment in entersyscall
- _g_.m.syscalltick = _g_.m.p.ptr().syscalltick
- _g_.sysblocktraced = true
- _g_.m.p.ptr().syscalltick++
+ gp.m.locks++ // see comment in entersyscall
+ gp.throwsplit = true
+ gp.stackguard0 = stackPreempt // see comment in entersyscall
+ gp.m.syscalltick = gp.m.p.ptr().syscalltick
+ gp.sysblocktraced = true
+ gp.m.p.ptr().syscalltick++
// Leave SP around for GC and traceback.
pc := getcallerpc()
sp := getcallersp()
save(pc, sp)
- _g_.syscallsp = _g_.sched.sp
- _g_.syscallpc = _g_.sched.pc
- if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+ gp.syscallsp = gp.sched.sp
+ gp.syscallpc = gp.sched.pc
+ if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
sp1 := sp
- sp2 := _g_.sched.sp
- sp3 := _g_.syscallsp
+ sp2 := gp.sched.sp
+ sp3 := gp.syscallsp
systemstack(func() {
- print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
throw("entersyscallblock")
})
}
- casgstatus(_g_, _Grunning, _Gsyscall)
- if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+ casgstatus(gp, _Grunning, _Gsyscall)
+ if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
systemstack(func() {
- print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ print("entersyscallblock inconsistent ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
throw("entersyscallblock")
})
}
@@ -3739,7 +3894,7 @@
// Resave for traceback during blocked call.
save(getcallerpc(), getcallersp())
- _g_.m.locks--
+ gp.m.locks--
}
func entersyscallblock_handoff() {
@@ -3763,16 +3918,16 @@
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
- _g_ := getg()
+ gp := getg()
- _g_.m.locks++ // see comment in entersyscall
- if getcallersp() > _g_.syscallsp {
+ gp.m.locks++ // see comment in entersyscall
+ if getcallersp() > gp.syscallsp {
throw("exitsyscall: syscall frame is no longer valid")
}
- _g_.waitsince = 0
- oldp := _g_.m.oldp.ptr()
- _g_.m.oldp = 0
+ gp.waitsince = 0
+ oldp := gp.m.oldp.ptr()
+ gp.m.oldp = 0
if exitsyscallfast(oldp) {
// When exitsyscallfast returns success, we have a P so can now use
// write barriers
@@ -3781,33 +3936,33 @@
// profile, exactly as it was when the goroutine profiler first
// stopped the world.
systemstack(func() {
- tryRecordGoroutineProfileWB(_g_)
+ tryRecordGoroutineProfileWB(gp)
})
}
if trace.enabled {
- if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+ if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick {
systemstack(traceGoStart)
}
}
// There's a cpu for us, so we can run.
- _g_.m.p.ptr().syscalltick++
+ gp.m.p.ptr().syscalltick++
// We need to cas the status and scan before resuming...
- casgstatus(_g_, _Gsyscall, _Grunning)
+ casgstatus(gp, _Gsyscall, _Grunning)
// Garbage collector isn't running (since we are),
// so okay to clear syscallsp.
- _g_.syscallsp = 0
- _g_.m.locks--
- if _g_.preempt {
+ gp.syscallsp = 0
+ gp.m.locks--
+ if gp.preempt {
// restore the preemption request in case we've cleared it in newstack
- _g_.stackguard0 = stackPreempt
+ gp.stackguard0 = stackPreempt
} else {
// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
- _g_.stackguard0 = _g_.stack.lo + _StackGuard
+ gp.stackguard0 = gp.stack.lo + _StackGuard
}
- _g_.throwsplit = false
+ gp.throwsplit = false
- if sched.disable.user && !schedEnabled(_g_) {
+ if sched.disable.user && !schedEnabled(gp) {
// Scheduling of this goroutine is disabled.
Gosched()
}
@@ -3815,21 +3970,21 @@
return
}
- _g_.sysexitticks = 0
+ gp.sysexitticks = 0
if trace.enabled {
// Wait till traceGoSysBlock event is emitted.
// This ensures consistency of the trace (the goroutine is started after it is blocked).
- for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
+ for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
osyield()
}
// We can't trace syscall exit right now because we don't have a P.
// Tracing code can invoke write barriers that cannot run without a P.
// So instead we remember the syscall exit time and emit the event
// in execute when we have a P.
- _g_.sysexitticks = cputicks()
+ gp.sysexitticks = cputicks()
}
- _g_.m.locks--
+ gp.m.locks--
// Call the scheduler.
mcall(exitsyscall0)
@@ -3840,14 +3995,14 @@
// Must wait until now because until gosched returns
// we don't know for sure that the garbage collector
// is not running.
- _g_.syscallsp = 0
- _g_.m.p.ptr().syscalltick++
- _g_.throwsplit = false
+ gp.syscallsp = 0
+ gp.m.p.ptr().syscalltick++
+ gp.throwsplit = false
}
//go:nosplit
func exitsyscallfast(oldp *p) bool {
- _g_ := getg()
+ gp := getg()
// Freezetheworld sets stopwait but does not retake P's.
if sched.stopwait == freezeStopWait {
@@ -3871,7 +4026,7 @@
if oldp != nil {
// Wait till traceGoSysBlock event is emitted.
// This ensures consistency of the trace (the goroutine is started after it is blocked).
- for oldp.syscalltick == _g_.m.syscalltick {
+ for oldp.syscalltick == gp.m.syscalltick {
osyield()
}
}
@@ -3891,33 +4046,33 @@
//
//go:nosplit
func exitsyscallfast_reacquired() {
- _g_ := getg()
- if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+ gp := getg()
+ if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
if trace.enabled {
- // The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed).
+ // The p was retaken and then entered a syscall again (since gp.m.syscalltick has changed).
// traceGoSysBlock for this syscall was already emitted,
// but here we effectively retake the p from the new syscall running on the same p.
systemstack(func() {
// Denote blocking of the new syscall.
- traceGoSysBlock(_g_.m.p.ptr())
+ traceGoSysBlock(gp.m.p.ptr())
// Denote completion of the current syscall.
traceGoSysExit(0)
})
}
- _g_.m.p.ptr().syscalltick++
+ gp.m.p.ptr().syscalltick++
}
}
func exitsyscallfast_pidle() bool {
lock(&sched.lock)
- _p_, _ := pidleget(0)
- if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 {
- atomic.Store(&sched.sysmonwait, 0)
+ pp, _ := pidleget(0)
+ if pp != nil && sched.sysmonwait.Load() {
+ sched.sysmonwait.Store(false)
notewakeup(&sched.sysmonnote)
}
unlock(&sched.lock)
- if _p_ != nil {
- acquirep(_p_)
+ if pp != nil {
+ acquirep(pp)
return true
}
return false
@@ -3933,12 +4088,12 @@
casgstatus(gp, _Gsyscall, _Grunnable)
dropg()
lock(&sched.lock)
- var _p_ *p
+ var pp *p
if schedEnabled(gp) {
- _p_, _ = pidleget(0)
+ pp, _ = pidleget(0)
}
var locked bool
- if _p_ == nil {
+ if pp == nil {
globrunqput(gp)
// Below, we stoplockedm if gp is locked. globrunqput releases
@@ -3947,13 +4102,13 @@
// could race with another M transitioning gp from unlocked to
// locked.
locked = gp.lockedm != 0
- } else if atomic.Load(&sched.sysmonwait) != 0 {
- atomic.Store(&sched.sysmonwait, 0)
+ } else if sched.sysmonwait.Load() {
+ sched.sysmonwait.Store(false)
notewakeup(&sched.sysmonnote)
}
unlock(&sched.lock)
- if _p_ != nil {
- acquirep(_p_)
+ if pp != nil {
+ acquirep(pp)
execute(gp, false) // Never returns.
}
if locked {
@@ -4038,7 +4193,7 @@
// pendingPreemptSignals is the number of preemption signals
// that have been sent but not received. This is only used on Darwin.
// For #41702.
-var pendingPreemptSignals uint32
+var pendingPreemptSignals atomic.Int32
// Called from syscall package before Exec.
//
@@ -4050,7 +4205,7 @@
// On Darwin, wait for all pending preemption signals to
// be received. See issue #41702.
if GOOS == "darwin" || GOOS == "ios" {
- for int32(atomic.Load(&pendingPreemptSignals)) > 0 {
+ for pendingPreemptSignals.Load() > 0 {
osyield()
}
}
@@ -4089,8 +4244,8 @@
systemstack(func() {
newg := newproc1(fn, gp, pc)
- _p_ := getg().m.p.ptr()
- runqput(_p_, newg, true)
+ pp := getg().m.p.ptr()
+ runqput(pp, newg, true)
if mainStarted {
wakep()
@@ -4102,15 +4257,13 @@
// address of the go statement that created this. The caller is responsible
// for adding the new g to the scheduler.
func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
- _g_ := getg()
-
if fn == nil {
fatal("go of nil func value")
}
- acquirem() // disable preemption because it can be holding p in a local var
- _p_ := _g_.m.p.ptr()
- newg := gfget(_p_)
+ mp := acquirem() // disable preemption because we hold M and P in local vars.
+ pp := mp.p.ptr()
+ newg := gfget(pp)
if newg == nil {
newg = malg(_StackMin)
casgstatus(newg, _Gidle, _Gdead)
@@ -4145,11 +4298,11 @@
newg.ancestors = saveAncestors(callergp)
newg.startpc = fn.fn
if isSystemGoroutine(newg, false) {
- atomic.Xadd(&sched.ngsys, +1)
+ sched.ngsys.Add(1)
} else {
// Only user goroutines inherit pprof labels.
- if _g_.m.curg != nil {
- newg.labels = _g_.m.curg.labels
+ if mp.curg != nil {
+ newg.labels = mp.curg.labels
}
if goroutineProfile.active {
// A concurrent goroutine profile is running. It should include
@@ -4166,18 +4319,18 @@
newg.tracking = true
}
casgstatus(newg, _Gdead, _Grunnable)
- gcController.addScannableStack(_p_, int64(newg.stack.hi-newg.stack.lo))
+ gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
- if _p_.goidcache == _p_.goidcacheend {
+ if pp.goidcache == pp.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
- _p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
- _p_.goidcache -= _GoidCacheBatch - 1
- _p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
+ pp.goidcache = sched.goidgen.Add(_GoidCacheBatch)
+ pp.goidcache -= _GoidCacheBatch - 1
+ pp.goidcacheend = pp.goidcache + _GoidCacheBatch
}
- newg.goid = int64(_p_.goidcache)
- _p_.goidcache++
+ newg.goid = pp.goidcache
+ pp.goidcache++
if raceenabled {
newg.racectx = racegostart(callerpc)
if newg.labels != nil {
@@ -4189,7 +4342,7 @@
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
- releasem(_g_.m)
+ releasem(mp)
return newg
}
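
Goroutine IDs come from a per-P cache refilled in batches of _GoidCacheBatch from the global generator, and the typed-atomic conversion turns that refill into a single sched.goidgen.Add call. The batching pattern on its own, with hypothetical names and sync/atomic standing in for the runtime-internal type:

var idGen atomic.Uint64 // last ID handed out globally, like sched.goidgen

const idBatch = 16 // illustrative batch size, like _GoidCacheBatch

// idBatcher hands out unique IDs, reserving them in batches so the shared
// counter is touched once per idBatch allocations instead of once per ID.
type idBatcher struct {
	next, end uint64 // IDs in [next, end) are reserved for this owner
}

func (b *idBatcher) get() uint64 {
	if b.next == b.end {
		hi := idGen.Add(idBatch) // reserve (hi-idBatch, hi] in one atomic op
		b.next = hi - idBatch + 1
		b.end = hi + 1
	}
	id := b.next
	b.next++
	return id
}

With the generator starting at zero, the first batch is 1..idBatch, which matches the comment above: the main goroutine receives goid 1.
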
@@ -4230,7 +4383,7 @@
// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
-func gfput(_p_ *p, gp *g) {
+func gfput(pp *p, gp *g) {
if readgstatus(gp) != _Gdead {
throw("gfput: bad status (not Gdead)")
}
@@ -4245,17 +4398,17 @@
gp.stackguard0 = 0
}
- _p_.gFree.push(gp)
- _p_.gFree.n++
- if _p_.gFree.n >= 64 {
+ pp.gFree.push(gp)
+ pp.gFree.n++
+ if pp.gFree.n >= 64 {
var (
inc int32
stackQ gQueue
noStackQ gQueue
)
- for _p_.gFree.n >= 32 {
- gp = _p_.gFree.pop()
- _p_.gFree.n--
+ for pp.gFree.n >= 32 {
+ gp := pp.gFree.pop()
+ pp.gFree.n--
if gp.stack.lo == 0 {
noStackQ.push(gp)
} else {
@@ -4273,12 +4426,12 @@
// Get from gfree list.
// If local list is empty, grab a batch from global list.
-func gfget(_p_ *p) *g {
+func gfget(pp *p) *g {
retry:
- if _p_.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
+ if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
lock(&sched.gFree.lock)
// Move a batch of free Gs to the P.
- for _p_.gFree.n < 32 {
+ for pp.gFree.n < 32 {
// Prefer Gs with stacks.
gp := sched.gFree.stack.pop()
if gp == nil {
@@ -4288,17 +4441,17 @@
}
}
sched.gFree.n--
- _p_.gFree.push(gp)
- _p_.gFree.n++
+ pp.gFree.push(gp)
+ pp.gFree.n++
}
unlock(&sched.gFree.lock)
goto retry
}
- gp := _p_.gFree.pop()
+ gp := pp.gFree.pop()
if gp == nil {
return nil
}
- _p_.gFree.n--
+ pp.gFree.n--
if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) {
// Deallocate old stack. We kept it in gfput because it was the
// right size when the goroutine was put on the free list, but
@@ -4331,15 +4484,15 @@
}
// Purge all cached G's from gfree list to the global list.
-func gfpurge(_p_ *p) {
+func gfpurge(pp *p) {
var (
inc int32
stackQ gQueue
noStackQ gQueue
)
- for !_p_.gFree.empty() {
- gp := _p_.gFree.pop()
- _p_.gFree.n--
+ for !pp.gFree.empty() {
+ gp := pp.gFree.pop()
+ pp.gFree.n--
if gp.stack.lo == 0 {
noStackQ.push(gp)
} else {
@@ -4368,9 +4521,9 @@
if GOARCH == "wasm" {
return // no threads on wasm yet
}
- _g_ := getg()
- _g_.m.lockedg.set(_g_)
- _g_.lockedm.set(_g_.m)
+ gp := getg()
+ gp.m.lockedg.set(gp)
+ gp.lockedm.set(gp.m)
}
//go:nosplit
@@ -4396,10 +4549,10 @@
// while we're in a known-good state.
startTemplateThread()
}
- _g_ := getg()
- _g_.m.lockedExt++
- if _g_.m.lockedExt == 0 {
- _g_.m.lockedExt--
+ gp := getg()
+ gp.m.lockedExt++
+ if gp.m.lockedExt == 0 {
+ gp.m.lockedExt--
panic("LockOSThread nesting overflow")
}
dolockOSThread()
@@ -4420,12 +4573,12 @@
if GOARCH == "wasm" {
return // no threads on wasm yet
}
- _g_ := getg()
- if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
+ gp := getg()
+ if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 {
return
}
- _g_.m.lockedg = 0
- _g_.lockedm = 0
+ gp.m.lockedg = 0
+ gp.lockedm = 0
}
//go:nosplit
@@ -4443,21 +4596,21 @@
// the goroutine locked to the OS thread until the goroutine (and
// hence the thread) exits.
func UnlockOSThread() {
- _g_ := getg()
- if _g_.m.lockedExt == 0 {
+ gp := getg()
+ if gp.m.lockedExt == 0 {
return
}
- _g_.m.lockedExt--
+ gp.m.lockedExt--
dounlockOSThread()
}
//go:nosplit
func unlockOSThread() {
- _g_ := getg()
- if _g_.m.lockedInt == 0 {
+ gp := getg()
+ if gp.m.lockedInt == 0 {
systemstack(badunlockosthread)
}
- _g_.m.lockedInt--
+ gp.m.lockedInt--
dounlockOSThread()
}
@@ -4466,9 +4619,9 @@
}
func gcount() int32 {
- n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
- for _, _p_ := range allp {
- n -= _p_.gFree.n
+ n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - sched.ngsys.Load()
+ for _, pp := range allp {
+ n -= pp.gFree.n
}
// All these variables can be changed concurrently, so the result can be inconsistent.
@@ -4484,8 +4637,11 @@
}
var prof struct {
- signalLock uint32
- hz int32
+ signalLock atomic.Uint32
+
+ // Must hold signalLock to write. Reads may be lock-free, but
+ // signalLock should be taken to synchronize with changes.
+ hz atomic.Int32
}
func _System() { _System() }
@@ -4500,7 +4656,7 @@
//
//go:nowritebarrierrec
func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
- if prof.hz == 0 {
+ if prof.hz.Load() == 0 {
return
}
@@ -4550,7 +4706,7 @@
// cgoCallers. We are running in a signal handler
// with all signals blocked, so we don't have to worry
// about any other code interrupting us.
- if atomic.Load(&mp.cgoCallersUse) == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
+ if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 {
cgoOff++
}
@@ -4563,41 +4719,37 @@
if n > 0 {
n += cgoOff
}
+ } else if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
+ // Libcall, i.e. runtime syscall on windows.
+ // Collect Go stack that leads to the call.
+ n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[n], len(stk[n:]), nil, nil, 0)
+ } else if mp != nil && mp.vdsoSP != 0 {
+ // VDSO call, e.g. nanotime1 on Linux.
+ // Collect Go stack that leads to the call.
+ n = gentraceback(mp.vdsoPC, mp.vdsoSP, 0, gp, 0, &stk[n], len(stk[n:]), nil, nil, _TraceJumpStack)
} else {
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
}
if n <= 0 {
// Normal traceback is impossible or has failed.
- // See if it falls into several common cases.
- n = 0
- if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
- // Libcall, i.e. runtime syscall on windows.
- // Collect Go stack that leads to the call.
- n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
+ // Account it against abstract "System" or "GC".
+ n = 2
+ if inVDSOPage(pc) {
+ pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum
+ } else if pc > firstmoduledata.etext {
+ // "ExternalCode" is better than "etext".
+ pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
}
- if n == 0 && mp != nil && mp.vdsoSP != 0 {
- n = gentraceback(mp.vdsoPC, mp.vdsoSP, 0, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
- }
- if n == 0 {
- // If all of the above has failed, account it against abstract "System" or "GC".
- n = 2
- if inVDSOPage(pc) {
- pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum
- } else if pc > firstmoduledata.etext {
- // "ExternalCode" is better than "etext".
- pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
- }
- stk[0] = pc
- if mp.preemptoff != "" {
- stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
- } else {
- stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
- }
+ stk[0] = pc
+ if mp.preemptoff != "" {
+ stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
+ } else {
+ stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
}
}
- if prof.hz != 0 {
+ if prof.hz.Load() != 0 {
// Note: it can happen on Windows that we interrupted a system thread
// with no g, so gp could be nil. The other nil checks are done out of
// caution, but not expected to be nil in practice.
@@ -4630,22 +4782,22 @@
// Disable preemption, otherwise we can be rescheduled to another thread
// that has profiling enabled.
- _g_ := getg()
- _g_.m.locks++
+ gp := getg()
+ gp.m.locks++
// Stop profiler on this thread so that it is safe to lock prof.
// if a profiling signal came in while we had prof locked,
// it would deadlock.
setThreadCPUProfiler(0)
- for !atomic.Cas(&prof.signalLock, 0, 1) {
+ for !prof.signalLock.CompareAndSwap(0, 1) {
osyield()
}
- if prof.hz != hz {
+ if prof.hz.Load() != hz {
setProcessCPUProfiler(hz)
- prof.hz = hz
+ prof.hz.Store(hz)
}
- atomic.Store(&prof.signalLock, 0)
+ prof.signalLock.Store(0)
lock(&sched.lock)
sched.profilehz = hz
@@ -4655,7 +4807,7 @@
setThreadCPUProfiler(hz)
}
- _g_.m.locks--
+ gp.m.locks--
}
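
prof.signalLock is a tiny spin lock built from one atomic word: the writer CASes it from 0 to 1, updates hz, then stores 0, which is why the signal-handler path above only needs prof.hz.Load(). The same shape outside the runtime, as an illustrative sketch (uses "runtime" and "sync/atomic"; runtime.Gosched stands in for osyield, which has no public equivalent):

var (
	signalLock atomic.Uint32 // 0 = unlocked, 1 = locked
	hz         atomic.Int32  // readers may load this without the lock
)

func setHz(newHz int32) {
	for !signalLock.CompareAndSwap(0, 1) {
		runtime.Gosched() // back off and retry until we win the lock
	}
	if hz.Load() != newHz {
		hz.Store(newHz) // writes happen only while holding the spin lock
	}
	signalLock.Store(0) // release
}
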
// init initializes pp, which may be a freshly allocated p or a
@@ -4726,9 +4878,9 @@
lock(&pp.timersLock)
moveTimers(plocal, pp.timers)
pp.timers = nil
- pp.numTimers = 0
- pp.deletedTimers = 0
- atomic.Store64(&pp.timer0When, 0)
+ pp.numTimers.Store(0)
+ pp.deletedTimers.Store(0)
+ pp.timer0When.Store(0)
unlock(&pp.timersLock)
unlock(&plocal.timersLock)
}
@@ -4852,32 +5004,32 @@
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
- _g_ := getg()
- if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
+ gp := getg()
+ if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
// continue to use the current P
- _g_.m.p.ptr().status = _Prunning
- _g_.m.p.ptr().mcache.prepareForSweep()
+ gp.m.p.ptr().status = _Prunning
+ gp.m.p.ptr().mcache.prepareForSweep()
} else {
// release the current P and acquire allp[0].
//
// We must do this before destroying our current P
// because p.destroy itself has write barriers, so we
// need to do that from a valid P.
- if _g_.m.p != 0 {
+ if gp.m.p != 0 {
if trace.enabled {
// Pretend that we were descheduled
// and then scheduled again to keep
// the trace sane.
traceGoSched()
- traceProcStop(_g_.m.p.ptr())
+ traceProcStop(gp.m.p.ptr())
}
- _g_.m.p.ptr().m = 0
+ gp.m.p.ptr().m = 0
}
- _g_.m.p = 0
- p := allp[0]
- p.m = 0
- p.status = _Pidle
- acquirep(p)
+ gp.m.p = 0
+ pp := allp[0]
+ pp.m = 0
+ pp.status = _Pidle
+ acquirep(pp)
if trace.enabled {
traceGoStart()
}
@@ -4888,8 +5040,8 @@
// release resources from unused P's
for i := nprocs; i < old; i++ {
- p := allp[i]
- p.destroy()
+ pp := allp[i]
+ pp.destroy()
// can't free P itself because it can be referenced by an M in syscall
}
@@ -4904,17 +5056,17 @@
var runnablePs *p
for i := nprocs - 1; i >= 0; i-- {
- p := allp[i]
- if _g_.m.p.ptr() == p {
+ pp := allp[i]
+ if gp.m.p.ptr() == pp {
continue
}
- p.status = _Pidle
- if runqempty(p) {
- pidleput(p, now)
+ pp.status = _Pidle
+ if runqempty(pp) {
+ pidleput(pp, now)
} else {
- p.m.set(mget())
- p.link.set(runnablePs)
- runnablePs = p
+ pp.m.set(mget())
+ pp.link.set(runnablePs)
+ runnablePs = pp
}
}
stealOrder.reset(uint32(nprocs))
@@ -4930,18 +5082,18 @@
// Associate p and the current m.
//
// This function is allowed to have write barriers even if the caller
-// isn't because it immediately acquires _p_.
+// isn't because it immediately acquires pp.
//
//go:yeswritebarrierrec
-func acquirep(_p_ *p) {
+func acquirep(pp *p) {
// Do the part that isn't allowed to have write barriers.
- wirep(_p_)
+ wirep(pp)
// Have p; write barriers now allowed.
// Perform deferred mcache flush before this P can allocate
// from a potentially stale mcache.
- _p_.mcache.prepareForSweep()
+ pp.mcache.prepareForSweep()
if trace.enabled {
traceProcStart()
@@ -4949,49 +5101,49 @@
}
// wirep is the first step of acquirep, which actually associates the
-// current M to _p_. This is broken out so we can disallow write
+// current M to pp. This is broken out so we can disallow write
// barriers for this part, since we don't yet have a P.
//
//go:nowritebarrierrec
//go:nosplit
-func wirep(_p_ *p) {
- _g_ := getg()
+func wirep(pp *p) {
+ gp := getg()
- if _g_.m.p != 0 {
+ if gp.m.p != 0 {
throw("wirep: already in go")
}
- if _p_.m != 0 || _p_.status != _Pidle {
+ if pp.m != 0 || pp.status != _Pidle {
id := int64(0)
- if _p_.m != 0 {
- id = _p_.m.ptr().id
+ if pp.m != 0 {
+ id = pp.m.ptr().id
}
- print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
+ print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n")
throw("wirep: invalid p state")
}
- _g_.m.p.set(_p_)
- _p_.m.set(_g_.m)
- _p_.status = _Prunning
+ gp.m.p.set(pp)
+ pp.m.set(gp.m)
+ pp.status = _Prunning
}
// Disassociate p and the current m.
func releasep() *p {
- _g_ := getg()
+ gp := getg()
- if _g_.m.p == 0 {
+ if gp.m.p == 0 {
throw("releasep: invalid arg")
}
- _p_ := _g_.m.p.ptr()
- if _p_.m.ptr() != _g_.m || _p_.status != _Prunning {
- print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " p->status=", _p_.status, "\n")
+ pp := gp.m.p.ptr()
+ if pp.m.ptr() != gp.m || pp.status != _Prunning {
+ print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
throw("releasep: invalid p state")
}
if trace.enabled {
- traceProcStop(_g_.m.p.ptr())
+ traceProcStop(gp.m.p.ptr())
}
- _g_.m.p = 0
- _p_.m = 0
- _p_.status = _Pidle
- return _p_
+ gp.m.p = 0
+ pp.m = 0
+ pp.status = _Pidle
+ return pp
}
func incidlelocked(v int32) {
@@ -5020,7 +5172,7 @@
// freezetheworld will cause all running threads to block.
// And runtime will essentially enter into deadlock state,
// except that there is a thread that will call exit soon.
- if panicking > 0 {
+ if panicking.Load() > 0 {
return
}
@@ -5090,7 +5242,7 @@
// M must be spinning to steal. We set this to be
// explicit, but since this is the only M it would
// become spinning on its own anyways.
- atomic.Xadd(&sched.nmspinning, 1)
+ sched.nmspinning.Add(1)
mp.spinning = true
mp.nextp.set(pp)
notewakeup(&mp.park)
@@ -5099,8 +5251,8 @@
}
// There are no goroutines running, so we can look at the P's.
- for _, _p_ := range allp {
- if len(_p_.timers) > 0 {
+ for _, pp := range allp {
+ if len(pp.timers) > 0 {
return
}
}
@@ -5160,13 +5312,13 @@
// from a timer to avoid adding system load to applications that spend
// most of their time sleeping.
now := nanotime()
- if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
+ if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) {
lock(&sched.lock)
- if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
+ if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs {
syscallWake := false
next := timeSleepUntil()
if next > now {
- atomic.Store(&sched.sysmonwait, 1)
+ sched.sysmonwait.Store(true)
unlock(&sched.lock)
// Make wake-up period small enough
// for the sampling to be correct.
@@ -5183,7 +5335,7 @@
osRelax(false)
}
lock(&sched.lock)
- atomic.Store(&sched.sysmonwait, 0)
+ sched.sysmonwait.Store(false)
noteclear(&sched.sysmonnote)
}
if syscallWake {
@@ -5204,9 +5356,9 @@
asmcgocall(*cgo_yield, nil)
}
// poll network if not polled for more than 10ms
- lastpoll := int64(atomic.Load64(&sched.lastpoll))
+ lastpoll := sched.lastpoll.Load()
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
- atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
+ sched.lastpoll.CompareAndSwap(lastpoll, now)
list := netpoll(0) // non-blocking - returns list of goroutines
if !list.empty() {
// Need to decrement number of idle locked M's
@@ -5253,9 +5405,9 @@
idle++
}
// check if we need to force a GC
- if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 {
+ if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() {
lock(&forcegc.lock)
- forcegc.idle = 0
+ forcegc.idle.Store(false)
var list gList
list.push(forcegc.g)
injectglist(&list)
@@ -5289,23 +5441,23 @@
// temporarily drop the allpLock. Hence, we need to re-fetch
// allp each time around the loop.
for i := 0; i < len(allp); i++ {
- _p_ := allp[i]
- if _p_ == nil {
+ pp := allp[i]
+ if pp == nil {
// This can happen if procresize has grown
// allp but not yet created new Ps.
continue
}
- pd := &_p_.sysmontick
- s := _p_.status
+ pd := &pp.sysmontick
+ s := pp.status
sysretake := false
if s == _Prunning || s == _Psyscall {
// Preempt G if it's running for too long.
- t := int64(_p_.schedtick)
+ t := int64(pp.schedtick)
if int64(pd.schedtick) != t {
pd.schedtick = uint32(t)
pd.schedwhen = now
} else if pd.schedwhen+forcePreemptNS <= now {
- preemptone(_p_)
+ preemptone(pp)
// In case of syscall, preemptone() doesn't
// work, because there is no M wired to P.
sysretake = true
@@ -5313,7 +5465,7 @@
}
if s == _Psyscall {
// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
- t := int64(_p_.syscalltick)
+ t := int64(pp.syscalltick)
if !sysretake && int64(pd.syscalltick) != t {
pd.syscalltick = uint32(t)
pd.syscallwhen = now
@@ -5322,7 +5474,7 @@
// On the one hand we don't want to retake Ps if there is no other work to do,
// but on the other hand we want to retake them eventually
// because they can prevent the sysmon thread from deep sleep.
- if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
+ if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
continue
}
// Drop allpLock so we can take sched.lock.
@@ -5332,14 +5484,14 @@
// Otherwise the M from which we retake can exit the syscall,
// increment nmidle and report deadlock.
incidlelocked(-1)
- if atomic.Cas(&_p_.status, s, _Pidle) {
+ if atomic.Cas(&pp.status, s, _Pidle) {
if trace.enabled {
- traceGoSysBlock(_p_)
- traceProcStop(_p_)
+ traceGoSysBlock(pp)
+ traceProcStop(pp)
}
n++
- _p_.syscalltick++
- handoffp(_p_)
+ pp.syscalltick++
+ handoffp(pp)
}
incidlelocked(1)
lock(&allpLock)
@@ -5356,11 +5508,11 @@
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
res := false
- for _, _p_ := range allp {
- if _p_.status != _Prunning {
+ for _, pp := range allp {
+ if pp.status != _Prunning {
continue
}
- if preemptone(_p_) {
+ if preemptone(pp) {
res = true
}
}
@@ -5377,8 +5529,8 @@
// The actual preemption will happen at some point in the future
// and will be indicated by the gp->status no longer being
// Grunning
-func preemptone(_p_ *p) bool {
- mp := _p_.m.ptr()
+func preemptone(pp *p) bool {
+ mp := pp.m.ptr()
if mp == nil || mp == getg().m {
return false
}
@@ -5397,7 +5549,7 @@
// Request an async preemption of this P.
if preemptMSupported && debug.asyncpreemptoff == 0 {
- _p_.preempt = true
+ pp.preempt = true
preemptM(mp)
}
@@ -5413,23 +5565,25 @@
}
lock(&sched.lock)
- print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+ print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
if detailed {
- print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
+ print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n")
}
// We must be careful while reading data from P's, M's and G's.
// Even if we hold schedlock, most data can be changed concurrently.
// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
- for i, _p_ := range allp {
- mp := _p_.m.ptr()
- h := atomic.Load(&_p_.runqhead)
- t := atomic.Load(&_p_.runqtail)
+ for i, pp := range allp {
+ mp := pp.m.ptr()
+ h := atomic.Load(&pp.runqhead)
+ t := atomic.Load(&pp.runqtail)
if detailed {
- id := int64(-1)
+ print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=")
if mp != nil {
- id = mp.id
+ print(mp.id)
+ } else {
+ print("nil")
}
- print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
+ print(" runqsize=", t-h, " gfreecnt=", pp.gFree.n, " timerslen=", len(pp.timers), "\n")
} else {
// In non-detailed mode format lengths of per-P run queues as:
// [len1 len2 len3 len4]
@@ -5450,36 +5604,42 @@
}
for mp := allm; mp != nil; mp = mp.alllink {
- _p_ := mp.p.ptr()
- gp := mp.curg
- lockedg := mp.lockedg.ptr()
- id1 := int32(-1)
- if _p_ != nil {
- id1 = _p_.id
+ pp := mp.p.ptr()
+ print(" M", mp.id, ": p=")
+ if pp != nil {
+ print(pp.id)
+ } else {
+ print("nil")
}
- id2 := int64(-1)
- if gp != nil {
- id2 = gp.goid
+ print(" curg=")
+ if mp.curg != nil {
+ print(mp.curg.goid)
+ } else {
+ print("nil")
}
- id3 := int64(-1)
- if lockedg != nil {
- id3 = lockedg.goid
+ print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=")
+ if lockedg := mp.lockedg.ptr(); lockedg != nil {
+ print(lockedg.goid)
+ } else {
+ print("nil")
}
- print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
+ print("\n")
}
forEachG(func(gp *g) {
- mp := gp.m
- lockedm := gp.lockedm.ptr()
- id1 := int64(-1)
- if mp != nil {
- id1 = mp.id
+ print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=")
+ if gp.m != nil {
+ print(gp.m.id)
+ } else {
+ print("nil")
}
- id2 := int64(-1)
- if lockedm != nil {
- id2 = lockedm.id
+ print(" lockedm=")
+ if lockedm := gp.lockedm.ptr(); lockedm != nil {
+ print(lockedm.id)
+ } else {
+ print("nil")
}
- print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n")
+ print("\n")
})
unlock(&sched.lock)
}
@@ -5501,7 +5661,7 @@
sched.disable.n = 0
globrunqputbatch(&sched.disable.runnable, n)
unlock(&sched.lock)
- for ; n != 0 && sched.npidle != 0; n-- {
+ for ; n != 0 && sched.npidle.Load() != 0; n-- {
startm(nil, false)
}
} else {
@@ -5592,7 +5752,7 @@
// Try get a batch of G's from the global runnable queue.
// sched.lock must be held.
-func globrunqget(_p_ *p, max int32) *g {
+func globrunqget(pp *p, max int32) *g {
assertLockHeld(&sched.lock)
if sched.runqsize == 0 {
@@ -5606,8 +5766,8 @@
if max > 0 && n > max {
n = max
}
- if n > int32(len(_p_.runq))/2 {
- n = int32(len(_p_.runq)) / 2
+ if n > int32(len(pp.runq))/2 {
+ n = int32(len(pp.runq)) / 2
}
sched.runqsize -= n
@@ -5616,7 +5776,7 @@
n--
for ; n > 0; n-- {
gp1 := sched.runq.pop()
- runqput(_p_, gp1, false)
+ runqput(pp, gp1, false)
}
return gp
}
@@ -5671,7 +5831,7 @@
// TODO(prattmic): Additional targeted updates may improve the above cases.
// e.g., updating the mask when stealing a timer.
func updateTimerPMask(pp *p) {
- if atomic.Load(&pp.numTimers) > 0 {
+ if pp.numTimers.Load() > 0 {
return
}
@@ -5679,7 +5839,7 @@
// decrement numTimers when handling a timerModified timer in
// checkTimers. We must take timersLock to serialize with these changes.
lock(&pp.timersLock)
- if atomic.Load(&pp.numTimers) == 0 {
+ if pp.numTimers.Load() == 0 {
timerpMask.clear(pp.id)
}
unlock(&pp.timersLock)
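
updateTimerPMask is a check / lock / re-check pattern: the racy numTimers.Load filters out the common case without taking timersLock, and the mask is only cleared after confirming the count under the lock, since timer additions can race with the first read. The same double-checked shape in a generic form (hypothetical types, not runtime code):

type slot struct {
	mu      sync.Mutex
	pending atomic.Int32 // may be read without mu; written only with mu held
	active  bool
}

func (s *slot) maybeDeactivate() {
	if s.pending.Load() > 0 {
		return // fast path: clearly still in use, no lock needed
	}
	s.mu.Lock()
	// Re-check under the lock: pending may have been bumped between the
	// racy read above and acquiring mu.
	if s.pending.Load() == 0 {
		s.active = false
	}
	s.mu.Unlock()
}
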
@@ -5696,21 +5856,21 @@
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
-func pidleput(_p_ *p, now int64) int64 {
+func pidleput(pp *p, now int64) int64 {
assertLockHeld(&sched.lock)
- if !runqempty(_p_) {
+ if !runqempty(pp) {
throw("pidleput: P has non-empty run queue")
}
if now == 0 {
now = nanotime()
}
- updateTimerPMask(_p_) // clear if there are no timers.
- idlepMask.set(_p_.id)
- _p_.link = sched.pidle
- sched.pidle.set(_p_)
- atomic.Xadd(&sched.npidle, 1)
- if !_p_.limiterEvent.start(limiterEventIdle, now) {
+ updateTimerPMask(pp) // clear if there are no timers.
+ idlepMask.set(pp.id)
+ pp.link = sched.pidle
+ sched.pidle.set(pp)
+ sched.npidle.Add(1)
+ if !pp.limiterEvent.start(limiterEventIdle, now) {
throw("must be able to track idle limiter event")
}
return now
@@ -5726,33 +5886,58 @@
func pidleget(now int64) (*p, int64) {
assertLockHeld(&sched.lock)
- _p_ := sched.pidle.ptr()
- if _p_ != nil {
+ pp := sched.pidle.ptr()
+ if pp != nil {
// Timer may get added at any time now.
if now == 0 {
now = nanotime()
}
- timerpMask.set(_p_.id)
- idlepMask.clear(_p_.id)
- sched.pidle = _p_.link
- atomic.Xadd(&sched.npidle, -1)
- _p_.limiterEvent.stop(limiterEventIdle, now)
+ timerpMask.set(pp.id)
+ idlepMask.clear(pp.id)
+ sched.pidle = pp.link
+ sched.npidle.Add(-1)
+ pp.limiterEvent.stop(limiterEventIdle, now)
}
- return _p_, now
+ return pp, now
}
-// runqempty reports whether _p_ has no Gs on its local run queue.
+// pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership.
+// This is called by spinning Ms (or callers than need a spinning M) that have
+// found work. If no P is available, this must synchronized with non-spinning
+// Ms that may be preparing to drop their P without discovering this work.
+//
+// sched.lock must be held.
+//
+// May run during STW, so write barriers are not allowed.
+//
+//go:nowritebarrierrec
+func pidlegetSpinning(now int64) (*p, int64) {
+ assertLockHeld(&sched.lock)
+
+ pp, now := pidleget(now)
+ if pp == nil {
+ // See "Delicate dance" comment in findrunnable. We found work
+ // that we cannot take, we must synchronize with non-spinning
+ // Ms that may be preparing to drop their P.
+ sched.needspinning.Store(1)
+ return nil, now
+ }
+
+ return pp, now
+}
+
+// runqempty reports whether pp has no Gs on its local run queue.
// It never returns true spuriously.
-func runqempty(_p_ *p) bool {
- // Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail,
- // 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runqnext.
+func runqempty(pp *p) bool {
+ // Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail,
+ // 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext.
// Simply observing that runqhead == runqtail and then observing that runqnext == nil
// does not mean the queue is empty.
for {
- head := atomic.Load(&_p_.runqhead)
- tail := atomic.Load(&_p_.runqtail)
- runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext)))
- if tail == atomic.Load(&_p_.runqtail) {
+ head := atomic.Load(&pp.runqhead)
+ tail := atomic.Load(&pp.runqtail)
+ runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext)))
+ if tail == atomic.Load(&pp.runqtail) {
return head == tail && runnext == 0
}
}
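
The loop in runqempty exists because head, tail and runnext cannot be read in one shot: a G can migrate between runnext and the ring while we look, so the snapshot is only trusted if tail is unchanged after reading all three. The same revalidated-snapshot idiom in isolation (illustrative helper, assuming sync/atomic types):

// snapshotEmpty reports whether a queue described by two counters and a
// "next" slot was empty at a single consistent point in time.
func snapshotEmpty(head, tail *atomic.Uint32, next *atomic.Uintptr) bool {
	for {
		h := head.Load()
		t := tail.Load()
		n := next.Load()
		if t == tail.Load() { // nothing was enqueued while we looked
			return h == t && n == 0
		}
		// tail moved under us; retry with a fresh snapshot.
	}
}
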
@@ -5771,18 +5956,18 @@
// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
-// If next is true, runqput puts g in the _p_.runnext slot.
+// If next is true, runqput puts g in the pp.runnext slot.
// If the run queue is full, runnext puts g on the global queue.
// Executed only by the owner P.
-func runqput(_p_ *p, gp *g, next bool) {
+func runqput(pp *p, gp *g, next bool) {
if randomizeScheduler && next && fastrandn(2) == 0 {
next = false
}
if next {
retryNext:
- oldnext := _p_.runnext
- if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
+ oldnext := pp.runnext
+ if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
goto retryNext
}
if oldnext == 0 {
@@ -5793,14 +5978,14 @@
}
retry:
- h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
- t := _p_.runqtail
- if t-h < uint32(len(_p_.runq)) {
- _p_.runq[t%uint32(len(_p_.runq))].set(gp)
- atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
+ h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
+ t := pp.runqtail
+ if t-h < uint32(len(pp.runq)) {
+ pp.runq[t%uint32(len(pp.runq))].set(gp)
+ atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption
return
}
- if runqputslow(_p_, gp, h, t) {
+ if runqputslow(pp, gp, h, t) {
return
}
// the queue is not full, now the put above must succeed
@@ -5809,19 +5994,19 @@
// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
-func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
- var batch [len(_p_.runq)/2 + 1]*g
+func runqputslow(pp *p, gp *g, h, t uint32) bool {
+ var batch [len(pp.runq)/2 + 1]*g
// First, grab a batch from local queue.
n := t - h
n = n / 2
- if n != uint32(len(_p_.runq)/2) {
+ if n != uint32(len(pp.runq)/2) {
throw("runqputslow: queue is not full")
}
for i := uint32(0); i < n; i++ {
- batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
+ batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
}
- if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+ if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
return false
}
batch[n] = gp
@@ -5886,50 +6071,50 @@
// If inheritTime is true, gp should inherit the remaining time in the
// current time slice. Otherwise, it should start a new time slice.
// Executed only by the owner P.
-func runqget(_p_ *p) (gp *g, inheritTime bool) {
+func runqget(pp *p) (gp *g, inheritTime bool) {
// If there's a runnext, it's the next G to run.
- next := _p_.runnext
+ next := pp.runnext
// If the runnext is non-0 and the CAS fails, it could only have been stolen by another P,
// because other Ps can race to set runnext to 0, but only the current P can set it to non-0.
- // Hence, there's no need to retry this CAS if it falls.
- if next != 0 && _p_.runnext.cas(next, 0) {
+ // Hence, there's no need to retry this CAS if it fails.
+ if next != 0 && pp.runnext.cas(next, 0) {
return next.ptr(), true
}
for {
- h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
- t := _p_.runqtail
+ h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
+ t := pp.runqtail
if t == h {
return nil, false
}
- gp := _p_.runq[h%uint32(len(_p_.runq))].ptr()
- if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume
+ gp := pp.runq[h%uint32(len(pp.runq))].ptr()
+ if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume
return gp, false
}
}
}
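
pp.runq is a fixed-size ring addressed by free-running head and tail counters: the slot for any index is index % len(runq), and tail-head is the current length. Setting aside the cross-P stealing that forces the acquire/release atomics above, the owner-only view is roughly the following simplified sketch (not the real queue):

type task struct{ /* ... */ }

type ring struct {
	head, tail uint32     // free-running counters; they only ever increase
	buf        [256]*task // stand-in for pp.runq
}

func (r *ring) push(t *task) bool {
	if r.tail-r.head == uint32(len(r.buf)) {
		return false // full; the runtime would spill half to the global queue
	}
	r.buf[r.tail%uint32(len(r.buf))] = t
	r.tail++
	return true
}

func (r *ring) pop() *task {
	if r.head == r.tail {
		return nil // empty
	}
	t := r.buf[r.head%uint32(len(r.buf))]
	r.head++
	return t
}

Unsigned wraparound of the counters is harmless because only their difference and the modulo of each index are ever used.
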
-// runqdrain drains the local runnable queue of _p_ and returns all goroutines in it.
+// runqdrain drains the local runnable queue of pp and returns all goroutines in it.
// Executed only by the owner P.
-func runqdrain(_p_ *p) (drainQ gQueue, n uint32) {
- oldNext := _p_.runnext
- if oldNext != 0 && _p_.runnext.cas(oldNext, 0) {
+func runqdrain(pp *p) (drainQ gQueue, n uint32) {
+ oldNext := pp.runnext
+ if oldNext != 0 && pp.runnext.cas(oldNext, 0) {
drainQ.pushBack(oldNext.ptr())
n++
}
retry:
- h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
- t := _p_.runqtail
+ h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
+ t := pp.runqtail
qn := t - h
if qn == 0 {
return
}
- if qn > uint32(len(_p_.runq)) { // read inconsistent h and t
+ if qn > uint32(len(pp.runq)) { // read inconsistent h and t
goto retry
}
- if !atomic.CasRel(&_p_.runqhead, h, h+qn) { // cas-release, commits consume
+ if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume
goto retry
}
@@ -5941,34 +6126,34 @@
// meanwhile, other P's can't access all G's in the local P's runnable queue and steal them.
// See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details.
for i := uint32(0); i < qn; i++ {
- gp := _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
+ gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
drainQ.pushBack(gp)
n++
}
return
}
-// Grabs a batch of goroutines from _p_'s runnable queue into batch.
+// Grabs a batch of goroutines from pp's runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P.
-func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
+func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
for {
- h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
- t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer
+ h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
+ t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer
n := t - h
n = n - n/2
if n == 0 {
if stealRunNextG {
- // Try to steal from _p_.runnext.
- if next := _p_.runnext; next != 0 {
- if _p_.status == _Prunning {
- // Sleep to ensure that _p_ isn't about to run the g
+ // Try to steal from pp.runnext.
+ if next := pp.runnext; next != 0 {
+ if pp.status == _Prunning {
+ // Sleep to ensure that pp isn't about to run the g
// we are about to steal.
// The important use case here is when the g running
- // on _p_ ready()s another g and then almost
+ // on pp ready()s another g and then almost
// immediately blocks. Instead of stealing runnext
- // in this window, back off to give _p_ a chance to
+ // in this window, back off to give pp a chance to
// schedule runnext. This will avoid thrashing gs
// between different Ps.
// A sync chan send/recv takes ~50ns as of time of
@@ -5982,7 +6167,7 @@
osyield()
}
}
- if !_p_.runnext.cas(next, 0) {
+ if !pp.runnext.cas(next, 0) {
continue
}
batch[batchHead%uint32(len(batch))] = next
@@ -5991,14 +6176,14 @@
}
return 0
}
- if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
+ if n > uint32(len(pp.runq)/2) { // read inconsistent h and t
continue
}
for i := uint32(0); i < n; i++ {
- g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
+ g := pp.runq[(h+i)%uint32(len(pp.runq))]
batch[(batchHead+i)%uint32(len(batch))] = g
}
- if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+ if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
return n
}
}
@@ -6007,22 +6192,22 @@
// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
-func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
- t := _p_.runqtail
- n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
+func runqsteal(pp, p2 *p, stealRunNextG bool) *g {
+ t := pp.runqtail
+ n := runqgrab(p2, &pp.runq, t, stealRunNextG)
if n == 0 {
return nil
}
n--
- gp := _p_.runq[(t+n)%uint32(len(_p_.runq))].ptr()
+ gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr()
if n == 0 {
return gp
}
- h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
- if t-h+n >= uint32(len(_p_.runq)) {
+ h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
+ if t-h+n >= uint32(len(pp.runq)) {
throw("runqsteal: runq overflow")
}
- atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
+ atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption
return gp
}
@@ -6143,8 +6328,8 @@
//go:nosplit
func procPin() int {
- _g_ := getg()
- mp := _g_.m
+ gp := getg()
+ mp := gp.m
mp.locks++
return int(mp.p.ptr().id)
@@ -6152,8 +6337,8 @@
//go:nosplit
func procUnpin() {
- _g_ := getg()
- _g_.m.locks--
+ gp := getg()
+ gp.m.locks--
}
//go:linkname sync_runtime_procPin sync.runtime_procPin
@@ -6190,7 +6375,7 @@
// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
// As opposed to runtime mutex we don't do passive spinning here,
// because there can be work on global runq or on other Ps.
- if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
+ if i >= active_spin || ncpu <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 {
return false
}
if p := getg().m.p.ptr(); !runqempty(p) {
@@ -6278,7 +6463,7 @@
type tracestat struct {
active bool // init tracing activation status
- id int64 // init goroutine id
+ id uint64 // init goroutine id
allocs uint64 // heap allocations
bytes uint64 // heap allocated bytes
}
diff --git a/src/runtime/profbuf.go b/src/runtime/profbuf.go
index 3d907d5..c579f21 100644
--- a/src/runtime/profbuf.go
+++ b/src/runtime/profbuf.go
@@ -87,9 +87,9 @@
type profBuf struct {
// accessed atomically
r, w profAtomic
- overflow uint64
- overflowTime uint64
- eof uint32
+ overflow atomic.Uint64
+ overflowTime atomic.Uint64
+ eof atomic.Uint32
// immutable (excluding slice content)
hdrsize uintptr
@@ -150,15 +150,15 @@
// hasOverflow reports whether b has any overflow records pending.
func (b *profBuf) hasOverflow() bool {
- return uint32(atomic.Load64(&b.overflow)) > 0
+ return uint32(b.overflow.Load()) > 0
}
// takeOverflow consumes the pending overflow records, returning the overflow count
// and the time of the first overflow.
// When called by the reader, it is racing against incrementOverflow.
func (b *profBuf) takeOverflow() (count uint32, time uint64) {
- overflow := atomic.Load64(&b.overflow)
- time = atomic.Load64(&b.overflowTime)
+ overflow := b.overflow.Load()
+ time = b.overflowTime.Load()
for {
count = uint32(overflow)
if count == 0 {
@@ -166,11 +166,11 @@
break
}
// Increment generation, clear overflow count in low bits.
- if atomic.Cas64(&b.overflow, overflow, ((overflow>>32)+1)<<32) {
+ if b.overflow.CompareAndSwap(overflow, ((overflow>>32)+1)<<32) {
break
}
- overflow = atomic.Load64(&b.overflow)
- time = atomic.Load64(&b.overflowTime)
+ overflow = b.overflow.Load()
+ time = b.overflowTime.Load()
}
return uint32(overflow), time
}
@@ -179,14 +179,14 @@
// It is racing against a possible takeOverflow in the reader.
func (b *profBuf) incrementOverflow(now int64) {
for {
- overflow := atomic.Load64(&b.overflow)
+ overflow := b.overflow.Load()
// Once we see b.overflow reach 0, it's stable: no one else is changing it underfoot.
// We need to set overflowTime if we're incrementing b.overflow from 0.
if uint32(overflow) == 0 {
// Store overflowTime first so it's always available when overflow != 0.
- atomic.Store64(&b.overflowTime, uint64(now))
- atomic.Store64(&b.overflow, (((overflow>>32)+1)<<32)+1)
+ b.overflowTime.Store(uint64(now))
+ b.overflow.Store((((overflow >> 32) + 1) << 32) + 1)
break
}
// Otherwise we're racing to increment against reader
@@ -196,7 +196,7 @@
if int32(overflow) == -1 {
break
}
- if atomic.Cas64(&b.overflow, overflow, overflow+1) {
+ if b.overflow.CompareAndSwap(overflow, overflow+1) {
break
}
}
@@ -394,10 +394,10 @@
// close signals that there will be no more writes on the buffer.
// Once all the data has been read from the buffer, reads will return eof=true.
func (b *profBuf) close() {
- if atomic.Load(&b.eof) > 0 {
+ if b.eof.Load() > 0 {
throw("runtime: profBuf already closed")
}
- atomic.Store(&b.eof, 1)
+ b.eof.Store(1)
b.wakeupExtra()
}
@@ -475,7 +475,7 @@
dst[2+b.hdrsize] = uint64(count)
return dst[:2+b.hdrsize+1], overflowTag[:1], false
}
- if atomic.Load(&b.eof) > 0 {
+ if b.eof.Load() > 0 {
// No data, no overflow, EOF set: done.
return nil, nil, true
}
diff --git a/src/runtime/race.go b/src/runtime/race.go
index 4694288..f83a04d 100644
--- a/src/runtime/race.go
+++ b/src/runtime/race.go
@@ -67,21 +67,21 @@
// Non-synchronization events (memory accesses, function entry/exit) still affect
// the race detector.
func RaceDisable() {
- _g_ := getg()
- if _g_.raceignore == 0 {
- racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
+ gp := getg()
+ if gp.raceignore == 0 {
+ racecall(&__tsan_go_ignore_sync_begin, gp.racectx, 0, 0, 0)
}
- _g_.raceignore++
+ gp.raceignore++
}
//go:nosplit
// RaceEnable re-enables handling of race events in the current goroutine.
func RaceEnable() {
- _g_ := getg()
- _g_.raceignore--
- if _g_.raceignore == 0 {
- racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
+ gp := getg()
+ gp.raceignore--
+ if gp.raceignore == 0 {
+ racecall(&__tsan_go_ignore_sync_end, gp.racectx, 0, 0, 0)
}
}
@@ -187,7 +187,7 @@
continue
}
ctx.pc = f.Entry() + uintptr(inltree[ix].parentPc) // "caller" pc
- ctx.fn = cfuncnameFromNameoff(fi, inltree[ix].func_)
+ ctx.fn = cfuncnameFromNameOff(fi, inltree[ix].nameOff)
ctx.line = uintptr(line)
ctx.file = &bytes(file)[0] // assume NUL-terminated
ctx.off = pc - f.Entry()
@@ -350,7 +350,7 @@
// with up to 4 uintptr arguments.
func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
-// checks if the address has shadow (i.e. heap or data/bss)
+// checks if the address has shadow (i.e. heap or data/bss).
//
//go:nosplit
func isvalidaddr(addr unsafe.Pointer) bool {
@@ -360,8 +360,8 @@
//go:nosplit
func raceinit() (gctx, pctx uintptr) {
- // cgo is required to initialize libc, which is used by race runtime
- if !iscgo {
+ // On most machines, cgo is required to initialize libc, which is used by race runtime.
+ if !iscgo && GOOS != "darwin" {
throw("raceinit: race build must use cgo")
}
@@ -453,12 +453,12 @@
//go:nosplit
func racegostart(pc uintptr) uintptr {
- _g_ := getg()
+ gp := getg()
var spawng *g
- if _g_.m.curg != nil {
- spawng = _g_.m.curg
+ if gp.m.curg != nil {
+ spawng = gp.m.curg
} else {
- spawng = _g_
+ spawng = gp
}
var racectx uintptr
@@ -478,8 +478,8 @@
//go:nosplit
func racewriterangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
- _g_ := getg()
- if _g_ != _g_.m.curg {
+ gp := getg()
+ if gp != gp.m.curg {
// The call is coming from manual instrumentation of Go code running on g0/gsignal.
// Not interesting.
return
@@ -495,8 +495,8 @@
//go:nosplit
func racereadrangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
- _g_ := getg()
- if _g_ != _g_.m.curg {
+ gp := getg()
+ if gp != gp.m.curg {
// The call is coming from manual instrumentation of Go code running on g0/gsignal.
// Not interesting.
return
diff --git a/src/runtime/race/README b/src/runtime/race/README
index ad8f55f..596700a 100644
--- a/src/runtime/race/README
+++ b/src/runtime/race/README
@@ -6,7 +6,6 @@
race_darwin_amd64.syso built with LLVM 127e59048cd3d8dbb80c14b3036918c114089529 and Go 59ab6f351a370a27458755dc69f4a837e55a05a6.
race_freebsd_amd64.syso built with LLVM 127e59048cd3d8dbb80c14b3036918c114089529 and Go 59ab6f351a370a27458755dc69f4a837e55a05a6.
-race_linux_amd64.syso built with LLVM 127e59048cd3d8dbb80c14b3036918c114089529 and Go 59ab6f351a370a27458755dc69f4a837e55a05a6.
race_linux_ppc64le.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8.
race_netbsd_amd64.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8.
race_windows_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b.
@@ -14,3 +13,5 @@
race_darwin_arm64.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8.
race_openbsd_amd64.syso built with LLVM fcf6ae2f070eba73074b6ec8d8281e54d29dbeeb and Go 8f2db14cd35bbd674cb2988a508306de6655e425.
race_linux_s390x.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8.
+internal/amd64v3/race_linux.syso built with LLVM 74c2d4f6024c8f160871a2baa928d0b42415f183 and Go c0f27eb3d580c8b9efd73802678eba4c6c9461be.
+internal/amd64v1/race_linux.syso built with LLVM 74c2d4f6024c8f160871a2baa928d0b42415f183 and Go c0f27eb3d580c8b9efd73802678eba4c6c9461be.
diff --git a/src/runtime/race/doc.go b/src/runtime/race/doc.go
index 9e93f66..60a20df 100644
--- a/src/runtime/race/doc.go
+++ b/src/runtime/race/doc.go
@@ -7,3 +7,5 @@
// For details about the race detector see
// https://golang.org/doc/articles/race_detector.html
package race
+
+//go:generate ./mkcgo.sh
diff --git a/src/runtime/race/internal/amd64v1/doc.go b/src/runtime/race/internal/amd64v1/doc.go
new file mode 100644
index 0000000..ccb088c
--- /dev/null
+++ b/src/runtime/race/internal/amd64v1/doc.go
@@ -0,0 +1,10 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This package holds the race detector .syso for
+// amd64 architectures with GOAMD64<v3.
+
+//go:build amd64 && ((linux && !amd64.v3) || darwin || freebsd || netbsd || openbsd || windows)
+
+package amd64v1
diff --git a/src/runtime/race/race_darwin_amd64.syso b/src/runtime/race/internal/amd64v1/race_darwin.syso
similarity index 100%
rename from src/runtime/race/race_darwin_amd64.syso
rename to src/runtime/race/internal/amd64v1/race_darwin.syso
Binary files differ
diff --git a/src/runtime/race/race_freebsd_amd64.syso b/src/runtime/race/internal/amd64v1/race_freebsd.syso
similarity index 100%
rename from src/runtime/race/race_freebsd_amd64.syso
rename to src/runtime/race/internal/amd64v1/race_freebsd.syso
Binary files differ
diff --git a/src/runtime/race/internal/amd64v1/race_linux.syso b/src/runtime/race/internal/amd64v1/race_linux.syso
new file mode 100644
index 0000000..68f1508
--- /dev/null
+++ b/src/runtime/race/internal/amd64v1/race_linux.syso
Binary files differ
diff --git a/src/runtime/race/race_netbsd_amd64.syso b/src/runtime/race/internal/amd64v1/race_netbsd.syso
similarity index 100%
rename from src/runtime/race/race_netbsd_amd64.syso
rename to src/runtime/race/internal/amd64v1/race_netbsd.syso
Binary files differ
diff --git a/src/runtime/race/race_openbsd_amd64.syso b/src/runtime/race/internal/amd64v1/race_openbsd.syso
similarity index 100%
rename from src/runtime/race/race_openbsd_amd64.syso
rename to src/runtime/race/internal/amd64v1/race_openbsd.syso
Binary files differ
diff --git a/src/runtime/race/race_windows_amd64.syso b/src/runtime/race/internal/amd64v1/race_windows.syso
similarity index 100%
rename from src/runtime/race/race_windows_amd64.syso
rename to src/runtime/race/internal/amd64v1/race_windows.syso
Binary files differ
diff --git a/src/runtime/race/internal/amd64v3/doc.go b/src/runtime/race/internal/amd64v3/doc.go
new file mode 100644
index 0000000..215998a
--- /dev/null
+++ b/src/runtime/race/internal/amd64v3/doc.go
@@ -0,0 +1,10 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This package holds the race detector .syso for
+// amd64 architectures with GOAMD64>=v3.
+
+//go:build amd64 && linux && amd64.v3
+
+package amd64v3
diff --git a/src/runtime/race/internal/amd64v3/race_linux.syso b/src/runtime/race/internal/amd64v3/race_linux.syso
new file mode 100644
index 0000000..33c3e76
--- /dev/null
+++ b/src/runtime/race/internal/amd64v3/race_linux.syso
Binary files differ
diff --git a/src/runtime/race/mkcgo.sh b/src/runtime/race/mkcgo.sh
new file mode 100755
index 0000000..6ebe5a4
--- /dev/null
+++ b/src/runtime/race/mkcgo.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+hdr='
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by mkcgo.sh. DO NOT EDIT.
+
+//go:build race
+
+'
+
+convert() {
+ (echo "$hdr"; go tool cgo -dynpackage race -dynimport $1) | gofmt
+}
+
+convert race_darwin_arm64.syso >race_darwin_arm64.go
+convert internal/amd64v1/race_darwin.syso >race_darwin_amd64.go
+
diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go
index 8692066..9c508eb 100644
--- a/src/runtime/race/race.go
+++ b/src/runtime/race/race.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (race && linux && amd64) || (race && freebsd && amd64) || (race && netbsd && amd64) || (race && darwin && amd64) || (race && windows && amd64) || (race && linux && ppc64le) || (race && linux && arm64) || (race && darwin && arm64) || (race && openbsd && amd64) || (race && linux && s390x)
+//go:build race && ((linux && (amd64 || arm64 || ppc64le || s390x)) || ((freebsd || netbsd || openbsd || windows) && amd64))
package race
@@ -11,5 +11,10 @@
// The prebuilt race runtime lives in race_GOOS_GOARCH.syso.
// Calls to the runtime are done directly from src/runtime/race.go.
+// On darwin we always use system DLLs to create threads,
+// so we use race_darwin_$GOARCH.go to provide the syso-derived
+// symbol information without needing to invoke cgo.
+// This allows -race to be used on Mac systems without a C toolchain.
+
// void __race_unused_func(void);
import "C"
diff --git a/src/runtime/race/race_darwin_amd64.go b/src/runtime/race/race_darwin_amd64.go
new file mode 100644
index 0000000..fbb838a
--- /dev/null
+++ b/src/runtime/race/race_darwin_amd64.go
@@ -0,0 +1,101 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by mkcgo.sh. DO NOT EDIT.
+
+//go:build race
+
+package race
+
+//go:cgo_import_dynamic _Block_object_assign _Block_object_assign ""
+//go:cgo_import_dynamic _Block_object_dispose _Block_object_dispose ""
+//go:cgo_import_dynamic _NSConcreteStackBlock _NSConcreteStackBlock ""
+//go:cgo_import_dynamic _NSGetArgv _NSGetArgv ""
+//go:cgo_import_dynamic _NSGetEnviron _NSGetEnviron ""
+//go:cgo_import_dynamic _NSGetExecutablePath _NSGetExecutablePath ""
+//go:cgo_import_dynamic __bzero __bzero ""
+//go:cgo_import_dynamic __error __error ""
+//go:cgo_import_dynamic __fork __fork ""
+//go:cgo_import_dynamic __mmap __mmap ""
+//go:cgo_import_dynamic __munmap __munmap ""
+//go:cgo_import_dynamic __stack_chk_fail __stack_chk_fail ""
+//go:cgo_import_dynamic __stack_chk_guard __stack_chk_guard ""
+//go:cgo_import_dynamic _dyld_get_image_header _dyld_get_image_header ""
+//go:cgo_import_dynamic _dyld_get_image_name _dyld_get_image_name ""
+//go:cgo_import_dynamic _dyld_get_image_vmaddr_slide _dyld_get_image_vmaddr_slide ""
+//go:cgo_import_dynamic _dyld_get_shared_cache_range _dyld_get_shared_cache_range ""
+//go:cgo_import_dynamic _dyld_get_shared_cache_uuid _dyld_get_shared_cache_uuid ""
+//go:cgo_import_dynamic _dyld_image_count _dyld_image_count ""
+//go:cgo_import_dynamic _exit _exit ""
+//go:cgo_import_dynamic abort abort ""
+//go:cgo_import_dynamic arc4random_buf arc4random_buf ""
+//go:cgo_import_dynamic close close ""
+//go:cgo_import_dynamic dlsym dlsym ""
+//go:cgo_import_dynamic dup dup ""
+//go:cgo_import_dynamic dup2 dup2 ""
+//go:cgo_import_dynamic dyld_shared_cache_iterate_text dyld_shared_cache_iterate_text ""
+//go:cgo_import_dynamic execve execve ""
+//go:cgo_import_dynamic exit exit ""
+//go:cgo_import_dynamic fstat$INODE64 fstat$INODE64 ""
+//go:cgo_import_dynamic ftruncate ftruncate ""
+//go:cgo_import_dynamic getpid getpid ""
+//go:cgo_import_dynamic getrlimit getrlimit ""
+//go:cgo_import_dynamic gettimeofday gettimeofday ""
+//go:cgo_import_dynamic getuid getuid ""
+//go:cgo_import_dynamic grantpt grantpt ""
+//go:cgo_import_dynamic ioctl ioctl ""
+//go:cgo_import_dynamic isatty isatty ""
+//go:cgo_import_dynamic lstat$INODE64 lstat$INODE64 ""
+//go:cgo_import_dynamic mach_absolute_time mach_absolute_time ""
+//go:cgo_import_dynamic mach_task_self_ mach_task_self_ ""
+//go:cgo_import_dynamic mach_timebase_info mach_timebase_info ""
+//go:cgo_import_dynamic mach_vm_region_recurse mach_vm_region_recurse ""
+//go:cgo_import_dynamic madvise madvise ""
+//go:cgo_import_dynamic malloc_num_zones malloc_num_zones ""
+//go:cgo_import_dynamic malloc_zones malloc_zones ""
+//go:cgo_import_dynamic memcpy memcpy ""
+//go:cgo_import_dynamic memset_pattern16 memset_pattern16 ""
+//go:cgo_import_dynamic mkdir mkdir ""
+//go:cgo_import_dynamic mprotect mprotect ""
+//go:cgo_import_dynamic open open ""
+//go:cgo_import_dynamic pipe pipe ""
+//go:cgo_import_dynamic posix_openpt posix_openpt ""
+//go:cgo_import_dynamic posix_spawn posix_spawn ""
+//go:cgo_import_dynamic posix_spawn_file_actions_addclose posix_spawn_file_actions_addclose ""
+//go:cgo_import_dynamic posix_spawn_file_actions_adddup2 posix_spawn_file_actions_adddup2 ""
+//go:cgo_import_dynamic posix_spawn_file_actions_destroy posix_spawn_file_actions_destroy ""
+//go:cgo_import_dynamic posix_spawn_file_actions_init posix_spawn_file_actions_init ""
+//go:cgo_import_dynamic posix_spawnattr_destroy posix_spawnattr_destroy ""
+//go:cgo_import_dynamic posix_spawnattr_init posix_spawnattr_init ""
+//go:cgo_import_dynamic posix_spawnattr_setflags posix_spawnattr_setflags ""
+//go:cgo_import_dynamic pthread_attr_getstack pthread_attr_getstack ""
+//go:cgo_import_dynamic pthread_create pthread_create ""
+//go:cgo_import_dynamic pthread_get_stackaddr_np pthread_get_stackaddr_np ""
+//go:cgo_import_dynamic pthread_get_stacksize_np pthread_get_stacksize_np ""
+//go:cgo_import_dynamic pthread_getspecific pthread_getspecific ""
+//go:cgo_import_dynamic pthread_join pthread_join ""
+//go:cgo_import_dynamic pthread_self pthread_self ""
+//go:cgo_import_dynamic pthread_sigmask pthread_sigmask ""
+//go:cgo_import_dynamic pthread_threadid_np pthread_threadid_np ""
+//go:cgo_import_dynamic read read ""
+//go:cgo_import_dynamic readlink readlink ""
+//go:cgo_import_dynamic realpath$DARWIN_EXTSN realpath$DARWIN_EXTSN ""
+//go:cgo_import_dynamic rename rename ""
+//go:cgo_import_dynamic sched_yield sched_yield ""
+//go:cgo_import_dynamic setrlimit setrlimit ""
+//go:cgo_import_dynamic sigaction sigaction ""
+//go:cgo_import_dynamic stat$INODE64 stat$INODE64 ""
+//go:cgo_import_dynamic sysconf sysconf ""
+//go:cgo_import_dynamic sysctl sysctl ""
+//go:cgo_import_dynamic sysctlbyname sysctlbyname ""
+//go:cgo_import_dynamic task_info task_info ""
+//go:cgo_import_dynamic tcgetattr tcgetattr ""
+//go:cgo_import_dynamic tcsetattr tcsetattr ""
+//go:cgo_import_dynamic unlink unlink ""
+//go:cgo_import_dynamic unlockpt unlockpt ""
+//go:cgo_import_dynamic usleep usleep ""
+//go:cgo_import_dynamic vm_region_64 vm_region_64 ""
+//go:cgo_import_dynamic vm_region_recurse_64 vm_region_recurse_64 ""
+//go:cgo_import_dynamic waitpid waitpid ""
+//go:cgo_import_dynamic write write ""
diff --git a/src/runtime/race/race_darwin_arm64.go b/src/runtime/race/race_darwin_arm64.go
new file mode 100644
index 0000000..fe8584c
--- /dev/null
+++ b/src/runtime/race/race_darwin_arm64.go
@@ -0,0 +1,95 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by mkcgo.sh. DO NOT EDIT.
+
+//go:build race
+
+package race
+
+//go:cgo_import_dynamic _NSGetArgv _NSGetArgv ""
+//go:cgo_import_dynamic _NSGetEnviron _NSGetEnviron ""
+//go:cgo_import_dynamic _NSGetExecutablePath _NSGetExecutablePath ""
+//go:cgo_import_dynamic __error __error ""
+//go:cgo_import_dynamic __fork __fork ""
+//go:cgo_import_dynamic __mmap __mmap ""
+//go:cgo_import_dynamic __munmap __munmap ""
+//go:cgo_import_dynamic __stack_chk_fail __stack_chk_fail ""
+//go:cgo_import_dynamic __stack_chk_guard __stack_chk_guard ""
+//go:cgo_import_dynamic _dyld_get_image_header _dyld_get_image_header ""
+//go:cgo_import_dynamic _dyld_get_image_name _dyld_get_image_name ""
+//go:cgo_import_dynamic _dyld_get_image_vmaddr_slide _dyld_get_image_vmaddr_slide ""
+//go:cgo_import_dynamic _dyld_image_count _dyld_image_count ""
+//go:cgo_import_dynamic _exit _exit ""
+//go:cgo_import_dynamic abort abort ""
+//go:cgo_import_dynamic arc4random_buf arc4random_buf ""
+//go:cgo_import_dynamic bzero bzero ""
+//go:cgo_import_dynamic close close ""
+//go:cgo_import_dynamic dlsym dlsym ""
+//go:cgo_import_dynamic dup dup ""
+//go:cgo_import_dynamic dup2 dup2 ""
+//go:cgo_import_dynamic execve execve ""
+//go:cgo_import_dynamic exit exit ""
+//go:cgo_import_dynamic fstat fstat ""
+//go:cgo_import_dynamic ftruncate ftruncate ""
+//go:cgo_import_dynamic getpid getpid ""
+//go:cgo_import_dynamic getrlimit getrlimit ""
+//go:cgo_import_dynamic gettimeofday gettimeofday ""
+//go:cgo_import_dynamic getuid getuid ""
+//go:cgo_import_dynamic grantpt grantpt ""
+//go:cgo_import_dynamic ioctl ioctl ""
+//go:cgo_import_dynamic isatty isatty ""
+//go:cgo_import_dynamic lstat lstat ""
+//go:cgo_import_dynamic mach_absolute_time mach_absolute_time ""
+//go:cgo_import_dynamic mach_task_self_ mach_task_self_ ""
+//go:cgo_import_dynamic mach_timebase_info mach_timebase_info ""
+//go:cgo_import_dynamic mach_vm_region_recurse mach_vm_region_recurse ""
+//go:cgo_import_dynamic madvise madvise ""
+//go:cgo_import_dynamic malloc_num_zones malloc_num_zones ""
+//go:cgo_import_dynamic malloc_zones malloc_zones ""
+//go:cgo_import_dynamic memcpy memcpy ""
+//go:cgo_import_dynamic memset_pattern16 memset_pattern16 ""
+//go:cgo_import_dynamic mkdir mkdir ""
+//go:cgo_import_dynamic mprotect mprotect ""
+//go:cgo_import_dynamic open open ""
+//go:cgo_import_dynamic pipe pipe ""
+//go:cgo_import_dynamic posix_openpt posix_openpt ""
+//go:cgo_import_dynamic posix_spawn posix_spawn ""
+//go:cgo_import_dynamic posix_spawn_file_actions_addclose posix_spawn_file_actions_addclose ""
+//go:cgo_import_dynamic posix_spawn_file_actions_adddup2 posix_spawn_file_actions_adddup2 ""
+//go:cgo_import_dynamic posix_spawn_file_actions_destroy posix_spawn_file_actions_destroy ""
+//go:cgo_import_dynamic posix_spawn_file_actions_init posix_spawn_file_actions_init ""
+//go:cgo_import_dynamic posix_spawnattr_destroy posix_spawnattr_destroy ""
+//go:cgo_import_dynamic posix_spawnattr_init posix_spawnattr_init ""
+//go:cgo_import_dynamic posix_spawnattr_setflags posix_spawnattr_setflags ""
+//go:cgo_import_dynamic pthread_attr_getstack pthread_attr_getstack ""
+//go:cgo_import_dynamic pthread_create pthread_create ""
+//go:cgo_import_dynamic pthread_get_stackaddr_np pthread_get_stackaddr_np ""
+//go:cgo_import_dynamic pthread_get_stacksize_np pthread_get_stacksize_np ""
+//go:cgo_import_dynamic pthread_getspecific pthread_getspecific ""
+//go:cgo_import_dynamic pthread_join pthread_join ""
+//go:cgo_import_dynamic pthread_self pthread_self ""
+//go:cgo_import_dynamic pthread_sigmask pthread_sigmask ""
+//go:cgo_import_dynamic pthread_threadid_np pthread_threadid_np ""
+//go:cgo_import_dynamic read read ""
+//go:cgo_import_dynamic readlink readlink ""
+//go:cgo_import_dynamic realpath$DARWIN_EXTSN realpath$DARWIN_EXTSN ""
+//go:cgo_import_dynamic rename rename ""
+//go:cgo_import_dynamic sched_yield sched_yield ""
+//go:cgo_import_dynamic setrlimit setrlimit ""
+//go:cgo_import_dynamic sigaction sigaction ""
+//go:cgo_import_dynamic stat stat ""
+//go:cgo_import_dynamic sysconf sysconf ""
+//go:cgo_import_dynamic sysctl sysctl ""
+//go:cgo_import_dynamic sysctlbyname sysctlbyname ""
+//go:cgo_import_dynamic task_info task_info ""
+//go:cgo_import_dynamic tcgetattr tcgetattr ""
+//go:cgo_import_dynamic tcsetattr tcsetattr ""
+//go:cgo_import_dynamic unlink unlink ""
+//go:cgo_import_dynamic unlockpt unlockpt ""
+//go:cgo_import_dynamic usleep usleep ""
+//go:cgo_import_dynamic vm_region_64 vm_region_64 ""
+//go:cgo_import_dynamic vm_region_recurse_64 vm_region_recurse_64 ""
+//go:cgo_import_dynamic waitpid waitpid ""
+//go:cgo_import_dynamic write write ""
diff --git a/src/runtime/race/race_linux_amd64.syso b/src/runtime/race/race_linux_amd64.syso
deleted file mode 100644
index 6885610..0000000
--- a/src/runtime/race/race_linux_amd64.syso
+++ /dev/null
Binary files differ
diff --git a/src/runtime/race/race_unix_test.go b/src/runtime/race/race_unix_test.go
index 6cc0730..3cf53b0 100644
--- a/src/runtime/race/race_unix_test.go
+++ b/src/runtime/race/race_unix_test.go
@@ -19,11 +19,11 @@
if err != nil {
t.Fatalf("failed to mmap memory: %v", err)
}
+ defer syscall.Munmap(data)
p := (*uint32)(unsafe.Pointer(&data[0]))
atomic.AddUint32(p, 1)
(*p)++
if *p != 2 {
t.Fatalf("data[0] = %v, expect 2", *p)
}
- syscall.Munmap(data)
}
diff --git a/src/runtime/race/race_v1_amd64.go b/src/runtime/race/race_v1_amd64.go
new file mode 100644
index 0000000..7c40db1
--- /dev/null
+++ b/src/runtime/race/race_v1_amd64.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (linux && !amd64.v3) || darwin || freebsd || netbsd || openbsd || windows
+
+package race
+
+import _ "runtime/race/internal/amd64v1"
diff --git a/src/runtime/race/race_v3_amd64.go b/src/runtime/race/race_v3_amd64.go
new file mode 100644
index 0000000..80728d8
--- /dev/null
+++ b/src/runtime/race/race_v3_amd64.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && amd64.v3
+
+package race
+
+import _ "runtime/race/internal/amd64v3"
diff --git a/src/runtime/race/sched_test.go b/src/runtime/race/sched_test.go
index 9fe83ea..a66860c 100644
--- a/src/runtime/race/sched_test.go
+++ b/src/runtime/race/sched_test.go
@@ -7,10 +7,10 @@
package race_test
import (
- "bytes"
"fmt"
"reflect"
"runtime"
+ "strings"
"testing"
)
@@ -40,7 +40,7 @@
}
}
- var buf bytes.Buffer
+ var buf strings.Builder
for i := 0; i < N; i++ {
fmt.Fprintf(&buf, "%v\n", out[i])
}
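
Editor's note: the sched_test.go change above swaps bytes.Buffer for strings.Builder. Both satisfy io.Writer, so the fmt.Fprintf calls are unchanged; strings.Builder is the idiomatic choice when the result is only ever consumed as a string. A tiny standalone sketch of the same pattern:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	var buf strings.Builder
	for i := 0; i < 3; i++ {
		fmt.Fprintf(&buf, "line %v\n", i) // strings.Builder implements io.Writer
	}
	fmt.Print(buf.String())
}
```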
diff --git a/src/runtime/rdebug.go b/src/runtime/rdebug.go
index 1b213f1..7ecb2a5 100644
--- a/src/runtime/rdebug.go
+++ b/src/runtime/rdebug.go
@@ -15,8 +15,8 @@
//go:linkname setPanicOnFault runtime/debug.setPanicOnFault
func setPanicOnFault(new bool) (old bool) {
- _g_ := getg()
- old = _g_.paniconfault
- _g_.paniconfault = new
+ gp := getg()
+ old = gp.paniconfault
+ gp.paniconfault = new
return old
}
diff --git a/src/runtime/retry.go b/src/runtime/retry.go
new file mode 100644
index 0000000..2e2f813
--- /dev/null
+++ b/src/runtime/retry.go
@@ -0,0 +1,23 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package runtime
+
+// retryOnEAGAIN retries a function until it does not return EAGAIN.
+// It will use an increasing delay between calls, and retry up to 20 times.
+// The function argument is expected to return an errno value,
+// and retryOnEAGAIN will return any errno value other than EAGAIN.
+// If all retries return EAGAIN, then retryOnEAGAIN will return EAGAIN.
+func retryOnEAGAIN(fn func() int32) int32 {
+ for tries := 0; tries < 20; tries++ {
+ errno := fn()
+ if errno != _EAGAIN {
+ return errno
+ }
+ usleep_no_g(uint32(tries+1) * 1000) // milliseconds
+ }
+ return _EAGAIN
+}
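
Editor's note: retryOnEAGAIN above retries up to 20 times with a linearly increasing delay. The runtime version must avoid allocation and sleeps via usleep_no_g; a user-space analogue of the same shape, with the hypothetical names retryTransient and errTransient standing in for EAGAIN, might look like the sketch below.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

var errTransient = errors.New("transient failure")

// retryTransient retries fn up to 20 times, sleeping (tries+1) milliseconds
// between attempts, and returns the first non-transient result.
func retryTransient(fn func() error) error {
	var err error
	for tries := 0; tries < 20; tries++ {
		err = fn()
		if !errors.Is(err, errTransient) {
			return err
		}
		time.Sleep(time.Duration(tries+1) * time.Millisecond)
	}
	return err // still transient after all retries
}

func main() {
	calls := 0
	err := retryTransient(func() error {
		calls++
		if calls < 3 {
			return errTransient
		}
		return nil
	})
	fmt.Println(calls, err) // 3 <nil>
}
```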
diff --git a/src/runtime/rt0_freebsd_riscv64.s b/src/runtime/rt0_freebsd_riscv64.s
new file mode 100644
index 0000000..dc46b70
--- /dev/null
+++ b/src/runtime/rt0_freebsd_riscv64.s
@@ -0,0 +1,112 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// On FreeBSD argc/argv are passed in R0, not X2
+TEXT _rt0_riscv64_freebsd(SB),NOSPLIT|NOFRAME,$0
+ ADD $8, A0, A1 // argv
+ MOV 0(A0), A0 // argc
+ JMP main(SB)
+
+// When building with -buildmode=c-shared, this symbol is called when the shared
+// library is loaded.
+TEXT _rt0_riscv64_freebsd_lib(SB),NOSPLIT,$224
+ // Preserve callee-save registers, along with X1 (LR).
+ MOV X1, (8*3)(X2)
+ MOV X8, (8*4)(X2)
+ MOV X9, (8*5)(X2)
+ MOV X18, (8*6)(X2)
+ MOV X19, (8*7)(X2)
+ MOV X20, (8*8)(X2)
+ MOV X21, (8*9)(X2)
+ MOV X22, (8*10)(X2)
+ MOV X23, (8*11)(X2)
+ MOV X24, (8*12)(X2)
+ MOV X25, (8*13)(X2)
+ MOV X26, (8*14)(X2)
+ MOV g, (8*15)(X2)
+ MOVD F8, (8*16)(X2)
+ MOVD F9, (8*17)(X2)
+ MOVD F18, (8*18)(X2)
+ MOVD F19, (8*19)(X2)
+ MOVD F20, (8*20)(X2)
+ MOVD F21, (8*21)(X2)
+ MOVD F22, (8*22)(X2)
+ MOVD F23, (8*23)(X2)
+ MOVD F24, (8*24)(X2)
+ MOVD F25, (8*25)(X2)
+ MOVD F26, (8*26)(X2)
+ MOVD F27, (8*27)(X2)
+
+ // Initialize g as nil in case of using g later e.g. sigaction in cgo_sigaction.go
+ MOV X0, g
+
+ MOV A0, _rt0_riscv64_freebsd_lib_argc<>(SB)
+ MOV A1, _rt0_riscv64_freebsd_lib_argv<>(SB)
+
+ // Synchronous initialization.
+ MOV $runtime·libpreinit(SB), T0
+ JALR RA, T0
+
+ // Create a new thread to do the runtime initialization and return.
+ MOV _cgo_sys_thread_create(SB), T0
+ BEQZ T0, nocgo
+ MOV $_rt0_riscv64_freebsd_lib_go(SB), A0
+ MOV $0, A1
+ JALR RA, T0
+ JMP restore
+
+nocgo:
+ MOV $0x800000, A0 // stacksize = 8192KB
+ MOV $_rt0_riscv64_freebsd_lib_go(SB), A1
+ MOV A0, 8(X2)
+ MOV A1, 16(X2)
+ MOV $runtime·newosproc0(SB), T0
+ JALR RA, T0
+
+restore:
+ // Restore callee-save registers, along with X1 (LR).
+ MOV (8*3)(X2), X1
+ MOV (8*4)(X2), X8
+ MOV (8*5)(X2), X9
+ MOV (8*6)(X2), X18
+ MOV (8*7)(X2), X19
+ MOV (8*8)(X2), X20
+ MOV (8*9)(X2), X21
+ MOV (8*10)(X2), X22
+ MOV (8*11)(X2), X23
+ MOV (8*12)(X2), X24
+ MOV (8*13)(X2), X25
+ MOV (8*14)(X2), X26
+ MOV (8*15)(X2), g
+ MOVD (8*16)(X2), F8
+ MOVD (8*17)(X2), F9
+ MOVD (8*18)(X2), F18
+ MOVD (8*19)(X2), F19
+ MOVD (8*20)(X2), F20
+ MOVD (8*21)(X2), F21
+ MOVD (8*22)(X2), F22
+ MOVD (8*23)(X2), F23
+ MOVD (8*24)(X2), F24
+ MOVD (8*25)(X2), F25
+ MOVD (8*26)(X2), F26
+ MOVD (8*27)(X2), F27
+
+ RET
+
+TEXT _rt0_riscv64_freebsd_lib_go(SB),NOSPLIT,$0
+ MOV _rt0_riscv64_freebsd_lib_argc<>(SB), A0
+ MOV _rt0_riscv64_freebsd_lib_argv<>(SB), A1
+ MOV $runtime·rt0_go(SB), T0
+ JALR ZERO, T0
+
+DATA _rt0_riscv64_freebsd_lib_argc<>(SB)/8, $0
+GLOBL _rt0_riscv64_freebsd_lib_argc<>(SB),NOPTR, $8
+DATA _rt0_riscv64_freebsd_lib_argv<>(SB)/8, $0
+GLOBL _rt0_riscv64_freebsd_lib_argv<>(SB),NOPTR, $8
+
+TEXT main(SB),NOSPLIT|NOFRAME,$0
+ MOV $runtime·rt0_go(SB), T0
+ JALR ZERO, T0
diff --git a/src/runtime/rt0_linux_ppc64.s b/src/runtime/rt0_linux_ppc64.s
index 897d610..c9300a9 100644
--- a/src/runtime/rt0_linux_ppc64.s
+++ b/src/runtime/rt0_linux_ppc64.s
@@ -22,6 +22,7 @@
// There is no TLS base pointer.
//
// TODO(austin): Support ABI v1 dynamic linking entry point
+ XOR R0, R0 // Note, newer kernels may not always set R0 to 0.
MOVD $runtime·rt0_go(SB), R12
MOVD R12, CTR
MOVBZ runtime·iscgo(SB), R5
diff --git a/src/runtime/runtime-gdb.py b/src/runtime/runtime-gdb.py
index 5bb605c..c4462de 100644
--- a/src/runtime/runtime-gdb.py
+++ b/src/runtime/runtime-gdb.py
@@ -447,7 +447,7 @@
# args = gdb.string_to_argv(arg)
vp = gdb.lookup_type('void').pointer()
for ptr in SliceValue(gdb.parse_and_eval("'runtime.allgs'")):
- if ptr['atomicstatus'] == G_DEAD:
+ if ptr['atomicstatus']['value'] == G_DEAD:
continue
s = ' '
if ptr['m']:
@@ -455,7 +455,7 @@
pc = ptr['sched']['pc'].cast(vp)
pc = pc_to_int(pc)
blk = gdb.block_for_pc(pc)
- status = int(ptr['atomicstatus'])
+ status = int(ptr['atomicstatus']['value'])
st = sts.get(status, "unknown(%d)" % status)
print(s, ptr['goid'], "{0:8s}".format(st), blk.function)
@@ -472,7 +472,7 @@
"""
vp = gdb.lookup_type('void').pointer()
for ptr in SliceValue(gdb.parse_and_eval("'runtime.allgs'")):
- if ptr['atomicstatus'] == G_DEAD:
+ if ptr['atomicstatus']['value'] == G_DEAD:
continue
if ptr['goid'] == goid:
break
@@ -480,7 +480,7 @@
return None, None
# Get the goroutine's saved state.
pc, sp = ptr['sched']['pc'], ptr['sched']['sp']
- status = ptr['atomicstatus']&~G_SCAN
+ status = ptr['atomicstatus']['value']&~G_SCAN
# Goroutine is not running nor in syscall, so use the info in goroutine
if status != G_RUNNING and status != G_SYSCALL:
return pc.cast(vp), sp.cast(vp)
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index d97c2a2..4e7c227 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -6,6 +6,7 @@
import (
"bytes"
+ "flag"
"fmt"
"internal/testenv"
"os"
@@ -16,6 +17,7 @@
"strconv"
"strings"
"testing"
+ "time"
)
// NOTE: In some configurations, GDB will segfault when sent a SIGWINCH signal.
@@ -40,6 +42,10 @@
if runtime.GOARCH == "mips" {
t.Skip("skipping gdb tests on linux/mips; see https://golang.org/issue/25939")
}
+ // Disable GDB tests on alpine until issue #54352 resolved.
+ if strings.HasSuffix(testenv.Builder(), "-alpine") {
+ t.Skip("skipping gdb tests on alpine; see https://golang.org/issue/54352")
+ }
case "freebsd":
t.Skip("skipping gdb tests on FreeBSD; see https://golang.org/issue/29508")
case "aix":
@@ -394,6 +400,15 @@
if runtime.GOOS == "netbsd" {
testenv.SkipFlaky(t, 15603)
}
+ if flag.Lookup("test.parallel").Value.(flag.Getter).Get().(int) < 2 {
+ // It is possible that this test will hang for a long time due to an
+ // apparent GDB bug reported in https://go.dev/issue/37405.
+ // If test parallelism is high enough, that might be ok: the other parallel
+ // tests will finish, and then this test will finish right before it would
+	// time out. However, if tests are running sequentially, a hang in this test
+ // would likely cause the remaining tests to run out of time.
+ testenv.SkipFlaky(t, 37405)
+ }
checkGdbEnvironment(t)
t.Parallel()
@@ -415,6 +430,7 @@
}
// Execute gdb commands.
+ start := time.Now()
args := []string{"-nx", "-batch",
"-iex", "add-auto-load-safe-path " + filepath.Join(testenv.GOROOT(t), "src", "runtime"),
"-ex", "set startup-with-shell off",
@@ -424,7 +440,32 @@
"-ex", "continue",
filepath.Join(dir, "a.exe"),
}
- got, err := testenv.RunWithTimeout(t, exec.Command("gdb", args...))
+ cmd = testenv.Command(t, "gdb", args...)
+
+ // Work around the GDB hang reported in https://go.dev/issue/37405.
+ // Sometimes (rarely), the GDB process hangs completely when the Go program
+ // exits, and we suspect that the bug is on the GDB side.
+ //
+ // The default Cancel function added by testenv.Command will mark the test as
+ // failed if it is in danger of timing out, but we want to instead mark it as
+ // skipped. Change the Cancel function to kill the process and merely log
+ // instead of failing the test.
+ //
+ // (This approach does not scale: if the test parallelism is less than or
+ // equal to the number of tests that run right up to the deadline, then the
+ // remaining parallel tests are likely to time out. But as long as it's just
+ // this one flaky test, it's probably fine..?)
+ //
+ // If there is no deadline set on the test at all, relying on the timeout set
+ // by testenv.Command will cause the test to hang indefinitely, but that's
+ // what “no deadline” means, after all — and it's probably the right behavior
+ // anyway if someone is trying to investigate and fix the GDB bug.
+ cmd.Cancel = func() error {
+ t.Logf("GDB command timed out after %v: %v", time.Since(start), cmd)
+ return cmd.Process.Kill()
+ }
+
+ got, err := cmd.CombinedOutput()
t.Logf("gdb output:\n%s", got)
if err != nil {
if bytes.Contains(got, []byte("internal-error: wait returned unexpected status 0x0")) {
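
Editor's note: the runtime-gdb_test.go change above overrides exec.Cmd.Cancel so a timeout is logged and the process killed instead of failing the test outright. A minimal standalone sketch of that hook, assuming Go 1.20+ (which added the Cancel field) and a Unix-like system with a sleep binary:

```go
package main

import (
	"context"
	"log"
	"os/exec"
	"time"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	// CommandContext installs a default Cancel that kills the process and
	// reports failure; replacing it lets the caller decide what a timeout means.
	cmd := exec.CommandContext(ctx, "sleep", "10")
	start := time.Now()
	cmd.Cancel = func() error {
		log.Printf("command timed out after %v: %v", time.Since(start), cmd)
		return cmd.Process.Kill()
	}

	if err := cmd.Run(); err != nil {
		log.Printf("run: %v", err) // e.g. "signal: killed" after the timeout
	}
}
```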
diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go
index 2cf93ab..9f68738 100644
--- a/src/runtime/runtime.go
+++ b/src/runtime/runtime.go
@@ -6,29 +6,29 @@
import (
"runtime/internal/atomic"
- _ "unsafe" // for go:linkname
+ "unsafe"
)
//go:generate go run wincallback.go
//go:generate go run mkduff.go
//go:generate go run mkfastlog2table.go
+//go:generate go run mklockrank.go -o lockrank.go
var ticks ticksType
type ticksType struct {
lock mutex
- pad uint32 // ensure 8-byte alignment of val on 386
- val uint64
+ val atomic.Int64
}
// Note: Called by runtime/pprof in addition to runtime code.
func tickspersecond() int64 {
- r := int64(atomic.Load64(&ticks.val))
+ r := ticks.val.Load()
if r != 0 {
return r
}
lock(&ticks.lock)
- r = int64(ticks.val)
+ r = ticks.val.Load()
if r == 0 {
t0 := nanotime()
c0 := cputicks()
@@ -42,7 +42,7 @@
if r == 0 {
r++
}
- atomic.Store64(&ticks.val, uint64(r))
+ ticks.val.Store(r)
}
unlock(&ticks.lock)
return r
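
Editor's note: tickspersecond above keeps its double-checked shape after the atomic.Int64 conversion: an atomic fast path, then a lock-protected slow path that re-checks before computing and storing, with zero reserved as "not yet computed". A generic sketch of that pattern, with the invented names cache, get, and compute:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type cache struct {
	mu  sync.Mutex
	val atomic.Int64
}

func (c *cache) get(compute func() int64) int64 {
	if v := c.val.Load(); v != 0 {
		return v // fast path: already initialized
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	if v := c.val.Load(); v != 0 {
		return v // initialized by someone else while we waited for the lock
	}
	v := compute()
	if v == 0 {
		v = 1 // keep 0 reserved as the "not yet computed" sentinel
	}
	c.val.Store(v)
	return v
}

func main() {
	var c cache
	fmt.Println(c.get(func() int64 { return 42 })) // 42, computed once
	fmt.Println(c.get(func() int64 { return 99 })) // still 42
}
```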
@@ -65,3 +65,52 @@
func syscall_Exit(code int) {
exit(int32(code))
}
+
+var godebugDefault string
+var godebugUpdate atomic.Pointer[func(string, string)]
+var godebugEnv atomic.Pointer[string] // set by parsedebugvars
+
+//go:linkname godebug_setUpdate internal/godebug.setUpdate
+func godebug_setUpdate(update func(string, string)) {
+ p := new(func(string, string))
+ *p = update
+ godebugUpdate.Store(p)
+ godebugNotify()
+}
+
+func godebugNotify() {
+ if update := godebugUpdate.Load(); update != nil {
+ var env string
+ if p := godebugEnv.Load(); p != nil {
+ env = *p
+ }
+ (*update)(godebugDefault, env)
+ }
+}
+
+//go:linkname syscall_runtimeSetenv syscall.runtimeSetenv
+func syscall_runtimeSetenv(key, value string) {
+ setenv_c(key, value)
+ if key == "GODEBUG" {
+ p := new(string)
+ *p = value
+ godebugEnv.Store(p)
+ godebugNotify()
+ }
+}
+
+//go:linkname syscall_runtimeUnsetenv syscall.runtimeUnsetenv
+func syscall_runtimeUnsetenv(key string) {
+ unsetenv_c(key)
+ if key == "GODEBUG" {
+ godebugEnv.Store(nil)
+ godebugNotify()
+ }
+}
+
+// writeErrStr writes a string to descriptor 2.
+//
+//go:nosplit
+func writeErrStr(s string) {
+ write(2, unsafe.Pointer(unsafe.StringData(s)), int32(len(s)))
+}
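
Editor's note: the new GODEBUG plumbing above stores both the callback and the latest environment string behind atomic.Pointer values, so whichever side is set first, godebugNotify always delivers the most recent value to the most recent callback. A hedged sketch of that publish/notify shape, with the invented names notifier, SetCallback, and SetValue:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type notifier struct {
	callback atomic.Pointer[func(string)]
	value    atomic.Pointer[string]
}

func (n *notifier) SetCallback(cb func(string)) {
	n.callback.Store(&cb)
	n.notify()
}

func (n *notifier) SetValue(v string) {
	n.value.Store(&v)
	n.notify()
}

// notify delivers the latest value to the callback, if both have been set.
func (n *notifier) notify() {
	if cb := n.callback.Load(); cb != nil {
		var v string
		if p := n.value.Load(); p != nil {
			v = *p
		}
		(*cb)(v)
	}
}

func main() {
	var n notifier
	n.SetValue("gctrace=1") // value arrives before anyone is listening
	n.SetCallback(func(v string) { fmt.Println("setting is now:", v) })
	n.SetValue("gctrace=0")
}
```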
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index e307901..277f18a 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -35,13 +35,13 @@
//
//go:nosplit
func gotraceback() (level int32, all, crash bool) {
- _g_ := getg()
+ gp := getg()
t := atomic.Load(&traceback_cache)
crash = t&tracebackCrash != 0
- all = _g_.m.throwing >= throwTypeUser || t&tracebackAll != 0
- if _g_.m.traceback != 0 {
- level = int32(_g_.m.traceback)
- } else if _g_.m.throwing >= throwTypeRuntime {
+ all = gp.m.throwing >= throwTypeUser || t&tracebackAll != 0
+ if gp.m.traceback != 0 {
+ level = int32(gp.m.traceback)
+ } else if gp.m.throwing >= throwTypeRuntime {
// Always include runtime frames in runtime throws unless
// otherwise overridden by m.traceback.
level = 2
@@ -56,7 +56,7 @@
argv **byte
)
-// nosplit for use in linux startup sysargs
+// nosplit for use in linux startup sysargs.
//
//go:nosplit
func argv_index(argv **byte, i int32) *byte {
@@ -355,6 +355,8 @@
{"adaptivestackstart", &debug.adaptivestackstart},
}
+var globalGODEBUG string
+
func parsedebugvars() {
// defaults
debug.cgocheck = 1
@@ -372,7 +374,9 @@
debug.madvdontneed = 1
}
- for p := gogetenv("GODEBUG"); p != ""; {
+ globalGODEBUG = gogetenv("GODEBUG")
+ godebugEnv.StoreNoWB(&globalGODEBUG)
+ for p := globalGODEBUG; p != ""; {
field := ""
i := bytealg.IndexByteString(p, ',')
if i < 0 {
@@ -474,18 +478,18 @@
//go:nosplit
func acquirem() *m {
- _g_ := getg()
- _g_.m.locks++
- return _g_.m
+ gp := getg()
+ gp.m.locks++
+ return gp.m
}
//go:nosplit
func releasem(mp *m) {
- _g_ := getg()
+ gp := getg()
mp.locks--
- if mp.locks == 0 && _g_.preempt {
+ if mp.locks == 0 && gp.preempt {
// restore the preemption request in case we've cleared it in newstack
- _g_.stackguard0 = stackPreempt
+ gp.stackguard0 = stackPreempt
}
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index e178822..9381d1e 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -435,9 +435,9 @@
// 3. By debugCallWrap to pass parameters to a new goroutine because allocating a
// closure in the runtime is forbidden.
param unsafe.Pointer
- atomicstatus uint32
+ atomicstatus atomic.Uint32
stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
- goid int64
+ goid uint64
schedlink guintptr
waitsince int64 // approx time when the g become blocked
waitreason waitReason // if status==Gwaiting
@@ -461,14 +461,14 @@
activeStackChans bool
// parkingOnChan indicates that the goroutine is about to
// park on a chansend or chanrecv. Used to signal an unsafe point
- // for stack shrinking. It's a boolean value, but is updated atomically.
- parkingOnChan uint8
+ // for stack shrinking.
+ parkingOnChan atomic.Bool
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
tracking bool // whether we're tracking this G for sched latency statistics
trackingSeq uint8 // used to decide whether to track this G
- runnableStamp int64 // timestamp of when the G last became runnable, only used when tracking
+ trackingStamp int64 // timestamp of when the G last started being tracked
runnableTime int64 // the amount of time spent runnable, cleared when running, only used when tracking
sysexitticks int64 // cputicks when syscall has returned (for tracing)
traceseq uint64 // trace event sequencer
@@ -487,7 +487,7 @@
cgoCtxt []uintptr // cgo traceback context
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
- selectDone uint32 // are we participating in a select and did someone win the race?
+ selectDone atomic.Uint32 // are we participating in a select and did someone win the race?
// goroutineProfiled indicates the status of this goroutine's stack for the
// current in-progress goroutine profile
@@ -516,6 +516,13 @@
tlsSize = tlsSlots * goarch.PtrSize
)
+// Values for m.freeWait.
+const (
+ freeMStack = 0 // M done, free stack and reference.
+ freeMRef = 1 // M done, free reference.
+ freeMWait = 2 // M still in use.
+)
+
type m struct {
g0 *g // goroutine with scheduling stack
morebuf gobuf // gobuf arg to morestack
@@ -545,15 +552,16 @@
blocked bool // m is blocked on a note
newSigstack bool // minit on C thread called sigaltstack
printlock int8
- incgo bool // m is executing a cgo call
- freeWait uint32 // if == 0, safe to free g0 and delete m (atomic)
+ incgo bool // m is executing a cgo call
+ isextra bool // m is an extra m
+ freeWait atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait)
fastrand uint64
needextram bool
traceback uint8
- ncgocall uint64 // number of cgo calls in total
- ncgo int32 // number of cgo calls currently in progress
- cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily
- cgoCallers *cgoCallers // cgo traceback if crashing in cgo call
+ ncgocall uint64 // number of cgo calls in total
+ ncgo int32 // number of cgo calls currently in progress
+ cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily
+ cgoCallers *cgoCallers // cgo traceback if crashing in cgo call
park note
alllink *m // on allm
schedlink muintptr
@@ -583,12 +591,11 @@
// preemptGen counts the number of completed preemption
// signals. This is used to detect when a preemption is
- // requested, but fails. Accessed atomically.
- preemptGen uint32
+ // requested, but fails.
+ preemptGen atomic.Uint32
// Whether this is a pending preemption signal on this M.
- // Accessed atomically.
- signalPending uint32
+ signalPending atomic.Uint32
dlogPerM
@@ -668,19 +675,15 @@
palloc persistentAlloc // per-P to avoid mutex
- _ uint32 // Alignment for atomic fields below
-
// The when field of the first entry on the timer heap.
- // This is updated using atomic functions.
// This is 0 if the timer heap is empty.
- timer0When uint64
+ timer0When atomic.Int64
// The earliest known nextwhen field of a timer with
// timerModifiedEarlier status. Because the timer may have been
// modified again, there need not be any timer with this value.
- // This is updated using atomic functions.
// This is 0 if there are no timerModifiedEarlier timers.
- timerModifiedEarliest uint64
+ timerModifiedEarliest atomic.Int64
// Per-P GC state
gcAssistTime int64 // Nanoseconds in assistAlloc
@@ -713,7 +716,7 @@
// statsSeq is a counter indicating whether this P is currently
// writing any stats. Its value is even when not, odd when it is.
- statsSeq uint32
+ statsSeq atomic.Uint32
// Lock for timers. We normally access the timers while running
// on this P, but the scheduler can also do it from a different P.
@@ -725,12 +728,10 @@
timers []*timer
// Number of timers in P's heap.
- // Modified using atomic instructions.
- numTimers uint32
+ numTimers atomic.Uint32
// Number of timerDeleted timers in P's heap.
- // Modified using atomic instructions.
- deletedTimers uint32
+ deletedTimers atomic.Uint32
// Race context used while executing timer functions.
timerRaceCtx uintptr
@@ -753,15 +754,19 @@
// scheduler ASAP (regardless of what G is running on it).
preempt bool
+ // pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
+ //
+ // Used only if GOEXPERIMENT=pagetrace.
+ pageTraceBuf pageTraceBuf
+
// Padding is no longer needed. False sharing is now not a worry because p is large enough
// that its size class is an integer multiple of the cache line size (for any of our architectures).
}
type schedt struct {
- // accessed atomically. keep at top to ensure alignment on 32-bit systems.
- goidgen uint64
- lastpoll uint64 // time of last network poll, 0 if currently polling
- pollUntil uint64 // time to which current poll is sleeping
+ goidgen atomic.Uint64
+ lastpoll atomic.Int64 // time of last network poll, 0 if currently polling
+ pollUntil atomic.Int64 // time to which current poll is sleeping
lock mutex
@@ -776,11 +781,12 @@
nmsys int32 // number of system m's not counted for deadlock
nmfreed int64 // cumulative number of freed m's
- ngsys uint32 // number of system goroutines; updated atomically
+ ngsys atomic.Int32 // number of system goroutines
- pidle puintptr // idle p's
- npidle uint32
- nmspinning uint32 // See "Worker thread parking/unparking" comment in proc.go.
+ pidle puintptr // idle p's
+ npidle atomic.Int32
+ nmspinning atomic.Int32 // See "Worker thread parking/unparking" comment in proc.go.
+ needspinning atomic.Uint32 // See "Delicate dance" comment in proc.go. Boolean. Must hold sched.lock to set to 1.
// Global runnable queue.
runq gQueue
@@ -818,10 +824,10 @@
// m.exited is set. Linked through m.freelink.
freem *m
- gcwaiting uint32 // gc is waiting to run
+ gcwaiting atomic.Bool // gc is waiting to run
stopwait int32
stopnote note
- sysmonwait uint32
+ sysmonwait atomic.Bool
sysmonnote note
// safepointFn should be called on each P at the next GC
@@ -844,9 +850,16 @@
// timeToRun is a distribution of scheduling latencies, defined
// as the sum of time a G spends in the _Grunnable state before
// it transitions to _Grunning.
- //
- // timeToRun is protected by sched.lock.
timeToRun timeHistogram
+
+ // idleTime is the total CPU time Ps have "spent" idle.
+ //
+ // Reset on each GC cycle.
+ idleTime atomic.Int64
+
+ // totalMutexWaitTime is the sum of time goroutines have spent in _Gwaiting
+ // with a waitreason of the form waitReasonSync{RW,}Mutex{R,}Lock.
+ totalMutexWaitTime atomic.Int64
}
// Values for the flags field of a sigTabT.
@@ -867,8 +880,8 @@
// Keep in sync with linker (../cmd/link/internal/ld/pcln.go:/pclntab)
// and with package debug/gosym and with symtab.go in package runtime.
type _func struct {
- entryoff uint32 // start pc, as offset from moduledata.text/pcHeader.textStart
- nameoff int32 // function name
+ entryOff uint32 // start pc, as offset from moduledata.text/pcHeader.textStart
+ nameOff int32 // function name, as index into moduledata.funcnametab.
args int32 // in/out args size
deferreturn uint32 // offset of start of a deferreturn call instruction from entry, if any.
@@ -878,21 +891,45 @@
pcln uint32
npcdata uint32
cuOffset uint32 // runtime.cutab offset of this function's CU
+ startLine int32 // line number of start of function (func keyword/TEXT directive)
funcID funcID // set for certain special runtime functions
flag funcFlag
_ [1]byte // pad
nfuncdata uint8 // must be last, must end on a uint32-aligned boundary
+
+ // The end of the struct is followed immediately by two variable-length
+ // arrays that reference the pcdata and funcdata locations for this
+ // function.
+
+ // pcdata contains the offset into moduledata.pctab for the start of
+ // that index's table. e.g.,
+ // &moduledata.pctab[_func.pcdata[_PCDATA_UnsafePoint]] is the start of
+ // the unsafe point table.
+ //
+ // An offset of 0 indicates that there is no table.
+ //
+ // pcdata [npcdata]uint32
+
+ // funcdata contains the offset past moduledata.gofunc which contains a
+ // pointer to that index's funcdata. e.g.,
+ // *(moduledata.gofunc + _func.funcdata[_FUNCDATA_ArgsPointerMaps]) is
+ // the argument pointer map.
+ //
+ // An offset of ^uint32(0) indicates that there is no entry.
+ //
+ // funcdata [nfuncdata]uint32
}
// Pseudo-Func that is returned for PCs that occur in inlined code.
// A *Func can be either a *_func or a *funcinl, and they are distinguished
// by the first uintptr.
type funcinl struct {
- ones uint32 // set to ^0 to distinguish from _func
- entry uintptr // entry of the real (the "outermost") frame
- name string
- file string
- line int
+ ones uint32 // set to ^0 to distinguish from _func
+ entry uintptr // entry of the real (the "outermost") frame
+ name string
+ file string
+ line int32
+ startLine int32
}
// layout of Itab known to compilers
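
Editor's note: the new comments in _func above describe a fixed header followed immediately in memory by variable-length pcdata/funcdata tables addressed by offset. The sketch below shows that general "header plus trailing array" layout with unsafe pointer arithmetic; it is not runtime code, the header and entry names are made up, and the backing storage is a single contiguous slice so the arithmetic stays in bounds.

```go
package main

import (
	"fmt"
	"unsafe"
)

type header struct {
	n uint32 // number of trailing uint32 entries
	// followed immediately in memory by: entries [n]uint32
}

// entry returns the i'th trailing uint32 stored directly after h.
func entry(h *header, i uint32) uint32 {
	base := unsafe.Add(unsafe.Pointer(h), unsafe.Sizeof(*h))
	return *(*uint32)(unsafe.Add(base, uintptr(i)*unsafe.Sizeof(uint32(0))))
}

func main() {
	// One contiguous allocation backs the header and its trailing array.
	buf := make([]uint32, 4)
	h := (*header)(unsafe.Pointer(&buf[0]))
	h.n = 3
	buf[1], buf[2], buf[3] = 10, 20, 30
	for i := uint32(0); i < h.n; i++ {
		fmt.Println(entry(h, i)) // 10, 20, 30
	}
}
```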
@@ -917,7 +954,7 @@
type forcegcstate struct {
lock mutex
g *g
- idle uint32
+ idle atomic.Bool
}
// extendRandom extends the random numbers in r[:n] to the whole slice r.
@@ -994,24 +1031,10 @@
goexit bool
}
-// stack traces
-type stkframe struct {
- fn funcInfo // function being run
- pc uintptr // program counter within fn
- continpc uintptr // program counter where execution can continue, or 0 if not
- lr uintptr // program counter at caller aka link register
- sp uintptr // stack pointer at pc
- fp uintptr // stack pointer at caller aka frame pointer
- varp uintptr // top of local variables
- argp uintptr // pointer to function arguments
- arglen uintptr // number of bytes at argp
- argmap *bitvector // force use of this argmap
-}
-
// ancestorInfo records details of where a goroutine was started.
type ancestorInfo struct {
pcs []uintptr // pcs from the stack of this goroutine
- goid int64 // goroutine id of this goroutine; original goroutine possibly dead
+ goid uint64 // goroutine id of this goroutine; original goroutine possibly dead
gopc uintptr // pc of go statement that created this goroutine
}
@@ -1050,12 +1073,17 @@
waitReasonSemacquire // "semacquire"
waitReasonSleep // "sleep"
waitReasonSyncCondWait // "sync.Cond.Wait"
- waitReasonTimerGoroutineIdle // "timer goroutine (idle)"
+ waitReasonSyncMutexLock // "sync.Mutex.Lock"
+ waitReasonSyncRWMutexRLock // "sync.RWMutex.RLock"
+ waitReasonSyncRWMutexLock // "sync.RWMutex.Lock"
waitReasonTraceReaderBlocked // "trace reader (blocked)"
waitReasonWaitForGCCycle // "wait for GC cycle"
waitReasonGCWorkerIdle // "GC worker (idle)"
+ waitReasonGCWorkerActive // "GC worker (active)"
waitReasonPreempted // "preempted"
waitReasonDebugCall // "debug call"
+ waitReasonGCMarkTermination // "GC mark termination"
+ waitReasonStoppingTheWorld // "stopping the world"
)
var waitReasonStrings = [...]string{
@@ -1080,12 +1108,17 @@
waitReasonSemacquire: "semacquire",
waitReasonSleep: "sleep",
waitReasonSyncCondWait: "sync.Cond.Wait",
- waitReasonTimerGoroutineIdle: "timer goroutine (idle)",
+ waitReasonSyncMutexLock: "sync.Mutex.Lock",
+ waitReasonSyncRWMutexRLock: "sync.RWMutex.RLock",
+ waitReasonSyncRWMutexLock: "sync.RWMutex.Lock",
waitReasonTraceReaderBlocked: "trace reader (blocked)",
waitReasonWaitForGCCycle: "wait for GC cycle",
waitReasonGCWorkerIdle: "GC worker (idle)",
+ waitReasonGCWorkerActive: "GC worker (active)",
waitReasonPreempted: "preempted",
waitReasonDebugCall: "debug call",
+ waitReasonGCMarkTermination: "GC mark termination",
+ waitReasonStoppingTheWorld: "stopping the world",
}
func (w waitReason) String() string {
@@ -1095,6 +1128,12 @@
return waitReasonStrings[w]
}
+func (w waitReason) isMutexWait() bool {
+ return w == waitReasonSyncMutexLock ||
+ w == waitReasonSyncRWMutexRLock ||
+ w == waitReasonSyncRWMutexLock
+}
+
var (
allm *m
gomaxprocs int32
diff --git a/src/runtime/runtime_linux_test.go b/src/runtime/runtime_linux_test.go
index a753aee..6af5561 100644
--- a/src/runtime/runtime_linux_test.go
+++ b/src/runtime/runtime_linux_test.go
@@ -53,15 +53,6 @@
}
}
-func TestEpollctlErrorSign(t *testing.T) {
- v := Epollctl(-1, 1, -1, unsafe.Pointer(&EpollEvent{}))
-
- const EBADF = 0x09
- if v != -EBADF {
- t.Errorf("epollctl = %v, want %v", v, -EBADF)
- }
-}
-
func TestKernelStructSize(t *testing.T) {
// Check that the Go definitions of structures exchanged with the kernel are
// the same size as what the kernel defines.
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index 018a8db..2faf06e 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -377,7 +377,7 @@
if !ok {
b.Fatal("goroutine profile failed")
}
- latencies = append(latencies, time.Now().Sub(start))
+ latencies = append(latencies, time.Since(start))
}
b.StopTimer()
diff --git a/src/runtime/rwmutex.go b/src/runtime/rwmutex.go
index 7713c3f..ede3d13 100644
--- a/src/runtime/rwmutex.go
+++ b/src/runtime/rwmutex.go
@@ -23,8 +23,8 @@
wLock mutex // serializes writers
writer muintptr // pending writer waiting for completing readers
- readerCount uint32 // number of pending readers
- readerWait uint32 // number of departing readers
+ readerCount atomic.Int32 // number of pending readers
+ readerWait atomic.Int32 // number of departing readers
}
const rwmutexMaxReaders = 1 << 30
@@ -36,7 +36,7 @@
// deadlock (issue #20903). Alternatively, we could drop the P
// while sleeping.
acquirem()
- if int32(atomic.Xadd(&rw.readerCount, 1)) < 0 {
+ if rw.readerCount.Add(1) < 0 {
// A writer is pending. Park on the reader queue.
systemstack(func() {
lockWithRank(&rw.rLock, lockRankRwmutexR)
@@ -60,12 +60,12 @@
// runlock undoes a single rlock call on rw.
func (rw *rwmutex) runlock() {
- if r := int32(atomic.Xadd(&rw.readerCount, -1)); r < 0 {
+ if r := rw.readerCount.Add(-1); r < 0 {
if r+1 == 0 || r+1 == -rwmutexMaxReaders {
throw("runlock of unlocked rwmutex")
}
// A writer is pending.
- if atomic.Xadd(&rw.readerWait, -1) == 0 {
+ if rw.readerWait.Add(-1) == 0 {
// The last reader unblocks the writer.
lockWithRank(&rw.rLock, lockRankRwmutexR)
w := rw.writer.ptr()
@@ -84,10 +84,10 @@
lockWithRank(&rw.wLock, lockRankRwmutexW)
m := getg().m
// Announce that there is a pending writer.
- r := int32(atomic.Xadd(&rw.readerCount, -rwmutexMaxReaders)) + rwmutexMaxReaders
+ r := rw.readerCount.Add(-rwmutexMaxReaders) + rwmutexMaxReaders
// Wait for any active readers to complete.
lockWithRank(&rw.rLock, lockRankRwmutexR)
- if r != 0 && atomic.Xadd(&rw.readerWait, r) != 0 {
+ if r != 0 && rw.readerWait.Add(r) != 0 {
// Wait for reader to wake us up.
systemstack(func() {
rw.writer.set(m)
@@ -103,7 +103,7 @@
// unlock unlocks rw for writing.
func (rw *rwmutex) unlock() {
// Announce to readers that there is no active writer.
- r := int32(atomic.Xadd(&rw.readerCount, rwmutexMaxReaders))
+ r := rw.readerCount.Add(rwmutexMaxReaders)
if r >= rwmutexMaxReaders {
throw("unlock of unlocked rwmutex")
}
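
Editor's note: the rwmutex.go hunks above keep the classic trick that the atomic.Int32 conversion preserves: a pending writer subtracts rwmutexMaxReaders from readerCount, so a reader's single Add(1) < 0 check doubles as "is a writer waiting?", while adding the constant back recovers the number of active readers. A small demonstration of just that arithmetic, not a working lock; maxReaders mirrors rwmutexMaxReaders and the rest is illustrative:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

const maxReaders = 1 << 30

func main() {
	var readerCount atomic.Int32

	// Two readers arrive; the count stays non-negative, so they take the fast path.
	fmt.Println(readerCount.Add(1) < 0) // false
	fmt.Println(readerCount.Add(1) < 0) // false

	// A writer announces itself; r recovers the number of active readers it must wait for.
	r := readerCount.Add(-maxReaders) + maxReaders
	fmt.Println("active readers:", r) // 2

	// A reader arriving after the writer sees a negative count and must park.
	fmt.Println(readerCount.Add(1) < 0) // true
}
```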
diff --git a/src/runtime/rwmutex_test.go b/src/runtime/rwmutex_test.go
index f15d367..ddb16ae 100644
--- a/src/runtime/rwmutex_test.go
+++ b/src/runtime/rwmutex_test.go
@@ -17,10 +17,10 @@
"testing"
)
-func parallelReader(m *RWMutex, clocked chan bool, cunlock *uint32, cdone chan bool) {
+func parallelReader(m *RWMutex, clocked chan bool, cunlock *atomic.Bool, cdone chan bool) {
m.RLock()
clocked <- true
- for atomic.LoadUint32(cunlock) == 0 {
+ for !cunlock.Load() {
}
m.RUnlock()
cdone <- true
@@ -30,7 +30,7 @@
GOMAXPROCS(numReaders + 1)
var m RWMutex
clocked := make(chan bool, numReaders)
- var cunlock uint32
+ var cunlock atomic.Bool
cdone := make(chan bool)
for i := 0; i < numReaders; i++ {
go parallelReader(&m, clocked, &cunlock, cdone)
@@ -39,7 +39,7 @@
for i := 0; i < numReaders; i++ {
<-clocked
}
- atomic.StoreUint32(&cunlock, 1)
+ cunlock.Store(true)
// Wait for the goroutines to finish.
for i := 0; i < numReaders; i++ {
<-cdone
diff --git a/src/runtime/select.go b/src/runtime/select.go
index e18b2f1..1072465 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -8,7 +8,6 @@
import (
"internal/abi"
- "runtime/internal/atomic"
"unsafe"
)
@@ -70,7 +69,7 @@
// Mark that it's safe for stack shrinking to occur now,
// because any thread acquiring this G's stack for shrinking
// is guaranteed to observe activeStackChans after this store.
- atomic.Store8(&gp.parkingOnChan, 0)
+ gp.parkingOnChan.Store(false)
// Make sure we unlock after setting activeStackChans and
// unsetting parkingOnChan. The moment we unlock any of the
// channel locks we risk gp getting readied by a channel operation
@@ -324,13 +323,13 @@
// to park on a channel. The window between when this G's status
// changes and when we set gp.activeStackChans is not safe for
// stack shrinking.
- atomic.Store8(&gp.parkingOnChan, 1)
+ gp.parkingOnChan.Store(true)
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
gp.activeStackChans = false
sellock(scases, lockorder)
- gp.selectDone = 0
+ gp.selectDone.Store(0)
sg = (*sudog)(gp.param)
gp.param = nil
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 39935f7..bc23a85 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -39,8 +39,8 @@
// BenchmarkSemTable/OneAddrCollision/* for a benchmark that exercises this.
type semaRoot struct {
lock mutex
- treap *sudog // root of balanced tree of unique waiters.
- nwait uint32 // Number of waiters. Read w/o the lock.
+ treap *sudog // root of balanced tree of unique waiters.
+ nwait atomic.Uint32 // Number of waiters. Read w/o the lock.
}
var semtable semTable
@@ -59,12 +59,12 @@
//go:linkname sync_runtime_Semacquire sync.runtime_Semacquire
func sync_runtime_Semacquire(addr *uint32) {
- semacquire1(addr, false, semaBlockProfile, 0)
+ semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
}
//go:linkname poll_runtime_Semacquire internal/poll.runtime_Semacquire
func poll_runtime_Semacquire(addr *uint32) {
- semacquire1(addr, false, semaBlockProfile, 0)
+ semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
}
//go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
@@ -74,7 +74,17 @@
//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) {
- semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes)
+ semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncMutexLock)
+}
+
+//go:linkname sync_runtime_SemacquireRWMutexR sync.runtime_SemacquireRWMutexR
+func sync_runtime_SemacquireRWMutexR(addr *uint32, lifo bool, skipframes int) {
+ semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexRLock)
+}
+
+//go:linkname sync_runtime_SemacquireRWMutex sync.runtime_SemacquireRWMutex
+func sync_runtime_SemacquireRWMutex(addr *uint32, lifo bool, skipframes int) {
+ semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexLock)
}
//go:linkname poll_runtime_Semrelease internal/poll.runtime_Semrelease
@@ -98,10 +108,10 @@
// Called from runtime.
func semacquire(addr *uint32) {
- semacquire1(addr, false, 0, 0)
+ semacquire1(addr, false, 0, 0, waitReasonSemacquire)
}
-func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int) {
+func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int, reason waitReason) {
gp := getg()
if gp != gp.m.curg {
throw("semacquire not on the G stack")
@@ -137,17 +147,17 @@
for {
lockWithRank(&root.lock, lockRankRoot)
// Add ourselves to nwait to disable "easy case" in semrelease.
- atomic.Xadd(&root.nwait, 1)
+ root.nwait.Add(1)
// Check cansemacquire to avoid missed wakeup.
if cansemacquire(addr) {
- atomic.Xadd(&root.nwait, -1)
+ root.nwait.Add(-1)
unlock(&root.lock)
break
}
// Any semrelease after the cansemacquire knows we're waiting
// (we set nwait above), so go to sleep.
root.queue(addr, s, lifo)
- goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4+skipframes)
+ goparkunlock(&root.lock, reason, traceEvGoBlockSync, 4+skipframes)
if s.ticket != 0 || cansemacquire(addr) {
break
}
@@ -169,13 +179,13 @@
// Easy case: no waiters?
// This check must happen after the xadd, to avoid a missed wakeup
// (see loop in semacquire).
- if atomic.Load(&root.nwait) == 0 {
+ if root.nwait.Load() == 0 {
return
}
// Harder case: search for a waiter and wake it.
lockWithRank(&root.lock, lockRankRoot)
- if atomic.Load(&root.nwait) == 0 {
+ if root.nwait.Load() == 0 {
// The count is already consumed by another goroutine,
// so no need to wake up another goroutine.
unlock(&root.lock)
@@ -183,7 +193,7 @@
}
s, t0 := root.dequeue(addr)
if s != nil {
- atomic.Xadd(&root.nwait, -1)
+ root.nwait.Add(-1)
}
unlock(&root.lock)
if s != nil { // May be slow or even yield, so unlock first
@@ -451,7 +461,7 @@
type notifyList struct {
// wait is the ticket number of the next waiter. It is atomically
// incremented outside the lock.
- wait uint32
+ wait atomic.Uint32
// notify is the ticket number of the next waiter to be notified. It can
// be read outside the lock, but is only written to with lock held.
@@ -482,7 +492,7 @@
func notifyListAdd(l *notifyList) uint32 {
// This may be called concurrently, for example, when called from
// sync.Cond.Wait while holding a RWMutex in read mode.
- return atomic.Xadd(&l.wait, 1) - 1
+ return l.wait.Add(1) - 1
}
// notifyListWait waits for a notification. If one has been sent since
@@ -527,7 +537,7 @@
func notifyListNotifyAll(l *notifyList) {
// Fast-path: if there are no new waiters since the last notification
// we don't need to acquire the lock.
- if atomic.Load(&l.wait) == atomic.Load(&l.notify) {
+ if l.wait.Load() == atomic.Load(&l.notify) {
return
}
@@ -542,7 +552,7 @@
// value of wait because any previous waiters are already in the list
// or will notice that they have already been notified when trying to
// add themselves to the list.
- atomic.Store(&l.notify, atomic.Load(&l.wait))
+ atomic.Store(&l.notify, l.wait.Load())
unlock(&l.lock)
// Go through the local list and ready all waiters.
@@ -560,7 +570,7 @@
func notifyListNotifyOne(l *notifyList) {
// Fast-path: if there are no new waiters since the last notification
// we don't need to acquire the lock at all.
- if atomic.Load(&l.wait) == atomic.Load(&l.notify) {
+ if l.wait.Load() == atomic.Load(&l.notify) {
return
}
@@ -568,7 +578,7 @@
// Re-check under the lock if we need to do anything.
t := l.notify
- if t == atomic.Load(&l.wait) {
+ if t == l.wait.Load() {
unlock(&l.lock)
return
}
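
The nwait handling above is the classic missed-wakeup guard: an acquirer bumps nwait before re-checking the counter, so a releaser that has already incremented the value either observes nwait != 0 and wakes someone, or the acquirer's re-check succeeds. A simplified user-space sketch of that protocol (channels stand in for the runtime's sudog queue; this is not the runtime implementation):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type sema struct {
	value atomic.Uint32 // available units
	nwait atomic.Uint32 // waiters; read without the lock on the release fast path
	mu    sync.Mutex
	q     []chan struct{}
}

func (s *sema) try() bool {
	for {
		v := s.value.Load()
		if v == 0 {
			return false
		}
		if s.value.CompareAndSwap(v, v-1) {
			return true
		}
	}
}

func (s *sema) acquire() {
	for {
		if s.try() {
			return
		}
		s.mu.Lock()
		s.nwait.Add(1) // disable the "easy case" in release
		if s.try() {   // re-check to avoid a missed wakeup
			s.nwait.Add(^uint32(0)) // decrement
			s.mu.Unlock()
			return
		}
		ch := make(chan struct{})
		s.q = append(s.q, ch)
		s.mu.Unlock()
		<-ch // parked until release hands us a wakeup
	}
}

func (s *sema) release() {
	s.value.Add(1)
	if s.nwait.Load() == 0 {
		return // easy case: nobody is waiting
	}
	s.mu.Lock()
	if s.nwait.Load() == 0 {
		s.mu.Unlock()
		return
	}
	ch := s.q[0]
	s.q = s.q[1:]
	s.nwait.Add(^uint32(0))
	s.mu.Unlock()
	close(ch)
}

func main() {
	var s sema
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			s.acquire()
			fmt.Println("goroutine", id, "acquired")
		}(i)
	}
	for i := 0; i < 3; i++ {
		s.release()
	}
	wg.Wait()
}
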
diff --git a/src/runtime/semasleep_test.go b/src/runtime/semasleep_test.go
index d56733c..7262853 100644
--- a/src/runtime/semasleep_test.go
+++ b/src/runtime/semasleep_test.go
@@ -37,14 +37,16 @@
if err := cmd.Start(); err != nil {
t.Fatalf("Failed to start command: %v", err)
}
+
+ waiting := false
doneCh := make(chan error, 1)
- go func() {
- doneCh <- cmd.Wait()
- close(doneCh)
- }()
t.Cleanup(func() {
cmd.Process.Kill()
- <-doneCh
+ if waiting {
+ <-doneCh
+ } else {
+ cmd.Wait()
+ }
})
// Wait for After1 to close its stdout so that we know the runtime's SIGIO
@@ -57,6 +59,19 @@
t.Fatalf("error reading from testprog: %v", err)
}
+ // Wait for child exit.
+ //
+ // Note that we must do this after waiting for the write/child end of
+ // stdout to close. Wait closes the read/parent end of stdout, so
+ // starting this goroutine prior to io.ReadAll introduces a race
+ // condition where ReadAll may get fs.ErrClosed if the child exits too
+ // quickly.
+ waiting = true
+ go func() {
+ doneCh <- cmd.Wait()
+ close(doneCh)
+ }()
+
// Wait for an arbitrary timeout longer than one second. The subprocess itself
// attempts to sleep for one second, but if the machine running the test is
// heavily loaded that subprocess may not schedule very quickly even if the
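
The comment added above describes a general os/exec pitfall, not something specific to this test: Wait closes the parent's read end of the stdout pipe, so all reads must complete before Wait is called. A minimal sketch of the safe ordering (assumes a Unix-like environment with an echo binary on PATH):

package main

import (
	"fmt"
	"io"
	"log"
	"os/exec"
)

func main() {
	cmd := exec.Command("echo", "hello")
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatal(err)
	}
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}

	// Read everything *before* calling Wait: Wait closes the parent's read
	// end of the pipe, so a concurrent Wait can turn ReadAll into fs.ErrClosed.
	out, err := io.ReadAll(stdout)
	if err != nil {
		log.Fatal(err)
	}

	// Only now is it safe to reap the child.
	if err := cmd.Wait(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("child wrote %q\n", out)
}
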
diff --git a/src/runtime/signal_darwin_amd64.go b/src/runtime/signal_darwin_amd64.go
index abc212a..20544d8 100644
--- a/src/runtime/signal_darwin_amd64.go
+++ b/src/runtime/signal_darwin_amd64.go
@@ -84,6 +84,10 @@
// in real life, people will probably search for it and find this code.
// There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e
// as I type this comment.
+ //
+ // Note: if this code is removed, please consider
+ // enabling TestSignalForwardingGo for darwin-amd64 in
+ // misc/cgo/testcarchive/carchive_test.go.
if c.sigcode() == _SI_USER {
c.set_sigcode(_SI_USER + 1)
c.set_sigaddr(0xb01dfacedebac1e)
diff --git a/src/runtime/signal_freebsd_riscv64.go b/src/runtime/signal_freebsd_riscv64.go
new file mode 100644
index 0000000..fbf6c63
--- /dev/null
+++ b/src/runtime/signal_freebsd_riscv64.go
@@ -0,0 +1,63 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+ info *siginfo
+ ctxt unsafe.Pointer
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) regs() *mcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+
+func (c *sigctxt) ra() uint64 { return c.regs().mc_gpregs.gp_ra }
+func (c *sigctxt) sp() uint64 { return c.regs().mc_gpregs.gp_sp }
+func (c *sigctxt) gp() uint64 { return c.regs().mc_gpregs.gp_gp }
+func (c *sigctxt) tp() uint64 { return c.regs().mc_gpregs.gp_tp }
+func (c *sigctxt) t0() uint64 { return c.regs().mc_gpregs.gp_t[0] }
+func (c *sigctxt) t1() uint64 { return c.regs().mc_gpregs.gp_t[1] }
+func (c *sigctxt) t2() uint64 { return c.regs().mc_gpregs.gp_t[2] }
+func (c *sigctxt) s0() uint64 { return c.regs().mc_gpregs.gp_s[0] }
+func (c *sigctxt) s1() uint64 { return c.regs().mc_gpregs.gp_s[1] }
+func (c *sigctxt) a0() uint64 { return c.regs().mc_gpregs.gp_a[0] }
+func (c *sigctxt) a1() uint64 { return c.regs().mc_gpregs.gp_a[1] }
+func (c *sigctxt) a2() uint64 { return c.regs().mc_gpregs.gp_a[2] }
+func (c *sigctxt) a3() uint64 { return c.regs().mc_gpregs.gp_a[3] }
+func (c *sigctxt) a4() uint64 { return c.regs().mc_gpregs.gp_a[4] }
+func (c *sigctxt) a5() uint64 { return c.regs().mc_gpregs.gp_a[5] }
+func (c *sigctxt) a6() uint64 { return c.regs().mc_gpregs.gp_a[6] }
+func (c *sigctxt) a7() uint64 { return c.regs().mc_gpregs.gp_a[7] }
+func (c *sigctxt) s2() uint64 { return c.regs().mc_gpregs.gp_s[2] }
+func (c *sigctxt) s3() uint64 { return c.regs().mc_gpregs.gp_s[3] }
+func (c *sigctxt) s4() uint64 { return c.regs().mc_gpregs.gp_s[4] }
+func (c *sigctxt) s5() uint64 { return c.regs().mc_gpregs.gp_s[5] }
+func (c *sigctxt) s6() uint64 { return c.regs().mc_gpregs.gp_s[6] }
+func (c *sigctxt) s7() uint64 { return c.regs().mc_gpregs.gp_s[7] }
+func (c *sigctxt) s8() uint64 { return c.regs().mc_gpregs.gp_s[8] }
+func (c *sigctxt) s9() uint64 { return c.regs().mc_gpregs.gp_s[9] }
+func (c *sigctxt) s10() uint64 { return c.regs().mc_gpregs.gp_s[10] }
+func (c *sigctxt) s11() uint64 { return c.regs().mc_gpregs.gp_s[11] }
+func (c *sigctxt) t3() uint64 { return c.regs().mc_gpregs.gp_t[3] }
+func (c *sigctxt) t4() uint64 { return c.regs().mc_gpregs.gp_t[4] }
+func (c *sigctxt) t5() uint64 { return c.regs().mc_gpregs.gp_t[5] }
+func (c *sigctxt) t6() uint64 { return c.regs().mc_gpregs.gp_t[6] }
+
+//go:nosplit
+//go:nowritebarrierrec
+func (c *sigctxt) pc() uint64 { return c.regs().mc_gpregs.gp_sepc }
+
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
+
+func (c *sigctxt) set_pc(x uint64) { c.regs().mc_gpregs.gp_sepc = x }
+func (c *sigctxt) set_ra(x uint64) { c.regs().mc_gpregs.gp_ra = x }
+func (c *sigctxt) set_sp(x uint64) { c.regs().mc_gpregs.gp_sp = x }
+func (c *sigctxt) set_gp(x uint64) { c.regs().mc_gpregs.gp_gp = x }
+
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) { c.info.si_addr = x }
diff --git a/src/runtime/signal_riscv64.go b/src/runtime/signal_riscv64.go
index 5eeb227..b8d7b97 100644
--- a/src/runtime/signal_riscv64.go
+++ b/src/runtime/signal_riscv64.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build linux && riscv64
+//go:build (linux || freebsd) && riscv64
package runtime
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index 0be499b..c401fc1 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -349,11 +349,11 @@
}
// Acknowledge the preemption.
- atomic.Xadd(&gp.m.preemptGen, 1)
- atomic.Store(&gp.m.signalPending, 0)
+ gp.m.preemptGen.Add(1)
+ gp.m.signalPending.Store(0)
if GOOS == "darwin" || GOOS == "ios" {
- atomic.Xadd(&pendingPreemptSignals, -1)
+ pendingPreemptSignals.Add(-1)
}
}
@@ -372,9 +372,9 @@
execLock.rlock()
}
- if atomic.Cas(&mp.signalPending, 0, 1) {
+ if mp.signalPending.CompareAndSwap(0, 1) {
if GOOS == "darwin" || GOOS == "ios" {
- atomic.Xadd(&pendingPreemptSignals, 1)
+ pendingPreemptSignals.Add(1)
}
// If multiple threads are preempting the same M, it may send many
@@ -433,9 +433,9 @@
return
}
c := &sigctxt{info, ctx}
- g := sigFetchG(c)
- setg(g)
- if g == nil {
+ gp := sigFetchG(c)
+ setg(gp)
+ if gp == nil {
if sig == _SIGPROF {
// Some platforms (Linux) have per-thread timers, which we use in
// combination with the process-wide timer. Avoid double-counting.
@@ -453,7 +453,7 @@
// The default behavior for sigPreempt is to ignore
// the signal, so badsignal will be a no-op anyway.
if GOOS == "darwin" || GOOS == "ios" {
- atomic.Xadd(&pendingPreemptSignals, -1)
+ pendingPreemptSignals.Add(-1)
}
return
}
@@ -462,22 +462,22 @@
return
}
- setg(g.m.gsignal)
+ setg(gp.m.gsignal)
// If some non-Go code called sigaltstack, adjust.
var gsignalStack gsignalStack
- setStack := adjustSignalStack(sig, g.m, &gsignalStack)
+ setStack := adjustSignalStack(sig, gp.m, &gsignalStack)
if setStack {
- g.m.gsignal.stktopsp = getcallersp()
+ gp.m.gsignal.stktopsp = getcallersp()
}
- if g.stackguard0 == stackFork {
+ if gp.stackguard0 == stackFork {
signalDuringFork(sig)
}
c.fixsigcode(sig)
- sighandler(sig, info, ctx, g)
- setg(g)
+ sighandler(sig, info, ctx, gp)
+ setg(gp)
if setStack {
restoreGsignalStack(&gsignalStack)
}
@@ -502,7 +502,7 @@
//go:nosplit
//go:nowritebarrierrec
func sigprofNonGo(sig uint32, info *siginfo, ctx unsafe.Pointer) {
- if prof.hz != 0 {
+ if prof.hz.Load() != 0 {
c := &sigctxt{info, ctx}
// Some platforms (Linux) have per-thread timers, which we use in
// combination with the process-wide timer. Avoid double-counting.
@@ -525,7 +525,7 @@
//go:nosplit
//go:nowritebarrierrec
func sigprofNonGoPC(pc uintptr) {
- if prof.hz != 0 {
+ if prof.hz.Load() != 0 {
stk := []uintptr{
pc,
abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
@@ -596,7 +596,7 @@
// sighandler is invoked when a signal occurs. The global g will be
// set to a gsignal goroutine and we will be running on the alternate
-// signal stack. The parameter g will be the value of the global g
+// signal stack. The parameter gp will be the value of the global g
// when the signal occurred. The sig, info, and ctxt parameters are
// from the system signal handler: they are the parameters passed when
// the SA is passed to the sigaction system call.
@@ -606,9 +606,11 @@
//
//go:nowritebarrierrec
func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
- _g_ := getg()
+ // The g executing the signal handler. This is almost always
+ // mp.gsignal. See delayedSignal for an exception.
+ gsignal := getg()
+ mp := gsignal.m
c := &sigctxt{info, ctxt}
- mp := _g_.m
// Cgo TSAN (not the Go race detector) intercepts signals and calls the
// signal handler at a later time. When the signal handler is called, the
@@ -620,7 +622,7 @@
// signal delivery. We use that as an indicator of delayed signals.
// For delayed signals, the handler is called on the g0 stack (see
// adjustSignalStack).
- delayedSignal := *cgo_yield != nil && mp != nil && _g_.stack == mp.g0.stack
+ delayedSignal := *cgo_yield != nil && mp != nil && gsignal.stack == mp.g0.stack
if sig == _SIGPROF {
// Some platforms (Linux) have per-thread timers, which we use in
@@ -660,7 +662,7 @@
if sig < uint32(len(sigtable)) {
flags = sigtable[sig].flags
}
- if c.sigcode() != _SI_USER && flags&_SigPanic != 0 && gp.throwsplit {
+ if !c.sigFromUser() && flags&_SigPanic != 0 && gp.throwsplit {
// We can't safely sigpanic because it may grow the
// stack. Abort in the signal handler instead.
flags = _SigThrow
@@ -670,7 +672,7 @@
// causes a memory fault. Don't turn that into a panic.
flags = _SigThrow
}
- if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+ if !c.sigFromUser() && flags&_SigPanic != 0 {
// The signal is going to cause a panic.
// Arrange the stack so that it looks like the point
// where the signal occurred made a call to the
@@ -688,13 +690,13 @@
return
}
- if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+ if c.sigFromUser() || flags&_SigNotify != 0 {
if sigsend(sig) {
return
}
}
- if c.sigcode() == _SI_USER && signal_ignored(sig) {
+ if c.sigFromUser() && signal_ignored(sig) {
return
}
@@ -704,14 +706,14 @@
// _SigThrow means that we should exit now.
// If we get here with _SigPanic, it means that the signal
- // was sent to us by a program (c.sigcode() == _SI_USER);
+ // was sent to us by a program (c.sigFromUser() is true);
// in that case, if we didn't handle it in sigsend, we exit now.
if flags&(_SigThrow|_SigPanic) == 0 {
return
}
- _g_.m.throwing = throwTypeRuntime
- _g_.m.caughtsig.set(gp)
+ mp.throwing = throwTypeRuntime
+ mp.caughtsig.set(gp)
if crashing == 0 {
startpanic_m()
@@ -723,12 +725,12 @@
print("Signal ", sig, "\n")
}
- print("PC=", hex(c.sigpc()), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.incgo && gp == _g_.m.g0 && _g_.m.curg != nil {
+ print("PC=", hex(c.sigpc()), " m=", mp.id, " sigcode=", c.sigcode(), "\n")
+ if mp.incgo && gp == mp.g0 && mp.curg != nil {
print("signal arrived during cgo execution\n")
// Switch to curg so that we get a traceback of the Go code
// leading up to the cgocall, which switched from curg to g0.
- gp = _g_.m.curg
+ gp = mp.curg
}
if sig == _SIGILL || sig == _SIGFPE {
// It would be nice to know how long the instruction is.
@@ -760,10 +762,10 @@
if level > 0 {
goroutineheader(gp)
tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
- if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+ if crashing > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
- goroutineheader(_g_.m.curg)
- traceback(^uintptr(0), ^uintptr(0), 0, _g_.m.curg)
+ goroutineheader(mp.curg)
+ traceback(^uintptr(0), ^uintptr(0), 0, mp.curg)
} else if crashing == 0 {
tracebackothers(gp)
print("\n")
@@ -814,34 +816,41 @@
//
//go:linkname sigpanic
func sigpanic() {
- g := getg()
- if !canpanic(g) {
+ gp := getg()
+ if !canpanic() {
throw("unexpected signal during runtime execution")
}
- switch g.sig {
+ switch gp.sig {
case _SIGBUS:
- if g.sigcode0 == _BUS_ADRERR && g.sigcode1 < 0x1000 {
+ if gp.sigcode0 == _BUS_ADRERR && gp.sigcode1 < 0x1000 {
panicmem()
}
// Support runtime/debug.SetPanicOnFault.
- if g.paniconfault {
- panicmemAddr(g.sigcode1)
+ if gp.paniconfault {
+ panicmemAddr(gp.sigcode1)
}
- print("unexpected fault address ", hex(g.sigcode1), "\n")
+ print("unexpected fault address ", hex(gp.sigcode1), "\n")
throw("fault")
case _SIGSEGV:
- if (g.sigcode0 == 0 || g.sigcode0 == _SEGV_MAPERR || g.sigcode0 == _SEGV_ACCERR) && g.sigcode1 < 0x1000 {
+ if (gp.sigcode0 == 0 || gp.sigcode0 == _SEGV_MAPERR || gp.sigcode0 == _SEGV_ACCERR) && gp.sigcode1 < 0x1000 {
panicmem()
}
// Support runtime/debug.SetPanicOnFault.
- if g.paniconfault {
- panicmemAddr(g.sigcode1)
+ if gp.paniconfault {
+ panicmemAddr(gp.sigcode1)
}
- print("unexpected fault address ", hex(g.sigcode1), "\n")
+ if inUserArenaChunk(gp.sigcode1) {
+ // We could check that the arena chunk is explicitly set to fault,
+ // but the fact that we faulted on accessing it is enough to prove
+ // that it is.
+ print("accessed data from freed user arena ", hex(gp.sigcode1), "\n")
+ } else {
+ print("unexpected fault address ", hex(gp.sigcode1), "\n")
+ }
throw("fault")
case _SIGFPE:
- switch g.sigcode0 {
+ switch gp.sigcode0 {
case _FPE_INTDIV:
panicdivide()
case _FPE_INTOVF:
@@ -850,11 +859,11 @@
panicfloat()
}
- if g.sig >= uint32(len(sigtable)) {
- // can't happen: we looked up g.sig in sigtable to decide to call sigpanic
+ if gp.sig >= uint32(len(sigtable)) {
+ // can't happen: we looked up gp.sig in sigtable to decide to call sigpanic
throw("unexpected signal value")
}
- panic(errorString(sigtable[g.sig].name))
+ panic(errorString(sigtable[gp.sig].name))
}
// dieFromSignal kills the program with a signal.
@@ -927,7 +936,7 @@
//
// On FreeBSD, the libthr sigaction code prevents
// this from working so we fall through to raise.
- if GOOS != "freebsd" && (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER {
+ if GOOS != "freebsd" && (isarchive || islibrary) && handler == _SIG_DFL && !c.sigFromUser() {
return
}
@@ -1030,8 +1039,6 @@
throw("signal received during fork")
}
-var badginsignalMsg = "fatal: bad g in signal handler\n"
-
// This runs on a foreign stack, without an m or a g. No stack split.
//
//go:nosplit
@@ -1042,8 +1049,7 @@
// There is no extra M. needm will not be able to grab
// an M. Instead of hanging, just crash.
// Cannot call split-stack function as there is no G.
- s := stringStructOf(&badginsignalMsg)
- write(2, s.str, int32(s.len))
+ writeErrStr("fatal: bad g in signal handler\n")
exit(2)
*(*uintptr)(unsafe.Pointer(uintptr(123))) = 2
}
@@ -1108,15 +1114,15 @@
// Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code
// is set to _SI_USER even for a SIGPIPE raised from a write to a closed socket
// or pipe.
- if (c.sigcode() == _SI_USER || flags&_SigPanic == 0) && sig != _SIGPIPE {
+ if (c.sigFromUser() || flags&_SigPanic == 0) && sig != _SIGPIPE {
return false
}
// Determine if the signal occurred inside Go code. We test that:
// (1) we weren't in VDSO page,
// (2) we were in a goroutine (i.e., m.curg != nil), and
// (3) we weren't in CGO.
- g := sigFetchG(c)
- if g != nil && g.m != nil && g.m.curg != nil && !g.m.incgo {
+ gp := sigFetchG(c)
+ if gp != nil && gp.m != nil && gp.m.curg != nil && !gp.m.incgo {
return false
}
@@ -1207,15 +1213,15 @@
// of whether it is already set). Record which choice was made in
// newSigstack, so that it can be undone in unminit.
func minitSignalStack() {
- _g_ := getg()
+ mp := getg().m
var st stackt
sigaltstack(nil, &st)
if st.ss_flags&_SS_DISABLE != 0 || !iscgo {
- signalstack(&_g_.m.gsignal.stack)
- _g_.m.newSigstack = true
+ signalstack(&mp.gsignal.stack)
+ mp.newSigstack = true
} else {
- setGsignalStack(&st, &_g_.m.goSigStack)
- _g_.m.newSigstack = false
+ setGsignalStack(&st, &mp.goSigStack)
+ mp.newSigstack = false
}
}
@@ -1297,18 +1303,18 @@
//go:nosplit
//go:nowritebarrierrec
func setGsignalStack(st *stackt, old *gsignalStack) {
- g := getg()
+ gp := getg()
if old != nil {
- old.stack = g.m.gsignal.stack
- old.stackguard0 = g.m.gsignal.stackguard0
- old.stackguard1 = g.m.gsignal.stackguard1
- old.stktopsp = g.m.gsignal.stktopsp
+ old.stack = gp.m.gsignal.stack
+ old.stackguard0 = gp.m.gsignal.stackguard0
+ old.stackguard1 = gp.m.gsignal.stackguard1
+ old.stktopsp = gp.m.gsignal.stktopsp
}
stsp := uintptr(unsafe.Pointer(st.ss_sp))
- g.m.gsignal.stack.lo = stsp
- g.m.gsignal.stack.hi = stsp + st.ss_size
- g.m.gsignal.stackguard0 = stsp + _StackGuard
- g.m.gsignal.stackguard1 = stsp + _StackGuard
+ gp.m.gsignal.stack.lo = stsp
+ gp.m.gsignal.stack.hi = stsp + st.ss_size
+ gp.m.gsignal.stackguard0 = stsp + _StackGuard
+ gp.m.gsignal.stackguard1 = stsp + _StackGuard
}
// restoreGsignalStack restores the gsignal stack to the value it had
@@ -1340,9 +1346,9 @@
//go:nosplit
//go:linkname setsigsegv
func setsigsegv(pc uintptr) {
- g := getg()
- g.sig = _SIGSEGV
- g.sigpc = pc
- g.sigcode0 = _SEGV_MAPERR
- g.sigcode1 = 0 // TODO: emulate si_addr
+ gp := getg()
+ gp.sig = _SIGSEGV
+ gp.sigpc = pc
+ gp.sigcode0 = _SEGV_MAPERR
+ gp.sigcode1 = 0 // TODO: emulate si_addr
}
diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go
index c5cf38c..37986cd 100644
--- a/src/runtime/signal_windows.go
+++ b/src/runtime/signal_windows.go
@@ -199,35 +199,37 @@
return 0 // not reached
}
+// Always called on g0. gp is the G where the exception occurred.
+//
//go:nosplit
func winthrow(info *exceptionrecord, r *context, gp *g) {
- _g_ := getg()
+ g0 := getg()
- if panicking != 0 { // traceback already printed
+ if panicking.Load() != 0 { // traceback already printed
exit(2)
}
- panicking = 1
+ panicking.Store(1)
// In case we're handling a g0 stack overflow, blow away the
// g0 stack bounds so we have room to print the traceback. If
// this somehow overflows the stack, the OS will trap it.
- _g_.stack.lo = 0
- _g_.stackguard0 = _g_.stack.lo + _StackGuard
- _g_.stackguard1 = _g_.stackguard0
+ g0.stack.lo = 0
+ g0.stackguard0 = g0.stack.lo + _StackGuard
+ g0.stackguard1 = g0.stackguard0
print("Exception ", hex(info.exceptioncode), " ", hex(info.exceptioninformation[0]), " ", hex(info.exceptioninformation[1]), " ", hex(r.ip()), "\n")
print("PC=", hex(r.ip()), "\n")
- if _g_.m.incgo && gp == _g_.m.g0 && _g_.m.curg != nil {
+ if g0.m.incgo && gp == g0.m.g0 && g0.m.curg != nil {
if iscgo {
print("signal arrived during external code execution\n")
}
- gp = _g_.m.curg
+ gp = g0.m.curg
}
print("\n")
- _g_.m.throwing = throwTypeRuntime
- _g_.m.caughtsig.set(gp)
+ g0.m.throwing = throwTypeRuntime
+ g0.m.caughtsig.set(gp)
level, _, docrash := gotraceback()
if level > 0 {
@@ -244,20 +246,27 @@
}
func sigpanic() {
- g := getg()
- if !canpanic(g) {
+ gp := getg()
+ if !canpanic() {
throw("unexpected signal during runtime execution")
}
- switch g.sig {
+ switch gp.sig {
case _EXCEPTION_ACCESS_VIOLATION:
- if g.sigcode1 < 0x1000 {
+ if gp.sigcode1 < 0x1000 {
panicmem()
}
- if g.paniconfault {
- panicmemAddr(g.sigcode1)
+ if gp.paniconfault {
+ panicmemAddr(gp.sigcode1)
}
- print("unexpected fault address ", hex(g.sigcode1), "\n")
+ if inUserArenaChunk(gp.sigcode1) {
+ // We could check that the arena chunk is explicitly set to fault,
+ // but the fact that we faulted on accessing it is enough to prove
+ // that it is.
+ print("accessed data from freed user arena ", hex(gp.sigcode1), "\n")
+ } else {
+ print("unexpected fault address ", hex(gp.sigcode1), "\n")
+ }
throw("fault")
case _EXCEPTION_INT_DIVIDE_BY_ZERO:
panicdivide()
diff --git a/src/runtime/signal_windows_test.go b/src/runtime/signal_windows_test.go
index add23cd..c9b8e90 100644
--- a/src/runtime/signal_windows_test.go
+++ b/src/runtime/signal_windows_test.go
@@ -1,4 +1,6 @@
-//go:build windows
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
package runtime_test
@@ -15,6 +17,64 @@
"testing"
)
+func TestVectoredHandlerExceptionInNonGoThread(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+ if strings.HasPrefix(testenv.Builder(), "windows-amd64-2012") {
+ testenv.SkipFlaky(t, 49681)
+ }
+ testenv.MustHaveGoBuild(t)
+ testenv.MustHaveCGO(t)
+ testenv.MustHaveExecPath(t, "gcc")
+ testprog.Lock()
+ defer testprog.Unlock()
+ dir := t.TempDir()
+
+ // build c program
+ dll := filepath.Join(dir, "veh.dll")
+ cmd := exec.Command("gcc", "-shared", "-o", dll, "testdata/testwinlibthrow/veh.c")
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build c exe: %s\n%s", err, out)
+ }
+
+ // build go exe
+ exe := filepath.Join(dir, "test.exe")
+ cmd = exec.Command(testenv.GoToolPath(t), "build", "-o", exe, "testdata/testwinlibthrow/main.go")
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build go library: %s\n%s", err, out)
+ }
+
+ // run test program in same thread
+ cmd = exec.Command(exe)
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err == nil {
+ t.Fatal("error expected")
+ }
+ if _, ok := err.(*exec.ExitError); ok && len(out) > 0 {
+ if !bytes.Contains(out, []byte("Exception 0x2a")) {
+ t.Fatalf("unexpected failure while running executable: %s\n%s", err, out)
+ }
+ } else {
+ t.Fatalf("unexpected error while running executable: %s\n%s", err, out)
+ }
+ // run test program in a new thread
+ cmd = exec.Command(exe, "thread")
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err == nil {
+ t.Fatal("error expected")
+ }
+ if err, ok := err.(*exec.ExitError); ok {
+ if err.ExitCode() != 42 {
+ t.Fatalf("unexpected failure while running executable: %s\n%s", err, out)
+ }
+ } else {
+ t.Fatalf("unexpected error while running executable: %s\n%s", err, out)
+ }
+}
+
func TestVectoredHandlerDontCrashOnLibrary(t *testing.T) {
if *flagQuick {
t.Skip("-quick")
@@ -91,8 +151,8 @@
// run test program
cmd = exec.Command(exe)
- var stdout bytes.Buffer
- var stderr bytes.Buffer
+ var stdout strings.Builder
+ var stderr strings.Builder
cmd.Stdout = &stdout
cmd.Stderr = &stderr
inPipe, err := cmd.StdinPipe()
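
The exit-status assertions in the new test follow the usual *exec.ExitError pattern: distinguish "ran but exited non-zero" from "could not run at all", then inspect ExitCode. A stripped-down version (assumes a Unix-like false binary on PATH):

package main

import (
	"errors"
	"fmt"
	"os/exec"
)

func main() {
	err := exec.Command("false").Run() // "false" always exits with status 1
	var ee *exec.ExitError
	if errors.As(err, &ee) {
		fmt.Println("exit code:", ee.ExitCode()) // ran, but exited non-zero
	} else if err != nil {
		fmt.Println("could not run:", err) // e.g. binary not found
	}
}
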
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index 49502cb..51e424d 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -54,8 +54,8 @@
wanted [(_NSIG + 31) / 32]uint32
ignored [(_NSIG + 31) / 32]uint32
recv [(_NSIG + 31) / 32]uint32
- state uint32
- delivering uint32
+ state atomic.Uint32
+ delivering atomic.Uint32
inuse bool
}
@@ -74,11 +74,11 @@
return false
}
- atomic.Xadd(&sig.delivering, 1)
+ sig.delivering.Add(1)
// We are running in the signal handler; defer is not available.
if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 {
- atomic.Xadd(&sig.delivering, -1)
+ sig.delivering.Add(-1)
return false
}
@@ -86,7 +86,7 @@
for {
mask := sig.mask[s/32]
if mask&bit != 0 {
- atomic.Xadd(&sig.delivering, -1)
+ sig.delivering.Add(-1)
return true // signal already in queue
}
if atomic.Cas(&sig.mask[s/32], mask, mask|bit) {
@@ -97,18 +97,18 @@
// Notify receiver that queue has new bit.
Send:
for {
- switch atomic.Load(&sig.state) {
+ switch sig.state.Load() {
default:
throw("sigsend: inconsistent state")
case sigIdle:
- if atomic.Cas(&sig.state, sigIdle, sigSending) {
+ if sig.state.CompareAndSwap(sigIdle, sigSending) {
break Send
}
case sigSending:
// notification already pending
break Send
case sigReceiving:
- if atomic.Cas(&sig.state, sigReceiving, sigIdle) {
+ if sig.state.CompareAndSwap(sigReceiving, sigIdle) {
if GOOS == "darwin" || GOOS == "ios" {
sigNoteWakeup(&sig.note)
break Send
@@ -119,7 +119,7 @@
}
}
- atomic.Xadd(&sig.delivering, -1)
+ sig.delivering.Add(-1)
return true
}
@@ -140,11 +140,11 @@
// Wait for updates to be available from signal sender.
Receive:
for {
- switch atomic.Load(&sig.state) {
+ switch sig.state.Load() {
default:
throw("signal_recv: inconsistent state")
case sigIdle:
- if atomic.Cas(&sig.state, sigIdle, sigReceiving) {
+ if sig.state.CompareAndSwap(sigIdle, sigReceiving) {
if GOOS == "darwin" || GOOS == "ios" {
sigNoteSleep(&sig.note)
break Receive
@@ -154,7 +154,7 @@
break Receive
}
case sigSending:
- if atomic.Cas(&sig.state, sigSending, sigIdle) {
+ if sig.state.CompareAndSwap(sigSending, sigIdle) {
break Receive
}
}
@@ -182,14 +182,14 @@
// a signal, has read from sig.wanted, is now updating sig.mask,
// and has not yet woken up the processor thread. We need to wait
// until all current signal deliveries have completed.
- for atomic.Load(&sig.delivering) != 0 {
+ for sig.delivering.Load() != 0 {
Gosched()
}
// Although WaitUntilIdle seems like the right name for this
// function, the state we are looking for is sigReceiving, not
// sigIdle. The sigIdle state is really more like sigProcessing.
- for atomic.Load(&sig.state) != sigReceiving {
+ for sig.state.Load() != sigReceiving {
Gosched()
}
}
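
The sigsend/signal_recv changes above keep the same lock-free protocol, just expressed with typed atomics: both sides drive a three-state machine (sigIdle, sigSending, sigReceiving) with CompareAndSwap so that neither ever blocks while the other spins. A self-contained sketch of that handshake (a buffered channel stands in for the runtime's note; the state values are illustrative):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

const (
	stateIdle = iota
	stateSending
	stateReceiving
)

func main() {
	var state atomic.Uint32
	note := make(chan struct{}, 1) // stands in for the runtime's note

	var wg sync.WaitGroup
	wg.Add(1)
	go func() { // receiver side, like signal_recv
		defer wg.Done()
	Receive:
		for {
			switch state.Load() {
			case stateIdle:
				if state.CompareAndSwap(stateIdle, stateReceiving) {
					<-note // park until a sender wakes us
					break Receive
				}
			case stateSending:
				if state.CompareAndSwap(stateSending, stateIdle) {
					break Receive // a notification was already pending
				}
			}
		}
	}()

	// sender side, like sigsend
Send:
	for {
		switch state.Load() {
		case stateIdle:
			if state.CompareAndSwap(stateIdle, stateSending) {
				break Send // receiver will consume the pending notification
			}
		case stateSending:
			break Send // notification already pending
		case stateReceiving:
			if state.CompareAndSwap(stateReceiving, stateIdle) {
				note <- struct{}{} // wake the parked receiver
				break Send
			}
		}
	}

	wg.Wait()
	fmt.Println("handshake complete")
}
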
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 2413a46..459dc88 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -18,7 +18,7 @@
cap int
}
-// A notInHeapSlice is a slice backed by go:notinheap memory.
+// A notInHeapSlice is a slice backed by runtime/internal/sys.NotInHeap memory.
type notInHeapSlice struct {
array *notInHeap
len int
@@ -123,92 +123,72 @@
return math.MulUintptr(a, b)
}
-// Keep this code in sync with cmd/compile/internal/walk/builtin.go:walkUnsafeSlice
-func unsafeslice(et *_type, ptr unsafe.Pointer, len int) {
- if len < 0 {
- panicunsafeslicelen()
- }
-
- mem, overflow := math.MulUintptr(et.size, uintptr(len))
- if overflow || mem > -uintptr(ptr) {
- if ptr == nil {
- panicunsafeslicenilptr()
- }
- panicunsafeslicelen()
- }
-}
-
-// Keep this code in sync with cmd/compile/internal/walk/builtin.go:walkUnsafeSlice
-func unsafeslice64(et *_type, ptr unsafe.Pointer, len64 int64) {
- len := int(len64)
- if int64(len) != len64 {
- panicunsafeslicelen()
- }
- unsafeslice(et, ptr, len)
-}
-
-func unsafeslicecheckptr(et *_type, ptr unsafe.Pointer, len64 int64) {
- unsafeslice64(et, ptr, len64)
-
- // Check that underlying array doesn't straddle multiple heap objects.
- // unsafeslice64 has already checked for overflow.
- if checkptrStraddles(ptr, uintptr(len64)*et.size) {
- throw("checkptr: unsafe.Slice result straddles multiple allocations")
- }
-}
-
-func panicunsafeslicelen() {
- panic(errorString("unsafe.Slice: len out of range"))
-}
-
-func panicunsafeslicenilptr() {
- panic(errorString("unsafe.Slice: ptr is nil and len is not zero"))
-}
-
-// growslice handles slice growth during append.
-// It is passed the slice element type, the old slice, and the desired new minimum capacity,
-// and it returns a new slice with at least that capacity, with the old data
-// copied into it.
-// The new slice's length is set to the old slice's length,
-// NOT to the new requested capacity.
-// This is for codegen convenience. The old slice's length is used immediately
-// to calculate where to write new values during an append.
-// TODO: When the old backend is gone, reconsider this decision.
-// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
-func growslice(et *_type, old slice, cap int) slice {
+// growslice allocates new backing store for a slice.
+//
+// arguments:
+//
+// oldPtr = pointer to the slice's backing array
+// newLen = new length (= oldLen + num)
+// oldCap = original slice's capacity.
+// num = number of elements being added
+// et = element type
+//
+// return values:
+//
+// newPtr = pointer to the new backing store
+// newLen = same value as the argument
+// newCap = capacity of the new backing store
+//
+// Requires that uint(newLen) > uint(oldCap).
+// Assumes the original slice length is newLen - num
+//
+// A new backing store is allocated with space for at least newLen elements.
+// Existing entries [0, oldLen) are copied over to the new backing store.
+// Added entries [oldLen, newLen) are not initialized by growslice
+// (although for pointer-containing element types, they are zeroed). They
+// must be initialized by the caller.
+// Trailing entries [newLen, newCap) are zeroed.
+//
+// growslice's odd calling convention makes the generated code that calls
+// this function simpler. In particular, it accepts and returns the
+// new length so that the old length is not live (does not need to be
+// spilled/restored) and the new length is returned (also does not need
+// to be spilled/restored).
+func growslice(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type) slice {
+ oldLen := newLen - num
if raceenabled {
callerpc := getcallerpc()
- racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, abi.FuncPCABIInternal(growslice))
+ racereadrangepc(oldPtr, uintptr(oldLen*int(et.size)), callerpc, abi.FuncPCABIInternal(growslice))
}
if msanenabled {
- msanread(old.array, uintptr(old.len*int(et.size)))
+ msanread(oldPtr, uintptr(oldLen*int(et.size)))
}
if asanenabled {
- asanread(old.array, uintptr(old.len*int(et.size)))
+ asanread(oldPtr, uintptr(oldLen*int(et.size)))
}
- if cap < old.cap {
- panic(errorString("growslice: cap out of range"))
+ if newLen < 0 {
+ panic(errorString("growslice: len out of range"))
}
if et.size == 0 {
// append should not create a slice with nil pointer but non-zero len.
- // We assume that append doesn't need to preserve old.array in this case.
- return slice{unsafe.Pointer(&zerobase), old.len, cap}
+ // We assume that append doesn't need to preserve oldPtr in this case.
+ return slice{unsafe.Pointer(&zerobase), newLen, newLen}
}
- newcap := old.cap
+ newcap := oldCap
doublecap := newcap + newcap
- if cap > doublecap {
- newcap = cap
+ if newLen > doublecap {
+ newcap = newLen
} else {
const threshold = 256
- if old.cap < threshold {
+ if oldCap < threshold {
newcap = doublecap
} else {
// Check 0 < newcap to detect overflow
// and prevent an infinite loop.
- for 0 < newcap && newcap < cap {
+ for 0 < newcap && newcap < newLen {
// Transition from growing 2x for small slices
// to growing 1.25x for large slices. This formula
// gives a smooth-ish transition between the two.
@@ -217,7 +197,7 @@
// Set newcap to the requested cap when
// the newcap calculation overflowed.
if newcap <= 0 {
- newcap = cap
+ newcap = newLen
}
}
}
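
For reference, the capacity heuristic exercised by the newLen-based code above can be written as a standalone function (element size and allocator size-class rounding omitted; the 1.25x step formula is reproduced from memory of recent Go releases and is meant only as an illustration):

package main

import "fmt"

func nextCap(newLen, oldCap int) int {
	newcap := oldCap
	doublecap := newcap + newcap
	if newLen > doublecap {
		return newLen
	}
	const threshold = 256
	if oldCap < threshold {
		return doublecap
	}
	for 0 < newcap && newcap < newLen {
		// Grow roughly 1.25x for large slices (plus a constant so the
		// transition away from 2x is smooth-ish).
		newcap += (newcap + 3*threshold) / 4
	}
	if newcap <= 0 {
		// The calculation overflowed: fall back to the requested length.
		newcap = newLen
	}
	return newcap
}

func main() {
	fmt.Println(nextCap(5, 4))     // small slice: doubles to 8
	fmt.Println(nextCap(513, 512)) // large slice: grows to 832
}
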
@@ -230,14 +210,14 @@
// For powers of 2, use a variable shift.
switch {
case et.size == 1:
- lenmem = uintptr(old.len)
- newlenmem = uintptr(cap)
+ lenmem = uintptr(oldLen)
+ newlenmem = uintptr(newLen)
capmem = roundupsize(uintptr(newcap))
overflow = uintptr(newcap) > maxAlloc
newcap = int(capmem)
case et.size == goarch.PtrSize:
- lenmem = uintptr(old.len) * goarch.PtrSize
- newlenmem = uintptr(cap) * goarch.PtrSize
+ lenmem = uintptr(oldLen) * goarch.PtrSize
+ newlenmem = uintptr(newLen) * goarch.PtrSize
capmem = roundupsize(uintptr(newcap) * goarch.PtrSize)
overflow = uintptr(newcap) > maxAlloc/goarch.PtrSize
newcap = int(capmem / goarch.PtrSize)
@@ -245,21 +225,23 @@
var shift uintptr
if goarch.PtrSize == 8 {
// Mask shift for better code generation.
- shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
+ shift = uintptr(sys.TrailingZeros64(uint64(et.size))) & 63
} else {
- shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
+ shift = uintptr(sys.TrailingZeros32(uint32(et.size))) & 31
}
- lenmem = uintptr(old.len) << shift
- newlenmem = uintptr(cap) << shift
+ lenmem = uintptr(oldLen) << shift
+ newlenmem = uintptr(newLen) << shift
capmem = roundupsize(uintptr(newcap) << shift)
overflow = uintptr(newcap) > (maxAlloc >> shift)
newcap = int(capmem >> shift)
+ capmem = uintptr(newcap) << shift
default:
- lenmem = uintptr(old.len) * et.size
- newlenmem = uintptr(cap) * et.size
+ lenmem = uintptr(oldLen) * et.size
+ newlenmem = uintptr(newLen) * et.size
capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
capmem = roundupsize(capmem)
newcap = int(capmem / et.size)
+ capmem = uintptr(newcap) * et.size
}
// The check of overflow in addition to capmem > maxAlloc is needed
@@ -276,27 +258,48 @@
// print(len(s), "\n")
// }
if overflow || capmem > maxAlloc {
- panic(errorString("growslice: cap out of range"))
+ panic(errorString("growslice: len out of range"))
}
var p unsafe.Pointer
if et.ptrdata == 0 {
p = mallocgc(capmem, nil, false)
- // The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
+ // The append() that calls growslice is going to overwrite from oldLen to newLen.
// Only clear the part that will not be overwritten.
+ // The reflect_growslice() that calls growslice will manually clear
+ // the region not cleared here.
memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
p = mallocgc(capmem, et, true)
if lenmem > 0 && writeBarrier.enabled {
- // Only shade the pointers in old.array since we know the destination slice p
+ // Only shade the pointers in oldPtr since we know the destination slice p
// only contains nil pointers because it has been cleared during alloc.
- bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem-et.size+et.ptrdata)
+ bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(oldPtr), lenmem-et.size+et.ptrdata)
}
}
- memmove(p, old.array, lenmem)
+ memmove(p, oldPtr, lenmem)
- return slice{p, old.len, newcap}
+ return slice{p, newLen, newcap}
+}
+
+//go:linkname reflect_growslice reflect.growslice
+func reflect_growslice(et *_type, old slice, num int) slice {
+ // Semantically equivalent to slices.Grow, except that the caller
+ // is responsible for ensuring that old.len+num > old.cap.
+ num -= old.cap - old.len // preserve memory of old[old.len:old.cap]
+ new := growslice(old.array, old.cap+num, old.cap, num, et)
+ // growslice does not zero out new[old.cap:new.len] since it assumes that
+ // the memory will be overwritten by an append() that called growslice.
+ // Since the caller of reflect_growslice is not append(),
+ // zero out this region before returning the slice to the reflect package.
+ if et.ptrdata == 0 {
+ oldcapmem := uintptr(old.cap) * et.size
+ newlenmem := uintptr(new.len) * et.size
+ memclrNoHeapPointers(add(new.array, oldcapmem), newlenmem-oldcapmem)
+ }
+ new.len = old.len // preserve the old length
+ return new
}
func isPowerOfTwo(x uintptr) bool {
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 2a7f0bd..d5e587a 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -98,6 +98,7 @@
// The guard leaves enough room for one _StackSmall frame plus
// a _StackLimit chain of NOSPLIT calls plus _StackSystem
// bytes for the OS.
+ // This arithmetic must match that in cmd/internal/objabi/stack.go:StackLimit.
_StackGuard = 928*sys.StackGuardMultiplier + _StackSystem
// After a stack split check the SP is allowed to be this
@@ -107,6 +108,7 @@
// The maximum number of bytes that a chain of NOSPLIT
// functions can use.
+ // This arithmetic must match that in cmd/internal/objabi/stack.go:StackLimit.
_StackLimit = _StackGuard - _StackSystem - _StackSmall
)
@@ -157,11 +159,11 @@
// There is a free list for each order.
var stackpool [_NumStackOrders]struct {
item stackpoolItem
- _ [cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize]byte
+ _ [(cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
}
-//go:notinheap
type stackpoolItem struct {
+ _ sys.NotInHeap
mu mutex
span mSpanList
}
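
The padding fix above adds an outer modulo so that a struct whose size is already a multiple of the cache line gets no pad at all, rather than a full extra line. The arithmetic in isolation:

package main

import "fmt"

// pad returns the number of padding bytes needed to round size up to a
// multiple of line; the outer modulo keeps it at zero when size already is.
func pad(size, line uintptr) uintptr {
	return (line - size%line) % line
}

func main() {
	const line = 64
	fmt.Println(pad(40, line)) // 24
	fmt.Println(pad(64, line)) // 0 — the old formula would have yielded a full 64
}
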
@@ -564,7 +566,7 @@
sghi uintptr
}
-// Adjustpointer checks whether *vpp is in the old stack described by adjinfo.
+// adjustpointer checks whether *vpp is in the old stack described by adjinfo.
// If so, it rewrites *vpp to point into the new stack.
func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
pp := (*uintptr)(vpp)
@@ -617,7 +619,7 @@
}
b := *(addb(bv.bytedata, i/8))
for b != 0 {
- j := uintptr(sys.Ctz8(b))
+ j := uintptr(sys.TrailingZeros8(b))
b &= b - 1
pp := (*uintptr)(add(scanp, (i+j)*goarch.PtrSize))
retry:
@@ -664,7 +666,7 @@
return true
}
- locals, args, objs := getStackMap(frame, &adjinfo.cache, true)
+ locals, args, objs := frame.getStackMap(&adjinfo.cache, true)
// Adjust local variables if stack frame has been allocated.
if locals.n > 0 {
@@ -886,7 +888,7 @@
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans {
- if newsize < old.hi-old.lo && atomic.Load8(&gp.parkingOnChan) != 0 {
+ if newsize < old.hi-old.lo && gp.parkingOnChan.Load() {
// It's not safe for someone to shrink this stack while we're actively
// parking on a channel, but it is safe to grow since we do that
// ourselves and explicitly don't want to synchronize with channels
@@ -1150,7 +1152,7 @@
// We also can't *shrink* the stack in the window between the
// goroutine calling gopark to park on a channel and
// gp.activeStackChans being set.
- return gp.syscallsp == 0 && !gp.asyncSafePoint && atomic.Load8(&gp.parkingOnChan) == 0
+ return gp.syscallsp == 0 && !gp.asyncSafePoint && !gp.parkingOnChan.Load()
}
// Maybe shrink the stack being used by gp.
@@ -1247,147 +1249,6 @@
unlock(&stackLarge.lock)
}
-// getStackMap returns the locals and arguments live pointer maps, and
-// stack object list for frame.
-func getStackMap(frame *stkframe, cache *pcvalueCache, debug bool) (locals, args bitvector, objs []stackObjectRecord) {
- targetpc := frame.continpc
- if targetpc == 0 {
- // Frame is dead. Return empty bitvectors.
- return
- }
-
- f := frame.fn
- pcdata := int32(-1)
- if targetpc != f.entry() {
- // Back up to the CALL. If we're at the function entry
- // point, we want to use the entry map (-1), even if
- // the first instruction of the function changes the
- // stack map.
- targetpc--
- pcdata = pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, cache)
- }
- if pcdata == -1 {
- // We do not have a valid pcdata value but there might be a
- // stackmap for this function. It is likely that we are looking
- // at the function prologue, assume so and hope for the best.
- pcdata = 0
- }
-
- // Local variables.
- size := frame.varp - frame.sp
- var minsize uintptr
- switch goarch.ArchFamily {
- case goarch.ARM64:
- minsize = sys.StackAlign
- default:
- minsize = sys.MinFrameSize
- }
- if size > minsize {
- stackid := pcdata
- stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
- if stkmap == nil || stkmap.n <= 0 {
- print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
- throw("missing stackmap")
- }
- // If nbit == 0, there's no work to do.
- if stkmap.nbit > 0 {
- if stackid < 0 || stackid >= stkmap.n {
- // don't know where we are
- print("runtime: pcdata is ", stackid, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n")
- throw("bad symbol table")
- }
- locals = stackmapdata(stkmap, stackid)
- if stackDebug >= 3 && debug {
- print(" locals ", stackid, "/", stkmap.n, " ", locals.n, " words ", locals.bytedata, "\n")
- }
- } else if stackDebug >= 3 && debug {
- print(" no locals to adjust\n")
- }
- }
-
- // Arguments.
- if frame.arglen > 0 {
- if frame.argmap != nil {
- // argmap is set when the function is reflect.makeFuncStub or reflect.methodValueCall.
- // In this case, arglen specifies how much of the args section is actually live.
- // (It could be either all the args + results, or just the args.)
- args = *frame.argmap
- n := int32(frame.arglen / goarch.PtrSize)
- if n < args.n {
- args.n = n // Don't use more of the arguments than arglen.
- }
- } else {
- stackmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
- if stackmap == nil || stackmap.n <= 0 {
- print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
- throw("missing stackmap")
- }
- if pcdata < 0 || pcdata >= stackmap.n {
- // don't know where we are
- print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " args stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n")
- throw("bad symbol table")
- }
- if stackmap.nbit > 0 {
- args = stackmapdata(stackmap, pcdata)
- }
- }
- }
-
- // stack objects.
- if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") &&
- unsafe.Sizeof(abi.RegArgs{}) > 0 && frame.argmap != nil {
- // argmap is set when the function is reflect.makeFuncStub or reflect.methodValueCall.
- // We don't actually use argmap in this case, but we need to fake the stack object
- // record for these frames which contain an internal/abi.RegArgs at a hard-coded offset.
- // This offset matches the assembly code on amd64 and arm64.
- objs = methodValueCallFrameObjs[:]
- } else {
- p := funcdata(f, _FUNCDATA_StackObjects)
- if p != nil {
- n := *(*uintptr)(p)
- p = add(p, goarch.PtrSize)
- r0 := (*stackObjectRecord)(noescape(p))
- objs = unsafe.Slice(r0, int(n))
- // Note: the noescape above is needed to keep
- // getStackMap from "leaking param content:
- // frame". That leak propagates up to getgcmask, then
- // GCMask, then verifyGCInfo, which converts the stack
- // gcinfo tests into heap gcinfo tests :(
- }
- }
-
- return
-}
-
-var methodValueCallFrameObjs [1]stackObjectRecord // initialized in stackobjectinit
-
-func stkobjinit() {
- var abiRegArgsEface any = abi.RegArgs{}
- abiRegArgsType := efaceOf(&abiRegArgsEface)._type
- if abiRegArgsType.kind&kindGCProg != 0 {
- throw("abiRegArgsType needs GC Prog, update methodValueCallFrameObjs")
- }
- // Set methodValueCallFrameObjs[0].gcdataoff so that
- // stackObjectRecord.gcdata() will work correctly with it.
- ptr := uintptr(unsafe.Pointer(&methodValueCallFrameObjs[0]))
- var mod *moduledata
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- if datap.gofunc <= ptr && ptr < datap.end {
- mod = datap
- break
- }
- }
- if mod == nil {
- throw("methodValueCallFrameObjs is not in a module")
- }
- methodValueCallFrameObjs[0] = stackObjectRecord{
- off: -int32(alignUp(abiRegArgsType.size, 8)), // It's always the highest address local.
- size: int32(abiRegArgsType.size),
- _ptrdata: int32(abiRegArgsType.ptrdata),
- gcdataoff: uint32(uintptr(unsafe.Pointer(abiRegArgsType.gcdata)) - mod.rodata),
- }
-}
-
// A stackObjectRecord is generated by the compiler for each stack object in a stack frame.
// This record must match the generator code in cmd/compile/internal/liveness/plive.go:emitStackObjects.
type stackObjectRecord struct {
diff --git a/src/runtime/stack_test.go b/src/runtime/stack_test.go
index dfb29a9..92d5880 100644
--- a/src/runtime/stack_test.go
+++ b/src/runtime/stack_test.go
@@ -5,7 +5,6 @@
package runtime_test
import (
- "bytes"
"fmt"
"reflect"
"regexp"
@@ -109,13 +108,14 @@
// in finalizer
var finalizerStart time.Time
- var started, progress uint32
+ var started atomic.Bool
+ var progress atomic.Uint32
wg.Add(1)
s := new(string) // Must be of a type that avoids the tiny allocator, or else the finalizer might not run.
SetFinalizer(s, func(ss *string) {
defer wg.Done()
finalizerStart = time.Now()
- atomic.StoreUint32(&started, 1)
+ started.Store(true)
growStack(&progress)
})
setFinalizerTime := time.Now()
@@ -128,10 +128,10 @@
// Panic — instead of calling t.Error and returning from the test — so
// that we get a useful goroutine dump if the test times out, especially
// if GOTRACEBACK=system or GOTRACEBACK=crash is set.
- if atomic.LoadUint32(&started) == 0 {
+ if !started.Load() {
panic("finalizer did not start")
} else {
- panic(fmt.Sprintf("finalizer started %s ago (%s after registration) and ran %d iterations, but did not return", time.Since(finalizerStart), finalizerStart.Sub(setFinalizerTime), atomic.LoadUint32(&progress)))
+ panic(fmt.Sprintf("finalizer started %s ago (%s after registration) and ran %d iterations, but did not return", time.Since(finalizerStart), finalizerStart.Sub(setFinalizerTime), progress.Load()))
}
})
defer timer.Stop()
@@ -139,7 +139,7 @@
GC()
wg.Wait()
- t.Logf("finalizer started after %s and ran %d iterations in %v", finalizerStart.Sub(setFinalizerTime), atomic.LoadUint32(&progress), time.Since(finalizerStart))
+ t.Logf("finalizer started after %s and ran %d iterations in %v", finalizerStart.Sub(setFinalizerTime), progress.Load(), time.Since(finalizerStart))
}
// ... and in init
@@ -147,7 +147,7 @@
// growStack()
//}
-func growStack(progress *uint32) {
+func growStack(progress *atomic.Uint32) {
n := 1 << 10
if testing.Short() {
n = 1 << 8
@@ -159,7 +159,7 @@
panic("stack is corrupted")
}
if progress != nil {
- atomic.StoreUint32(progress, uint32(i))
+ progress.Store(uint32(i))
}
}
GC()
@@ -777,7 +777,7 @@
// and that we see TestTracebackSystemstack.
countIn, countOut := 0, 0
frames := CallersFrames(pcs)
- var tb bytes.Buffer
+ var tb strings.Builder
for {
frame, more := frames.Next()
fmt.Fprintf(&tb, "\n%s+0x%x %s:%d", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line)
diff --git a/src/runtime/start_line_amd64_test.go b/src/runtime/start_line_amd64_test.go
new file mode 100644
index 0000000..305ed0b
--- /dev/null
+++ b/src/runtime/start_line_amd64_test.go
@@ -0,0 +1,23 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime/internal/startlinetest"
+ "testing"
+)
+
+// TestStartLineAsm tests the start line metadata of an assembly function. This
+// is only tested on amd64 to avoid the need for a proliferation of per-arch
+// copies of this function.
+func TestStartLineAsm(t *testing.T) {
+ startlinetest.CallerStartLine = callerStartLine
+
+ const wantLine = 23
+ got := startlinetest.AsmFunc()
+ if got != wantLine {
+ t.Errorf("start line got %d want %d", got, wantLine)
+ }
+}
diff --git a/src/runtime/start_line_test.go b/src/runtime/start_line_test.go
new file mode 100644
index 0000000..6c4faa8
--- /dev/null
+++ b/src/runtime/start_line_test.go
@@ -0,0 +1,138 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "internal/testenv"
+ "runtime"
+ "testing"
+)
+
+// The tests in this file test the function start line metadata included in
+// _func and inlinedCall. TestStartLine hard-codes the start lines of functions
+// in this file. If code moves, the test will need to be updated.
+//
+// The "start line" of a function should be the line containing the func
+// keyword.
+
+func normalFunc() int {
+ return callerStartLine(false)
+}
+
+func multilineDeclarationFunc() int {
+ return multilineDeclarationFunc1(0, 0, 0)
+}
+
+//go:noinline
+func multilineDeclarationFunc1(
+ a, b, c int) int {
+ return callerStartLine(false)
+}
+
+func blankLinesFunc() int {
+
+ // Some
+ // lines
+ // without
+ // code
+
+ return callerStartLine(false)
+}
+
+func inlineFunc() int {
+ return inlineFunc1()
+}
+
+func inlineFunc1() int {
+ return callerStartLine(true)
+}
+
+var closureFn func() int
+
+func normalClosure() int {
+ // Assign to global to ensure this isn't inlined.
+ closureFn = func() int {
+ return callerStartLine(false)
+ }
+ return closureFn()
+}
+
+func inlineClosure() int {
+ return func() int {
+ return callerStartLine(true)
+ }()
+}
+
+func TestStartLine(t *testing.T) {
+ // We test inlined vs non-inlined variants. We can't do that if
+ // optimizations are disabled.
+ testenv.SkipIfOptimizationOff(t)
+
+ testCases := []struct {
+ name string
+ fn func() int
+ want int
+ }{
+ {
+ name: "normal",
+ fn: normalFunc,
+ want: 21,
+ },
+ {
+ name: "multiline-declaration",
+ fn: multilineDeclarationFunc,
+ want: 30,
+ },
+ {
+ name: "blank-lines",
+ fn: blankLinesFunc,
+ want: 35,
+ },
+ {
+ name: "inline",
+ fn: inlineFunc,
+ want: 49,
+ },
+ {
+ name: "normal-closure",
+ fn: normalClosure,
+ want: 57,
+ },
+ {
+ name: "inline-closure",
+ fn: inlineClosure,
+ want: 64,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ got := tc.fn()
+ if got != tc.want {
+ t.Errorf("start line got %d want %d", got, tc.want)
+ }
+ })
+ }
+}
+
+//go:noinline
+func callerStartLine(wantInlined bool) int {
+ var pcs [1]uintptr
+ n := runtime.Callers(2, pcs[:])
+ if n != 1 {
+ panic(fmt.Sprintf("no caller of callerStartLine? n = %d", n))
+ }
+
+ frames := runtime.CallersFrames(pcs[:])
+ frame, _ := frames.Next()
+
+ inlined := frame.Func == nil // Func always set to nil for inlined frames
+ if wantInlined != inlined {
+ panic(fmt.Sprintf("caller %s inlined got %v want %v", frame.Function, inlined, wantInlined))
+ }
+
+ return runtime.FrameStartLine(&frame)
+}
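
callerStartLine above is built on the standard runtime.Callers / runtime.CallersFrames pattern; FrameStartLine itself is a test-only export, but the same skeleton works anywhere to describe the caller. A minimal sketch:

package main

import (
	"fmt"
	"runtime"
)

// callerLine reports the function name and line of our caller: skip 2 frames,
// runtime.Callers itself and callerLine.
func callerLine() (string, int) {
	var pcs [1]uintptr
	n := runtime.Callers(2, pcs[:])
	if n != 1 {
		return "", 0
	}
	frames := runtime.CallersFrames(pcs[:n])
	frame, _ := frames.Next()
	return frame.Function, frame.Line
}

func main() {
	fn, line := callerLine()
	fmt.Printf("called from %s:%d\n", fn, line)
}
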
diff --git a/src/runtime/stkframe.go b/src/runtime/stkframe.go
new file mode 100644
index 0000000..3ecf3a8
--- /dev/null
+++ b/src/runtime/stkframe.go
@@ -0,0 +1,289 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/abi"
+ "internal/goarch"
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// A stkframe holds information about a single physical stack frame.
+type stkframe struct {
+ // fn is the function being run in this frame. If there is
+ // inlining, this is the outermost function.
+ fn funcInfo
+
+ // pc is the program counter within fn.
+ //
+ // The meaning of this is subtle:
+ //
+ // - Typically, this frame performed a regular function call
+ // and this is the return PC (just after the CALL
+ // instruction). In this case, pc-1 reflects the CALL
+ // instruction itself and is the correct source of symbolic
+ // information.
+ //
+ // - If this frame "called" sigpanic, then pc is the
+ // instruction that panicked, and pc is the correct address
+ // to use for symbolic information.
+ //
+ // - If this is the innermost frame, then PC is where
+ // execution will continue, but it may not be the
+ // instruction following a CALL. This may be from
+ // cooperative preemption, in which case this is the
+ // instruction after the call to morestack. Or this may be
+ // from a signal or an un-started goroutine, in which case
+ // PC could be any instruction, including the first
+ // instruction in a function. Conventionally, we use pc-1
+ // for symbolic information, unless pc == fn.entry(), in
+ // which case we use pc.
+ pc uintptr
+
+ // continpc is the PC where execution will continue in fn, or
+ // 0 if execution will not continue in this frame.
+ //
+ // This is usually the same as pc, unless this frame "called"
+ // sigpanic, in which case it's either the address of
+ // deferreturn or 0 if this frame will never execute again.
+ //
+ // This is the PC to use to look up GC liveness for this frame.
+ continpc uintptr
+
+ lr uintptr // program counter at caller aka link register
+ sp uintptr // stack pointer at pc
+ fp uintptr // stack pointer at caller aka frame pointer
+ varp uintptr // top of local variables
+ argp uintptr // pointer to function arguments
+}
+
+// reflectMethodValue is a partial duplicate of reflect.makeFuncImpl
+// and reflect.methodValue.
+type reflectMethodValue struct {
+ fn uintptr
+ stack *bitvector // ptrmap for both args and results
+ argLen uintptr // just args
+}
+
+// argBytes returns the argument frame size for a call to frame.fn.
+func (frame *stkframe) argBytes() uintptr {
+ if frame.fn.args != _ArgsSizeUnknown {
+ return uintptr(frame.fn.args)
+ }
+ // This is an uncommon and complicated case. Fall back to fully
+ // fetching the argument map to compute its size.
+ argMap, _ := frame.argMapInternal()
+ return uintptr(argMap.n) * goarch.PtrSize
+}
+
+// argMapInternal is used internally by stkframe to fetch special
+// argument maps.
+//
+// argMap.n is always populated with the size of the argument map.
+//
+// argMap.bytedata is only populated for dynamic argument maps (used
+// by reflect). If the caller requires the argument map, it should use
+// this if non-nil, and otherwise fetch the argument map using the
+// current PC.
+//
+// hasReflectStackObj indicates that this frame also has a reflect
+// function stack object, which the caller must synthesize.
+func (frame *stkframe) argMapInternal() (argMap bitvector, hasReflectStackObj bool) {
+ f := frame.fn
+ if f.args != _ArgsSizeUnknown {
+ argMap.n = f.args / goarch.PtrSize
+ return
+ }
+ // Extract argument bitmaps for reflect stubs from the calls they made to reflect.
+ switch funcname(f) {
+ case "reflect.makeFuncStub", "reflect.methodValueCall":
+ // These take a *reflect.methodValue as their
+ // context register and immediately save it to 0(SP).
+ // Get the methodValue from 0(SP).
+ arg0 := frame.sp + sys.MinFrameSize
+
+ minSP := frame.fp
+ if !usesLR {
+ // The CALL itself pushes a word.
+ // Undo that adjustment.
+ minSP -= goarch.PtrSize
+ }
+ if arg0 >= minSP {
+ // The function hasn't started yet.
+ // This only happens if f was the
+ // start function of a new goroutine
+ // that hasn't run yet *and* f takes
+ // no arguments and has no results
+ // (otherwise it will get wrapped in a
+ // closure). In this case, we can't
+ // reach into its locals because it
+ // doesn't have locals yet, but we
+ // also know its argument map is
+ // empty.
+ if frame.pc != f.entry() {
+ print("runtime: confused by ", funcname(f), ": no frame (sp=", hex(frame.sp), " fp=", hex(frame.fp), ") at entry+", hex(frame.pc-f.entry()), "\n")
+ throw("reflect mismatch")
+ }
+ return bitvector{}, false // No locals, so also no stack objects
+ }
+ hasReflectStackObj = true
+ mv := *(**reflectMethodValue)(unsafe.Pointer(arg0))
+ // Figure out whether the return values are valid.
+ // Reflect will update this value after it copies
+ // in the return values.
+ retValid := *(*bool)(unsafe.Pointer(arg0 + 4*goarch.PtrSize))
+ if mv.fn != f.entry() {
+ print("runtime: confused by ", funcname(f), "\n")
+ throw("reflect mismatch")
+ }
+ argMap = *mv.stack
+ if !retValid {
+ // argMap.n includes the results, but
+ // those aren't valid, so drop them.
+ n := int32((uintptr(mv.argLen) &^ (goarch.PtrSize - 1)) / goarch.PtrSize)
+ if n < argMap.n {
+ argMap.n = n
+ }
+ }
+ }
+ return
+}
+
+// getStackMap returns the locals and arguments live pointer maps, and
+// stack object list for frame.
+func (frame *stkframe) getStackMap(cache *pcvalueCache, debug bool) (locals, args bitvector, objs []stackObjectRecord) {
+ targetpc := frame.continpc
+ if targetpc == 0 {
+ // Frame is dead. Return empty bitvectors.
+ return
+ }
+
+ f := frame.fn
+ pcdata := int32(-1)
+ if targetpc != f.entry() {
+ // Back up to the CALL. If we're at the function entry
+ // point, we want to use the entry map (-1), even if
+ // the first instruction of the function changes the
+ // stack map.
+ targetpc--
+ pcdata = pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, cache)
+ }
+ if pcdata == -1 {
+ // We do not have a valid pcdata value but there might be a
+ // stackmap for this function. It is likely that we are looking
+ // at the function prologue; assume so and hope for the best.
+ pcdata = 0
+ }
+
+ // Local variables.
+ size := frame.varp - frame.sp
+ var minsize uintptr
+ switch goarch.ArchFamily {
+ case goarch.ARM64:
+ minsize = sys.StackAlign
+ default:
+ minsize = sys.MinFrameSize
+ }
+ if size > minsize {
+ stackid := pcdata
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+ if stkmap == nil || stkmap.n <= 0 {
+ print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
+ throw("missing stackmap")
+ }
+ // If nbit == 0, there's no work to do.
+ if stkmap.nbit > 0 {
+ if stackid < 0 || stackid >= stkmap.n {
+ // don't know where we are
+ print("runtime: pcdata is ", stackid, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n")
+ throw("bad symbol table")
+ }
+ locals = stackmapdata(stkmap, stackid)
+ if stackDebug >= 3 && debug {
+ print(" locals ", stackid, "/", stkmap.n, " ", locals.n, " words ", locals.bytedata, "\n")
+ }
+ } else if stackDebug >= 3 && debug {
+ print(" no locals to adjust\n")
+ }
+ }
+
+ // Arguments. First fetch frame size and special-case argument maps.
+ var isReflect bool
+ args, isReflect = frame.argMapInternal()
+ if args.n > 0 && args.bytedata == nil {
+ // Non-empty argument frame, but not a special map.
+ // Fetch the argument map at pcdata.
+ stackmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+ if stackmap == nil || stackmap.n <= 0 {
+ print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(args.n*goarch.PtrSize), "\n")
+ throw("missing stackmap")
+ }
+ if pcdata < 0 || pcdata >= stackmap.n {
+ // don't know where we are
+ print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " args stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n")
+ throw("bad symbol table")
+ }
+ if stackmap.nbit == 0 {
+ args.n = 0
+ } else {
+ args = stackmapdata(stackmap, pcdata)
+ }
+ }
+
+ // stack objects.
+ if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") &&
+ unsafe.Sizeof(abi.RegArgs{}) > 0 && isReflect {
+ // For reflect.makeFuncStub and reflect.methodValueCall,
+ // we need to fake the stack object record.
+ // These frames contain an internal/abi.RegArgs at a hard-coded offset.
+ // This offset matches the assembly code on amd64 and arm64.
+ objs = methodValueCallFrameObjs[:]
+ } else {
+ p := funcdata(f, _FUNCDATA_StackObjects)
+ if p != nil {
+ n := *(*uintptr)(p)
+ p = add(p, goarch.PtrSize)
+ r0 := (*stackObjectRecord)(noescape(p))
+ objs = unsafe.Slice(r0, int(n))
+ // Note: the noescape above is needed to keep
+ // getStackMap from "leaking param content:
+ // frame". That leak propagates up to getgcmask, then
+ // GCMask, then verifyGCInfo, which converts the stack
+ // gcinfo tests into heap gcinfo tests :(
+ }
+ }
+
+ return
+}
+
+var methodValueCallFrameObjs [1]stackObjectRecord // initialized in stackobjectinit
+
+func stkobjinit() {
+ var abiRegArgsEface any = abi.RegArgs{}
+ abiRegArgsType := efaceOf(&abiRegArgsEface)._type
+ if abiRegArgsType.kind&kindGCProg != 0 {
+ throw("abiRegArgsType needs GC Prog, update methodValueCallFrameObjs")
+ }
+ // Set methodValueCallFrameObjs[0].gcdataoff so that
+ // stackObjectRecord.gcdata() will work correctly with it.
+ ptr := uintptr(unsafe.Pointer(&methodValueCallFrameObjs[0]))
+ var mod *moduledata
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ if datap.gofunc <= ptr && ptr < datap.end {
+ mod = datap
+ break
+ }
+ }
+ if mod == nil {
+ throw("methodValueCallFrameObjs is not in a module")
+ }
+ methodValueCallFrameObjs[0] = stackObjectRecord{
+ off: -int32(alignUp(abiRegArgsType.size, 8)), // It's always the highest address local.
+ size: int32(abiRegArgsType.size),
+ _ptrdata: int32(abiRegArgsType.ptrdata),
+ gcdataoff: uint32(uintptr(unsafe.Pointer(abiRegArgsType.gcdata)) - mod.rodata),
+ }
+}
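
The reflect.makeFuncStub and reflect.methodValueCall frames that argMapInternal special-cases above come from the public reflect API; a minimal sketch of how such a frame arises (illustrative only):

package main

import (
	"fmt"
	"reflect"
)

func main() {
	// Calling a function built by reflect.MakeFunc goes through
	// reflect.makeFuncStub, whose frame the runtime recognizes by name and
	// whose argument bitmap it recovers from the saved context object
	// (mirrored by reflectMethodValue above).
	typ := reflect.TypeOf(func(int) int { return 0 })
	double := reflect.MakeFunc(typ, func(args []reflect.Value) []reflect.Value {
		return []reflect.Value{reflect.ValueOf(int(args[0].Int()) * 2)}
	})
	f := double.Interface().(func(int) int)
	fmt.Println(f(21)) // 42
}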
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 359a565..a00976b 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -78,7 +78,7 @@
// n is the length of the slice.
// Buf is a fixed-size buffer for the result,
// it is not nil if the result does not escape.
-func slicebytetostring(buf *tmpBuf, ptr *byte, n int) (str string) {
+func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
if n == 0 {
// Turns out to be a relatively common case.
// Consider that you want to parse out data between parens in "foo()bar",
@@ -102,9 +102,7 @@
if goarch.BigEndian {
p = add(p, 7)
}
- stringStructOf(&str).str = p
- stringStructOf(&str).len = 1
- return
+ return unsafe.String((*byte)(p), 1)
}
var p unsafe.Pointer
@@ -113,16 +111,14 @@
} else {
p = mallocgc(uintptr(n), nil, false)
}
- stringStructOf(&str).str = p
- stringStructOf(&str).len = n
memmove(p, unsafe.Pointer(ptr), uintptr(n))
- return
+ return unsafe.String((*byte)(p), n)
}
// stringDataOnStack reports whether the string's data is
// stored on the current goroutine's stack.
func stringDataOnStack(s string) bool {
- ptr := uintptr(stringStructOf(&s).str)
+ ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
stk := getg().stack
return stk.lo <= ptr && ptr < stk.hi
}
@@ -151,7 +147,7 @@
// where k is []byte, T1 to Tn is a nesting of struct and array literals.
// - Used for "<"+string(b)+">" concatenation where b is []byte.
// - Used for string(b)=="foo" comparison where b is []byte.
-func slicebytetostringtmp(ptr *byte, n int) (str string) {
+func slicebytetostringtmp(ptr *byte, n int) string {
if raceenabled && n > 0 {
racereadrangepc(unsafe.Pointer(ptr),
uintptr(n),
@@ -164,9 +160,7 @@
if asanenabled && n > 0 {
asanread(unsafe.Pointer(ptr), uintptr(n))
}
- stringStructOf(&str).str = unsafe.Pointer(ptr)
- stringStructOf(&str).len = n
- return
+ return unsafe.String(ptr, n)
}
func stringtoslicebyte(buf *tmpBuf, s string) []byte {
@@ -271,13 +265,7 @@
// b to set the string contents and then drop b.
func rawstring(size int) (s string, b []byte) {
p := mallocgc(uintptr(size), nil, false)
-
- stringStructOf(&s).str = p
- stringStructOf(&s).len = size
-
- *(*slice)(unsafe.Pointer(&b)) = slice{p, size, size}
-
- return
+ return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
}
// rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
@@ -337,6 +325,13 @@
return s
}
+// internal_syscall_gostring is a version of gostring for internal/syscall/unix.
+//
+//go:linkname internal_syscall_gostring internal/syscall/unix.gostring
+func internal_syscall_gostring(p *byte) string {
+ return gostring(p)
+}
+
func gostringn(p *byte, l int) string {
if l == 0 {
return ""
diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go
index 1ea7f5e..cfc0ad7 100644
--- a/src/runtime/string_test.go
+++ b/src/runtime/string_test.go
@@ -223,6 +223,19 @@
}
}
+func TestConcatTempString(t *testing.T) {
+ s := "bytes"
+ b := []byte(s)
+ n := testing.AllocsPerRun(1000, func() {
+ if "prefix "+string(b)+" suffix" != "prefix bytes suffix" {
+ t.Fatalf("strings are not equal: '%v' and '%v'", "prefix "+string(b)+" suffix", "prefix bytes suffix")
+ }
+ })
+ if n != 0 {
+ t.Fatalf("want 0 allocs, got %v", n)
+ }
+}
+
func TestCompareTempString(t *testing.T) {
s := strings.Repeat("x", sizeNoStack)
b := []byte(s)
@@ -230,10 +243,24 @@
if string(b) != s {
t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
}
+ if string(b) < s {
+ t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
+ }
+ if string(b) > s {
+ t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
+ }
if string(b) == s {
} else {
t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
}
+ if string(b) <= s {
+ } else {
+ t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
+ }
+ if string(b) >= s {
+ } else {
+ t.Fatalf("strings are not equal: '%v' and '%v'", string(b), s)
+ }
})
if n != 0 {
t.Fatalf("want 0 allocs, got %v", n)
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 929f8fa..42c2612 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -59,13 +59,10 @@
//go:noescape
func systemstack(fn func())
-var badsystemstackMsg = "fatal: systemstack called from unexpected goroutine"
-
//go:nosplit
//go:nowritebarrierrec
func badsystemstack() {
- sp := stringStructOf(&badsystemstackMsg)
- write(2, sp.str, int32(sp.len))
+ writeErrStr("fatal: systemstack called from unexpected goroutine")
}
// memclrNoHeapPointers clears n bytes starting at ptr.
@@ -131,7 +128,7 @@
// by the compiler should be in this list.
if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
- goarch.IsS390x|goarch.IsRiscv64 == 1 {
+ goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 {
mp.fastrand += 0xa0761d6478bd642f
hi, lo := math.Mul64(mp.fastrand, mp.fastrand^0xe7037ed1a0b428db)
return uint32(hi ^ lo)
@@ -196,6 +193,9 @@
return uint(fastrand64())
}
+//go:linkname rand_fastrand64 math/rand.fastrand64
+func rand_fastrand64() uint64 { return fastrand64() }
+
//go:linkname sync_fastrandn sync.fastrandn
func sync_fastrandn(n uint32) uint32 { return fastrandn(n) }
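
The fastrand path touched above (now also covering loong64) implements a wyrand-style step: advance the state by an odd constant, then fold the full 128-bit product down to a small result. A standalone sketch using the public math/bits.Mul64 (the runtime uses its internal equivalent; the helper name here is made up):

package main

import (
	"fmt"
	"math/bits"
)

// wyrandStep mirrors the mixing in fastrand: bump the state by an odd
// constant, multiply it by a tweaked copy of itself, and xor-fold the
// 128-bit product into 32 bits.
func wyrandStep(state *uint64) uint32 {
	*state += 0xa0761d6478bd642f
	hi, lo := bits.Mul64(*state, *state^0xe7037ed1a0b428db)
	return uint32(hi ^ lo)
}

func main() {
	var s uint64 = 1
	fmt.Println(wyrandStep(&s), wyrandStep(&s), wyrandStep(&s))
}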
diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go
index 94a888d..0d83deb 100644
--- a/src/runtime/stubs2.go
+++ b/src/runtime/stubs2.go
@@ -6,7 +6,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
// read calls the read system call.
// It returns a non-negative number of bytes written or a negative errno value.
@@ -22,7 +25,7 @@
usleep(usec)
}
-// write calls the write system call.
+// write1 calls the write system call.
// It returns a non-negative number of bytes written or a negative errno value.
//
//go:noescape
@@ -31,11 +34,11 @@
//go:noescape
func open(name *byte, mode, perm int32) int32
-// return value is only set on linux to be used in osinit()
+// return value is only set on linux to be used in osinit().
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
-// exitThread terminates the current thread, writing *wait = 0 when
+// exitThread terminates the current thread, writing *wait = freeMStack when
// the stack is safe to reclaim.
//
//go:noescape
-func exitThread(wait *uint32)
+func exitThread(wait *atomic.Uint32)
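
runtime/internal/atomic.Uint32 mirrors the sync/atomic.Uint32 wrapper added in Go 1.19. A minimal sketch of the store/load handshake the exitThread comment describes, using the public type (names are illustrative, not the runtime's):

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var stackFree atomic.Uint32 // plays the role of exitThread's *wait

	done := make(chan struct{})
	go func() {
		stackFree.Store(1) // exiting side: stack is safe to reclaim
		close(done)
	}()
	<-done

	fmt.Println("safe to reclaim:", stackFree.Load() == 1) // reclaiming side
}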
diff --git a/src/runtime/stubs_ppc64.go b/src/runtime/stubs_ppc64.go
index 6919b74..e23e338 100644
--- a/src/runtime/stubs_ppc64.go
+++ b/src/runtime/stubs_ppc64.go
@@ -6,7 +6,7 @@
package runtime
-// This is needed for vet
+// This is needed for vet.
//
//go:noescape
func callCgoSigaction(sig uintptr, new, old *sigactiont) int32
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index ad34b68..dead27e 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -49,6 +49,15 @@
File string
Line int
+ // startLine is the line number of the beginning of the function in
+ // this frame. Specifically, it is the line number of the func keyword
+ // for Go functions. Note that //line directives can change the
+ // filename and/or line number arbitrarily within a function, meaning
+ // that the Line - startLine offset is not always meaningful.
+ //
+ // This may be zero if not known.
+ startLine int
+
// Entry point program counter for the function; may be zero
// if not known. If Func is not nil then Entry ==
// Func.Entry().
@@ -108,6 +117,7 @@
pc--
}
name := funcname(funcInfo)
+ startLine := f.startLine()
if inldata := funcdata(funcInfo, _FUNCDATA_InlTree); inldata != nil {
inltree := (*[1 << 20]inlinedCall)(inldata)
// Non-strict as cgoTraceback may have added bogus PCs
@@ -116,17 +126,19 @@
if ix >= 0 {
// Note: entry is not modified. It always refers to a real frame, not an inlined one.
f = nil
- name = funcnameFromNameoff(funcInfo, inltree[ix].func_)
- // File/line is already correct.
- // TODO: remove file/line from InlinedCall?
+ ic := inltree[ix]
+ name = funcnameFromNameOff(funcInfo, ic.nameOff)
+ startLine = ic.startLine
+ // File/line from funcline1 below are already correct.
}
}
ci.frames = append(ci.frames, Frame{
- PC: pc,
- Func: f,
- Function: name,
- Entry: entry,
- funcInfo: funcInfo,
+ PC: pc,
+ Func: f,
+ Function: name,
+ Entry: entry,
+ startLine: int(startLine),
+ funcInfo: funcInfo,
// Note: File,Line set below
})
}
@@ -158,6 +170,13 @@
return
}
+// runtime_FrameStartLine returns the start line of the function in a Frame.
+//
+//go:linkname runtime_FrameStartLine runtime/pprof.runtime_FrameStartLine
+func runtime_FrameStartLine(f *Frame) int {
+ return f.startLine
+}
+
// runtime_expandFinalInlineFrame expands the final pc in stk to include all
// "callers" if pc is inline.
//
@@ -393,7 +412,7 @@
// pcHeader holds data used by the pclntab lookups.
type pcHeader struct {
- magic uint32 // 0xFFFFFFF0
+ magic uint32 // 0xFFFFFFF1
pad1, pad2 uint8 // 0,0
minLC uint8 // min instruction size
ptrSize uint8 // size of a ptr in bytes
@@ -428,6 +447,7 @@
data, edata uintptr
bss, ebss uintptr
noptrbss, enoptrbss uintptr
+ covctrs, ecovctrs uintptr
end, gcdata, gcbss uintptr
types, etypes uintptr
rodata uintptr
@@ -575,7 +595,7 @@
const minfunc = 16 // minimum function size
const pcbucketsize = 256 * minfunc // size of bucket in the pc->func lookup table
-// findfunctab is an array of these structures.
+// findfuncbucket is an array of these structures.
// Each bucket represents 4096 bytes of the text segment.
// Each subbucket represents 256 bytes of the text segment.
// To find a function given a pc, locate the bucket and subbucket for
@@ -599,7 +619,7 @@
func moduledataverify1(datap *moduledata) {
// Check that the pclntab's format is valid.
hdr := datap.pcHeader
- if hdr.magic != 0xfffffff0 || hdr.pad1 != 0 || hdr.pad2 != 0 ||
+ if hdr.magic != 0xfffffff1 || hdr.pad1 != 0 || hdr.pad2 != 0 ||
hdr.minLC != sys.PCQuantum || hdr.ptrSize != goarch.PtrSize || hdr.textStart != datap.text {
println("runtime: pcHeader: magic=", hex(hdr.magic), "pad1=", hdr.pad1, "pad2=", hdr.pad2,
"minLC=", hdr.minLC, "ptrSize=", hdr.ptrSize, "pcHeader.textStart=", hex(hdr.textStart),
@@ -727,14 +747,16 @@
// The runtime currently doesn't have function end info, alas.
if ix := pcdatavalue1(f, _PCDATA_InlTreeIndex, pc, nil, false); ix >= 0 {
inltree := (*[1 << 20]inlinedCall)(inldata)
- name := funcnameFromNameoff(f, inltree[ix].func_)
+ ic := inltree[ix]
+ name := funcnameFromNameOff(f, ic.nameOff)
file, line := funcline(f, pc)
fi := &funcinl{
- ones: ^uint32(0),
- entry: f.entry(), // entry of the real (the outermost) function.
- name: name,
- file: file,
- line: int(line),
+ ones: ^uint32(0),
+ entry: f.entry(), // entry of the real (the outermost) function.
+ name: name,
+ file: file,
+ line: line,
+ startLine: ic.startLine,
}
return (*Func)(unsafe.Pointer(fi))
}
@@ -773,7 +795,7 @@
fn := f.raw()
if fn.isInlined() { // inlined version
fi := (*funcinl)(unsafe.Pointer(fn))
- return fi.file, fi.line
+ return fi.file, int(fi.line)
}
// Pass strict=false here, because anyone can call this function,
// and they might just be wrong about targetpc belonging to f.
@@ -781,6 +803,17 @@
return file, int(line32)
}
+// startLine returns the starting line number of the function, i.e., the line
+// number of the func keyword.
+func (f *Func) startLine() int32 {
+ fn := f.raw()
+ if fn.isInlined() { // inlined version
+ fi := (*funcinl)(unsafe.Pointer(fn))
+ return fi.startLine
+ }
+ return fn.funcInfo().startLine
+}
+
// findmoduledatap looks up the moduledata for a PC.
//
// It is nosplit because it's part of the isgoexception
@@ -811,12 +844,12 @@
// isInlined reports whether f should be re-interpreted as a *funcinl.
func (f *_func) isInlined() bool {
- return f.entryoff == ^uint32(0) // see comment for funcinl.ones
+ return f.entryOff == ^uint32(0) // see comment for funcinl.ones
}
// entry returns the entry PC for f.
func (f funcInfo) entry() uintptr {
- return f.datap.textAddr(f.entryoff)
+ return f.datap.textAddr(f.entryOff)
}
// findfunc looks up function metadata for a PC.
@@ -902,7 +935,7 @@
}
if !f.valid() {
- if strict && panicking == 0 {
+ if strict && panicking.Load() == 0 {
println("runtime: no module data for", hex(f.entry()))
throw("no module data")
}
@@ -945,7 +978,7 @@
// If there was a table, it should have covered all program counters.
// If not, something is wrong.
- if panicking != 0 || !strict {
+ if panicking.Load() != 0 || !strict {
return -1, 0
}
@@ -968,10 +1001,10 @@
}
func cfuncname(f funcInfo) *byte {
- if !f.valid() || f.nameoff == 0 {
+ if !f.valid() || f.nameOff == 0 {
return nil
}
- return &f.datap.funcnametab[f.nameoff]
+ return &f.datap.funcnametab[f.nameOff]
}
func funcname(f funcInfo) string {
@@ -994,15 +1027,15 @@
return name[:i]
}
-func cfuncnameFromNameoff(f funcInfo, nameoff int32) *byte {
+func cfuncnameFromNameOff(f funcInfo, nameOff int32) *byte {
if !f.valid() {
return nil
}
- return &f.datap.funcnametab[nameoff]
+ return &f.datap.funcnametab[nameOff]
}
-func funcnameFromNameoff(f funcInfo, nameoff int32) string {
- return gostringnocopy(cfuncnameFromNameoff(f, nameoff))
+func funcnameFromNameOff(f funcInfo, nameOff int32) string {
+ return gostringnocopy(cfuncnameFromNameOff(f, nameOff))
}
func funcfile(f funcInfo, fileno int32) string {
@@ -1173,11 +1206,9 @@
// inlinedCall is the encoding of entries in the FUNCDATA_InlTree table.
type inlinedCall struct {
- parent int16 // index of parent in the inltree, or < 0
- funcID funcID // type of the called function
- _ byte
- file int32 // perCU file index for inlined call. See cmd/link:pcln.go
- line int32 // line number of the call site
- func_ int32 // offset into pclntab for name of called function
- parentPc int32 // position of an instruction whose source position is the call site (offset from entry)
+ funcID funcID // type of the called function
+ _ [3]byte
+ nameOff int32 // offset into pclntab for name of called function
+ parentPc int32 // position of an instruction whose source position is the call site (offset from entry)
+ startLine int32 // line number of start of function (func keyword/TEXT directive)
}
diff --git a/src/runtime/sys_darwin.go b/src/runtime/sys_darwin.go
index 1547fdc..5ba697e 100644
--- a/src/runtime/sys_darwin.go
+++ b/src/runtime/sys_darwin.go
@@ -6,6 +6,7 @@
import (
"internal/abi"
+ "runtime/internal/atomic"
"unsafe"
)
@@ -48,6 +49,17 @@
}
func syscall6()
+//go:linkname syscall_syscall9 syscall.syscall9
+//go:nosplit
+//go:cgo_unsafe_args
+func syscall_syscall9(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) {
+ entersyscall()
+ libcCall(unsafe.Pointer(abi.FuncPCABI0(syscall9)), unsafe.Pointer(&fn))
+ exitsyscall()
+ return
+}
+func syscall9()
+
//go:linkname syscall_syscall6X syscall.syscall6X
//go:nosplit
func syscall_syscall6X(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
@@ -86,7 +98,7 @@
return args.r1, args.r2, args.err
}
-// syscallNoErr is used in crypto/x509 to call into Security.framework and CF.
+// crypto_x509_syscall is used in crypto/x509/internal/macos to call into Security.framework and CF.
//go:linkname crypto_x509_syscall crypto/x509/internal/macos.syscall
//go:nosplit
@@ -167,6 +179,47 @@
}
func pthread_kill_trampoline()
+// osinit_hack is a clumsy hack to work around Apple libc bugs
+// causing fork+exec to hang in the child process intermittently.
+// See go.dev/issue/33565 and go.dev/issue/56784 for a few reports.
+//
+// The stacks obtained from the hung child processes are in
+// libSystem_atfork_child, which is supposed to reinitialize various
+// parts of the C library in the new process.
+//
+// One common stack dies in _notify_fork_child calling _notify_globals
+// (inlined) calling _os_alloc_once, because _os_alloc_once detects that
+// the once lock is held by the parent process and then calls
+// _os_once_gate_corruption_abort. The allocation is setting up the
+// globals for the notification subsystem. See the source code at [1].
+// To work around this, we can allocate the globals earlier in the Go
+// program's lifetime, before any execs are involved, by calling any
+// notify routine that is exported, calls _notify_globals, and doesn't do
+// anything too expensive otherwise. notify_is_valid_token(0) fits the bill.
+//
+// The other common stack dies in xpc_atfork_child calling
+// _objc_msgSend_uncached which ends up in
+// WAITING_FOR_ANOTHER_THREAD_TO_FINISH_CALLING_+initialize. Of course,
+// whatever thread the child is waiting for is in the parent process and
+// is not going to finish anything in the child process. There is no
+// public source code for these routines, so it is unclear exactly what
+// the problem is. An Apple engineer suggests using xpc_date_create_from_current,
+// which empirically does fix the problem.
+//
+// So osinit_hack_trampoline (in sys_darwin_$GOARCH.s) calls
+// notify_is_valid_token(0) and xpc_date_create_from_current(), which makes the
+// fork+exec hangs stop happening. If Apple fixes the libc bug in
+// some future version of macOS, then we can remove this awful code.
+//
+//go:nosplit
+func osinit_hack() {
+ if GOOS == "darwin" { // not ios
+ libcCall(unsafe.Pointer(abi.FuncPCABI0(osinit_hack_trampoline)), nil)
+ }
+ return
+}
+func osinit_hack_trampoline()
+
// mmap is used to do low-level memory allocation via mmap. Don't allow stack
// splits, since this function (used by sysAlloc) is called in a lot of low-level
// parts of the runtime and callers often assume it won't acquire any locks.
@@ -474,7 +527,8 @@
func pthread_cond_signal_trampoline()
// Not used on Darwin, but must be defined.
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
+ throw("exitThread")
}
//go:nosplit
@@ -535,3 +589,6 @@
//go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib"
//go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib"
+
+//go:cgo_import_dynamic libc_notify_is_valid_token notify_is_valid_token "/usr/lib/libSystem.B.dylib"
+//go:cgo_import_dynamic libc_xpc_date_create_from_current xpc_date_create_from_current "/usr/lib/libSystem.B.dylib"
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index ba81fcc..6eaeeb9 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -597,6 +597,15 @@
POPQ BP
RET
+TEXT runtime·osinit_hack_trampoline(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ MOVQ $0, DI // arg 1 val
+ CALL libc_notify_is_valid_token(SB)
+ CALL libc_xpc_date_create_from_current(SB)
+ POPQ BP
+ RET
+
// syscall calls a function in libc on behalf of the syscall package.
// syscall takes a pointer to a struct like:
// struct {
@@ -839,6 +848,65 @@
POPQ BP
RET
+// syscall9 calls a function in libc on behalf of the syscall package.
+// syscall9 takes a pointer to a struct like:
+// struct {
+// fn uintptr
+// a1 uintptr
+// a2 uintptr
+// a3 uintptr
+// a4 uintptr
+// a5 uintptr
+// a6 uintptr
+// a7 uintptr
+// a8 uintptr
+// a9 uintptr
+// r1 uintptr
+// r2 uintptr
+// err uintptr
+// }
+// syscall9 must be called on the g0 stack with the
+// C calling convention (use libcCall).
+//
+// syscall9 expects a 32-bit result and tests for 32-bit -1
+// to decide there was an error.
+TEXT runtime·syscall9(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ SUBQ $16, SP
+ MOVQ (0*8)(DI), R13 // fn
+ MOVQ (2*8)(DI), SI // a2
+ MOVQ (3*8)(DI), DX // a3
+ MOVQ (4*8)(DI), CX // a4
+ MOVQ (5*8)(DI), R8 // a5
+ MOVQ (6*8)(DI), R9 // a6
+ MOVQ (7*8)(DI), R10 // a7
+ MOVQ (8*8)(DI), R11 // a8
+ MOVQ (9*8)(DI), R12 // a9
+ MOVQ DI, (SP)
+ MOVQ (1*8)(DI), DI // a1
+ XORL AX, AX // vararg: say "no float args"
+
+ CALL R13
+
+ MOVQ (SP), DI
+ MOVQ AX, (10*8)(DI) // r1
+ MOVQ DX, (11*8)(DI) // r2
+
+ CMPL AX, $-1
+ JNE ok
+
+ CALL libc_error(SB)
+ MOVLQSX (AX), AX
+ MOVQ (SP), DI
+ MOVQ AX, (12*8)(DI) // err
+
+ok:
+ XORL AX, AX // no error (it's ignored anyway)
+ MOVQ BP, SP
+ POPQ BP
+ RET
+
// syscall_x509 is for crypto/x509. It is like syscall6 but does not check for errors,
// takes 5 uintptrs and 1 float64, and only returns one value,
// for use with standard C ABI functions.
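
For reference, the argument block that the syscall9 assembly above indexes at offsets 0*8 through 12*8 corresponds to a Go struct shaped like the one in the comment; a hypothetical mirror (not a type defined by this patch):

package main

import (
	"fmt"
	"unsafe"
)

// syscall9Args mirrors the struct described in the syscall9 comment: the
// assembly reads fn..a9 at offsets 0*8..9*8 and writes r1, r2, err at
// offsets 10*8, 11*8 and 12*8.
type syscall9Args struct {
	fn, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr
	r1, r2, err                            uintptr
}

func main() {
	fmt.Println(unsafe.Sizeof(syscall9Args{}), unsafe.Offsetof(syscall9Args{}.r1)) // 104 80 on 64-bit
}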
diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s
index bf0dc9d..4a51fb3 100644
--- a/src/runtime/sys_darwin_arm64.s
+++ b/src/runtime/sys_darwin_arm64.s
@@ -458,6 +458,12 @@
BL libc_pthread_setspecific(SB)
RET
+TEXT runtime·osinit_hack_trampoline(SB),NOSPLIT,$0
+ MOVD $0, R0 // arg 1 val
+ BL libc_notify_is_valid_token(SB)
+ BL libc_xpc_date_create_from_current(SB)
+ RET
+
// syscall calls a function in libc on behalf of the syscall package.
// syscall takes a pointer to a struct like:
// struct {
@@ -669,6 +675,63 @@
ok:
RET
+// syscall9 calls a function in libc on behalf of the syscall package.
+// syscall9 takes a pointer to a struct like:
+// struct {
+// fn uintptr
+// a1 uintptr
+// a2 uintptr
+// a3 uintptr
+// a4 uintptr
+// a5 uintptr
+// a6 uintptr
+// a7 uintptr
+// a8 uintptr
+// a9 uintptr
+// r1 uintptr
+// r2 uintptr
+// err uintptr
+// }
+// syscall9 must be called on the g0 stack with the
+// C calling convention (use libcCall).
+TEXT runtime·syscall9(SB),NOSPLIT,$0
+ SUB $16, RSP // push structure pointer
+ MOVD R0, 8(RSP)
+
+ MOVD 0(R0), R12 // fn
+ MOVD 16(R0), R1 // a2
+ MOVD 24(R0), R2 // a3
+ MOVD 32(R0), R3 // a4
+ MOVD 40(R0), R4 // a5
+ MOVD 48(R0), R5 // a6
+ MOVD 56(R0), R6 // a7
+ MOVD 64(R0), R7 // a8
+ MOVD 72(R0), R8 // a9
+ MOVD 8(R0), R0 // a1
+
+ // If fn is declared as vararg, we have to pass the vararg arguments on the stack.
+ // See syscall above. The only function this applies to is openat, for which the 4th
+ // arg must be on the stack.
+ MOVD R3, (RSP)
+
+ BL (R12)
+
+ MOVD 8(RSP), R2 // pop structure pointer
+ ADD $16, RSP
+ MOVD R0, 80(R2) // save r1
+ MOVD R1, 88(R2) // save r2
+ CMPW $-1, R0
+ BNE ok
+ SUB $16, RSP // push structure pointer
+ MOVD R2, 8(RSP)
+ BL libc_error(SB)
+ MOVW (R0), R0
+ MOVD 8(RSP), R2 // pop structure pointer
+ ADD $16, RSP
+ MOVD R0, 96(R2) // save err
+ok:
+ RET
+
// syscall_x509 is for crypto/x509. It is like syscall6 but does not check for errors,
// takes 5 uintptrs and 1 float64, and only returns one value,
// for use with standard C ABI functions.
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
index 602d5e9..0cf9821 100644
--- a/src/runtime/sys_dragonfly_amd64.s
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -65,7 +65,7 @@
MOVL $0xf1, 0xf1 // crash
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-8
MOVQ wait+0(FP), AX
// We're done using the stack.
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
index 9e5210b..4e0bc9b 100644
--- a/src/runtime/sys_freebsd_386.s
+++ b/src/runtime/sys_freebsd_386.s
@@ -10,8 +10,44 @@
#include "go_tls.h"
#include "textflag.h"
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 4
+#define FD_CLOEXEC 1
+#define F_SETFD 2
+
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_getpid 20
+#define SYS_kill 37
+#define SYS_sigaltstack 53
+#define SYS_munmap 73
+#define SYS_madvise 75
+#define SYS_setitimer 83
+#define SYS_fcntl 92
+#define SYS_sysarch 165
+#define SYS___sysctl 202
+#define SYS_clock_gettime 232
+#define SYS_nanosleep 240
+#define SYS_sched_yield 331
+#define SYS_sigprocmask 340
+#define SYS_kqueue 362
+#define SYS_sigaction 416
+#define SYS_sigreturn 417
+#define SYS_thr_exit 431
+#define SYS_thr_self 432
+#define SYS_thr_kill 433
+#define SYS__umtx_op 454
+#define SYS_thr_new 455
+#define SYS_mmap 477
+#define SYS_cpuset_getaffinity 487
+#define SYS_pipe2 542
+#define SYS_kevent 560
+
TEXT runtime·sys_umtx_op(SB),NOSPLIT,$-4
- MOVL $454, AX
+ MOVL $SYS__umtx_op, AX
INT $0x80
JAE 2(PC)
NEGL AX
@@ -19,7 +55,7 @@
RET
TEXT runtime·thr_new(SB),NOSPLIT,$-4
- MOVL $455, AX
+ MOVL $SYS_thr_new, AX
INT $0x80
JAE 2(PC)
NEGL AX
@@ -54,7 +90,7 @@
// Exit the entire program (like C exit)
TEXT runtime·exit(SB),NOSPLIT,$-4
- MOVL $1, AX
+ MOVL $SYS_exit, AX
INT $0x80
MOVL $0xf1, 0xf1 // crash
RET
@@ -63,7 +99,7 @@
DATA exitStack<>+0x00(SB)/4, $0
DATA exitStack<>+0x04(SB)/4, $0
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVL wait+0(FP), AX
// We're done using the stack.
@@ -72,13 +108,13 @@
// on the stack. We want to pass 0, so switch over to a fake
// stack of 0s. It won't write to the stack.
MOVL $exitStack<>(SB), SP
- MOVL $431, AX // thr_exit
+ MOVL $SYS_thr_exit, AX
INT $0x80
MOVL $0xf1, 0xf1 // crash
JMP 0(PC)
TEXT runtime·open(SB),NOSPLIT,$-4
- MOVL $5, AX
+ MOVL $SYS_open, AX
INT $0x80
JAE 2(PC)
MOVL $-1, AX
@@ -86,7 +122,7 @@
RET
TEXT runtime·closefd(SB),NOSPLIT,$-4
- MOVL $6, AX
+ MOVL $SYS_close, AX
INT $0x80
JAE 2(PC)
MOVL $-1, AX
@@ -94,7 +130,7 @@
RET
TEXT runtime·read(SB),NOSPLIT,$-4
- MOVL $3, AX
+ MOVL $SYS_read, AX
INT $0x80
JAE 2(PC)
NEGL AX // caller expects negative errno
@@ -103,7 +139,7 @@
// func pipe2(flags int32) (r, w int32, errno int32)
TEXT runtime·pipe2(SB),NOSPLIT,$12-16
- MOVL $542, AX
+ MOVL $SYS_pipe2, AX
LEAL r+4(FP), BX
MOVL BX, 4(SP)
MOVL flags+0(FP), BX
@@ -115,7 +151,7 @@
RET
TEXT runtime·write1(SB),NOSPLIT,$-4
- MOVL $4, AX
+ MOVL $SYS_write, AX
INT $0x80
JAE 2(PC)
NEGL AX // caller expects negative errno
@@ -126,25 +162,25 @@
// thr_self(&0(FP))
LEAL ret+0(FP), AX
MOVL AX, 4(SP)
- MOVL $432, AX
+ MOVL $SYS_thr_self, AX
INT $0x80
RET
TEXT runtime·thr_kill(SB),NOSPLIT,$-4
// thr_kill(tid, sig)
- MOVL $433, AX
+ MOVL $SYS_thr_kill, AX
INT $0x80
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$16
// getpid
- MOVL $20, AX
+ MOVL $SYS_getpid, AX
INT $0x80
// kill(self, sig)
MOVL AX, 4(SP)
MOVL sig+0(FP), AX
MOVL AX, 8(SP)
- MOVL $37, AX
+ MOVL $SYS_kill, AX
INT $0x80
RET
@@ -160,7 +196,7 @@
MOVSL
MOVL $0, AX // top 32 bits of file offset
STOSL
- MOVL $477, AX
+ MOVL $SYS_mmap, AX
INT $0x80
JAE ok
MOVL $0, p+24(FP)
@@ -172,14 +208,14 @@
RET
TEXT runtime·munmap(SB),NOSPLIT,$-4
- MOVL $73, AX
+ MOVL $SYS_munmap, AX
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
TEXT runtime·madvise(SB),NOSPLIT,$-4
- MOVL $75, AX // madvise
+ MOVL $SYS_madvise, AX
INT $0x80
JAE 2(PC)
MOVL $-1, AX
@@ -187,15 +223,15 @@
RET
TEXT runtime·setitimer(SB), NOSPLIT, $-4
- MOVL $83, AX
+ MOVL $SYS_setitimer, AX
INT $0x80
RET
// func fallback_walltime() (sec int64, nsec int32)
TEXT runtime·fallback_walltime(SB), NOSPLIT, $32-12
- MOVL $232, AX // clock_gettime
+ MOVL $SYS_clock_gettime, AX
LEAL 12(SP), BX
- MOVL $0, 4(SP) // CLOCK_REALTIME
+ MOVL $CLOCK_REALTIME, 4(SP)
MOVL BX, 8(SP)
INT $0x80
MOVL 12(SP), AX // sec
@@ -209,9 +245,9 @@
// func fallback_nanotime() int64
TEXT runtime·fallback_nanotime(SB), NOSPLIT, $32-8
- MOVL $232, AX
+ MOVL $SYS_clock_gettime, AX
LEAL 12(SP), BX
- MOVL $4, 4(SP) // CLOCK_MONOTONIC
+ MOVL $CLOCK_MONOTONIC, 4(SP)
MOVL BX, 8(SP)
INT $0x80
MOVL 12(SP), AX // sec
@@ -230,7 +266,7 @@
TEXT runtime·asmSigaction(SB),NOSPLIT,$-4
- MOVL $416, AX
+ MOVL $SYS_sigaction, AX
INT $0x80
MOVL AX, ret+12(FP)
RET
@@ -267,13 +303,13 @@
MOVL 24(SP), AX // context
MOVL $0, 0(SP) // syscall gap
MOVL AX, 4(SP)
- MOVL $417, AX // sigreturn(ucontext)
+ MOVL $SYS_sigreturn, AX
INT $0x80
MOVL $0xf1, 0xf1 // crash
RET
TEXT runtime·sigaltstack(SB),NOSPLIT,$0
- MOVL $53, AX
+ MOVL $SYS_sigaltstack, AX
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
@@ -293,7 +329,7 @@
LEAL 12(SP), AX
MOVL AX, 4(SP) // arg 1 - rqtp
MOVL $0, 8(SP) // arg 2 - rmtp
- MOVL $240, AX // sys_nanosleep
+ MOVL $SYS_nanosleep, AX
INT $0x80
RET
@@ -352,7 +388,7 @@
MOVL $0, 0(SP) // syscall gap
MOVL $1, 4(SP)
MOVL AX, 8(SP)
- MOVL $165, AX
+ MOVL $SYS_sysarch, AX
INT $0x80
JAE 2(PC)
INT $3
@@ -368,7 +404,7 @@
MOVSL // arg 4 - oldlenp
MOVSL // arg 5 - newp
MOVSL // arg 6 - newlen
- MOVL $202, AX // sys___sysctl
+ MOVL $SYS___sysctl, AX
INT $0x80
JAE 4(PC)
NEGL AX
@@ -379,7 +415,7 @@
RET
TEXT runtime·osyield(SB),NOSPLIT,$-4
- MOVL $331, AX // sys_sched_yield
+ MOVL $SYS_sched_yield, AX
INT $0x80
RET
@@ -391,7 +427,7 @@
MOVL AX, 8(SP) // arg 2 - set
MOVL old+8(FP), AX
MOVL AX, 12(SP) // arg 3 - oset
- MOVL $340, AX // sys_sigprocmask
+ MOVL $SYS_sigprocmask, AX
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
@@ -399,7 +435,7 @@
// int32 runtime·kqueue(void);
TEXT runtime·kqueue(SB),NOSPLIT,$0
- MOVL $362, AX
+ MOVL $SYS_kqueue, AX
INT $0x80
JAE 2(PC)
NEGL AX
@@ -408,7 +444,7 @@
// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
TEXT runtime·kevent(SB),NOSPLIT,$0
- MOVL $363, AX
+ MOVL $SYS_kevent, AX
INT $0x80
JAE 2(PC)
NEGL AX
@@ -417,12 +453,12 @@
// int32 runtime·closeonexec(int32 fd);
TEXT runtime·closeonexec(SB),NOSPLIT,$32
- MOVL $92, AX // fcntl
+ MOVL $SYS_fcntl, AX
// 0(SP) is where the caller PC would be; kernel skips it
MOVL fd+0(FP), BX
MOVL BX, 4(SP) // fd
- MOVL $2, 8(SP) // F_SETFD
- MOVL $1, 12(SP) // FD_CLOEXEC
+ MOVL $F_SETFD, 8(SP)
+ MOVL $FD_CLOEXEC, 12(SP)
INT $0x80
JAE 2(PC)
NEGL AX
@@ -430,7 +466,7 @@
// func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
TEXT runtime·cpuset_getaffinity(SB), NOSPLIT, $0-28
- MOVL $487, AX
+ MOVL $SYS_cpuset_getaffinity, AX
INT $0x80
JAE 2(PC)
NEGL AX
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
index 94341f6..374e0ab 100644
--- a/src/runtime/sys_freebsd_amd64.s
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -11,13 +11,49 @@
#include "textflag.h"
#include "cgo/abi_amd64.h"
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 4
+#define FD_CLOEXEC 1
+#define F_SETFD 2
+#define AMD64_SET_FSBASE 129
+
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_getpid 20
+#define SYS_kill 37
+#define SYS_sigaltstack 53
+#define SYS_munmap 73
+#define SYS_madvise 75
+#define SYS_setitimer 83
+#define SYS_fcntl 92
+#define SYS_sysarch 165
+#define SYS___sysctl 202
+#define SYS_clock_gettime 232
+#define SYS_nanosleep 240
+#define SYS_sched_yield 331
+#define SYS_sigprocmask 340
+#define SYS_kqueue 362
+#define SYS_sigaction 416
+#define SYS_thr_exit 431
+#define SYS_thr_self 432
+#define SYS_thr_kill 433
+#define SYS__umtx_op 454
+#define SYS_thr_new 455
+#define SYS_mmap 477
+#define SYS_cpuset_getaffinity 487
+#define SYS_pipe2 542
+#define SYS_kevent 560
+
TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI
MOVL mode+8(FP), SI
MOVL val+12(FP), DX
MOVQ uaddr1+16(FP), R10
MOVQ ut+24(FP), R8
- MOVL $454, AX
+ MOVL $SYS__umtx_op, AX
SYSCALL
JCC 2(PC)
NEGQ AX
@@ -27,7 +63,7 @@
TEXT runtime·thr_new(SB),NOSPLIT,$0
MOVQ param+0(FP), DI
MOVL size+8(FP), SI
- MOVL $455, AX
+ MOVL $SYS_thr_new, AX
SYSCALL
JCC 2(PC)
NEGQ AX
@@ -55,18 +91,18 @@
// Exit the entire program (like C exit)
TEXT runtime·exit(SB),NOSPLIT,$-8
MOVL code+0(FP), DI // arg 1 exit status
- MOVL $1, AX
+ MOVL $SYS_exit, AX
SYSCALL
MOVL $0xf1, 0xf1 // crash
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-8
MOVQ wait+0(FP), AX
// We're done using the stack.
MOVL $0, (AX)
MOVL $0, DI // arg 1 long *state
- MOVL $431, AX // thr_exit
+ MOVL $SYS_thr_exit, AX
SYSCALL
MOVL $0xf1, 0xf1 // crash
JMP 0(PC)
@@ -75,7 +111,7 @@
MOVQ name+0(FP), DI // arg 1 pathname
MOVL mode+8(FP), SI // arg 2 flags
MOVL perm+12(FP), DX // arg 3 mode
- MOVL $5, AX
+ MOVL $SYS_open, AX
SYSCALL
JCC 2(PC)
MOVL $-1, AX
@@ -84,7 +120,7 @@
TEXT runtime·closefd(SB),NOSPLIT,$-8
MOVL fd+0(FP), DI // arg 1 fd
- MOVL $6, AX
+ MOVL $SYS_close, AX
SYSCALL
JCC 2(PC)
MOVL $-1, AX
@@ -95,7 +131,7 @@
MOVL fd+0(FP), DI // arg 1 fd
MOVQ p+8(FP), SI // arg 2 buf
MOVL n+16(FP), DX // arg 3 count
- MOVL $3, AX
+ MOVL $SYS_read, AX
SYSCALL
JCC 2(PC)
NEGQ AX // caller expects negative errno
@@ -106,7 +142,7 @@
TEXT runtime·pipe2(SB),NOSPLIT,$0-20
LEAQ r+8(FP), DI
MOVL flags+0(FP), SI
- MOVL $542, AX
+ MOVL $SYS_pipe2, AX
SYSCALL
JCC 2(PC)
NEGQ AX
@@ -117,7 +153,7 @@
MOVQ fd+0(FP), DI // arg 1 fd
MOVQ p+8(FP), SI // arg 2 buf
MOVL n+16(FP), DX // arg 3 count
- MOVL $4, AX
+ MOVL $SYS_write, AX
SYSCALL
JCC 2(PC)
NEGQ AX // caller expects negative errno
@@ -127,7 +163,7 @@
TEXT runtime·thr_self(SB),NOSPLIT,$0-8
// thr_self(&0(FP))
LEAQ ret+0(FP), DI // arg 1
- MOVL $432, AX
+ MOVL $SYS_thr_self, AX
SYSCALL
RET
@@ -135,18 +171,18 @@
// thr_kill(tid, sig)
MOVQ tid+0(FP), DI // arg 1 id
MOVQ sig+8(FP), SI // arg 2 sig
- MOVL $433, AX
+ MOVL $SYS_thr_kill, AX
SYSCALL
RET
TEXT runtime·raiseproc(SB),NOSPLIT,$0
// getpid
- MOVL $20, AX
+ MOVL $SYS_getpid, AX
SYSCALL
// kill(self, sig)
MOVQ AX, DI // arg 1 pid
MOVL sig+0(FP), SI // arg 2 sig
- MOVL $37, AX
+ MOVL $SYS_kill, AX
SYSCALL
RET
@@ -154,14 +190,14 @@
MOVL mode+0(FP), DI
MOVQ new+8(FP), SI
MOVQ old+16(FP), DX
- MOVL $83, AX
+ MOVL $SYS_setitimer, AX
SYSCALL
RET
// func fallback_walltime() (sec int64, nsec int32)
TEXT runtime·fallback_walltime(SB), NOSPLIT, $32-12
- MOVL $232, AX // clock_gettime
- MOVQ $0, DI // CLOCK_REALTIME
+ MOVL $SYS_clock_gettime, AX
+ MOVQ $CLOCK_REALTIME, DI
LEAQ 8(SP), SI
SYSCALL
MOVQ 8(SP), AX // sec
@@ -173,8 +209,8 @@
RET
TEXT runtime·fallback_nanotime(SB), NOSPLIT, $32-8
- MOVL $232, AX
- MOVQ $4, DI // CLOCK_MONOTONIC
+ MOVL $SYS_clock_gettime, AX
+ MOVQ $CLOCK_MONOTONIC, DI
LEAQ 8(SP), SI
SYSCALL
MOVQ 8(SP), AX // sec
@@ -191,7 +227,7 @@
MOVQ sig+0(FP), DI // arg 1 sig
MOVQ new+8(FP), SI // arg 2 act
MOVQ old+16(FP), DX // arg 3 oact
- MOVL $416, AX
+ MOVL $SYS_sigaction, AX
SYSCALL
JCC 2(PC)
MOVL $-1, AX
@@ -349,14 +385,14 @@
MOVQ _cgo_callers(SB), AX
JMP AX
-TEXT runtime·mmap(SB),NOSPLIT,$0
+TEXT runtime·sysMmap(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI // arg 1 addr
MOVQ n+8(FP), SI // arg 2 len
MOVL prot+16(FP), DX // arg 3 prot
MOVL flags+20(FP), R10 // arg 4 flags
MOVL fd+24(FP), R8 // arg 5 fid
MOVL off+28(FP), R9 // arg 6 offset
- MOVL $477, AX
+ MOVL $SYS_mmap, AX
SYSCALL
JCC ok
MOVQ $0, p+32(FP)
@@ -367,20 +403,51 @@
MOVQ $0, err+40(FP)
RET
-TEXT runtime·munmap(SB),NOSPLIT,$0
+// Call the function stored in _cgo_mmap using the GCC calling convention.
+// This must be called on the system stack.
+TEXT runtime·callCgoMmap(SB),NOSPLIT,$16
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVL prot+16(FP), DX
+ MOVL flags+20(FP), CX
+ MOVL fd+24(FP), R8
+ MOVL off+28(FP), R9
+ MOVQ _cgo_mmap(SB), AX
+ MOVQ SP, BX
+ ANDQ $~15, SP // alignment as per amd64 psABI
+ MOVQ BX, 0(SP)
+ CALL AX
+ MOVQ 0(SP), SP
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·sysMunmap(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI // arg 1 addr
MOVQ n+8(FP), SI // arg 2 len
- MOVL $73, AX
+ MOVL $SYS_munmap, AX
SYSCALL
JCC 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
+// Call the function stored in _cgo_munmap using the GCC calling convention.
+// This must be called on the system stack.
+TEXT runtime·callCgoMunmap(SB),NOSPLIT,$16-16
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVQ _cgo_munmap(SB), AX
+ MOVQ SP, BX
+ ANDQ $~15, SP // alignment as per amd64 psABI
+ MOVQ BX, 0(SP)
+ CALL AX
+ MOVQ 0(SP), SP
+ RET
+
TEXT runtime·madvise(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI
MOVQ n+8(FP), SI
MOVL flags+16(FP), DX
- MOVQ $75, AX // madvise
+ MOVQ $SYS_madvise, AX
SYSCALL
JCC 2(PC)
MOVL $-1, AX
@@ -390,7 +457,7 @@
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI
- MOVQ $53, AX
+ MOVQ $SYS_sigaltstack, AX
SYSCALL
JCC 2(PC)
MOVL $0xf1, 0xf1 // crash
@@ -408,7 +475,7 @@
MOVQ SP, DI // arg 1 - rqtp
MOVQ $0, SI // arg 2 - rmtp
- MOVL $240, AX // sys_nanosleep
+ MOVL $SYS_nanosleep, AX
SYSCALL
RET
@@ -417,8 +484,8 @@
ADDQ $8, DI // adjust for ELF: wants to use -8(FS) for g and m
MOVQ DI, 0(SP)
MOVQ SP, SI
- MOVQ $129, DI // AMD64_SET_FSBASE
- MOVQ $165, AX // sysarch
+ MOVQ $AMD64_SET_FSBASE, DI
+ MOVQ $SYS_sysarch, AX
SYSCALL
JCC 2(PC)
MOVL $0xf1, 0xf1 // crash
@@ -431,7 +498,7 @@
MOVQ size+24(FP), R10 // arg 4 - oldlenp
MOVQ dst+32(FP), R8 // arg 5 - newp
MOVQ ndst+40(FP), R9 // arg 6 - newlen
- MOVQ $202, AX // sys___sysctl
+ MOVQ $SYS___sysctl, AX
SYSCALL
JCC 4(PC)
NEGQ AX
@@ -442,7 +509,7 @@
RET
TEXT runtime·osyield(SB),NOSPLIT,$-4
- MOVL $331, AX // sys_sched_yield
+ MOVL $SYS_sched_yield, AX
SYSCALL
RET
@@ -450,7 +517,7 @@
MOVL how+0(FP), DI // arg 1 - how
MOVQ new+8(FP), SI // arg 2 - set
MOVQ old+16(FP), DX // arg 3 - oset
- MOVL $340, AX // sys_sigprocmask
+ MOVL $SYS_sigprocmask, AX
SYSCALL
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
@@ -461,7 +528,7 @@
MOVQ $0, DI
MOVQ $0, SI
MOVQ $0, DX
- MOVL $362, AX
+ MOVL $SYS_kqueue, AX
SYSCALL
JCC 2(PC)
NEGQ AX
@@ -476,7 +543,7 @@
MOVQ ev+24(FP), R10
MOVL nev+32(FP), R8
MOVQ ts+40(FP), R9
- MOVL $363, AX
+ MOVL $SYS_kevent, AX
SYSCALL
JCC 2(PC)
NEGQ AX
@@ -486,9 +553,9 @@
// void runtime·closeonexec(int32 fd);
TEXT runtime·closeonexec(SB),NOSPLIT,$0
MOVL fd+0(FP), DI // fd
- MOVQ $2, SI // F_SETFD
- MOVQ $1, DX // FD_CLOEXEC
- MOVL $92, AX // fcntl
+ MOVQ $F_SETFD, SI
+ MOVQ $FD_CLOEXEC, DX
+ MOVL $SYS_fcntl, AX
SYSCALL
RET
@@ -499,7 +566,7 @@
MOVQ id+16(FP), DX
MOVQ size+24(FP), R10
MOVQ mask+32(FP), R8
- MOVL $487, AX
+ MOVL $SYS_cpuset_getaffinity, AX
SYSCALL
JCC 2(PC)
NEGQ AX
diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s
index cbee34d..a3fee14 100644
--- a/src/runtime/sys_freebsd_arm.s
+++ b/src/runtime/sys_freebsd_arm.s
@@ -31,7 +31,6 @@
#define SYS_sched_yield (SYS_BASE + 331)
#define SYS_sigprocmask (SYS_BASE + 340)
#define SYS_kqueue (SYS_BASE + 362)
-#define SYS_kevent (SYS_BASE + 363)
#define SYS_sigaction (SYS_BASE + 416)
#define SYS_thr_exit (SYS_BASE + 431)
#define SYS_thr_self (SYS_BASE + 432)
@@ -41,6 +40,7 @@
#define SYS_mmap (SYS_BASE + 477)
#define SYS_cpuset_getaffinity (SYS_BASE + 487)
#define SYS_pipe2 (SYS_BASE + 542)
+#define SYS_kevent (SYS_BASE + 560)
TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
MOVW addr+0(FP), R0
@@ -85,7 +85,7 @@
MOVW.CS R8, (R8)
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVW wait+0(FP), R0
// We're done using the stack.
diff --git a/src/runtime/sys_freebsd_arm64.s b/src/runtime/sys_freebsd_arm64.s
index 5dcdf37..29866cb 100644
--- a/src/runtime/sys_freebsd_arm64.s
+++ b/src/runtime/sys_freebsd_arm64.s
@@ -38,7 +38,6 @@
#define SYS_sched_yield 331
#define SYS_sigprocmask 340
#define SYS_kqueue 362
-#define SYS_kevent 363
#define SYS_sigaction 416
#define SYS_thr_exit 431
#define SYS_thr_self 432
@@ -48,6 +47,7 @@
#define SYS_mmap 477
#define SYS_cpuset_getaffinity 487
#define SYS_pipe2 542
+#define SYS_kevent 560
TEXT emptyfunc<>(SB),0,$0-0
RET
@@ -99,7 +99,7 @@
MOVD $0, R0
MOVD R0, (R0)
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVD wait+0(FP), R0
// We're done using the stack.
@@ -460,7 +460,7 @@
BEQ 3(PC)
// get CNTPCT (Physical Count Register) into R0
- MRS CNTPCT_EL0, R0 // SIGILL
+ MRS CNTPCT_EL0, R0
B 2(PC)
// get CNTVCT (Virtual Count Register) into R0
diff --git a/src/runtime/sys_freebsd_riscv64.s b/src/runtime/sys_freebsd_riscv64.s
new file mode 100644
index 0000000..30deed2
--- /dev/null
+++ b/src/runtime/sys_freebsd_riscv64.s
@@ -0,0 +1,436 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for riscv64, FreeBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 4
+#define FD_CLOEXEC 1
+#define F_SETFD 2
+#define F_GETFL 3
+#define F_SETFL 4
+#define O_NONBLOCK 4
+
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_getpid 20
+#define SYS_kill 37
+#define SYS_sigaltstack 53
+#define SYS_munmap 73
+#define SYS_madvise 75
+#define SYS_setitimer 83
+#define SYS_fcntl 92
+#define SYS___sysctl 202
+#define SYS_clock_gettime 232
+#define SYS_nanosleep 240
+#define SYS_sched_yield 331
+#define SYS_sigprocmask 340
+#define SYS_kqueue 362
+#define SYS_sigaction 416
+#define SYS_thr_exit 431
+#define SYS_thr_self 432
+#define SYS_thr_kill 433
+#define SYS__umtx_op 454
+#define SYS_thr_new 455
+#define SYS_mmap 477
+#define SYS_cpuset_getaffinity 487
+#define SYS_pipe2 542
+#define SYS_kevent 560
+
+TEXT emptyfunc<>(SB),0,$0-0
+ RET
+
+// func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uintptr, ut *umtx_time) int32
+TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
+ MOV addr+0(FP), A0
+ MOVW mode+8(FP), A1
+ MOVW val+12(FP), A2
+ MOV uaddr1+16(FP), A3
+ MOV ut+24(FP), A4
+ MOV $SYS__umtx_op, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+32(FP)
+ RET
+
+// func thr_new(param *thrparam, size int32) int32
+TEXT runtime·thr_new(SB),NOSPLIT,$0
+ MOV param+0(FP), A0
+ MOVW size+8(FP), A1
+ MOV $SYS_thr_new, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+16(FP)
+ RET
+
+// func thr_start()
+TEXT runtime·thr_start(SB),NOSPLIT,$0
+ // set up g
+ MOV m_g0(A0), g
+ MOV A0, g_m(g)
+ CALL emptyfunc<>(SB) // fault if stack check is wrong
+ CALL runtime·mstart(SB)
+
+ WORD $0 // crash
+ RET
+
+// func exit(code int32)
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
+ MOVW code+0(FP), A0
+ MOV $SYS_exit, T0
+ ECALL
+ WORD $0 // crash
+
+// func exitThread(wait *atomic.Uint32)
+TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
+ MOV wait+0(FP), A0
+ // We're done using the stack.
+ FENCE
+ MOVW ZERO, (A0)
+ FENCE
+ MOV $0, A0 // exit code
+ MOV $SYS_thr_exit, T0
+ ECALL
+ JMP 0(PC)
+
+// func open(name *byte, mode, perm int32) int32
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
+ MOV name+0(FP), A0
+ MOVW mode+8(FP), A1
+ MOVW perm+12(FP), A2
+ MOV $SYS_open, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+16(FP)
+ RET
+
+// func closefd(fd int32) int32
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
+ MOVW fd+0(FP), A0
+ MOV $SYS_close, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+8(FP)
+ RET
+
+// func pipe2(flags int32) (r, w int32, errno int32)
+TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20
+ MOV $r+8(FP), A0
+ MOVW flags+0(FP), A1
+ MOV $SYS_pipe2, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, errno+16(FP)
+ RET
+
+// func write1(fd uintptr, p unsafe.Pointer, n int32) int32
+TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28
+ MOV fd+0(FP), A0
+ MOV p+8(FP), A1
+ MOVW n+16(FP), A2
+ MOV $SYS_write, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+24(FP)
+ RET
+
+// func read(fd int32, p unsafe.Pointer, n int32) int32
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
+ MOVW fd+0(FP), A0
+ MOV p+8(FP), A1
+ MOVW n+16(FP), A2
+ MOV $SYS_read, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+24(FP)
+ RET
+
+// func usleep(usec uint32)
+TEXT runtime·usleep(SB),NOSPLIT,$24-4
+ MOVWU usec+0(FP), A0
+ MOV $1000, A1
+ MUL A1, A0, A0
+ MOV $1000000000, A1
+ DIV A1, A0, A2
+ MOV A2, 8(X2)
+ REM A1, A0, A3
+ MOV A3, 16(X2)
+ ADD $8, X2, A0
+ MOV ZERO, A1
+ MOV $SYS_nanosleep, T0
+ ECALL
+ RET
+
+// func thr_self() thread
+TEXT runtime·thr_self(SB),NOSPLIT,$8-8
+ MOV $ptr-8(SP), A0 // arg 1 &8(SP)
+ MOV $SYS_thr_self, T0
+ ECALL
+ MOV ptr-8(SP), A0
+ MOV A0, ret+0(FP)
+ RET
+
+// func thr_kill(t thread, sig int)
+TEXT runtime·thr_kill(SB),NOSPLIT,$0-16
+ MOV tid+0(FP), A0 // arg 1 pid
+ MOV sig+8(FP), A1 // arg 2 sig
+ MOV $SYS_thr_kill, T0
+ ECALL
+ RET
+
+// func raiseproc(sig uint32)
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_getpid, T0
+ ECALL
+ // arg 1 pid - already in A0
+ MOVW sig+0(FP), A1 // arg 2
+ MOV $SYS_kill, T0
+ ECALL
+ RET
+
+// func setitimer(mode int32, new, old *itimerval)
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
+ MOVW mode+0(FP), A0
+ MOV new+8(FP), A1
+ MOV old+16(FP), A2
+ MOV $SYS_setitimer, T0
+ ECALL
+ RET
+
+// func fallback_walltime() (sec int64, nsec int32)
+TEXT runtime·fallback_walltime(SB),NOSPLIT,$24-12
+ MOV $CLOCK_REALTIME, A0
+ MOV $8(X2), A1
+ MOV $SYS_clock_gettime, T0
+ ECALL
+ MOV 8(X2), T0 // sec
+ MOVW 16(X2), T1 // nsec
+ MOV T0, sec+0(FP)
+ MOVW T1, nsec+8(FP)
+ RET
+
+// func fallback_nanotime() int64
+TEXT runtime·fallback_nanotime(SB),NOSPLIT,$24-8
+ MOV $CLOCK_MONOTONIC, A0
+ MOV $8(X2), A1
+ MOV $SYS_clock_gettime, T0
+ ECALL
+ MOV 8(X2), T0 // sec
+ MOV 16(X2), T1 // nsec
+
+ // sec is in T0, nsec in T1
+ // return nsec in T0
+ MOV $1000000000, T2
+ MUL T2, T0
+ ADD T1, T0
+
+ MOV T0, ret+0(FP)
+ RET
+
+// func asmSigaction(sig uintptr, new, old *sigactiont) int32
+TEXT runtime·asmSigaction(SB),NOSPLIT|NOFRAME,$0
+ MOV sig+0(FP), A0 // arg 1 sig
+ MOV new+8(FP), A1 // arg 2 act
+ MOV old+16(FP), A2 // arg 3 oact
+ MOV $SYS_sigaction, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+24(FP)
+ RET
+
+// func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
+TEXT runtime·sigfwd(SB),NOSPLIT,$0-32
+ MOVW sig+8(FP), A0
+ MOV info+16(FP), A1
+ MOV ctx+24(FP), A2
+ MOV fn+0(FP), T1
+ JALR RA, T1
+ RET
+
+// func sigtramp(signo, ureg, ctxt unsafe.Pointer)
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ MOVW A0, 8(X2)
+ MOV A1, 16(X2)
+ MOV A2, 24(X2)
+
+ // this might be called in external code context,
+ // where g is not set.
+ MOVBU runtime·iscgo(SB), A0
+ BEQ A0, ZERO, ok
+ CALL runtime·load_g(SB)
+ok:
+ MOV $runtime·sigtrampgo(SB), A0
+ JALR RA, A0
+ RET
+
+// func mmap(addr uintptr, n uintptr, prot int, flags int, fd int, off int64) (ret uintptr, err error)
+TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOVW prot+16(FP), A2
+ MOVW flags+20(FP), A3
+ MOVW fd+24(FP), A4
+ MOVW off+28(FP), A5
+ MOV $SYS_mmap, T0
+ ECALL
+ BNE T0, ZERO, fail
+ MOV A0, p+32(FP)
+ MOV ZERO, err+40(FP)
+ RET
+fail:
+ MOV ZERO, p+32(FP)
+ MOV A0, err+40(FP)
+ RET
+
+// func munmap(addr uintptr, n uintptr) (err error)
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOV $SYS_munmap, T0
+ ECALL
+ BNE T0, ZERO, fail
+ RET
+fail:
+ WORD $0 // crash
+
+// func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
+ MOV addr+0(FP), A0
+ MOV n+8(FP), A1
+ MOVW flags+16(FP), A2
+ MOV $SYS_madvise, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+24(FP)
+ RET
+
+// func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOV mib+0(FP), A0
+ MOV miblen+8(FP), A1
+ MOV out+16(FP), A2
+ MOV size+24(FP), A3
+ MOV dst+32(FP), A4
+ MOV ndst+40(FP), A5
+ MOV $SYS___sysctl, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+48(FP)
+ RET
+
+// func sigaltstack(new, old *stackt)
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
+ MOV new+0(FP), A0
+ MOV old+8(FP), A1
+ MOV $SYS_sigaltstack, T0
+ ECALL
+ BNE T0, ZERO, fail
+ RET
+fail:
+ WORD $0 // crash
+
+// func osyield()
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_sched_yield, T0
+ ECALL
+ RET
+
+// func sigprocmask(how int32, new, old *sigset)
+TEXT runtime·sigprocmask(SB),NOSPLIT|NOFRAME,$0-24
+ MOVW how+0(FP), A0
+ MOV new+8(FP), A1
+ MOV old+16(FP), A2
+ MOV $SYS_sigprocmask, T0
+ ECALL
+ BNE T0, ZERO, fail
+ RET
+fail:
+ WORD $0 // crash
+
+// func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32
+TEXT runtime·cpuset_getaffinity(SB),NOSPLIT|NOFRAME,$0-44
+ MOV level+0(FP), A0
+ MOV which+8(FP), A1
+ MOV id+16(FP), A2
+ MOV size+24(FP), A3
+ MOV mask+32(FP), A4
+ MOV $SYS_cpuset_getaffinity, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+40(FP)
+ RET
+
+// func kqueue() int32
+TEXT runtime·kqueue(SB),NOSPLIT|NOFRAME,$0
+ MOV $SYS_kqueue, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ MOV $-1, A0
+ok:
+ MOVW A0, ret+0(FP)
+ RET
+
+// func kevent(kq int, ch unsafe.Pointer, nch int, ev unsafe.Pointer, nev int, ts *Timespec) (n int, err error)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVW kq+0(FP), A0
+ MOV ch+8(FP), A1
+ MOVW nch+16(FP), A2
+ MOV ev+24(FP), A3
+ MOVW nev+32(FP), A4
+ MOV ts+40(FP), A5
+ MOV $SYS_kevent, T0
+ ECALL
+ BEQ T0, ZERO, ok
+ NEG A0, A0
+ok:
+ MOVW A0, ret+48(FP)
+ RET
+
+// func closeonexec(fd int32)
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
+ MOVW fd+0(FP), A0
+ MOV $F_SETFD, A1
+ MOV $FD_CLOEXEC, A2
+ MOV $SYS_fcntl, T0
+ ECALL
+ RET
+
+// func getCntxct() uint32
+TEXT runtime·getCntxct(SB),NOSPLIT|NOFRAME,$0
+ RDTIME A0
+ MOVW A0, ret+0(FP)
+ RET
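
The riscv64 usleep earlier in this file converts microseconds to a timespec with a MUL/DIV/REM sequence; the same arithmetic in Go form (an illustrative helper, not part of the patch):

package main

import "fmt"

// usecToTimespec mirrors the arithmetic in runtime·usleep: scale to
// nanoseconds, then split into whole seconds and the nanosecond remainder.
func usecToTimespec(usec uint32) (sec, nsec int64) {
	ns := int64(usec) * 1000
	return ns / 1000000000, ns % 1000000000
}

func main() {
	fmt.Println(usecToTimespec(1500000)) // 1 500000000
}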
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 4942f21..12a2941 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -33,7 +33,6 @@
#define SYS_access 33
#define SYS_kill 37
#define SYS_brk 45
-#define SYS_fcntl 55
#define SYS_munmap 91
#define SYS_socketcall 102
#define SYS_setittimer 104
@@ -52,15 +51,11 @@
#define SYS_sched_getaffinity 242
#define SYS_set_thread_area 243
#define SYS_exit_group 252
-#define SYS_epoll_create 254
-#define SYS_epoll_ctl 255
-#define SYS_epoll_wait 256
#define SYS_timer_create 259
#define SYS_timer_settime 260
#define SYS_timer_delete 263
#define SYS_clock_gettime 265
#define SYS_tgkill 270
-#define SYS_epoll_create1 329
#define SYS_pipe2 331
TEXT runtime·exit(SB),NOSPLIT,$0
@@ -77,7 +72,7 @@
INT $3 // not reached
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVL wait+0(FP), AX
// We're done using the stack.
@@ -726,53 +721,6 @@
MOVL AX, ret+12(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT,$0
- MOVL $SYS_epoll_create, AX
- MOVL size+0(FP), BX
- INVOKE_SYSCALL
- MOVL AX, ret+4(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT,$0
- MOVL $SYS_epoll_create1, AX
- MOVL flags+0(FP), BX
- INVOKE_SYSCALL
- MOVL AX, ret+4(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$0
- MOVL $SYS_epoll_ctl, AX
- MOVL epfd+0(FP), BX
- MOVL op+4(FP), CX
- MOVL fd+8(FP), DX
- MOVL ev+12(FP), SI
- INVOKE_SYSCALL
- MOVL AX, ret+16(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT,$0
- MOVL $SYS_epoll_wait, AX
- MOVL epfd+0(FP), BX
- MOVL ev+4(FP), CX
- MOVL nev+8(FP), DX
- MOVL timeout+12(FP), SI
- INVOKE_SYSCALL
- MOVL AX, ret+16(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT,$0
- MOVL $SYS_fcntl, AX
- MOVL fd+0(FP), BX // fd
- MOVL $2, CX // F_SETFD
- MOVL $1, DX // FD_CLOEXEC
- INVOKE_SYSCALL
- RET
-
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0
MOVL $SYS_access, AX
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index ca6ecb1..c7a89ba 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -33,24 +33,19 @@
#define SYS_clone 56
#define SYS_exit 60
#define SYS_kill 62
-#define SYS_fcntl 72
#define SYS_sigaltstack 131
#define SYS_arch_prctl 158
#define SYS_gettid 186
#define SYS_futex 202
#define SYS_sched_getaffinity 204
-#define SYS_epoll_create 213
#define SYS_timer_create 222
#define SYS_timer_settime 223
#define SYS_timer_delete 226
#define SYS_clock_gettime 228
#define SYS_exit_group 231
-#define SYS_epoll_ctl 233
#define SYS_tgkill 234
#define SYS_openat 257
#define SYS_faccessat 269
-#define SYS_epoll_pwait 281
-#define SYS_epoll_create1 291
#define SYS_pipe2 293
TEXT runtime·exit(SB),NOSPLIT,$0-4
@@ -59,7 +54,7 @@
SYSCALL
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-8
MOVQ wait+0(FP), AX
// We're done using the stack.
@@ -666,55 +661,6 @@
MOVL AX, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT,$0
- MOVL size+0(FP), DI
- MOVL $SYS_epoll_create, AX
- SYSCALL
- MOVL AX, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT,$0
- MOVL flags+0(FP), DI
- MOVL $SYS_epoll_create1, AX
- SYSCALL
- MOVL AX, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$0
- MOVL epfd+0(FP), DI
- MOVL op+4(FP), SI
- MOVL fd+8(FP), DX
- MOVQ ev+16(FP), R10
- MOVL $SYS_epoll_ctl, AX
- SYSCALL
- MOVL AX, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT,$0
- // This uses pwait instead of wait, because Android O blocks wait.
- MOVL epfd+0(FP), DI
- MOVQ ev+8(FP), SI
- MOVL nev+16(FP), DX
- MOVL timeout+20(FP), R10
- MOVQ $0, R8
- MOVL $SYS_epoll_pwait, AX
- SYSCALL
- MOVL AX, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT,$0
- MOVL fd+0(FP), DI // fd
- MOVQ $2, SI // F_SETFD
- MOVQ $1, DX // FD_CLOEXEC
- MOVL $SYS_fcntl, AX
- SYSCALL
- RET
-
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0
// This uses faccessat instead of access, because Android O blocks access.
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index 66bf403..7b8c4f0 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -41,15 +41,10 @@
#define SYS_nanosleep (SYS_BASE + 162)
#define SYS_sched_getaffinity (SYS_BASE + 242)
#define SYS_clock_gettime (SYS_BASE + 263)
-#define SYS_epoll_create (SYS_BASE + 250)
-#define SYS_epoll_ctl (SYS_BASE + 251)
-#define SYS_epoll_wait (SYS_BASE + 252)
#define SYS_timer_create (SYS_BASE + 257)
#define SYS_timer_settime (SYS_BASE + 258)
#define SYS_timer_delete (SYS_BASE + 261)
-#define SYS_epoll_create1 (SYS_BASE + 357)
#define SYS_pipe2 (SYS_BASE + 359)
-#define SYS_fcntl (SYS_BASE + 55)
#define SYS_access (SYS_BASE + 33)
#define SYS_connect (SYS_BASE + 283)
#define SYS_socket (SYS_BASE + 281)
@@ -122,7 +117,7 @@
MOVW $1003, R1
MOVW R0, (R1) // fail hard
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-4
MOVW wait+0(FP), R0
// We're done using the stack.
@@ -262,73 +257,105 @@
MOVW R0, ret+12(FP)
RET
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
+// Call a VDSO function.
+//
+// R0-R3: arguments to VDSO function (C calling convention)
+// R4: uintptr function to call
+//
+// There is no return value.
+TEXT runtime·vdsoCall(SB),NOSPLIT,$8-0
+ // R0-R3 may be arguments to fn, do not touch.
+ // R4 is function to call.
+ // R5-R9 are available as locals. They are unchanged by the C call
+ // (callee-save).
+
// We don't know how much stack space the VDSO code will need,
// so switch to g0.
// Save old SP. Use R13 instead of SP to avoid linker rewriting the offsets.
- MOVW R13, R4 // R4 is unchanged by C code.
+ MOVW R13, R5
- MOVW g_m(g), R5 // R5 is unchanged by C code.
+ MOVW g_m(g), R6
// Set vdsoPC and vdsoSP for SIGPROF traceback.
// Save the old values on stack and restore them on exit,
// so this function is reentrant.
- MOVW m_vdsoPC(R5), R1
- MOVW m_vdsoSP(R5), R2
- MOVW R1, 4(R13)
- MOVW R2, 8(R13)
+ MOVW m_vdsoPC(R6), R7
+ MOVW m_vdsoSP(R6), R8
+ MOVW R7, 4(R13)
+ MOVW R8, 8(R13)
- MOVW $ret-4(FP), R2 // caller's SP
- MOVW LR, m_vdsoPC(R5)
- MOVW R2, m_vdsoSP(R5)
+ MOVW $sp-4(FP), R7 // caller's SP
+ MOVW LR, m_vdsoPC(R6)
+ MOVW R7, m_vdsoSP(R6)
- MOVW m_curg(R5), R0
+ MOVW m_curg(R6), R7
- CMP g, R0 // Only switch if on curg.
+ CMP g, R7 // Only switch if on curg.
B.NE noswitch
- MOVW m_g0(R5), R0
- MOVW (g_sched+gobuf_sp)(R0), R13 // Set SP to g0 stack
+ MOVW m_g0(R6), R7
+ MOVW (g_sched+gobuf_sp)(R7), R13 // Set SP to g0 stack
noswitch:
- SUB $24, R13 // Space for results
BIC $0x7, R13 // Align for C code
- MOVW $CLOCK_REALTIME, R0
- MOVW $8(R13), R1 // timespec
- MOVW runtime·vdsoClockgettimeSym(SB), R2
- CMP $0, R2
- B.EQ fallback
-
// Store g on gsignal's stack, so if we receive a signal
// during VDSO code we can find the g.
- // If we don't have a signal stack, we won't receive signal,
- // so don't bother saving g.
- // When using cgo, we already saved g on TLS, also don't save
- // g here.
- // Also don't save g if we are already on the signal stack.
- // We won't get a nested signal.
- MOVB runtime·iscgo(SB), R6
- CMP $0, R6
+
+ // When using cgo, we already saved g on TLS, also don't save g here.
+ MOVB runtime·iscgo(SB), R7
+ CMP $0, R7
BNE nosaveg
- MOVW m_gsignal(R5), R6 // g.m.gsignal
- CMP $0, R6
+ // If we don't have a signal stack, we won't receive signal, so don't
+ // bother saving g.
+ MOVW m_gsignal(R6), R7 // g.m.gsignal
+ CMP $0, R7
BEQ nosaveg
- CMP g, R6
+ // Don't save g if we are already on the signal stack, as we won't get
+ // a nested signal.
+ CMP g, R7
BEQ nosaveg
- MOVW (g_stack+stack_lo)(R6), R6 // g.m.gsignal.stack.lo
- MOVW g, (R6)
+ // If we don't have a signal stack, we won't receive signal, so don't
+ // bother saving g.
+ MOVW (g_stack+stack_lo)(R7), R7 // g.m.gsignal.stack.lo
+ CMP $0, R7
+ BEQ nosaveg
+ MOVW g, (R7)
- BL (R2)
+ BL (R4)
- MOVW $0, R1
- MOVW R1, (R6) // clear g slot, R6 is unchanged by C code
+ MOVW $0, R8
+ MOVW R8, (R7) // clear g slot
JMP finish
nosaveg:
- BL (R2)
+ BL (R4)
+
+finish:
+ MOVW R5, R13 // Restore real SP
+ // Restore vdsoPC, vdsoSP
+ // We don't worry about being signaled between the two stores.
+ // If we are not in a signal handler, we'll restore vdsoSP to 0,
+ // and no one will care about vdsoPC. If we are in a signal handler,
+ // we cannot receive another signal.
+ MOVW 8(R13), R7
+ MOVW R7, m_vdsoSP(R6)
+ MOVW 4(R13), R7
+ MOVW R7, m_vdsoPC(R6)
+ RET
+
+TEXT runtime·walltime(SB),NOSPLIT,$12-12
+ MOVW $CLOCK_REALTIME, R0
+ MOVW $spec-12(SP), R1 // timespec
+
+ MOVW runtime·vdsoClockgettimeSym(SB), R4
+ CMP $0, R4
+ B.EQ fallback
+
+ BL runtime·vdsoCall(SB)
+
JMP finish
fallback:
@@ -336,19 +363,8 @@
SWI $0
finish:
- MOVW 8(R13), R0 // sec
- MOVW 12(R13), R2 // nsec
-
- MOVW R4, R13 // Restore real SP
- // Restore vdsoPC, vdsoSP
- // We don't worry about being signaled between the two stores.
- // If we are not in a signal handler, we'll restore vdsoSP to 0,
- // and no one will care about vdsoPC. If we are in a signal handler,
- // we cannot receive another signal.
- MOVW 8(R13), R1
- MOVW R1, m_vdsoSP(R5)
- MOVW 4(R13), R1
- MOVW R1, m_vdsoPC(R5)
+ MOVW sec-12(SP), R0 // sec
+ MOVW nsec-8(SP), R2 // nsec
MOVW R0, sec_lo+0(FP)
MOVW $0, R1
@@ -356,73 +372,17 @@
MOVW R2, nsec+8(FP)
RET
-// int64 nanotime1(void)
-TEXT runtime·nanotime1(SB),NOSPLIT,$8-8
- // Switch to g0 stack. See comment above in runtime·walltime.
-
- // Save old SP. Use R13 instead of SP to avoid linker rewriting the offsets.
- MOVW R13, R4 // R4 is unchanged by C code.
-
- MOVW g_m(g), R5 // R5 is unchanged by C code.
-
- // Set vdsoPC and vdsoSP for SIGPROF traceback.
- // Save the old values on stack and restore them on exit,
- // so this function is reentrant.
- MOVW m_vdsoPC(R5), R1
- MOVW m_vdsoSP(R5), R2
- MOVW R1, 4(R13)
- MOVW R2, 8(R13)
-
- MOVW $ret-4(FP), R2 // caller's SP
- MOVW LR, m_vdsoPC(R5)
- MOVW R2, m_vdsoSP(R5)
-
- MOVW m_curg(R5), R0
-
- CMP g, R0 // Only switch if on curg.
- B.NE noswitch
-
- MOVW m_g0(R5), R0
- MOVW (g_sched+gobuf_sp)(R0), R13 // Set SP to g0 stack
-
-noswitch:
- SUB $24, R13 // Space for results
- BIC $0x7, R13 // Align for C code
-
+// func nanotime1() int64
+TEXT runtime·nanotime1(SB),NOSPLIT,$12-8
MOVW $CLOCK_MONOTONIC, R0
- MOVW $8(R13), R1 // timespec
- MOVW runtime·vdsoClockgettimeSym(SB), R2
- CMP $0, R2
+ MOVW $spec-12(SP), R1 // timespec
+
+ MOVW runtime·vdsoClockgettimeSym(SB), R4
+ CMP $0, R4
B.EQ fallback
- // Store g on gsignal's stack, so if we receive a signal
- // during VDSO code we can find the g.
- // If we don't have a signal stack, we won't receive signal,
- // so don't bother saving g.
- // When using cgo, we already saved g on TLS, also don't save
- // g here.
- // Also don't save g if we are already on the signal stack.
- // We won't get a nested signal.
- MOVB runtime·iscgo(SB), R6
- CMP $0, R6
- BNE nosaveg
- MOVW m_gsignal(R5), R6 // g.m.gsignal
- CMP $0, R6
- BEQ nosaveg
- CMP g, R6
- BEQ nosaveg
- MOVW (g_stack+stack_lo)(R6), R6 // g.m.gsignal.stack.lo
- MOVW g, (R6)
+ BL runtime·vdsoCall(SB)
- BL (R2)
-
- MOVW $0, R1
- MOVW R1, (R6) // clear g slot, R6 is unchanged by C code
-
- JMP finish
-
-nosaveg:
- BL (R2)
JMP finish
fallback:
@@ -430,19 +390,8 @@
SWI $0
finish:
- MOVW 8(R13), R0 // sec
- MOVW 12(R13), R2 // nsec
-
- MOVW R4, R13 // Restore real SP
- // Restore vdsoPC, vdsoSP
- // We don't worry about being signaled between the two stores.
- // If we are not in a signal handler, we'll restore vdsoSP to 0,
- // and no one will care about vdsoPC. If we are in a signal handler,
- // we cannot receive another signal.
- MOVW 8(R13), R4
- MOVW R4, m_vdsoSP(R5)
- MOVW 4(R13), R4
- MOVW R4, m_vdsoPC(R5)
+ MOVW sec-12(SP), R0 // sec
+ MOVW nsec-8(SP), R2 // nsec
MOVW $1000000000, R3
MULLU R0, R3, (R1, R0)
@@ -451,6 +400,7 @@
MOVW R0, ret_lo+0(FP)
MOVW R1, ret_hi+4(FP)
+
RET
// int32 futex(int32 *uaddr, int32 op, int32 val,
@@ -661,53 +611,6 @@
MOVW R0, ret+12(FP)
RET
-// int32 runtime·epollcreate(int32 size)
-TEXT runtime·epollcreate(SB),NOSPLIT,$0
- MOVW size+0(FP), R0
- MOVW $SYS_epoll_create, R7
- SWI $0
- MOVW R0, ret+4(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags)
-TEXT runtime·epollcreate1(SB),NOSPLIT,$0
- MOVW flags+0(FP), R0
- MOVW $SYS_epoll_create1, R7
- SWI $0
- MOVW R0, ret+4(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$0
- MOVW epfd+0(FP), R0
- MOVW op+4(FP), R1
- MOVW fd+8(FP), R2
- MOVW ev+12(FP), R3
- MOVW $SYS_epoll_ctl, R7
- SWI $0
- MOVW R0, ret+16(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
-TEXT runtime·epollwait(SB),NOSPLIT,$0
- MOVW epfd+0(FP), R0
- MOVW ev+4(FP), R1
- MOVW nev+8(FP), R2
- MOVW timeout+12(FP), R3
- MOVW $SYS_epoll_wait, R7
- SWI $0
- MOVW R0, ret+16(FP)
- RET
-
-// void runtime·closeonexec(int32 fd)
-TEXT runtime·closeonexec(SB),NOSPLIT,$0
- MOVW fd+0(FP), R0 // fd
- MOVW $2, R1 // F_SETFD
- MOVW $1, R2 // FD_CLOEXEC
- MOVW $SYS_fcntl, R7
- SWI $0
- RET
-
// b __kuser_get_tls @ 0xffff0fe0
TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
MOVW $0xffff0fe0, R0
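runtime·vdsoCall above centralizes the bookkeeping that walltime and nanotime1 previously duplicated: stash the caller's PC/SP in m.vdsoPC/m.vdsoSP so a SIGPROF arriving inside the VDSO can still be unwound, switch to the g0 stack, and restore the previous values on return so the helper stays reentrant. A hedged Go-shaped sketch of just that save/restore discipline (the struct and function here are illustrative, not the runtime's real types):

package main

import "fmt"

type m struct {
	vdsoPC, vdsoSP uintptr
}

// vdsoCall models only the reentrancy pattern of the assembly helper:
// remember the previous vdsoPC/vdsoSP, install ours, run fn, then restore.
func vdsoCall(mp *m, pc, sp uintptr, fn func()) {
	savedPC, savedSP := mp.vdsoPC, mp.vdsoSP
	mp.vdsoPC, mp.vdsoSP = pc, sp
	defer func() {
		mp.vdsoSP = savedSP
		mp.vdsoPC = savedPC
	}()
	fn()
}

func main() {
	mp := &m{}
	vdsoCall(mp, 0x1000, 0x2000, func() {
		fmt.Println(mp.vdsoPC, mp.vdsoSP) // 4096 8192 while "inside the VDSO"
	})
	fmt.Println(mp.vdsoPC, mp.vdsoSP) // 0 0: previous values restored
}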
diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index b47b6fd..38ff6ac 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -22,7 +22,6 @@
#define SYS_openat 56
#define SYS_close 57
#define SYS_pipe2 59
-#define SYS_fcntl 25
#define SYS_nanosleep 101
#define SYS_mmap 222
#define SYS_munmap 215
@@ -42,9 +41,6 @@
#define SYS_futex 98
#define SYS_sched_getaffinity 123
#define SYS_exit_group 94
-#define SYS_epoll_create1 20
-#define SYS_epoll_ctl 21
-#define SYS_epoll_pwait 22
#define SYS_clock_gettime 113
#define SYS_faccessat 48
#define SYS_socket 198
@@ -60,7 +56,7 @@
SVC
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVD wait+0(FP), R0
// We're done using the stack.
@@ -762,54 +758,6 @@
MOVW R0, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOVW $0, R0
- MOVD $SYS_epoll_create1, R8
- SVC
- MOVW R0, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), R0
- MOVD $SYS_epoll_create1, R8
- SVC
- MOVW R0, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R0
- MOVW op+4(FP), R1
- MOVW fd+8(FP), R2
- MOVD ev+16(FP), R3
- MOVD $SYS_epoll_ctl, R8
- SVC
- MOVW R0, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R0
- MOVD ev+8(FP), R1
- MOVW nev+16(FP), R2
- MOVW timeout+20(FP), R3
- MOVD $0, R4
- MOVD $SYS_epoll_pwait, R8
- SVC
- MOVW R0, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R0 // fd
- MOVD $2, R1 // F_SETFD
- MOVD $1, R2 // FD_CLOEXEC
- MOVD $SYS_fcntl, R8
- SVC
- RET
-
// int access(const char *name, int mode)
TEXT runtime·access(SB),NOSPLIT,$0-20
MOVD $AT_FDCWD, R0
diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s
index 36a92df..9ce5e72 100644
--- a/src/runtime/sys_linux_loong64.s
+++ b/src/runtime/sys_linux_loong64.s
@@ -18,7 +18,6 @@
#define SYS_close 57
#define SYS_getpid 172
#define SYS_kill 129
-#define SYS_fcntl 25
#define SYS_mmap 222
#define SYS_munmap 215
#define SYS_setitimer 103
@@ -35,12 +34,9 @@
#define SYS_futex 98
#define SYS_sched_getaffinity 123
#define SYS_exit_group 94
-#define SYS_epoll_ctl 21
#define SYS_tgkill 131
#define SYS_openat 56
-#define SYS_epoll_pwait 22
#define SYS_clock_gettime 113
-#define SYS_epoll_create1 20
#define SYS_brk 214
#define SYS_pipe2 59
#define SYS_timer_create 107
@@ -53,7 +49,7 @@
SYSCALL
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVV wait+0(FP), R19
// We're done using the stack.
@@ -534,54 +530,6 @@
MOVW R4, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOVW size+0(FP), R4
- MOVV $SYS_epoll_create1, R11
- SYSCALL
- MOVW R4, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), R4
- MOVV $SYS_epoll_create1, R11
- SYSCALL
- MOVW R4, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R4
- MOVW op+4(FP), R5
- MOVW fd+8(FP), R6
- MOVV ev+16(FP), R7
- MOVV $SYS_epoll_ctl, R11
- SYSCALL
- MOVW R4, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R4
- MOVV ev+8(FP), R5
- MOVW nev+16(FP), R6
- MOVW timeout+20(FP), R7
- MOVV $0, R8
- MOVV $SYS_epoll_pwait, R11
- SYSCALL
- MOVW R4, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R4 // fd
- MOVV $2, R5 // F_SETFD
- MOVV $1, R6 // FD_CLOEXEC
- MOVV $SYS_fcntl, R11
- SYSCALL
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8
// Implemented as brk(NULL).
diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s
index 06d54df..47f2da5 100644
--- a/src/runtime/sys_linux_mips64x.s
+++ b/src/runtime/sys_linux_mips64x.s
@@ -20,7 +20,6 @@
#define SYS_close 5003
#define SYS_getpid 5038
#define SYS_kill 5060
-#define SYS_fcntl 5070
#define SYS_mmap 5009
#define SYS_munmap 5011
#define SYS_setitimer 5036
@@ -37,16 +36,12 @@
#define SYS_futex 5194
#define SYS_sched_getaffinity 5196
#define SYS_exit_group 5205
-#define SYS_epoll_create 5207
-#define SYS_epoll_ctl 5208
#define SYS_timer_create 5216
#define SYS_timer_settime 5217
#define SYS_timer_delete 5220
#define SYS_tgkill 5225
#define SYS_openat 5247
-#define SYS_epoll_pwait 5272
#define SYS_clock_gettime 5222
-#define SYS_epoll_create1 5285
#define SYS_brk 5012
#define SYS_pipe2 5287
@@ -56,7 +51,7 @@
SYSCALL
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVV wait+0(FP), R1
// We're done using the stack.
@@ -568,62 +563,6 @@
MOVW R2, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOVW size+0(FP), R4
- MOVV $SYS_epoll_create, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBVU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), R4
- MOVV $SYS_epoll_create1, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBVU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R4
- MOVW op+4(FP), R5
- MOVW fd+8(FP), R6
- MOVV ev+16(FP), R7
- MOVV $SYS_epoll_ctl, R2
- SYSCALL
- SUBVU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- // This uses pwait instead of wait, because Android O blocks wait.
- MOVW epfd+0(FP), R4
- MOVV ev+8(FP), R5
- MOVW nev+16(FP), R6
- MOVW timeout+20(FP), R7
- MOVV $0, R8
- MOVV $SYS_epoll_pwait, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBVU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R4 // fd
- MOVV $2, R5 // F_SETFD
- MOVV $1, R6 // FD_CLOEXEC
- MOVV $SYS_fcntl, R2
- SYSCALL
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8
// Implemented as brk(NULL).
diff --git a/src/runtime/sys_linux_mipsx.s b/src/runtime/sys_linux_mipsx.s
index e70edcc..5e6b6c1 100644
--- a/src/runtime/sys_linux_mipsx.s
+++ b/src/runtime/sys_linux_mipsx.s
@@ -20,7 +20,6 @@
#define SYS_getpid 4020
#define SYS_kill 4037
#define SYS_brk 4045
-#define SYS_fcntl 4055
#define SYS_mmap 4090
#define SYS_munmap 4091
#define SYS_setitimer 4104
@@ -37,15 +36,11 @@
#define SYS_futex 4238
#define SYS_sched_getaffinity 4240
#define SYS_exit_group 4246
-#define SYS_epoll_create 4248
-#define SYS_epoll_ctl 4249
-#define SYS_epoll_wait 4250
#define SYS_timer_create 4257
#define SYS_timer_settime 4258
#define SYS_timer_delete 4261
#define SYS_clock_gettime 4263
#define SYS_tgkill 4266
-#define SYS_epoll_create1 4326
#define SYS_pipe2 4328
TEXT runtime·exit(SB),NOSPLIT,$0-4
@@ -55,7 +50,7 @@
UNDEF
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVW wait+0(FP), R1
// We're done using the stack.
@@ -487,60 +482,6 @@
MOVW R2, ret+12(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT,$0-8
- MOVW size+0(FP), R4
- MOVW $SYS_epoll_create, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+4(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT,$0-8
- MOVW flags+0(FP), R4
- MOVW $SYS_epoll_create1, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+4(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$0-20
- MOVW epfd+0(FP), R4
- MOVW op+4(FP), R5
- MOVW fd+8(FP), R6
- MOVW ev+12(FP), R7
- MOVW $SYS_epoll_ctl, R2
- SYSCALL
- SUBU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+16(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT,$0-20
- MOVW epfd+0(FP), R4
- MOVW ev+4(FP), R5
- MOVW nev+8(FP), R6
- MOVW timeout+12(FP), R7
- MOVW $SYS_epoll_wait, R2
- SYSCALL
- BEQ R7, 2(PC)
- SUBU R2, R0, R2 // caller expects negative errno
- MOVW R2, ret+16(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT,$0-4
- MOVW fd+0(FP), R4 // fd
- MOVW $2, R5 // F_SETFD
- MOVW $1, R6 // FD_CLOEXEC
- MOVW $SYS_fcntl, R2
- SYSCALL
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT,$0-4
// Implemented as brk(NULL).
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index 2913a05..d0427a4 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -21,7 +21,6 @@
#define SYS_getpid 20
#define SYS_kill 37
#define SYS_brk 45
-#define SYS_fcntl 55
#define SYS_mmap 90
#define SYS_munmap 91
#define SYS_setitimer 104
@@ -38,15 +37,11 @@
#define SYS_futex 221
#define SYS_sched_getaffinity 223
#define SYS_exit_group 234
-#define SYS_epoll_create 236
-#define SYS_epoll_ctl 237
-#define SYS_epoll_wait 238
#define SYS_timer_create 240
#define SYS_timer_settime 241
#define SYS_timer_delete 244
#define SYS_clock_gettime 246
#define SYS_tgkill 250
-#define SYS_epoll_create1 315
#define SYS_pipe2 317
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
@@ -54,7 +49,7 @@
SYSCALL $SYS_exit_group
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVD wait+0(FP), R1
// We're done using the stack.
@@ -111,16 +106,22 @@
MOVW R3, errno+16(FP)
RET
+// func usleep(usec uint32)
TEXT runtime·usleep(SB),NOSPLIT,$16-4
MOVW usec+0(FP), R3
- MOVD R3, R5
- MOVW $1000000, R4
- DIVD R4, R3
- MOVD R3, 8(R1)
- MOVW $1000, R4
- MULLD R3, R4
- SUB R4, R5
- MOVD R5, 16(R1)
+
+ // Use magic constant 0x8637bd06 and shift right 51
+ // to perform usec/1000000.
+ MOVD $0x8637bd06, R4
+ MULLD R3, R4, R4 // Convert usec to S.
+ SRD $51, R4, R4
+ MOVD R4, 8(R1) // Store to tv_sec
+
+ MOVD $1000000, R5
+ MULLW R4, R5, R5 // Convert tv_sec back into uS
+ SUB R5, R3, R5 // Compute remainder uS.
+ MULLD $1000, R5, R5 // Convert to nsec
+ MOVD R5, 16(R1) // Store to tv_nsec
// nanosleep(&ts, 0)
ADD $8, R1, R3
@@ -876,55 +877,6 @@
MOVW R3, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOVW size+0(FP), R3
- SYSCALL $SYS_epoll_create
- BVC 2(PC)
- NEG R3 // caller expects negative errno
- MOVW R3, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), R3
- SYSCALL $SYS_epoll_create1
- BVC 2(PC)
- NEG R3 // caller expects negative errno
- MOVW R3, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R3
- MOVW op+4(FP), R4
- MOVW fd+8(FP), R5
- MOVD ev+16(FP), R6
- SYSCALL $SYS_epoll_ctl
- NEG R3 // caller expects negative errno
- MOVW R3, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R3
- MOVD ev+8(FP), R4
- MOVW nev+16(FP), R5
- MOVW timeout+20(FP), R6
- SYSCALL $SYS_epoll_wait
- BVC 2(PC)
- NEG R3 // caller expects negative errno
- MOVW R3, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R3 // fd
- MOVD $2, R4 // F_SETFD
- MOVD $1, R5 // FD_CLOEXEC
- SYSCALL $SYS_fcntl
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0
// Implemented as brk(NULL).
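The usleep rewrite above replaces a divide with a reciprocal multiply: 0x8637bd06 is ceil(2^51 / 10^6), so (usec * 0x8637bd06) >> 51 equals usec / 1000000 exactly for every 32-bit usec. A small standalone Go check of that identity (not part of the patch):

package main

import "fmt"

func main() {
	const magic = 0x8637bd06 // ceil(2^51 / 1e6) = 2251799814
	for _, usec := range []uint64{0, 1, 999999, 1000000, 1999999, 1<<32 - 1} {
		got := usec * magic >> 51
		want := usec / 1000000
		fmt.Println(usec, got, want, got == want) // always true for 32-bit usec
	}
}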
diff --git a/src/runtime/sys_linux_riscv64.s b/src/runtime/sys_linux_riscv64.s
index afb2d11..d1558fd 100644
--- a/src/runtime/sys_linux_riscv64.s
+++ b/src/runtime/sys_linux_riscv64.s
@@ -18,13 +18,9 @@
#define SYS_clone 220
#define SYS_close 57
#define SYS_connect 203
-#define SYS_epoll_create1 20
-#define SYS_epoll_ctl 21
-#define SYS_epoll_pwait 22
#define SYS_exit 93
#define SYS_exit_group 94
#define SYS_faccessat 48
-#define SYS_fcntl 25
#define SYS_futex 98
#define SYS_getpid 172
#define SYS_gettid 178
@@ -61,7 +57,7 @@
ECALL
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOV wait+0(FP), A0
// We're done using the stack.
@@ -578,54 +574,6 @@
MOV A0, ret+24(FP)
RET
-// func epollcreate(size int32) int32
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOV $0, A0
- MOV $SYS_epoll_create1, A7
- ECALL
- MOVW A0, ret+8(FP)
- RET
-
-// func epollcreate1(flags int32) int32
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), A0
- MOV $SYS_epoll_create1, A7
- ECALL
- MOVW A0, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollevent) int32
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), A0
- MOVW op+4(FP), A1
- MOVW fd+8(FP), A2
- MOV ev+16(FP), A3
- MOV $SYS_epoll_ctl, A7
- ECALL
- MOVW A0, ret+24(FP)
- RET
-
-// func epollwait(epfd int32, ev *epollevent, nev, timeout int32) int32
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), A0
- MOV ev+8(FP), A1
- MOVW nev+16(FP), A2
- MOVW timeout+20(FP), A3
- MOV $0, A4
- MOV $SYS_epoll_pwait, A7
- ECALL
- MOVW A0, ret+24(FP)
- RET
-
-// func closeonexec(int32)
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), A0 // fd
- MOV $2, A1 // F_SETFD
- MOV $1, A2 // FD_CLOEXEC
- MOV $SYS_fcntl, A7
- ECALL
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT,$0-8
// Implemented as brk(NULL).
diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s
index c82cb9b..1448670 100644
--- a/src/runtime/sys_linux_s390x.s
+++ b/src/runtime/sys_linux_s390x.s
@@ -17,7 +17,6 @@
#define SYS_getpid 20
#define SYS_kill 37
#define SYS_brk 45
-#define SYS_fcntl 55
#define SYS_mmap 90
#define SYS_munmap 91
#define SYS_setitimer 104
@@ -35,15 +34,11 @@
#define SYS_sched_getaffinity 240
#define SYS_tgkill 241
#define SYS_exit_group 248
-#define SYS_epoll_create 249
-#define SYS_epoll_ctl 250
-#define SYS_epoll_wait 251
#define SYS_timer_create 254
#define SYS_timer_settime 255
#define SYS_timer_delete 258
#define SYS_clock_gettime 260
#define SYS_pipe2 325
-#define SYS_epoll_create1 327
TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
MOVW code+0(FP), R2
@@ -51,7 +46,7 @@
SYSCALL
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8
MOVD wait+0(FP), R1
// We're done using the stack.
@@ -589,53 +584,6 @@
MOVW R2, ret+24(FP)
RET
-// int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
- MOVW size+0(FP), R2
- MOVW $SYS_epoll_create, R1
- SYSCALL
- MOVW R2, ret+8(FP)
- RET
-
-// int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
- MOVW flags+0(FP), R2
- MOVW $SYS_epoll_create1, R1
- SYSCALL
- MOVW R2, ret+8(FP)
- RET
-
-// func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R2
- MOVW op+4(FP), R3
- MOVW fd+8(FP), R4
- MOVD ev+16(FP), R5
- MOVW $SYS_epoll_ctl, R1
- SYSCALL
- MOVW R2, ret+24(FP)
- RET
-
-// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
- MOVW epfd+0(FP), R2
- MOVD ev+8(FP), R3
- MOVW nev+16(FP), R4
- MOVW timeout+20(FP), R5
- MOVW $SYS_epoll_wait, R1
- SYSCALL
- MOVW R2, ret+24(FP)
- RET
-
-// void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
- MOVW fd+0(FP), R2 // fd
- MOVD $2, R3 // F_SETFD
- MOVD $1, R4 // FD_CLOEXEC
- MOVW $SYS_fcntl, R1
- SYSCALL
- RET
-
// func sbrk0() uintptr
TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8
// Implemented as brk(NULL).
diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s
index 581b4fc..7be18c6 100644
--- a/src/runtime/sys_netbsd_386.s
+++ b/src/runtime/sys_netbsd_386.s
@@ -53,7 +53,7 @@
MOVL $0xf1, 0xf1 // crash
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVL wait+0(FP), AX
// We're done using the stack.
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
index ab11f6f..30f3f38 100644
--- a/src/runtime/sys_netbsd_amd64.s
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -122,7 +122,7 @@
MOVL $0xf1, 0xf1 // crash
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-8
MOVQ wait+0(FP), AX
// We're done using the stack.
diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s
index dbe3dbc..62fa852 100644
--- a/src/runtime/sys_netbsd_arm.s
+++ b/src/runtime/sys_netbsd_arm.s
@@ -56,7 +56,7 @@
MOVW.CS R8, (R8)
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-4
MOVW wait+0(FP), R0
// We're done using the stack.
diff --git a/src/runtime/sys_netbsd_arm64.s b/src/runtime/sys_netbsd_arm64.s
index fc126ca..d57959f 100644
--- a/src/runtime/sys_netbsd_arm64.s
+++ b/src/runtime/sys_netbsd_arm64.s
@@ -115,7 +115,7 @@
MOVD $0, R0 // If we're still running,
MOVD R0, (R0) // crash
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0-8
MOVD wait+0(FP), R0
// We're done using the stack.
diff --git a/src/runtime/sys_openbsd2.go b/src/runtime/sys_openbsd2.go
index f936e0c..49bad8e 100644
--- a/src/runtime/sys_openbsd2.go
+++ b/src/runtime/sys_openbsd2.go
@@ -8,6 +8,7 @@
import (
"internal/abi"
+ "runtime/internal/atomic"
"unsafe"
)
@@ -248,7 +249,8 @@
func sigaltstack_trampoline()
// Not used on OpenBSD, but must be defined.
-func exitThread(wait *uint32) {
+func exitThread(wait *atomic.Uint32) {
+ throw("exitThread")
}
//go:nosplit
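exitThread now takes a *atomic.Uint32 on every port: the stubs store zero through that pointer once the exiting thread is done touching its stack, and whoever owns the stack spins on the word before freeing it. A generic, hedged sketch of that handshake using the public sync/atomic type (names and values here are illustrative, not the runtime's fields):

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

// exitingThread models the exiting side: clear the flag only after the
// last use of the stack, like the stubs' store of zero just before the
// thread-exit syscall.
func exitingThread(stackInUse *atomic.Uint32) {
	// ... final work that still uses this thread's stack ...
	stackInUse.Store(0)
}

func main() {
	var stackInUse atomic.Uint32
	stackInUse.Store(1)
	go exitingThread(&stackInUse)
	for stackInUse.Load() != 0 {
		runtime.Gosched()
	}
	// Only now is it safe to release the exiting thread's stack.
	fmt.Println("stack can be freed")
}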
diff --git a/src/runtime/sys_openbsd_mips64.s b/src/runtime/sys_openbsd_mips64.s
index c2b2092..affd586 100644
--- a/src/runtime/sys_openbsd_mips64.s
+++ b/src/runtime/sys_openbsd_mips64.s
@@ -24,7 +24,7 @@
MOVV R2, (R2)
RET
-// func exitThread(wait *uint32)
+// func exitThread(wait *atomic.Uint32)
TEXT runtime·exitThread(SB),NOSPLIT,$0
MOVV wait+0(FP), R4 // arg 1 - notdead
MOVV $302, R2 // sys___threxit
@@ -277,7 +277,7 @@
// In parent, return.
BEQ R2, 3(PC)
- MOVW R2, ret+40(FP)
+ MOVW $0, ret+40(FP)
RET
// Initialise m, g.
diff --git a/src/runtime/sys_wasm.go b/src/runtime/sys_wasm.go
index e6e7f47..bf57569 100644
--- a/src/runtime/sys_wasm.go
+++ b/src/runtime/sys_wasm.go
@@ -16,10 +16,6 @@
var wasmStack m0Stack
-func wasmMove()
-
-func wasmZero()
-
func wasmDiv()
func wasmTruncS()
diff --git a/src/runtime/sys_wasm.s b/src/runtime/sys_wasm.s
index 164dd16..f706e00 100644
--- a/src/runtime/sys_wasm.s
+++ b/src/runtime/sys_wasm.s
@@ -4,73 +4,6 @@
#include "textflag.h"
-TEXT runtime·wasmMove(SB), NOSPLIT, $0-0
-loop:
- Loop
- // *dst = *src
- Get R0
- Get R1
- I64Load $0
- I64Store $0
-
- // n--
- Get R2
- I32Const $1
- I32Sub
- Tee R2
-
- // n == 0
- I32Eqz
- If
- Return
- End
-
- // dst += 8
- Get R0
- I32Const $8
- I32Add
- Set R0
-
- // src += 8
- Get R1
- I32Const $8
- I32Add
- Set R1
-
- Br loop
- End
- UNDEF
-
-TEXT runtime·wasmZero(SB), NOSPLIT, $0-0
-loop:
- Loop
- // *dst = 0
- Get R0
- I64Const $0
- I64Store $0
-
- // n--
- Get R1
- I32Const $1
- I32Sub
- Tee R1
-
- // n == 0
- I32Eqz
- If
- Return
- End
-
- // dst += 8
- Get R0
- I32Const $8
- I32Add
- Set R0
-
- Br loop
- End
- UNDEF
-
TEXT runtime·wasmDiv(SB), NOSPLIT, $0-0
Get R0
I64Const $-0x8000000000000000
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index 1467b4d..777726f 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -8,6 +8,9 @@
#include "time_windows.h"
#include "cgo/abi_amd64.h"
+// Offsets into Thread Environment Block (pointer in GS)
+#define TEB_TlsSlots 0x1480
+
// void runtime·asmstdcall(void *c);
TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0
// asmcgocall will put first argument into CX.
@@ -116,6 +119,7 @@
// Make stack space for the rest of the function.
ADJSP $48
+ MOVQ CX, R13 // save exception address
MOVQ AX, R15 // save handler address
// find g
@@ -153,8 +157,8 @@
MOVQ DI, SP
g0:
- MOVQ 0(CX), BX // ExceptionRecord*
- MOVQ 8(CX), CX // Context*
+ MOVQ 0(R13), BX // ExceptionRecord*
+ MOVQ 8(R13), CX // Context*
MOVQ BX, 0(SP)
MOVQ CX, 8(SP)
MOVQ DX, 16(SP)
@@ -162,6 +166,8 @@
// AX is set to report result back to Windows
MOVL 24(SP), AX
+ MOVQ SP, DI // save g0 SP
+
// switch back to original stack and g
// no-op if we never left.
MOVQ 40(SP), SP
@@ -169,12 +175,54 @@
get_tls(BP)
MOVQ DX, g(BP)
+ // if return value is CONTINUE_SEARCH, do not set up control
+ // flow guard workaround.
+ CMPQ AX, $0
+ JEQ done
+
+ // Check if we need to set up the control flow guard workaround.
+ // On Windows, the stack pointer in the context must lie within
+ // system stack limits when we resume from exception.
+ // Store the resume SP and PC in alternate registers
+ // and return to sigresume on the g0 stack.
+ // sigresume makes no use of the stack at all,
+ // loading SP from R8 and jumping to R9.
+ // Note that smashing R8 and R9 is only safe because we know sigpanic
+ // will not actually return to the original frame, so the registers
+ // are effectively dead. But this does mean we can't use the
+ // same mechanism for async preemption.
+ MOVQ 8(R13), CX
+ MOVQ $sigresume<>(SB), BX
+ CMPQ BX, context_rip(CX)
+ JEQ done // do not clobber saved SP/PC
+
+ // Save resume SP and PC into R8, R9.
+ MOVQ context_rsp(CX), BX
+ MOVQ BX, context_r8(CX)
+ MOVQ context_rip(CX), BX
+ MOVQ BX, context_r9(CX)
+
+ // Set up context record to return to sigresume on g0 stack
+ MOVD DI, BX
+ MOVD BX, context_rsp(CX)
+ MOVD $sigresume<>(SB), BX
+ MOVD BX, context_rip(CX)
+
done:
ADJSP $-48
POP_REGS_HOST_TO_ABI0()
RET
+// Trampoline to resume execution from exception handler.
+// This is part of the control flow guard workaround.
+// It switches stacks and jumps to the continuation address.
+// R8 and R9 are set above at the end of sigtramp<>
+// in the context that starts executing at sigresume<>.
+TEXT sigresume<>(SB),NOSPLIT|NOFRAME,$0
+ MOVQ R8, SP
+ JMP R9
+
TEXT runtime·exceptiontramp(SB),NOSPLIT|NOFRAME,$0
MOVQ $runtime·exceptionhandler(SB), AX
JMP sigtramp<>(SB)
@@ -258,10 +306,10 @@
MOVQ AX, g_stackguard1(DX)
// Set up tls.
- LEAQ m_tls(CX), SI
- MOVQ SI, 0x28(GS)
+ LEAQ m_tls(CX), DI
MOVQ CX, g_m(DX)
- MOVQ DX, g(SI)
+ MOVQ DX, g(DI)
+ CALL runtime·settls(SB) // clobbers CX
CALL runtime·stackcheck(SB) // clobbers AX,CX
CALL runtime·mstart(SB)
@@ -273,7 +321,8 @@
// set tls base to DI
TEXT runtime·settls(SB),NOSPLIT,$0
- MOVQ DI, 0x28(GS)
+ MOVQ runtime·tls_g(SB), CX
+ MOVQ DI, 0(CX)(GS)
RET
// Runs on OS stack.
@@ -359,3 +408,32 @@
LEAQ m_tls(AX), DI
CALL runtime·settls(SB)
RET
+
+// This is called from rt0_go, which runs on the system stack
+// using the initial stack allocated by the OS.
+TEXT runtime·wintls(SB),NOSPLIT|NOFRAME,$0
+ // Allocate a TLS slot to hold g across calls to external code
+ MOVQ SP, AX
+ ANDQ $~15, SP // alignment as per Windows requirement
+ SUBQ $48, SP // room for SP and 4 args as per Windows requirement
+ // plus one extra word to keep stack 16 bytes aligned
+ MOVQ AX, 32(SP)
+ MOVQ runtime·_TlsAlloc(SB), AX
+ CALL AX
+ MOVQ 32(SP), SP
+
+ MOVQ AX, CX // TLS index
+
+ // Assert that slot is less than 64 so we can use _TEB->TlsSlots
+ CMPQ CX, $64
+ JB ok
+ CALL runtime·abort(SB)
+ok:
+ // Convert the TLS index at CX into
+ // an offset from TEB_TlsSlots.
+ SHLQ $3, CX
+
+ // Save offset from TLS into tls_g.
+ ADDQ $TEB_TlsSlots, CX
+ MOVQ CX, runtime·tls_g(SB)
+ RET
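runtime·wintls above turns the index returned by TlsAlloc into a byte offset inside the TEB: indices below 64 live directly in the TlsSlots array at TEB_TlsSlots + 8*index (the 0x1480 constant defined at the top of this file), which is why the code aborts on larger indices. A standalone sketch of that computation (the helper name is illustrative):

package main

import "fmt"

// tlsSlotOffset mirrors the SHLQ $3 / ADDQ $TEB_TlsSlots sequence above.
func tlsSlotOffset(index uint32) (uint32, bool) {
	const tebTlsSlots = 0x1480 // offset of TlsSlots[] in the amd64 TEB
	if index >= 64 {
		return 0, false // slots 64+ live in TlsExpansionSlots, not handled here
	}
	return tebTlsSlots + 8*index, true
}

func main() {
	off, ok := tlsSlotOffset(5)
	fmt.Printf("%#x %v\n", off, ok) // 0x14a8 true
}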
diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s
index 5dc576a..db6d8f1 100644
--- a/src/runtime/sys_windows_arm.s
+++ b/src/runtime/sys_windows_arm.s
@@ -123,8 +123,14 @@
MOVW R1, R7 // Save param1
BL runtime·load_g(SB)
- CMP $0, g // is there a current g?
- BL.EQ runtime·badsignal2(SB)
+ CMP $0, g // is there a current g?
+ BNE g_ok
+ ADD $(8+20), R13 // free locals
+ MOVM.IA.W (R13), [R3, R4-R11, R14] // pop {r3, r4-r11, lr}
+ MOVW $0, R0 // continue
+ BEQ return
+
+g_ok:
// save g and SP in case of stack switch
MOVW R13, 24(R13)
diff --git a/src/runtime/sys_windows_arm64.s b/src/runtime/sys_windows_arm64.s
index 024625f..4702a4d 100644
--- a/src/runtime/sys_windows_arm64.s
+++ b/src/runtime/sys_windows_arm64.s
@@ -113,7 +113,8 @@
MOVD $runtime·badsignalmsg(SB), R1 // lpBuffer
MOVD $runtime·badsignallen(SB), R2 // lpNumberOfBytesToWrite
MOVD (R2), R2
- MOVD R13, R3 // lpNumberOfBytesWritten
+ // point R3 to stack local that will receive number of bytes written
+ ADD $16, RSP, R3 // lpNumberOfBytesWritten
MOVD $0, R4 // lpOverlapped
MOVD runtime·_WriteFile(SB), R12
SUB $16, RSP // skip over saved frame pointer below RSP
@@ -146,10 +147,15 @@
MOVD g, R17 // saved R28 (callee-save from Windows, not really g)
BL runtime·load_g(SB) // smashes R0, R27, R28 (g)
- CMP $0, g // is there a current g?
- BNE 2(PC)
- BL runtime·badsignal2(SB)
+ CMP $0, g // is there a current g?
+ BNE g_ok
+ MOVD R7, LR
+ MOVD R16, R27 // restore R27
+ MOVD R17, g // restore R28
+ MOVD $0, R0 // continue
+ RET
+g_ok:
// Do we need to switch to the g0 stack?
MOVD g, R3 // R3 = oldg (for sigtramp_g0)
MOVD g_m(g), R2 // R2 = m
diff --git a/src/runtime/syscall_aix.go b/src/runtime/syscall_aix.go
index f294922..cc9e912 100644
--- a/src/runtime/syscall_aix.go
+++ b/src/runtime/syscall_aix.go
@@ -127,9 +127,9 @@
// like close, but must not split stack, for fork.
//
-//go:linkname syscall_close syscall.close
+//go:linkname syscall_closeFD syscall.closeFD
//go:nosplit
-func syscall_close(fd int32) int32 {
+func syscall_closeFD(fd int32) int32 {
_, err := syscall1(&libc_close, uintptr(fd))
return int32(err)
}
diff --git a/src/runtime/syscall_unix_test.go b/src/runtime/syscall_unix_test.go
new file mode 100644
index 0000000..2a69c40
--- /dev/null
+++ b/src/runtime/syscall_unix_test.go
@@ -0,0 +1,25 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package runtime_test
+
+import (
+ "runtime"
+ "syscall"
+ "testing"
+)
+
+func TestSyscallFlagAlignment(t *testing.T) {
+ // TODO(mknyszek): Check other flags.
+ check := func(name string, got, want int) {
+ if got != want {
+ t.Errorf("flag %s does not line up: got %d, want %d", name, got, want)
+ }
+ }
+ check("O_WRONLY", runtime.O_WRONLY, syscall.O_WRONLY)
+ check("O_CREAT", runtime.O_CREAT, syscall.O_CREAT)
+ check("O_TRUNC", runtime.O_TRUNC, syscall.O_TRUNC)
+}
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index 37f8f40..abc2838 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -5,7 +5,6 @@
package runtime_test
import (
- "bytes"
"fmt"
"internal/abi"
"internal/syscall/windows/sysdll"
@@ -629,7 +628,7 @@
}
func TestRaiseException(t *testing.T) {
- if testenv.Builder() == "windows-amd64-2012" {
+ if strings.HasPrefix(testenv.Builder(), "windows-amd64-2012") {
testenv.SkipFlaky(t, 49681)
}
o := runTestProg(t, "testprog", "RaiseException")
@@ -1044,7 +1043,7 @@
cmd := exec.Command(os.Args[0], "-test.run=TestNumCPU")
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
- var buf bytes.Buffer
+ var buf strings.Builder
cmd.Stdout = &buf
cmd.Stderr = &buf
cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: _CREATE_SUSPENDED}
@@ -1054,7 +1053,7 @@
}
defer func() {
err = cmd.Wait()
- childOutput := string(buf.Bytes())
+ childOutput := buf.String()
if err != nil {
t.Fatalf("child failed: %v: %v", err, childOutput)
}
@@ -1216,7 +1215,7 @@
// wantLoadLibraryEx reports whether we expect LoadLibraryEx to work for tests.
func wantLoadLibraryEx() bool {
- return testenv.Builder() == "windows-amd64-gce" || testenv.Builder() == "windows-386-gce"
+ return testenv.Builder() != "" && (runtime.GOARCH == "amd64" || runtime.GOARCH == "386")
}
func TestLoadLibraryEx(t *testing.T) {
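The test above swaps bytes.Buffer for strings.Builder as the command's combined output sink: both satisfy io.Writer, and strings.Builder's String method hands back the accumulated text without the extra copy that string(buf.Bytes()) would make. A tiny standalone illustration:

package main

import (
	"fmt"
	"strings"
)

func main() {
	var buf strings.Builder // implements io.Writer, so it can be cmd.Stdout
	fmt.Fprintf(&buf, "child output: %s\n", "ok")
	fmt.Print(buf.String())
}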
diff --git a/src/runtime/testdata/testexithooks/testexithooks.go b/src/runtime/testdata/testexithooks/testexithooks.go
new file mode 100644
index 0000000..ceb3326
--- /dev/null
+++ b/src/runtime/testdata/testexithooks/testexithooks.go
@@ -0,0 +1,85 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+ "os"
+ _ "unsafe"
+)
+
+var modeflag = flag.String("mode", "", "mode to run in")
+
+func main() {
+ flag.Parse()
+ switch *modeflag {
+ case "simple":
+ testSimple()
+ case "goodexit":
+ testGoodExit()
+ case "badexit":
+ testBadExit()
+ case "panics":
+ testPanics()
+ case "callsexit":
+ testHookCallsExit()
+ default:
+ panic("unknown mode")
+ }
+}
+
+//go:linkname runtime_addExitHook runtime.addExitHook
+func runtime_addExitHook(f func(), runOnNonZeroExit bool)
+
+func testSimple() {
+ f1 := func() { println("foo") }
+ f2 := func() { println("bar") }
+ runtime_addExitHook(f1, false)
+ runtime_addExitHook(f2, false)
+ // no explicit call to os.Exit
+}
+
+func testGoodExit() {
+ f1 := func() { println("apple") }
+ f2 := func() { println("orange") }
+ runtime_addExitHook(f1, false)
+ runtime_addExitHook(f2, false)
+ // explicit call to os.Exit
+ os.Exit(0)
+}
+
+func testBadExit() {
+ f1 := func() { println("blog") }
+ f2 := func() { println("blix") }
+ f3 := func() { println("blek") }
+ f4 := func() { println("blub") }
+ f5 := func() { println("blat") }
+ runtime_addExitHook(f1, false)
+ runtime_addExitHook(f2, true)
+ runtime_addExitHook(f3, false)
+ runtime_addExitHook(f4, true)
+ runtime_addExitHook(f5, false)
+ os.Exit(1)
+}
+
+func testPanics() {
+ f1 := func() { println("ok") }
+ f2 := func() { panic("BADBADBAD") }
+ f3 := func() { println("good") }
+ runtime_addExitHook(f1, true)
+ runtime_addExitHook(f2, true)
+ runtime_addExitHook(f3, true)
+ os.Exit(0)
+}
+
+func testHookCallsExit() {
+ f1 := func() { println("ok") }
+ f2 := func() { os.Exit(1) }
+ f3 := func() { println("good") }
+ runtime_addExitHook(f1, true)
+ runtime_addExitHook(f2, true)
+ runtime_addExitHook(f3, true)
+ os.Exit(1)
+}
diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go
index b27e5f7..60e71e6 100644
--- a/src/runtime/testdata/testprog/checkptr.go
+++ b/src/runtime/testdata/testprog/checkptr.go
@@ -20,6 +20,8 @@
register("CheckPtrSmall", CheckPtrSmall)
register("CheckPtrSliceOK", CheckPtrSliceOK)
register("CheckPtrSliceFail", CheckPtrSliceFail)
+ register("CheckPtrStringOK", CheckPtrStringOK)
+ register("CheckPtrStringFail", CheckPtrStringFail)
register("CheckPtrAlignmentNested", CheckPtrAlignmentNested)
}
@@ -98,6 +100,17 @@
sink2 = unsafe.Slice(p, 100)
}
+func CheckPtrStringOK() {
+ p := new([4]byte)
+ sink2 = unsafe.String(&p[1], 3)
+}
+
+func CheckPtrStringFail() {
+ p := new(byte)
+ sink2 = p
+ sink2 = unsafe.String(p, 100)
+}
+
func CheckPtrAlignmentNested() {
s := make([]int8, 100)
p := unsafe.Pointer(&s[0])
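CheckPtrStringOK and CheckPtrStringFail above exercise unsafe.String (new in Go 1.20), which builds a string from a *byte and a length; the Fail case deliberately claims far more bytes than the pointee owns so the checkptr instrumentation can flag it. A minimal valid use, for contrast:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	b := []byte("hello")
	// Valid: the pointer and length describe memory that b really owns.
	s := unsafe.String(&b[0], len(b))
	fmt.Println(s) // hello
}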
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index 0f44575..5dc85fb 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -396,7 +396,7 @@
// should do considerably better than this bound.
bound := int64(myLimit + 16<<20)
start := time.Now()
- for time.Now().Sub(start) < 200*time.Millisecond {
+ for time.Since(start) < 200*time.Millisecond {
metrics.Read(m[:])
retained := int64(m[0].Value.Uint64() - m[1].Value.Uint64())
if retained > bound {
diff --git a/src/runtime/testdata/testprog/numcpu_freebsd.go b/src/runtime/testdata/testprog/numcpu_freebsd.go
index 7209f67..310c212 100644
--- a/src/runtime/testdata/testprog/numcpu_freebsd.go
+++ b/src/runtime/testdata/testprog/numcpu_freebsd.go
@@ -48,7 +48,7 @@
fmt.Printf("fail to launch '%s', error: %s, output: %s\n", strings.Join(cmd.Args, " "), err, output)
return
}
- if bytes.Equal(output, []byte("1\n")) == false {
+ if !bytes.Equal(output, []byte("1\n")) {
// SMP mode deactivated in kernel.
fmt.Println("OK")
return
diff --git a/src/runtime/testdata/testprog/traceback_ancestors.go b/src/runtime/testdata/testprog/traceback_ancestors.go
index 1d0d00b..8fc1aa7 100644
--- a/src/runtime/testdata/testprog/traceback_ancestors.go
+++ b/src/runtime/testdata/testprog/traceback_ancestors.go
@@ -87,9 +87,10 @@
buf := make([]byte, 128)
runtime.Stack(buf, false)
prefix := []byte("goroutine ")
- if !bytes.HasPrefix(buf, prefix) {
+ var found bool
+ if buf, found = bytes.CutPrefix(buf, prefix); !found {
panic(fmt.Sprintf("expected %q at beginning of traceback:\n%s", prefix, buf))
}
- id, _, _ := bytes.Cut(bytes.TrimPrefix(buf, prefix), []byte(" "))
+ id, _, _ := bytes.Cut(buf, []byte(" "))
return string(id)
}
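The traceback helper above now uses bytes.CutPrefix (Go 1.20), which folds the old HasPrefix check and TrimPrefix call into one step that also reports whether the prefix was present. A short standalone equivalent of the parsing it performs:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	buf := []byte("goroutine 17 [running]:")
	rest, found := bytes.CutPrefix(buf, []byte("goroutine "))
	if !found {
		panic("unexpected traceback prefix")
	}
	id, _, _ := bytes.Cut(rest, []byte(" "))
	fmt.Println(string(id)) // 17
}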
diff --git a/src/runtime/testdata/testprog/unsafe.go b/src/runtime/testdata/testprog/unsafe.go
new file mode 100644
index 0000000..021b08f
--- /dev/null
+++ b/src/runtime/testdata/testprog/unsafe.go
@@ -0,0 +1,12 @@
+package main
+
+import "unsafe"
+
+func init() {
+ register("panicOnNilAndEleSizeIsZero", panicOnNilAndEleSizeIsZero)
+}
+
+func panicOnNilAndEleSizeIsZero() {
+ var p *struct{}
+ _ = unsafe.Slice(p, 5)
+}
diff --git a/src/runtime/testdata/testprogcgo/issue29707.go b/src/runtime/testdata/testprogcgo/issue29707.go
new file mode 100644
index 0000000..7d9299f
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/issue29707.go
@@ -0,0 +1,60 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !plan9 && !windows
+// +build !plan9,!windows
+
+// This is for issue #29707
+
+package main
+
+/*
+#include <pthread.h>
+
+extern void* callbackTraceParser(void*);
+typedef void* (*cbTraceParser)(void*);
+
+static void testCallbackTraceParser(cbTraceParser cb) {
+ pthread_t thread_id;
+ pthread_create(&thread_id, NULL, cb, NULL);
+ pthread_join(thread_id, NULL);
+}
+*/
+import "C"
+
+import (
+ "bytes"
+ "fmt"
+ traceparser "internal/trace"
+ "runtime/trace"
+ "time"
+ "unsafe"
+)
+
+func init() {
+ register("CgoTraceParser", CgoTraceParser)
+}
+
+//export callbackTraceParser
+func callbackTraceParser(unsafe.Pointer) unsafe.Pointer {
+ time.Sleep(time.Millisecond)
+ return nil
+}
+
+func CgoTraceParser() {
+ buf := new(bytes.Buffer)
+
+ trace.Start(buf)
+ C.testCallbackTraceParser(C.cbTraceParser(C.callbackTraceParser))
+ trace.Stop()
+
+ _, err := traceparser.Parse(buf, "")
+ if err == traceparser.ErrTimeOrder {
+ fmt.Println("ErrTimeOrder")
+ } else if err != nil {
+ fmt.Println("Parse error: ", err)
+ } else {
+ fmt.Println("OK")
+ }
+}
diff --git a/src/runtime/testdata/testprogcgo/segv.go b/src/runtime/testdata/testprogcgo/segv.go
index 0632475..bf5aa31 100644
--- a/src/runtime/testdata/testprogcgo/segv.go
+++ b/src/runtime/testdata/testprogcgo/segv.go
@@ -2,18 +2,16 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !plan9 && !windows
-// +build !plan9,!windows
+//go:build unix
+// +build unix
package main
+// #include <unistd.h>
// static void nop() {}
import "C"
-import (
- "syscall"
- "time"
-)
+import "syscall"
func init() {
register("Segv", Segv)
@@ -35,8 +33,8 @@
syscall.Kill(syscall.Getpid(), syscall.SIGSEGV)
- // Give the OS time to deliver the signal.
- time.Sleep(time.Second)
+ // Wait for the OS to deliver the signal.
+ C.pause()
}
func SegvInCgo() {
@@ -52,6 +50,6 @@
syscall.Kill(syscall.Getpid(), syscall.SIGSEGV)
- // Give the OS time to deliver the signal.
- time.Sleep(time.Second)
+ // Wait for the OS to deliver the signal.
+ C.pause()
}
diff --git a/src/runtime/testdata/testprogcgo/segv_linux.go b/src/runtime/testdata/testprogcgo/segv_linux.go
new file mode 100644
index 0000000..fe93778
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/segv_linux.go
@@ -0,0 +1,51 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// #include <unistd.h>
+// static void nop() {}
+import "C"
+
+import "syscall"
+
+func init() {
+ register("TgkillSegv", TgkillSegv)
+ register("TgkillSegvInCgo", TgkillSegvInCgo)
+}
+
+func TgkillSegv() {
+ c := make(chan bool)
+ go func() {
+ close(c)
+ for i := 0; ; i++ {
+ // Sum defined in segv.go.
+ Sum += i
+ }
+ }()
+
+ <-c
+
+ syscall.Tgkill(syscall.Getpid(), syscall.Gettid(), syscall.SIGSEGV)
+
+ // Wait for the OS to deliver the signal.
+ C.pause()
+}
+
+func TgkillSegvInCgo() {
+ c := make(chan bool)
+ go func() {
+ close(c)
+ for {
+ C.nop()
+ }
+ }()
+
+ <-c
+
+ syscall.Tgkill(syscall.Getpid(), syscall.Gettid(), syscall.SIGSEGV)
+
+ // Wait for the OS to deliver the signal.
+ C.pause()
+}
diff --git a/src/runtime/testdata/testprogcgo/sigfwd.go b/src/runtime/testdata/testprogcgo/sigfwd.go
new file mode 100644
index 0000000..f6a0c03
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/sigfwd.go
@@ -0,0 +1,87 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package main
+
+import (
+ "fmt"
+ "os"
+)
+
+/*
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+sig_atomic_t expectCSigsegv;
+int *sigfwdP;
+
+static void sigsegv() {
+ expectCSigsegv = 1;
+ *sigfwdP = 1;
+ fprintf(stderr, "ERROR: C SIGSEGV not thrown on caught?.\n");
+ exit(2);
+}
+
+static void segvhandler(int signum) {
+ if (signum == SIGSEGV) {
+ if (expectCSigsegv == 0) {
+ fprintf(stderr, "SIGSEGV caught in C unexpectedly\n");
+ exit(1);
+ }
+ fprintf(stdout, "OK\n");
+ exit(0); // success
+ }
+}
+
+static void __attribute__ ((constructor)) sigsetup(void) {
+ if (getenv("GO_TEST_CGOSIGFWD") == NULL) {
+ return;
+ }
+
+ struct sigaction act;
+
+ memset(&act, 0, sizeof act);
+ act.sa_handler = segvhandler;
+ sigaction(SIGSEGV, &act, NULL);
+}
+*/
+import "C"
+
+func init() {
+ register("CgoSigfwd", CgoSigfwd)
+}
+
+var nilPtr *byte
+
+func f() (ret bool) {
+ defer func() {
+ if recover() == nil {
+ fmt.Fprintf(os.Stderr, "ERROR: couldn't raise SIGSEGV in Go\n")
+ C.exit(2)
+ }
+ ret = true
+ }()
+ *nilPtr = 1
+ return false
+}
+
+func CgoSigfwd() {
+ if os.Getenv("GO_TEST_CGOSIGFWD") == "" {
+ fmt.Fprintf(os.Stderr, "test must be run with GO_TEST_CGOSIGFWD set\n")
+ os.Exit(1)
+ }
+
+ // Test that the signal originating in Go is handled (and recovered) by Go.
+ if !f() {
+ fmt.Fprintf(os.Stderr, "couldn't recover from SIGSEGV in Go.\n")
+ C.exit(2)
+ }
+
+ // Test that the signal originating in C is handled by C.
+ C.sigsegv()
+}
diff --git a/src/runtime/testdata/testwinlibthrow/main.go b/src/runtime/testdata/testwinlibthrow/main.go
new file mode 100644
index 0000000..ce0c92f
--- /dev/null
+++ b/src/runtime/testdata/testwinlibthrow/main.go
@@ -0,0 +1,19 @@
+package main
+
+import (
+ "os"
+ "syscall"
+)
+
+func main() {
+ dll := syscall.MustLoadDLL("veh.dll")
+ RaiseNoExcept := dll.MustFindProc("RaiseNoExcept")
+ ThreadRaiseNoExcept := dll.MustFindProc("ThreadRaiseNoExcept")
+
+ thread := len(os.Args) > 1 && os.Args[1] == "thread"
+ if !thread {
+ RaiseNoExcept.Call()
+ } else {
+ ThreadRaiseNoExcept.Call()
+ }
+}
diff --git a/src/runtime/testdata/testwinlibthrow/veh.c b/src/runtime/testdata/testwinlibthrow/veh.c
new file mode 100644
index 0000000..08c1f9e
--- /dev/null
+++ b/src/runtime/testdata/testwinlibthrow/veh.c
@@ -0,0 +1,26 @@
+//go:build ignore
+
+#include <windows.h>
+
+__declspec(dllexport)
+void RaiseNoExcept(void)
+{
+ RaiseException(42, 0, 0, 0);
+}
+
+static DWORD WINAPI ThreadRaiser(void* Context)
+{
+ RaiseNoExcept();
+ return 0;
+}
+
+__declspec(dllexport)
+void ThreadRaiseNoExcept(void)
+{
+ HANDLE thread = CreateThread(0, 0, ThreadRaiser, 0, 0, 0);
+ if (0 != thread)
+ {
+ WaitForSingleObject(thread, INFINITE);
+ CloseHandle(thread);
+ }
+}
diff --git a/src/runtime/time.go b/src/runtime/time.go
index 80b0bfb..6cd70b7 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -36,7 +36,7 @@
nextwhen int64
// The status field holds one of the values below.
- status uint32
+ status atomic.Uint32
}
// Code outside this file has to be careful in using a timer value.
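The time.go hunks below convert raw atomic.Load/Cas/Xadd calls on plain integer fields into methods on typed atomic fields (status becomes atomic.Uint32; numTimers, deletedTimers, timer0When and timerModifiedEarliest follow the same pattern). A minimal before/after sketch using the public sync/atomic.Uint32, which exposes the same Load, Store, CompareAndSwap and Add methods these hunks rely on:

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	timerNoStatus uint32 = iota
	timerWaiting
	timerModifying
)

type timer struct {
	status atomic.Uint32 // previously: status uint32, touched via atomic.Load/Cas
}

func main() {
	var t timer
	t.status.Store(timerWaiting) // was: atomic.Store(&t.status, timerWaiting)
	// was: atomic.Cas(&t.status, timerWaiting, timerModifying)
	if t.status.CompareAndSwap(timerWaiting, timerModifying) {
		fmt.Println("status is now", t.status.Load()) // 2
	}
}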
@@ -249,6 +249,7 @@
goready(arg.(*g), 0)
}
+// Note: this changes some unsynchronized operations to synchronized operations
// addtimer adds a timer to the current P.
// This should only be called with a newly created timer.
// That avoids the risk of changing the when field of a timer in some P's heap,
@@ -263,10 +264,10 @@
if t.period < 0 {
throw("timer period must be non-negative")
}
- if t.status != timerNoStatus {
+ if t.status.Load() != timerNoStatus {
throw("addtimer called with initialized timer")
}
- t.status = timerWaiting
+ t.status.Store(timerWaiting)
when := t.when
@@ -289,7 +290,7 @@
func doaddtimer(pp *p, t *timer) {
// Timers rely on the network poller, so make sure the poller
// has started.
- if netpollInited == 0 {
+ if netpollInited.Load() == 0 {
netpollGenericInit()
}
@@ -301,9 +302,9 @@
pp.timers = append(pp.timers, t)
siftupTimer(pp.timers, i)
if t == pp.timers[0] {
- atomic.Store64(&pp.timer0When, uint64(t.when))
+ pp.timer0When.Store(t.when)
}
- atomic.Xadd(&pp.numTimers, 1)
+ pp.numTimers.Add(1)
}
// deltimer deletes the timer t. It may be on some other P, so we can't
@@ -312,21 +313,21 @@
// Reports whether the timer was removed before it was run.
func deltimer(t *timer) bool {
for {
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerWaiting, timerModifiedLater:
// Prevent preemption while the timer is in timerModifying.
// This could lead to a self-deadlock. See #38070.
mp := acquirem()
- if atomic.Cas(&t.status, s, timerModifying) {
+ if t.status.CompareAndSwap(s, timerModifying) {
// Must fetch t.pp before changing status,
// as cleantimers in another goroutine
// can clear t.pp of a timerDeleted timer.
tpp := t.pp.ptr()
- if !atomic.Cas(&t.status, timerModifying, timerDeleted) {
+ if !t.status.CompareAndSwap(timerModifying, timerDeleted) {
badTimer()
}
releasem(mp)
- atomic.Xadd(&tpp.deletedTimers, 1)
+ tpp.deletedTimers.Add(1)
// Timer was not yet run.
return true
} else {
@@ -336,15 +337,15 @@
// Prevent preemption while the timer is in timerModifying.
// This could lead to a self-deadlock. See #38070.
mp := acquirem()
- if atomic.Cas(&t.status, s, timerModifying) {
+ if t.status.CompareAndSwap(s, timerModifying) {
// Must fetch t.pp before setting status
// to timerDeleted.
tpp := t.pp.ptr()
- if !atomic.Cas(&t.status, timerModifying, timerDeleted) {
+ if !t.status.CompareAndSwap(timerModifying, timerDeleted) {
badTimer()
}
releasem(mp)
- atomic.Xadd(&tpp.deletedTimers, 1)
+ tpp.deletedTimers.Add(1)
// Timer was not yet run.
return true
} else {
@@ -397,10 +398,10 @@
if i == 0 {
updateTimer0When(pp)
}
- n := atomic.Xadd(&pp.numTimers, -1)
+ n := pp.numTimers.Add(-1)
if n == 0 {
// If there are no timers, then clearly none are modified.
- atomic.Store64(&pp.timerModifiedEarliest, 0)
+ pp.timerModifiedEarliest.Store(0)
}
return smallestChanged
}
@@ -425,10 +426,10 @@
siftdownTimer(pp.timers, 0)
}
updateTimer0When(pp)
- n := atomic.Xadd(&pp.numTimers, -1)
+ n := pp.numTimers.Add(-1)
if n == 0 {
// If there are no timers, then clearly none are modified.
- atomic.Store64(&pp.timerModifiedEarliest, 0)
+ pp.timerModifiedEarliest.Store(0)
}
}
@@ -449,12 +450,12 @@
var mp *m
loop:
for {
- switch status = atomic.Load(&t.status); status {
+ switch status = t.status.Load(); status {
case timerWaiting, timerModifiedEarlier, timerModifiedLater:
// Prevent preemption while the timer is in timerModifying.
// This could lead to a self-deadlock. See #38070.
mp = acquirem()
- if atomic.Cas(&t.status, status, timerModifying) {
+ if t.status.CompareAndSwap(status, timerModifying) {
pending = true // timer not yet run
break loop
}
@@ -466,7 +467,7 @@
// Timer was already run and t is no longer in a heap.
// Act like addtimer.
- if atomic.Cas(&t.status, status, timerModifying) {
+ if t.status.CompareAndSwap(status, timerModifying) {
wasRemoved = true
pending = false // timer already run or stopped
break loop
@@ -476,8 +477,8 @@
// Prevent preemption while the timer is in timerModifying.
// This could lead to a self-deadlock. See #38070.
mp = acquirem()
- if atomic.Cas(&t.status, status, timerModifying) {
- atomic.Xadd(&t.pp.ptr().deletedTimers, -1)
+ if t.status.CompareAndSwap(status, timerModifying) {
+ t.pp.ptr().deletedTimers.Add(-1)
pending = false // timer already stopped
break loop
}
@@ -506,7 +507,7 @@
lock(&pp.timersLock)
doaddtimer(pp, t)
unlock(&pp.timersLock)
- if !atomic.Cas(&t.status, timerModifying, timerWaiting) {
+ if !t.status.CompareAndSwap(timerModifying, timerWaiting) {
badTimer()
}
releasem(mp)
@@ -531,7 +532,7 @@
}
// Set the new status of the timer.
- if !atomic.Cas(&t.status, timerModifying, newStatus) {
+ if !t.status.CompareAndSwap(timerModifying, newStatus) {
badTimer()
}
releasem(mp)
@@ -577,18 +578,18 @@
if t.pp.ptr() != pp {
throw("cleantimers: bad p")
}
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerDeleted:
- if !atomic.Cas(&t.status, s, timerRemoving) {
+ if !t.status.CompareAndSwap(s, timerRemoving) {
continue
}
dodeltimer0(pp)
- if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ if !t.status.CompareAndSwap(timerRemoving, timerRemoved) {
badTimer()
}
- atomic.Xadd(&pp.deletedTimers, -1)
+ pp.deletedTimers.Add(-1)
case timerModifiedEarlier, timerModifiedLater:
- if !atomic.Cas(&t.status, s, timerMoving) {
+ if !t.status.CompareAndSwap(s, timerMoving) {
continue
}
// Now we can change the when field.
@@ -596,7 +597,7 @@
// Move t to the right position.
dodeltimer0(pp)
doaddtimer(pp, t)
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
default:
@@ -614,30 +615,30 @@
for _, t := range timers {
loop:
for {
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerWaiting:
- if !atomic.Cas(&t.status, s, timerMoving) {
+ if !t.status.CompareAndSwap(s, timerMoving) {
continue
}
t.pp = 0
doaddtimer(pp, t)
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
break loop
case timerModifiedEarlier, timerModifiedLater:
- if !atomic.Cas(&t.status, s, timerMoving) {
+ if !t.status.CompareAndSwap(s, timerMoving) {
continue
}
t.when = t.nextwhen
t.pp = 0
doaddtimer(pp, t)
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
break loop
case timerDeleted:
- if !atomic.Cas(&t.status, s, timerRemoved) {
+ if !t.status.CompareAndSwap(s, timerRemoved) {
continue
}
t.pp = 0
@@ -671,8 +672,8 @@
// a lot of timers back and forth if the timers rarely expire.
// We'll postpone looking through all the adjusted timers until
// one would actually expire.
- first := atomic.Load64(&pp.timerModifiedEarliest)
- if first == 0 || int64(first) > now {
+ first := pp.timerModifiedEarliest.Load()
+ if first == 0 || first > now {
if verifyTimers {
verifyTimerHeap(pp)
}
@@ -680,7 +681,7 @@
}
// We are going to clear all timerModifiedEarlier timers.
- atomic.Store64(&pp.timerModifiedEarliest, 0)
+ pp.timerModifiedEarliest.Store(0)
var moved []*timer
for i := 0; i < len(pp.timers); i++ {
@@ -688,20 +689,20 @@
if t.pp.ptr() != pp {
throw("adjusttimers: bad p")
}
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerDeleted:
- if atomic.Cas(&t.status, s, timerRemoving) {
+ if t.status.CompareAndSwap(s, timerRemoving) {
changed := dodeltimer(pp, i)
- if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ if !t.status.CompareAndSwap(timerRemoving, timerRemoved) {
badTimer()
}
- atomic.Xadd(&pp.deletedTimers, -1)
+ pp.deletedTimers.Add(-1)
// Go back to the earliest changed heap entry.
// "- 1" because the loop will add 1.
i = changed - 1
}
case timerModifiedEarlier, timerModifiedLater:
- if atomic.Cas(&t.status, s, timerMoving) {
+ if t.status.CompareAndSwap(s, timerMoving) {
// Now we can change the when field.
t.when = t.nextwhen
// Take t off the heap, and hold onto it.
@@ -741,7 +742,7 @@
func addAdjustedTimers(pp *p, moved []*timer) {
for _, t := range moved {
doaddtimer(pp, t)
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
}
@@ -754,8 +755,8 @@
//
//go:nowritebarrierrec
func nobarrierWakeTime(pp *p) int64 {
- next := int64(atomic.Load64(&pp.timer0When))
- nextAdj := int64(atomic.Load64(&pp.timerModifiedEarliest))
+ next := pp.timer0When.Load()
+ nextAdj := pp.timerModifiedEarliest.Load()
if next == 0 || (nextAdj != 0 && nextAdj < next) {
next = nextAdj
}
@@ -776,14 +777,14 @@
if t.pp.ptr() != pp {
throw("runtimer: bad p")
}
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerWaiting:
if t.when > now {
// Not ready to run.
return t.when
}
- if !atomic.Cas(&t.status, s, timerRunning) {
+ if !t.status.CompareAndSwap(s, timerRunning) {
continue
}
// Note that runOneTimer may temporarily unlock
@@ -792,26 +793,26 @@
return 0
case timerDeleted:
- if !atomic.Cas(&t.status, s, timerRemoving) {
+ if !t.status.CompareAndSwap(s, timerRemoving) {
continue
}
dodeltimer0(pp)
- if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ if !t.status.CompareAndSwap(timerRemoving, timerRemoved) {
badTimer()
}
- atomic.Xadd(&pp.deletedTimers, -1)
+ pp.deletedTimers.Add(-1)
if len(pp.timers) == 0 {
return -1
}
case timerModifiedEarlier, timerModifiedLater:
- if !atomic.Cas(&t.status, s, timerMoving) {
+ if !t.status.CompareAndSwap(s, timerMoving) {
continue
}
t.when = t.nextwhen
dodeltimer0(pp)
doaddtimer(pp, t)
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
@@ -858,14 +859,14 @@
t.when = maxWhen
}
siftdownTimer(pp.timers, 0)
- if !atomic.Cas(&t.status, timerRunning, timerWaiting) {
+ if !t.status.CompareAndSwap(timerRunning, timerWaiting) {
badTimer()
}
updateTimer0When(pp)
} else {
// Remove from heap.
dodeltimer0(pp)
- if !atomic.Cas(&t.status, timerRunning, timerNoStatus) {
+ if !t.status.CompareAndSwap(timerRunning, timerNoStatus) {
badTimer()
}
}
@@ -903,7 +904,7 @@
func clearDeletedTimers(pp *p) {
// We are going to clear all timerModifiedEarlier timers.
// Do this now in case new ones show up while we are looping.
- atomic.Store64(&pp.timerModifiedEarliest, 0)
+ pp.timerModifiedEarliest.Store(0)
cdel := int32(0)
to := 0
@@ -912,7 +913,7 @@
nextTimer:
for _, t := range timers {
for {
- switch s := atomic.Load(&t.status); s {
+ switch s := t.status.Load(); s {
case timerWaiting:
if changedHeap {
timers[to] = t
@@ -921,22 +922,22 @@
to++
continue nextTimer
case timerModifiedEarlier, timerModifiedLater:
- if atomic.Cas(&t.status, s, timerMoving) {
+ if t.status.CompareAndSwap(s, timerMoving) {
t.when = t.nextwhen
timers[to] = t
siftupTimer(timers, to)
to++
changedHeap = true
- if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ if !t.status.CompareAndSwap(timerMoving, timerWaiting) {
badTimer()
}
continue nextTimer
}
case timerDeleted:
- if atomic.Cas(&t.status, s, timerRemoving) {
+ if t.status.CompareAndSwap(s, timerRemoving) {
t.pp = 0
cdel++
- if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ if !t.status.CompareAndSwap(timerRemoving, timerRemoved) {
badTimer()
}
changedHeap = true
@@ -964,8 +965,8 @@
timers[i] = nil
}
- atomic.Xadd(&pp.deletedTimers, -cdel)
- atomic.Xadd(&pp.numTimers, -cdel)
+ pp.deletedTimers.Add(-cdel)
+ pp.numTimers.Add(-cdel)
timers = timers[:to]
pp.timers = timers
@@ -993,7 +994,7 @@
throw("bad timer heap")
}
}
- if numTimers := int(atomic.Load(&pp.numTimers)); len(pp.timers) != numTimers {
+ if numTimers := int(pp.numTimers.Load()); len(pp.timers) != numTimers {
println("timer heap len", len(pp.timers), "!= numTimers", numTimers)
throw("bad timer heap len")
}
@@ -1003,9 +1004,9 @@
// The caller must have locked the timers for pp.
func updateTimer0When(pp *p) {
if len(pp.timers) == 0 {
- atomic.Store64(&pp.timer0When, 0)
+ pp.timer0When.Store(0)
} else {
- atomic.Store64(&pp.timer0When, uint64(pp.timers[0].when))
+ pp.timer0When.Store(pp.timers[0].when)
}
}
@@ -1014,11 +1015,12 @@
// The timers for pp will not be locked.
func updateTimerModifiedEarliest(pp *p, nextwhen int64) {
for {
- old := atomic.Load64(&pp.timerModifiedEarliest)
+ old := pp.timerModifiedEarliest.Load()
if old != 0 && int64(old) < nextwhen {
return
}
- if atomic.Cas64(&pp.timerModifiedEarliest, old, uint64(nextwhen)) {
+
+ if pp.timerModifiedEarliest.CompareAndSwap(old, nextwhen) {
return
}
}
@@ -1039,12 +1041,12 @@
continue
}
- w := int64(atomic.Load64(&pp.timer0When))
+ w := pp.timer0When.Load()
if w != 0 && w < next {
next = w
}
- w = int64(atomic.Load64(&pp.timerModifiedEarliest))
+ w = pp.timerModifiedEarliest.Load()
if w != 0 && w < next {
next = w
}
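The per-P timer bookkeeping fields (timer0When, timerModifiedEarliest, numTimers, deletedTimers) become typed atomics, and updateTimerModifiedEarliest now keeps a running minimum with a Load/CompareAndSwap loop on an atomic.Int64 instead of Cas64 on a raw uint64. A self-contained sketch of that "atomic minimum" pattern using the standard sync/atomic types; updateEarliest and the surrounding layout are illustrative, not runtime code:

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

// earliest tracks the smallest non-zero deadline seen so far;
// zero means "no deadline recorded yet".
var earliest atomic.Int64

// updateEarliest lowers earliest to when unless an even earlier deadline is
// already recorded, mirroring updateTimerModifiedEarliest above.
func updateEarliest(when int64) {
    for {
        old := earliest.Load()
        if old != 0 && old < when {
            return // an earlier deadline is already recorded
        }
        if earliest.CompareAndSwap(old, when) {
            return
        }
        // Lost a race with another updater; reload and retry.
    }
}

func main() {
    var wg sync.WaitGroup
    for _, when := range []int64{500, 100, 300, 200} {
        wg.Add(1)
        go func(w int64) {
            defer wg.Done()
            updateEarliest(w)
        }(when)
    }
    wg.Wait()
    fmt.Println(earliest.Load()) // 100
}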
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 10436d8..e7dfab1 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -109,6 +109,8 @@
// trace is global tracing context.
var trace struct {
+ // trace.lock must only be acquired on the system stack where
+ // stack splits cannot happen while it is held.
lock mutex // protects the following members
lockOwner *g // to avoid deadlocks during recursive lock acquisition
enabled bool // when set runtime traces events
@@ -126,7 +128,6 @@
empty traceBufPtr // stack of empty buffers
fullHead traceBufPtr // queue of full buffers
fullTail traceBufPtr
- reader guintptr // goroutine that called ReadTrace, or nil
stackTab traceStackTable // maps stack traces to unique ids
// cpuLogRead accepts CPU profile samples from the signal handler where
// they're generated. It uses a two-word header to hold the IDs of the P and
@@ -144,6 +145,8 @@
// specific P.
cpuLogBuf traceBufPtr
+ reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil
+
signalLock atomic.Uint32 // protects use of the following member, only usable in signal handlers
cpuLogWrite *profBuf // copy of cpuLogRead for use in signal handlers, set without signalLock
@@ -173,9 +176,8 @@
}
// traceBuf is per-P tracing buffer.
-//
-//go:notinheap
type traceBuf struct {
+ _ sys.NotInHeap
traceBufHeader
arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf
}
@@ -186,7 +188,7 @@
// manipulated in contexts where write barriers are not allowed, so
// this is necessary.
//
-// TODO: Since traceBuf is now go:notinheap, this isn't necessary.
+// TODO: Since traceBuf now embeds runtime/internal/sys.NotInHeap, this isn't necessary.
type traceBufPtr uintptr
func (tp traceBufPtr) ptr() *traceBuf { return (*traceBuf)(unsafe.Pointer(tp)) }
@@ -232,14 +234,12 @@
// - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below.
// To instruct traceEvent that it must not ignore events below, we set startingtrace.
// trace.enabled is set afterwards once we have emitted all preliminary events.
- _g_ := getg()
- _g_.m.startingtrace = true
+ mp := getg().m
+ mp.startingtrace = true
// Obtain current stack ID to use in all traceEvGoCreate events below.
- mp := acquirem()
stkBuf := make([]uintptr, traceStackSize)
stackID := traceStackID(mp, stkBuf, 2)
- releasem(mp)
profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid]
trace.cpuLogRead = profBuf
@@ -261,16 +261,27 @@
gp.tracelastp = getg().m.p
// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
id := trace.stackTab.put([]uintptr{startPCforTrace(gp.startpc) + sys.PCQuantum})
- traceEvent(traceEvGoCreate, -1, uint64(gp.goid), uint64(id), stackID)
+ traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
}
if status == _Gwaiting {
// traceEvGoWaiting is implied to have seq=1.
gp.traceseq++
- traceEvent(traceEvGoWaiting, -1, uint64(gp.goid))
+ traceEvent(traceEvGoWaiting, -1, gp.goid)
}
if status == _Gsyscall {
gp.traceseq++
- traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid))
+ traceEvent(traceEvGoInSyscall, -1, gp.goid)
+ } else if status == _Gdead && gp.m != nil && gp.m.isextra {
+ // Trigger two trace events for the dead g in the extra m,
+ // since the next event of the g will be traceEvGoSysExit in exitsyscall,
+ // while calling from a C thread into Go.
+ gp.traceseq = 0
+ gp.tracelastp = getg().m.p
+ // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
+ id := trace.stackTab.put([]uintptr{startPCforTrace(0) + sys.PCQuantum}) // no start pc
+ traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
+ gp.traceseq++
+ traceEvent(traceEvGoInSyscall, -1, gp.goid)
} else {
gp.sysblocktraced = false
}
@@ -293,7 +304,7 @@
trace.strings = make(map[string]uint64)
trace.seqGC = 0
- _g_.m.startingtrace = false
+ mp.startingtrace = false
trace.enabled = true
// Register runtime goroutine labels.
@@ -386,31 +397,33 @@
raceacquire(unsafe.Pointer(&trace.shutdownSema))
}
- // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
- lock(&trace.lock)
- for _, p := range allp[:cap(allp)] {
- if p.tracebuf != 0 {
- throw("trace: non-empty trace buffer in proc")
+ systemstack(func() {
+ // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
+ lock(&trace.lock)
+ for _, p := range allp[:cap(allp)] {
+ if p.tracebuf != 0 {
+ throw("trace: non-empty trace buffer in proc")
+ }
}
- }
- if trace.buf != 0 {
- throw("trace: non-empty global trace buffer")
- }
- if trace.fullHead != 0 || trace.fullTail != 0 {
- throw("trace: non-empty full trace buffer")
- }
- if trace.reading != 0 || trace.reader != 0 {
- throw("trace: reading after shutdown")
- }
- for trace.empty != 0 {
- buf := trace.empty
- trace.empty = buf.ptr().link
- sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
- }
- trace.strings = nil
- trace.shutdown = false
- trace.cpuLogRead = nil
- unlock(&trace.lock)
+ if trace.buf != 0 {
+ throw("trace: non-empty global trace buffer")
+ }
+ if trace.fullHead != 0 || trace.fullTail != 0 {
+ throw("trace: non-empty full trace buffer")
+ }
+ if trace.reading != 0 || trace.reader.Load() != nil {
+ throw("trace: reading after shutdown")
+ }
+ for trace.empty != 0 {
+ buf := trace.empty
+ trace.empty = buf.ptr().link
+ sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
+ }
+ trace.strings = nil
+ trace.shutdown = false
+ trace.cpuLogRead = nil
+ unlock(&trace.lock)
+ })
}
// ReadTrace returns the next chunk of binary tracing data, blocking until data
@@ -419,6 +432,55 @@
// returned data before calling ReadTrace again.
// ReadTrace must be called from one goroutine at a time.
func ReadTrace() []byte {
+top:
+ var buf []byte
+ var park bool
+ systemstack(func() {
+ buf, park = readTrace0()
+ })
+ if park {
+ gopark(func(gp *g, _ unsafe.Pointer) bool {
+ if !trace.reader.CompareAndSwapNoWB(nil, gp) {
+ // We're racing with another reader.
+ // Wake up and handle this case.
+ return false
+ }
+
+ if g2 := traceReader(); gp == g2 {
+ // New data arrived between unlocking
+ // and the CAS and we won the wake-up
+ // race, so wake up directly.
+ return false
+ } else if g2 != nil {
+ printlock()
+ println("runtime: got trace reader", g2, g2.goid)
+ throw("unexpected trace reader")
+ }
+
+ return true
+ }, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
+ goto top
+ }
+
+ return buf
+}
+
+// readTrace0 is ReadTrace's continuation on g0. This must run on the
+// system stack because it acquires trace.lock.
+//
+//go:systemstack
+func readTrace0() (buf []byte, park bool) {
+ if raceenabled {
+ // g0 doesn't have a race context. Borrow the user G's.
+ if getg().racectx != 0 {
+ throw("expected racectx == 0")
+ }
+ getg().racectx = getg().m.curg.racectx
+ // (This defer should get open-coded, which is safe on
+ // the system stack.)
+ defer func() { getg().racectx = 0 }()
+ }
+
// This function may need to lock trace.lock recursively
// (goparkunlock -> traceGoPark -> traceEvent -> traceFlush).
// To allow this we use trace.lockOwner.
@@ -426,16 +488,16 @@
// allocation can call heap allocate, which will try to emit a trace
// event while holding heap lock.
lock(&trace.lock)
- trace.lockOwner = getg()
+ trace.lockOwner = getg().m.curg
- if trace.reader != 0 {
+ if trace.reader.Load() != nil {
// More than one goroutine reads trace. This is bad.
// But we would rather not crash the program because of tracing,
// because tracing can be enabled at runtime on prod servers.
trace.lockOwner = nil
unlock(&trace.lock)
println("runtime: ReadTrace called from multiple goroutines simultaneously")
- return nil
+ return nil, false
}
// Recycle the old buffer.
if buf := trace.reading; buf != 0 {
@@ -448,7 +510,7 @@
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
- return []byte("go 1.19 trace\x00\x00\x00")
+ return []byte("go 1.19 trace\x00\x00\x00"), false
}
// Optimistically look for CPU profile samples. This may write new stack
// records, and may write new tracing buffers.
@@ -457,17 +519,22 @@
}
// Wait for new data.
if trace.fullHead == 0 && !trace.shutdown {
- trace.reader.set(getg())
- goparkunlock(&trace.lock, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
- lock(&trace.lock)
+ // We don't simply use a note because the scheduler
+ // executes this goroutine directly when it wakes up
+ // (also a note would consume an M).
+ trace.lockOwner = nil
+ unlock(&trace.lock)
+ return nil, true
}
+newFull:
+ assertLockHeld(&trace.lock)
// Write a buffer.
if trace.fullHead != 0 {
buf := traceFullDequeue()
trace.reading = buf
trace.lockOwner = nil
unlock(&trace.lock)
- return buf.ptr().arr[:buf.ptr().pos]
+ return buf.ptr().arr[:buf.ptr().pos], false
}
// Write footer with timer frequency.
@@ -480,13 +547,22 @@
}
trace.lockOwner = nil
unlock(&trace.lock)
- var data []byte
- data = append(data, traceEvFrequency|0<<traceArgCountShift)
- data = traceAppend(data, uint64(freq))
+
+ // Write frequency event.
+ bufp := traceFlush(0, 0)
+ buf := bufp.ptr()
+ buf.byte(traceEvFrequency | 0<<traceArgCountShift)
+ buf.varint(uint64(freq))
+
+ // Dump stack table.
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
- trace.stackTab.dump()
- return data
+ bufp = trace.stackTab.dump(bufp)
+
+ // Flush final buffer.
+ lock(&trace.lock)
+ traceFullQueue(bufp)
+ goto newFull // trace.lock should be held at newFull
}
// Done.
if trace.shutdown {
@@ -500,40 +576,51 @@
}
// trace.enabled is already reset, so can call traceable functions.
semrelease(&trace.shutdownSema)
- return nil
+ return nil, false
}
// Also bad, but see the comment above.
trace.lockOwner = nil
unlock(&trace.lock)
println("runtime: spurious wakeup of trace reader")
- return nil
+ return nil, false
}
// traceReader returns the trace reader that should be woken up, if any.
// Callers should first check that trace.enabled or trace.shutdown is set.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
func traceReader() *g {
- if !traceReaderAvailable() {
+ // Optimistic check first
+ if traceReaderAvailable() == nil {
return nil
}
lock(&trace.lock)
- if !traceReaderAvailable() {
+ gp := traceReaderAvailable()
+ if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
unlock(&trace.lock)
return nil
}
- gp := trace.reader.ptr()
- trace.reader.set(nil)
unlock(&trace.lock)
return gp
}
-// traceReaderAvailable returns true if the trace reader is not currently
+// traceReaderAvailable returns the trace reader if it is not currently
// scheduled and should be. Callers should first check that trace.enabled
// or trace.shutdown is set.
-func traceReaderAvailable() bool {
- return trace.reader != 0 && (trace.fullHead != 0 || trace.shutdown)
+func traceReaderAvailable() *g {
+ if trace.fullHead != 0 || trace.shutdown {
+ return trace.reader.Load()
+ }
+ return nil
}
// traceProcFree frees trace buffer associated with pp.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
func traceProcFree(pp *p) {
buf := pp.tracebuf
pp.tracebuf = 0
@@ -624,7 +711,9 @@
// TODO: test on non-zero extraBytes param.
maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
if buf == nil || len(buf.arr)-buf.pos < maxSize {
- buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+ systemstack(func() {
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+ })
bufp.set(buf)
}
@@ -701,7 +790,7 @@
hdr[0] = 0b10
}
if gp != nil {
- hdr[1] = uint64(gp.goid)
+ hdr[1] = gp.goid
}
// Allow only one writer at a time
@@ -765,7 +854,9 @@
buf := bufp.ptr()
if buf == nil {
- *bufp = traceFlush(*bufp, 0)
+ systemstack(func() {
+ *bufp = traceFlush(*bufp, 0)
+ })
buf = bufp.ptr()
}
for i := range stk {
@@ -782,19 +873,18 @@
}
func traceStackID(mp *m, buf []uintptr, skip int) uint64 {
- _g_ := getg()
- gp := mp.curg
+ gp := getg()
+ curgp := mp.curg
var nstk int
- if gp == _g_ {
+ if curgp == gp {
nstk = callers(skip+1, buf)
- } else if gp != nil {
- gp = mp.curg
- nstk = gcallers(gp, skip, buf)
+ } else if curgp != nil {
+ nstk = gcallers(curgp, skip, buf)
}
if nstk > 0 {
nstk-- // skip runtime.goexit
}
- if nstk > 0 && gp.goid == 1 {
+ if nstk > 0 && curgp.goid == 1 {
nstk-- // skip runtime.main
}
id := trace.stackTab.put(buf[:nstk])
@@ -803,6 +893,11 @@
// traceAcquireBuffer returns trace buffer to use and, if necessary, locks it.
func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) {
+ // Any time we acquire a buffer, we may end up flushing it,
+ // but flushes are rare. Record the lock edge even if it
+ // doesn't happen this time.
+ lockRankMayTraceFlush()
+
mp = acquirem()
if p := mp.p.ptr(); p != nil {
return mp, p.id, &p.tracebuf
@@ -819,7 +914,21 @@
releasem(getg().m)
}
+// lockRankMayTraceFlush records the lock ranking effects of a
+// potential call to traceFlush.
+func lockRankMayTraceFlush() {
+ owner := trace.lockOwner
+ dolock := owner == nil || owner != getg().m.curg
+ if dolock {
+ lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock))
+ }
+}
+
// traceFlush puts buf onto stack of full buffers and returns an empty buffer.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
owner := trace.lockOwner
dolock := owner == nil || owner != getg().m.curg
@@ -897,8 +1006,10 @@
buf := bufp.ptr()
size := 1 + 2*traceBytesPerNumber + len(s)
if buf == nil || len(buf.arr)-buf.pos < size {
- buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
- bufp.set(buf)
+ systemstack(func() {
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+ bufp.set(buf)
+ })
}
buf.byte(traceEvString)
buf.varint(id)
@@ -917,15 +1028,6 @@
return id, bufp
}
-// traceAppend appends v to buf in little-endian-base-128 encoding.
-func traceAppend(buf []byte, v uint64) []byte {
- for ; v >= 0x80; v >>= 7 {
- buf = append(buf, 0x80|byte(v))
- }
- buf = append(buf, byte(v))
- return buf
-}
-
// varint appends v to buf in little-endian-base-128 encoding.
func (buf *traceBuf) varint(v uint64) {
pos := buf.pos
@@ -938,6 +1040,22 @@
buf.pos = pos
}
+// varintAt writes varint v at byte position pos in buf. This always
+// consumes traceBytesPerNumber bytes. This is intended for when the
+// caller needs to reserve space for a varint but can't populate it
+// until later.
+func (buf *traceBuf) varintAt(pos int, v uint64) {
+ for i := 0; i < traceBytesPerNumber; i++ {
+ if i < traceBytesPerNumber-1 {
+ buf.arr[pos] = 0x80 | byte(v)
+ } else {
+ buf.arr[pos] = byte(v)
+ }
+ v >>= 7
+ pos++
+ }
+}
+
// byte appends v to buf.
func (buf *traceBuf) byte(v byte) {
buf.arr[buf.pos] = v
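The new varintAt helper complements varint: a writer can reserve traceBytesPerNumber bytes for a length field, emit the record body, and then backfill the length as a fixed-width base-128 number (the traceStackTable.dump rewrite later in this file uses exactly this reserve-and-backfill sequence). A standalone sketch of the encoding; bytesPerNumber and the buf type here are stand-ins for the runtime's traceBytesPerNumber and traceBuf:

package main

import "fmt"

// bytesPerNumber is the fixed width reserved for a backfilled varint;
// 10 bytes is enough for any uint64 in base-128 encoding.
const bytesPerNumber = 10

type buf struct {
    arr []byte
}

// varint appends v in little-endian base-128 encoding (minimal length).
func (b *buf) varint(v uint64) {
    for ; v >= 0x80; v >>= 7 {
        b.arr = append(b.arr, 0x80|byte(v))
    }
    b.arr = append(b.arr, byte(v))
}

// varintAt writes v at position pos using exactly bytesPerNumber bytes,
// so a caller can reserve the space first and fill it in later.
func (b *buf) varintAt(pos int, v uint64) {
    for i := 0; i < bytesPerNumber; i++ {
        if i < bytesPerNumber-1 {
            b.arr[pos] = 0x80 | byte(v)
        } else {
            b.arr[pos] = byte(v)
        }
        v >>= 7
        pos++
    }
}

func main() {
    var b buf
    b.arr = append(b.arr, 0x42) // event header byte (illustrative value)

    // Reserve space for the record length, emit the body, then backfill.
    lenPos := len(b.arr)
    b.arr = append(b.arr, make([]byte, bytesPerNumber)...)
    recPos := len(b.arr)
    b.varint(1)   // stack id
    b.varint(300) // a multi-byte payload value
    b.varintAt(lenPos, uint64(len(b.arr)-recPos))

    // Prints the header byte, the fixed-width length, and the body.
    fmt.Printf("% x\n", b.arr)
}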
@@ -947,7 +1065,7 @@
// traceStackTable maps stack traces (arrays of PC's) to unique uint32 ids.
// It is lock-free for reading.
type traceStackTable struct {
- lock mutex
+ lock mutex // Must be acquired on the system stack
seq uint32
mem traceAlloc
tab [1 << 13]traceStackPtr
@@ -983,26 +1101,31 @@
return id
}
// Now, double check under the mutex.
- lock(&tab.lock)
- if id := tab.find(pcs, hash); id != 0 {
+ // Switch to the system stack so we can acquire tab.lock
+ var id uint32
+ systemstack(func() {
+ lock(&tab.lock)
+ if id = tab.find(pcs, hash); id != 0 {
+ unlock(&tab.lock)
+ return
+ }
+ // Create new record.
+ tab.seq++
+ stk := tab.newStack(len(pcs))
+ stk.hash = hash
+ stk.id = tab.seq
+ id = stk.id
+ stk.n = len(pcs)
+ stkpc := stk.stack()
+ for i, pc := range pcs {
+ stkpc[i] = pc
+ }
+ part := int(hash % uintptr(len(tab.tab)))
+ stk.link = tab.tab[part]
+ atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk))
unlock(&tab.lock)
- return id
- }
- // Create new record.
- tab.seq++
- stk := tab.newStack(len(pcs))
- stk.hash = hash
- stk.id = tab.seq
- stk.n = len(pcs)
- stkpc := stk.stack()
- for i, pc := range pcs {
- stkpc[i] = pc
- }
- part := int(hash % uintptr(len(tab.tab)))
- stk.link = tab.tab[part]
- atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk))
- unlock(&tab.lock)
- return stk.id
+ })
+ return id
}
// find checks if the stack trace pcs is already present in the table.
@@ -1027,61 +1150,75 @@
return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
}
-// allFrames returns all of the Frames corresponding to pcs.
-func allFrames(pcs []uintptr) []Frame {
- frames := make([]Frame, 0, len(pcs))
+// traceFrames returns the frames corresponding to pcs. It may
+// allocate and may emit trace events.
+func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
+ frames := make([]traceFrame, 0, len(pcs))
ci := CallersFrames(pcs)
for {
+ var frame traceFrame
f, more := ci.Next()
- frames = append(frames, f)
+ frame, bufp = traceFrameForPC(bufp, 0, f)
+ frames = append(frames, frame)
if !more {
- return frames
+ return frames, bufp
}
}
}
// dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state.
-func (tab *traceStackTable) dump() {
- var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
- bufp := traceFlush(0, 0)
- for _, stk := range tab.tab {
- stk := stk.ptr()
+//
+// This must run on the system stack because it calls traceFlush.
+//
+//go:systemstack
+func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
+ for i := range tab.tab {
+ stk := tab.tab[i].ptr()
for ; stk != nil; stk = stk.link.ptr() {
- tmpbuf := tmp[:0]
- tmpbuf = traceAppend(tmpbuf, uint64(stk.id))
- frames := allFrames(stk.stack())
- tmpbuf = traceAppend(tmpbuf, uint64(len(frames)))
- for _, f := range frames {
- var frame traceFrame
- frame, bufp = traceFrameForPC(bufp, 0, f)
- tmpbuf = traceAppend(tmpbuf, uint64(f.PC))
- tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
- tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
- tmpbuf = traceAppend(tmpbuf, uint64(frame.line))
- }
- // Now copy to the buffer.
- size := 1 + traceBytesPerNumber + len(tmpbuf)
- if buf := bufp.ptr(); len(buf.arr)-buf.pos < size {
+ var frames []traceFrame
+ frames, bufp = traceFrames(bufp, stk.stack())
+
+ // Estimate the size of this record. This
+ // bound is pretty loose, but avoids counting
+ // lots of varint sizes.
+ maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
+ // Make sure we have enough buffer space.
+ if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
bufp = traceFlush(bufp, 0)
}
+
+ // Emit header, with space reserved for length.
buf := bufp.ptr()
buf.byte(traceEvStack | 3<<traceArgCountShift)
- buf.varint(uint64(len(tmpbuf)))
- buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
+ lenPos := buf.pos
+ buf.pos += traceBytesPerNumber
+
+ // Emit body.
+ recPos := buf.pos
+ buf.varint(uint64(stk.id))
+ buf.varint(uint64(len(frames)))
+ for _, frame := range frames {
+ buf.varint(uint64(frame.PC))
+ buf.varint(frame.funcID)
+ buf.varint(frame.fileID)
+ buf.varint(frame.line)
+ }
+
+ // Fill in size header.
+ buf.varintAt(lenPos, uint64(buf.pos-recPos))
}
}
- lock(&trace.lock)
- traceFullQueue(bufp)
- unlock(&trace.lock)
-
tab.mem.drop()
*tab = traceStackTable{}
lockInit(&((*tab).lock), lockRankTraceStackTab)
+
+ return bufp
}
type traceFrame struct {
+ PC uintptr
funcID uint64
fileID uint64
line uint64
@@ -1092,6 +1229,7 @@
func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
bufp := &buf
var frame traceFrame
+ frame.PC = f.PC
fn := f.Function
const maxLen = 1 << 10
@@ -1120,14 +1258,13 @@
// traceAllocBlock is allocated from non-GC'd memory, so it must not
// contain heap pointers. Writes to pointers to traceAllocBlocks do
// not need write barriers.
-//
-//go:notinheap
type traceAllocBlock struct {
+ _ sys.NotInHeap
next traceAllocBlockPtr
data [64<<10 - goarch.PtrSize]byte
}
-// TODO: Since traceAllocBlock is now go:notinheap, this isn't necessary.
+// TODO: Since traceAllocBlock now embeds runtime/internal/sys.NotInHeap, this isn't necessary.
type traceAllocBlockPtr uintptr
func (p traceAllocBlockPtr) ptr() *traceAllocBlock { return (*traceAllocBlock)(unsafe.Pointer(p)) }
@@ -1208,11 +1345,11 @@
func traceGCSweepStart() {
// Delay the actual GCSweepStart event until the first span
// sweep. If we don't sweep anything, don't emit any events.
- _p_ := getg().m.p.ptr()
- if _p_.traceSweep {
+ pp := getg().m.p.ptr()
+ if pp.traceSweep {
throw("double traceGCSweepStart")
}
- _p_.traceSweep, _p_.traceSwept, _p_.traceReclaimed = true, 0, 0
+ pp.traceSweep, pp.traceSwept, pp.traceReclaimed = true, 0, 0
}
// traceGCSweepSpan traces the sweep of a single page.
@@ -1220,24 +1357,24 @@
// This may be called outside a traceGCSweepStart/traceGCSweepDone
// pair; however, it will not emit any trace events in this case.
func traceGCSweepSpan(bytesSwept uintptr) {
- _p_ := getg().m.p.ptr()
- if _p_.traceSweep {
- if _p_.traceSwept == 0 {
+ pp := getg().m.p.ptr()
+ if pp.traceSweep {
+ if pp.traceSwept == 0 {
traceEvent(traceEvGCSweepStart, 1)
}
- _p_.traceSwept += bytesSwept
+ pp.traceSwept += bytesSwept
}
}
func traceGCSweepDone() {
- _p_ := getg().m.p.ptr()
- if !_p_.traceSweep {
+ pp := getg().m.p.ptr()
+ if !pp.traceSweep {
throw("missing traceGCSweepStart")
}
- if _p_.traceSwept != 0 {
- traceEvent(traceEvGCSweepDone, -1, uint64(_p_.traceSwept), uint64(_p_.traceReclaimed))
+ if pp.traceSwept != 0 {
+ traceEvent(traceEvGCSweepDone, -1, uint64(pp.traceSwept), uint64(pp.traceReclaimed))
}
- _p_.traceSweep = false
+ pp.traceSweep = false
}
func traceGCMarkAssistStart() {
@@ -1253,20 +1390,20 @@
newg.tracelastp = getg().m.p
// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
id := trace.stackTab.put([]uintptr{startPCforTrace(pc) + sys.PCQuantum})
- traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id))
+ traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id))
}
func traceGoStart() {
- _g_ := getg().m.curg
- _p_ := _g_.m.p
- _g_.traceseq++
- if _p_.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
- traceEvent(traceEvGoStartLabel, -1, uint64(_g_.goid), _g_.traceseq, trace.markWorkerLabels[_p_.ptr().gcMarkWorkerMode])
- } else if _g_.tracelastp == _p_ {
- traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid))
+ gp := getg().m.curg
+ pp := gp.m.p
+ gp.traceseq++
+ if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
+ traceEvent(traceEvGoStartLabel, -1, gp.goid, gp.traceseq, trace.markWorkerLabels[pp.ptr().gcMarkWorkerMode])
+ } else if gp.tracelastp == pp {
+ traceEvent(traceEvGoStartLocal, -1, gp.goid)
} else {
- _g_.tracelastp = _p_
- traceEvent(traceEvGoStart, -1, uint64(_g_.goid), _g_.traceseq)
+ gp.tracelastp = pp
+ traceEvent(traceEvGoStart, -1, gp.goid, gp.traceseq)
}
}
@@ -1275,14 +1412,14 @@
}
func traceGoSched() {
- _g_ := getg()
- _g_.tracelastp = _g_.m.p
+ gp := getg()
+ gp.tracelastp = gp.m.p
traceEvent(traceEvGoSched, 1)
}
func traceGoPreempt() {
- _g_ := getg()
- _g_.tracelastp = _g_.m.p
+ gp := getg()
+ gp.tracelastp = gp.m.p
traceEvent(traceEvGoPreempt, 1)
}
@@ -1294,13 +1431,13 @@
}
func traceGoUnpark(gp *g, skip int) {
- _p_ := getg().m.p
+ pp := getg().m.p
gp.traceseq++
- if gp.tracelastp == _p_ {
- traceEvent(traceEvGoUnblockLocal, skip, uint64(gp.goid))
+ if gp.tracelastp == pp {
+ traceEvent(traceEvGoUnblockLocal, skip, gp.goid)
} else {
- gp.tracelastp = _p_
- traceEvent(traceEvGoUnblock, skip, uint64(gp.goid), gp.traceseq)
+ gp.tracelastp = pp
+ traceEvent(traceEvGoUnblock, skip, gp.goid, gp.traceseq)
}
}
@@ -1321,10 +1458,10 @@
// aka right now), and assign a fresh time stamp to keep the log consistent.
ts = 0
}
- _g_ := getg().m.curg
- _g_.traceseq++
- _g_.tracelastp = _g_.m.p
- traceEvent(traceEvGoSysExit, -1, uint64(_g_.goid), _g_.traceseq, uint64(ts)/traceTickDiv)
+ gp := getg().m.curg
+ gp.traceseq++
+ gp.tracelastp = gp.m.p
+ traceEvent(traceEvGoSysExit, -1, gp.goid, gp.traceseq, uint64(ts)/traceTickDiv)
}
func traceGoSysBlock(pp *p) {
@@ -1338,8 +1475,8 @@
releasem(mp)
}
-func traceHeapAlloc() {
- traceEvent(traceEvHeapAlloc, -1, gcController.heapLive)
+func traceHeapAlloc(live uint64) {
+ traceEvent(traceEvHeapAlloc, -1, live)
}
func traceHeapGoal() {
@@ -1432,7 +1569,7 @@
func startPCforTrace(pc uintptr) uintptr {
f := findfunc(pc)
if !f.valid() {
- return pc // should not happen, but don't care
+ return pc // may happen for a locked g in an extra M, since its pc is 0.
}
w := funcdata(f, _FUNCDATA_WrapInfo)
if w == nil {
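Stepping back over the trace.go changes: trace.reader is now an atomic.Pointer[g], ReadTrace publishes the reading goroutine with CompareAndSwapNoWB before parking, and traceReader claims it back with another compare-and-swap so only one side wins the wake-up. A rough user-level analog of that single-waiter hand-off, built on sync/atomic's generic Pointer (whose ordinary CompareAndSwap plays the role of the runtime's write-barrier-free variant); the waiter type and channel wake-up are stand-ins for g and the scheduler's goready:

package main

import (
    "fmt"
    "sync/atomic"
)

// waiter is a stand-in for the parked reader goroutine.
type waiter struct {
    wake chan struct{}
}

// reader holds at most one registered waiter, like trace.reader.
var reader atomic.Pointer[waiter]

// park registers w as the single waiter. It reports false if another
// waiter is already registered, in which case the caller should not block.
func park(w *waiter) bool {
    return reader.CompareAndSwap(nil, w)
}

// wakeReader claims the registered waiter, if any, and wakes it.
// The CompareAndSwap guarantees only one caller wins the claim.
func wakeReader() bool {
    w := reader.Load()
    if w == nil || !reader.CompareAndSwap(w, nil) {
        return false
    }
    close(w.wake)
    return true
}

func main() {
    w := &waiter{wake: make(chan struct{})}
    if park(w) {
        wakeReader() // data "arrived": claim and wake the waiter
        <-w.wake
        fmt.Println("reader woken")
    }
}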
diff --git a/src/runtime/trace/annotation.go b/src/runtime/trace/annotation.go
index 9171633..d47cb85 100644
--- a/src/runtime/trace/annotation.go
+++ b/src/runtime/trace/annotation.go
@@ -178,8 +178,7 @@
// The information is advisory only. The tracing status
// may have changed by the time this function returns.
func IsEnabled() bool {
- enabled := atomic.LoadInt32(&tracing.enabled)
- return enabled == 1
+ return tracing.enabled.Load()
}
//
diff --git a/src/runtime/trace/annotation_test.go b/src/runtime/trace/annotation_test.go
index 31fccef..69ea8f2 100644
--- a/src/runtime/trace/annotation_test.go
+++ b/src/runtime/trace/annotation_test.go
@@ -147,7 +147,7 @@
pretty := func(data []testData) string {
var s strings.Builder
for _, d := range data {
- s.WriteString(fmt.Sprintf("\t%+v\n", d))
+ fmt.Fprintf(&s, "\t%+v\n", d)
}
return s.String()
}
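This hunk, like the wincallback.go changes near the end of the patch, replaces buf.WriteString(fmt.Sprintf(...)) with fmt.Fprintf(&buf, ...), formatting straight into the builder instead of through an intermediate string. A tiny sketch of the two forms side by side:

package main

import (
    "fmt"
    "strings"
)

func main() {
    var s strings.Builder

    // Extra allocation: Sprintf builds a string, WriteString then copies it.
    s.WriteString(fmt.Sprintf("\t%+v\n", struct{ N int }{1}))

    // Preferred: format directly into the Builder, which implements io.Writer.
    fmt.Fprintf(&s, "\t%+v\n", struct{ N int }{2})

    fmt.Print(s.String())
}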
diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go
index cf2b644..86c97e2 100644
--- a/src/runtime/trace/trace.go
+++ b/src/runtime/trace/trace.go
@@ -134,7 +134,7 @@
w.Write(data)
}
}()
- atomic.StoreInt32(&tracing.enabled, 1)
+ tracing.enabled.Store(true)
return nil
}
@@ -143,12 +143,12 @@
func Stop() {
tracing.Lock()
defer tracing.Unlock()
- atomic.StoreInt32(&tracing.enabled, 0)
+ tracing.enabled.Store(false)
runtime.StopTrace()
}
var tracing struct {
- sync.Mutex // gate mutators (Start, Stop)
- enabled int32 // accessed via atomic
+ sync.Mutex // gate mutators (Start, Stop)
+ enabled atomic.Bool
}
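tracing.enabled switches from an int32 read with atomic.LoadInt32 to an atomic.Bool, which reads more directly and cannot be misused with values other than true/false. A minimal sketch of the same flag layout; the tracer type below is illustrative, not the package's actual struct:

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

// tracer gates Start/Stop with a mutex while readers check the flag
// lock-free, mirroring the shape of the runtime/trace package state.
type tracer struct {
    mu      sync.Mutex  // serializes Start and Stop
    enabled atomic.Bool // read without the mutex by IsEnabled
}

func (t *tracer) Start() {
    t.mu.Lock()
    defer t.mu.Unlock()
    t.enabled.Store(true)
}

func (t *tracer) Stop() {
    t.mu.Lock()
    defer t.mu.Unlock()
    t.enabled.Store(false)
}

// IsEnabled is advisory only; the status may change right after it returns.
func (t *tracer) IsEnabled() bool { return t.enabled.Load() }

func main() {
    var t tracer
    t.Start()
    fmt.Println(t.IsEnabled()) // true
    t.Stop()
    fmt.Println(t.IsEnabled()) // false
}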
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 49147ff..37f35d5 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -7,7 +7,6 @@
import (
"internal/bytealg"
"internal/goarch"
- "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -54,8 +53,6 @@
}
level, _, _ := gotraceback()
- var ctxt *funcval // Context pointer for unstarted goroutines. See issue #25897.
-
if pc0 == ^uintptr(0) && sp0 == ^uintptr(0) { // Signal to fetch saved values from gp.
if gp.syscallsp != 0 {
pc0 = gp.syscallpc
@@ -69,7 +66,6 @@
if usesLR {
lr0 = gp.sched.lr
}
- ctxt = (*funcval)(gp.sched.ctxt)
}
}
@@ -163,7 +159,10 @@
if frame.fp == 0 {
// Jump over system stack transitions. If we're on g0 and there's a user
// goroutine, try to jump. Otherwise this is a regular call.
- if flags&_TraceJumpStack != 0 && gp == gp.m.g0 && gp.m.curg != nil {
+ // We also defensively check that this won't switch M's on us,
+ // which could happen at critical points in the scheduler.
+ // This ensures gp.m doesn't change from a stack jump.
+ if flags&_TraceJumpStack != 0 && gp == gp.m.g0 && gp.m.curg != nil && gp.m.curg.m == gp.m {
switch f.funcID {
case funcID_morestack:
// morestack does not return normally -- newstack()
@@ -171,20 +170,33 @@
// This keeps morestack() from showing up in the backtrace,
// but that makes some sense since it'll never be returned
// to.
- frame.pc = gp.m.curg.sched.pc
+ gp = gp.m.curg
+ frame.pc = gp.sched.pc
frame.fn = findfunc(frame.pc)
f = frame.fn
flag = f.flag
- frame.lr = gp.m.curg.sched.lr
- frame.sp = gp.m.curg.sched.sp
- stack = gp.m.curg.stack
- cgoCtxt = gp.m.curg.cgoCtxt
+ frame.lr = gp.sched.lr
+ frame.sp = gp.sched.sp
+ stack = gp.stack
+ cgoCtxt = gp.cgoCtxt
case funcID_systemstack:
// systemstack returns normally, so just follow the
// stack transition.
- frame.sp = gp.m.curg.sched.sp
- stack = gp.m.curg.stack
- cgoCtxt = gp.m.curg.cgoCtxt
+ if usesLR && funcspdelta(f, frame.pc, &cache) == 0 {
+ // We're at the function prologue and the stack
+ // switch hasn't happened, or epilogue where we're
+ // about to return. Just unwind normally.
+ // Do this only on LR machines because on x86
+ // systemstack doesn't have an SP delta (the CALL
+ // instruction opens the frame), therefore no way
+ // to check.
+ flag &^= funcFlag_SPWRITE
+ break
+ }
+ gp = gp.m.curg
+ frame.sp = gp.sched.sp
+ stack = gp.stack
+ cgoCtxt = gp.cgoCtxt
flag &^= funcFlag_SPWRITE
}
}
@@ -287,21 +299,7 @@
frame.varp -= goarch.PtrSize
}
- // Derive size of arguments.
- // Most functions have a fixed-size argument block,
- // so we can use metadata about the function f.
- // Not all, though: there are some variadic functions
- // in package runtime and reflect, and for those we use call-specific
- // metadata recorded by f's caller.
- if callback != nil || printing {
- frame.argp = frame.fp + sys.MinFrameSize
- var ok bool
- frame.arglen, frame.argmap, ok = getArgInfoFast(f, callback != nil)
- if !ok {
- frame.arglen, frame.argmap = getArgInfo(&frame, f, callback != nil, ctxt)
- }
- }
- ctxt = nil // ctxt is only needed to get arg maps for the topmost frame
+ frame.argp = frame.fp + sys.MinFrameSize
// Determine frame's 'continuation PC', where it can continue.
// Normally this is the return address on the stack, but if sigpanic
@@ -418,8 +416,9 @@
// Create a fake _func for the
// inlined function.
- inlFunc.nameoff = inltree[ix].func_
+ inlFunc.nameOff = inltree[ix].nameOff
inlFunc.funcID = inltree[ix].funcID
+ inlFunc.startLine = inltree[ix].startLine
if (flags&_TraceRuntimeFrames) != 0 || showframe(inlFuncInfo, gp, nprint == 0, inlFuncInfo.funcID, lastFuncID) {
name := funcname(inlFuncInfo)
@@ -494,7 +493,6 @@
frame.lr = 0
frame.sp = frame.fp
frame.fp = 0
- frame.argmap = nil
// On link register architectures, sighandler saves the LR on stack
// before faking a call.
@@ -665,74 +663,6 @@
}
}
-// reflectMethodValue is a partial duplicate of reflect.makeFuncImpl
-// and reflect.methodValue.
-type reflectMethodValue struct {
- fn uintptr
- stack *bitvector // ptrmap for both args and results
- argLen uintptr // just args
-}
-
-// getArgInfoFast returns the argument frame information for a call to f.
-// It is short and inlineable. However, it does not handle all functions.
-// If ok reports false, you must call getArgInfo instead.
-// TODO(josharian): once we do mid-stack inlining,
-// call getArgInfo directly from getArgInfoFast and stop returning an ok bool.
-func getArgInfoFast(f funcInfo, needArgMap bool) (arglen uintptr, argmap *bitvector, ok bool) {
- return uintptr(f.args), nil, !(needArgMap && f.args == _ArgsSizeUnknown)
-}
-
-// getArgInfo returns the argument frame information for a call to f
-// with call frame frame.
-//
-// This is used for both actual calls with active stack frames and for
-// deferred calls or goroutines that are not yet executing. If this is an actual
-// call, ctxt must be nil (getArgInfo will retrieve what it needs from
-// the active stack frame). If this is a deferred call or unstarted goroutine,
-// ctxt must be the function object that was deferred or go'd.
-func getArgInfo(frame *stkframe, f funcInfo, needArgMap bool, ctxt *funcval) (arglen uintptr, argmap *bitvector) {
- arglen = uintptr(f.args)
- if needArgMap && f.args == _ArgsSizeUnknown {
- // Extract argument bitmaps for reflect stubs from the calls they made to reflect.
- switch funcname(f) {
- case "reflect.makeFuncStub", "reflect.methodValueCall":
- // These take a *reflect.methodValue as their
- // context register.
- var mv *reflectMethodValue
- var retValid bool
- if ctxt != nil {
- // This is not an actual call, but a
- // deferred call or an unstarted goroutine.
- // The function value is itself the *reflect.methodValue.
- mv = (*reflectMethodValue)(unsafe.Pointer(ctxt))
- } else {
- // This is a real call that took the
- // *reflect.methodValue as its context
- // register and immediately saved it
- // to 0(SP). Get the methodValue from
- // 0(SP).
- arg0 := frame.sp + sys.MinFrameSize
- mv = *(**reflectMethodValue)(unsafe.Pointer(arg0))
- // Figure out whether the return values are valid.
- // Reflect will update this value after it copies
- // in the return values.
- retValid = *(*bool)(unsafe.Pointer(arg0 + 4*goarch.PtrSize))
- }
- if mv.fn != f.entry() {
- print("runtime: confused by ", funcname(f), "\n")
- throw("reflect mismatch")
- }
- bv := mv.stack
- arglen = uintptr(bv.n * goarch.PtrSize)
- if !retValid {
- arglen = uintptr(mv.argLen) &^ (goarch.PtrSize - 1)
- }
- argmap = bv
- }
- }
- return
-}
-
// tracebackCgoContext handles tracing back a cgo context value, from
// the context argument to setCgoTraceback, for the gentraceback
// function. It returns the new value of n.
@@ -819,10 +749,10 @@
// concurrently with a signal handler.
// We just have to stop a signal handler from interrupting
// in the middle of our copy.
- atomic.Store(&gp.m.cgoCallersUse, 1)
+ gp.m.cgoCallersUse.Store(1)
cgoCallers := *gp.m.cgoCallers
gp.m.cgoCallers[0] = 0
- atomic.Store(&gp.m.cgoCallersUse, 0)
+ gp.m.cgoCallersUse.Store(0)
printCgoTraceback(&cgoCallers)
}
@@ -880,7 +810,7 @@
}
}
-// printAncestorTraceback prints the given function info at a given pc
+// printAncestorTracebackFuncInfo prints the given function info at a given pc
// within an ancestor traceback. The precision of this info is reduced
// because it only has access to the pcs recorded when the caller
// goroutine was created.
@@ -890,7 +820,7 @@
inltree := (*[1 << 20]inlinedCall)(inldata)
ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
if ix >= 0 {
- name = funcnameFromNameoff(f, inltree[ix].func_)
+ name = funcnameFromNameOff(f, inltree[ix].nameOff)
}
}
file, line := funcline(f, pc)
@@ -923,8 +853,8 @@
// showframe reports whether the frame with the given characteristics should
// be printed during a traceback.
func showframe(f funcInfo, gp *g, firstFrame bool, funcID, childID funcID) bool {
- g := getg()
- if g.m.throwing >= throwTypeRuntime && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig.ptr()) {
+ mp := getg().m
+ if mp.throwing >= throwTypeRuntime && gp != nil && (gp == mp.curg || gp == mp.caughtsig.ptr()) {
return true
}
return showfuncinfo(f, firstFrame, funcID, childID)
@@ -934,7 +864,7 @@
// be printed during a traceback.
func showfuncinfo(f funcInfo, firstFrame bool, funcID, childID funcID) bool {
// Note that f may be a synthesized funcInfo for an inlined
- // function, in which case only nameoff and funcID are set.
+ // function, in which case only nameOff and funcID are set.
level, _, _ := gotraceback()
if level > 1 {
@@ -1051,10 +981,10 @@
}
print("\n")
goroutineheader(gp)
- // Note: gp.m == g.m occurs when tracebackothers is
- // called from a signal handler initiated during a
- // systemstack call. The original G is still in the
- // running state, and we want to print its stack.
+ // Note: gp.m == getg().m occurs when tracebackothers is called
+ // from a signal handler initiated during a systemstack call.
+ // The original G is still in the running state, and we want to
+ // print its stack.
if gp.m != getg().m && readgstatus(gp)&^_Gscan == _Grunning {
print("\tgoroutine running on other thread; stack unavailable\n")
printcreatedby(gp)
@@ -1136,7 +1066,7 @@
// always consider it a user goroutine.
return false
}
- return !fingRunning
+ return fingStatus.Load()&fingRunningFinalizer == 0
}
return hasPrefix(funcname(f), "runtime.")
}
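isSystemGoroutine now inspects an atomic bit field (fingStatus) rather than the old fingRunning bool: the finalizer goroutine is reported as a user goroutine only while the fingRunningFinalizer bit is set. A hedged sketch of that kind of atomic flag word using sync/atomic; the constant names and helpers are stand-ins, not the runtime's definitions:

package main

import (
    "fmt"
    "sync/atomic"
)

// Hypothetical finalizer-goroutine status bits, mirroring the shape of
// the runtime's fingStatus field.
const (
    fingWait             uint32 = 1 << iota // goroutine is parked
    fingRunningFinalizer                    // currently running a finalizer
)

var fingStatus atomic.Uint32

// setBits sets mask atomically via a CompareAndSwap loop. (sync/atomic only
// gained Uint32.Or in newer releases, so the loop keeps this sketch portable.)
func setBits(mask uint32) {
    for {
        old := fingStatus.Load()
        if fingStatus.CompareAndSwap(old, old|mask) {
            return
        }
    }
}

// clearBits clears mask atomically.
func clearBits(mask uint32) {
    for {
        old := fingStatus.Load()
        if fingStatus.CompareAndSwap(old, old&^mask) {
            return
        }
    }
}

func main() {
    setBits(fingRunningFinalizer)
    // Mirrors the isSystemGoroutine check above: while the bit is set, the
    // finalizer goroutine is running user-supplied finalizer code, so the
    // "== 0" test reports it as a user goroutine rather than a system one.
    fmt.Println(fingStatus.Load()&fingRunningFinalizer != 0) // true
    clearBits(fingRunningFinalizer)
    fmt.Println(fingStatus.Load()&fingRunningFinalizer != 0) // false
}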
@@ -1352,7 +1282,7 @@
data uintptr
}
-// cgoTraceback prints a traceback of callers.
+// printCgoTraceback prints a traceback of callers.
func printCgoTraceback(callers *cgoCallers) {
if cgoSymbolizer == nil {
for _, c := range callers {
@@ -1407,7 +1337,7 @@
// callCgoSymbolizer calls the cgoSymbolizer function.
func callCgoSymbolizer(arg *cgoSymbolizerArg) {
call := cgocall
- if panicking > 0 || getg().m.curg != getg() {
+ if panicking.Load() > 0 || getg().m.curg != getg() {
// We do not want to call into the scheduler when panicking
// or when on the system stack.
call = asmcgocall
@@ -1427,7 +1357,7 @@
return
}
call := cgocall
- if panicking > 0 || getg().m.curg != getg() {
+ if panicking.Load() > 0 || getg().m.curg != getg() {
// We do not want to call into the scheduler when panicking
// or when on the system stack.
call = asmcgocall
diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go
index e50bd95..97eb921 100644
--- a/src/runtime/traceback_test.go
+++ b/src/runtime/traceback_test.go
@@ -9,7 +9,6 @@
"internal/abi"
"internal/testenv"
"runtime"
- "strings"
"testing"
)
@@ -19,7 +18,7 @@
if *flagQuick {
t.Skip("-quick")
}
- optimized := !strings.HasSuffix(testenv.Builder(), "-noopt")
+ optimized := !testenv.OptimizationOff()
abiSel := func(x, y string) string {
// select expected output based on ABI
// In noopt build we always spill arguments so the output is the same as stack ABI.
diff --git a/src/runtime/type.go b/src/runtime/type.go
index e8e7819..1c6103e 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -454,7 +454,7 @@
}
}
-func (n name) name() (s string) {
+func (n name) name() string {
if n.bytes == nil {
return ""
}
@@ -462,22 +462,16 @@
if l == 0 {
return ""
}
- hdr := (*stringStruct)(unsafe.Pointer(&s))
- hdr.str = unsafe.Pointer(n.data(1 + i))
- hdr.len = l
- return
+ return unsafe.String(n.data(1+i), l)
}
-func (n name) tag() (s string) {
+func (n name) tag() string {
if *n.data(0)&(1<<1) == 0 {
return ""
}
i, l := n.readvarint(1)
i2, l2 := n.readvarint(1 + i + l)
- hdr := (*stringStruct)(unsafe.Pointer(&s))
- hdr.str = unsafe.Pointer(n.data(1 + i + l + i2))
- hdr.len = l2
- return
+ return unsafe.String(n.data(1+i+l+i2), l2)
}
func (n name) pkgPath() string {
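name.name and name.tag drop the hand-written stringStruct header manipulation in favor of unsafe.String, which builds a string from a byte pointer and a length in one call. A small illustration of the same conversion in ordinary code (requires Go 1.20 or newer; the sample data below is made up):

package main

import (
    "fmt"
    "unsafe"
)

func main() {
    data := []byte("hello, runtime")

    // Before: fabricate a string header by writing str/len fields through
    // unsafe pointer casts, as the old (*stringStruct) code did.
    // After: a single call performs the same conversion.
    // Note: data must not be modified while s is in use, since s aliases it.
    s := unsafe.String(&data[0], len(data))
    fmt.Println(s, len(s))

    // The inverse direction also has a first-class helper now.
    p := unsafe.StringData(s)
    fmt.Println(*p == 'h') // true
}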
diff --git a/src/runtime/unsafe.go b/src/runtime/unsafe.go
new file mode 100644
index 0000000..54649e8
--- /dev/null
+++ b/src/runtime/unsafe.go
@@ -0,0 +1,98 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/math"
+ "unsafe"
+)
+
+func unsafestring(ptr unsafe.Pointer, len int) {
+ if len < 0 {
+ panicunsafestringlen()
+ }
+
+ if uintptr(len) > -uintptr(ptr) {
+ if ptr == nil {
+ panicunsafestringnilptr()
+ }
+ panicunsafestringlen()
+ }
+}
+
+// Keep this code in sync with cmd/compile/internal/walk/builtin.go:walkUnsafeString
+func unsafestring64(ptr unsafe.Pointer, len64 int64) {
+ len := int(len64)
+ if int64(len) != len64 {
+ panicunsafestringlen()
+ }
+ unsafestring(ptr, len)
+}
+
+func unsafestringcheckptr(ptr unsafe.Pointer, len64 int64) {
+ unsafestring64(ptr, len64)
+
+ // Check that underlying array doesn't straddle multiple heap objects.
+ // unsafestring64 has already checked for overflow.
+ if checkptrStraddles(ptr, uintptr(len64)) {
+ throw("checkptr: unsafe.String result straddles multiple allocations")
+ }
+}
+
+func panicunsafestringlen() {
+ panic(errorString("unsafe.String: len out of range"))
+}
+
+func panicunsafestringnilptr() {
+ panic(errorString("unsafe.String: ptr is nil and len is not zero"))
+}
+
+// Keep this code in sync with cmd/compile/internal/walk/builtin.go:walkUnsafeSlice
+func unsafeslice(et *_type, ptr unsafe.Pointer, len int) {
+ if len < 0 {
+ panicunsafeslicelen()
+ }
+
+ if et.size == 0 {
+ if ptr == nil && len > 0 {
+ panicunsafeslicenilptr()
+ }
+ }
+
+ mem, overflow := math.MulUintptr(et.size, uintptr(len))
+ if overflow || mem > -uintptr(ptr) {
+ if ptr == nil {
+ panicunsafeslicenilptr()
+ }
+ panicunsafeslicelen()
+ }
+}
+
+// Keep this code in sync with cmd/compile/internal/walk/builtin.go:walkUnsafeSlice
+func unsafeslice64(et *_type, ptr unsafe.Pointer, len64 int64) {
+ len := int(len64)
+ if int64(len) != len64 {
+ panicunsafeslicelen()
+ }
+ unsafeslice(et, ptr, len)
+}
+
+func unsafeslicecheckptr(et *_type, ptr unsafe.Pointer, len64 int64) {
+ unsafeslice64(et, ptr, len64)
+
+ // Check that underlying array doesn't straddle multiple heap objects.
+ // unsafeslice64 has already checked for overflow.
+ if checkptrStraddles(ptr, uintptr(len64)*et.size) {
+ throw("checkptr: unsafe.Slice result straddles multiple allocations")
+ }
+}
+
+func panicunsafeslicelen() {
+ panic(errorString("unsafe.Slice: len out of range"))
+}
+
+func panicunsafeslicenilptr() {
+ panic(errorString("unsafe.Slice: ptr is nil and len is not zero"))
+}
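The new unsafe.go carries the runtime checks behind unsafe.String and unsafe.Slice; the `uintptr(len) > -uintptr(ptr)` comparison rejects lengths that would wrap past the top of the address space. A short demonstration of the user-visible behavior these checks produce (Go 1.20+; the mustPanic wrapper exists only to print the panic values):

package main

import (
    "fmt"
    "unsafe"
)

// mustPanic runs f and reports the panic value, so the checks in
// unsafestring/unsafeslice can be observed from ordinary code.
func mustPanic(f func()) {
    defer func() { fmt.Println("panic:", recover()) }()
    f()
}

func main() {
    b := [4]byte{'g', 'o', '1', '9'}

    // Valid uses: a pointer plus an in-range length.
    fmt.Println(unsafe.String(&b[0], 2)) // go
    fmt.Println(unsafe.Slice(&b[0], 4))  // [103 111 49 57]

    // Invalid uses trip the runtime checks added in this file.
    n := -1
    mustPanic(func() { _ = unsafe.String(&b[0], n) })        // len out of range
    mustPanic(func() { _ = unsafe.String((*byte)(nil), 1) }) // nil ptr, non-zero len
}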
diff --git a/src/runtime/vdso_freebsd_arm64.go b/src/runtime/vdso_freebsd_arm64.go
index 7d9f62d..37b26d7 100644
--- a/src/runtime/vdso_freebsd_arm64.go
+++ b/src/runtime/vdso_freebsd_arm64.go
@@ -14,7 +14,7 @@
func (th *vdsoTimehands) getTimecounter() (uint32, bool) {
switch th.algo {
case _VDSO_TH_ALGO_ARM_GENTIM:
- return getCntxct(false), true
+ return getCntxct(th.physical != 0), true
default:
return 0, false
}
diff --git a/src/runtime/vdso_freebsd_riscv64.go b/src/runtime/vdso_freebsd_riscv64.go
new file mode 100644
index 0000000..a4fff4b
--- /dev/null
+++ b/src/runtime/vdso_freebsd_riscv64.go
@@ -0,0 +1,21 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ _VDSO_TH_ALGO_RISCV_RDTIME = 1
+)
+
+func getCntxct() uint32
+
+//go:nosplit
+func (th *vdsoTimehands) getTimecounter() (uint32, bool) {
+ switch th.algo {
+ case _VDSO_TH_ALGO_RISCV_RDTIME:
+ return getCntxct(), true
+ default:
+ return 0, false
+ }
+}
diff --git a/src/runtime/vdso_freebsd_x86.go b/src/runtime/vdso_freebsd_x86.go
index 5324a3d..66d1c65 100644
--- a/src/runtime/vdso_freebsd_x86.go
+++ b/src/runtime/vdso_freebsd_x86.go
@@ -34,10 +34,8 @@
return uint32(tsc)
}
-//go:systemstack
+//go:nosplit
func (th *vdsoTimehands) getHPETTimecounter() (uint32, bool) {
- const digits = "0123456789"
-
idx := int(th.x86_hpet_idx)
if idx >= len(hpetDevMap) {
return 0, false
@@ -45,25 +43,7 @@
p := atomic.Loaduintptr(&hpetDevMap[idx])
if p == 0 {
- var devPath [len(hpetDevPath)]byte
- copy(devPath[:], hpetDevPath)
- devPath[9] = digits[idx]
-
- fd := open(&devPath[0], 0 /* O_RDONLY */, 0)
- if fd < 0 {
- atomic.Casuintptr(&hpetDevMap[idx], 0, ^uintptr(0))
- return 0, false
- }
-
- addr, mmapErr := mmap(nil, physPageSize, _PROT_READ, _MAP_SHARED, fd, 0)
- closefd(fd)
- newP := uintptr(addr)
- if mmapErr != 0 {
- newP = ^uintptr(0)
- }
- if !atomic.Casuintptr(&hpetDevMap[idx], 0, newP) && mmapErr == 0 {
- munmap(addr, physPageSize)
- }
+ systemstack(func() { initHPETTimecounter(idx) })
p = atomic.Loaduintptr(&hpetDevMap[idx])
}
if p == ^uintptr(0) {
@@ -72,20 +52,38 @@
return *(*uint32)(unsafe.Pointer(p + _HPET_MAIN_COUNTER)), true
}
+//go:systemstack
+func initHPETTimecounter(idx int) {
+ const digits = "0123456789"
+
+ var devPath [len(hpetDevPath)]byte
+ copy(devPath[:], hpetDevPath)
+ devPath[9] = digits[idx]
+
+ fd := open(&devPath[0], 0 /* O_RDONLY */ |_O_CLOEXEC, 0)
+ if fd < 0 {
+ atomic.Casuintptr(&hpetDevMap[idx], 0, ^uintptr(0))
+ return
+ }
+
+ addr, mmapErr := mmap(nil, physPageSize, _PROT_READ, _MAP_SHARED, fd, 0)
+ closefd(fd)
+ newP := uintptr(addr)
+ if mmapErr != 0 {
+ newP = ^uintptr(0)
+ }
+ if !atomic.Casuintptr(&hpetDevMap[idx], 0, newP) && mmapErr == 0 {
+ munmap(addr, physPageSize)
+ }
+}
+
//go:nosplit
func (th *vdsoTimehands) getTimecounter() (uint32, bool) {
switch th.algo {
case _VDSO_TH_ALGO_X86_TSC:
return th.getTSCTimecounter(), true
case _VDSO_TH_ALGO_X86_HPET:
- var (
- tc uint32
- ok bool
- )
- systemstack(func() {
- tc, ok = th.getHPETTimecounter()
- })
- return tc, ok
+ return th.getHPETTimecounter()
default:
return 0, false
}
diff --git a/src/runtime/wincallback.go b/src/runtime/wincallback.go
index 442a984..9ec2027 100644
--- a/src/runtime/wincallback.go
+++ b/src/runtime/wincallback.go
@@ -62,7 +62,7 @@
TEXT runtime·callbackasm(SB),NOSPLIT|NOFRAME,$0
`)
for i := 0; i < maxCallback; i++ {
- buf.WriteString(fmt.Sprintf("\tMOVW\t$%d, R12\n", i))
+ fmt.Fprintf(&buf, "\tMOVW\t$%d, R12\n", i)
buf.WriteString("\tB\truntime·callbackasm1(SB)\n")
}
@@ -90,7 +90,7 @@
TEXT runtime·callbackasm(SB),NOSPLIT|NOFRAME,$0
`)
for i := 0; i < maxCallback; i++ {
- buf.WriteString(fmt.Sprintf("\tMOVD\t$%d, R12\n", i))
+ fmt.Fprintf(&buf, "\tMOVD\t$%d, R12\n", i)
buf.WriteString("\tB\truntime·callbackasm1(SB)\n")
}
@@ -104,12 +104,12 @@
func gengo() {
var buf bytes.Buffer
- buf.WriteString(fmt.Sprintf(`// Code generated by wincallback.go using 'go generate'. DO NOT EDIT.
+ fmt.Fprintf(&buf, `// Code generated by wincallback.go using 'go generate'. DO NOT EDIT.
package runtime
const cb_max = %d // maximum number of windows callbacks allowed
-`, maxCallback))
+`, maxCallback)
err := os.WriteFile("zcallback_windows.go", buf.Bytes(), 0666)
if err != nil {
fmt.Fprintf(os.Stderr, "wincallback: %s\n", err)