// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add a uint32 if there is an even number of size classes so that
	// the fields below stay aligned to 64 bits for atomic operations
	// on 32-bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// heapStats is a set of memory statistics that are kept
	// consistent with one another; see consistentHeapStats.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
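
// The relationships documented on the fields above can be read off a
// filled-in snapshot m. A minimal illustrative sketch (variable names are
// arbitrary, not part of the API):
//
//	live := m.Mallocs - m.Frees               // live heap objects
//	idle := m.HeapSys - m.HeapInuse           // equals m.HeapIdle (see readmemstats_m)
//	retained := m.HeapIdle - m.HeapReleased   // memory held back for future heap growth
//	lastPause := m.PauseNs[(m.NumGC+255)%256] // most recent GC stop-the-world pause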

func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
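
// A usage sketch from caller code (hypothetical; a package outside the
// runtime would qualify the names with "runtime."). ReadMemStats stops the
// world, so callers typically sample it at a coarse interval:
//
//	var m MemStats
//	ReadMemStats(&m)
//	println("heap alloc:", m.HeapAlloc, "sys:", m.Sys, "gc cycles:", m.NumGC)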

func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = gcController.heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
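
// For reference, the slice handed back to runtime/debug above is laid out
// as follows, with n = min(numgc, len(pause_ns)):
//
//	p[0:n]    recent pause durations, most recent first
//	p[n:2n]   the matching pause end times
//	p[2n]     last GC end time (nanoseconds since 1970)
//	p[2n+1]   number of completed GCs
//	p[2n+2]   cumulative pause time in nanoseconds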

// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += uint64(consStats.tinyAllocCount)
	memstats.nmalloc += uint64(consStats.tinyAllocCount)

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
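
// Illustrative sketch only (not an actual call site in this file): code that
// obtains or returns memory on behalf of a subsystem updates the matching
// sysMemStat with add, and readers such as readmemstats_m observe it with
// load:
//
//	memstats.other_sys.add(int64(n))  // n bytes obtained from the OS
//	memstats.other_sys.add(-int64(n)) // n bytes returned to the OS
//	sys := memstats.other_sys.load()  // atomic snapshot of the current value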

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	tinyAllocCount  uintptr                  // number of tiny allocations
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	_ [(goarch.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}
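
// Writer-side sketch (assuming the preconditions documented on acquire and
// release below; "size" is a stand-in for the allocation size): grab the
// current shard, apply deltas atomically, then release without changing P:
//
//	d := memstats.heapStats.acquire()
//	atomic.Xadduintptr(&d.largeAllocCount, 1)
//	atomic.Xadduintptr(&d.largeAlloc, size)
//	memstats.heapStats.release()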

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
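
// Reader-side sketch (assuming the world is stopped or metricsSema is held,
// per the comment on read): aggregate a consistent snapshot and treat the
// accumulated deltas as absolute values:
//
//	var delta heapStatsDelta
//	memstats.heapStats.read(&delta)
//	inUse := delta.inHeap // matches memstats.heap_inuse when the world is stopped (see updatememstats)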