// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
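//
// As a rough sketch (this is not the actual generated code, which is
// architecture-specific), each function prologue behaves like:
//
//	if SP <= g.stackguard0 { // stackguard0 may be poisoned to stackPreempt
//		morestack() // sees stackPreempt and preempts instead of growing the stack
//	}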
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"runtime/internal/atomic"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
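//
// A minimal sketch of typical use (the GC's stack scan does roughly
// this; details elided):
//
//	state := suspendG(gp)
//	if !state.dead {
//		// Read gp's stack while it is stopped.
//	}
//	resumeG(state)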
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + _StackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := atomic.Load(&asyncM2.preemptGen)
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
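	// In order, the conditions below require: no runtime locks held,
	// not in the middle of a memory allocation (the heap could be in
	// an inconsistent state), preemption not explicitly disabled via
	// m.preemptoff, and a P that is actively running (not, e.g., in
	// _Psyscall).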
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
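//
// Roughly (the exact code is generated by mkpreempt.go and is
// architecture-specific): it allocates a frame, spills every
// general-purpose and floating-point register into it, calls
// asyncPreempt2, restores the registers, and returns to the
// interrupted PC.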
func asyncPreempt()

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > _StackLimit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
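//
// A minimal sketch of intended use (roughly what the signal handler's
// preemption path does; context accessor names abridged):
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.pc(), ctxt.sp(), ctxt.lr()); ok {
//			// Make it look like the thread called asyncPreempt.
//			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
//		}
//	}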
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, _PCDATA_UnsafePoint, pc)
	if up == _PCDATA_UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&funcFlag_ASM != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	name := funcname(f)
	if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
		inltree := (*[1 << 20]inlinedCall)(inldata)
		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
		if ix >= 0 {
			name = funcnameFromNameoff(f, inltree[ix].func_)
		}
	}
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime/internal/") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case _PCDATA_Restart1, _PCDATA_Restart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case _PCDATA_RestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}