Lightweight Thread
goruntime
以下内容涉及到的代码是基于 go1.9rc2 版本。
1. Scheduler Structure
整个调度模型由 Goroutine/Processor/Machine 以及全局调度信息 sched 组成。
Global Runnable Queue
runqueue
----------------------------
| G_10 | G_11 | G_12 | ...
----------------------------
P_0 Local Runnable Queue
+-----+ +-----+ ---------------
| M_3 | ---- | P_0 | <=== | G_8 | G_9 |
+-----+ +-----+ ---------------
|
+-----+
| G_3 | Running
+-----+
P_1 Local Runnable Queue
+-----+ +-----+ ---------------
| M_4 | ---- | P_1 | <=== | G_6 | G_7 |
+-----+ +-----+ ---------------
|
+-----+
| G_5 | Running
+-----+
1.1 Goroutine
线程
// src/runtime/runtime2.go
type g struct {
....
m *m
sched gobuf
goid int64
....
}
type gobuf struct {
sp uintptr
pc uintptr
....
}
runtime 会为每个 Goroutine 分配一个 goid 作为标识,goid 的分配方式见 2.2 节「全局唯一的 goid」。
runtime 通过 gobuf 保存 Goroutine 的执行上下文(例如 sp 和 pc)。
整体来说,Goroutine 仅代表任务的内容以及上下文,并不是具体的执行单元。
1.2 Machine
Machine 是 OS Thread,它负责执行 Goroutine。
// src/runtime/runtime2.go
type m struct {
....
g0 *g // goroutine with scheduling stack
curg *g // current running goroutine
tls [6]uintptr // thread-local storage (for x86 extern register)
p puintptr // attached p for executing go code (nil if not executing go code)
....
}
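Machine 既要执行用户代码,也要执行 runtime 的调度代码。
用户代码运行在 Goroutine 的 user stack 上,而 runtime 的调度代码运行在 system stack 上:m 的 g0 持有用于调度的栈,供 runtime 使用;curg 则指向当前正在运行的 Goroutine。
当前的 g 保存在 TLS 中:settls 把线程的 TLS 基址指向 m.tls,之后可以通过 getg 取回当前的 g。
settls 在 linux/amd64 上的实现如下: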
// src/runtime/sys_linux_amd64.s
// set tls base to DI
TEXT runtime·settls(SB),NOSPLIT,$32
#ifdef GOOS_android
// Same as in sys_darwin_386.s:/ugliness, different constant.
// DI currently holds m->tls, which must be fs:0x1d0.
// See cgo/gcc_android_amd64.c for the derivation of the constant.
SUBQ $0x1d0, DI // In android, the tls base·
#else
ADDQ $8, DI // ELF wants to use -8(FS)
#endif
MOVQ DI, SI
MOVQ $0x1002, DI // ARCH_SET_FS
MOVQ $158, AX // arch_prctl
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
// src/runtime/stubs.go
// getg returns the pointer to the current g.
// The compiler rewrites calls to this function into instructions
// that fetch the g directly (from TLS or from the dedicated register).
func getg() *g
// src/runtime/go_tls.h
#ifdef GOARCH_amd64
#define get_tls(r) MOVQ TLS, r
#define g(r) 0(r)(TLS*1)
#endif
但是 Machine 想要执行一个 Goroutine,必须要绑定 Processor。
runtime
1.3 Processor
Processor 持有本地的可运行队列 runq 以及 goid 的缓存:
type p struct {
...
m muintptr // back-link to associated m (nil if idle)
// Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
goidcache uint64
goidcacheend uint64
// Queue of runnable goroutines. Accessed without lock.
runqhead uint32
runqtail uint32
runq [256]guintptr
...
}
runtime 中 Processor 的数目由 GOMAXPROCS 决定。
runtime
1.4 全局调度信息 sched
sched 是全局变量,记录了全局的调度信息,
例如 goidgen、nmspinning 以及 Global Runnable Queue 等字段。
// src/runtime/runtime2.go
var (
...
sched schedt
...
)
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
goidgen uint64
lock mutex
midle muintptr // idle m's waiting for work
nmidle int32 // number of idle m's waiting for work
maxmcount int32 // maximum number of m's allowed (or die)
pidle puintptr // idle p's
npidle uint32
nmspinning uint32 // See "Worker thread parking/unparking" comment in proc.go.
// Global runnable queue.
runqhead guintptr
runqtail guintptr
runqsize int32
....
}
2. Create a Goroutine
下面的例子中,main 函数会创建一个 Goroutine 来执行 do():
➜ main cat -n main.go
1 package main
2
3 func do() {
4 // nothing
5 }
6
7 func main() {
8 go do()
9 }
go 是关键字,编译器会把 go do() 转换成对 runtime.newproc 的调用,通过反汇编可以看到 runtime.newproc:
➜ main uname -m -s
Linux x86_64
➜ main go build
➜ main go tool objdump -s "main.main" main
TEXT main.main(SB) /root/workspace/main/main.go
main.go:7 0x450a60 64488b0c25f8ffffff MOVQ FS:0xfffffff8, CX
main.go:7 0x450a69 483b6110 CMPQ 0x10(CX), SP
main.go:7 0x450a6d 7630 JBE 0x450a9f
main.go:7 0x450a6f 4883ec18 SUBQ $0x18, SP
main.go:7 0x450a73 48896c2410 MOVQ BP, 0x10(SP)
main.go:7 0x450a78 488d6c2410 LEAQ 0x10(SP), BP
main.go:8 0x450a7d c7042400000000 MOVL $0x0, 0(SP)
main.go:8 0x450a84 488d05e5190200 LEAQ 0x219e5(IP), AX
main.go:8 0x450a8b 4889442408 MOVQ AX, 0x8(SP)
main.go:8 0x450a90 e88bb4fdff CALL runtime.newproc(SB) <==== I'm here.
main.go:9 0x450a95 488b6c2410 MOVQ 0x10(SP), BP
main.go:9 0x450a9a 4883c418 ADDQ $0x18, SP
main.go:9 0x450a9e c3 RET
main.go:7 0x450a9f e88c7dffff CALL runtime.morestack_noctxt(SB)
main.go:7 0x450aa4 ebba JMP main.main(SB)
2.1 创建 do() 的执行上下文
do()
➜ main go tool objdump -s "main.do" main
TEXT main.do(SB) /root/workspace/main/main.go
main.go:5 0x450a50 c3 RET
根据 Intel 开发手册 Chapter 6.3 CALLING PROCEDURES USING CALL AND RET 的说明,RET 指令会把栈顶的返回地址弹出到 IP 寄存器,处理器再从 IP 指向的位置继续执行,所以 RET 之后执行什么取决于栈顶保存的内容。
在初始化 newg.sched 时,gostartcallfn 会把 do() 的入口地址写入 newg.sched.pc,并把 goexit 的地址压入 newg.sched.sp 指向的栈顶,这样 do() 执行 RET 之后就会进入 goexit。
// src/runtime/proc.go @ func newproc1
if narg > 0 {
memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg)
....
}
newg.sched.sp = sp
newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
gostartcallfn(&newg.sched, fn)
newg.gopc = callerpc
newg.startpc = fn.fn
2.2 全局唯一的 goid
runtime
// src/runtime/proc.go
const (
// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
_GoidCacheBatch = 16
)
// src/runtime/proc.go @ func newproc1
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
_p_.goidcache -= _GoidCacheBatch - 1
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
}
newg.goid = int64(_p_.goidcache)
_p_.goidcache++
Processor 每次从 sched.goidgen 批量申请 _GoidCacheBatch 个 goid,
并缓存在 _p_.goidcache 中,这样就不必每次创建 Goroutine 都去访问全局的 sched.goidgen。
2.3 Local vs Global Runnable Queue
当 Goroutine 创建完毕之后,它是放在当前 Processor 的 Local Runnable Queue 还是全局队列里?
从下面的代码可以看到,newg 创建完毕后,runqput 会把 newg 放到当前 Processor(_p_)的 Local Runnable Queue 中。
// src/runtime/proc.go @ func newproc1
runqput(_p_, newg, true)
2.4 小结
看到这里,一般都会有以下几个疑问:
goexit
那么就继续往下读吧~
3. main is a Goroutine
main
➜ main uname -m -s
Linux x86_64
➜ main go build --gcflags "-N -l"
➜ main gdb main
(gdb) info file
Symbols from "/root/workspace/main/main".
Local exec file:
`/root/workspace/main/main', file type elf64-x86-64.
Entry point: 0x44bb80
0x0000000000401000 - 0x0000000000450b13 is .text
0x0000000000451000 - 0x000000000047a6bc is .rodata
0x000000000047a7e0 - 0x000000000047afd4 is .typelink
0x000000000047afd8 - 0x000000000047afe0 is .itablink
0x000000000047afe0 - 0x000000000047afe0 is .gosymtab
0x000000000047afe0 - 0x00000000004a96c8 is .gopclntab
0x00000000004aa000 - 0x00000000004aaa38 is .noptrdata
0x00000000004aaa40 - 0x00000000004ab5b8 is .data
0x00000000004ab5c0 - 0x00000000004c97e8 is .bss
0x00000000004c9800 - 0x00000000004cbe18 is .noptrbss
0x0000000000400fc8 - 0x0000000000401000 is .note.go.buildid
(gdb) info symbol 0x44bb80
_rt0_amd64_linux in section .text
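程序入口 _rt0_amd64_linux 最终会跳转到 runtime.rt0_go。
rt0_go 会依次调用 runtime.args、runtime.osinit 和 runtime.schedinit 完成初始化。
在此之前,rt0_go 先让 g0 和 m0 相互引用(m0 的 g0 字段指向 g0,g0 的 m 字段指向 m0),g0 使用的是 m0 的 system stack;p0 则在 runtime.schedinit 中创建并与 m0 绑定,即 allp[0]。
初始化完成后,通过 runtime.newproc 以 mainPC 为入口创建一个新的 Goroutine。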
有了 Goroutine 之后,那么 Machine 怎么执行呢?
// src/runtime/asm_amd64.s
TEXT runtime·rt0_go(SB),NOSPLIT,$0
...
// set the per-goroutine and per-mach "registers"
// save m->g0 = g0
MOVQ CX, m_g0(AX)
// save m0 to g0->m
MOVQ AX, g_m(CX)
...
CALL runtime·args(SB)
CALL runtime·osinit(SB)
CALL runtime·schedinit(SB)
// create a new goroutine to start program
MOVQ $runtime·mainPC(SB), AX // entry
PUSHQ AX
PUSHQ $0 // arg size
CALL runtime·newproc(SB)
...
// start this M
CALL runtime·mstart(SB) <=== I'm here!
MOVL $0xf1, 0xf1 // crash
RET
4. Machine — Work Stealing
rt0_go 最后的 CALL runtime.mstart(SB) 会启动 Machine,让它开始寻找并执行 Goroutine。
runtime 中 Machine 与任务队列之间的关系如下:
- 当前 Processor 队列已满,Machine 会将本地队列的部分 Goroutine 迁移到 Global Runnable Queue 中;
- Machine 绑定的 Processor 没有可执行的 Goroutine 时,它会去 Global Runnable Queue、Network Poller(netpoll)和其他 Processor 的队列中抢任务。
这种调度模式叫做 Work Stealing。
4.1 如何执行 Goroutine?
// src/runtime/proc.go
func mstart() {
...
} else if _g_.m != &m0 {
acquirep(_g_.m.nextp.ptr()) // 绑定 Processor
_g_.m.nextp = 0
}
schedule()
}
mstart() => schedule() => execute() => xxx() => goexit()
runtime.mstart 进入 schedule 之后,会从以下位置寻找可执行的 Goroutine:
- Local Runnable Queue
- Global Runnable Queue
- Network Poller(netpoll)
- Other Processor’s Runnable Queue
寻找可执行的 Goroutine 的逻辑都在 findrunnable 里。
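找到 Goroutine 之后,execute 会根据 g.sched 中保存的上下文切换过去执行。
函数执行 RET 之后会进入 goexit,goexit 最终会再次调用 schedule,通过 findrunnable 寻找下一个 Goroutine;如果找不到任务,Machine 会进入休眠状态。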
那么谁来激活这些休眠状态的 Machine ?
4.2 Wake Up
runtime.newproc
// src/runtime/proc.go @ func newproc1
if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 {
wakep()
}
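这里先说明一下 spinning:处于 spinning 状态的 Machine 本地已经没有任务,正在主动寻找可执行的 Goroutine。
atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 表示存在空闲的 Processor,并且没有处于 spinning 状态的 Machine,这时才需要调用 wakep。
在 runtime.main 开始执行之前不需要 wakep,runtimeInitTime != 0 表示 runtime.main 已经开始执行。
wakep 会唤醒(或者新建)一个 Machine,并把它置为 spinning 状态。
处于 spinning 状态的新 Machine 会从 runtime.mstart 开始寻找可执行的 Goroutine。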
// src/runtime/proc.go
func wakep() {
// be conservative about spinning threads
if !atomic.Cas(&sched.nmspinning, 0, 1) {
return
}
startm(nil, true)
}
func mspinning() {
// startm's caller incremented nmspinning. Set the new M's spinning.
getg().m.spinning = true
}
func startm(_p_ *p, spinning bool) {
lock(&sched.lock)
if _p_ == nil {
_p_ = pidleget()
if _p_ == nil {
unlock(&sched.lock)
if spinning {
// The caller incremented nmspinning, but there are no idle Ps,
// so it's okay to just undo the increment and give up.
if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
throw("startm: negative nmspinning")
}
}
return
}
}
mp := mget()
unlock(&sched.lock)
if mp == nil {
var fn func()
if spinning {
// The caller incremented nmspinning, so set m.spinning in the new M.
fn = mspinning
}
newm(fn, _p_)
return
}
...
mp.spinning = spinning
mp.nextp.set(_p_)
notewakeup(&mp.park)
}
5. Preemptive
Machine 会在全局范围内查找 Goroutine 来执行,似乎还缺少角色去通知 Machine 释放当前 Goroutine,总不能执行完毕再切换吧。
我们知道操作系统会根据时钟周期性地触发系统中断来进行调度,Golang 是用户态的线程调度,那它怎么通知 Machine 呢?
回忆上文, 提到了有些 Machine 执行任务前它并不需要绑定 Processor,它们都做什么任务呢?
// src/runtime/proc.go
func main() {
...
systemstack(func() {
newm(sysmon, nil)
})
...
}
runtime.main 会创建一个不绑定 Processor 的 Machine 来运行 sysmon(newm 的第二个参数为 nil),sysmon 会周期性地调用 retake:
// src/runtime/proc.go
// forcePreemptNS is the time slice given to a G before it is
// preempted.
const forcePreemptNS = 10 * 1000 * 1000 // 10ms
func retake(now int64) uint32 {
for i := int32(0); i < gomaxprocs; i++ {
_p_ := allp[i]
if _p_ == nil {
continue
}
pd := &_p_.sysmontick
s := _p_.status
...
} else if s == _Prunning {
// Preempt G if it's running for too long.
t := int64(_p_.schedtick)
if int64(pd.schedtick) != t {
pd.schedtick = uint32(t)
pd.schedwhen = now
continue
}
if pd.schedwhen+forcePreemptNS > now {
continue
}
preemptone(_p_)
}
}
...
}
如果某个 Processor 上的 Goroutine 连续执行超过 10ms(forcePreemptNS),retake 就会调用 preemptone,
给当前 Goroutine 加上抢占标记:
// src/runtime/proc.go
func preemptone(_p_ *p) bool {
...
gp.preempt = true
// Every call in a go routine checks for stack overflow by
// comparing the current stack pointer to gp->stackguard0.
// Setting gp->stackguard0 to StackPreempt folds
// preemption into the normal stack overflow check.
gp.stackguard0 = stackPreempt
}
可以看到它并不是直接发信号给 Machine 让它立即释放,而是让 Goroutine 自己释放,那它什么时候会释放?
Golang 创建新的 Goroutine 时,都会分配有限的调用栈空间,按需进行拓展或者收缩。
所以在执行下一个函数时,它会检查调用栈是否溢出。
➜ main go tool objdump -s "main.main" main
TEXT main.main(SB) /root/workspace/main/main.go
main.go:7 0x450a60 64488b0c25f8ffffff MOVQ FS:0xfffffff8, CX
main.go:7 0x450a69 483b6110 CMPQ 0x10(CX), SP
main.go:7 0x450a6d 7630 JBE 0x450a9f <= I'm here!!
main.go:7 0x450a6f 4883ec18 SUBQ $0x18, SP
main.go:7 0x450a73 48896c2410 MOVQ BP, 0x10(SP)
main.go:7 0x450a78 488d6c2410 LEAQ 0x10(SP), BP
main.go:8 0x450a7d c7042400000000 MOVL $0x0, 0(SP)
main.go:8 0x450a84 488d05e5190200 LEAQ 0x219e5(IP), AX
main.go:8 0x450a8b 4889442408 MOVQ AX, 0x8(SP)
main.go:8 0x450a90 e88bb4fdff CALL runtime.newproc(SB)
main.go:9 0x450a95 488b6c2410 MOVQ 0x10(SP), BP
main.go:9 0x450a9a 4883c418 ADDQ $0x18, SP
main.go:9 0x450a9e c3 RET
main.go:7 0x450a9f e88c7dffff CALL runtime.morestack_noctxt(SB)
main.go:7 0x450aa4 ebba JMP main.main(SB)
设置 gp.stackguard0 = stackPreempt 之后,函数入口处的栈检查必然失败,从而跳转到 runtime.morestack_noctxt;这样被 runtime.retake 标记的 Goroutine 会在下一次函数调用时让出执行权,Machine 重新进入 runtime.schedule。
你可能会问,如果这个 Goroutine 里面没有函数调用怎么办?请查看这个 issues/11462。
一般情况下,这样的函数不是死循环,就是很快就退出了,实际开发中这种类型的函数不会太多。
6. 关于线程数目
Processor 的数目决定了 go binary 同时能执行多少个 Goroutine,感觉 Machine 的数目应该不会太多。
➜ scheduler cat -n main.go
1 package main
2
3 import (
4 "log"
5 "net/http"
6 "syscall"
7 )
8
9 func main() {
10 http.HandleFunc("/sleep", func(w http.ResponseWriter, r *http.Request) {
11 tspec := syscall.NsecToTimespec(1000 * 1000 * 1000)
12 if err := syscall.Nanosleep(&tspec, &tspec); err != nil {
13 panic(err)
14 }
15 })
16
17 http.HandleFunc("/echo", func(w http.ResponseWriter, r *http.Request) {
18 w.Write([]byte("hello"))
19 })
20
21 log.Fatal(http.ListenAndServe(":8080", nil))
22 }
GODEBUG
➜ scheduler go build
➜ scheduler GODEBUG=schedtrace=2000 ./scheduler
SCHED 0ms: gomaxprocs=4 idleprocs=1 threads=6 spinningthreads=1 idlethreads=0 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 4016ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
GODEBUG=schedtrace=2000 表示每隔 2000ms 输出一次调度器的状态,schedtrace 函数由 sysmon 调用:
// src/runtime/proc.go
func schedtrace(detailed bool) {
...
print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
...
}
gomaxprocs: 当前 Processor 的数目
idleprocs: 空闲 Processor 的数目
threads: 共创建了多少个 Machine
spinningthreads: spinning 状态的 Machine
idlethreads: 休眠状态的 Machine 数目(对应 sched.nmidle)
runqueue: Global Runnable Queue 队列长度
[x, y, z..]: 每个 Processor 的 Local Runnable Queue 队列长度
下面我们会通过 wrk 对 sleep 和 echo 这两个 endpoint 进行压力测试,并关注 Machine 的数目变化。
➜ scheduler GODEBUG=schedtrace=2000 ./scheduler > echo_result 2>&1 &
[1] 6015
➜ scheduler wrk -t12 -c400 -d30s http://localhost:8080/echo
Running 30s test @ http://localhost:8080/echo
12 threads and 400 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 51.15ms 104.96ms 1.31s 89.35%
Req/Sec 4.97k 4.48k 20.53k 74.84%
1780311 requests in 30.08s, 205.44MB read
Requests/sec: 59178.76
Transfer/sec: 6.83MB
➜ scheduler head -n 20 echo_result
SCHED 0ms: gomaxprocs=4 idleprocs=1 threads=6 spinningthreads=2 idlethreads=0 runqueue=0 [0 0 0 0]
SCHED 2000ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 4005ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 6008ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 8014ms: gomaxprocs=4 idleprocs=0 threads=12 spinningthreads=0 idlethreads=6 runqueue=195 [20 53 6 32]
SCHED 10018ms: gomaxprocs=4 idleprocs=0 threads=12 spinningthreads=0 idlethreads=6 runqueue=272 [65 16 5 37]
SCHED 12021ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=218 [97 5 52 7]
SCHED 14028ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=41 [2 1 25 3]
SCHED 16029ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=178 [10 31 45 38]
SCHED 18033ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=144 [15 92 47 0]
SCHED 20034ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=195 [1 7 4 41]
SCHED 22035ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=159 [88 14 41 5]
SCHED 24038ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=231 [47 19 53 41]
SCHED 26046ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=6 [1 0 1 10]
SCHED 28049ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=133 [61 13 97 53]
SCHED 30049ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=220 [13 49 29 28]
SCHED 32058ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=138 [40 93 63 50]
SCHED 34062ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=266 [51 9 38 31]
SCHED 36068ms: gomaxprocs=4 idleprocs=0 threads=13 spinningthreads=0 idlethreads=7 runqueue=189 [1 3 46 14]
SCHED 38084ms: gomaxprocs=4 idleprocs=4 threads=13 spinningthreads=0 idlethreads=10 runqueue=0 [0 0 0 0]
可以看到,压测 localhost:8080/echo 时 Machine 的数目只增长到了 13 个。接下来压测 localhost:8080/sleep:
➜ scheduler GODEBUG=schedtrace=1000 ./scheduler > sleep_result 2>&1 &
[1] 8284
➜ scheduler wrk -t12 -c400 -d30s http://localhost:8080/sleep
Running 30s test @ http://localhost:8080/sleep
12 threads and 400 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 1.01s 13.52ms 1.20s 86.57%
Req/Sec 83.06 89.44 320.00 79.12%
11370 requests in 30.10s, 1.26MB read
Requests/sec: 377.71
Transfer/sec: 42.79KB
➜ scheduler cat sleep_result
SCHED 0ms: gomaxprocs=4 idleprocs=1 threads=6 spinningthreads=2 idlethreads=0 runqueue=0 [0 0 0 0]
SCHED 1000ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2011ms: gomaxprocs=4 idleprocs=4 threads=6 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 3013ms: gomaxprocs=4 idleprocs=4 threads=282 spinningthreads=0 idlethreads=1 runqueue=0 [0 0 0 0]
SCHED 4020ms: gomaxprocs=4 idleprocs=4 threads=400 spinningthreads=0 idlethreads=1 runqueue=0 [0 0 0 0]
SCHED 5028ms: gomaxprocs=4 idleprocs=4 threads=401 spinningthreads=0 idlethreads=2 runqueue=0 [0 0 0 0]
SCHED 6037ms: gomaxprocs=4 idleprocs=4 threads=401 spinningthreads=0 idlethreads=2 runqueue=0 [0 0 0 0]
SCHED 7038ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 8039ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 9046ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 10049ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 11056ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 12058ms: gomaxprocs=4 idleprocs=4 threads=402 spinningthreads=0 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 13058ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 14062ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 15064ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 16066ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 17068ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 18072ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 19083ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 20084ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 21086ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 22088ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 23096ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 24100ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 25100ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 26100ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 27103ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 28110ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=4 runqueue=0 [0 0 0 0]
SCHED 33131ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=396 runqueue=0 [0 0 0 0]
SCHED 34137ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=400 runqueue=0 [0 0 0 0]
SCHED 35140ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=400 runqueue=0 [0 0 0 0]
SCHED 36150ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=400 runqueue=0 [0 0 0 0]
SCHED 37155ms: gomaxprocs=4 idleprocs=4 threads=403 spinningthreads=0 idlethreads=400 runqueue=0 [0 0 0 0]
与 localhost:8080/echo 相比,这次创建的线程数目多了很多(403 个)。通过 gdb attach 到进程并执行 thread apply all bt,可以查看这些线程的调用栈:
...
Thread 152 (Thread 0x7f4744fb1700 (LWP 27863)):
#0 syscall.Syscall () at /usr/local/go/src/syscall/asm_linux_amd64.s:27
#1 0x000000000047151f in syscall.Nanosleep (time=0xc42119ac90,
#2 0x000000000060f042 in main.main.func1 (w=..., r=0xc4218d8900)
#3 0x00000000005e8974 in net/http.HandlerFunc.ServeHTTP (f=
#4 0x00000000005ea020 in net/http.(*ServeMux).ServeHTTP (
#5 0x00000000005eafa4 in net/http.serverHandler.ServeHTTP (sh=..., rw=...,
#6 0x00000000005e7a5d in net/http.(*conn).serve (c=0xc420263360, ctx=...)
#7 0x0000000000458e31 in runtime.goexit ()
#8 0x000000c420263360 in ?? ()
#9 0x00000000007cf100 in crypto/elliptic.p224ZeroModP63 ()
#10 0x000000c421180ec0 in ?? ()
#11 0x0000000000000000 in ?? ()
Thread 151 (Thread 0x7f47457b2700 (LWP 27862)):
#0 syscall.Syscall () at /usr/local/go/src/syscall/asm_linux_amd64.s:27
#1 0x000000000047151f in syscall.Nanosleep (time=0xc4206bcc90,
#2 0x000000000060f042 in main.main.func1 (w=..., r=0xc4218cd300)
#3 0x00000000005e8974 in net/http.HandlerFunc.ServeHTTP (f=
#4 0x00000000005ea020 in net/http.(*ServeMux).ServeHTTP (
#5 0x00000000005eafa4 in net/http.serverHandler.ServeHTTP (sh=..., rw=...,
#6 0x00000000005e7a5d in net/http.(*conn).serve (c=0xc42048afa0, ctx=...)
#7 0x0000000000458e31 in runtime.goexit ()
#8 0x000000c42048afa0 in ?? ()
#9 0x00000000007cf100 in crypto/elliptic.p224ZeroModP63 ()
#10 0x000000c4204fd080 in ?? ()
#11 0x0000000000000000 in ?? ()
...
使用 pstack 或 gdb 查看调用栈,都可以看到这些线程停在 syscall.Syscall 上,
对应的源码位于 /usr/local/go/src/syscall/asm_linux_amd64.s:
// src/syscall/asm_linux_amd64.s
TEXT ·Syscall(SB),NOSPLIT,$0-56
CALL runtime·entersyscall(SB)
MOVQ a1+8(FP), DI
MOVQ a2+16(FP), SI
MOVQ a3+24(FP), DX
MOVQ $0, R10
MOVQ $0, R8
MOVQ $0, R9
MOVQ trap+0(FP), AX // syscall entry
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS ok
MOVQ $-1, r1+32(FP)
MOVQ $0, r2+40(FP)
NEGQ AX
MOVQ AX, err+48(FP)
CALL runtime·exitsyscall(SB)
RET
ok:
MOVQ AX, r1+32(FP)
MOVQ DX, r2+40(FP)
MOVQ $0, err+48(FP)
CALL runtime·exitsyscall(SB)
RET
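Syscall 在执行系统调用之前会先调用 runtime.entersyscall,把当前 Processor 的状态置为 _Psyscall;sysmon 在 retake 中发现 Processor 长时间处于 _Psyscall 状态时,会把它置为 _Pidle,交给其他 Machine 使用。
之后一旦有新的 Goroutine 需要执行而又没有空闲的 Machine,wakep 就会创建新的线程,所以线程数目会随着阻塞的系统调用不断增长。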
当然,Golang 会限制这个线程数目。
// src/runtime/proc.go
func checkmcount() {
// sched lock is held
if sched.mcount > sched.maxmcount {
print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
throw("thread exhaustion")
}
}
runtime.exitsyscall
一般情况下,go binary 不会创建特别多的线程,但是上线的代码还是需要做一下压测,了解一下代码的实际情况。
一旦真的创建大量的线程了,Golang 目前的版本是不会回收这些空闲的线程。
不过好在 Go 1.10/Go 1.11 会改进这一缺点,详情请查看 issues/14592。
7. 总结
本文粗粒度地介绍了 Golang Goroutine Scheduler 的工作流程,并没有涉及到垃圾回收,Netpoll 以及 Channel Send/Receive 对调度的影响,希望能让读者有个大体的认识。
runtime.mstart