直接说,非常遗憾的是,没有类似pstack/jstack的工具可以使用。
目前只有两种可行的常用方案,但是都有缺陷:
- 直接给进程发送:SIGQUIT或者SIGABRT信号
缺点是这会导致目标进程退出。
- 修改程序代码,增加信号处理机制
例如注册SIGUSR1信号处理函数,每当接收到该信号时,打印所有goroutine栈。缺点是需要修改用户代码。
- 补充一下,这是最近刚了解到的工具:Delve
这是go语言官方推荐的golang调试工具,用来代替gdb,因为:"Delve能够更好地理解golang"。当然这个工具可以很好地打印出所有的goroutine栈,满足我们的需求。
参考资料:https://golang.org/doc/gdb
下面介绍这两种方法:
方法1:使用SIGQUIT或者SIGABRT信号
使用如下测试代码:
package main
import (
"fmt"
"time"
"os"
"os/signal"
"runtime"
"syscall"
)
// myroutine prints an entry message tagged with routineid, sleeps for
// 1000 seconds so the goroutine stays parked while stacks are inspected,
// and then prints the matching exit message.
func myroutine(routineid string) {
	const napTime = 1000 * time.Second

	fmt.Println("Entry routine:", routineid)
	time.Sleep(napTime)
	fmt.Println("Exit routine:", routineid)
}
// main starts two long-sleeping goroutines and then blocks reading standard
// input, keeping the process alive so its goroutine stacks can be inspected
// from another terminal.
func main() {
	fmt.Println("Entry routine: main")

	// Launch goroutine 1
	go myroutine("myroutine1")

	// Launch goroutine 2
	go func(routineid string) {
		fmt.Println("Entry routine:", routineid)
		time.Sleep(1000 * time.Second)
		// Mirror myroutine: announce the exit once the sleep is over.
		fmt.Println("Exit routine:", routineid)
	}("myroutine2")

	// Hold the main goroutine until Scanln returns. Its result is ignored on
	// purpose: the call is used only as a "wait for the user" barrier.
	fmt.Scanln()
	fmt.Println("Exit routine: main")
}
运行进程:
$ go build main.go
$ ./main
Entry routine: main
Entry routine: myroutine1
Entry routine: myroutine2
在另一个终端发送信号SIGQUIT :
$ kill -SIGQUIT <pid>
回去看前一个终端的输出:
$ ./main
Entry routine: main
Entry routine: myroutine1
Entry routine: myroutine2
SIGQUIT: quit
PC=0x4641de m=0 sigcode=0
goroutine 1 [syscall]:
syscall.Syscall(0x0, 0x0, 0xc42006e170, 0x1, 0xc420000180, 0xc420086080, 0x0)
/usr/local/go/src/syscall/asm_linux_amd64.s:18 +0x5 fp=0xc420049a18 sp=0xc420049a10 pc=0x4641b5
syscall.read(0x0, 0xc42006e170, 0x1, 0x4, 0xc420086001, 0x0, 0x0)
/usr/local/go/src/syscall/zsyscall_linux_amd64.go:749 +0x5f fp=0xc420049a78 sp=0xc420049a18 pc=0x463c9f
syscall.Read(0x0, 0xc42006e170, 0x1, 0x4, 0x0, 0x0, 0x0)
/usr/local/go/src/syscall/syscall_unix.go:162 +0x49 fp=0xc420049ac0 sp=0xc420049a78 pc=0x4637b9
internal/poll.(*FD).Read(0xc42007a000, 0xc42006e170, 0x1, 0x4, 0x0, 0x0, 0x0)
/usr/local/go/src/internal/poll/fd_unix.go:153 +0x118 fp=0xc420049b10 sp=0xc420049ac0 pc=0x465918
os.(*File).read(0xc420078000, 0xc42006e170, 0x1, 0x4, 0xc420049c18, 0x40d639, 0x7ff1982c26c8)
/usr/local/go/src/os/file_unix.go:226 +0x4e fp=0xc420049b58 sp=0xc420049b10 pc=0x466d0e
os.(*File).Read(0xc420078000, 0xc42006e170, 0x1, 0x4, 0x7ff1982c26c8, 0xc420049c50, 0x446fad)
/usr/local/go/src/os/file.go:107 +0x6a fp=0xc420049bc8 sp=0xc420049b58 pc=0x4660fa
io.ReadAtLeast(0x4d09a0, 0xc420078000, 0xc42006e170, 0x1, 0x4, 0x1, 0x447293, 0x4b82c0, 0x16460)
/usr/local/go/src/io/io.go:309 +0x86 fp=0xc420049c28 sp=0xc420049bc8 pc=0x462d06
io.ReadFull(0x4d09a0, 0xc420078000, 0xc42006e170, 0x1, 0x4, 0x0, 0x21, 0x2)
/usr/local/go/src/io/io.go:327 +0x58 fp=0xc420049c80 sp=0xc420049c28 pc=0x462e78
fmt.(*readRune).readByte(0xc42006e150, 0x2, 0x40b1ad, 0x5389e0)
/usr/local/go/src/fmt/scan.go:321 +0x65 fp=0xc420049cd0 sp=0xc420049c80 pc=0x489095
fmt.(*readRune).ReadRune(0xc42006e150, 0x28, 0xc42006e150, 0x0, 0xc420000180)
/usr/local/go/src/fmt/scan.go:337 +0xb4 fp=0xc420049d40 sp=0xc420049cd0 pc=0x4891e4
fmt.(*ss).ReadRune(0xc42008e000, 0xc420049dd0, 0xc420049dd8, 0x424cb9, 0x8)
/usr/local/go/src/fmt/scan.go:189 +0x7f fp=0xc420049d98 sp=0xc420049d40 pc=0x488a4f
fmt.(*ss).getRune(0xc42008e000, 0x4c5908)
/usr/local/go/src/fmt/scan.go:211 +0x2f fp=0xc420049de8 sp=0xc420049d98 pc=0x488b9f
fmt.(*ss).doScan(0xc42008e000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/fmt/scan.go:1046 +0xdb fp=0xc420049e28 sp=0xc420049de8 pc=0x48d22b
fmt.Fscanln(0x4d09a0, 0xc420078000, 0x0, 0x0, 0x0, 0xc420049f38, 0x10, 0x48db1b)
/usr/local/go/src/fmt/scan.go:132 +0xcb fp=0xc420049ed8 sp=0xc420049e28 pc=0x48889b
fmt.Scanln(0x0, 0x0, 0x0, 0xa, 0x0, 0x0)
/usr/local/go/src/fmt/scan.go:70 +0x57 fp=0xc420049f28 sp=0xc420049ed8 pc=0x488797
main.main()
/home/<username>/test/go/src/test/main.go:28 +0xd0 fp=0xc420049f88 sp=0xc420049f28 pc=0x48db30
runtime.main()
/usr/local/go/src/runtime/proc.go:198 +0x212 fp=0xc420049fe0 sp=0xc420049f88 pc=0x427a12
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:2361 +0x1 fp=0xc420049fe8 sp=0xc420049fe0 pc=0x44ee41
goroutine 18 [sleep]:
time.Sleep(0xe8d4a51000)
/usr/local/go/src/runtime/time.go:102 +0x166
main.myroutine(0x4bf425, 0xa)
/home/<username>/test/go/src/test/main.go:10 +0xd1
created by main.main
/home/<username>/test/go/src/test/main.go:18 +0x8e
goroutine 19 [sleep]:
time.Sleep(0xe8d4a51000)
/usr/local/go/src/runtime/time.go:102 +0x166
main.main.func1(0x4bf42f, 0xa)
/home/<username>/test/go/src/test/main.go:23 +0xd1
created by main.main
/home/<username>/test/go/src/test/main.go:21 +0xbb
rax 0x0
rbx 0x0
rcx 0x4641e0
rdx 0x1
rdi 0x0
rsi 0xc42006e170
rbp 0xc420049a68
rsp 0xc420049a10
r8 0x0
r9 0x0
r10 0x0
r11 0x202
r12 0x0
r13 0xf3
r14 0x33
r15 0x80
rip 0x4641de
rflags 0x202
cs 0x33
fs 0x0
gs 0x0
$
从屏幕输出我们看到三个goroutine的stack全部被打出来了(好像goroutine号不连续呢),也包括当前寄存器的值。
只是遗憾的是,当前进程退出了。
方法2:修改用户代码,使用自定义的信号(例如SIGUSR1)处理
代码例子:
package main
import (
"fmt"
"time"
"os"
"os/signal"
"runtime"
"syscall"
)
// myroutine logs that the goroutine identified by routineid has started,
// parks it for 1000 seconds, and logs its exit afterwards.
func myroutine(routineid string) {
	const (
		entryTag = "Entry routine:"
		exitTag  = "Exit routine:"
	)

	fmt.Println(entryTag, routineid)
	time.Sleep(1000 * time.Second)
	fmt.Println(exitTag, routineid)
}
// main installs the SIGUSR1 stack-dump handler, starts two long-sleeping
// goroutines, and then blocks reading standard input so the process stays
// alive while signals are sent to it.
func main() {
	fmt.Println("Entry routine: main")

	// Register signal handler
	setupSignalHandler()

	// Launch goroutine 1
	go myroutine("myroutine1")

	// Launch goroutine 2
	go func(routineid string) {
		fmt.Println("Entry routine:", routineid)
		time.Sleep(1000 * time.Second)
		// Mirror myroutine: announce the exit once the sleep is over.
		fmt.Println("Exit routine:", routineid)
	}("myroutine2")

	// Hold the main goroutine until Scanln returns. Its result is ignored on
	// purpose: the call is used only as a "wait for the user" barrier.
	fmt.Scanln()
	fmt.Println("Exit routine: main")
}
// setupSignalHandler subscribes to SIGUSR1 and starts a background goroutine
// that calls dumpStacks once for every signal delivered on the channel.
func setupSignalHandler() {
	// Buffer of one so a signal arriving while a dump is in progress is kept.
	sigCh := make(chan os.Signal, 1)

	go func(incoming <-chan os.Signal) {
		for range incoming {
			dumpStacks()
		}
	}(sigCh)

	signal.Notify(sigCh, syscall.SIGUSR1)
}
// dumpStacks writes the stack traces of all goroutines to standard output,
// framed by BEGIN/END marker lines.
func dumpStacks() {
	// runtime.Stack reports how many bytes it wrote into buf. If the buffer
	// was filled completely the trace may have been cut off, so retry with a
	// buffer twice as large until the trace fits with room to spare.
	buf := make([]byte, 1024)
	for {
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			buf = buf[:n]
			break
		}
		buf = make([]byte, 2*len(buf))
	}

	// Each marker sits on its own line so the dump is easy to find in logs.
	fmt.Println("=== BEGIN goroutine stack dump ===")
	fmt.Printf("%s", buf)
	fmt.Println("=== END goroutine stack dump ===")
}
$ kill -SIGUSR1 <pid>
不知道golang为什么不提供这样的工具,难道他们认为需要打印所有goroutine栈的时候都是进程快挂了吗,这时候让进程退出是必然的选择吗?
Linux上常用的工具例如pstack对于go进程根本不管用,pstack能打印出操作系统级别的thread信息,但是goroutine和thread之间并没有固定的对应关系,特别是GOMAXPROCS=1时,pstack只打印出一个线程的stack信息,没有任何goroutine的stack信息。