前言:

最近遇到一个很奇怪的问题,在net/http里打印http.Request的context会出现panic。具体的错误error, fatal error: concurrent map read and map write 。
我们知道golang的map不是线程安全的,多个协程不加锁对map并发读写会触发data race,然后panic异常退出。

感谢

另外,感谢曹大佬一语惊醒梦中人,不然不会短时间内定位问题。 哈哈 曹大佬的信息,https://github.com/cch123

分析问题

下面是出现net/http context panic的问题代码,代码的逻辑很简单,就是定义一个api,然后打印context而已。把服务运行起来后,我们可以用ab, wrk来进行压测,来制造data race竞争的场景。

// xiaorui.cc

package main

import (
	"fmt"
	"net/http"
)

func panic(w http.ResponseWriter, r *http.Request) {
	fmt.Printf("%+v", r.Context())
}

func main() {
	http.HandleFunc("/", panic)
	err := http.ListenAndServe(":9090", nil)
	if err != nil {
		fmt.Println(err)
	}
}

下面是wrk压测时,net/http服务的异常信息。

// xiaorui.cc

fatal error: concurrent map read and map write

context.Background.WithValue(&http.contextKey{name:"http-server"}, &http.Server{Addr:":9090", Handler:http.Handler(nil), TLSConfig:(*tls.Config)(0xc000062780), ReadTimeout:0, ReadHeaderTimeout:0, WriteTimeout:0, IdleTimeout:0, MaxHeaderBytes:0, TLSNextProto:map[string]func(*http.Server, *tls.Conn, http.Handler){"h2":(func(*http.Server, *tls.Conn, http.Handler))(0x120b620)}, ConnState:(func(net.Conn, http.ConnState))(nil), ErrorLog:(*log.Logger)(nil), disableKeepAlives:0, inShutdown:0, nextProtoOnce:sync.Once{m:sync.Mutex{state:0, sema:0x0}, done:0x1}, nextProtoErr:error(nil), mu:sync.Mutex{state:0, sema:0x0}, listeners:map[*net.Listener]struct {}{(*net.Listener)(0xc00007ccb0):struct {}{}}, activeConn:map[*http.conn]struct {}{(*http.conn)(0xc00009cb40):struct {}{}, (*http.conn)(0xc00009d2c0):struct {}{}, (*http.conn)(0xc00009d540):struct {}{}, (*http.conn)(0xc00009dcc0):struct {}{}, (*http.conn)(0xc00009cd20):struct {}{}, (*http.conn)(0xc00009d5e0):struct {}...
xiaorui.cc
goroutine 32 [running]:
runtime.throw(0x12b35e3, 0x21)
        /usr/local/go/src/runtime/panic.go:608 +0x72 fp=0xc00018a858 sp=0xc00018a828 pc=0x102b892
runtime.mapaccess2(0x1255800, 0xc000080ea0, 0xc00018a978, 0x14bf328, 0xc0000ac868)
        /usr/local/go/src/runtime/map.go:453 +0x223 fp=0xc00018a8a0 sp=0xc00018a858 pc=0x100ed93
reflect.mapaccess(0x1255800, 0xc000080ea0, 0xc00018a978, 0x12afece)
        /usr/local/go/src/runtime/map.go:1249 +0x3f fp=0xc00018a8d8 sp=0xc00018a8a0 pc=0x1010adf
reflect.Value.MapIndex(0x1255800, 0xc000083280, 0x1b5, 0x1288180, 0xc00009da40, 0x36, 0x1257ae0, 0x14bf328, 0xb9)
        /usr/local/go/src/reflect/value.go:1111 +0x11d fp=0xc00018a958 sp=0xc00018a8d8 pc=0x1099b7d
fmt.(*pp).printValue(0xc000188180, 0x1255800, 0xc000083280, 0x1b5, 0x76, 0x2)
        /usr/local/go/src/fmt/print.go:757 +0xf43 fp=0xc00018ab38 sp=0xc00018a958 pc=0x10aa853
fmt.(*pp).printValue(0xc000188180, 0x12a0720, 0xc0000831e0, 0x199, 0xc000000076, 0x1)
        /usr/local/go/src/fmt/print.go:783 +0x1ce9 fp=0xc00018ad18 sp=0xc00018ab38 pc=0x10ab5f9
fmt.(*pp).printValue(0xc000188180, 0x129ed00, 0xc0000831e0, 0x16, 0x76, 0x0)
        /usr/local/go/src/fmt/print.go:853 +0x1b2c fp=0xc00018aef8 sp=0xc00018ad18 pc=0x10ab43c
fmt.(*pp).printArg(0xc000188180, 0x129ed00, 0xc0000831e0, 0x76)
        /usr/local/go/src/fmt/print.go:689 +0x2b7 fp=0xc00018af90 sp=0xc00018aef8 pc=0x10a91a7
fmt.(*pp).doPrintf(0xc000188180, 0x12afaf0, 0x16, 0xc00018b108, 0x3, 0x3)
        /usr/local/go/src/fmt/print.go:1003 +0x166 fp=0xc00018b078 sp=0xc00018af90 pc=0x10acde6
fmt.Sprintf(0x12afaf0, 0x16, 0xc00018b108, 0x3, 0x3, 0x0, 0x0)
        /usr/local/go/src/fmt/print.go:203 +0x66 fp=0xc00018b0d0 sp=0xc00018b078 pc=0x10a5b26
context.(*valueCtx).String(0xc000080e10, 0x12be400, 0xc0001880c0)
        /usr/local/go/src/context/context.go:486 +0xab fp=0xc00018b148 sp=0xc00018b0d0 pc=0x11121bb
fmt.(*pp).handleMethods(0xc0001880c0, 0x76, 0x1)
        /usr/local/go/src/fmt/print.go:603 +0x27c fp=0xc00018b1d8 sp=0xc00018b148 pc=0x10a8cac
fmt.(*pp).printArg(0xc0001880c0, 0x1274ce0, 0xc000080e10, 0x76)
        /usr/local/go/src/fmt/print.go:686 +0x203 fp=0xc00018b270 sp=0xc00018b1d8 pc=0x10a90f3
fmt.(*pp).doPrintf(0xc0001880c0, 0x12afaf0, 0x16, 0xc00018b3e8, 0x3, 0x3)
...
fmt.(*pp).printArg(0xc000188000, 0x1274ce0, 0xc000114480, 0xc000000076)
        /usr/local/go/src/fmt/print.go:1003 +0x166 fp=0xc00018b638 sp=0xc00018b550 pc=0x10acde6
fmt.Sprintf(0x12ad1be, 0xd, 0xc0001106c8, 0x1, 0x1, 0xc00011e080, 0xc0001106f8)
        /usr/local/go/src/fmt/print.go:203 +0x66 fp=0xc00018b690 sp=0xc00018b638 pc=0x10a5b26
context.(*cancelCtx).String(0xc000116640, 0x12be400, 0xc000188300)
        /usr/local/go/src/context/context.go:343 +0x7d fp=0xc00018b6e8 sp=0xc00018b690 pc=0x111158d
fmt.(*pp).handleMethods(0xc000188300, 0xc000000076, 0x1034601)
        /usr/local/go/src/fmt/print.go:603 +0x27c fp=0xc00018b778 sp=0xc00018b6e8 pc=0x10a8cac
fmt.(*pp).printArg(0xc000188300, 0x12785e0, 0xc000116640, 0xc000000076)
        /usr/local/go/src/fmt/print.go:686 +0x203 fp=0xc00018b810 sp=0xc00018b778 pc=0x10a90f3
fmt.(*pp).doPrintf(0xc000188300, 0x12ad1be, 0xd, 0xc00018b988, 0x1, 0x1)
        /usr/local/go/src/fmt/print.go:1003 +0x166 fp=0xc00018b8f8 sp=0xc00018b810 pc=0x10acde6
fmt.Sprintf(0x12ad1be, 0xd, 0xc000110988, 0x1, 0x1, 0xc00011e030, 0xc0001109b8)
        /usr/local/go/src/fmt/print.go:203 +0x66 fp=0xc00018b950 sp=0xc00018b8f8 pc=0x10a5b26
context.(*cancelCtx).String(0xc000116740, 0x12be400, 0xc000188240)
        /usr/local/go/src/context/context.go:343 +0x7d fp=0xc00018b9a8 sp=0xc00018b950 pc=0x111158d
fmt.(*pp).handleMethods(0xc000188240, 0x76, 0xc000080c01)
        /usr/local/go/src/fmt/print.go:603 +0x27c fp=0xc00018ba38 sp=0xc00018b9a8 pc=0x10a8cac
fmt.(*pp).printArg(0xc000188240, 0x12785e0, 0xc000116740, 0x76)
        /usr/local/go/src/fmt/print.go:686 +0x203 fp=0xc00018bad0 sp=0xc00018ba38 pc=0x10a90f3
fmt.(*pp).doPrintf(0xc000188240, 0x12ab4df, 0x3, 0xc00018bcc0, 0x1, 0x1)
 ...
fmt.Printf(0x12ab4df, 0x3, 0xc000110cc0, 0x1, 0x1, 0xc0000fa780, 0x3, 0xc000022a70)
        /usr/local/go/src/fmt/print.go:197 +0x72 fp=0xc00018bc80 sp=0xc00018bc20 pc=0x10a5a82
main.panic(0x12f10c0, 0xc0001821c0, 0xc000120600)
        /Users/ruifengyun/test/k.go:9 +0x89 fp=0xc00018bce0 sp=0xc00018bc80 pc=0x121bb79
net/http.HandlerFunc.ServeHTTP(0x12be4e0, 0x12f10c0, 0xc0001821c0, 0xc000120600)
        /usr/local/go/src/net/http/server.go:1964 +0x44 fp=0xc00018bd08 sp=0xc00018bce0 pc=0x11f1b14
net/http.(*ServeMux).ServeHTTP(0x14a17a0, 0x12f10c0, 0xc0001821c0, 0xc000120600)
        /usr/local/go/src/net/http/server.go:2361 +0x127 fp=0xc00018bd48 sp=0xc00018bd08 pc=0x11f37c7
net/http.serverHandler.ServeHTTP(0xc0000831e0, 0x12f10c0, 0xc0001821c0, 0xc000120600)
        /usr/local/go/src/net/http/server.go:2741 +0xab fp=0xc00018bd78 sp=0xc00018bd48 pc=0x11f427b
net/http.(*conn).serve(0xc00009d180, 0x12f12c0, 0xc000116640)
        /usr/local/go/src/net/http/server.go:1847 +0x646 fp=0xc00018bfc8 sp=0xc00018bd78 pc=0x11f0d66
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1333 +0x1 fp=0xc00018bfd0 sp=0xc00018bfc8 pc=0x1057f81
created by net/http.(*Server).Serve
        /usr/local/go/src/net/http/server.go:2851 +0x2f5

通过panic出来的协程调用栈信息可以分析出,fmt print会不断的递归反射及遍历解析context里的数据。 通过上面的panic信息我们可以得知根本问题是由于map的并发读写造成的,这也就说 context 内部是有map的。
我们在正常情况下打印http.Request context,可以看到两个map,一个是listeners,一个是activeConn。另外在activeConn这个结构里还能看到很多的conn。说明这个context不单单是含有这个请求本身需要的上下文信息了,而且还包含了该server对象。

// xiaorui.cc

context.Background.WithValue(&http.contextKey{name:"http-server"}, &http.Server{Addr:":9090", Handler:http.Handler(nil), TLSConfig:(*tls.Config)(0xc000062780), ReadTimeout:0, ReadHeaderTimeout:0, WriteTimeout:0, IdleTimeout:0, MaxHeaderBytes:0, TLSNextProto:map[string]func(*http.Server, *tls.Conn, http.Handler){"h2":(func(*http.Server, *tls.Conn, http.Handler))(0x120b620)}, ConnState:(func(net.Conn, http.ConnState))(nil), ErrorLog:(*log.Logger)(nil), disableKeepAlives:0, inShutdown:0, nextProtoOnce:sync.Once{m:sync.Mutex{state:0, sema:0x0}, done:0x1}, nextProtoErr:error(nil), mu:sync.Mutex{state:0, sema:0x0}, listeners:map[*net.Listener]struct {}{(*net.Listener)(0xc00007ccb0):struct {}{}}, activeConn:map[*http.conn]struct {}{(*http.conn)(0xc00009cb40):struct {}{}, (*http.conn)(0xc00009d2c0):struct {}{}, (*http.conn)(0xc00009d540):struct {}{}, (*http.conn)(0xc00009dcc0):struct {}{}, (*http.conn)(0xc00009cd20):struct {}{}, (*http.conn)(0xc00009d5e0):struct {}{}, (*http.conn)(0xc00009d860):struct {}{}, (*http.conn)(0xc00009d9a0):struct {}{}, (*http.conn)(0xc0001940a0):struct {}{}, (*http.conn)(0xc00009cfa0):struct {}{}, (*http.conn)(0xc00009d400):struct {}{}, (*http.conn)(0xc00009dd60):struct {}{}, (*http.conn)(0xc000194140):struct {}{}, (*http.conn)(0xc0001941e0):struct {}{}, (*http.conn)(0xc00009caa0):struct {}{}, (*http.conn)(0xc00009d180):struct {}{}, (*http.conn)(0xc00009d220):struct {}{}, (*http.conn)(0xc00009d680):struct {}{}, (*http.conn)(0xc00009d900):struct {}{}, (*http.conn)(0xc00009cc80):struct {}{}, (*http.conn)(0xc00009ce60):struct {}{}, (*http.conn)(0xc00009d040):struct {}{}, (*http.conn)(0xc00009dc20):struct {}{}, (*http.conn)(0xc00009dea0):struct {}{}, (*http.conn)(0xc00009ca00):struct {}{}, (*http.conn)(0xc00009cdc0):struct {}{}, (*http.conn)(0xc00009d0e0):struct {}{}, (*http.conn)(0xc00009d360):struct {}{}, (*http.conn)(0xc00009da40):struct {}{}, (*http.conn)(0xc00009dae0):struct {}{}, (*http.conn)(0xc00009cbe0):struct {}{}, (*http.conn)(0xc00009d4a0):struct {}{}, (*http.conn)(0x

我们再来分析下 net/http的代码里对activeConn map的修改逻辑。不管是初始化,新增,删除都有加锁。但是他的锁的范围只是 net/http server的锁。fmt.Printf里对server对象的activeConn map遍历打印自然不受影响。
那么,自然就会有造成 map 的panic。

// xiaorui.cc

// 创建先的活动连接
func (s *Server) trackConn(c *conn, add bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.activeConn == nil {
		s.activeConn = make(map[*conn]struct{})
	}
	if add {
		s.activeConn[c] = struct{}{}
	} else {
		delete(s.activeConn, c)
	}
}

// 关闭空闲连接
func (s *Server) closeIdleConns() bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	for c := range s.activeConn {
		st, unixSec := c.getState()
                ...
		delete(s.activeConn, c)
	}
	return quiescent
}

这里还有个问题,这个server是从哪里传给每个请求的handler的。

// xiaorui.cc

func (srv *Server) Serve(l net.Listener) error {
	baseCtx := context.Background()
        ...

        // 把他自身通过withValue生成一个ctx,并传递下去
	ctx := context.WithValue(baseCtx, ServerContextKey, srv)
	for {
		rw, e := l.Accept()
        ...
		go c.serve(ctx)
	}
}

// Serve a new connection.
func (c *conn) serve(ctx context.Context) {
	c.remoteAddr = c.rwc.RemoteAddr().String()
	ctx = context.WithValue(ctx, LocalAddrContextKey, c.rwc.LocalAddr())
    ...
	for {
		w, err := c.readRequest(ctx)
		if c.r.remain != c.server.initialReadLimitSize() {
			// If we read any bytes off the wire, we're active.
			c.setState(c.rwc, StateActive)
		}
        ...

		req := w.req
		serverHandler{c.server}.ServeHTTP(w, w.req)
        }
    }
}

如何解决? 或者说安全打印

别直接把context都输出打印就可以了。😅

总结:

出问题,就要看源码。话说对外总是吹嘘net/http代码看过好几遍,但出问题还是要回炉分析代码,说明net/http的源码看的还是不精呀。 😅😅😅