实验:对比 100000 个 goroutine、CPU 核数个 goroutine 与单线程三种方式的耗时
package main
import (
"fmt"
"runtime"
"sync"
"sync/atomic"
"time"
)
// addConcurrent performs num atomic increments on a shared counter, starting
// one goroutine per increment, and prints the elapsed time once all of them
// have been applied.
//
// The counter is an int32, so num is expected to fit in an int32. A num that
// is zero or negative starts no goroutines and prints immediately.
func addConcurrent(num int) {
	var c int32
	target := int32(num)
	start := time.Now()
	for i := 0; i < num; i++ {
		go atomic.AddInt32(&c, 1)
	}
	// The counter is written concurrently, so it must be read with an atomic
	// load; a plain read is a data race. Comparing with < instead of == also
	// lets the loop end when num is negative.
	for atomic.LoadInt32(&c) < target {
		// Yield instead of spinning so the waiting goroutine does not hold a
		// processor that the incrementing goroutines need.
		runtime.Gosched()
	}
	fmt.Println(time.Since(start))
}
// addCPUNum performs num atomic increments on a shared counter using one
// goroutine per logical CPU (runtime.NumCPU), waits for all of them to finish,
// and prints the elapsed time.
//
// The increments are split as evenly as possible: every worker gets num/core
// of them and the first num%core workers get one more, so exactly num
// increments run even when num is not a multiple of the CPU count.
func addCPUNum(num int) {
	var c int32
	var wg sync.WaitGroup
	core := runtime.NumCPU()
	base, extra := num/core, num%core
	start := time.Now()
	wg.Add(core)
	for i := 0; i < core; i++ {
		share := base
		if i < extra {
			// Hand out the remainder one increment at a time so no work is
			// dropped by the integer division above.
			share++
		}
		go func(n int) {
			defer wg.Done()
			for j := 0; j < n; j++ {
				atomic.AddInt32(&c, 1)
			}
		}(share)
	}
	wg.Wait()
	fmt.Println(time.Since(start))
}
// addOneThread performs num atomic increments on a local counter from the
// calling goroutine only, then prints how long the loop took.
func addOneThread(num int) {
	var counter int32
	begin := time.Now()
	for remaining := num; remaining > 0; remaining-- {
		atomic.AddInt32(&counter, 1)
	}
	elapsed := time.Since(begin)
	fmt.Println(elapsed)
}
// main runs the three counting strategies in order, each with the same number
// of increments: one goroutine per increment, one goroutine per CPU, and a
// single goroutine.
func main() {
	const total = 100000
	strategies := []func(int){addConcurrent, addCPUNum, addOneThread}
	for _, run := range strategies {
		run(total)
	}
}
运行结果:
GOROOT=/usr/local/opt/go/libexec #gosetup
GOPATH=/Users/mar/go #gosetup
/usr/local/opt/go/libexec/bin/go build -o /private/var/folders/4b/65x09q517lj_6byhlcjbk4_m0000gn/T/GoLand/___go_build_main_go /Users/mar/Work/go/demo/main.go #gosetup
/private/var/folders/4b/65x09q517lj_6byhlcjbk4_m0000gn/T/GoLand/___go_build_main_go
32.04965ms
2.13698ms
616.344µs
1. 100000 个 goroutine 的方式耗时最长,主要原因是大量 goroutine 的创建、调度以及上下文切换都有延迟代价。显然用 100000 个 goroutine 处理这种 cpu-bound 的工作很不利。io-bound 的任务可以在 io wait 的时候切换到别的 goroutine(线程)去做其他事情,但是对于 cpu-bound 的任务,它会一直处理 work,频繁切换只会损害性能。
2. CPU 核数个 goroutine 的方式仍然比单线程慢,主要原因是 cache 行争用:每个 core 都在修改共享变量 c 所在的同一个 cache 行,频繁操作 c 会让该 cache 行在各个核心的 cache 之间反复失效、同步,导致内存抖动(cache 和主存之间的来回同步)。地址相邻的不同变量落在同一个 cache 行时也会出现同样的问题,即 false sharing(cache 伪共享)。在 golang 程序中需要避免这类内存抖动,尽量避免多个线程去频繁操作同一个变量或者是地址相邻的变量。