slice 预先分配内存

先看 slice。下面两个函数都向 slice 追加 num 个元素:appendOne 不预先分配容量,appendMany 使用 make 预先分配容量。
  // appendOne returns a slice holding 0, 1, ..., num-1.
  //
  // The slice starts out nil with no reserved capacity, so append has to grow
  // the backing array as elements arrive. For num <= 0 the loop body never
  // runs and nil is returned.
  func appendOne(num int) []int {
  	var res []int
  	for i := 0; i < num; i++ {
  		res = append(res, i)
  	}
  	return res
  }


  // appendMany returns a slice holding 0, 1, ..., num-1.
  //
  // Capacity for all num elements is reserved up front with make, so the
  // appends in the loop never have to grow the backing array. make panics at
  // run time if num is negative.
  func appendMany(num int) []int {
  	res := make([]int, 0, num)
  	for i := 0; i < num; i++ {
  		res = append(res, i)
  	}
  	return res
  }

分别对 appendOne 和 appendMany 编写基准测试:
  1. func BenchmarkAppendOne(b *testing.B) {

  2. num := 10000

  3. for i := 0; i < b.N; i++ {

  4. _ = appendOne(num)

  5. }

  6. }


  7. func BenchmarkAppendMany(b *testing.B) {

  8. num := 10000

  9. for i := 0; i < b.N; i++ {

  10. _ = appendMany(num)

  11. }

  12. }

运行测试

  1. $ go test -bench=. -benchmem

  2. goos: darwin

  3. goarch: amd64

  4. pkg: com.learn/gormLearn/gc_gc

  5. BenchmarkAppendOne-4 23163 50675 ns/op 386296 B/op 20 allocs/op

  6. BenchmarkAppendMany-4 96781 12241 ns/op 81920 B/op 1 allocs/op

  7. PASS

可以看到,AppendMany 每次操作只进行 1 次内存分配,共 81920 B,耗时 12241 ns;而 AppendOne 每次操作需要 20 次内存分配,共 386296 B,耗时 50675 ns。因此应尽量为 slice 预先分配容量。
map 也可以用同样的方式预先分配内存:
  // makeMap builds a map with the keys 0..num-1, each mapped to itself.
  //
  // num is passed to make as the size hint so that room for the entries is
  // reserved before the loop starts inserting. The map is local and is
  // discarded when the function returns.
  func makeMap(num int) {
  	m := make(map[int]int, num)
  	// num is an int and is compared directly; len(num) does not compile.
  	for i := 0; i < num; i++ {
  		m[i] = i
  	}
  }

这可以减少内存拷贝的开销,也可以减少rehash开销。

map中保存值,而不是指针,使用分段map

看下面的例子,map分别保存指针和值。

  // timeGC forces a garbage collection with runtime.GC and returns how long
  // that call took to return.
  func timeGC() time.Duration {
  	start := time.Now()
  	runtime.GC()
  	return time.Since(start)
  }


  6. func mapPointer(num int) {

  7. m := make(map[int]*int, num)

  8. for i := 0; i < num; i++ {

  9. m[i] = &i

  10. }

  11. runtime.GC()

  12. fmt.Printf("With %T, GC took %s\n", m, timeGC())

  13. _ = m[0]

  14. }


  15. func mapValue(num int) {

  16. m := make(map[int]int, num)

  17. for i := 0; i < num; i++ {

  18. m[i] = i

  19. }

  20. runtime.GC()

  21. fmt.Printf("With %T, GC took %s\n", m, timeGC())

  22. _ = m[0]

  23. }


  24. func mapPointerShard(num int) {

  25. shards := make([]map[int]*int, 100)

  26. for i := range shards {

  27. shards[i] = make(map[int]*int)

  28. }

  29. for i := 0; i < num; i++ {

  30. shards[i%100][i] = &i

  31. }

  32. runtime.GC()

  33. fmt.Printf("With map shards (%T), GC took %s\n", shards, timeGC())

  34. _ = shards[0][0]

  35. }


  36. func mapValueShard(num int) {

  37. shards := make([]map[int]int, 100)

  38. for i := range shards {

  39. shards[i] = make(map[int]int)

  40. }

  41. for i := 0; i < num; i++ {

  42. shards[i%100][i] = i

  43. }

  44. runtime.GC()

  45. fmt.Printf("With map shards (%T), GC took %s\n", shards, timeGC())

  46. _ = shards[0][0]

  47. }


  // N is the number of entries passed to the map benchmarks: 5e7, i.e. 50
  // million (written "5000w" in the original note, 5000 x 10,000).
  const N = 5e7


  49. func BenchmarkMapPointer(b *testing.B) {

  50. mapPointer(N)

  51. }


  52. func BenchmarkMapValue(b *testing.B) {

  53. mapValue(N)

  54. }


  55. func BenchmarkMapPointerShard(b *testing.B) {

  56. mapPointerShard(N)

  57. }


  58. func BenchmarkMapValueShard(b *testing.B) {

  59. mapValueShard(N)

  60. }

运行

  1. $ go test -bench=^BenchmarkMapPointer$ -benchmem

  2. With map[int]*int, GC took 545.139836ms

  3. goos: darwin

  4. goarch: amd64

  5. pkg: com.learn/gormLearn/gc_gc

  6. BenchmarkMapPointer-4 1 9532798100 ns/op 1387850488 B/op 724960 allocs/op


  7. $ go test -bench=^BenchmarkMapPointerShard$ -benchmem

  8. With map shards ([]map[int]*int), GC took 688.39764ms

  9. goos: darwin

  10. goarch: amd64

  11. pkg: com.learn/gormLearn/gc_gc

  12. BenchmarkMapPointerShard-4 1 20670458639 ns/op 4286763416 B/op 1901279 allocs/op


  13. $ go test -bench=^BenchmarkMapValueShard$ -benchmem

  14. With map shards ([]map[int]int), GC took 1.965519ms

  15. goos: darwin

  16. goarch: amd64

  17. pkg: com.learn/gormLearn/gc_gc

  18. BenchmarkMapValueShard-4 1 16190847776 ns/op 4385268936 B/op 1918445 allocs/op


  19. $ go test -bench=^BenchmarkMapValue$ -benchmem

  20. With map[int]int, GC took 22.993926ms

  21. goos: darwin

  22. goarch: amd64

  23. pkg: com.learn/gormLearn/gc_gc

  24. BenchmarkMapValue-4 1 8253025035 ns/op 1444338752 B/op 724512 allocs/op

可以设置环境变量 GODEBUG=gctrace=1 来查看每次 GC 的详细信息:
  1. $ GODEBUG=gctrace=1 go test -bench=^BenchmarkMapPointer$ -benchmem

  2. ...

  3. gc 3 @0.130s 19%: 0.006+424+0.013 ms clock, 0.027+0.18/424/848+0.055 ms cpu, 1224->1224->1224 MB, 1225 MB goal, 4 P

  4. gc 4 @9.410s 2%: 0.005+543+0.002 ms clock, 0.022+0/543/1628+0.011 ms cpu, 1325->1325->1323 MB, 2448 MB goal, 4 P (forced)

  5. gc 5 @9.957s 3%: 0.003+547+0.003 ms clock, 0.013+0/547/1631+0.013 ms cpu, 1323->1323->1323 MB, 2647 MB goal, 4 P (forced)

  6. With map[int]*int, GC took 550.40821ms

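以上面 gctrace 输出中的 0.013+0/547/1631+0.013 ms cpu 为例,它表示 GC 各阶段占用的 CPU 时间:
第一个 0.013 是 sweep termination(STW)阶段的时间;0/547/1631 是并发标记扫描阶段的时间,依次为 mutator assist、后台 GC、空闲 GC 的时间,即 0、547、1631 ms;最后的 0.013 是 mark termination(STW)阶段的时间。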
  1. $ GODEBUG=gctrace=1 go test -bench=^BenchmarkMapValue$ -benchmem

  2. ...

  3. gc 3 @0.018s 0%: 0.005+0.14+0.015 ms clock, 0.021+0.054/0.020/0.19+0.060 ms cpu, 1224->1224->1224 MB, 1225 MB goal, 4 P

  4. gc 4 @8.334s 0%: 0.006+21+0.003 ms clock, 0.027+0/6.4/21+0.013 ms cpu, 1379->1379->1334 MB, 2448 MB goal, 4 P (forced)

  5. gc 5 @8.358s 0%: 0.003+19+0.003 ms clock, 0.014+0/5.0/20+0.015 ms cpu, 1334->1334->1334 MB, 2668 MB goal, 4 P (forced)

可以看到,map 中保存值(map[int]int)时,强制 GC 的耗时约为 20 ms,远小于保存指针(map[int]*int)时的约 550 ms。

string与[]byte的转换

string 是不可变的,把 string 转换为 []byte 时通常会发生一次内存拷贝:
  // Example shows the ordinary conversion from string to []byte, which yields
  // a copy of the string's bytes.
  func Example() {
  	s := "Hello,world"
  	b := []byte(s)
  	// b is not needed afterwards; an unused local variable does not compile.
  	_ = b
  }

可以使用 unsafe 包在 string 和 []byte 之间进行不拷贝内存的转换:
  // String2Bytes returns a []byte that shares the memory of s instead of
  // copying it. The result has len and cap equal to len(s).
  //
  // The returned slice must be treated as read-only: it aliases the string's
  // bytes, and strings are immutable.
  func String2Bytes(s string) []byte {
  	var b []byte
  	// The headers are only used as pointers to a real string and a real
  	// slice. A reflect.SliceHeader built as a plain struct value is not a
  	// valid way to create a slice, because its uintptr Data field does not
  	// keep the referenced memory alive.
  	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
  	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
  	dst.Data = src.Data
  	dst.Len = src.Len
  	dst.Cap = src.Len
  	return b
  }


  // Bytes2String returns a string that shares the memory of b instead of
  // copying it. A nil or empty slice yields "".
  //
  // The caller must not modify b afterwards; the string would change with it.
  func Bytes2String(b []byte) string {
  	// Reinterpret the slice header as a string header: both begin with the
  	// data pointer followed by the length (compare reflect.SliceHeader and
  	// reflect.StringHeader), and the slice's extra Cap word is ignored.
  	return *(*string)(unsafe.Pointer(&b))
  }

函数返回值使用值,不使用指针

对占用空间少、频繁分配的对象,如果函数返回指针,会带来内存逃逸,使得原来可以分配在栈(stack)上的内存,需要分配在堆(heap)上。在栈上进行小对象拷贝的性能很好,比在堆上分配对象要好得多。看下面的例子,2个函数分别返回值和指针。

  // S is a small sample struct with three int64, three string and three
  // float64 fields. byCopy returns it by value and byPointer returns a
  // pointer to it.
  type S struct {
  	a, b, c int64
  	d, e, f string
  	g, h, i float64
  }


  6. func byCopy() S {

  7. return S{

  8. a: 1, b: 1, c: 1,

  9. e: "lyp", f: "lyp",

  10. g: 1.0, h: 1.0, i: 1.0,

  11. }

  12. }


  13. func byPointer() *S {

  14. return &S{

  15. a: 1, b: 1, c: 1,

  16. e: "lyp", f: "lyp",

  17. g: 1.0, h: 1.0, i: 1.0,

  18. }

  19. }

benchmark函数

  1. func BenchmarkMemoryStack(b *testing.B) {

  2. var s S


  3. f, err := os.Create("stack.out")

  4. if err != nil {

  5. panic(err)

  6. }

  7. defer f.Close()


  8. err = trace.Start(f)

  9. if err != nil {

  10. panic(err)

  11. }


  12. for i := 0; i < b.N; i++ {

  13. s = byCopy()

  14. }


  15. trace.Stop()


  16. b.StopTimer()

  17. _ = fmt.Sprintf("%v", s.a)

  18. }


  19. func BenchmarkMemoryHeap(b *testing.B) {

  20. var s *S


  21. f, err := os.Create("heap.out")

  22. if err != nil {

  23. panic(err)

  24. }

  25. defer f.Close()


  26. err = trace.Start(f)

  27. if err != nil {

  28. panic(err)

  29. }


  30. for i := 0; i < b.N; i++ {

  31. s = byPointer()

  32. }


  33. trace.Stop()


  34. b.StopTimer()

  35. _ = fmt.Sprintf("%v", s.a)

  36. }

运行

  1. go test ./... -bench=BenchmarkMemoryHeap -benchmem -run=^$ -count=10

  2. goos: darwin

  3. goarch: amd64

  4. pkg: com.learn/gormLearn/gc_gc

  5. BenchmarkMemoryHeap-4 19625536 53.0 ns/op 96 B/op 1 allocs/op


  6. go test ./... -bench=BenchmarkMemoryStack -benchmem -run=^$ -count=10

  7. goos: darwin

  8. goarch: amd64

  9. pkg: com.learn/gormLearn/gc_gc

  10. BenchmarkMemoryStack-4 163253341 7.22 ns/op 0 B/op 0 allocs/op

可以看到,返回值的函数每次操作耗时 7.22 ns/op,且没有堆内存分配;返回指针的函数每次操作耗时 53.0 ns/op,并且每次要分配 96 B 内存。

使用struct{}优化

struct{} 是零宽度类型,不占用内存空间。当 map 只用来表示集合(只关心 key 是否存在)时,可以用 struct{} 代替 bool 作为值类型:
  // assign builds a map[int]bool with the keys 0..num-1, every value set to
  // true. num is also used as the size hint for make. The map is local and is
  // discarded when the function returns.
  func assign(num int) {
  	m := make(map[int]bool, num)
  	for i := 0; i < num; i++ {
  		m[i] = true
  	}
  }


  // assignStruct builds a map[int]struct{} with the keys 0..num-1, using the
  // empty struct as the value for every key. num is also used as the size hint
  // for make. The map is local and is discarded when the function returns.
  func assignStruct(num int) {
  	m := make(map[int]struct{}, num)
  	for i := 0; i < num; i++ {
  		m[i] = struct{}{}
  	}
  }

使用 struct{} 作为值时,map 不需要为值存储额外的数据,比使用 bool 更节省内存。

GC分析的工具

  • go tool pprof

  • go tool trace

  • go build -gcflags="-m"

  • GODEBUG="gctrace=1"

我的公众号:lyp分享的地方

我的知乎专栏:https://zhuanlan.zhihu.com/c_1275466546035740672

我的博客:https://www.liangyaopei.com/