golang生成随机字符串的八种方式与性能测试

前言

这是在StackOverflow上的一篇高赞回答，质量很高，翻译一下，大家一起学习

问题是：go语言中，有没有什么最快最简单的方法，用来生成只包含英文字母的随机字符串

icza给出了8个方案，最简单的方法并不是最快的方法，它们各有优劣，末尾附上性能测试结果：

1. Runes

比较简单的答案，声明一个rune数组，通过随机数选取rune字符，拼接成结果

package approach1

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

func randStr(n int) string {
    b := make([]rune, n)
    for i := range b {
        b[i] = letters[rand.Intn(len(letters))]
    }
    return string(b)
}

func TestApproach1(t *testing.T) {
    rand.Seed(time.Now().UnixNano())
    fmt.Println(randStr(10))
}

func BenchmarkApproach1(b *testing.B) {
    rand.Seed(time.Now().UnixNano())
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

2. Bytes

如果随机挑选的字符只包含英文字母，我们可以直接使用bytes，因为在UTF-8编码模式下，英文字符和Bytes是一对一的（Go正是使用UTF-8模式编码）

所以可以把

var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

用这个替代

var letters = []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

或者更好

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

len(letters)slen(s)

letters

package approach2

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

func randStr(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letters[rand.Intn(len(letters))]
    }
    return string(b)
}

func TestApproach2(t *testing.T) {
    rand.Seed(time.Now().UnixNano())

    fmt.Println(randStr(10))
}

func BenchmarkApproach2(b *testing.B) {
    rand.Seed(time.Now().UnixNano())
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

3. Remainder 余数

rand.Intn()Rand.Intn()Rand.Int31n()

rand.Int63()Rand.Int31n()

rand.Int63()len(letterBytes)

package approach3

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

func randStr(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letters[rand.Int63() % int64(len(letters))]
    }
    return string(b)
}

func TestApproach3(t *testing.T) {
    rand.Seed(time.Now().UnixNano())

    fmt.Println(randStr(10))
}

func BenchmarkApproach3(b *testing.B) {
    rand.Seed(time.Now().UnixNano())
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

rand.Int63()

6/325/32

4. Masking 掩码

rand.Int63()0..len(letterBytes)-1

len(letterBytes)0.5(64-52)/64=0.19

package approach4

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

const (
    // 6 bits to represent a letters index
    letterIdBits = 6
    // All 1-bits as many as letterIdBits
    letterIdMask = 1 <<letterIdBits - 1
)

func randStr(n int) string {
    b := make([]byte, n)
    for i := range b {
        if idx := int(rand.Int63() & letterIdMask); idx < len(letters) {
            b[i] = letters[idx]
            i++
        }
    }
    return string(b)
}

func TestApproach4(t *testing.T) {
    rand.Seed(time.Now().UnixNano())

    fmt.Println(randStr(10))
}

func BenchmarkApproach4(b *testing.B) {
    rand.Seed(time.Now().UnixNano())
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

5. Masking Improved

rand.Int63()rand.Int63()

63/6=10

rand.Int63()

package approach5

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

const (
    // 6 bits to represent a letter index
    letterIdBits = 6
    // All 1-bits as many as letterIdBits
    letterIdMask = 1<<letterIdBits - 1
    letterIdMax  = 63 / letterIdBits
)

func randStr(n int) string {
    b := make([]byte, n)
    // A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
    for i, cache, remain := n-1, rand.Int63(), letterIdMax; i >= 0; {
        if remain == 0 {
            cache, remain = rand.Int63(), letterIdMax
        }
        if idx := int(cache & letterIdMask); idx < len(letters) {
            b[i] = letters[idx]
            i--
        }
        cache >>= letterIdBits
        remain--
    }
    return string(b)
}

func TestApproach5(t *testing.T) {
    rand.Seed(time.Now().UnixNano())

    fmt.Println(randStr(10))
}

func BenchmarkApproach5(b *testing.B) {
    rand.Seed(time.Now().UnixNano())
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

6. Source

第5个方案非常好，能改进的点并不多。我们可以但不值得搞得很复杂。

让我们来找可以改进的点：随机数的生成源

crypto/randRead(b []byte)crypto/rand

math/randrand.Randrand.Sourcerand.SourceInt63() int64

rand.Randrand.Source

package approach6

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

var src = rand.NewSource(time.Now().UnixNano())

const (
    // 6 bits to represent a letter index
    letterIdBits = 6
    // All 1-bits as many as letterIdBits
    letterIdMask = 1<<letterIdBits - 1
    letterIdMax  = 63 / letterIdBits
)

func randStr(n int) string {
    b := make([]byte, n)
    // A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
    for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdMax
        }
        if idx := int(cache & letterIdMask); idx < len(letters) {
            b[i] = letters[idx]
            i--
        }
        cache >>= letterIdBits
        remain--
    }
    return string(b)
}

func TestApproach6(t *testing.T) {
    fmt.Println(randStr(10))
}

func BenchmarkApproach6(b *testing.B) {
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

rand.Source

math/rand

默认的Source是协程安全的

rand.NewSource()Source

7. 使用 strings.Builder

之前的解决方案都返回了通过slice构造的字符串。最后的一次转换进行了一次拷贝，因为字符串是不可变的，如果转换的时候不进行拷贝，就无法保证转换完成之后，byte slice再被修改后，字符串仍能保持不变。

[]byteBuilder.String()[]byte

所以我们的下一个想法不是在slice中构建随机字符串，而是使用 strings.Builder，结束building后，我们就可以获取并返回结果，而无需复制。这可能在速度方面有所帮助，并且在内存使用和分配方面肯定会有所帮助（译者注：等会在benchmark中会清晰地看到）。

package approach7

import (
    "fmt"
    "math/rand"
    "strings"
    "testing"
    "time"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

var src = rand.NewSource(time.Now().UnixNano())

const (
    // 6 bits to represent a letter index
    letterIdBits = 6
    // All 1-bits as many as letterIdBits
    letterIdMask = 1<<letterIdBits - 1
    letterIdMax  = 63 / letterIdBits
)

func randStr(n int) string {
    sb := strings.Builder{}
    sb.Grow(n)
    // A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
    for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdMax
        }
        if idx := int(cache & letterIdMask); idx < len(letters) {
            sb.WriteByte(letters[idx])
            i--
        }
        cache >>= letterIdBits
        remain--
    }
    return sb.String()
}

func TestApproach7(t *testing.T) {
    fmt.Println(randStr(10))
}

func BenchmarkApproach7(b *testing.B) {
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

Builder.Grow()

strings.Builderunsafe

模仿string.Builder使用unsafe包

[]byte

unsafe

// String returns the accumulated string.
func (b *Builder) String() string {
    return *(*string)(unsafe.Pointer(&b.buf))
}

unsafe

package approach8

import (
    "fmt"
    "math/rand"
    "testing"
    "time"
    "unsafe"
)

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

var src = rand.NewSource(time.Now().UnixNano())

const (
    // 6 bits to represent a letter index
    letterIdBits = 6
    // All 1-bits as many as letterIdBits
    letterIdMask = 1<<letterIdBits - 1
    letterIdMax  = 63 / letterIdBits
)

func randStr(n int) string {
    b := make([]byte, n)
    // A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
    for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdMax
        }
        if idx := int(cache & letterIdMask); idx < len(letters) {
            b[i] = letters[idx]
            i--
        }
        cache >>= letterIdBits
        remain--
    }
    return *(*string)(unsafe.Pointer(&b))
}

func TestApproach8(t *testing.T) {
    fmt.Println(randStr(10))
}

func BenchmarkApproach8(b *testing.B) {
    for i := 0; i < b.N; i++ {
        _ = randStr(10)
    }
}

Benchmark

go test ./... -bench=. -benchmem

原作者测试的数据

(译者注：第三列代表操作一次需要多少纳秒)

BenchmarkRunes-4                     2000000    723 ns/op   96 B/op   2 allocs/op
BenchmarkBytes-4                     3000000    550 ns/op   32 B/op   2 allocs/op
BenchmarkBytesRmndr-4                3000000    438 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMask-4                 3000000    534 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImpr-4            10000000    176 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImprSrc-4         10000000    139 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImprSrcSB-4       10000000    134 ns/op   16 B/op   1 allocs/op
BenchmarkBytesMaskImprSrcUnsafe-4   10000000    115 ns/op   16 B/op   1 allocs/op

译者测试的数据

BenchmarkApproach1-12            3849038               299.5 ns/op            64 B/op          2 allocs/op
BenchmarkApproach2-12            5545350               216.4 ns/op            32 B/op          2 allocs/op
BenchmarkApproach3-12            7003654               169.7 ns/op            32 B/op          2 allocs/op
BenchmarkApproach4-12            7164259               168.7 ns/op            32 B/op          2 allocs/op
BenchmarkApproach5-12           13205474                89.06 ns/op           32 B/op          2 allocs/op
BenchmarkApproach6-12           13665636                84.41 ns/op           32 B/op          2 allocs/op
BenchmarkApproach7-12           17213431                70.37 ns/op           16 B/op          1 allocs/op
BenchmarkApproach8-12           19756956                61.41 ns/op           16 B/op          1 allocs/op

现在跑出来的数据，相原作者时候，已经有了一些变化，不过并不妨碍我们看出来各个方法的趋势：

rand.Int63()rand.Intn()rand.Int63()rand.Sourcerand.Randstrings.Builderunsafestrings.Builder

将第八个方案和第一个方案比较，第八个方案比第一个方案快6.3倍，仅仅使用六分之一的内存，分配次数也只有原来的一半。