前言
这是在StackOverflow上的一篇高赞回答,质量很高,翻译一下,大家一起学习
问题是:go语言中,有没有什么最快最简单的方法,用来生成只包含英文字母的随机字符串
icza给出了8个方案,最简单的方法并不是最快的方法,它们各有优劣,末尾附上性能测试结果:
1. Runes
比较简单的答案,声明一个rune数组,通过随机数选取rune字符,拼接成结果
package approach1
import (
"fmt"
"math/rand"
"testing"
"time"
)
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
func randStr(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letters[rand.Intn(len(letters))]
}
return string(b)
}
func TestApproach1(t *testing.T) {
rand.Seed(time.Now().UnixNano())
fmt.Println(randStr(10))
}
func BenchmarkApproach1(b *testing.B) {
rand.Seed(time.Now().UnixNano())
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
2. Bytes
如果随机挑选的字符只包含英文字母,我们可以直接使用bytes,因为在UTF-8编码模式下,英文字符和Bytes是一对一的(Go正是使用UTF-8模式编码)
所以可以把
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
用这个替代
var letters = []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
或者更好
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
len(letters)slen(s)
letters
package approach2
import (
"fmt"
"math/rand"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
func randStr(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letters[rand.Intn(len(letters))]
}
return string(b)
}
func TestApproach2(t *testing.T) {
rand.Seed(time.Now().UnixNano())
fmt.Println(randStr(10))
}
func BenchmarkApproach2(b *testing.B) {
rand.Seed(time.Now().UnixNano())
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
3. Remainder 余数
rand.Intn()Rand.Intn()Rand.Int31n()
rand.Int63()Rand.Int31n()
rand.Int63()len(letterBytes)
package approach3
import (
"fmt"
"math/rand"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
func randStr(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letters[rand.Int63() % int64(len(letters))]
}
return string(b)
}
func TestApproach3(t *testing.T) {
rand.Seed(time.Now().UnixNano())
fmt.Println(randStr(10))
}
func BenchmarkApproach3(b *testing.B) {
rand.Seed(time.Now().UnixNano())
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
rand.Int63()
6/325/32
4. Masking 掩码
rand.Int63()0..len(letterBytes)-1
len(letterBytes)0.5(64-52)/64=0.19
package approach4
import (
"fmt"
"math/rand"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
// 6 bits to represent a letters index
letterIdBits = 6
// All 1-bits as many as letterIdBits
letterIdMask = 1 <<letterIdBits - 1
)
func randStr(n int) string {
b := make([]byte, n)
for i := range b {
if idx := int(rand.Int63() & letterIdMask); idx < len(letters) {
b[i] = letters[idx]
i++
}
}
return string(b)
}
func TestApproach4(t *testing.T) {
rand.Seed(time.Now().UnixNano())
fmt.Println(randStr(10))
}
func BenchmarkApproach4(b *testing.B) {
rand.Seed(time.Now().UnixNano())
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
5. Masking Improved
rand.Int63()rand.Int63()
63/6=10
rand.Int63()
package approach5
import (
"fmt"
"math/rand"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
// 6 bits to represent a letter index
letterIdBits = 6
// All 1-bits as many as letterIdBits
letterIdMask = 1<<letterIdBits - 1
letterIdMax = 63 / letterIdBits
)
func randStr(n int) string {
b := make([]byte, n)
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
for i, cache, remain := n-1, rand.Int63(), letterIdMax; i >= 0; {
if remain == 0 {
cache, remain = rand.Int63(), letterIdMax
}
if idx := int(cache & letterIdMask); idx < len(letters) {
b[i] = letters[idx]
i--
}
cache >>= letterIdBits
remain--
}
return string(b)
}
func TestApproach5(t *testing.T) {
rand.Seed(time.Now().UnixNano())
fmt.Println(randStr(10))
}
func BenchmarkApproach5(b *testing.B) {
rand.Seed(time.Now().UnixNano())
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
6. Source
第5个方案非常好,能改进的点并不多。我们可以但不值得搞得很复杂。
让我们来找可以改进的点:随机数的生成源
crypto/randRead(b []byte)crypto/rand
math/randrand.Randrand.Sourcerand.SourceInt63() int64
rand.Randrand.Source
package approach6
import (
"fmt"
"math/rand"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
var src = rand.NewSource(time.Now().UnixNano())
const (
// 6 bits to represent a letter index
letterIdBits = 6
// All 1-bits as many as letterIdBits
letterIdMask = 1<<letterIdBits - 1
letterIdMax = 63 / letterIdBits
)
func randStr(n int) string {
b := make([]byte, n)
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdMax
}
if idx := int(cache & letterIdMask); idx < len(letters) {
b[i] = letters[idx]
i--
}
cache >>= letterIdBits
remain--
}
return string(b)
}
func TestApproach6(t *testing.T) {
fmt.Println(randStr(10))
}
func BenchmarkApproach6(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
rand.Source
math/rand
默认的Source是协程安全的
rand.NewSource()Source
7. 使用 strings.Builder
之前的解决方案都返回了通过slice构造的字符串。最后的一次转换进行了一次拷贝,因为字符串是不可变的,如果转换的时候不进行拷贝,就无法保证转换完成之后,byte slice再被修改后,字符串仍能保持不变。
[]byteBuilder.String()[]byte
所以我们的下一个想法不是在slice中构建随机字符串,而是使用 strings.Builder,结束building后,我们就可以获取并返回结果,而无需复制。 这可能在速度方面有所帮助,并且在内存使用和分配方面肯定会有所帮助(译者注:等会在benchmark中会清晰地看到)。
package approach7
import (
"fmt"
"math/rand"
"strings"
"testing"
"time"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
var src = rand.NewSource(time.Now().UnixNano())
const (
// 6 bits to represent a letter index
letterIdBits = 6
// All 1-bits as many as letterIdBits
letterIdMask = 1<<letterIdBits - 1
letterIdMax = 63 / letterIdBits
)
func randStr(n int) string {
sb := strings.Builder{}
sb.Grow(n)
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdMax
}
if idx := int(cache & letterIdMask); idx < len(letters) {
sb.WriteByte(letters[idx])
i--
}
cache >>= letterIdBits
remain--
}
return sb.String()
}
func TestApproach7(t *testing.T) {
fmt.Println(randStr(10))
}
func BenchmarkApproach7(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
Builder.Grow()
strings.Builderunsafe
模仿string.Builder使用unsafe包
[]byte
unsafe
// String returns the accumulated string.
func (b *Builder) String() string {
return *(*string)(unsafe.Pointer(&b.buf))
}
unsafe
package approach8
import (
"fmt"
"math/rand"
"testing"
"time"
"unsafe"
)
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
var src = rand.NewSource(time.Now().UnixNano())
const (
// 6 bits to represent a letter index
letterIdBits = 6
// All 1-bits as many as letterIdBits
letterIdMask = 1<<letterIdBits - 1
letterIdMax = 63 / letterIdBits
)
func randStr(n int) string {
b := make([]byte, n)
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdMax
}
if idx := int(cache & letterIdMask); idx < len(letters) {
b[i] = letters[idx]
i--
}
cache >>= letterIdBits
remain--
}
return *(*string)(unsafe.Pointer(&b))
}
func TestApproach8(t *testing.T) {
fmt.Println(randStr(10))
}
func BenchmarkApproach8(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = randStr(10)
}
}
Benchmark
go test ./... -bench=. -benchmem
原作者测试的数据
(译者注:第三列代表操作一次需要多少纳秒)
BenchmarkRunes-4 2000000 723 ns/op 96 B/op 2 allocs/op
BenchmarkBytes-4 3000000 550 ns/op 32 B/op 2 allocs/op
BenchmarkBytesRmndr-4 3000000 438 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMask-4 3000000 534 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImpr-4 10000000 176 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImprSrc-4 10000000 139 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImprSrcSB-4 10000000 134 ns/op 16 B/op 1 allocs/op
BenchmarkBytesMaskImprSrcUnsafe-4 10000000 115 ns/op 16 B/op 1 allocs/op
译者测试的数据
BenchmarkApproach1-12 3849038 299.5 ns/op 64 B/op 2 allocs/op
BenchmarkApproach2-12 5545350 216.4 ns/op 32 B/op 2 allocs/op
BenchmarkApproach3-12 7003654 169.7 ns/op 32 B/op 2 allocs/op
BenchmarkApproach4-12 7164259 168.7 ns/op 32 B/op 2 allocs/op
BenchmarkApproach5-12 13205474 89.06 ns/op 32 B/op 2 allocs/op
BenchmarkApproach6-12 13665636 84.41 ns/op 32 B/op 2 allocs/op
BenchmarkApproach7-12 17213431 70.37 ns/op 16 B/op 1 allocs/op
BenchmarkApproach8-12 19756956 61.41 ns/op 16 B/op 1 allocs/op
现在跑出来的数据,相原作者时候,已经有了一些变化,不过并不妨碍我们看出来各个方法的趋势:
rand.Int63()rand.Intn()rand.Int63()rand.Sourcerand.Randstrings.Builderunsafestrings.Builder
将第八个方案和第一个方案比较,第八个方案比第一个方案快6.3倍,仅仅使用六分之一的内存,分配次数也只有原来的一半。