按照规矩,这里应该介绍一下golang和分库表,懒得写,跳过。

本文主要介绍两种分表方式,hash和range,对应不同对业务特性,假设有这样一个user表,字段id,name,home,balance:

user表

数量大概1000w条:

一个查询大概耗时是这样的:

加索引肯定快多了,但是今天咱们讨论的主题不是索引,而是分表,就不给加索引了。

接下来是分表了,先假定业务逻辑是用户用昵称登陆,获取用户信息,那么就要用到hash分表了,先创建10张表,名字用user

0-user

9,结构一样,多一行uid:

然后填充数据,代码如下:

func MakeHashTrans(db *sql.DB) {

rows,err := db.Query("select * from user")

cloumns, err := rows.Columns()

if err != nil {

fmt.Println(err)

for rows.Next() {

err := rows.Scan(&cloumns[0], &cloumns[1], &cloumns[2], &cloumns[3])

if err != nil {

fmt.Println(err)

fmt.Println(cloumns[0])

hashValue := crc32.ChecksumIEEE([]byte(cloumns[1]))

tableId := hashValue % 10

a := int(tableId)

qry := "insert into user_"+strconv.Itoa(a)+" (`uid`, `name`, `home`, `balance`) values ("+cloumns[0]+",'"+cloumns[1]+"','"+cloumns[2]+"',"+cloumns[3]+")"

_, err = db.Exec(qry)

if err != nil {

fmt.Println(err)

}

具体逻辑是先取出数据,然后对name字段进行hash--crc32运算,得到结果对10求余,然后插入到对应对表中,循环插入比较费时间,可另起方法存10w条插一次,再多估计不能插入,生成测试数据时候100w插入一次都不行,'max_

allow_package

'调到8g都插不进去,只好10w一次搞100次。看结果:

接下来就是愉快的测试时间了,开始pk,先上代码:

func ShardingSingle() {

//未分表搜索name := "测试代号9876543"

db := data.Db1()

defer db.Close()

st1 := time.Now()

row, _ := db.Query("select * from user where name = '"+name

+"'")

fmt.Println("single query unsharding spend time:", time.Since(st1))

cloumn1, err := row.Columns()

if err != nil {

fmt.Println(err)

for row.Next() {

row.Scan(&cloumn1[0], &cloumn1[1], &cloumn1[2], &cloumn1[3])

fmt.Println(cloumn1)

//分表搜索a := crc32.ChecksumIEEE([]byte(name)) % 10

qryString := "select * from user_"+strconv.Itoa(int(a))+" where name = '"+name+"'";

st2 := time.Now()

row2, _ := db.Query(qryString)

fmt.Println("single query sharding spend time:", time.Since(st2))

cloumn2, _ := row2.Columns()

for row2.Next() {

row2.Scan(&cloumn2[0], &cloumn2[1], &cloumn2[2], &cloumn2[3], &cloumn2[4])

fmt.Println(cloumn2)

return

}

喜闻乐见的结果:

大概有9倍左右的性能提升,还是不错的,当然hash运算也是要耗时的,这里没统计。

接下来是range分表,上代码:

func MakeRangeTrans() {

db := Db2()

rows,err := db.Query("select * from user")

defer rows.Close()

cloumns, err := rows.Columns()

if err != nil {

fmt.Println(err)

for rows.Next() {

err := rows.Scan(&cloumns[0], &cloumns[1], &cloumns[2], &cloumns[3])

if err != nil {

fmt.Println(err)

id, _ := strconv.Atoi(cloumns[0])

fmt.Println(id)

a :=id/1000000

qry := "insert into user_"+strconv.Itoa(a)+" (`uid`, `name`, `home`, `balance`) values ("+cloumns[0]+",'"+cloumns[1]+"','"+cloumns[2]+"',"+cloumns[3]+")"

_, err = db.Exec(qry)

if err != nil {

fmt.Println(err)

}

简单易懂,id/1000000得到表名,存入之,可另起逻辑一次插很多。

user_5

进入性能pk环节:

第一发:精确查找

func ShardingTest() {

//未分表查询db := data.Db2()

defer db.Close()

st1 := time.Now()

db.Query("select * from user where balance = 2018")

fmt.Println("all query unsharding spend time:", time.Since(st1))

//分表查询st2 := time.Now()

for i:=0;i<10;i++ {

db.Query("select * from user_"+strconv.Itoa(i)+"where balance = 2018")

fmt.Println("all query sharding spend time:", time.Since(st2))

return

}

结果:

分表完胜,但是分表要对结果进行循环组装,所幸开销不大。

第二发:范围查找

func ShardingTest() {

//未分表查询db := data.Db2()

defer db.Close()

st1 := time.Now()

db.Query("select * from user where `balance` > 5000 ")

fmt.Println("all query unsharding spend time:", time.Since(st1))

//分表查询st2 := time.Now()

for i:=0;i<10;i++ {

db.Query("select * from user_"+strconv.Itoa(i)+" where `balance` > 5000 ")

fmt.Println("all query sharding spend time:", time.Since(st2))

return

}

结果pk:

对你没有看错,不分表全胜,具体原理我还要再查查,可能和int这个数据类型有关。

第三发:id索引下查询:

func ShardingTest() {

//未分表查询

db := data.Db2()

defer db.Close()

st1 := time.Now()

db.Query("select * from user where id = 9527 ")

fmt.Println("all query unsharding spend time:", time.Since(st1))

//分表查询

st2 := time.Now()

db.Query("select * from user_0 where id = 9527 ")

fmt.Println("all query sharding spend time:", time.Since(st2))

return

}

结果:

分表微弱优势取胜。

第四发:未分表查询,分表协程查询,分表循环查询性能对比

func ShardingAllWithGoruntine() {

//未分表查询

db := data.Db2()

defer db.Close()

st1 := time.Now()

db.Query("select * from user where name = '测试代号1990030'")

fmt.Println("all query unsharding spend time:", time.Since(st1))

wg := sync.WaitGroup{}

wg.Add(10)

st2 := time.Now()

for i:=0;i<10;i++ {

go queryFunc(db, i, &wg)

wg.Wait()

fmt.Println("all query sharding with goruntine spend time:", time.Since(st2))

st3 := time.Now()

for i:=0;i<10;i++ {

db.Query("select * from user_"+strconv.Itoa(i)+" where name = '测试代号1990030'")

fmt.Println("all query sharding with loop spend time:", time.Since(st3))

return