按照规矩,这里应该介绍一下golang和分库表,懒得写,跳过。
本文主要介绍两种分表方式,hash和range,对应不同对业务特性,假设有这样一个user表,字段id,name,home,balance:
user表
数量大概1000w条:
一个查询大概耗时是这样的:
加索引肯定快多了,但是今天咱们讨论的主题不是索引,而是分表,就不给加索引了。
接下来是分表了,先假定业务逻辑是用户用昵称登陆,获取用户信息,那么就要用到hash分表了,先创建10张表,名字用user
0-user
9,结构一样,多一行uid:
然后填充数据,代码如下:
func MakeHashTrans(db *sql.DB) {
rows,err := db.Query("select * from user")
cloumns, err := rows.Columns()
if err != nil {
fmt.Println(err)
for rows.Next() {
err := rows.Scan(&cloumns[0], &cloumns[1], &cloumns[2], &cloumns[3])
if err != nil {
fmt.Println(err)
fmt.Println(cloumns[0])
hashValue := crc32.ChecksumIEEE([]byte(cloumns[1]))
tableId := hashValue % 10
a := int(tableId)
qry := "insert into user_"+strconv.Itoa(a)+" (`uid`, `name`, `home`, `balance`) values ("+cloumns[0]+",'"+cloumns[1]+"','"+cloumns[2]+"',"+cloumns[3]+")"
_, err = db.Exec(qry)
if err != nil {
fmt.Println(err)
}
具体逻辑是先取出数据,然后对name字段进行hash--crc32运算,得到结果对10求余,然后插入到对应对表中,循环插入比较费时间,可另起方法存10w条插一次,再多估计不能插入,生成测试数据时候100w插入一次都不行,'max_
allow_package
'调到8g都插不进去,只好10w一次搞100次。看结果:
接下来就是愉快的测试时间了,开始pk,先上代码:
func ShardingSingle() {
//未分表搜索name := "测试代号9876543"
db := data.Db1()
defer db.Close()
st1 := time.Now()
row, _ := db.Query("select * from user where name = '"+name
+"'")
fmt.Println("single query unsharding spend time:", time.Since(st1))
cloumn1, err := row.Columns()
if err != nil {
fmt.Println(err)
for row.Next() {
row.Scan(&cloumn1[0], &cloumn1[1], &cloumn1[2], &cloumn1[3])
fmt.Println(cloumn1)
//分表搜索a := crc32.ChecksumIEEE([]byte(name)) % 10
qryString := "select * from user_"+strconv.Itoa(int(a))+" where name = '"+name+"'";
st2 := time.Now()
row2, _ := db.Query(qryString)
fmt.Println("single query sharding spend time:", time.Since(st2))
cloumn2, _ := row2.Columns()
for row2.Next() {
row2.Scan(&cloumn2[0], &cloumn2[1], &cloumn2[2], &cloumn2[3], &cloumn2[4])
fmt.Println(cloumn2)
return
}
喜闻乐见的结果:
大概有9倍左右的性能提升,还是不错的,当然hash运算也是要耗时的,这里没统计。
接下来是range分表,上代码:
func MakeRangeTrans() {
db := Db2()
rows,err := db.Query("select * from user")
defer rows.Close()
cloumns, err := rows.Columns()
if err != nil {
fmt.Println(err)
for rows.Next() {
err := rows.Scan(&cloumns[0], &cloumns[1], &cloumns[2], &cloumns[3])
if err != nil {
fmt.Println(err)
id, _ := strconv.Atoi(cloumns[0])
fmt.Println(id)
a :=id/1000000
qry := "insert into user_"+strconv.Itoa(a)+" (`uid`, `name`, `home`, `balance`) values ("+cloumns[0]+",'"+cloumns[1]+"','"+cloumns[2]+"',"+cloumns[3]+")"
_, err = db.Exec(qry)
if err != nil {
fmt.Println(err)
}
简单易懂,id/1000000得到表名,存入之,可另起逻辑一次插很多。
user_5
进入性能pk环节:
第一发:精确查找
func ShardingTest() {
//未分表查询db := data.Db2()
defer db.Close()
st1 := time.Now()
db.Query("select * from user where balance = 2018")
fmt.Println("all query unsharding spend time:", time.Since(st1))
//分表查询st2 := time.Now()
for i:=0;i<10;i++ {
db.Query("select * from user_"+strconv.Itoa(i)+"where balance = 2018")
fmt.Println("all query sharding spend time:", time.Since(st2))
return
}
结果:
分表完胜,但是分表要对结果进行循环组装,所幸开销不大。
第二发:范围查找
func ShardingTest() {
//未分表查询db := data.Db2()
defer db.Close()
st1 := time.Now()
db.Query("select * from user where `balance` > 5000 ")
fmt.Println("all query unsharding spend time:", time.Since(st1))
//分表查询st2 := time.Now()
for i:=0;i<10;i++ {
db.Query("select * from user_"+strconv.Itoa(i)+" where `balance` > 5000 ")
fmt.Println("all query sharding spend time:", time.Since(st2))
return
}
结果pk:
对你没有看错,不分表全胜,具体原理我还要再查查,可能和int这个数据类型有关。
第三发:id索引下查询:
func ShardingTest() {
//未分表查询
db := data.Db2()
defer db.Close()
st1 := time.Now()
db.Query("select * from user where id = 9527 ")
fmt.Println("all query unsharding spend time:", time.Since(st1))
//分表查询
st2 := time.Now()
db.Query("select * from user_0 where id = 9527 ")
fmt.Println("all query sharding spend time:", time.Since(st2))
return
}
结果:
分表微弱优势取胜。
第四发:未分表查询,分表协程查询,分表循环查询性能对比
func ShardingAllWithGoruntine() {
//未分表查询
db := data.Db2()
defer db.Close()
st1 := time.Now()
db.Query("select * from user where name = '测试代号1990030'")
fmt.Println("all query unsharding spend time:", time.Since(st1))
wg := sync.WaitGroup{}
wg.Add(10)
st2 := time.Now()
for i:=0;i<10;i++ {
go queryFunc(db, i, &wg)
wg.Wait()
fmt.Println("all query sharding with goruntine spend time:", time.Since(st2))
st3 := time.Now()
for i:=0;i<10;i++ {
db.Query("select * from user_"+strconv.Itoa(i)+" where name = '测试代号1990030'")
fmt.Println("all query sharding with loop spend time:", time.Since(st3))
return