对象存储的数据冗余

如果数据只存储一份,存储设备坏了数据就丢失了,所以需要做数据冗余。

Reed-Solomon
RS4425%226
64

评价一个数据冗余策略的好坏,主要是衡量该策略对存储空间的要求和其抗数据损坏的能力。

  • 对存储空间的要求是指我们采用的冗余策略相比于不使用冗余要额外支付的存储空间,用百分比表示。
  • 抗数据损坏的能力以允许损坏或丢失的对象数量来衡量。
100%0200%114+2RS150%262M+NRSMN(M+N)/M*100%N
RS码原理简介
42ABCDEFGHIJKLMNOP44
RS6*464Coding MatrixOriginal DataCoded Data
Coded Data
341

那么,怎么由剩余的四行还原数据呢?

Coding Matrix34Coding Matrix

最后总结,根据以下公式可以得到原始数据(由等式右边可以得到等式左边):

golang实现的RS库
githubgolang
go get -u -v github.com/klauspost/reedsolomon

关键函数

docNewEncoderfunc New(dataShards, parityShards int, opts ...Option) (Encoder, error)
Encoder
Verify(shards [][]byte) (bool, error)[]byte[][]bytefalseSplit(data []byte) ([][]byte, error)Reconstruct(shards [][]byte) errorJoin(dst io.Writer, shards [][]byte, outSize int) errorshardsdstWriter

demo

demosimple-encoder.gosimple-decoder.go

下面的代码做了一点修改。

simple-decoder.goD:/objects/testrs642"D:/objects/encoder/"
80
AAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDD

Enocder

simple-encoder.go
package main

import (
    "fmt"
    "io/ioutil"
    "os"
    "path/filepath"

    "github.com/klauspost/reedsolomon"
)

const (
    dataShards = 4 // 数据分片数
    parShards  = 2 // 校验分片数
)


func main() {

    fname := "D:/objects/test"
    // Create encoding matrix.
    enc, err := reedsolomon.New(dataShards, parShards)
    checkErr(err)

    fmt.Println("Opening", fname)
    b, err := ioutil.ReadFile(fname)
    checkErr(err)

    // Split the file into equally sized shards.
    shards, err := enc.Split(b)
    checkErr(err)
    fmt.Printf("File split into %d data+parity shards with %d bytes/shard.\n", len(shards), len(shards[0]))

    // Encode parity
    err = enc.Encode(shards)
    checkErr(err)

    // Write out the resulting files.
    _, file := filepath.Split(fname)
    dir := "D:/objects/encoder/"
    for i, shard := range shards {
        outfn := fmt.Sprintf("%s.%d", file, i)

        fmt.Println("Writing to", outfn)
        err = ioutil.WriteFile(filepath.Join(dir, outfn), shard, os.ModePerm)
        checkErr(err)
    }
}

func checkErr(err error) {
    if err != nil {
        fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
        os.Exit(2)
    }
}
Opening D:/objects/test
File split into 6 data+parity shards with 20 bytes/shard.
Writing to test.0
Writing to test.1
Writing to test.2
Writing to test.3
Writing to test.4
Writing to test.5
shard206test.0Atest.120Btest.220Ctest.320D

Decoder

simple-decoder.go
package main

import (
    "fmt"
    "io/ioutil"
    "os"

    "github.com/klauspost/reedsolomon"
)

const (
    dataShards = 4
    parShards  = 2
)

func main() {
    // Create matrix
    enc, err := reedsolomon.New(dataShards, parShards)
    checkErr(err)

    fname := "D:/objects/encoder/test"
    // Create shards and load the data.
    shards := make([][]byte, dataShards+parShards)
    for i := range shards {
        infn := fmt.Sprintf("%s.%d", fname, i)
        fmt.Println("Opening", infn)
        shards[i], err = ioutil.ReadFile(infn)
        if err != nil {
            fmt.Println("Error reading file", err)
            shards[i] = nil
        }
    }

    // Verify the shards
    ok, err := enc.Verify(shards)
    if ok {
        fmt.Println("No reconstruction needed")
    } else {
        fmt.Println("Verification failed. Reconstructing data")
        err = enc.Reconstruct(shards)
        if err != nil {
            fmt.Println("Reconstruct failed -", err)
            os.Exit(1)
        }
        ok, err = enc.Verify(shards)
        if !ok {
            fmt.Println("Verification failed after reconstruction, data likely corrupted.")
            os.Exit(1)
        }
        checkErr(err)
    }

    outfn := "D:/objects/decoder/test"
    fmt.Println("Writing data to", outfn)
    f, err := os.Create(outfn)
    checkErr(err)

    // We don't know the exact filesize.
    err = enc.Join(f, shards, len(shards[0])*dataShards)
    checkErr(err)
}

func checkErr(err error) {
    if err != nil {
        fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
        os.Exit(2)
    }
}
Opening D:/objects/encoder/test.0
Error reading file open D:/objects/encoder/test.0: The system cannot find the file specified.
Opening D:/objects/encoder/test.1
Error reading file open D:/objects/encoder/test.1: The system cannot find the file specified.
Opening D:/objects/encoder/test.2
Opening D:/objects/encoder/test.3
Opening D:/objects/encoder/test.4
Opening D:/objects/encoder/test.5
Verification failed. Reconstructing data
Writing data to D:/objects/decoder/test
test.0test.1
残留问题
Coding Matrixdemostream-encoder.gostream-decoder.go
参考