forked from ipfs/go-ipfs-chunker
-
Notifications
You must be signed in to change notification settings - Fork 3
/
parse.go
178 lines (160 loc) · 4.88 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
package chunk
import (
"errors"
"fmt"
"io"
"strconv"
"strings"
)
// Chunker-name prefixes matched by FromString, together with the default
// block size and the upper bound on chunk sizes.
const (
// PrefixForDefault is the chunker name that selects the default splitter.
PrefixForDefault = "default"
// PrefixForSize introduces a fixed-size chunker: "size-{size}".
// Unlike the other prefixes it includes the trailing "-".
PrefixForSize = "size-"
// PrefixForRabin introduces a Rabin chunker: "rabin", "rabin-{avg}" or
// "rabin-{min}-{avg}-{max}".
PrefixForRabin = "rabin"
// PrefixForReedSolomon introduces a Reed-Solomon chunker: "reed-solomon" or
// "reed-solomon-{#data}-{#parity}-{size}".
PrefixForReedSolomon = "reed-solomon"
// DefaultBlockSize is the chunk size that splitters produce (or aim to):
// 256 KiB.
DefaultBlockSize int64 = 1024 * 256
// ChunkSizeLimit is the maximum chunk size accepted by the parsers: 1 MiB.
// No leaf block should contain more than 1 MiB of payload data (wrapping
// overhead aside), which effectively mandates the maximum chunk size.
// See discussion at https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879 for background.
ChunkSizeLimit int = 1048576
)
// Sentinel errors returned while parsing chunker strings.
var (
// ErrRabinMin is returned by parseRabinString when the min value of a
// "rabin-{min}-{avg}-{max}" chunker is below 16.
// NOTE(review): the message says "greater than 16", but the check rejects
// only min < 16, so 16 itself is accepted — confirm which is intended.
ErrRabinMin = errors.New("rabin min must be greater than 16")
// ErrSize is returned when a parsed size is zero or negative.
ErrSize = errors.New("chunker size must be greater than 0")
// ErrSizeMax is returned when chunker parameters exceed ChunkSizeLimit.
ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
)
// FromString returns a Splitter reading from r, selected by the chunker
// string. It supports "default" (""), "size-{size}", "rabin",
// "rabin-{blocksize}", "rabin-{min}-{avg}-{max}", "reed-solomon",
// "reed-solomon-{#data}-{#parity}-{size}" and "buzhash".
//
// For "size-{size}", everything after the prefix must be a single decimal
// integer: a parse failure returns the strconv error, a value <= 0 returns
// ErrSize and a value above ChunkSizeLimit returns ErrSizeMax. Strings with
// the rabin or reed-solomon prefix are handed to their dedicated parsers,
// which report their own errors. A string that matches none of the cases
// yields an "unrecognized chunker option" error.
func FromString(r io.Reader, chunker string) (Splitter, error) {
	switch {
	case chunker == "" || chunker == PrefixForDefault:
		return DefaultSplitter(r), nil

	case strings.HasPrefix(chunker, PrefixForSize):
		// Parse the whole remainder instead of only the first "-"-separated
		// field, so trailing input such as "size-1024-junk" is rejected
		// rather than silently treated as "size-1024".
		size, err := strconv.Atoi(strings.TrimPrefix(chunker, PrefixForSize))
		if err != nil {
			return nil, err
		}
		if size <= 0 {
			return nil, ErrSize
		}
		if size > ChunkSizeLimit {
			return nil, ErrSizeMax
		}
		return NewSizeSplitter(r, int64(size)), nil

	case strings.HasPrefix(chunker, PrefixForRabin):
		return parseRabinString(r, chunker)

	case strings.HasPrefix(chunker, PrefixForReedSolomon):
		return parseReedSolomonString(r, chunker)

	case chunker == "buzhash":
		return NewBuzhash(r), nil

	default:
		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
	}
}
// IsReedSolomon reports whether chunker starts with PrefixForReedSolomon.
// Only the prefix is examined; the rest of the string is not validated.
func IsReedSolomon(chunker string) bool {
	matched := strings.HasPrefix(chunker, PrefixForReedSolomon)
	return matched
}
// parseRabinString builds a Rabin splitter over r from a chunker string,
// dispatching on the number of "-"-separated fields:
//
//   - one field ("rabin"): NewRabin with DefaultBlockSize;
//   - two fields ("rabin-{avg}"): NewRabin with the given size;
//   - four fields ("rabin-{min}-{avg}-{max}"): NewRabinMinMax, where each
//     value may carry an optional "min:", "avg:" or "max:" label.
//
// It returns the strconv error when a value is not an integer, ErrSizeMax
// when the requested sizes exceed ChunkSizeLimit, ErrRabinMin when min is
// below 16, and a descriptive error for a wrong label, for values that do
// not satisfy min < avg < max, or for any other number of fields. The first
// field itself is not inspected here.
func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
	parts := strings.Split(chunker, "-")
	switch len(parts) {
	case 1:
		return NewRabin(r, uint64(DefaultBlockSize)), nil

	case 2:
		size, err := strconv.Atoi(parts[1])
		if err != nil {
			return nil, err
		}
		// The limit is applied to 1.5x the requested size. This is done in
		// integer arithmetic: going through float32 and back to int is
		// implementation-defined once the product no longer fits in an int,
		// which let very large sizes slip past the check on some platforms.
		// Testing size against the limit first keeps size+size/2 from
		// overflowing.
		// NOTE(review): a size of 0 is still forwarded to NewRabin — confirm
		// NewRabin tolerates it or reject it here.
		if size > ChunkSizeLimit || size+size/2 > ChunkSizeLimit {
			return nil, ErrSizeMax
		}
		return NewRabin(r, uint64(size)), nil

	case 4:
		minSize, err := parseRabinField(parts[1], "min", "first label must be min")
		if err != nil {
			return nil, err
		}
		if minSize < 16 {
			return nil, ErrRabinMin
		}

		avgSize, err := parseRabinField(parts[2], "avg", "second label must be avg")
		if err != nil {
			return nil, err
		}

		maxSize, err := parseRabinField(parts[3], "max", "final label must be max")
		if err != nil {
			return nil, err
		}

		switch {
		case minSize >= avgSize:
			return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
		case avgSize >= maxSize:
			return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
		case maxSize > ChunkSizeLimit:
			return nil, ErrSizeMax
		}
		return NewRabinMinMax(r, uint64(minSize), uint64(avgSize), uint64(maxSize)), nil

	default:
		return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]')")
	}
}

// parseRabinField parses one "[label:]value" field of a rabin chunker string.
// When the field contains a ":" its first segment must equal label, otherwise
// an error with the text labelErr is returned. The last ":"-separated segment
// is parsed as the integer value.
func parseRabinField(field, label, labelErr string) (int, error) {
	sub := strings.Split(field, ":")
	if len(sub) > 1 && sub[0] != label {
		return 0, errors.New(labelErr)
	}
	return strconv.Atoi(sub[len(sub)-1])
}
// parseReedSolomonString builds a Reed-Solomon splitter over r from a chunker
// string. The string is decoded by GetRsMetaMapFromString, whose error is
// returned unchanged; the decoded NumData, NumParity and FileSize values are
// then passed to NewReedSolomonSplitter.
func parseReedSolomonString(r io.Reader, chunker string) (Splitter, error) {
	meta, err := GetRsMetaMapFromString(chunker)
	if err != nil {
		return nil, err
	}

	numData, numParity, size := meta.NumData, meta.NumParity, meta.FileSize
	return NewReedSolomonSplitter(r, numData, numParity, size)
}
// GetRsMetaMapFromString decodes a Reed-Solomon chunker string into an
// RsMetaMap. The form is chosen by the number of "-"-separated fields:
//
//   - two fields ("reed-solomon"): DefaultReedSolomonDataShards,
//     DefaultReedSolomonParityShards and DefaultReedSolomonShardSize are used;
//   - five fields ("reed-solomon-{#data}-{#parity}-{size}"): the last three
//     fields are parsed as integers.
//
// In the five-field form the data and parity counts must each be positive and
// sum to at most 256, and size must be positive (ErrSize otherwise). A field
// that is not an integer yields the strconv error; any other field count
// yields an "incorrect format" error.
//
// The first two fields are not inspected here; FromString only routes strings
// that start with PrefixForReedSolomon to this parser.
func GetRsMetaMapFromString(str string) (*RsMetaMap, error) {
	// maxShards is the largest supported total of data plus parity shards.
	const maxShards = 256

	parts := strings.Split(str, "-")
	switch len(parts) {
	case 2:
		return &RsMetaMap{
			uint64(DefaultReedSolomonDataShards),
			uint64(DefaultReedSolomonParityShards),
			uint64(DefaultReedSolomonShardSize),
			false}, nil

	case 5:
		nd, err := strconv.Atoi(parts[2])
		if err != nil {
			return nil, err
		}
		pd, err := strconv.Atoi(parts[3])
		if err != nil {
			return nil, err
		}
		if nd <= 0 {
			return nil, errors.New("invalid number of data shards")
		}
		if pd <= 0 {
			return nil, errors.New("invalid number of parity shards")
		}
		// Both counts are positive here, so maxShards-pd cannot underflow.
		// The direct sum nd+pd could wrap around for very large inputs and
		// slip past the limit.
		if nd > maxShards-pd {
			return nil, errors.New("cannot encode more than 256 shards (data+parity)")
		}

		size, err := strconv.Atoi(parts[4])
		if err != nil {
			return nil, err
		}
		if size <= 0 {
			return nil, ErrSize
		}
		return &RsMetaMap{uint64(nd), uint64(pd), uint64(size), false}, nil

	default:
		return nil, errors.New("incorrect format (expected 'reed-solomon' or 'reed-solomon-[#data]-[#parity]-[size]')")
	}
}