diff --git a/zstd/dict.go b/zstd/dict.go index 8d5567fe6..b7b83164b 100644 --- a/zstd/dict.go +++ b/zstd/dict.go @@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { enc.Encode(&block, b) addValues(&remain, block.literals) litTotal += len(block.literals) + if len(block.sequences) == 0 { + continue + } seqs += len(block.sequences) block.genCodes() addHist(&ll, block.coders.llEnc.Histogram()) @@ -286,6 +289,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { if offset == 0 { continue } + if int(offset) >= len(o.History) { + continue + } if offset > 3 { newOffsets[offset-3]++ } else { @@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { if seqs/nUsed < 512 { // Use 512 as minimum. nUsed = seqs / 512 + if nUsed == 0 { + nUsed = 1 + } } copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { hist := dst.Histogram() @@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { fakeLength += v hist[i] = uint32(v) } + + // Ensure we aren't trying to represent RLE. + if maxCount == fakeLength { + for i := range hist { + if uint8(i) == maxSym { + fakeLength++ + maxSym++ + hist[i+1] = 1 + if maxSym > 1 { + break + } + } + if hist[0] == 0 { + fakeLength++ + hist[i] = 1 + if maxSym > 1 { + break + } + } + } + } + dst.HistogramFinished(maxSym, maxCount) dst.reUsed = false dst.useRLE = false