From dc198c145e4ab925852ea6092614cd23a4a44929 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 25 Mar 2023 07:26:02 -0700 Subject: [PATCH] zstd: Respect WithAllLitEntropyCompression (#792) * zstd: Respect WithAllLitEntropyCompression With no sequences (matches) literal compression was turned off. Re-enable this and simplify code. Set as default for "Better" and "Best". * Fall back to raw block when unable to improve. --- zstd/blockenc.go | 8 +++-- zstd/encoder.go | 78 +++++++---------------------------------- zstd/encoder_options.go | 4 +-- 3 files changed, 20 insertions(+), 70 deletions(-) diff --git a/zstd/blockenc.go b/zstd/blockenc.go index 12e8f6f0b6..966897c1fa 100644 --- a/zstd/blockenc.go +++ b/zstd/blockenc.go @@ -473,7 +473,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { return b.encodeLits(b.literals, rawAllLits) } // We want some difference to at least account for the headers. - saved := b.size - len(b.literals) - (b.size >> 5) + saved := b.size - len(b.literals) - (b.size >> 6) if saved < 16 { if org == nil { return errIncompressible @@ -779,10 +779,12 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { } b.output = wr.out + // Maybe even add a bigger margin. if len(b.output)-3-bhOffset >= b.size { - // Maybe even add a bigger margin. + // Discard and encode as raw block. + b.encodeRaw(org) b.litEnc.Reuse = huff0.ReusePolicyNone - return errIncompressible + return nil } // Size is output minus block header. diff --git a/zstd/encoder.go b/zstd/encoder.go index 65c6c36dc1..4de0aed0d0 100644 --- a/zstd/encoder.go +++ b/zstd/encoder.go @@ -277,23 +277,9 @@ func (e *Encoder) nextBlock(final bool) error { s.eofWritten = true } - err := errIncompressible - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - } - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - blk.encodeRaw(src) - // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. - case nil: - default: - s.err = err - return err + s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if s.err != nil { + return s.err } _, s.err = s.w.Write(blk.output) s.nWritten += int64(len(blk.output)) @@ -343,22 +329,8 @@ func (e *Encoder) nextBlock(final bool) error { } s.wWg.Done() }() - err := errIncompressible - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - } - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - blk.encodeRaw(src) - // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. - case nil: - default: - s.writeErr = err + s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if s.writeErr != nil { return } _, s.writeErr = s.w.Write(blk.output) @@ -568,25 +540,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. - err := errIncompressible oldout := blk.output - if len(blk.literals) != len(src) || len(src) != e.o.blockSize { - // Output directly to dst - blk.output = dst - err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - } + // Output directly to dst + blk.output = dst - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - dst = blk.encodeRawTo(dst, src) - case nil: - dst = blk.output - default: + err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if err != nil { panic(err) } + dst = blk.output blk.output = oldout } else { enc.Reset(e.o.dict, false) @@ -605,25 +567,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(src) == 0 { blk.last = true } - err := errIncompressible - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { - err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) - } - - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - dst = blk.encodeRawTo(dst, todo) - blk.popOffsets() - case nil: - dst = append(dst, blk.output...) - default: + err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) + if err != nil { panic(err) } + dst = append(dst, blk.output...) blk.reset(nil) } } diff --git a/zstd/encoder_options.go b/zstd/encoder_options.go index 8e15be2f7f..50f70533b4 100644 --- a/zstd/encoder_options.go +++ b/zstd/encoder_options.go @@ -39,7 +39,7 @@ func (o *encoderOptions) setDefault() { blockSize: maxCompressedBlockSize, windowSize: 8 << 20, level: SpeedDefault, - allLitEntropy: true, + allLitEntropy: false, lowMem: false, } } @@ -238,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption { } } if !o.customALEntropy { - o.allLitEntropy = l > SpeedFastest + o.allLitEntropy = l > SpeedDefault } return nil