Skip to content

Commit

Permalink
zstd: Respect WithAllLitEntropyCompression (#792)
Browse files Browse the repository at this point in the history
* zstd: Respect WithAllLitEntropyCompression

With no sequences (matches) literal compression was turned off.

Re-enable this and simplify code. Set as default for "Better" and "Best".

* Fall back to raw block when unable to improve.
  • Loading branch information
klauspost authored Mar 25, 2023
1 parent 382ea74 commit dc198c1
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 70 deletions.
8 changes: 5 additions & 3 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
return b.encodeLits(b.literals, rawAllLits)
}
// We want some difference to at least account for the headers.
saved := b.size - len(b.literals) - (b.size >> 5)
saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 {
if org == nil {
return errIncompressible
Expand Down Expand Up @@ -779,10 +779,12 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
}
b.output = wr.out

// Maybe even add a bigger margin.
if len(b.output)-3-bhOffset >= b.size {
// Maybe even add a bigger margin.
// Discard and encode as raw block.
b.encodeRaw(org)
b.litEnc.Reuse = huff0.ReusePolicyNone
return errIncompressible
return nil
}

// Size is output minus block header.
Expand Down
78 changes: 13 additions & 65 deletions zstd/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,23 +277,9 @@ func (e *Encoder) nextBlock(final bool) error {
s.eofWritten = true
}

err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.err = err
return err
s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if s.err != nil {
return s.err
}
_, s.err = s.w.Write(blk.output)
s.nWritten += int64(len(blk.output))
Expand Down Expand Up @@ -343,22 +329,8 @@ func (e *Encoder) nextBlock(final bool) error {
}
s.wWg.Done()
}()
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.writeErr = err
s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if s.writeErr != nil {
return
}
_, s.writeErr = s.w.Write(blk.output)
Expand Down Expand Up @@ -568,25 +540,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {

// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
err := errIncompressible
oldout := blk.output
if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
// Output directly to dst
blk.output = dst
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
// Output directly to dst
blk.output = dst

switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
case nil:
dst = blk.output
default:
err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if err != nil {
panic(err)
}
dst = blk.output
blk.output = oldout
} else {
enc.Reset(e.o.dict, false)
Expand All @@ -605,25 +567,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(src) == 0 {
blk.last = true
}
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
}

switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)
blk.popOffsets()
case nil:
dst = append(dst, blk.output...)
default:
err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
if err != nil {
panic(err)
}
dst = append(dst, blk.output...)
blk.reset(nil)
}
}
Expand Down
4 changes: 2 additions & 2 deletions zstd/encoder_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (o *encoderOptions) setDefault() {
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
allLitEntropy: false,
lowMem: false,
}
}
Expand Down Expand Up @@ -238,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
}
}
if !o.customALEntropy {
o.allLitEntropy = l > SpeedFastest
o.allLitEntropy = l > SpeedDefault
}

return nil
Expand Down

0 comments on commit dc198c1

Please sign in to comment.