From ea97c77a6099247f73bde214b9285e75978bf70b Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 11 May 2022 18:33:01 +0200 Subject: [PATCH] flate: Faster hashing for level 7-9 Remove stateful hash and need for AND operation. (benchmark pending) Fixes #589 --- flate/deflate.go | 36 ++++++++++++++---------------------- flate/fast_encoder.go | 2 +- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/flate/deflate.go b/flate/deflate.go index bffa2f3323..f8435998e5 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -84,24 +84,23 @@ type advancedState struct { length int offset int maxInsertIndex int + chainHead int + hashOffset int - // Input hash chains - // hashHead[hashValue] contains the largest inputIndex with the specified hash value - // If hashHead[hashValue] is within the current window, then - // hashPrev[hashHead[hashValue] & windowMask] contains the previous index - // with the same hash value. - chainHead int - hashHead [hashSize]uint32 - hashPrev [windowSize]uint32 - hashOffset int + ii uint16 // position of last match, intended to overflow to reset. // input window: unprocessed data is window[index:windowEnd] index int estBitsPerByte int hashMatch [maxMatchLength + minMatchLength]uint32 - hash uint32 - ii uint16 // position of last match, intended to overflow to reset. + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 } type compressor struct { @@ -259,7 +258,6 @@ func (d *compressor) fillWindow(b []byte) { // Set the head of the hash chain to us. s.hashHead[newH] = uint32(di + s.hashOffset) } - s.hash = newH } // Update window information. d.windowEnd += n @@ -403,7 +401,6 @@ func (d *compressor) initDeflate() { s.hashOffset = 1 s.length = minMatchLength - 1 s.offset = 0 - s.hash = 0 s.chainHead = -1 } @@ -432,9 +429,6 @@ func (d *compressor) deflateLazy() { } s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) - if s.index < s.maxInsertIndex { - s.hash = hash4(d.window[s.index:]) - } for { if sanity && s.index > d.windowEnd { @@ -466,11 +460,11 @@ func (d *compressor) deflateLazy() { } if s.index < s.maxInsertIndex { // Update the hash - s.hash = hash4(d.window[s.index:]) - ch := s.hashHead[s.hash&hashMask] + hash := hash4(d.window[s.index:]) + ch := s.hashHead[hash] s.chainHead = int(ch) s.hashPrev[s.index&windowMask] = ch - s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset) + s.hashHead[hash] = uint32(s.index + s.hashOffset) } prevLength := s.length prevOffset := s.offset @@ -503,7 +497,7 @@ func (d *compressor) deflateLazy() { end += prevIndex idx := prevIndex + prevLength - (4 - checkOff) h := hash4(d.window[idx:]) - ch2 := int(s.hashHead[h&hashMask]) - s.hashOffset - prevLength + (4 - checkOff) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff) if ch2 > minIndex { length := matchLen(d.window[prevIndex:end], d.window[ch2:]) // It seems like a pure length metric is best. @@ -547,7 +541,6 @@ func (d *compressor) deflateLazy() { // Set the head of the hash chain to us. s.hashHead[newH] = uint32(di + s.hashOffset) } - s.hash = newH } s.index = newIndex @@ -793,7 +786,6 @@ func (d *compressor) reset(w io.Writer) { d.tokens.Reset() s.length = minMatchLength - 1 s.offset = 0 - s.hash = 0 s.ii = 0 s.maxInsertIndex = 0 } diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index d55ea2a775..f781aaa625 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -117,7 +117,7 @@ func (e *fastGen) addBlock(src []byte) int32 { // hash4 returns the hash of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <32. func hash4u(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> ((32 - h) & reg8SizeMask32) + return (u * prime4bytes) >> (32 - h) } type tableEntryPrev struct {