diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index bbe6421fb8a..f0b5b07582b 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1253,7 +1253,14 @@ unsigned HUF_minTableLog(unsigned symbolCardinality)
     return minBitsSymbols;
 }
 
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode)
+unsigned HUF_optimalTableLog(
+            unsigned maxTableLog,
+            size_t srcSize,
+            unsigned maxSymbolValue,
+            void* workSpace, size_t wkspSize,
+            HUF_CElt* table,
+            const unsigned* count,
+            HUF_depth_mode depthMode)
 {
     unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
     assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -1266,9 +1273,11 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
         size_t maxBits, hSize, newSize;
         const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
 
+        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
         if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog;
 
         for (huffLog = HUF_minTableLog(symbolCardinality); huffLog <= maxTableLog; huffLog++) {
+            DEBUGLOG(7, "checking for huffLog=%u", huffLog);
             maxBits = HUF_buildCTable_wksp(table, count,
                                            maxSymbolValue, huffLog,
                                            workSpace, wkspSize);
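For orientation (commentary with invented helper names, not part of the zstd sources): the loop instrumented by the two new DEBUGLOG statements is a brute-force search. Every admissible depth from HUF_minTableLog(symbolCardinality) up to maxTableLog gets a table built, and the depth with the smallest total of serialized-table size plus payload size wins. A self-contained sketch of that search pattern, with estimate_size() as a toy stand-in for the real HUF_buildCTable_wksp / size-measurement pair:

    #include <stdio.h>
    #include <stddef.h>

    /* Toy cost model: a deeper table tends to shrink the payload but costs
     * more to serialize. Purely illustrative; the real function measures
     * both sizes exactly from the CTable it just built. */
    static size_t estimate_size(unsigned tableLog, size_t srcSize)
    {
        size_t const headerCost  = (size_t)16 << (tableLog / 3);
        size_t const payloadCost = srcSize * 10 / tableLog;
        return headerCost + payloadCost;
    }

    static unsigned probe_optimal_log(unsigned minLog, unsigned maxLog, size_t srcSize)
    {
        unsigned tlog, bestLog = maxLog;
        size_t bestSize = (size_t)-1;
        for (tlog = minLog; tlog <= maxLog; tlog++) {  /* brute-force, as in the patch */
            size_t const s = estimate_size(tlog, srcSize);
            if (s < bestSize) { bestSize = s; bestLog = tlog; }
        }
        return bestLog;
    }

    int main(void)
    {
        printf("best tableLog = %u\n", probe_optimal_log(5, 12, 100000));
        return 0;
    }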
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f4a3c82647e..f1626d62f20 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2661,15 +2661,14 @@ ZSTD_entropyCompressSeqStore_internal(
     unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
     size_t const litSize = (size_t)(seqStorePtr->lit - literals);
-    HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast;
     size_t const cSize = ZSTD_compressLiterals(
-                                    &prevEntropy->huf, &nextEntropy->huf,
-                                    cctxParams->cParams.strategy,
-                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
                                     op, dstCapacity, literals, litSize,
                                     entropyWorkspace, entropyWkspSize,
-                                    bmi2, suspectUncompressible, depthMode);
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
+                                    suspectUncompressible, bmi2);
     FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
     assert(cSize <= dstCapacity);
     op += cSize;
@@ -3688,12 +3687,12 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end
     size_t estimatedSecondHalfSize;
     size_t midIdx = (startIdx + endIdx)/2;
 
+    DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
     assert(endIdx >= startIdx);
     if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
         DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
         return;
     }
-    DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
     ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
     ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
     ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
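Side note on the function being touched (illustrative toy, not the zstd implementation): ZSTD_deriveBlockSplitsHelper recursively bisects the sequence range, and hoisting the DEBUGLOG above the early-out means even calls that bail on MIN_SEQUENCES_BLOCK_SPLITTING or an exhausted split budget are now traced. The recursion skeleton looks roughly like this:

    #include <stdio.h>

    #define MIN_ELEMS_TO_SPLIT 4   /* stand-in for MIN_SEQUENCES_BLOCK_SPLITTING */
    #define MAX_NB_SPLITS      16  /* stand-in for ZSTD_MAX_NB_BLOCK_SPLITS */

    typedef struct { size_t splits[MAX_NB_SPLITS]; size_t idx; } splits_t;

    /* Recursively bisect [startIdx, endIdx) and record each midpoint,
     * stopping when the range is too small or the split budget is spent.
     * The real helper additionally estimates compressed sizes and keeps
     * a split only when the two halves are cheaper than the whole. */
    static void derive_splits(splits_t* s, size_t startIdx, size_t endIdx)
    {
        size_t midIdx;
        if (endIdx - startIdx < MIN_ELEMS_TO_SPLIT || s->idx >= MAX_NB_SPLITS)
            return;
        midIdx = (startIdx + endIdx) / 2;
        derive_splits(s, startIdx, midIdx);
        if (s->idx >= MAX_NB_SPLITS) return;   /* re-check: left subtree may have spent the budget */
        s->splits[s->idx++] = midIdx;
        derive_splits(s, midIdx, endIdx);
    }

    int main(void)
    {
        size_t i;
        splits_t s = { {0}, 0 };
        derive_splits(&s, 0, 100);
        for (i = 0; i < s.idx; i++) printf("%zu ", s.splits[i]);
        printf("\n");
        return 0;
    }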
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index e97c485ab09..5e09c8435ee 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -92,16 +92,37 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
     return flSize+1;
 }
 
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-                              ZSTD_hufCTables_t* nextHuf,
-                              ZSTD_strategy strategy, int disableLiteralCompression,
-                              void* dst, size_t dstCapacity,
-                              const void* src, size_t srcSize,
-                              void* entropyWorkspace, size_t entropyWorkspaceSize,
-                              const int bmi2,
-                              unsigned suspectUncompressible, HUF_depth_mode depthMode)
+/* ZSTD_minLiteralsToCompress() :
+ * returns minimal amount of literals
+ * for literal compression to even be attempted.
+ * Minimum is made tighter as compression strategy increases.
+ */
+static size_t
+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
+{
+    assert((int)strategy >= 0);
+    assert((int)strategy <= 9);
+    /* btultra2 : min 8 bytes;
+     * then 2x larger for each successive compression strategy
+     * max threshold 64 bytes */
+    {   int const shift = MIN(9-strategy, 3);
+        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : 8 << shift;
+        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
+        return mintc;
+    }
+}
+
+size_t ZSTD_compressLiterals (
+                  void* dst, size_t dstCapacity,
+            const void* src, size_t srcSize,
+                  void* entropyWorkspace, size_t entropyWorkspaceSize,
+            const ZSTD_hufCTables_t* prevHuf,
+                  ZSTD_hufCTables_t* nextHuf,
+                  ZSTD_strategy strategy,
+                  int disableLiteralCompression,
+                  int suspectUncompressible,
+                  int bmi2)
 {
-    size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
     BYTE* const ostart = (BYTE*)dst;
     U32 singleStream = srcSize < 256;
@@ -119,15 +140,14 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     if (disableLiteralCompression)
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
 
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
+    /* if too small, don't even attempt compression (speed opt) */
+    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
 
     RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
     {   HUF_repeat repeat = prevHuf->repeatMode;
         int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0;
+        HUF_depth_mode const depthMode = (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD) ? HUF_depth_optimal : HUF_depth_fast;
         typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned, HUF_depth_mode);
         huf_compress_f huf_compress;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
@@ -146,10 +166,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         }
     }
 
-    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
-        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
+    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
+        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
+            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }   }
     if (cLitSize==1) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index e5da204c80f..9eb74729d3d 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -18,14 +18,18 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-                              ZSTD_hufCTables_t* nextHuf,
-                              ZSTD_strategy strategy, int disableLiteralCompression,
-                              void* dst, size_t dstCapacity,
+/* ZSTD_compressLiterals():
+ * @entropyWorkspace: must be aligned on 4-bytes boundaries
+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding
+ */
+size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                              const int bmi2,
-                              unsigned suspectUncompressible, HUF_depth_mode depthMode);
+                              const ZSTD_hufCTables_t* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              int suspectUncompressible,
+                              int bmi2);
 
 #endif /* ZSTD_COMPRESS_LITERALS_H */
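The new gate is easy to tabulate (standalone demo reproducing the formula from the patch; not zstd code). With a reusable Huffman table (HUF_repeat_valid) the threshold drops to a flat 6 bytes; otherwise it doubles per strategy level below btultra2, capped at 64:

    #include <stdio.h>

    #define MIN(a,b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int strategy;
        /* same formula as the new ZSTD_minLiteralsToCompress():
         * btultra2 (9) -> 8 bytes, doubling per lower strategy, capped at 64 */
        for (strategy = 1; strategy <= 9; strategy++) {
            int const shift = MIN(9 - strategy, 3);
            size_t const mintc = (size_t)8 << shift;
            printf("strategy %d : attempt compression from %zu literals\n",
                   strategy, mintc);
        }
        return 0;
    }

This prints 64 for strategies 1 through 6, then 32, 16, and 8 for btopt, btultra, and btultra2, matching the "tighter as strategy increases" comment in the patch.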
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 6e08dc8c732..ea927469eef 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -16,7 +16,7 @@
 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
 #define ZSTD_MAX_PRICE     (1<<30)
 
-#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+#define ZSTD_PREDEF_THRESHOLD    8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
 
 
 /*-*************************************
@@ -88,20 +88,24 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
     return total;
 }
 
-static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
+
+static U32
+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
 {
     U32 s, sum=0;
     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
     assert(shift < 30);
     for (s=0; s<lastEltIndex+1; s++) {
-        table[s] = 1 + (table[s] >> shift);
+        unsigned const base = base1 ? 1 : (table[s]>0);
+        table[s] = base + (table[s] >> shift);
         sum += table[s];
     }
     return sum;
 }
 
 /* ZSTD_scaleStats() :
- * reduce all elements in table is sum too large
+ * reduce all elements in table if sum too large
  * return the resulting sum of elements */
 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
 {
@@ -110,7 +114,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
     assert(logTarget < 30);
     if (factor <= 1) return prevsum;
-    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
+    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
 }
 
 /* ZSTD_rescaleFreqs() :
@@ -188,13 +192,13 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
             }   }
 
-        } else {  /* not a dictionary */
+        } else {  /* first block, no dictionary */
 
             assert(optPtr->litFreq != NULL);
             if (compressedLiterals) {
                 unsigned lit = MaxLit;
                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
+                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
             }
 
             {   unsigned const baseLLfreqs[MaxLL+1] = {
@@ -224,7 +228,6 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
             }
 
-        }
 
 
     } else {   /* new block : re-use previous statistics, scaled down */
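To see what the new base_directive_e changes (toy reproduction of the rule above, not the zstd sources): base_1guaranteed keeps every slot at a floor of 1, which re-scaling of live statistics requires, while base_0possible lets a never-seen symbol keep frequency 0, which the first-block literals initialization now uses so absent symbols no longer receive phantom probability mass:

    #include <stdio.h>

    typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

    /* Mirrors the downscaling rule from the diff: shift every frequency
     * down, then add the base. base_1guaranteed forces a nonzero floor;
     * base_0possible leaves never-seen symbols at zero. */
    static unsigned downscale(unsigned* table, unsigned nbElts, unsigned shift,
                              base_directive_e base1)
    {
        unsigned s, sum = 0;
        for (s = 0; s < nbElts; s++) {
            unsigned const base = base1 ? 1 : (table[s] > 0);
            table[s] = base + (table[s] >> shift);
            sum += table[s];
        }
        return sum;
    }

    int main(void)
    {
        unsigned a[4] = { 1024, 16, 0, 3 };
        unsigned b[4] = { 1024, 16, 0, 3 };
        downscale(a, 4, 8, base_1guaranteed);  /* -> 5, 1, 1, 1 */
        downscale(b, 4, 8, base_0possible);    /* -> 5, 1, 0, 1 */
        printf("guaranteed: %u %u %u %u\n", a[0], a[1], a[2], a[3]);
        printf("possible  : %u %u %u %u\n", b[0], b[1], b[2], b[3]);
        return 0;
    }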