From bce0382c828b502e5d9db6e58f43c25abf16ea02 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 19 Jan 2023 10:41:24 -0500 Subject: [PATCH] Bugfixes for the External Matchfinder API (#3433) * external matchfinder bugfixes + tests * small doc fix --- lib/compress/zstd_compress.c | 35 ++++++++++++++++-------- lib/zstd.h | 21 ++++++++++++-- tests/zstreamtest.c | 53 ++++++++++++++++++++++++++++++++---- 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0fa2e6e2d3d..3a48e7dcd48 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1599,7 +1599,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); size_t const externalSeqSpace = useExternalMatchFinder - ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) + ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) : 0; size_t const neededSpace = @@ -3158,7 +3158,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); - assert(zc->externalMatchCtx.mFinder == NULL); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -5945,6 +5944,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize); #ifdef ZSTD_MULTITHREAD + /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */ + RETURN_ERROR_IF( + params.useExternalMatchFinder == 1 && params.nbWorkers >= 1, + parameter_combination_unsupported, + "External matchfinder isn't supported with nbWorkers >= 1" + ); + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ } @@ -6774,14 +6780,19 @@ void ZSTD_registerExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - ZSTD_externalMatchCtx emctx = { - mState, - mFinder, - - /* seqBuffer is allocated later (from the cwskp) */ - NULL, /* seqBuffer */ - 0 /* seqBufferCapacity */ - }; - zc->externalMatchCtx = emctx; - zc->requestedParams.useExternalMatchFinder = 1; + if (mFinder != NULL) { + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + + /* seqBuffer is allocated later (from the cwskp) */ + NULL, /* seqBuffer */ + 0 /* seqBufferCapacity */ + }; + zc->externalMatchCtx = emctx; + zc->requestedParams.useExternalMatchFinder = 1; + } else { + ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); + zc->requestedParams.useExternalMatchFinder = 0; + } } diff --git a/lib/zstd.h b/lib/zstd.h index 38dfb3a5d96..a2e02345248 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2840,8 +2840,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * externalMatchState. * * *** LIMITATIONS *** - * External matchfinders are compatible with all zstd compression APIs. There are - * only two limitations. + * External matchfinders are compatible with all zstd compression APIs which respect + * advanced parameters. However, there are three limitations: * * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported. * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an @@ -2863,7 +2863,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * APIs, work with the external matchfinder, but the external matchfinder won't * receive any history from the previous block. Each block is an independent chunk. * - * Long-term, we plan to overcome both limitations. There is no technical blocker to + * Third, multi-threading within a single compression is not supported. In other words, + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered. + * Multi-threading across compressions is fine: simply create one CCtx per thread. + * + * Long-term, we plan to overcome all three limitations. There is no technical blocker to * overcoming them. It is purely a question of engineering effort. */ @@ -2886,6 +2890,17 @@ typedef size_t ZSTD_externalMatchFinder_F ( * compressions. It will remain set until the user explicitly resets compression * parameters. * + * External matchfinder registration is considered to be an "advanced parameter", + * part of the "advanced API". This means it will only have an effect on + * compression APIs which respect advanced parameters, such as compress2() and + * compressStream(). Older compression APIs such as compressCCtx(), which predate + * the introduction of "advanced parameters", will ignore any external matchfinder + * setting. + * + * The external matchfinder can be "cleared" by registering a NULL external + * matchfinder function pointer. This removes all limitations described above in + * the "LIMITATIONS" section of the API docs. + * * The user is strongly encouraged to read the full API documentation (above) * before calling this function. */ ZSTDLIB_STATIC_API void diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 2febfff91b4..4a621692dcd 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1846,15 +1846,11 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed"); - ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); /* Reference external matchfinder outside the test loop to * check that the reference is preserved across compressions */ - ZSTD_registerExternalMatchFinder( - zc, - &externalMatchState, - zstreamExternalMatchFinder - ); + ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); for (enableFallback = 0; enableFallback < 1; enableFallback++) { size_t testCaseId; @@ -1916,11 +1912,56 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } /* Test that reset clears the external matchfinder */ + CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); + externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + + /* Test that registering mFinder == NULL clears the external matchfinder */ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */ CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + /* Test that external matchfinder doesn't interact with older APIs */ + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3)); + + /* Test that compression returns the correct error with LDM */ + CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); + { + size_t res; + ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); + CHECK( + ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported, + "EMF: Wrong error code: %s", ZSTD_getErrorName(res) + ); + } + +#ifdef ZSTD_MULTITHREAD + /* Test that compression returns the correct error with nbWorkers > 0 */ + CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); + { + size_t res; + ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); + CHECK( + ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported, + "EMF: Wrong error code: %s", ZSTD_getErrorName(res) + ); + } +#endif + free(dstBuf); free(checkBuf); }