Skip to content

Commit

Permalink
Bugfixes for the External Matchfinder API (#3433)
Browse files Browse the repository at this point in the history
* external matchfinder bugfixes + tests

* small doc fix
  • Loading branch information
embg authored Jan 19, 2023
1 parent dc1c6cc commit bce0382
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 21 deletions.
35 changes: 23 additions & 12 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(

size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
size_t const externalSeqSpace = useExternalMatchFinder
? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
: 0;

size_t const neededSpace =
Expand Down Expand Up @@ -3158,7 +3158,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
zc->appliedParams.useRowMatchFinder,
dictMode);
assert(zc->externalMatchCtx.mFinder == NULL);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
Expand Down Expand Up @@ -5945,6 +5944,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);

#ifdef ZSTD_MULTITHREAD
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
RETURN_ERROR_IF(
params.useExternalMatchFinder == 1 && params.nbWorkers >= 1,
parameter_combination_unsupported,
"External matchfinder isn't supported with nbWorkers >= 1"
);

if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
}
Expand Down Expand Up @@ -6774,14 +6780,19 @@ void ZSTD_registerExternalMatchFinder(
ZSTD_CCtx* zc, void* mState,
ZSTD_externalMatchFinder_F* mFinder
) {
ZSTD_externalMatchCtx emctx = {
mState,
mFinder,

/* seqBuffer is allocated later (from the cwskp) */
NULL, /* seqBuffer */
0 /* seqBufferCapacity */
};
zc->externalMatchCtx = emctx;
zc->requestedParams.useExternalMatchFinder = 1;
if (mFinder != NULL) {
ZSTD_externalMatchCtx emctx = {
mState,
mFinder,

/* seqBuffer is allocated later (from the cwskp) */
NULL, /* seqBuffer */
0 /* seqBufferCapacity */
};
zc->externalMatchCtx = emctx;
zc->requestedParams.useExternalMatchFinder = 1;
} else {
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
zc->requestedParams.useExternalMatchFinder = 0;
}
}
21 changes: 18 additions & 3 deletions lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -2840,8 +2840,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
* externalMatchState.
*
* *** LIMITATIONS ***
* External matchfinders are compatible with all zstd compression APIs. There are
* only two limitations.
* External matchfinders are compatible with all zstd compression APIs which respect
* advanced parameters. However, there are three limitations:
*
* First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
Expand All @@ -2863,7 +2863,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
* APIs, work with the external matchfinder, but the external matchfinder won't
* receive any history from the previous block. Each block is an independent chunk.
*
* Long-term, we plan to overcome both limitations. There is no technical blocker to
* Third, multi-threading within a single compression is not supported. In other words,
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered.
* Multi-threading across compressions is fine: simply create one CCtx per thread.
*
* Long-term, we plan to overcome all three limitations. There is no technical blocker to
* overcoming them. It is purely a question of engineering effort.
*/

Expand All @@ -2886,6 +2890,17 @@ typedef size_t ZSTD_externalMatchFinder_F (
* compressions. It will remain set until the user explicitly resets compression
* parameters.
*
* External matchfinder registration is considered to be an "advanced parameter",
* part of the "advanced API". This means it will only have an effect on
* compression APIs which respect advanced parameters, such as compress2() and
* compressStream(). Older compression APIs such as compressCCtx(), which predate
* the introduction of "advanced parameters", will ignore any external matchfinder
* setting.
*
* The external matchfinder can be "cleared" by registering a NULL external
* matchfinder function pointer. This removes all limitations described above in
* the "LIMITATIONS" section of the API docs.
*
* The user is strongly encouraged to read the full API documentation (above)
* before calling this function. */
ZSTDLIB_STATIC_API void
Expand Down
53 changes: 47 additions & 6 deletions tests/zstreamtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1846,15 +1846,11 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)

CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");

ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));

/* Reference external matchfinder outside the test loop to
* check that the reference is preserved across compressions */
ZSTD_registerExternalMatchFinder(
zc,
&externalMatchState,
zstreamExternalMatchFinder
);
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);

for (enableFallback = 0; enableFallback < 1; enableFallback++) {
size_t testCaseId;
Expand Down Expand Up @@ -1916,11 +1912,56 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
}

/* Test that reset clears the external matchfinder */
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));

/* Test that registering mFinder == NULL clears the external matchfinder */
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));

/* Test that external matchfinder doesn't interact with older APIs */
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3));

/* Test that compression returns the correct error with LDM */
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
{
size_t res;
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable));
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
CHECK(
ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
"EMF: Wrong error code: %s", ZSTD_getErrorName(res)
);
}

#ifdef ZSTD_MULTITHREAD
/* Test that compression returns the correct error with nbWorkers > 0 */
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
{
size_t res;
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1));
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
CHECK(
ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
"EMF: Wrong error code: %s", ZSTD_getErrorName(res)
);
}
#endif

free(dstBuf);
free(checkBuf);
}
Expand Down

0 comments on commit bce0382

Please sign in to comment.