Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzz large offsets through sequence compression api #3447

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -6327,7 +6327,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
/* Move to the next sequence */
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
startPosInSequence = 0;
idx++;
} else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this check is right when the else branch is taken and we split the sequence (we set finalMatchSplit = 1; on line 6352), since in that case we don't increment idx but still store a sequence.

You could simplify this a bit by moving the idx++ to the bottom of the loop and doing:

if (!finalMatchSplit)
    ++idx;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh thanks, great catch!

/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
does not reach the end of the match. So, we have to split the sequence */
Expand Down Expand Up @@ -6382,6 +6381,8 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
ip += matchLength + litLength;
if (!finalMatchSplit)
idx++; /* Next Sequence */
}
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
Expand Down
126 changes: 75 additions & 51 deletions tests/fuzz/sequence_compression_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@ static void* literalsBuffer = NULL;
static void* generatedSrc = NULL;
static ZSTD_Sequence* generatedSequences = NULL;

static void* dictBuffer = NULL;
static ZSTD_CDict* cdict = NULL;
static ZSTD_DDict* ddict = NULL;

#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */

/* Deterministic random number generator */
Expand All @@ -55,9 +59,9 @@ static uint32_t FUZZ_RDG_rand(uint32_t* src)
/* Make a pseudorandom string - this simple function exists to avoid
* taking a dependency on datagen.h to have RDG_genBuffer().
*/
static char* generatePseudoRandomString(char* str, size_t size) {
static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
uint32_t seed = 0;
uint32_t seed = FUZZ_dataProducer_uint32(producer);
if (size) {
for (size_t n = 0; n < size; n++) {
int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
Expand Down Expand Up @@ -100,14 +104,14 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
size_t j = 0;
size_t k = 0;
if (dictSize != 0) {
if (generatedSequences[i].offset > bytesWritten) {
/* Offset goes into the dictionary */
size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten;
for (; k < offsetFromEndOfDict && k < matchLength; ++k) {
op[k] = dictPtr[dictSize - offsetFromEndOfDict + k];
if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
size_t dictOffset = generatedSequences[i].offset - bytesWritten;
size_t matchInDict = MIN(matchLength, dictOffset);
for (; k < matchInDict; ++k) {
op[k] = dictPtr[dictSize - dictOffset + k];
}
matchLength -= k;
op += k;
matchLength -= matchInDict;
op += matchInDict;
}
}
for (; j < matchLength; ++j) {
Expand Down Expand Up @@ -139,8 +143,8 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
size_t windowLog, ZSTD_sequenceFormat_e mode)
{
const uint32_t repCode = 0; /* not used by sequence ingestion api */
const uint32_t windowSize = 1 << windowLog;
const uint32_t blockSizeMax = MIN(128 << 10, 1 << windowLog);
size_t windowSize = 1ULL << windowLog;
size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
uint32_t bytesGenerated = 0;
uint32_t nbSeqGenerated = 0;
Expand Down Expand Up @@ -210,38 +214,31 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
}
generatedSequences[nbSeqGenerated++] = seq;
isFirstSequence = 0;
} }
}
}

if (mode == ZSTD_sf_explicitBlockDelimiters) {
/* always end sequences with a block delimiter */
const ZSTD_Sequence endBlock = {0, 0, 0, 0};
assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
generatedSequences[nbSeqGenerated++] = endBlock;
}

return nbSeqGenerated;
}

static size_t roundTripTest(void* result, size_t resultCapacity,
void* compressed, size_t compressedCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
const ZSTD_Sequence* seqs, size_t seqSize,
int wLog, int cLevel, unsigned hasDict,
unsigned hasDict,
ZSTD_sequenceFormat_e mode)
{
size_t cSize;
size_t dSize;

ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
if (hasDict) {
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
}

cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
Expand Down Expand Up @@ -272,7 +269,6 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
size_t cBufSize;
size_t generatedSrcSize;
size_t nbSequences;
void* dictBuffer = NULL;
size_t dictSize = 0;
unsigned hasDict;
unsigned wLog;
Expand All @@ -281,32 +277,73 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)

FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
FUZZ_ASSERT(producer);
if (literalsBuffer == NULL) {

if (!cctx) {
cctx = ZSTD_createCCtx();
FUZZ_ASSERT(cctx);
}
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}

/* Generate window log first so we don't generate offsets too large */
wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);

ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);

if (!literalsBuffer) {
literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
FUZZ_ASSERT(literalsBuffer);
literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
}

if (!dictBuffer) { /* Generate global dictionary buffer */
ZSTD_compressionParameters cParams;

/* Generate a large dictionary buffer */
dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
FUZZ_ASSERT(dictBuffer);

/* Create global cdict and ddict */
cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
cParams.minMatch = ZSTD_MINMATCH_MIN;
cParams.hashLog = ZSTD_HASHLOG_MIN;
cParams.chainLog = ZSTD_CHAINLOG_MIN;

cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
FUZZ_ASSERT(cdict);
FUZZ_ASSERT(ddict);
}

FUZZ_ASSERT(cdict);
FUZZ_ASSERT(ddict);

hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
if (hasDict) {
dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
dictBuffer = FUZZ_malloc(dictSize);
FUZZ_ASSERT(dictBuffer);
dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
}
/* Generate window log first so we don't generate offsets too large */
wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);

if (!generatedSequences) {
generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
}
if (!generatedSrc) {
generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
}

nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);

/* Note : in explicit block delimiters mode,
* the fuzzer might generate a lot of small blocks.
* In which case, the final compressed size might be > ZSTD_compressBound().
Expand All @@ -318,30 +355,17 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
rBufSize = generatedSrcSize;
rBuf = FUZZ_malloc(rBufSize);

if (!cctx) {
cctx = ZSTD_createCCtx();
FUZZ_ASSERT(cctx);
}
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}

{ const size_t result = roundTripTest(rBuf, rBufSize,
cBuf, cBufSize,
generatedSrc, generatedSrcSize,
dictBuffer, dictSize,
generatedSequences, nbSequences,
(int)wLog, cLevel, hasDict, mode);
hasDict, mode);
FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */
}

free(rBuf);
free(cBuf);
FUZZ_dataProducer_free(producer);
if (hasDict) {
free(dictBuffer);
}
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
Expand Down
34 changes: 34 additions & 0 deletions tests/zstreamtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2139,6 +2139,40 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)

ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);


/* Test with no block delim */
/* Round-trips a 4-byte buffer of 'x' through ZSTD_compressSequences() using a
 * single caller-supplied sequence, with ZSTD_sf_noBlockDelimiters and sequence
 * validation enabled, then decompresses and compares against the source. */
{
size_t srcSize = 4;
void* const src = CNBuffer;
size_t dstSize = ZSTD_compressBound(srcSize);
void* const dst = compressedBuffer;
size_t const kNbSequences = 1;
/* NOTE(review): neither malloc() result below is checked before use. */
ZSTD_Sequence* sequences = malloc(sizeof(ZSTD_Sequence) * kNbSequences);
void* const checkBuf = malloc(srcSize);

memset(src, 'x', srcSize);

/* NOTE(review): presumably {offset, litLength, matchLength, rep} -- confirm
 * against the ZSTD_Sequence declaration. If so, 1 literal + a 3-byte match
 * accounts for all srcSize (4) bytes. */
sequences[0] = (ZSTD_Sequence) {1, 1, 3, 0};

/* Test with sequence validation */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 3));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1));

cSize = ZSTD_compressSequences(cctx, dst, dstSize,
sequences, kNbSequences,
src, srcSize);

/* NOTE(review): CHECK(cond, msg) appears to fail the test when cond is
 * true (inferred from the messages) -- confirm against its definition. */
CHECK(ZSTD_isError(cSize), "Should not throw an error");
/* Decompress into a scratch buffer and require a byte-exact round trip. */
CHECK_Z(ZSTD_decompress(checkBuf, srcSize, dst, cSize));
CHECK(memcmp(src, checkBuf, srcSize) != 0, "Corruption!");

free(sequences);
free(checkBuf);
}

ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);

{ /* Test case with two additional sequences */
size_t srcSize = 19;
void* const src = CNBuffer;
Expand Down