Skip to content

Commit

Permalink
refactored fuzzer tests for sequence compression api
Browse files Browse the repository at this point in the history
add explicit delimiter mode to libfuzzer test
  • Loading branch information
Cyan4973 committed Jan 25, 2022
1 parent 87dcd33 commit 87fb8a5
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 76 deletions.
6 changes: 3 additions & 3 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -5997,10 +5997,10 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
return sequenceCopier;
}

/* Discover the size of next by searching for the block delimiter.
* Note that a block delimiter must exist in this mode,
/* Discover the size of next block by searching for the delimiter.
* Note that a block delimiter **must** exist in this mode,
* otherwise it's an input error.
* The value retrieved will be later compared to ensure it remains within bounds */
* The block size retrieved will be later compared to ensure it remains within bounds */
static size_t
blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
{
Expand Down
109 changes: 70 additions & 39 deletions tests/fuzz/sequence_compression_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
static ZSTD_CCtx* cctx = NULL;
static ZSTD_DCtx* dctx = NULL;
static void* literalsBuffer = NULL;
static void* generatedSrc = NULL;
static ZSTD_Sequence* generatedSequences = NULL;
Expand Down Expand Up @@ -55,7 +55,7 @@ static uint32_t FUZZ_RDG_rand(uint32_t* src)
/* Make a pseudorandom string - this simple function exists to avoid
* taking a dependency on datagen.h to have RDG_genBuffer().
*/
static char *generatePseudoRandomString(char *str, size_t size) {
static char* generatePseudoRandomString(char* str, size_t size) {
const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
uint32_t seed = 0;
if (size) {
Expand All @@ -69,7 +69,9 @@ static char *generatePseudoRandomString(char *str, size_t size) {

/* Returns size of source buffer */
static size_t decodeSequences(void* dst, size_t nbSequences,
size_t literalsSize, const void* dict, size_t dictSize) {
size_t literalsSize,
const void* dict, size_t dictSize)
{
const uint8_t* litPtr = literalsBuffer;
const uint8_t* const litBegin = literalsBuffer;
const uint8_t* const litEnd = litBegin + literalsSize;
Expand All @@ -87,7 +89,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
if (litPtr + generatedSequences[i].litLength > litEnd) {
litPtr = litBegin;
}
ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength);
memcpy(op, litPtr, generatedSequences[i].litLength);
bytesWritten += generatedSequences[i].litLength;
op += generatedSequences[i].litLength;
litPtr += generatedSequences[i].litLength;
Expand All @@ -109,7 +111,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
}
}
for (; j < matchLength; ++j) {
op[j] = op[j-(int)generatedSequences[i].offset];
op[j] = op[j - generatedSequences[i].offset];
}
op += j;
FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
Expand All @@ -120,74 +122,98 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
FUZZ_ASSERT(litPtr <= litEnd);
lastLLSize = (uint32_t)(litEnd - litPtr);
if (lastLLSize <= oend - op) {
ZSTD_memcpy(op, litPtr, lastLLSize);
memcpy(op, litPtr, lastLLSize);
generatedSrcBufferSize += lastLLSize;
}
return generatedSrcBufferSize;
}

/* Returns nb sequences generated
* TODO: Add repcode fuzzing once we support repcode match splits
* TODO: support generation for ZSTD_sf_explicitBlockDelimiters mode
*/
static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
size_t literalsSizeLimit, size_t dictSize,
size_t windowLog) {
size_t windowLog, ZSTD_sequenceFormat_e mode)
{
const uint32_t repCode = 0; /* not used by sequence ingestion api */
const uint32_t windowSize = 1 << windowLog;
uint32_t bytesGenerated = 0;
uint32_t nbSeqGenerated = 0;
uint32_t litLength;
uint32_t matchLength;
uint32_t matchBound;
uint32_t offset;
uint32_t offsetBound;
uint32_t repCode = 0;
uint32_t isFirstSequence = 1;
uint32_t windowSize = 1 << windowLog;
uint32_t blockSize = 0;
const uint32_t blockSizeMax = MIN(128 << 10, 1 << windowLog);

while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ
while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ
&& bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
&& !FUZZ_dataProducer_empty(producer)) {
matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit)
: FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit);
uint32_t matchLength;
uint32_t matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
uint32_t offset;
uint32_t offsetBound;
const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
bytesGenerated += litLength;
if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
break;
}
offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize;
offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize;
offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
if (dictSize > 0 && bytesGenerated <= windowSize) {
/* Prevent match length from being such that it would be associated with an offset too large
* from the decoder's perspective. If not possible (match would be too small),
* then reduce the offset if necessary.
*/
size_t bytesToReachWindowSize = windowSize - bytesGenerated;
const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
} else {
matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ?
ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize;
matchBound = MIN(ZSTD_FUZZ_MATCHLENGTH_MAXSIZE, (uint32_t)bytesToReachWindowSize);
}
}
matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
bytesGenerated += matchLength;
if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
break;
}
ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
generatedSequences[nbSeqGenerated++] = seq;
isFirstSequence = 0;
}
{ ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength+1);
#define SPLITPROB 6000
#define SPLITMARK 5234
const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
if (mode == ZSTD_sf_explicitBlockDelimiters) {
const size_t seqSize = seq.litLength + seq.matchLength;
if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */
const ZSTD_Sequence endBlock = {0, 0, 0, 0};
generatedSequences[nbSeqGenerated++] = endBlock;
blockSize = seqSize;
}
if (split) {
const ZSTD_Sequence endBlock = {lastLits, 0, 0, 0};
generatedSequences[nbSeqGenerated++] = endBlock;
assert(lastLits <= seq.litLength);
seq.litLength -= lastLits;
blockSize = seqSize - lastLits;
} else {
blockSize += seqSize;
}
}
generatedSequences[nbSeqGenerated++] = seq;
isFirstSequence = 0;
} }

return nbSeqGenerated;
}

/* TODO: fuzz ZSTD_sf_explicitBlockDelimiters mode
*/
static size_t roundTripTest(void *result, size_t resultCapacity,
void *compressed, size_t compressedCapacity,
size_t srcSize,
const void *dict, size_t dictSize,
size_t generatedSequencesSize,
size_t wLog, unsigned cLevel, unsigned hasDict)
int wLog, int cLevel, unsigned hasDict,
ZSTD_sequenceFormat_e mode)
{
size_t cSize;
size_t dSize;
Expand All @@ -200,8 +226,7 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
/* TODO: Add block delim mode fuzzing */
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
if (hasDict) {
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
Expand Down Expand Up @@ -231,35 +256,40 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
size_t cBufSize;
size_t generatedSrcSize;
size_t nbSequences;
void* dictBuffer;
void* dictBuffer = NULL;
size_t dictSize = 0;
unsigned hasDict;
unsigned wLog;
int cLevel;
ZSTD_sequenceFormat_e mode;

FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
FUZZ_ASSERT(producer);
if (literalsBuffer == NULL) {
literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
FUZZ_ASSERT(literalsBuffer);
literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
}

hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
if (hasDict) {
dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
dictBuffer = FUZZ_malloc(dictSize);
FUZZ_ASSERT(dictBuffer);
dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
}
/* Generate window log first so we don't generate offsets too large */
wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);

if (!generatedSequences) {
generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
}
if (!generatedSrc) {
generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
}
nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog);
nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize);
cBufSize = ZSTD_compressBound(generatedSrcSize);
cBuf = FUZZ_malloc(cBufSize);
Expand All @@ -276,14 +306,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
FUZZ_ASSERT(dctx);
}

size_t const result = roundTripTest(rBuf, rBufSize,
{ const size_t result = roundTripTest(rBuf, rBufSize,
cBuf, cBufSize,
generatedSrcSize,
dictBuffer, dictSize,
nbSequences,
wLog, cLevel, hasDict);
FUZZ_ZASSERT(result);
FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size");
(int)wLog, cLevel, hasDict, mode);
FUZZ_ZASSERT(result);
FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size");
}
FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!");

free(rBuf);
Expand Down
68 changes: 34 additions & 34 deletions tests/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -3114,55 +3114,55 @@ static int basicUnitTests(U32 const seed, double compressibility)

DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++);
{
size_t srcSize = 500 KB;
BYTE* src = (BYTE*)CNBuffer;
BYTE* dst = (BYTE*)compressedBuffer;
size_t dstSize = ZSTD_compressBound(srcSize);
size_t decompressSize = srcSize;
char* decompressBuffer = (char*)malloc(decompressSize);
const size_t srcSize = 500 KB;
const BYTE* const src = (BYTE*)CNBuffer;
BYTE* const dst = (BYTE*)compressedBuffer;
const size_t dstCapacity = ZSTD_compressBound(srcSize);
const size_t decompressSize = srcSize;
char* const decompressBuffer = (char*)malloc(decompressSize);
size_t compressedSize;
size_t dSize;

ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
size_t seqsSize;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
size_t nbSeqs;

if (seqs == NULL) goto _output_error;
assert(cctx != NULL);

/* Populate src with random data */
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);

/* Test with block delimiters roundtrip */
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
/* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
compressedSize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with block delims\n");
goto _output_error;
}
dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
if (ZSTD_isError(dSize)) {
DISPLAY("Error in sequence compression roundtrip with block delims\n");
goto _output_error;
}
{ size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
if (ZSTD_isError(dSize)) {
DISPLAY("Error in sequence compression roundtrip with block delims\n");
goto _output_error;
} }
assert(!memcmp(decompressBuffer, src, srcSize));

/* Test with no block delimiters roundtrip */
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
/* Roundtrip Test with no block delimiters */
{ size_t const nbSeqsAfterMerge = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
compressedSize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqsAfterMerge, src, srcSize);
}
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with no block delims\n");
goto _output_error;
}
dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
if (ZSTD_isError(dSize)) {
DISPLAY("Error in sequence compression roundtrip with no block delims\n");
goto _output_error;
}
{ size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
if (ZSTD_isError(dSize)) {
DISPLAY("Error in sequence compression roundtrip with no block delims\n");
goto _output_error;
} }
assert(!memcmp(decompressBuffer, src, srcSize));

ZSTD_freeCCtx(cctx);
Expand Down Expand Up @@ -3968,9 +3968,9 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const
DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming compression test \n", testNb);
{ U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
int const cLevel = (FUZ_rand(&lseed) %
(ZSTD_maxCLevel() -
(MAX(testLog, dictLog) / cLevelLimiter))) +
int const cLevel = (int)(FUZ_rand(&lseed) %
((U32)ZSTD_maxCLevel() -
(MAX(testLog, dictLog) / (U32)cLevelLimiter))) +
1;
maxTestSize = FUZ_rLogLength(&lseed, testLog);
if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
Expand Down Expand Up @@ -4066,7 +4066,7 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const
free(cBuffer);
free(dstBuffer);
free(mirrorBuffer);
return result;
return (int)result;

_output_error:
result = 1;
Expand Down Expand Up @@ -4103,7 +4103,7 @@ static unsigned readU32FromChar(const char** stringPtr)
{
unsigned result = 0;
while ((**stringPtr >='0') && (**stringPtr <='9'))
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
result *= 10, result += (unsigned)(**stringPtr - '0'), (*stringPtr)++ ;
if ((**stringPtr=='K') || (**stringPtr=='M')) {
result <<= 10;
if (**stringPtr=='M') result <<= 10;
Expand Down Expand Up @@ -4245,7 +4245,7 @@ int main(int argc, const char** argv)
}
}
if (!result)
result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100, bigTests);
result = fuzzerTests(seed, (unsigned)nbTests, (unsigned)testNb, maxDuration, ((double)proba) / 100, bigTests);
if (mainPause) {
int unused;
DISPLAY("Press Enter \n");
Expand Down

0 comments on commit 87fb8a5

Please sign in to comment.