Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide an interface for fuzzing sequence producer plugins #3551

Merged
merged 1 commit into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions tests/fuzz/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ ZSTDDIR = ../../lib
PRGDIR = ../../programs
CONTRIBDIR = ../../contrib

# TODO(embg) make it possible to plug in an arbitrary matchfinder as a .o file
MATCHFINDER_DIR = $(CONTRIBDIR)/externalSequenceProducer
MATCHFINDER_SRC = $(MATCHFINDER_DIR)/sequence_producer.c
DEFAULT_SEQ_PROD_DIR = $(CONTRIBDIR)/externalSequenceProducer
DEFAULT_SEQ_PROD_SRC = $(DEFAULT_SEQ_PROD_DIR)/sequence_producer.c
THIRD_PARTY_SEQ_PROD_OBJ ?=

FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(MATCHFINDER_DIR) \
-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(DEFAULT_SEQ_PROD_DIR) \
-DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
Expand Down Expand Up @@ -75,7 +75,7 @@ FUZZ_SRC := \
$(ZSTDCOMP_SRC) \
$(ZSTDDICT_SRC) \
$(ZSTDLEGACY_SRC) \
$(MATCHFINDER_SRC)
$(DEFAULT_SEQ_PROD_SRC)
FUZZ_SRC := $(sort $(wildcard $(FUZZ_SRC)))

FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC))
Expand All @@ -84,21 +84,23 @@ FUZZ_D_OBJ3 := $(subst $(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2))
FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3))
FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4))
FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5))
FUZZ_D_OBJ7 := $(subst $(MATCHFINDER_DIR)/,d_matchfinder_,$(FUZZ_D_OBJ6))
FUZZ_D_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,d_default_seq_prod_,$(FUZZ_D_OBJ6))
FUZZ_D_OBJ8 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ7))
FUZZ_D_OBJ9 := $(FUZZ_D_OBJ8:.c=.o)
FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ9:.S=.o)
FUZZ_D_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_D_OBJ9)
FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ10:.S=.o)

FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC))
FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1))
FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2))
FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3))
FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4))
FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5))
FUZZ_RT_OBJ7 := $(subst $(MATCHFINDER_DIR)/,rt_matchfinder_,$(FUZZ_RT_OBJ6))
FUZZ_RT_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,rt_default_seq_prod_,$(FUZZ_RT_OBJ6))
FUZZ_RT_OBJ8 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ7))
FUZZ_RT_OBJ9 := $(FUZZ_RT_OBJ8:.c=.o)
FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ9:.S=.o)
FUZZ_RT_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_RT_OBJ9)
FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ10:.S=.o)

.PHONY: default all clean cleanall

Expand Down Expand Up @@ -151,7 +153,7 @@ rt_prg_%.o: $(PRGDIR)/%.c
rt_fuzz_%.o: %.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@

rt_matchfinder_%.o: $(MATCHFINDER_DIR)/%.c
rt_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@

d_lib_common_%.o: $(ZSTDDIR)/common/%.c
Expand All @@ -178,7 +180,7 @@ d_prg_%.o: $(PRGDIR)/%.c
d_fuzz_%.o: %.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@

d_matchfinder_%.o: $(MATCHFINDER_DIR)/%.c
d_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@

simple_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o
Expand Down
6 changes: 5 additions & 1 deletion tests/fuzz/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Alternatively, you can fuzz all targets in parallel, using one core per target:
```
python3 ./fuzz.py list | xargs -P$(python3 ./fuzz.py list | wc -l) -I__ sh -c "python3 ./fuzz.py libfuzzer __ 2>&1 | tee __.log"
```
Either way, to double-check that no crashes were found, run `ls corpora/*crash`.
Either way, to double-check that no crashes were found, run `ls corpora/*crash`.
If any crashes were found, you can use the hashes to reproduce them.

## LibFuzzer
Expand Down Expand Up @@ -113,3 +113,7 @@ CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan
CC=clang CXX=clang++ ./fuzz.py build all --enable-msan
./fuzz.py regression all
```

## Fuzzing a custom sequence producer plugin
Sequence producer plugin authors can use the zstd fuzzers to stress-test their code.
See the documentation in `fuzz_third_party_seq_prod.h` for details.
4 changes: 4 additions & 0 deletions tests/fuzz/block_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "zstd.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -54,6 +55,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -95,5 +98,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/decompress_dstSize_tooSmall.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
#include "zstd_errors.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -66,5 +69,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/dictionary_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -69,5 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
5 changes: 4 additions & 1 deletion tests/fuzz/dictionary_loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

/**
* Compresses the data and returns the compressed size or an error.
Expand All @@ -35,7 +36,7 @@ static size_t compress(void* compressed, size_t compressedCapacity,
if (refPrefix)
FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
cctx, dict, dictSize, dictContentType));
else
else
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
cctx, dict, dictSize, dictLoadMethod, dictContentType));
size_t const compressedSize = ZSTD_compress2(
Expand Down Expand Up @@ -67,6 +68,7 @@ static size_t decompress(void* result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0;
ZSTD_dictLoadMethod_e const dlm =
Expand Down Expand Up @@ -99,5 +101,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
free(cBuf);
free(rBuf);
FUZZ_dataProducer_free(producer);
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/dictionary_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -108,6 +109,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -147,5 +150,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
3 changes: 3 additions & 0 deletions tests/fuzz/dictionary_stream_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -147,6 +148,7 @@ static size_t compress(uint8_t *dst, size_t capacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
size_t neededBufSize;

/* Give a random portion of src data to the producer, to use for
Expand Down Expand Up @@ -202,5 +204,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
5 changes: 5 additions & 0 deletions tests/fuzz/fuzz.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
* This is the canonical flag to enable deterministic builds for fuzzing.
* Changes to zstd for fuzzing are gated behind this define.
* It is recommended to define this when building zstd for fuzzing.
* @param FUZZ_THIRD_PARTY_SEQ_PROD
* This flag allows sequence producer plugin authors to replace the built-in
* default sequence producer with their own code. If you are not a plugin
* author, you should not define this flag. See the docs at
* fuzz_third_party_seq_prod.h for more information.
*/

#ifndef FUZZ_H
Expand Down
11 changes: 11 additions & 0 deletions tests/fuzz/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
Expand Down Expand Up @@ -319,6 +320,12 @@ def build_parser(args):
dest='stateful_fuzzing',
action='store_true',
help='Reuse contexts between runs (makes reproduction impossible)')
parser.add_argument(
'--custom-seq-prod',
dest='third_party_seq_prod_obj',
type=str,
default=THIRD_PARTY_SEQ_PROD_OBJ,
help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
parser.add_argument(
'--cc',
dest='cc',
Expand Down Expand Up @@ -450,6 +457,10 @@ def build(args):
if args.stateful_fuzzing:
cppflags += ['-DSTATEFUL_FUZZING']

if args.third_party_seq_prod_obj:
cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]

if args.fuzzing_mode:
cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

Expand Down
116 changes: 116 additions & 0 deletions tests/fuzz/fuzz_third_party_seq_prod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#ifndef EXAMPLE_SEQ_PROD_H
#define EXAMPLE_SEQ_PROD_H

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* *** INTERFACE FOR FUZZING THIRD-PARTY SEQUENCE PRODUCER PLUGINS ***
* Fuzz-testing for the external sequence producer API was introduced in PR #3437.
* However, the setup in #3437 only allows fuzzers to exercise the implementation of the
* API itself (the code in the core zstd library which interacts with your plugin).
*
* This header defines an interface for plugin authors to link their code into the fuzzer
* build. Plugin authors can provide an object file implementing the symbols below,
* and those symbols will replace the default ones provided by #3437.
*
* To fuzz your plugin, follow these steps:
* - Build your object file with a recent version of clang. Building with gcc is not supported.
* - Build your object file using appropriate flags for fuzzing. For example:
* `-g -fno-omit-frame-pointer -fsanitize=undefined,address,fuzzer`
* - Build the fuzzer binaries with options corresponding to the flags you chose. Use --custom-seq-prod= to pass in your object file:
* `./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ --custom-seq-prod=your_object.o`
*
* An example implementation of this header is provided at tests/fuzz/seq_prod_fuzz_example/.
* Use these commands to fuzz with the example code:
* $ make corpora
* $ make -C seq_prod_fuzz_example/
* $ python3 ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ --custom-seq-prod=seq_prod_fuzz_example/example_seq_prod.o
* $ python3 ./fuzz.py libfuzzer simple_round_trip
*/

/* The fuzzer will call this function before each test-case. It should run any
* setup actions (such as starting a hardware device) needed for fuzzing.
*
* The fuzzer will assert() that the return value is zero. To signal an error,
* please return a non-zero value. */
size_t FUZZ_seqProdSetup(void);

/* The fuzzer will call this function after each test-case. It should free
* resources acquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
*
* The fuzzer will assert() that the return value is zero. To signal an error,
* please return a non-zero value. */
size_t FUZZ_seqProdTearDown(void);

/* The fuzzer will call this function before each test-case, only after calling
* FUZZ_seqProdSetup(), to obtain a sequence producer state which can be passed
* into ZSTD_registerSequenceProducer().
*
* All compressions which are part of a test-case will share a single sequence
* producer state. Sharing the state object is safe because the fuzzers currently
* don't exercise the sequence producer API in multi-threaded scenarios. We may
* need a new approach in the future to support multi-threaded fuzzing.
*
* The fuzzer will assert() that the return value is not NULL. To signal an error,
* please return NULL. */
void* FUZZ_createSeqProdState(void);

/* The fuzzer will call this function after each test-case. It should free any
* resources acquired by FUZZ_createSeqProdState().
*
* The fuzzer will assert() that the return value is zero. To signal an error,
* please return a non-zero value. */
size_t FUZZ_freeSeqProdState(void* sequenceProducerState);

/* This is the sequence producer function you would like to fuzz! It will receive
* the void* returned by FUZZ_createSeqProdState() on each invocation. */
size_t FUZZ_thirdPartySeqProd(void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel,
size_t windowSize);

/* These macros are internal helpers. You do not need to worry about them. */
/* FUZZ_SEQ_PROD_SETUP(): per-test-case setup hook used by the fuzz targets.
 * When FUZZ_THIRD_PARTY_SEQ_PROD is defined, it calls FUZZ_seqProdSetup() and
 * asserts that the result is zero, then stores the pointer returned by
 * FUZZ_createSeqProdState() in FUZZ_seqProdState and asserts it is not NULL.
 * When the flag is not defined, the macro expands to nothing.
 * NOTE(review): FUZZ_ASSERT and FUZZ_seqProdState are not declared in this
 * header; presumably they are provided by another fuzzer header/source that
 * must be included or linked alongside it -- confirm. */
#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
#define FUZZ_SEQ_PROD_SETUP() \
do { \
FUZZ_ASSERT(FUZZ_seqProdSetup() == 0); \
FUZZ_seqProdState = FUZZ_createSeqProdState(); \
FUZZ_ASSERT(FUZZ_seqProdState != NULL); \
} while (0)
#else
#define FUZZ_SEQ_PROD_SETUP()
#endif

#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
#define FUZZ_SEQ_PROD_TEARDOWN() \
do { \
FUZZ_ASSERT(FUZZ_freeSeqProdState(FUZZ_seqProdState) == 0); \
FUZZ_ASSERT(FUZZ_seqProdTearDown() == 0); \
embg marked this conversation as resolved.
Show resolved Hide resolved
} while (0)
#else
#define FUZZ_SEQ_PROD_TEARDOWN()
#endif

#ifdef __cplusplus
}
#endif

#endif /* EXAMPLE_SEQ_PROD_H */
Loading