Skip to content

Commit

Permalink
Merge pull request #3551 from embg/seq_prod_fuzz
Browse files Browse the repository at this point in the history
Provide an interface for fuzzing sequence producer plugins
  • Loading branch information
embg authored Mar 28, 2023
2 parents abb3585 + a810e1e commit 57e1b45
Show file tree
Hide file tree
Showing 21 changed files with 297 additions and 14 deletions.
24 changes: 13 additions & 11 deletions tests/fuzz/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ ZSTDDIR = ../../lib
PRGDIR = ../../programs
CONTRIBDIR = ../../contrib

# TODO(embg) make it possible to plug in an arbitrary matchfinder as a .o file
MATCHFINDER_DIR = $(CONTRIBDIR)/externalSequenceProducer
MATCHFINDER_SRC = $(MATCHFINDER_DIR)/sequence_producer.c
DEFAULT_SEQ_PROD_DIR = $(CONTRIBDIR)/externalSequenceProducer
DEFAULT_SEQ_PROD_SRC = $(DEFAULT_SEQ_PROD_DIR)/sequence_producer.c
THIRD_PARTY_SEQ_PROD_OBJ ?=

FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(MATCHFINDER_DIR) \
-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(DEFAULT_SEQ_PROD_DIR) \
-DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
Expand Down Expand Up @@ -75,7 +75,7 @@ FUZZ_SRC := \
$(ZSTDCOMP_SRC) \
$(ZSTDDICT_SRC) \
$(ZSTDLEGACY_SRC) \
$(MATCHFINDER_SRC)
$(DEFAULT_SEQ_PROD_SRC)
FUZZ_SRC := $(sort $(wildcard $(FUZZ_SRC)))

FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC))
Expand All @@ -84,21 +84,23 @@ FUZZ_D_OBJ3 := $(subst $(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2))
FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3))
FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4))
FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5))
FUZZ_D_OBJ7 := $(subst $(MATCHFINDER_DIR)/,d_matchfinder_,$(FUZZ_D_OBJ6))
FUZZ_D_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,d_default_seq_prod_,$(FUZZ_D_OBJ6))
FUZZ_D_OBJ8 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ7))
FUZZ_D_OBJ9 := $(FUZZ_D_OBJ8:.c=.o)
FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ9:.S=.o)
FUZZ_D_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_D_OBJ9)
FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ10:.S=.o)

FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC))
FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1))
FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2))
FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3))
FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4))
FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5))
FUZZ_RT_OBJ7 := $(subst $(MATCHFINDER_DIR)/,rt_matchfinder_,$(FUZZ_RT_OBJ6))
FUZZ_RT_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,rt_default_seq_prod_,$(FUZZ_RT_OBJ6))
FUZZ_RT_OBJ8 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ7))
FUZZ_RT_OBJ9 := $(FUZZ_RT_OBJ8:.c=.o)
FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ9:.S=.o)
FUZZ_RT_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_RT_OBJ9)
FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ10:.S=.o)

.PHONY: default all clean cleanall

Expand Down Expand Up @@ -151,7 +153,7 @@ rt_prg_%.o: $(PRGDIR)/%.c
rt_fuzz_%.o: %.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@

rt_matchfinder_%.o: $(MATCHFINDER_DIR)/%.c
rt_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@

d_lib_common_%.o: $(ZSTDDIR)/common/%.c
Expand All @@ -178,7 +180,7 @@ d_prg_%.o: $(PRGDIR)/%.c
d_fuzz_%.o: %.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@

d_matchfinder_%.o: $(MATCHFINDER_DIR)/%.c
d_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@

simple_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o
Expand Down
6 changes: 5 additions & 1 deletion tests/fuzz/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Alternatively, you can fuzz all targets in parallel, using one core per target:
```
python3 ./fuzz.py list | xargs -P$(python3 ./fuzz.py list | wc -l) -I__ sh -c "python3 ./fuzz.py libfuzzer __ 2>&1 | tee __.log"
```
Either way, to double-check that no crashes were found, run `ls corpora/*crash`.
Either way, to double-check that no crashes were found, run `ls corpora/*crash`.
If any crashes were found, you can use the hashes to reproduce them.

## LibFuzzer
Expand Down Expand Up @@ -113,3 +113,7 @@ CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan
CC=clang CXX=clang++ ./fuzz.py build all --enable-msan
./fuzz.py regression all
```

## Fuzzing a custom sequence producer plugin
Sequence producer plugin authors can use the zstd fuzzers to stress-test their code.
See the documentation in `fuzz_third_party_seq_prod.h` for details.
4 changes: 4 additions & 0 deletions tests/fuzz/block_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "zstd.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -54,6 +55,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -95,5 +98,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/decompress_dstSize_tooSmall.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
#include "zstd_errors.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -66,5 +69,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/dictionary_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -69,5 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
5 changes: 4 additions & 1 deletion tests/fuzz/dictionary_loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

/**
* Compresses the data and returns the compressed size or an error.
Expand All @@ -35,7 +36,7 @@ static size_t compress(void* compressed, size_t compressedCapacity,
if (refPrefix)
FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
cctx, dict, dictSize, dictContentType));
else
else
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
cctx, dict, dictSize, dictLoadMethod, dictContentType));
size_t const compressedSize = ZSTD_compress2(
Expand Down Expand Up @@ -67,6 +68,7 @@ static size_t decompress(void* result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0;
ZSTD_dictLoadMethod_e const dlm =
Expand Down Expand Up @@ -99,5 +101,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
free(cBuf);
free(rBuf);
FUZZ_dataProducer_free(producer);
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
4 changes: 4 additions & 0 deletions tests/fuzz/dictionary_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -108,6 +109,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();

/* Give a random portion of src data to the producer, to use for
parameter generation. The rest will be used for (de)compression */
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
Expand Down Expand Up @@ -147,5 +150,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
3 changes: 3 additions & 0 deletions tests/fuzz/dictionary_stream_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
Expand Down Expand Up @@ -147,6 +148,7 @@ static size_t compress(uint8_t *dst, size_t capacity,

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
size_t neededBufSize;

/* Give a random portion of src data to the producer, to use for
Expand Down Expand Up @@ -202,5 +204,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
ZSTD_freeCCtx(cctx); cctx = NULL;
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
FUZZ_SEQ_PROD_TEARDOWN();
return 0;
}
5 changes: 5 additions & 0 deletions tests/fuzz/fuzz.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
* This is the canonical flag to enable deterministic builds for fuzzing.
* Changes to zstd for fuzzing are gated behind this define.
* It is recommended to define this when building zstd for fuzzing.
* @param FUZZ_THIRD_PARTY_SEQ_PROD
* This flag allows sequence producer plugin authors to replace the built-in
* default sequence producer with their own code. If you are not a plugin
* author, you should not define this flag. See the docs at
* fuzz_third_party_seq_prod.h for more information.
*/

#ifndef FUZZ_H
Expand Down
11 changes: 11 additions & 0 deletions tests/fuzz/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
Expand Down Expand Up @@ -319,6 +320,12 @@ def build_parser(args):
dest='stateful_fuzzing',
action='store_true',
help='Reuse contexts between runs (makes reproduction impossible)')
parser.add_argument(
'--custom-seq-prod',
dest='third_party_seq_prod_obj',
type=str,
default=THIRD_PARTY_SEQ_PROD_OBJ,
help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
parser.add_argument(
'--cc',
dest='cc',
Expand Down Expand Up @@ -450,6 +457,10 @@ def build(args):
if args.stateful_fuzzing:
cppflags += ['-DSTATEFUL_FUZZING']

if args.third_party_seq_prod_obj:
cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]

if args.fuzzing_mode:
cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

Expand Down
116 changes: 116 additions & 0 deletions tests/fuzz/fuzz_third_party_seq_prod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#ifndef EXAMPLE_SEQ_PROD_H
#define EXAMPLE_SEQ_PROD_H

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* *** INTERFACE FOR FUZZING THIRD-PARTY SEQUENCE PRODUCER PLUGINS ***
* Fuzz-testing for the external sequence producer API was introduced in PR #3437.
* However, the setup in #3437 only allows fuzzers to exercise the implementation of the
* API itself (the code in the core zstd library which interacts with your plugin).
*
* This header defines an interface for plugin authors to link their code into the fuzzer
* build. Plugin authors can provide an object file implementing the symbols below,
* and those symbols will replace the default ones provided by #3437.
*
* To fuzz your plugin, follow these steps:
* - Build your object file with a recent version of clang. Building with gcc is not supported.
* - Build your object file using appropriate flags for fuzzing. For example:
* `-g -fno-omit-frame-pointer -fsanitize=undefined,address,fuzzer`
* - Build the fuzzer binaries with options corresponding to the flags you chose. Use --custom-seq-prod= to pass in your object file:
* `./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ --custom-seq-prod=your_object.o`
*
* An example implementation of this header is provided at tests/fuzz/seq_prod_fuzz_example/.
* Use these commands to fuzz with the example code:
* $ make corpora
* $ make -C seq_prod_fuzz_example/
* $ python3 ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ --custom-seq-prod=seq_prod_fuzz_example/example_seq_prod.o
* $ python3 ./fuzz.py libfuzzer simple_round_trip
*/

/* FUZZ_seqProdSetup() :
* Plugin-provided, per-test-case initialization hook.
* The fuzzer will call this function before each test-case. It should run any
* setup actions (such as starting a hardware device) needed for fuzzing.
* It is the first call made by the FUZZ_SEQ_PROD_SETUP() helper macro, ahead of
* FUZZ_createSeqProdState().
*
* @return : zero on success. The fuzzer will assert() that the return value is
*           zero, so to signal an error, please return a non-zero value. */
size_t FUZZ_seqProdSetup(void);

/* FUZZ_seqProdTearDown() :
* Plugin-provided, per-test-case cleanup hook.
* The fuzzer will call this function after each test-case. It should free
* resources acquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
* It is the last call made by the FUZZ_SEQ_PROD_TEARDOWN() helper macro, after
* FUZZ_freeSeqProdState().
*
* @return : zero on success. The fuzzer will assert() that the return value is
*           zero, so to signal an error, please return a non-zero value. */
size_t FUZZ_seqProdTearDown(void);

/* FUZZ_createSeqProdState() :
* The fuzzer will call this function before each test-case, only after calling
* FUZZ_seqProdSetup(), to obtain a sequence producer state which can be passed
* into ZSTD_registerSequenceProducer(). The FUZZ_SEQ_PROD_SETUP() helper macro
* stores the returned pointer in FUZZ_seqProdState, and FUZZ_SEQ_PROD_TEARDOWN()
* later hands that same pointer to FUZZ_freeSeqProdState().
*
* All compressions which are part of a test-case will share a single sequence
* producer state. Sharing the state object is safe because the fuzzers currently
* don't exercise the sequence producer API in multi-threaded scenarios. We may
* need a new approach in the future to support multi-threaded fuzzing.
*
* @return : a non-NULL state pointer on success. The fuzzer will assert() that
*           the return value is not NULL, so to signal an error, please return
*           NULL. */
void* FUZZ_createSeqProdState(void);

/* FUZZ_freeSeqProdState() :
* The fuzzer will call this function after each test-case. It should free any
* resources acquired by FUZZ_createSeqProdState().
*
* @param sequenceProducerState : the pointer previously returned by
*           FUZZ_createSeqProdState() (the FUZZ_SEQ_PROD_TEARDOWN() helper macro
*           passes FUZZ_seqProdState here).
* @return : zero on success. The fuzzer will assert() that the return value is
*           zero, so to signal an error, please return a non-zero value. */
size_t FUZZ_freeSeqProdState(void* sequenceProducerState);

/* FUZZ_thirdPartySeqProd() :
* This is the sequence producer function you would like to fuzz! It will receive
* the void* returned by FUZZ_createSeqProdState() on each invocation, as its
* first argument (sequenceProducerState).
*
* NOTE(review): the remaining parameters and the return value appear to mirror
* the sequence producer callback type declared in zstd.h (the one accepted by
* ZSTD_registerSequenceProducer()), so their exact contract should be taken
* from there - confirm. */
size_t FUZZ_thirdPartySeqProd(void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel,
size_t windowSize);

/* These macros are internal helpers. You do not need to worry about them.
*
* FUZZ_SEQ_PROD_SETUP() is invoked at the top of each fuzz target.
* - Without FUZZ_THIRD_PARTY_SEQ_PROD it expands to nothing.
* - With FUZZ_THIRD_PARTY_SEQ_PROD it calls FUZZ_seqProdSetup(), then stores the
*   result of FUZZ_createSeqProdState() in FUZZ_seqProdState, asserting after
*   each step that the plugin reported success.
* The do { } while (0) wrapper keeps the expansion a single statement. */
#if !defined(FUZZ_THIRD_PARTY_SEQ_PROD)
#  define FUZZ_SEQ_PROD_SETUP()
#else
#  define FUZZ_SEQ_PROD_SETUP()                                \
    do {                                                       \
        FUZZ_ASSERT(FUZZ_seqProdSetup() == 0);                 \
        FUZZ_seqProdState = FUZZ_createSeqProdState();         \
        FUZZ_ASSERT(FUZZ_seqProdState != NULL);                \
    } while (0)
#endif

/* Internal helper, counterpart of FUZZ_SEQ_PROD_SETUP(); invoked at the end of
* each fuzz target, right before it returns.
* - Without FUZZ_THIRD_PARTY_SEQ_PROD it expands to nothing.
* - With FUZZ_THIRD_PARTY_SEQ_PROD it releases the shared sequence producer
*   state through FUZZ_freeSeqProdState(), then calls FUZZ_seqProdTearDown(),
*   asserting that each one returned zero.
* FUZZ_seqProdState is reset to NULL once the state has been freed, so the
* global never holds a dangling pointer between test-cases. */
#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
#define FUZZ_SEQ_PROD_TEARDOWN()                                        \
    do {                                                                \
        FUZZ_ASSERT(FUZZ_freeSeqProdState(FUZZ_seqProdState) == 0);     \
        FUZZ_seqProdState = NULL;                                       \
        FUZZ_ASSERT(FUZZ_seqProdTearDown() == 0);                       \
    } while (0)
#else
#define FUZZ_SEQ_PROD_TEARDOWN()
#endif

#ifdef __cplusplus
}
#endif

#endif /* EXAMPLE_SEQ_PROD_H */
Loading

0 comments on commit 57e1b45

Please sign in to comment.