diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..05da77acbebd9d6cc6ed2dbac762bec706e3bd85 --- /dev/null +++ b/.clusterfuzzlite/Dockerfile @@ -0,0 +1,11 @@ +# Base image with clang toolchain +FROM gcr.io/oss-fuzz-base/base-builder:v1 +# install required packages to build your project +RUN apt-get update && apt-get install -y python3-pip ninja-build +RUN pip install -U --pre meson +# Copy your project's source code +COPY . $SRC/cmp_tool +# Working directory for build.sh. +WORKDIR $SRC/cmp_tool +# Copy build.sh into $SRC dir. +COPY .clusterfuzzlite/build.sh $SRC/ diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..cf7dc646484f2f699021d9d6cc068925e92444f7 --- /dev/null +++ b/.clusterfuzzlite/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash -eu + +BUILD=$WORK/build + +# cleanup +rm -rf "$BUILD" +mkdir -p "$BUILD" + +echo "$LIB_FUZZING_ENGINE" + +# setup project +meson setup "$BUILD" \ + --buildtype=plain \ + -Dfuzzer=enabled \ + -Dfuzzer_ldflags="$LIB_FUZZING_ENGINE" \ + -Ddebug_level=0 \ + -Ddefault_library=static \ + --wrap-mode=nodownload + +# build fuzzers +ninja -v -C "$BUILD" test/fuzz/fuzz_{round_trip,compression} +find "$BUILD/test/fuzz" -maxdepth 1 -executable -type f -exec cp "{}" "$OUT" \; + +#TODO prepare corps diff --git a/.clusterfuzzlite/project.yaml b/.clusterfuzzlite/project.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ec8305f971c2babb3a8eef2586d5c6b2dba4c82 --- /dev/null +++ b/.clusterfuzzlite/project.yaml @@ -0,0 +1,11 @@ +homepage: "https://gitlab.phaidra.org/loidoltd15/cmp_tool" +language: c +primary_contact: "dominik.loidolt@univie.ac.at" +auto_ccs: "dominik.loidolt@univie.ac.at" +sanitizers: +- address +- undefined +- memory + +main_repo: 'https://gitlab.phaidra.org/loidoltd15/cmp_tool.git' + diff --git a/INSTALL.md b/INSTALL.md index 
70a4e0c93824557bbba73313085c476c6502d8c7..98ba3ce9f426a210bbaf28108ce05d5b58f6a2f2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -8,7 +8,7 @@ If you're on Linux, you probably already have these. On macOS and Windows, you c ### Install meson and ninja -Meson 0.56 or newer is required. +Meson 0.63 or newer is required. You can get meson through your package manager or using: ``` @@ -130,6 +130,47 @@ ninja coverage-html ``` The coverage report can be found in the `meson-logs/coveragereport` subdirectory. +### Benchmarking + +To run the compression speed test bench, follow these steps: + +``` +cd <name of the build directory> +meson test --benchmark +``` + + +### Fuzzing +If you’re unfamiliar with fuzzing and libFuzzer, you can find a tutorial [here](https://github.com/google/fuzzing/blob/master/tutorial/libFuzzerTutorial.md). +To perform fuzzing with libFuzzer, AddressSanitizer, and UndefinedBehaviorSanitizer, follow these steps: + +Set up your build directory with the necessary configurations. Note that you’ll need a clang version that has libFuzzer support. Use the following command: + +``` +CC=clang CXX=clang++ \ +meson setup builddir_fuzzing \ + --buildtype=plain \ + -Dfuzzer=enabled \ + -Dfuzzer_ldflags=-fsanitize=fuzzer \ + -Dc_args="-O1 -gline-tables-only -fsanitize-address-use-after-scope -fsanitize=fuzzer-no-link" \ + -Db_sanitize=address,undefined \ + -Ddebug_level=0 \ + -Ddefault_library=static \ + -Db_lundef=false +``` + +The `builddir_fuzzing` directory is now configured for fuzzing. Execute different fuzz targets using the meson test command. For example: + +``` +cd builddir_fuzzing +# List available tests +meson test --list + +# Run a specific fuzz target (e.g., fuzz_round_trip for 10 minutes) +meson test fuzz_round_trip\ 10\ min --verbose +``` + +Happy fuzzing! 
🚀 ## Documentation ### External dependencies diff --git a/meson_options.txt b/meson_options.txt index fc27689bb4a6e18bc5c1c4beb04579af179ea3d2..d19ed29e42451cde424f1e635baf52a429c118b2 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,4 +1,4 @@ -option('debug_level', type: 'integer', min: 0, max: 9, value: 1, - description: 'Enable run-time debug. See lib/common/cmp_debug.h') -option('argument_input_mode', type : 'boolean', value : false, - description : 'If set, the data file is set with the first argument and the model file with the second one') +option('debug_level', type : 'integer', min: 0, max: 9, value: 1, description: 'Enable run-time debug. See lib/common/cmp_debug.h') +option('argument_input_mode', type : 'boolean', value : false, description : 'If set, the data file is set with the first argument and the model file with the second one') +option('fuzzer', type : 'feature', value : 'disabled', description : 'Build for fuzzing') +option('fuzzer_ldflags', type : 'string', value : '-fsanitize=fuzzer', description : 'Extra LDFLAGS used during linking of fuzzing binaries') diff --git a/test/cmp_tool/meson.build b/test/cmp_tool/meson.build index 17b6ef1af62a64cf288809b7b3492dbc91dc7e2e..dfa0ce6d35ecffc3639fbe45c2b3a3a85b9d4198 100644 --- a/test/cmp_tool/meson.build +++ b/test/cmp_tool/meson.build @@ -5,6 +5,7 @@ if pytest.found() test('cmp_tool Interface Test', pytest, args : ['--color=yes', '-vvv', int_test_file], + env: test_env, depends : cmp_tool_exe, timeout : 100, workdir : meson.project_build_root()) diff --git a/test/fuzz/fuzz.h b/test/fuzz/fuzz.h new file mode 100644 index 0000000000000000000000000000000000000000..ddb25f7c26641b4be55c461a695762e4e91ed7d4 --- /dev/null +++ b/test/fuzz/fuzz.h @@ -0,0 +1,21 @@ +/** + * Fuzz target interface. 
+ */
+
+#ifndef FUZZ_H
+#define FUZZ_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/test/fuzz/fuzz_compression.c b/test/fuzz/fuzz_compression.c
new file mode 100644
index 0000000000000000000000000000000000000000..64d92babc8c2069b37c351e44c8de6fa2be95809
--- /dev/null
+++ b/test/fuzz/fuzz_compression.c
@@ -0,0 +1,160 @@
+/**
+ * @file   fuzz_compression.c
+ * @date   2024
+ *
+ * @copyright GPLv2
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * @brief chunk compression fuzz target
+ *
+ * This fuzzer tests the compression functionality with random data/model and
+ * parameters. It uses a random portion of the input data for parameter
+ * generation, while the rest is used for compression.
+ */
+
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+#include "../../lib/cmp_chunk.h"
+
+
+/**
+ * @brief libFuzzer entry point: compress one fuzzer-generated chunk
+ *
+ * A random-sized tail of the input feeds the data producer (parameters and
+ * buffer choices); the remaining prefix is the chunk data and, in about half
+ * of the cases, is split into data and model.
+ *
+ * @param src	fuzzer input; it is const and must never be written to
+ * @param size	size of the fuzzer input in bytes
+ *
+ * @returns always 0; an unexpected compression error aborts via FUZZ_ASSERT
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+	struct cmp_par cmp_par;
+	struct cmp_par *cmp_par_ptr = NULL;
+	const uint8_t *model = NULL;
+	void *model_cpy = NULL;
+	void *up_model = NULL;
+	uint32_t *cmp_data;
+	uint32_t cmp_data_capacity;
+	int use_a_upmodel;
+	uint32_t cmp_size_bound;
+	uint32_t return_value;
+
+	/* Give a random portion of src data to the producer, to use for
+	   parameter generation. The rest will be used for data/model */
+	FUZZ_dataProducer_t *producer = (FUZZ_dataProducer_t *)FUZZ_dataProducer_create(src, size);
+	size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
+	/* 1/2 of the cases we use a model */
+	if (FUZZ_dataProducer_uint32Range(producer, 0, 1) && size > 2) {
+		model = src + size/2;
+		size /= 2;
+	}
+	FUZZ_ASSERT(size <= UINT32_MAX);
+
+	cmp_size_bound = compress_chunk_cmp_size_bound(src, size);
+	if (cmp_is_error(cmp_size_bound))
+		cmp_size_bound = 0;
+	cmp_data_capacity = FUZZ_dataProducer_uint32Range(producer, 0, cmp_size_bound+(uint32_t)size);
+	cmp_data = (uint32_t *)FUZZ_malloc(cmp_data_capacity);
+
+	/* draw the parameters exactly once; a second draw only wastes input */
+	FUZZ_dataProducer_cmp_par(producer, &cmp_par);
+	cmp_par.lossy_par = 0; /*TODO: implement lossy */
+	if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
+		cmp_par_ptr = &cmp_par;
+
+	use_a_upmodel = FUZZ_dataProducer_int32Range(producer, 0, 2);
+	switch (use_a_upmodel) {
+	case 0:
+		up_model = NULL;
+		break;
+	case 1:
+		up_model = FUZZ_malloc(size); /* aborts if the allocation fails */
+		break;
+	case 2:
+		/*
+		 * in-place update: the model buffer gets overwritten, so use a
+		 * private copy; the model inside src is const fuzzer input that
+		 * a fuzz target must not modify
+		 */
+		if (model) {
+			model_cpy = FUZZ_malloc(size);
+			memcpy(model_cpy, model, size);
+			model = (const uint8_t *)model_cpy;
+		}
+		up_model = model_cpy;
+		break;
+	default:
+		FUZZ_ASSERT(0);
+	}
+
+
+	return_value = compress_chunk((void *)src, (uint32_t)size, (void *)model, up_model,
+				      cmp_data, cmp_data_capacity, cmp_par_ptr);
+
+	switch (cmp_get_error_code(return_value)) {
+	case CMP_ERROR_NO_ERROR:
+	case CMP_ERROR_GENERIC:
+	case CMP_ERROR_SMALL_BUF_:
+	/* compression parameter errors */
+	case CMP_ERROR_PAR_GENERIC:
+	case CMP_ERROR_PAR_SPECIFIC:
+	case CMP_ERROR_PAR_BUFFERS:
+	case CMP_ERROR_PAR_MAX_USED_BITS:
+	/* chunk errors */
+	case CMP_ERROR_CHUNK_NULL:
+	case CMP_ERROR_CHUNK_TOO_LARGE:
+	case CMP_ERROR_CHUNK_TOO_SMALL:
+	case CMP_ERROR_CHUNK_SIZE_INCONSISTENT:
+	case CMP_ERROR_CHUNK_SUBSERVICE_INCONSISTENT:
+	/* collection errors */
+	case CMP_ERROR_COL_SUBSERVICE_UNSUPPORTED:
+	case CMP_ERROR_COL_SIZE_INCONSISTENT:
+		break;
+	/* compression entity errors */
+	case CMP_ERROR_ENTITY_NULL:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_ENTITY_TOO_SMALL:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_ENTITY_HEADER:
+		break;
+	case CMP_ERROR_ENTITY_TIMESTAMP:
+		FUZZ_ASSERT(0);
+	/* internal compressor errors */
+	case CMP_ERROR_INT_DECODER:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_INT_DATA_TYPE_UNSUPPORTED:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_INT_CMP_COL_TOO_LARGE:
+		FUZZ_ASSERT(0);
+
+	case CMP_ERROR_DATA_VALUE_TOO_LARGE:
+		FUZZ_ASSERT(0);
+	default:
+		FUZZ_ASSERT(0);
+	}
+
+	free(cmp_data);
+	if (up_model != model_cpy)
+		free(up_model);
+	free(model_cpy);
+	FUZZ_dataProducer_free(producer);
+	return 0;
+}
+
diff --git a/test/fuzz/fuzz_data_producer.c b/test/fuzz/fuzz_data_producer.c
new file mode 100644
index 0000000000000000000000000000000000000000..1ee59944a44d2b2754d5ca343542c7493c47f1d1
--- /dev/null
+++ b/test/fuzz/fuzz_data_producer.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE.BSD-3.Zstandard file in the 3rdparty_licenses directory) and the GPLv2
+ * (found in the LICENSE.GPL-2 file in the 3rdparty_licenses directory).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * Modifications made by
+ * @author Dominik Loidolt (dominik.loidolt@univie.ac.at)
+ * @date 2024
+ * @see https://github.com/facebook/zstd/issues/1723
+ *
+ * - Added function providing a cmp_par struct
+ *
+ * Modifications are also licensed under the same license for consistency
+ *
+ */
+
+
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+#include <cmp_chunk.h>
+
+struct FUZZ_dataProducer_s{
+	const uint8_t *data;
+	size_t size;
+};
+
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) {
+	FUZZ_dataProducer_t *producer = FUZZ_malloc(sizeof(FUZZ_dataProducer_t));
+
+	producer->data = data;
+	producer->size = size;
+	return producer;
+}
+
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); }
+
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+				       uint32_t max) {
+	uint32_t range = max - min;
+	uint32_t rolling = range;
+	uint32_t result = 0;
+
+	FUZZ_ASSERT(min <= max);
+
+	while (rolling > 0 && producer->size > 0) {
+		uint8_t next = *(producer->data + producer->size - 1);
+		producer->size -= 1;
+		result = (result << 8) | next;
+		rolling >>= 8;
+	}
+
+	if (range == 0xffffffff) {
+		return result;
+	}
+
+	return min + result % (range + 1);
+}
+
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) {
+	return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff);
+}
+
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+				     int32_t min, int32_t max)
+{
+	/* width in unsigned arithmetic: max - min can overflow int32_t (UB) */
+	uint32_t const range = (uint32_t)max - (uint32_t)min;
+
+	FUZZ_ASSERT(min <= max);
+
+	return (int32_t)((uint32_t)min + FUZZ_dataProducer_uint32Range(producer, 0, range));
+}
+
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
+	return producer->size;
+}
+
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes)
+{
+	FUZZ_ASSERT(remainingBytes >= producer->size);
+	producer->size = remainingBytes;
+}
+
+int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) {
+	return producer->size == 0;
+}
+
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize)
+{
+	size_t remaining;
+
+	newSize = newSize > producer->size ? producer->size : newSize;
+	remaining = producer->size - newSize;
+	producer->data = producer->data + remaining;
+	producer->size = newSize;
+	return remaining;
+}
+
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer)
+{
+	size_t producerSliceSize = FUZZ_dataProducer_uint32Range(
+		producer, 0, producer->size);
+	return FUZZ_dataProducer_contract(producer, producerSliceSize);
+}
+
+void FUZZ_dataProducer_cmp_par(FUZZ_dataProducer_t *producer, struct cmp_par *cmp_par)
+{
+	cmp_par->cmp_mode = (enum cmp_mode)FUZZ_dataProducer_uint32(producer);
+	cmp_par->model_value = FUZZ_dataProducer_uint32(producer);
+	cmp_par->lossy_par = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->nc_imagette = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->s_exp_flags = FUZZ_dataProducer_uint32(producer);
+	cmp_par->s_fx = FUZZ_dataProducer_uint32(producer);
+	cmp_par->s_ncob = FUZZ_dataProducer_uint32(producer);
+	cmp_par->s_efx = FUZZ_dataProducer_uint32(producer);
+	cmp_par->s_ecob = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->l_exp_flags = FUZZ_dataProducer_uint32(producer);
+	cmp_par->l_fx = FUZZ_dataProducer_uint32(producer);
+	cmp_par->l_ncob = FUZZ_dataProducer_uint32(producer);
+	cmp_par->l_efx = FUZZ_dataProducer_uint32(producer);
+	cmp_par->l_ecob = FUZZ_dataProducer_uint32(producer);
+	cmp_par->l_fx_cob_variance = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->saturated_imagette = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->nc_offset_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->nc_offset_variance = FUZZ_dataProducer_uint32(producer);
+	cmp_par->nc_background_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->nc_background_variance = FUZZ_dataProducer_uint32(producer);
+	cmp_par->nc_background_outlier_pixels = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->smearing_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->smearing_variance_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->smearing_outlier_pixels = FUZZ_dataProducer_uint32(producer);
+
+	cmp_par->fc_imagette = FUZZ_dataProducer_uint32(producer);
+	cmp_par->fc_offset_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->fc_offset_variance = FUZZ_dataProducer_uint32(producer);
+	cmp_par->fc_background_mean = FUZZ_dataProducer_uint32(producer);
+	cmp_par->fc_background_variance = FUZZ_dataProducer_uint32(producer);
+	cmp_par->fc_background_outlier_pixels = FUZZ_dataProducer_uint32(producer);
+}
diff --git a/test/fuzz/fuzz_data_producer.h b/test/fuzz/fuzz_data_producer.h
new file mode 100644
index 0000000000000000000000000000000000000000..cdccd16a5a916ec8d1326a0be0fb7e378ba846f5
--- /dev/null
+++ b/test/fuzz/fuzz_data_producer.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE.BSD-3.Zstandard file in the 3rdparty_licenses directory) and the GPLv2
+ * (found in the LICENSE.GPL-2 file in the 3rdparty_licenses directory).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * Modifications made by
+ * @author Dominik Loidolt (dominik.loidolt@univie.ac.at)
+ * @date 2024
+ *
+ * - Added function providing a cmp_par struct
+ *
+ * Modifications are also licensed under the same license for consistency
+ */
+
+/**
+ * Helper APIs for generating random data from input data stream.
+ The producer reads bytes from the end of the input and appends them together
+ to generate a random number in the requested range. If it runs out of input
+ data, it will keep returning the same value (min) over and over again.
+
+ */
+
+#ifndef FUZZ_DATA_PRODUCER_H
+#define FUZZ_DATA_PRODUCER_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cmp_chunk.h>
+
+/* Struct used for maintaining the state of the data */
+typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t;
+
+/* Returns a data producer state struct. Use for producer initialization. */
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size);
+
+/* Frees the data producer */
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer);
+
+/* Returns value between [min, max] */
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+				       uint32_t max);
+
+/* Returns a uint32 value */
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer);
+
+/* Returns a signed value between [min, max] */
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+				     int32_t min, int32_t max);
+
+/* Provides compression parameters */
+void FUZZ_dataProducer_cmp_par(FUZZ_dataProducer_t *producer, struct cmp_par *cmp_par);
+
+/* Returns the size of the remaining bytes of data in the producer */
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);
+
+/* Rolls back the data producer state to have remainingBytes remaining */
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes);
+
+/* Returns true if the data producer is out of bytes */
+int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer);
+
+/* Restricts the producer to only the last newSize bytes of data.
+If newSize > current data size, nothing happens. Returns the number of bytes
+the producer won't use anymore, after contracting. */
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize);
+
+/* Restricts the producer to use only the last X bytes of data, where X is
+ a random number in the interval [0, data_size].
Returns the size of the
+ remaining data the producer won't use anymore (the prefix). */
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer);
+#endif // FUZZ_DATA_PRODUCER_H
diff --git a/test/fuzz/fuzz_helpers.c b/test/fuzz/fuzz_helpers.c
new file mode 100644
index 0000000000000000000000000000000000000000..27f10768511dec42d4060a616206733adb358695
--- /dev/null
+++ b/test/fuzz/fuzz_helpers.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE.BSD-3.Zstandard file in the 3rdparty_licenses directory) and the GPLv2
+ * (found in the LICENSE.GPL-2 file in the 3rdparty_licenses directory).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "fuzz_helpers.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+void* FUZZ_malloc(size_t size)
+{
+    if (size > 0) {
+        void* const mem = malloc(size);
+        FUZZ_ASSERT(mem);
+        return mem;
+    }
+    return NULL;
+}
+
+void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer)
+{
+    if (size > 0) {
+        void* const mem = malloc(size);
+        FUZZ_ASSERT(mem);
+        return mem;
+    } else {
+        uintptr_t ptr = 0;
+        /*
+         * Add +- 1M 50% of the time
+         * NOTE(review): the drawn offset is discarded, so ptr stays 0 and
+         * NULL is returned; the call only consumes producer bytes. Confirm
+         * whether "ptr += ..." was intended before relying on a non-NULL result.
+         */
+        if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
+            FUZZ_dataProducer_int32Range(producer, -1000000, 1000000);
+        return (void*)ptr;
+    }
+
+}
+
+int FUZZ_memcmp(void const* lhs, void const* rhs, int32_t size)
+{
+    if (size <= 0) {
+        return 0;
+    }
+    return memcmp(lhs, rhs, (size_t)size);
+}
diff --git a/test/fuzz/fuzz_helpers.h b/test/fuzz/fuzz_helpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..70fb3fbb39b123797e340191514c48621260a366
--- /dev/null
+++ b/test/fuzz/fuzz_helpers.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE.BSD-3.Zstandard file in the 3rdparty_licenses directory) and the GPLv2
+ * (found in the LICENSE.GPL-2 file in the 3rdparty_licenses directory).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * Helper functions for fuzzing.
+ */
+
+#ifndef FUZZ_HELPERS_H
+#define FUZZ_HELPERS_H
+
+#include "fuzz.h"
+#include "fuzz_data_producer.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define FUZZ_QUOTE_IMPL(str) #str
+#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str)
+
+/**
+ * Asserts for fuzzing that are always enabled; the caller supplies the ';'.
+ */
+#define FUZZ_ASSERT_MSG(cond, msg)                                             \
+  ((cond) ? (void)0                                                            \
+          : (fprintf(stderr, "%s: %d: Assertion: `%s' failed. %s\n", __FILE__, \
+                     __LINE__, FUZZ_QUOTE(cond), (msg)),                       \
+             abort()))
+#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), "")
+#define FUZZ_ZASSERT(code)                                                     \
+  FUZZ_ASSERT_MSG(!ZSTD_isError(code), ZSTD_getErrorName(code))
+
+#if defined(__GNUC__)
+#define FUZZ_STATIC static __inline __attribute__((unused))
+#elif defined(__cplusplus) ||                                                  \
+    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#define FUZZ_STATIC static inline
+#elif defined(_MSC_VER)
+#define FUZZ_STATIC static __inline
+#else
+#define FUZZ_STATIC static
+#endif
+
+/**
+ * malloc except return NULL for zero sized data and FUZZ_ASSERT
+ * that malloc doesn't fail.
+ */
+void* FUZZ_malloc(size_t size);
+
+/**
+ * malloc except returns random pointer for zero sized data and FUZZ_ASSERT
+ * that malloc doesn't fail.
+ */
+void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer);
+
+/**
+ * memcmp but accepts NULL.
Ignore negative sizes
+ */
+int FUZZ_memcmp(void const* lhs, void const* rhs, int32_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/test/fuzz/fuzz_round_trip.c b/test/fuzz/fuzz_round_trip.c
new file mode 100644
index 0000000000000000000000000000000000000000..0a6a1a7e9d57fd08230e5cfc8bd49102934f939e
--- /dev/null
+++ b/test/fuzz/fuzz_round_trip.c
@@ -0,0 +1,227 @@
+/**
+ * @file   fuzz_round_trip.c
+ * @date   2024
+ *
+ * @copyright GPLv2
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * @brief chunk compression/decompression fuzz target
+ *
+ * This fuzzer tests the (de)compression functionality with random data/model
+ * and parameters. It uses a random portion of the input data for parameter
+ * generation, while the rest is used for compression. If the compression
+ * succeeds, the data are decompressed and checked to see whether they match the
+ * input data.
+ */
+
+#include <string.h>
+
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+#include "../../lib/cmp_chunk.h"
+#include "../../lib/decmp.h"
+
+
+#define TEST_malloc(size) FUZZ_malloc(size)
+#define TEST_ASSERT(cond) FUZZ_ASSERT(cond)
+
+
+/**
+ * @brief compress a chunk and, if that succeeds, decompress it again and
+ *	check that the original data and the updated model are reproduced
+ *
+ * @param data			chunk data to compress
+ * @param data_size		size of the chunk data in bytes
+ * @param model			model of the data; can be NULL
+ * @param up_model		buffer for the updated model; can be NULL or
+ *				equal to model (in-place update)
+ * @param cmp_data		destination buffer for the compressed data
+ * @param cmp_data_capacity	size of the cmp_data buffer in bytes
+ * @param cmp_par		compression parameters; can be NULL
+ * @param use_decmp_buf		if set, decompress into a buffer and compare
+ * @param use_decmp_up_model	if set, let the decompressor update a model
+ *
+ * @returns the return value of compress_chunk()
+ */
+static uint32_t chunk_round_trip(void *data, uint32_t data_size,
+				 void *model, void *up_model,
+				 uint32_t *cmp_data, uint32_t cmp_data_capacity,
+				 struct cmp_par *cmp_par, int use_decmp_buf,
+				 int use_decmp_up_model)
+{
+	uint32_t cmp_size;
+	void *model_cpy = NULL;
+
+	/* if in-place model update is used (up_model == model), the model
+	 * needed for decompression is destroyed; therefore we make a copy
+	 */
+	if (model) {
+		if (up_model == model) {
+			model_cpy = TEST_malloc(data_size);
+			memcpy(model_cpy, model, data_size);
+		} else {
+			model_cpy = model;
+		}
+	}
+
+	cmp_size = compress_chunk(data, data_size, model, up_model,
+				  cmp_data, cmp_data_capacity, cmp_par);
+
+	/* TODO: compress a second time and check that the result is deterministic */
+
+	if (!cmp_is_error(cmp_size) && cmp_data) {
+		void *decmp_data = NULL;
+		void *up_model_decmp = NULL;
+		int decmp_size;
+
+		decmp_size = decompress_cmp_entiy((struct cmp_entity *)cmp_data, model_cpy, NULL, NULL);
+		TEST_ASSERT(decmp_size >= 0);
+		TEST_ASSERT((uint32_t)decmp_size == data_size);
+
+		if (use_decmp_buf)
+			decmp_data = TEST_malloc(data_size);
+		if (use_decmp_up_model)
+			up_model_decmp = TEST_malloc(data_size);
+
+		decmp_size = decompress_cmp_entiy((struct cmp_entity *)cmp_data, model_cpy,
+						  up_model_decmp, decmp_data);
+		TEST_ASSERT(decmp_size >= 0);
+		TEST_ASSERT((uint32_t)decmp_size == data_size);
+
+		if (use_decmp_buf) {
+			TEST_ASSERT(!memcmp(data, decmp_data, data_size));
+
+			/*
+			 * the model is only updated when the decompressed_data
+			 * buffer is set
+			 */
+			if (up_model && up_model_decmp)
+				TEST_ASSERT(!memcmp(up_model, up_model_decmp, data_size));
+		}
+
+		free(decmp_data);
+		free(up_model_decmp);
+	}
+
+	if (up_model == model)
+		free(model_cpy);
+
+	return cmp_size;
+}
+
+
+/**
+ * @brief libFuzzer entry point: round-trip one fuzzer-generated chunk
+ *
+ * @param src	fuzzer input
+ * @param size	size of the fuzzer input in bytes
+ *
+ * @returns always 0; a failed check aborts via FUZZ_ASSERT
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+	struct cmp_par cmp_par;
+	struct cmp_par *cmp_par_ptr = NULL;
+	const uint8_t *model = NULL;
+	void *up_model = NULL;
+	uint32_t cmp_size_bound;
+	uint32_t *cmp_data;
+	uint32_t cmp_data_capacity;
+	uint32_t return_value;
+	int use_decmp_buf;
+	int use_decmp_up_model;
+
+	/* Give a random portion of src data to the producer, to use for
+	   parameter generation. The rest will be used for (de)compression */
+	FUZZ_dataProducer_t *producer = (FUZZ_dataProducer_t *)FUZZ_dataProducer_create(src, size);
+	size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
+	/* 1/2 of the cases we use a model */
+	if (FUZZ_dataProducer_uint32Range(producer, 0, 1) && size > 2) {
+		model = src + size/2;
+		size /= 2;
+	}
+	FUZZ_ASSERT(size <= UINT32_MAX);
+
+	/* 1/2 of the cases we use a updated model buffer */
+	if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
+		up_model = FUZZ_malloc(size);
+
+	cmp_size_bound = compress_chunk_cmp_size_bound(src, size);
+	if (cmp_is_error(cmp_size_bound))
+		cmp_size_bound = 0;
+	cmp_data_capacity = FUZZ_dataProducer_uint32Range(producer, 0, cmp_size_bound+(uint32_t)size);
+	cmp_data = (uint32_t *)FUZZ_malloc(cmp_data_capacity);
+
+
+	FUZZ_dataProducer_cmp_par(producer, &cmp_par);
+	cmp_par.lossy_par = 0; /*TODO: implement lossy */
+	if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
+		cmp_par_ptr = &cmp_par;
+
+	use_decmp_buf = FUZZ_dataProducer_int32Range(producer, 0, 1);
+	use_decmp_up_model = FUZZ_dataProducer_int32Range(producer, 0, 1);
+
+	/*
+	 * explicit casts drop const, as fuzz_compression.c does; up_model is
+	 * NULL or a separate buffer here, so no in-place model update is used
+	 */
+	return_value = chunk_round_trip((void *)src, (uint32_t)size, (void *)model,
+					up_model, cmp_data, cmp_data_capacity,
+					cmp_par_ptr, use_decmp_buf, use_decmp_up_model);
+	switch (cmp_get_error_code(return_value)) {
+	case CMP_ERROR_NO_ERROR:
+	case CMP_ERROR_GENERIC:
+	case CMP_ERROR_SMALL_BUF_:
+	/* compression parameter errors */
+	case CMP_ERROR_PAR_GENERIC:
+	case CMP_ERROR_PAR_SPECIFIC:
+	case CMP_ERROR_PAR_BUFFERS:
+	case CMP_ERROR_PAR_MAX_USED_BITS:
+	/* chunk errors */
+	case CMP_ERROR_CHUNK_NULL:
+	case CMP_ERROR_CHUNK_TOO_LARGE:
+	case CMP_ERROR_CHUNK_TOO_SMALL:
+	case CMP_ERROR_CHUNK_SIZE_INCONSISTENT:
+	case CMP_ERROR_CHUNK_SUBSERVICE_INCONSISTENT:
+	/* collection errors */
+	case CMP_ERROR_COL_SUBSERVICE_UNSUPPORTED:
+	case CMP_ERROR_COL_SIZE_INCONSISTENT:
+		break;
+	/* compression entity errors */
+	case CMP_ERROR_ENTITY_NULL:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_ENTITY_TOO_SMALL:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_ENTITY_HEADER:
+		break;
+	case CMP_ERROR_ENTITY_TIMESTAMP:
+		FUZZ_ASSERT(0);
+	/* internal compressor errors */
+	case CMP_ERROR_INT_DECODER:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_INT_DATA_TYPE_UNSUPPORTED:
+		FUZZ_ASSERT(0);
+	case CMP_ERROR_INT_CMP_COL_TOO_LARGE:
+		FUZZ_ASSERT(0);
+
+	case CMP_ERROR_DATA_VALUE_TOO_LARGE:
+		FUZZ_ASSERT(0);
+	default:
+		FUZZ_ASSERT(0);
+	}
+
+	free(up_model);
+	free(cmp_data);
+	FUZZ_dataProducer_free(producer);
+
+	return 0;
+}
diff --git a/test/fuzz/meson.build b/test/fuzz/meson.build
new file mode 100644
index 0000000000000000000000000000000000000000..02a8ebe6506f25c420b1de0a94d8fc59252fec66
--- /dev/null
+++ b/test/fuzz/meson.build
@@ -0,0 +1,44 @@
+if get_option('fuzzer').disabled()
+  subdir_done()
+endif
+
+fuzz_common = files('fuzz_helpers.c', 'fuzz_data_producer.c')
+fuzz_targets = ['fuzz_compression.c', 'fuzz_round_trip.c']
+
+add_languages('cpp', native: false) # libFuzzingEngine needs c++
+
+
+
+foreach target : fuzz_targets
+  file_name = target
+  target_name = file_name.split('.').get(0)
+
+  fuzz_exe = executable(target_name,
+    fuzz_common, file_name,
+    include_directories : incdir,
+    link_with : cmp_lib,
+    link_args : get_option('fuzzer_ldflags'),
+    link_language : 'cpp' # libFuzzingEngine needs c++
+  )
+
+# todo add seed corpus
+  test(target_name + ' 10 min',
+    fuzz_exe,
+    args : ['-rss_limit_mb=2560', '-timeout=25', '-max_total_time=600'],
+    # args : ['-timeout=25', '-max_total_time=600'],
+    env : test_env,
+    is_parallel : false,
+    # suite : 'fuzzing',
+    timeout : 605,
+  )
+
+  test(target_name + ' non stop',
+    fuzz_exe,
+    args : ['-rss_limit_mb=2560', '-timeout=25'],
+    env : test_env,
+    is_parallel : false,
+    # suite : 'fuzzing',
+    timeout : 0,
+    verbose : true
+  )
+endforeach
diff --git a/test/meson.build b/test/meson.build
index 00d88c88a5299d4c347a0b3cb09f7c6cbd9a49ea..6fc1e622afa74f7d9226bd6e898ccb2689290d3f 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -28,12 +28,48 @@ if
cppcheck.found() ) endif +# Options were copied from oss-fuzz and adapted +# see: https://github.com/google/sanitizers/wiki/SanitizerCommonFlags +test_env = environment() +test_env.set('ASAN_OPTIONS', + 'abort_on_error=1', + 'allocator_may_return_null=1', + 'allocator_release_to_os_interval_ms=500', + 'detect_container_overflow=1', + 'detect_leaks=1', + 'detect_stack_use_after_return=1', + 'fast_unwind_on_fatal=0','handle_abort=1', + 'handle_segv=1', + 'handle_sigill=1', + 'max_uar_stack_size_log=16', + 'print_scariness=1', + 'quarantine_size_mb=10', + 'strict_memcmp=1', + 'symbolize=1', + 'use_sigaltstack=1', + 'dedup_token_length=3' +) +test_env.set('UBSAN_OPTIONS', + 'abort_on_error=1', + 'print_stacktrace=1', + 'print_summary=1', + 'symbolize=1', + 'dedup_token_length=3' +) +test_env.set('MSAN_OPTIONS', + 'abort_on_error=1', + 'print_stats=1', + 'symbolize=1', + 'dedup_token_length=3' +) + subdir('tools') subdir('cmp_tool') unity_dep = dependency('unity', fallback : ['unity', 'unity_dep']) subdir('test_common') +subdir('fuzz') test_cases = [] subdir('decmp') @@ -70,7 +106,9 @@ if ruby.found() build_by_default : false ) - test(test_description, test_exe) + test(test_description, test_exe, + env : test_env + ) endforeach endif