facebook · terrelln · Jan 31, 2022 · Jan 21, 2022 · Jan 21, 2022 · Jan 21, 2022
diff --git a/lib/common/fse.h b/lib/common/fse.h
@@ -353,7 +353,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */

diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
@@ -342,7 +342,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     }
 
     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
+    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
     CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );

diff --git a/programs/dibio.c b/programs/dibio.c
@@ -27,10 +27,11 @@
 #include <string.h>         /* memset */
 #include <stdio.h>          /* fprintf, fopen, ftello64 */
 #include <errno.h>          /* errno */
-#include <assert.h>
 
 #include "timefn.h"         /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
+#include "../lib/common/debug.h" /* assert */
 #include "../lib/common/mem.h"  /* read */
+#include "../lib/zstd_errors.h"
 #include "dibio.h"
 
 
@@ -193,7 +194,8 @@ static U32 DiB_rand(U32* src)
 static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
     U32 seed = 0xFD2FB528;
     unsigned i;
-    assert(nbFiles >= 1);
+    if (nbFiles == 0)
+        return;
     for (i = nbFiles - 1; i > 0; --i) {
         unsigned const j = DiB_rand(&seed) % (i + 1);
         const char* const tmp = fileNamesTable[j];
@@ -379,7 +381,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
         srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable,
         nbFiles, chunkSize, displayLevel);
 
-    {   size_t dictSize;
+    {   size_t dictSize = ZSTD_error_GENERIC;
         if (params) {
             DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
             dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize,
@@ -399,8 +401,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
               dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
                                                      sampleSizes, nbSamplesLoaded, *coverParams);
             }
-        } else {
-            assert(fastCoverParams != NULL);
+        } else if (fastCoverParams != NULL) {
             if (optimize) {
               dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize,
                                                               srcBuffer, sampleSizes, nbSamplesLoaded,
@@ -415,6 +416,8 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
               dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer,
                                                         sampleSizes, nbSamplesLoaded, *fastCoverParams);
             }
+        } else {
+            assert(0 /* Impossible */);
         }
         if (ZDICT_isError(dictSize)) {
             DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
@@ -802,9 +802,7 @@ int main(int argCount, const char* argv[])
         separateFiles = 0,
         setRealTimePrio = 0,
         singleThread = 0,
-#ifdef ZSTD_MULTITHREAD
         defaultLogicalCores = 0,
-#endif
         showDefaultCParams = 0,
         ultra=0,
         contentSize=1;
@@ -996,15 +994,13 @@ int main(int argCount, const char* argv[])
                 if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }
-#ifdef ZSTD_MULTITHREAD
                 if (longCommandWArg(&argument, "--auto-threads")) {
                     const char* threadDefault = NULL;
                     NEXT_FIELD(threadDefault);
                     if (strcmp(threadDefault, "logical") == 0)
                         defaultLogicalCores = 1;
                     continue;
                 }
-#endif
 #ifdef UTIL_HAS_MIRRORFILELIST
                 if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
 #endif
@@ -1220,7 +1216,7 @@ int main(int argCount, const char* argv[])
         }
     }
 #else
-    (void)singleThread; (void)nbWorkers;
+    (void)singleThread; (void)nbWorkers; (void)defaultLogicalCores;
 #endif
 
     g_utilDisplayLevel = g_displayLevel;

diff --git a/tests/Makefile b/tests/Makefile
@@ -297,7 +297,7 @@ check: shortest
 fuzztest: test-fuzzer test-zstream test-decodecorpus
 
 .PHONY: test
-test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus
+test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus test-cli-tests
 ifeq ($(QEMU_SYS),)
 test: test-pool
 endif
@@ -322,6 +322,12 @@ test-zstd test-zstd32 test-zstd-nolegacy: datagen
 	file $(ZSTD)
 	EXE_PREFIX="$(QEMU_SYS)" ZSTD_BIN="$(ZSTD)" DATAGEN_BIN=./datagen ./playTests.sh $(ZSTDRTTEST)
 
+test-cli-tests: ZSTD = $(PRGDIR)/zstd
+test-cli-tests: zstd datagen
+	file $(ZSTD)
+	./cli-tests/run.py --exec-prefix="$(QEMU_SYS)" --zstd="$(ZSTD)" --datagen=./datagen
+
+
 test-fullbench: fullbench datagen
 	$(QEMU_SYS) ./fullbench -i1
 	$(QEMU_SYS) ./fullbench -i1 -P0

diff --git a/tests/cli-tests/.gitignore b/tests/cli-tests/.gitignore
@@ -0,0 +1,4 @@
+scratch/
+!bin/
+!datagen
+!zstdcat
diff --git a/tests/cli-tests/README.md b/tests/cli-tests/README.md
@@ -0,0 +1,248 @@
+# CLI tests
+
+The CLI tests are focused on testing the zstd CLI.
+They are intended to be simple tests that check that the CLI and its arguments work as advertised.
+They are not intended to test the library, only the code in `programs/`.
+The library will get incidental coverage, but if you find yourself trying to trigger a specific condition in the library, this is the wrong tool.
+
+## Test runner usage
+
+The test runner `run.py` will run tests against the in-tree build of `zstd` and `datagen` by default, which means that `zstd` and `datagen` must be built first.
+
+The `zstd` binary used can be passed with `--zstd /path/to/zstd`.
+Additionally, to run `zstd` through a tool like `valgrind` or `qemu`, set the `--exec-prefix 'valgrind -q'` flag.
+
+Similarly, the `--datagen` and `--zstdgrep` flags can be set to specify
+the paths to their respective binaries. However, these tools do not use
+the `EXEC_PREFIX`.
+
+Each test executes in its own scratch directory under `scratch/test/name`, e.g. `scratch/basic/help.sh/`. Normally these directories are removed after the test executes. However, the `--preserve` flag will preserve these directories after execution, and save the test's exit code, stdout, and stderr in the scratch directory to `exit`, `stdout`, and `stderr` respectively. This can be useful for debugging/editing a test and updating the expected output.
+
+### Running all the tests
+
+By default the test runner `run.py` will run all the tests, and report the results.
+
+Examples:
+
+```
+./run.py
+./run.py --preserve
+./run.py --zstd ../../build/programs/zstd --datagen ../../build/tests/datagen
+```
+
+### Running specific tests
+
+A set of test names can be passed to the test runner `run.py` to only execute those tests.
+This can be useful for writing or debugging a test, especially with `--preserve`.
+
+A test can be specified either by the path to its test file, or by its test name, which is the path relative to the test directory.
+
+Examples:
+
+```
+./run.py basic/help.sh
+./run.py --preserve basic/help.sh basic/version.sh
+./run.py --preserve --verbose basic/help.sh
+```
+
+## Writing a test
+
+Test cases are arbitrary executables, and can be written in any language, but are generally shell scripts.
+After the script executes, the exit code, stderr, and stdout are compared against the expectations.
+
+Each test is run in a clean directory that the test can use for intermediate files. This directory will be cleaned up at the end of the test, unless `--preserve` is passed to the test runner. Additionally, the `setup` script can prepare the directory before the test runs.
+
+### Calling zstd, utilities, and environment variables
+
+The `$PATH` for tests is prepended with the `bin/` sub-directory, which contains helper scripts for ease of testing.
+The `zstd` helper script calls the zstd binary specified to `run.py`, with the correct `$EXEC_PREFIX`.
+Similarly, `datagen`, `unzstd`, `zstdgrep`, `zstdcat`, etc, are provided.
+
+Helper utilities like `cmp_size`, `println`, and `die` are provided here too. See their scripts for details.
+
+Common shell script libraries are provided under `common/`, with helper variables and functions. They can be sourced with `source "$COMMON/library.sh"`.
+
+Lastly, environment variables are provided for testing, which can be listed when calling `run.py` with `--verbose`.
+They are generally used by the helper scripts in `bin/` to coordinate everything.
+
+### Basic test case
+
+When executing your `$TEST` executable, by default the exit code is expected to be `0`. However, you can provide an alternate expected exit code in a `$TEST.exit` file.
+
+When executing your `$TEST` executable, by default the expected stderr and stdout are empty. However, you can override the default by providing one of three files:
+
+* `$TEST.{stdout,stderr}.exact`
+* `$TEST.{stdout,stderr}.glob`
+* `$TEST.{stdout,stderr}.ignore`
+
+If you provide a `.exact` file, the output is expected to exactly match, byte-for-byte.
+
+If you provide a `.glob` file, the output is expected to match the expected file, where each line is interpreted as a glob pattern. Additionally, a line containing only `...` matches all lines until the next expected line matches.
+
+If you provide a `.ignore` file, the output is ignored.
+
+#### Passing examples
+
+All these examples pass.
+
+Exit 1, and change the expectation to be 1.
+
+```
+exit-1.sh
+---
+#!/bin/sh
+exit 1
+---
+
+exit-1.sh.exit
+---
+1
+---
+```
+
+Check the stdout output exactly matches.
+
+```
+echo.sh
+---
+#!/bin/sh
+echo "hello world"
+---
+
+echo.sh.stdout.exact
+---
+hello world
+---
+```
+
+Check the stderr output using a glob.
+
+```
+random.sh
+---
+#!/bin/sh
+head -c 10 < /dev/urandom | xxd >&2
+---
+
+random.sh.stderr.glob
+---
+00000000: * * * * *                 *
+```
+
+Multiple lines can be matched with `...`.
+
+```
+random-num-lines.sh
+---
+#!/bin/sh
+echo hello
+seq 0 $RANDOM
+echo world
+---
+
+random-num-lines.sh.stdout.glob
+---
+hello
+0
+...
+world
+---
+```
+
+#### Failing examples
+
+Exit code is expected to be 0, but is 1.
+
+```
+exit-1.sh
+---
+#!/bin/sh
+exit 1
+---
+```
+
+Stdout is expected to be empty, but isn't.
+
+```
+echo.sh
+---
+#!/bin/sh
+echo hello world
+```
+
+Stderr is expected to be hello but is world.
+
+```
+hello.sh
+---
+#!/bin/sh
+echo world >&2
+---
+
+hello.sh.stderr.exact
+---
+hello
+---
+```
+
+### Setup & teardown scripts
+
+Finally, test writing can be eased with setup and teardown scripts.
+Each directory in the test directory is a test-suite consisting of all tests within that directory (but not sub-directories).
+This test suite can come with 4 scripts to help test writing:
+
+* `setup_once`
+* `teardown_once`
+* `setup`
+* `teardown`
+
+The `setup_once` and `teardown_once` are run once before and after all the tests in the suite respectively.
+They operate in the scratch directory for the test suite, which is the parent directory of each scratch directory for each test case.
+They can do work that is shared between tests to improve test efficiency.
+For example, the `dictionaries/setup_once` script builds several dictionaries, for use in the `dictionaries` tests.
+
+The `setup` and `teardown` scripts run before and after each test case respectively, in the test case's scratch directory.
+These scripts can do work that is shared between test cases to make tests more succinct.
+For example, the `dictionaries/setup` script copies the dictionaries built by the `dictionaries/setup_once` script into the test's scratch directory, to make them easier to use, and make sure they aren't accidentally modified.
+
+#### Examples
+
+```
+basic/setup
+---
+#!/bin/sh
+# Create some files for testing with
+datagen > file
+datagen > file0
+datagen > file1
+---
+
+basic/test.sh
+---
+#!/bin/sh
+zstd file file0 file1
+---
+
+dictionaries/setup_once
+---
+#!/bin/sh
+set -e
+
+mkdir files/ dicts/
+for i in $(seq 10); do
+	datagen -g1000 > files/$i
+done
+
+zstd --train -r files/ -o dicts/0
+---
+
+dictionaries/setup
+---
+#!/bin/sh
+
+# Runs in the test case's scratch directory.
+# The test suite's scratch directory that
+# `setup_once` operates in is the parent directory.
+cp -r ../files ../dicts .
+---
+```
diff --git a/tests/cli-tests/basic/help.sh b/tests/cli-tests/basic/help.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+set -e
+
+println "+ zstd -h"
+zstd -h
+println "+ zstd -H"
+zstd -H
+println "+ zstd --help"
+zstd --help