1/*
2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12/*-************************************
13*  Tuning parameters
14**************************************/
/* Compression level used when nothing else selects one :
 * init_cLevel() returns it when ZSTD_CLEVEL is unset or invalid.
 * May be overridden at build time. */
#ifndef ZSTDCLI_CLEVEL_DEFAULT
#  define ZSTDCLI_CLEVEL_DEFAULT 3
#endif

/* Highest level advertised by the help text as reachable without --ultra.
 * May be overridden at build time. */
#ifndef ZSTDCLI_CLEVEL_MAX
#  define ZSTDCLI_CLEVEL_MAX 19   /* without using --ultra */
#endif

/* Value returned by init_nbThreads() when ZSTD_NBTHREADS is unset or invalid.
 * May be overridden at build time. */
#ifndef ZSTDCLI_NBTHREADS_DEFAULT
#  define ZSTDCLI_NBTHREADS_DEFAULT 1
#endif
26
27/*-************************************
28*  Dependencies
29**************************************/
#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */
#include "util.h"     /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */
#include <stdlib.h>   /* getenv */
#include <string.h>   /* strcmp, strlen */
#include <stdio.h>    /* fprintf(), stdin, stdout, stderr */
#include <errno.h>    /* errno */
#include <assert.h>   /* assert */
#include <limits.h>   /* INT_MAX */

#include "fileio.h"   /* stdinmark, stdoutmark, ZSTD_EXTENSION */
#ifndef ZSTD_NOBENCH
#  include "benchzstd.h"  /* BMK_benchFiles */
#endif
#ifndef ZSTD_NODICT
#  include "dibio.h"  /* ZDICT_cover_params_t, DiB_trainFromFiles() */
#endif
#include "../lib/zstd.h"  /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
46
47
48/*-************************************
49*  Constants
50**************************************/
/* Pieces of the banner printed by WELCOME_MESSAGE. */
#define COMPRESSOR_NAME "zstd command line interface"
#ifndef ZSTD_VERSION
#  define ZSTD_VERSION "v" ZSTD_VERSION_STRING
#endif
#define AUTHOR "Yann Collet"
/* Expands to a format string followed by its arguments :
 * it must be used as the entire argument list of a printf-style macro,
 * e.g. DISPLAYOUT(WELCOME_MESSAGE). */
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR

/* Executable names known to the CLI.
 * NOTE(review): presumably compared with the program name (see exeNameMatch())
 * to select a behavior per alias - confirm in main(). */
#define ZSTD_ZSTDMT "zstdmt"
#define ZSTD_UNZSTD "unzstd"
#define ZSTD_CAT "zstdcat"
#define ZSTD_ZCAT "zcat"
#define ZSTD_GZ "gzip"
#define ZSTD_GUNZIP "gunzip"
#define ZSTD_GZCAT "gzcat"
#define ZSTD_LZMA "lzma"
#define ZSTD_UNLZMA "unlzma"
#define ZSTD_XZ "xz"
#define ZSTD_UNXZ "unxz"
#define ZSTD_LZ4 "lz4"
#define ZSTD_UNLZ4 "unlz4"

/* Postfix size multipliers : `110 KB` expands to `110 *(1 <<10)`.
 * KB and MB multiply by an int constant, GB by an unsigned one. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Initial value of g_displayLevel. */
#define DISPLAY_LEVEL_DEFAULT 2

/* Defaults for dictionary training and long-distance matching;
 * several of them are echoed by the help text in usage_advanced(). */
static const char*    g_defaultDictName = "dictionary";
static const unsigned g_defaultMaxDictSize = 110 KB;
static const int      g_defaultDictCLevel = 3;
static const unsigned g_defaultSelectivityLevel = 9;
static const unsigned g_defaultMaxWindowLog = 27;
/* 9999 is the initial value of the globals below for which 0 is itself a valid setting.
 * NOTE(review): presumably tested later as a "not set by the user" marker - confirm at the use sites. */
#define OVERLAP_LOG_DEFAULT 9999
#define LDM_PARAM_DEFAULT 9999  /* Default for parameters where 0 is valid */
/* Advanced parameters kept as file-scope state;
 * parseCompressionParameters() overwrites them when the matching key is given. */
static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
static U32 g_ldmHashLog = 0;
static U32 g_ldmMinMatch = 0;
static U32 g_ldmHashRateLog = LDM_PARAM_DEFAULT;
static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;


/* Default `accel` value installed by defaultFastCoverParams(). */
#define DEFAULT_ACCEL 1

/* Dictionary builder algorithms (names match the --train-cover, --train-fastcover
 * and --train-legacy options listed in the help text). */
typedef enum { cover, fastCover, legacy } dictType;
95
96/*-************************************
97*  Display Macros
98**************************************/
/* DISPLAY_F(f, ...)    : fprintf() onto an explicit stream.
 * DISPLAYOUT(...)      : print on stdout.
 * DISPLAY(...)         : print on stderr.
 * DISPLAYLEVEL(l, ...) : print on stderr only when g_displayLevel >= l.
 *                        Expands to a brace-enclosed block (not a do/while(0) statement). */
#define DISPLAY_F(f, ...)    fprintf((f), __VA_ARGS__)
#define DISPLAYOUT(...)      DISPLAY_F(stdout, __VA_ARGS__)
#define DISPLAY(...)         DISPLAY_F(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
/* Current verbosity threshold consulted by DISPLAYLEVEL(). */
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
104
105
106/*-************************************
107*  Command Line
108**************************************/
109/* print help either in `stderr` or `stdout` depending on originating request
110 * error (badusage) => stderr
111 * help (usage_advanced) => stdout
112 */
113static void usage(FILE* f, const char* programName)
114{
115    DISPLAY_F(f, "Usage : \n");
116    DISPLAY_F(f, "      %s [args] [FILE(s)] [-o file] \n", programName);
117    DISPLAY_F(f, "\n");
118    DISPLAY_F(f, "FILE    : a filename \n");
119    DISPLAY_F(f, "          with no FILE, or when FILE is - , read standard input\n");
120    DISPLAY_F(f, "Arguments : \n");
121#ifndef ZSTD_NOCOMPRESS
122    DISPLAY_F(f, " -#     : # compression level (1-%d, default: %d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT);
123#endif
124#ifndef ZSTD_NODECOMPRESS
125    DISPLAY_F(f, " -d     : decompression \n");
126#endif
127    DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n");
128    DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n");
129    DISPLAY_F(f, " -f     : overwrite output without prompting, also (de)compress links \n");
130    DISPLAY_F(f, "--rm    : remove source file(s) after successful de/compression \n");
131    DISPLAY_F(f, " -k     : preserve source file(s) (default) \n");
132    DISPLAY_F(f, " -h/-H  : display help/long help and exit \n");
133}
134
135static void usage_advanced(const char* programName)
136{
137    DISPLAYOUT(WELCOME_MESSAGE);
138    usage(stdout, programName);
139    DISPLAYOUT( "\n");
140    DISPLAYOUT( "Advanced arguments : \n");
141    DISPLAYOUT( " -V     : display Version number and exit \n");
142
143    DISPLAYOUT( " -c     : force write to standard output, even if it is the console \n");
144
145    DISPLAYOUT( " -v     : verbose mode; specify multiple times to increase verbosity \n");
146    DISPLAYOUT( " -q     : suppress warnings; specify twice to suppress errors too \n");
147    DISPLAYOUT( "--no-progress : do not display the progress counter \n");
148
149#ifdef UTIL_HAS_CREATEFILELIST
150    DISPLAYOUT( " -r     : operate recursively on directories \n");
151    DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n");
152    DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n");
153#endif
154
155#ifdef UTIL_HAS_MIRRORFILELIST
156    DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n");
157#endif
158
159
160#ifndef ZSTD_NOCOMPRESS
161    DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)");
162#ifndef ZSTD_NODECOMPRESS
163    DISPLAYOUT( ". If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).");
164#endif
165#else
166#ifdef ZSTD_NOCOMPRESS
167    DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate).");
168#endif
169#endif /* ZSTD_NOCOMPRESS */
170    DISPLAYOUT( "\n");
171
172    DISPLAYOUT( "--      : All arguments after \"--\" are treated as files \n");
173
174#ifndef ZSTD_NOCOMPRESS
175    DISPLAYOUT( "\n");
176    DISPLAYOUT( "Advanced compression arguments : \n");
177    DISPLAYOUT( "--ultra : enable levels beyond %i, up to %i (requires more memory) \n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
178    DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);
179    DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);
180    DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");
181# ifdef ZSTD_MULTITHREAD
182    DISPLAYOUT( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
183    DISPLAYOUT( " -B#    : select size of each job (default: 0==automatic) \n");
184    DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n");
185    DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n");
186# endif
187    DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n");
188    DISPLAYOUT( "--stream-size=# : specify size of streaming input from `stdin` \n");
189    DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n");
190    DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n");
191    DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n");
192    DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n");
193
194    DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n");
195#ifdef ZSTD_GZCOMPRESS
196    DISPLAYOUT( "--format=gzip : compress files to the .gz format \n");
197#endif
198#ifdef ZSTD_LZMACOMPRESS
199    DISPLAYOUT( "--format=xz : compress files to the .xz format \n");
200    DISPLAYOUT( "--format=lzma : compress files to the .lzma format \n");
201#endif
202#ifdef ZSTD_LZ4COMPRESS
203    DISPLAYOUT( "--format=lz4 : compress files to the .lz4 format \n");
204#endif
205#endif  /* !ZSTD_NOCOMPRESS */
206
207#ifndef ZSTD_NODECOMPRESS
208    DISPLAYOUT( "\n");
209    DISPLAYOUT( "Advanced decompression arguments : \n");
210    DISPLAYOUT( " -l     : print information about zstd compressed files \n");
211    DISPLAYOUT( "--test  : test compressed file integrity \n");
212    DISPLAYOUT( " -M#    : Set a memory usage limit for decompression \n");
213# if ZSTD_SPARSE_DEFAULT
214    DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n");
215# else
216    DISPLAYOUT( "--[no-]sparse : sparse mode (default: disabled) \n");
217# endif
218#endif  /* ZSTD_NODECOMPRESS */
219
220#ifndef ZSTD_NODICT
221    DISPLAYOUT( "\n");
222    DISPLAYOUT( "Dictionary builder : \n");
223    DISPLAYOUT( "--train ## : create a dictionary from a training set of files \n");
224    DISPLAYOUT( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args \n");
225    DISPLAYOUT( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args \n");
226    DISPLAYOUT( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u) \n", g_defaultSelectivityLevel);
227    DISPLAYOUT( " -o DICT : DICT is dictionary name (default: %s) \n", g_defaultDictName);
228    DISPLAYOUT( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
229    DISPLAYOUT( "--dictID=# : force dictionary ID to specified value (default: random) \n");
230#endif
231
232#ifndef ZSTD_NOBENCH
233    DISPLAYOUT( "\n");
234    DISPLAYOUT( "Benchmark arguments : \n");
235    DISPLAYOUT( " -b#    : benchmark file(s), using # compression level (default: %d) \n", ZSTDCLI_CLEVEL_DEFAULT);
236    DISPLAYOUT( " -e#    : test all compression levels successively from -b# to -e# (default: 1) \n");
237    DISPLAYOUT( " -i#    : minimum evaluation time in seconds (default: 3s) \n");
238    DISPLAYOUT( " -B#    : cut file into independent blocks of size # (default: no block) \n");
239    DISPLAYOUT( " -S     : output one benchmark result per input file (default: consolidated result) \n");
240    DISPLAYOUT( "--priority=rt : set process priority to real-time \n");
241#endif
242
243}
244
245static void badusage(const char* programName)
246{
247    DISPLAYLEVEL(1, "Incorrect parameters \n");
248    if (g_displayLevel >= 2) usage(stderr, programName);
249}
250
/*! waitEnter() :
 *  Prints a prompt on stderr, then reads one character from stdin.
 *  The character read is deliberately discarded.
 */
static void waitEnter(void)
{
    DISPLAY("Press enter to continue... \n");
    (void)getchar();
}
258
/*! lastNameFromPath() :
 *  @return : pointer, inside `path`, to the last path component.
 *  The text following the last '/' is selected first; within that remainder,
 *  the text following the last '\\' (windows separator) is then selected.
 *  When no separator is present, `path` itself is returned.
 */
static const char* lastNameFromPath(const char* path)
{
    const char* sep = strrchr(path, '/');
    const char* name = (sep != NULL) ? sep + 1 : path;

    sep = strrchr(name, '\\');   /* windows */
    if (sep != NULL) name = sep + 1;

    return name;
}
266
/*! exeNameMatch() :
 *  Tells whether `exeName` is `test`, optionally followed by an extension.
 *  @return : 1 when `exeName` starts with `test` and the character right after
 *            that prefix is either the end of string or a '.',
 *            0 otherwise.
 */
static int exeNameMatch(const char* exeName, const char* test)
{
    size_t const testLen = strlen(test);
    char following;

    if (strncmp(exeName, test, testLen) != 0) return 0;

    /* prefix matched : exeName holds at least testLen characters, so this read is in bounds */
    following = exeName[testLen];
    return (following == '\0') || (following == '.');
}
275
/*! errorOut() :
 *  Prints `msg` on stderr, then terminates the program with exit code 1.
 *  Does not return.
 */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
280
/*! readU32FromCharChecked() :
 *  Parses a decimal unsigned value at `*stringPtr`, with an optional size suffix.
 *  Accepted suffixes : K, KB, KiB (x1024) and M, MB, MiB (x1024x1024).
 *  `*stringPtr` is advanced to the position where reading stopped.
 * @return 0 on success, in which case the result is stored into `*value`.
 * @return 1 if the value overflows `unsigned`; `*value` is then left untouched
 *           and `*stringPtr` designates the character at which overflow was detected. */
static int readU32FromCharChecked(const char** stringPtr, unsigned* value)
{
    unsigned const limitBeforeTimes10 = ((unsigned)(-1)) / 10;
    unsigned const limitBeforeShift10 = ((unsigned)(-1)) >> 10;
    const char* cursor = *stringPtr;
    unsigned acc = 0;
    int overflow = 0;

    /* digit sequence */
    while (!overflow && (*cursor >= '0') && (*cursor <= '9')) {
        unsigned const digit = (unsigned)(*cursor - '0');
        unsigned const next = acc * 10 + digit;   /* unsigned arithmetic : wraps on overflow */
        if ((acc > limitBeforeTimes10) || (next < acc)) {
            overflow = 1;
        } else {
            acc = next;
            cursor++;
        }
    }

    /* optional suffix : one 10-bit shift for K, two for M */
    if (!overflow && ((*cursor == 'K') || (*cursor == 'M'))) {
        unsigned const nbShifts = (*cursor == 'M') ? 2 : 1;
        unsigned s;
        for (s = 0; (s < nbShifts) && !overflow; s++) {
            if (acc > limitBeforeShift10) overflow = 1;
            else acc <<= 10;
        }
        if (!overflow) {
            cursor++;  /* skip `K` or `M` */
            if (*cursor == 'i') cursor++;
            if (*cursor == 'B') cursor++;
        }
    }

    *stringPtr = cursor;
    if (overflow) return 1;
    *value = acc;
    return 0;
}
313
/*! readU32FromChar() :
 *  Wrapper around readU32FromCharChecked() which treats overflow as fatal.
 * @return : unsigned integer value read from input in `char` format.
 *  Allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Advances `*stringPtr` to the position where reading stopped.
 *  Note : on overflow, an error message is displayed and the program exits via errorOut(). */
static unsigned readU32FromChar(const char** stringPtr) {
    unsigned parsed;
    int const overflowed = readU32FromCharChecked(stringPtr, &parsed);
    if (overflowed) {
        errorOut("error: numeric value overflows 32-bit unsigned int");
    }
    return parsed;
}
325
/*! readSizeTFromCharChecked() :
 *  Same contract as readU32FromCharChecked(), with a `size_t` result.
 *  Parses a decimal value at `*stringPtr`, with an optional size suffix :
 *  K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
 *  `*stringPtr` is advanced to the position where reading stopped.
 * @return 0 on success, in which case the result is stored into `*value`.
 * @return 1 if the value overflows `size_t`; `*value` is then left untouched
 *           and `*stringPtr` designates the character at which overflow was detected. */
static int readSizeTFromCharChecked(const char** stringPtr, size_t* value)
{
    size_t const limitBeforeTimes10 = ((size_t)(-1)) / 10;
    size_t const limitBeforeShift10 = ((size_t)(-1)) >> 10;
    const char* cursor = *stringPtr;
    size_t acc = 0;
    int overflow = 0;

    /* digit sequence */
    while (!overflow && (*cursor >= '0') && (*cursor <= '9')) {
        size_t const digit = (size_t)(*cursor - '0');
        size_t const next = acc * 10 + digit;   /* unsigned arithmetic : wraps on overflow */
        if ((acc > limitBeforeTimes10) || (next < acc)) {
            overflow = 1;
        } else {
            acc = next;
            cursor++;
        }
    }

    /* optional suffix : one 10-bit shift for K, two for M */
    if (!overflow && ((*cursor == 'K') || (*cursor == 'M'))) {
        unsigned const nbShifts = (*cursor == 'M') ? 2 : 1;
        unsigned s;
        for (s = 0; (s < nbShifts) && !overflow; s++) {
            if (acc > limitBeforeShift10) overflow = 1;
            else acc <<= 10;
        }
        if (!overflow) {
            cursor++;  /* skip `K` or `M` */
            if (*cursor == 'i') cursor++;
            if (*cursor == 'B') cursor++;
        }
    }

    *stringPtr = cursor;
    if (overflow) return 1;
    *value = acc;
    return 0;
}
358
/*! readSizeTFromChar() :
 *  Wrapper around readSizeTFromCharChecked() which treats overflow as fatal.
 * @return : size_t value read from input in `char` format.
 *  Allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Advances `*stringPtr` to the position where reading stopped.
 *  Note : on overflow, an error message is displayed and the program exits via errorOut(). */
static size_t readSizeTFromChar(const char** stringPtr) {
    size_t parsed;
    int const overflowed = readSizeTFromCharChecked(stringPtr, &parsed);
    if (overflowed) {
        errorOut("error: numeric value overflows size_t");
    }
    return parsed;
}
370
/** longCommandWArg() :
 *  Tests whether the text at *stringPtr begins with `longCommand`.
 * @return 1 on match, after moving *stringPtr just past `longCommand`.
 * @return 0 otherwise, leaving *stringPtr unchanged.
 *  Note : this is a prefix comparison only; what follows the prefix is not examined.
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
383
384
385#ifndef ZSTD_NODICT
386
/* Value given to shrinkDictMaxRegression by the default cover/fastcover parameter sets,
 * and by the "shrink" option when it is written without an explicit "=#" value. */
static const unsigned kDefaultRegression = 1;
388/**
389 * parseCoverParameters() :
390 * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
391 * @return 1 means that cover parameters were correct
392 * @return 0 in case of malformed parameters
393 */
394static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
395{
396    memset(params, 0, sizeof(*params));
397    for (; ;) {
398        if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
399        if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
400        if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
401        if (longCommandWArg(&stringPtr, "split=")) {
402          unsigned splitPercentage = readU32FromChar(&stringPtr);
403          params->splitPoint = (double)splitPercentage / 100.0;
404          if (stringPtr[0]==',') { stringPtr++; continue; } else break;
405        }
406        if (longCommandWArg(&stringPtr, "shrink")) {
407          params->shrinkDictMaxRegression = kDefaultRegression;
408          params->shrinkDict = 1;
409          if (stringPtr[0]=='=') {
410            stringPtr++;
411            params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
412          }
413          if (stringPtr[0]==',') {
414            stringPtr++;
415            continue;
416          }
417          else break;
418        }
419        return 0;
420    }
421    if (stringPtr[0] != 0) return 0;
422    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
423    return 1;
424}
425
426/**
427 * parseFastCoverParameters() :
428 * reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params
429 * @return 1 means that fastcover parameters were correct
430 * @return 0 in case of malformed parameters
431 */
432static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)
433{
434    memset(params, 0, sizeof(*params));
435    for (; ;) {
436        if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
437        if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
438        if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
439        if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
440        if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
441        if (longCommandWArg(&stringPtr, "split=")) {
442          unsigned splitPercentage = readU32FromChar(&stringPtr);
443          params->splitPoint = (double)splitPercentage / 100.0;
444          if (stringPtr[0]==',') { stringPtr++; continue; } else break;
445        }
446        if (longCommandWArg(&stringPtr, "shrink")) {
447          params->shrinkDictMaxRegression = kDefaultRegression;
448          params->shrinkDict = 1;
449          if (stringPtr[0]=='=') {
450            stringPtr++;
451            params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
452          }
453          if (stringPtr[0]==',') {
454            stringPtr++;
455            continue;
456          }
457          else break;
458        }
459        return 0;
460    }
461    if (stringPtr[0] != 0) return 0;
462    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
463    return 1;
464}
465
/**
 * parseLegacyParameters() :
 * reads the legacy dictionary builder parameter from stringPtr
 * (either "s=#" or "selectivity=#") into *selectivity.
 * The value is read with readU32FromChar(), which exits the program on overflow.
 * @return 1 means that legacy dictionary builder parameters were correct
 * @return 0 in case of malformed parameters (missing key, or trailing characters).
 *           Note : *selectivity may already have been written when trailing characters are found.
 */
static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity)
{
    int const hasKey = longCommandWArg(&stringPtr, "s=")
                    || longCommandWArg(&stringPtr, "selectivity=");
    if (!hasKey) return 0;

    *selectivity = readU32FromChar(&stringPtr);

    if (stringPtr[0] == 0) {
        DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity);
        return 1;
    }
    return 0;   /* trailing characters */
}
480
481static ZDICT_cover_params_t defaultCoverParams(void)
482{
483    ZDICT_cover_params_t params;
484    memset(&params, 0, sizeof(params));
485    params.d = 8;
486    params.steps = 4;
487    params.splitPoint = 1.0;
488    params.shrinkDict = 0;
489    params.shrinkDictMaxRegression = kDefaultRegression;
490    return params;
491}
492
493static ZDICT_fastCover_params_t defaultFastCoverParams(void)
494{
495    ZDICT_fastCover_params_t params;
496    memset(&params, 0, sizeof(params));
497    params.d = 8;
498    params.f = 20;
499    params.steps = 4;
500    params.splitPoint = 0.75; /* different from default splitPoint of cover */
501    params.accel = DEFAULT_ACCEL;
502    params.shrinkDict = 0;
503    params.shrinkDictMaxRegression = kDefaultRegression;
504    return params;
505}
506#endif
507
508
/** parseAdaptParameters() :
 *  reads adaptation limits from stringPtr, a comma-separated list of
 *  "min=#" and/or "max=#" fields (e.g. "min=1,max=19"),
 *  and stores them into *adaptMinPtr and *adaptMaxPtr.
 *  Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized :
 *  a limit which is not listed keeps its previous value,
 *  and limits may have been partially updated when parsing fails.
 *  Values are read with readU32FromChar(), then converted to int.
 *  @return 1 means that parsing was successful,
 *  @return 0 in case of malformed parameters, or when min > max
 */
static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)
{
    int moreFields = 1;
    while (moreFields) {
        if (longCommandWArg(&stringPtr, "min=")) {
            *adaptMinPtr = (int)readU32FromChar(&stringPtr);
        } else if (longCommandWArg(&stringPtr, "max=")) {
            *adaptMaxPtr = (int)readU32FromChar(&stringPtr);
        } else {
            DISPLAYLEVEL(4, "invalid compression parameter \n");
            return 0;
        }
        /* a ',' announces another field; anything else ends the list */
        moreFields = (stringPtr[0] == ',');
        if (moreFields) stringPtr++;
    }

    if (stringPtr[0] != 0) return 0; /* check the end of string */

    if (*adaptMinPtr > *adaptMaxPtr) {
        DISPLAYLEVEL(4, "incoherent adaptation limits \n");
        return 0;
    }
    return 1;
}
531
532
533/** parseCompressionParameters() :
534 *  reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6") into *params
535 *  @return 1 means that compression parameters were correct
536 *  @return 0 in case of malformed parameters
537 */
538static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params)
539{
540    for ( ; ;) {
541        if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
542        if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
543        if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
544        if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
545        if (longCommandWArg(&stringPtr, "minMatch=") || longCommandWArg(&stringPtr, "mml=")) { params->minMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
546        if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
547        if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
548        if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
549        if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "lhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
550        if (longCommandWArg(&stringPtr, "ldmMinMatch=") || longCommandWArg(&stringPtr, "lmml=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
551        if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "lblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
552        if (longCommandWArg(&stringPtr, "ldmHashRateLog=") || longCommandWArg(&stringPtr, "lhrlog=")) { g_ldmHashRateLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
553        DISPLAYLEVEL(4, "invalid compression parameter \n");
554        return 0;
555    }
556
557    DISPLAYLEVEL(4, "windowLog=%d, chainLog=%d, hashLog=%d, searchLog=%d \n", params->windowLog, params->chainLog, params->hashLog, params->searchLog);
558    DISPLAYLEVEL(4, "minMatch=%d, targetLength=%d, strategy=%d \n", params->minMatch, params->targetLength, params->strategy);
559    if (stringPtr[0] != 0) return 0; /* check the end of string */
560    return 1;
561}
562
563static void printVersion(void)
564{
565    if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) {
566        DISPLAYOUT("%s\n", ZSTD_VERSION_STRING);
567        return;
568    }
569
570    DISPLAYOUT(WELCOME_MESSAGE);
571    if (g_displayLevel >= 3) {
572    /* format support */
573        DISPLAYOUT("*** supports: zstd");
574    #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8)
575        DISPLAYOUT(", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT);
576    #endif
577    #ifdef ZSTD_GZCOMPRESS
578        DISPLAYOUT(", gzip");
579    #endif
580    #ifdef ZSTD_LZ4COMPRESS
581        DISPLAYOUT(", lz4");
582    #endif
583    #ifdef ZSTD_LZMACOMPRESS
584        DISPLAYOUT(", lzma, xz ");
585    #endif
586        DISPLAYOUT("\n");
587        if (g_displayLevel >= 4) {
588            /* posix support */
589        #ifdef _POSIX_C_SOURCE
590            DISPLAYOUT("_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE);
591        #endif
592        #ifdef _POSIX_VERSION
593            DISPLAYOUT("_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION);
594        #endif
595        #ifdef PLATFORM_POSIX_VERSION
596            DISPLAYOUT("PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
597        #endif
598    }   }
599}
600
/* Environment variables for parameter setting :
 * ENV_CLEVEL is read by init_cLevel(), ENV_NBTHREADS by init_nbThreads(). */
#define ENV_CLEVEL "ZSTD_CLEVEL"
#define ENV_NBTHREADS "ZSTD_NBTHREADS"    /* takes lower precedence than directly specifying -T# in the CLI */
604
605/* pick up environment variable */
606static int init_cLevel(void) {
607    const char* const env = getenv(ENV_CLEVEL);
608    if (env != NULL) {
609        const char* ptr = env;
610        int sign = 1;
611        if (*ptr == '-') {
612            sign = -1;
613            ptr++;
614        } else if (*ptr == '+') {
615            ptr++;
616        }
617
618        if ((*ptr>='0') && (*ptr<='9')) {
619            unsigned absLevel;
620            if (readU32FromCharChecked(&ptr, &absLevel)) {
621                DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_CLEVEL, env);
622                return ZSTDCLI_CLEVEL_DEFAULT;
623            } else if (*ptr == 0) {
624                return sign * (int)absLevel;
625        }   }
626
627        DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid integer value \n", ENV_CLEVEL, env);
628    }
629
630    return ZSTDCLI_CLEVEL_DEFAULT;
631}
632
633#ifdef ZSTD_MULTITHREAD
634static unsigned init_nbThreads(void) {
635    const char* const env = getenv(ENV_NBTHREADS);
636    if (env != NULL) {
637        const char* ptr = env;
638        if ((*ptr>='0') && (*ptr<='9')) {
639            unsigned nbThreads;
640            if (readU32FromCharChecked(&ptr, &nbThreads)) {
641                DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
642                return ZSTDCLI_NBTHREADS_DEFAULT;
643            } else if (*ptr == 0) {
644                return nbThreads;
645            }
646        }
647        DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
648    }
649
650    return ZSTDCLI_NBTHREADS_DEFAULT;
651}
652#endif
653
/* NEXT_FIELD(ptr) :
 * Makes `ptr` designate the value attached to the option currently being parsed.
 * - When `*argument` is '=' : `ptr` is set right after the '=',
 *   and `argument` is moved to the end of the current string.
 * - Otherwise : `argNb` is incremented and `ptr` is set to that argv[] entry.
 *   A missing entry, or an entry starting with '-', displays an error
 *   and leaves through CLEAN_RETURN(1).
 * The expansion site must provide `argument`, `argNb`, `argCount` and `argv`,
 * plus whatever CLEAN_RETURN() itself relies on.
 * Expands to a brace-enclosed block (not a do/while(0) statement). */
#define NEXT_FIELD(ptr) {         \
    if (*argument == '=') {       \
        ptr = ++argument;         \
        argument += strlen(ptr);  \
    } else {                      \
        argNb++;                  \
        if (argNb >= argCount) {  \
            DISPLAY("error: missing command argument \n"); \
            CLEAN_RETURN(1);      \
        }                         \
        ptr = argv[argNb];        \
        assert(ptr != NULL);      \
        if (ptr[0]=='-') {        \
            DISPLAY("error: command cannot be separated from its argument by another command \n"); \
            CLEAN_RETURN(1);      \
}   }   }

/* NEXT_UINT32(val32) :
 * Fetches the option value with NEXT_FIELD(), then parses it with readU32FromChar()
 * (suffixes allowed, program exits on overflow) and stores the result into `val32`.
 * Characters following the number are not checked here.
 * NOTE(review): `__nb` starts with a double underscore, a spelling reserved
 * for the implementation; consider renaming it. */
#define NEXT_UINT32(val32) {      \
    const char* __nb;             \
    NEXT_FIELD(__nb);             \
    val32 = readU32FromChar(&__nb); \
}
676
/* Printable strategy names. Slot 0 holds an empty string, slots 1..ZSTD_NB_STRATEGIES hold the names.
 * NOTE(review): presumably indexed by a ZSTD_strategy value - confirm against the enum in zstd.h. */
#define ZSTD_NB_STRATEGIES 9
static const char* ZSTD_strategyMap[ZSTD_NB_STRATEGIES + 1] = { "", "ZSTD_fast",
                "ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2", "ZSTD_btlazy2",
                "ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2"};

/* Top-level action selected on the command line; main() starts with zom_compress. */
typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;

/* CLEAN_RETURN(i) : stores `i` into `operationResult`, then jumps to label `_end`.
 * Both must exist in the function where the macro is expanded. */
#define CLEAN_RETURN(i) { operationResult = (i); goto _end; }

/* Compression level bounds : fixed numbers when the compression library is compiled out,
 * queried from the library otherwise. */
#ifdef ZSTD_NOCOMPRESS
/* symbols from compression library are not defined and should not be invoked */
# define MINCLEVEL  -99
# define MAXCLEVEL   22
#else
# define MINCLEVEL  ZSTD_minCLevel()
# define MAXCLEVEL  ZSTD_maxCLevel()
#endif
694
695int main(int const argCount, const char* argv[])
696{
697    int argNb,
698        followLinks = 0,
699        forceStdout = 0,
700        hasStdout = 0,
701        ldmFlag = 0,
702        main_pause = 0,
703        nbWorkers = 0,
704        adapt = 0,
705        adaptMin = MINCLEVEL,
706        adaptMax = MAXCLEVEL,
707        rsyncable = 0,
708        nextArgumentsAreFiles = 0,
709        operationResult = 0,
710        separateFiles = 0,
711        setRealTimePrio = 0,
712        singleThread = 0,
713        showDefaultCParams = 0,
714        ultra=0,
715        contentSize=1;
716    double compressibility = 0.5;
717    unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
718    size_t blockSize = 0;
719
720    FIO_prefs_t* const prefs = FIO_createPreferences();
721    FIO_ctx_t* const fCtx = FIO_createContext();
722    zstd_operation_mode operation = zom_compress;
723    ZSTD_compressionParameters compressionParams;
724    int cLevel = init_cLevel();
725    int cLevelLast = MINCLEVEL - 1;  /* lower than minimum */
726    unsigned recursive = 0;
727    unsigned memLimit = 0;
728    FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount);  /* argCount >= 1 */
729    FileNamesTable* file_of_names = UTIL_allocateFileNamesTable((size_t)argCount);  /* argCount >= 1 */
730    const char* programName = argv[0];
731    const char* outFileName = NULL;
732    const char* outDirName = NULL;
733    const char* outMirroredDirName = NULL;
734    const char* dictFileName = NULL;
735    const char* patchFromDictFileName = NULL;
736    const char* suffix = ZSTD_EXTENSION;
737    unsigned maxDictSize = g_defaultMaxDictSize;
738    unsigned dictID = 0;
739    size_t streamSrcSize = 0;
740    size_t targetCBlockSize = 0;
741    size_t srcSizeHint = 0;
742    int dictCLevel = g_defaultDictCLevel;
743    unsigned dictSelect = g_defaultSelectivityLevel;
744#ifndef ZSTD_NODICT
745    ZDICT_cover_params_t coverParams = defaultCoverParams();
746    ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
747    dictType dict = fastCover;
748#endif
749#ifndef ZSTD_NOBENCH
750    BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
751#endif
752    ZSTD_literalCompressionMode_e literalCompressionMode = ZSTD_lcm_auto;
753
754
755    /* init */
756    (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
757    (void)memLimit;
758    assert(argCount >= 1);
759    if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
760    programName = lastNameFromPath(programName);
761#ifdef ZSTD_MULTITHREAD
762    nbWorkers = init_nbThreads();
763#endif
764
765    /* preset behaviors */
766    if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
767    if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
768    if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }     /* supports multiple formats */
769    if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }    /* behave like zcat, also supports multiple formats */
770    if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); }        /* behave like gzip */
771    if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); }                                                     /* behave like gunzip, also supports multiple formats */
772    if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }   /* behave like gzcat, also supports multiple formats */
773    if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); }    /* behave like lzma */
774    if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */
775    if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); }          /* behave like xz */
776    if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); }     /* behave like unxz, also supports multiple formats */
777    if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); }                                       /* behave like lz4 */
778    if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lz4Compression); }                                   /* behave like unlz4, also supports multiple formats */
779    memset(&compressionParams, 0, sizeof(compressionParams));
780
781    /* init crash handler */
782    FIO_addAbortHandler();
783
784    /* command switches */
785    for (argNb=1; argNb<argCount; argNb++) {
786        const char* argument = argv[argNb];
787        if (!argument) continue;   /* Protection if argument empty */
788
789        if (nextArgumentsAreFiles) {
790            UTIL_refFilename(filenames, argument);
791            continue;
792        }
793
794        /* "-" means stdin/stdout */
795        if (!strcmp(argument, "-")){
796            UTIL_refFilename(filenames, stdinmark);
797            continue;
798        }
799
800        /* Decode commands (note : aggregated commands are allowed) */
801        if (argument[0]=='-') {
802
803            if (argument[1]=='-') {
804                /* long commands (--long-word) */
805                if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; }   /* only file names allowed from now on */
806                if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
807                if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
808                if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
809                if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
810                if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
811                if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }
812                if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }
813                if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
814                if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
815                if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
816                if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
817                if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
818                if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
819                if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
820                if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
821                if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
822                if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
823                if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
824                if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
825                if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
826                if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
827                if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; }
828                if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }
829                if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }
830                if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
831                if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
832                if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
833                if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
834#ifdef ZSTD_GZCOMPRESS
835                if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
836#endif
837#ifdef ZSTD_LZMACOMPRESS
838                if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression);  continue; }
839                if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression);  continue; }
840#endif
841#ifdef ZSTD_LZ4COMPRESS
842                if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression);  continue; }
843#endif
844                if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
845                if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
846                if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
847                if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
848                if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
849
850                /* long commands with arguments */
851#ifndef ZSTD_NODICT
852                if (longCommandWArg(&argument, "--train-cover")) {
853                  operation = zom_train;
854                  if (outFileName == NULL)
855                      outFileName = g_defaultDictName;
856                  dict = cover;
857                  /* Allow optional arguments following an = */
858                  if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
859                  else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
860                  else if (!parseCoverParameters(argument, &coverParams)) { badusage(programName); CLEAN_RETURN(1); }
861                  continue;
862                }
863                if (longCommandWArg(&argument, "--train-fastcover")) {
864                  operation = zom_train;
865                  if (outFileName == NULL)
866                      outFileName = g_defaultDictName;
867                  dict = fastCover;
868                  /* Allow optional arguments following an = */
869                  if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
870                  else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
871                  else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badusage(programName); CLEAN_RETURN(1); }
872                  continue;
873                }
874                if (longCommandWArg(&argument, "--train-legacy")) {
875                  operation = zom_train;
876                  if (outFileName == NULL)
877                      outFileName = g_defaultDictName;
878                  dict = legacy;
879                  /* Allow optional arguments following an = */
880                  if (*argument == 0) { continue; }
881                  else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
882                  else if (!parseLegacyParameters(argument, &dictSelect)) { badusage(programName); CLEAN_RETURN(1); }
883                  continue;
884                }
885#endif
886                if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
887                if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
888                if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
889                if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
890                if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; }
891                if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
892                if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
893                if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; }
894                if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; }
895                if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }
896                if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }
897                if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }
898#ifdef UTIL_HAS_MIRRORFILELIST
899                if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
900#endif
901                if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; }
902                if (longCommandWArg(&argument, "--long")) {
903                    unsigned ldmWindowLog = 0;
904                    ldmFlag = 1;
905                    /* Parse optional window log */
906                    if (*argument == '=') {
907                        ++argument;
908                        ldmWindowLog = readU32FromChar(&argument);
909                    } else if (*argument != 0) {
910                        /* Invalid character following --long */
911                        badusage(programName);
912                        CLEAN_RETURN(1);
913                    }
914                    /* Only set windowLog if not already set by --zstd */
915                    if (compressionParams.windowLog == 0)
916                        compressionParams.windowLog = ldmWindowLog;
917                    continue;
918                }
919#ifndef ZSTD_NOCOMPRESS   /* linking ZSTD_minCLevel() requires compression support */
920                if (longCommandWArg(&argument, "--fast")) {
921                    /* Parse optional acceleration factor */
922                    if (*argument == '=') {
923                        U32 const maxFast = (U32)-ZSTD_minCLevel();
924                        U32 fastLevel;
925                        ++argument;
926                        fastLevel = readU32FromChar(&argument);
927                        if (fastLevel > maxFast) fastLevel = maxFast;
928                        if (fastLevel) {
929                            dictCLevel = cLevel = -(int)fastLevel;
930                        } else {
931                            badusage(programName);
932                            CLEAN_RETURN(1);
933                        }
934                    } else if (*argument != 0) {
935                        /* Invalid character following --fast */
936                        badusage(programName);
937                        CLEAN_RETURN(1);
938                    } else {
939                        cLevel = -1;  /* default for --fast */
940                    }
941                    continue;
942                }
943#endif
944
945                if (longCommandWArg(&argument, "--filelist")) {
946                    const char* listName;
947                    NEXT_FIELD(listName);
948                    UTIL_refFilename(file_of_names, listName);
949                    continue;
950                }
951
952                /* fall-through, will trigger bad_usage() later on */
953            }
954
955            argument++;
956            while (argument[0]!=0) {
957
958#ifndef ZSTD_NOCOMPRESS
959                /* compression Level */
960                if ((*argument>='0') && (*argument<='9')) {
961                    dictCLevel = cLevel = (int)readU32FromChar(&argument);
962                    continue;
963                }
964#endif
965
966                switch(argument[0])
967                {
968                    /* Display help */
969                case 'V': printVersion(); CLEAN_RETURN(0);   /* Version Only */
970                case 'H':
971                case 'h': usage_advanced(programName); CLEAN_RETURN(0);
972
973                     /* Compress */
974                case 'z': operation=zom_compress; argument++; break;
975
976                     /* Decoding */
977                case 'd':
978#ifndef ZSTD_NOBENCH
979                        benchParams.mode = BMK_decodeOnly;
980                        if (operation==zom_bench) { argument++; break; }  /* benchmark decode (hidden option) */
981#endif
982                        operation=zom_decompress; argument++; break;
983
984                    /* Force stdout, even if stdout==console */
985                case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
986
987                    /* Use file content as dictionary */
988                case 'D': argument++; NEXT_FIELD(dictFileName); break;
989
990                    /* Overwrite */
991                case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
992
993                    /* Verbose mode */
994                case 'v': g_displayLevel++; argument++; break;
995
996                    /* Quiet mode */
997                case 'q': g_displayLevel--; argument++; break;
998
999                    /* keep source file (default) */
1000                case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
1001
1002                    /* Checksum */
1003                case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
1004
1005                    /* test compressed file */
1006                case 't': operation=zom_test; argument++; break;
1007
1008                    /* destination file name */
1009                case 'o': argument++; NEXT_FIELD(outFileName); break;
1010
1011                    /* limit memory */
1012                case 'M':
1013                    argument++;
1014                    memLimit = readU32FromChar(&argument);
1015                    break;
1016                case 'l': operation=zom_list; argument++; break;
1017#ifdef UTIL_HAS_CREATEFILELIST
1018                    /* recursive */
1019                case 'r': recursive=1; argument++; break;
1020#endif
1021
1022#ifndef ZSTD_NOBENCH
1023                    /* Benchmark */
1024                case 'b':
1025                    operation=zom_bench;
1026                    argument++;
1027                    break;
1028
1029                    /* range bench (benchmark only) */
1030                case 'e':
1031                    /* compression Level */
1032                    argument++;
1033                    cLevelLast = (int)readU32FromChar(&argument);
1034                    break;
1035
1036                    /* Modify Nb Iterations (benchmark only) */
1037                case 'i':
1038                    argument++;
1039                    bench_nbSeconds = readU32FromChar(&argument);
1040                    break;
1041
1042                    /* cut input into blocks (benchmark only) */
1043                case 'B':
1044                    argument++;
1045                    blockSize = readU32FromChar(&argument);
1046                    break;
1047
1048                    /* benchmark files separately (hidden option) */
1049                case 'S':
1050                    argument++;
1051                    separateFiles = 1;
1052                    break;
1053
1054#endif   /* ZSTD_NOBENCH */
1055
1056                    /* nb of threads (hidden option) */
1057                case 'T':
1058                    argument++;
1059                    nbWorkers = (int)readU32FromChar(&argument);
1060                    break;
1061
1062                    /* Dictionary Selection level */
1063                case 's':
1064                    argument++;
1065                    dictSelect = readU32FromChar(&argument);
1066                    break;
1067
1068                    /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
1069                case 'p': argument++;
1070#ifndef ZSTD_NOBENCH
1071                    if ((*argument>='0') && (*argument<='9')) {
1072                        benchParams.additionalParam = (int)readU32FromChar(&argument);
1073                    } else
1074#endif
1075                        main_pause=1;
1076                    break;
1077
1078                    /* Select compressibility of synthetic sample */
1079                case 'P':
1080                    argument++;
1081                    compressibility = (double)readU32FromChar(&argument) / 100;
1082                    break;
1083
1084                    /* unknown command */
1085                default : badusage(programName); CLEAN_RETURN(1);
1086                }
1087            }
1088            continue;
1089        }   /* if (argument[0]=='-') */
1090
1091        /* none of the above : add filename to list */
1092        UTIL_refFilename(filenames, argument);
1093    }
1094
1095    /* Welcome message (if verbose) */
1096    DISPLAYLEVEL(3, WELCOME_MESSAGE);
1097
1098#ifdef ZSTD_MULTITHREAD
1099    if ((nbWorkers==0) && (!singleThread)) {
1100        /* automatically set # workers based on # of reported cpus */
1101        nbWorkers = UTIL_countPhysicalCores();
1102        DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
1103    }
1104#else
1105    (void)singleThread; (void)nbWorkers;
1106#endif
1107
1108#ifdef UTIL_HAS_CREATEFILELIST
1109    g_utilDisplayLevel = g_displayLevel;
1110    if (!followLinks) {
1111        unsigned u, fileNamesNb;
1112        unsigned const nbFilenames = (unsigned)filenames->tableSize;
1113        for (u=0, fileNamesNb=0; u<nbFilenames; u++) {
1114            if ( UTIL_isLink(filenames->fileNames[u])
1115             && !UTIL_isFIFO(filenames->fileNames[u])
1116            ) {
1117                DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);
1118            } else {
1119                filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];
1120        }   }
1121        if (fileNamesNb == 0 && nbFilenames > 0)  /* all names are eliminated */
1122            CLEAN_RETURN(1);
1123        filenames->tableSize = fileNamesNb;
1124    }   /* if (!followLinks) */
1125
1126    /* read names from a file */
1127    if (file_of_names->tableSize) {
1128        size_t const nbFileLists = file_of_names->tableSize;
1129        size_t flNb;
1130        for (flNb=0; flNb < nbFileLists; flNb++) {
1131            FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
1132            if (fnt==NULL) {
1133                DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
1134                CLEAN_RETURN(1);
1135            }
1136            filenames = UTIL_mergeFileNamesTable(filenames, fnt);
1137        }
1138    }
1139
1140    if (recursive) {  /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
1141        UTIL_expandFNT(&filenames, followLinks);
1142    }
1143#else
1144    (void)followLinks;
1145#endif
1146
1147    if (operation == zom_list) {
1148#ifndef ZSTD_NODECOMPRESS
1149        int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel);
1150        CLEAN_RETURN(ret);
1151#else
1152        DISPLAY("file information is not supported \n");
1153        CLEAN_RETURN(1);
1154#endif
1155    }
1156
1157    /* Check if benchmark is selected */
1158    if (operation==zom_bench) {
1159#ifndef ZSTD_NOBENCH
1160        benchParams.blockSize = blockSize;
1161        benchParams.nbWorkers = nbWorkers;
1162        benchParams.realTime = (unsigned)setRealTimePrio;
1163        benchParams.nbSeconds = bench_nbSeconds;
1164        benchParams.ldmFlag = ldmFlag;
1165        benchParams.ldmMinMatch = (int)g_ldmMinMatch;
1166        benchParams.ldmHashLog = (int)g_ldmHashLog;
1167        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
1168            benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
1169        }
1170        if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) {
1171            benchParams.ldmHashRateLog = (int)g_ldmHashRateLog;
1172        }
1173        benchParams.literalCompressionMode = literalCompressionMode;
1174
1175        if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
1176        if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
1177        if (cLevelLast < cLevel) cLevelLast = cLevel;
1178        if (cLevelLast > cLevel)
1179            DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
1180        if (filenames->tableSize > 0) {
1181            if(separateFiles) {
1182                unsigned i;
1183                for(i = 0; i < filenames->tableSize; i++) {
1184                    int c;
1185                    DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);
1186                    for(c = cLevel; c <= cLevelLast; c++) {
1187                        BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
1188                }   }
1189            } else {
1190                for(; cLevel <= cLevelLast; cLevel++) {
1191                    BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
1192            }   }
1193        } else {
1194            for(; cLevel <= cLevelLast; cLevel++) {
1195                BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
1196        }   }
1197
1198#else
1199        (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility;
1200#endif
1201        goto _end;
1202    }
1203
1204    /* Check if dictionary builder is selected */
1205    if (operation==zom_train) {
1206#ifndef ZSTD_NODICT
1207        ZDICT_params_t zParams;
1208        zParams.compressionLevel = dictCLevel;
1209        zParams.notificationLevel = (unsigned)g_displayLevel;
1210        zParams.dictID = dictID;
1211        if (dict == cover) {
1212            int const optimize = !coverParams.k || !coverParams.d;
1213            coverParams.nbThreads = (unsigned)nbWorkers;
1214            coverParams.zParams = zParams;
1215            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize);
1216        } else if (dict == fastCover) {
1217            int const optimize = !fastCoverParams.k || !fastCoverParams.d;
1218            fastCoverParams.nbThreads = (unsigned)nbWorkers;
1219            fastCoverParams.zParams = zParams;
1220            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize);
1221        } else {
1222            ZDICT_legacy_params_t dictParams;
1223            memset(&dictParams, 0, sizeof(dictParams));
1224            dictParams.selectivityLevel = dictSelect;
1225            dictParams.zParams = zParams;
1226            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0);
1227        }
1228#else
1229        (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
1230        DISPLAYLEVEL(1, "training mode not available \n");
1231        operationResult = 1;
1232#endif
1233        goto _end;
1234    }
1235
1236#ifndef ZSTD_NODECOMPRESS
1237    if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); }  /* test mode */
1238#endif
1239
1240    /* No input filename ==> use stdin and stdout */
1241    if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark);
1242    if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName)
1243        outFileName = stdoutmark;  /* when input is stdin, default output is stdout */
1244
1245    /* Check if input/output defined as console; trigger an error in this case */
1246    if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) ) {
1247        DISPLAYLEVEL(1, "stdin is a console, aborting\n");
1248        CLEAN_RETURN(1);
1249    }
1250    if ( outFileName && !strcmp(outFileName, stdoutmark)
1251      && IS_CONSOLE(stdout)
1252      && !strcmp(filenames->fileNames[0], stdinmark)
1253      && !forceStdout
1254      && operation!=zom_decompress ) {
1255        DISPLAYLEVEL(1, "stdout is a console, aborting\n");
1256        CLEAN_RETURN(1);
1257    }
1258
1259#ifndef ZSTD_NOCOMPRESS
1260    /* check compression level limits */
1261    {   int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX;
1262        if (cLevel > maxCLevel) {
1263            DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel);
1264            cLevel = maxCLevel;
1265    }   }
1266#endif
1267
1268    if (showDefaultCParams) {
1269        if (operation == zom_decompress) {
1270            DISPLAY("error : can't use --show-default-cparams in decomrpession mode \n");
1271            CLEAN_RETURN(1);
1272        }
1273    }
1274
1275    if (dictFileName != NULL && patchFromDictFileName != NULL) {
1276        DISPLAY("error : can't use -D and --patch-from=# at the same time \n");
1277        CLEAN_RETURN(1);
1278    }
1279
1280    if (patchFromDictFileName != NULL && filenames->tableSize > 1) {
1281        DISPLAY("error : can't use --patch-from=# on multiple files \n");
1282        CLEAN_RETURN(1);
1283    }
1284
1285    /* No status message in pipe mode (stdin - stdout) */
1286    hasStdout = outFileName && !strcmp(outFileName,stdoutmark);
1287
1288    if (hasStdout && (g_displayLevel==2)) g_displayLevel=1;
1289
1290    /* IO Stream/File */
1291    FIO_setHasStdoutOutput(fCtx, hasStdout);
1292    FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);
1293    FIO_determineHasStdinInput(fCtx, filenames);
1294    FIO_setNotificationLevel(g_displayLevel);
1295    FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
1296    if (memLimit == 0) {
1297        if (compressionParams.windowLog == 0) {
1298            memLimit = (U32)1 << g_defaultMaxWindowLog;
1299        } else {
1300            memLimit = (U32)1 << (compressionParams.windowLog & 31);
1301    }   }
1302    if (patchFromDictFileName != NULL)
1303        dictFileName = patchFromDictFileName;
1304    FIO_setMemLimit(prefs, memLimit);
1305    if (operation==zom_compress) {
1306#ifndef ZSTD_NOCOMPRESS
1307        FIO_setContentSize(prefs, contentSize);
1308        FIO_setNbWorkers(prefs, nbWorkers);
1309        FIO_setBlockSize(prefs, (int)blockSize);
1310        if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
1311        FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
1312        FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
1313        FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch);
1314        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
1315        if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
1316        FIO_setAdaptiveMode(prefs, (unsigned)adapt);
1317        FIO_setAdaptMin(prefs, adaptMin);
1318        FIO_setAdaptMax(prefs, adaptMax);
1319        FIO_setRsyncable(prefs, rsyncable);
1320        FIO_setStreamSrcSize(prefs, streamSrcSize);
1321        FIO_setTargetCBlockSize(prefs, targetCBlockSize);
1322        FIO_setSrcSizeHint(prefs, srcSizeHint);
1323        FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
1324        if (adaptMin > cLevel) cLevel = adaptMin;
1325        if (adaptMax < cLevel) cLevel = adaptMax;
1326
1327        /* Sanity check: ZSTD_NB_STRATEGIES must match the upper bound the library reports for ZSTD_c_strategy */
1328        { ZSTD_bounds strategyBounds = ZSTD_cParam_getBounds(ZSTD_c_strategy);
1329          assert(ZSTD_NB_STRATEGIES == strategyBounds.upperBound);
1330          (void)strategyBounds; }
1331
1332        if (showDefaultCParams) {
1333            size_t fileNb;
1334            for (fileNb = 0; fileNb < (size_t)filenames->tableSize; fileNb++) {
1335                unsigned long long fileSize = UTIL_getFileSize(filenames->fileNames[fileNb]);
1336                const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0;
1337                const ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, fileSize, dictSize);
1338                if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAY("%s (%u bytes)\n", filenames->fileNames[fileNb], (unsigned)fileSize);
1339                else DISPLAY("%s (src size unknown)\n", filenames->fileNames[fileNb]);
1340                DISPLAY(" - windowLog     : %u\n", cParams.windowLog);
1341                DISPLAY(" - chainLog      : %u\n", cParams.chainLog);
1342                DISPLAY(" - hashLog       : %u\n", cParams.hashLog);
1343                DISPLAY(" - searchLog     : %u\n", cParams.searchLog);
1344                DISPLAY(" - minMatch      : %u\n", cParams.minMatch);
1345                DISPLAY(" - targetLength  : %u\n", cParams.targetLength);
1346                assert(cParams.strategy < ZSTD_NB_STRATEGIES + 1);
1347                DISPLAY(" - strategy      : %s (%u)\n", ZSTD_strategyMap[(int)cParams.strategy], (unsigned)cParams.strategy);
1348            }
1349        }
1350
1351        if ((filenames->tableSize==1) && outFileName)
1352            operationResult = FIO_compressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams);
1353        else
1354            operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
1355#else
1356        (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */
1357        DISPLAY("Compression not supported \n");
1358#endif
1359    } else {  /* decompression or test */
1360#ifndef ZSTD_NODECOMPRESS
1361        if (filenames->tableSize == 1 && outFileName) {
1362            operationResult = FIO_decompressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName);
1363        } else {
1364            operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName);
1365        }
1366#else
1367        DISPLAY("Decompression not supported \n");
1368#endif
1369    }
1370
1371_end:
1372    FIO_freePreferences(prefs);
1373    FIO_freeContext(fCtx);
1374    if (main_pause) waitEnter();
1375    UTIL_freeFileNamesTable(filenames);
1376    UTIL_freeFileNamesTable(file_of_names);
1377
1378    return operationResult;
1379}
1380