1/*
2 * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#include <limits.h>
12#include <math.h>
13#include <stddef.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17
18#include "util.h"
19#include "zstd.h"
20#include "zstd_internal.h"
21#include "mem.h"
22#define ZDICT_STATIC_LINKING_ONLY
23#include "zdict.h"
24
25// Direct access to internal compression functions is required
26#include "zstd_compress.c"
27
28#define XXH_STATIC_LINKING_ONLY
29#include "xxhash.h"     /* XXH64 */
30
31#ifndef MIN
32    #define MIN(a, b) ((a) < (b) ? (a) : (b))
33#endif
34
35#ifndef MAX_PATH
36    #ifdef PATH_MAX
37        #define MAX_PATH PATH_MAX
38    #else
39        #define MAX_PATH 256
40    #endif
41#endif
42
43/*-************************************
44*  DISPLAY Macros
45**************************************/
46#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
47#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
48static U32 g_displayLevel = 2;
49
50#define DISPLAYUPDATE(...)                                                     \
51    do {                                                                       \
52        if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) ||           \
53            (g_displayLevel >= 4)) {                                           \
54            g_displayClock = UTIL_getTime();                                   \
55            DISPLAY(__VA_ARGS__);                                              \
56            if (g_displayLevel >= 4) fflush(stderr);                           \
57        }                                                                      \
58    } while (0)
59
60static const U64 g_refreshRate = SEC_TO_MICRO / 6;
61static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
62
63#define CHECKERR(code)                                                         \
64    do {                                                                       \
65        if (ZSTD_isError(code)) {                                              \
66            DISPLAY("Error occurred while generating data: %s\n",              \
67                    ZSTD_getErrorName(code));                                  \
68            exit(1);                                                           \
69        }                                                                      \
70    } while (0)
71
72/*-*******************************************************
73*  Random function
74*********************************************************/
75static unsigned RAND(unsigned* src)
76{
77#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
78    static const U32 prime1 = 2654435761U;
79    static const U32 prime2 = 2246822519U;
80    U32 rand32 = *src;
81    rand32 *= prime1;
82    rand32 += prime2;
83    rand32  = RAND_rotl32(rand32, 13);
84    *src = rand32;
85    return RAND_rotl32(rand32, 27);
86#undef RAND_rotl32
87}
88
89#define DISTSIZE (8192)
90
91/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
92static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
93{
94    size_t i;
95    BYTE* op = ptr;
96
97    for (i = 0; i < size; i++) {
98        op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
99    }
100}
101
102/* Write `size` random bytes into `ptr` */
103static void RAND_buffer(U32* seed, void* ptr, size_t size)
104{
105    size_t i;
106    BYTE* op = ptr;
107
108    for (i = 0; i + 4 <= size; i += 4) {
109        MEM_writeLE32(op + i, RAND(seed));
110    }
111    for (; i < size; i++) {
112        op[i] = RAND(seed) & 0xff;
113    }
114}
115
116/* Write `size` bytes into `ptr` following the distribution `dist` */
117static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
118{
119    size_t i;
120    BYTE* op = ptr;
121
122    for (i = 0; i < size; i++) {
123        op[i] = dist[RAND(seed) % DISTSIZE];
124    }
125}
126
127/* Generate a random distribution where the frequency of each symbol follows a
128 * geometric distribution defined by `weight`
129 * `dist` should have size at least `DISTSIZE` */
130static void RAND_genDist(U32* seed, BYTE* dist, double weight)
131{
132    size_t i = 0;
133    size_t statesLeft = DISTSIZE;
134    BYTE symb = (BYTE) (RAND(seed) % 256);
135    BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
136
137    while (i < DISTSIZE) {
138        size_t states = ((size_t)(weight * statesLeft)) + 1;
139        size_t j;
140        for (j = 0; j < states && i < DISTSIZE; j++, i++) {
141            dist[i] = symb;
142        }
143
144        symb += step;
145        statesLeft -= states;
146    }
147}
148
149/* Generates a random number in the range [min, max) */
150static inline U32 RAND_range(U32* seed, U32 min, U32 max)
151{
152    return (RAND(seed) % (max-min)) + min;
153}
154
/* Round a non-negative floating-point value to the nearest U32.
 * The argument is parenthesized so that expressions containing operators of
 * lower precedence than `+` expand as intended. */
#define ROUND(x) ((U32)((x) + 0.5))
156
157/* Generates a random number in an exponential distribution with mean `mean` */
158static double RAND_exp(U32* seed, double mean)
159{
160    double const u = RAND(seed) / (double) UINT_MAX;
161    return log(1-u) * (-mean);
162}
163
164/*-*******************************************************
165*  Constants and Structs
166*********************************************************/
167const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"};
168
169#define MAX_DECOMPRESSED_SIZE_LOG 20
170#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
171
172#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
173
174#define MIN_SEQ_LEN (3)
175#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
176
177BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
178BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
179BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];
180
181seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
182BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
183BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
184BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
185BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
186
187unsigned WKSP[1024];
188
/* Frame parameters picked by writeFrameHeader() and saved in frame_t.header */
typedef struct {
    size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
    unsigned windowSize; /* contentSize >= windowSize means single segment */
} frameHeader_t;
193
194/* For repeat modes */
195typedef struct {
196    U32 rep[ZSTD_REP_NUM];
197
198    int hufInit;
199    /* the distribution used in the previous block for repeat mode */
200    BYTE hufDist[DISTSIZE];
201    U32 hufTable [256]; /* HUF_CElt is an incomplete type */
202
203    int fseInit;
204    FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
205    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
206    FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
207
208    /* Symbols that were present in the previous distribution, for use with
209     * set_repeat */
210    BYTE litlengthSymbolSet[36];
211    BYTE offsetSymbolSet[29];
212    BYTE matchlengthSymbolSet[53];
213} cblockStats_t;
214
/* State of the frame currently being generated: the compressed output
 * cursor, the regenerated (decompressed) content cursor, the frame header
 * parameters and the per-block statistics used for repeat modes. */
typedef struct {
    void* data;      /* next byte to write in the compressed output; each
                      * writer reads it and stores the advanced pointer back */
    void* dataStart; /* presumably the start of the compressed output buffer
                      * -- not used in this part of the file, confirm */
    void* dataEnd;   /* end bound of the compressed output, used to compute
                      * the space available to the encoders */

    void* src;       /* where generateSequences() starts writing the
                      * regenerated content of the current block */
    void* srcStart;  /* reference point for positions and match offsets in
                      * the regenerated content */
    void* srcEnd;    /* presumably the end of the regenerated content buffer
                      * -- not used in this part of the file, confirm */

    frameHeader_t header; /* filled in by writeFrameHeader() */

    cblockStats_t stats;
    cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
} frame_t;
229
/* Dictionary settings for the frame being generated */
typedef struct {
    int useDict;            /* non-zero: the frame header carries a 4-byte
                             * dictionary ID and sequences may use offsets
                             * reaching before the start of the content */
    U32 dictID;             /* value written in the frame header's dictionary ID field */
    size_t dictContentSize; /* number of bytes available at dictContent */
    BYTE* dictContent;      /* dictionary bytes; matches reaching before the
                             * start of the content are copied from its end */
} dictInfo;
236
237typedef enum {
238  gt_frame = 0,  /* generate frames */
239  gt_block,      /* generate compressed blocks without block/frame headers */
240} genType_e;
241
242/*-*******************************************************
243*  Global variables (set from command line)
244*********************************************************/
245U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG;  /* <= 20 */
246U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX;                       /* <= 128 KB */
247
248/*-*******************************************************
249*  Generator Functions
250*********************************************************/
251
252struct {
253    int contentSize; /* force the content size to be present */
254} opts; /* advanced options on generation */
255
256/* Generate and write a random frame header */
257static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
258{
259    BYTE* const op = frame->data;
260    size_t pos = 0;
261    frameHeader_t fh;
262
263    BYTE windowByte = 0;
264
265    int singleSegment = 0;
266    int contentSizeFlag = 0;
267    int fcsCode = 0;
268
269    memset(&fh, 0, sizeof(fh));
270
271    /* generate window size */
272    {
273        /* Follow window algorithm from specification */
274        int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
275        int const mantissa = RAND(seed) % 8;
276        windowByte = (BYTE) ((exponent << 3) | mantissa);
277        fh.windowSize = (1U << (exponent + 10));
278        fh.windowSize += fh.windowSize / 8 * mantissa;
279    }
280
281    {
282        /* Generate random content size */
283        size_t highBit;
284        if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
285            /* do content of at least 128 bytes */
286            highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
287        } else if (RAND(seed) & 3) {
288            /* do small content */
289            highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
290        } else {
291            /* 0 size frame */
292            highBit = 0;
293        }
294        fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
295
296        /* provide size sometimes */
297        contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
298
299        if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
300            /* do single segment sometimes */
301            fh.windowSize = (U32) fh.contentSize;
302            singleSegment = 1;
303        }
304    }
305
306    if (contentSizeFlag) {
307        /* Determine how large fcs field has to be */
308        int minFcsCode = (fh.contentSize >= 256) +
309                               (fh.contentSize >= 65536 + 256) +
310                               (fh.contentSize > 0xFFFFFFFFU);
311        if (!singleSegment && !minFcsCode) {
312            minFcsCode = 1;
313        }
314        fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
315        if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
316    }
317
318    /* write out the header */
319    MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
320    pos += 4;
321
322    {
323        /*
324         * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
325         * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
326         * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
327         * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
328         * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
329         */
330        int const dictBits = info.useDict ? 3 : 0;
331        BYTE const frameHeaderDescriptor =
332                (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
333        op[pos++] = frameHeaderDescriptor;
334    }
335
336    if (!singleSegment) {
337        op[pos++] = windowByte;
338    }
339    if (info.useDict) {
340        MEM_writeLE32(op + pos, (U32) info.dictID);
341        pos += 4;
342    }
343    if (contentSizeFlag) {
344        switch (fcsCode) {
345        default: /* Impossible */
346        case 0: op[pos++] = (BYTE) fh.contentSize; break;
347        case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
348        case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
349        case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
350        }
351    }
352
353    DISPLAYLEVEL(3, " frame content size:\t%u\n", (U32)fh.contentSize);
354    DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
355    DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
356    DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
357
358    frame->data = op + pos;
359    frame->header = fh;
360}
361
362/* Write a literal block in either raw or RLE form, return the literals size */
363static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
364{
365    BYTE* op = (BYTE*)frame->data;
366    int const type = RAND(seed) % 2;
367    int const sizeFormatDesc = RAND(seed) % 8;
368    size_t litSize;
369    size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
370
371    if (sizeFormatDesc == 0) {
372        /* Size_FormatDesc = ?0 */
373        maxLitSize = MIN(maxLitSize, 31);
374    } else if (sizeFormatDesc <= 4) {
375        /* Size_FormatDesc = 01 */
376        maxLitSize = MIN(maxLitSize, 4095);
377    } else {
378        /* Size_Format = 11 */
379        maxLitSize = MIN(maxLitSize, 1048575);
380    }
381
382    litSize = RAND(seed) % (maxLitSize + 1);
383    if (frame->src == frame->srcStart && litSize == 0) {
384        litSize = 1; /* no empty literals if there's nothing preceding this block */
385    }
386    if (litSize + 3 > contentSize) {
387        litSize = contentSize; /* no matches shorter than 3 are allowed */
388    }
389    /* use smallest size format that fits */
390    if (litSize < 32) {
391        op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
392        op += 1;
393    } else if (litSize < 4096) {
394        op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
395        op[1] = (litSize >> 4) & 0xff;
396        op += 2;
397    } else {
398        op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
399        op[1] = (litSize >> 4) & 0xff;
400        op[2] = (litSize >> 12) & 0xff;
401        op += 3;
402    }
403
404    if (type == 0) {
405        /* Raw literals */
406        DISPLAYLEVEL(4, "   raw literals\n");
407
408        RAND_buffer(seed, LITERAL_BUFFER, litSize);
409        memcpy(op, LITERAL_BUFFER, litSize);
410        op += litSize;
411    } else {
412        /* RLE literals */
413        BYTE const symb = (BYTE) (RAND(seed) % 256);
414
415        DISPLAYLEVEL(4, "   rle literals: 0x%02x\n", (U32)symb);
416
417        memset(LITERAL_BUFFER, symb, litSize);
418        op[0] = symb;
419        op++;
420    }
421
422    frame->data = op;
423
424    return litSize;
425}
426
427/* Generate a Huffman header for the given source */
428static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
429                                 const void* src, size_t srcSize)
430{
431    BYTE* const ostart = (BYTE*)dst;
432    BYTE* op = ostart;
433
434    unsigned huffLog = 11;
435    U32 maxSymbolValue = 255;
436
437    U32 count[HUF_SYMBOLVALUE_MAX+1];
438
439    /* Scan input and build symbol stats */
440    {   size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
441        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; }   /* single symbol, rle */
442        if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
443    }
444
445    /* Build Huffman Tree */
446    /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
447    huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
448    DISPLAYLEVEL(6, "     huffman log: %u\n", huffLog);
449    {   size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
450        CHECKERR(maxBits);
451        huffLog = (U32)maxBits;
452    }
453
454    /* Write table description header */
455    {   size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog);
456        if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
457        op += hSize;
458    }
459
460    return op - ostart;
461}
462
463/* Write a Huffman coded literals block and return the literals size */
464static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
465{
466    BYTE* origop = (BYTE*)frame->data;
467    BYTE* opend = (BYTE*)frame->dataEnd;
468    BYTE* op;
469    BYTE* const ostart = origop;
470    int const sizeFormat = RAND(seed) % 4;
471    size_t litSize;
472    size_t hufHeaderSize = 0;
473    size_t compressedSize = 0;
474    size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
475
476    symbolEncodingType_e hType;
477
478    if (contentSize < 64) {
479        /* make sure we get reasonably-sized literals for compression */
480        return ERROR(GENERIC);
481    }
482
483    DISPLAYLEVEL(4, "   compressed literals\n");
484
485    switch (sizeFormat) {
486    case 0: /* fall through, size is the same as case 1 */
487    case 1:
488        maxLitSize = MIN(maxLitSize, 1023);
489        origop += 3;
490        break;
491    case 2:
492        maxLitSize = MIN(maxLitSize, 16383);
493        origop += 4;
494        break;
495    case 3:
496        maxLitSize = MIN(maxLitSize, 262143);
497        origop += 5;
498        break;
499    default:; /* impossible */
500    }
501
502    do {
503        op = origop;
504        do {
505            litSize = RAND(seed) % (maxLitSize + 1);
506        } while (litSize < 32); /* avoid small literal sizes */
507        if (litSize + 3 > contentSize) {
508            litSize = contentSize; /* no matches shorter than 3 are allowed */
509        }
510
511        /* most of the time generate a new distribution */
512        if ((RAND(seed) & 3) || !frame->stats.hufInit) {
513            do {
514                if (RAND(seed) & 3) {
515                    /* add 10 to ensure some compressability */
516                    double const weight = ((RAND(seed) % 90) + 10) / 100.0;
517
518                    DISPLAYLEVEL(5, "    distribution weight: %d%%\n",
519                                 (int)(weight * 100));
520
521                    RAND_genDist(seed, frame->stats.hufDist, weight);
522                } else {
523                    /* sometimes do restricted range literals to force
524                     * non-huffman headers */
525                    DISPLAYLEVEL(5, "    small range literals\n");
526                    RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
527                                       15);
528                }
529                RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
530                                litSize);
531
532                /* generate the header from the distribution instead of the
533                 * actual data to avoid bugs with symbols that were in the
534                 * distribution but never showed up in the output */
535                hufHeaderSize = writeHufHeader(
536                        seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op,
537                        frame->stats.hufDist, DISTSIZE);
538                CHECKERR(hufHeaderSize);
539                /* repeat until a valid header is written */
540            } while (hufHeaderSize == 0);
541            op += hufHeaderSize;
542            hType = set_compressed;
543
544            frame->stats.hufInit = 1;
545        } else {
546            /* repeat the distribution/table from last time */
547            DISPLAYLEVEL(5, "    huffman repeat stats\n");
548            RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
549                            litSize);
550            hufHeaderSize = 0;
551            hType = set_repeat;
552        }
553
554        do {
555            compressedSize =
556                    sizeFormat == 0
557                            ? HUF_compress1X_usingCTable(
558                                      op, opend - op, LITERAL_BUFFER, litSize,
559                                      (HUF_CElt*)frame->stats.hufTable)
560                            : HUF_compress4X_usingCTable(
561                                      op, opend - op, LITERAL_BUFFER, litSize,
562                                      (HUF_CElt*)frame->stats.hufTable);
563            CHECKERR(compressedSize);
564            /* this only occurs when it could not compress or similar */
565        } while (compressedSize <= 0);
566
567        op += compressedSize;
568
569        compressedSize += hufHeaderSize;
570        DISPLAYLEVEL(5, "    regenerated size: %u\n", (U32)litSize);
571        DISPLAYLEVEL(5, "    compressed size: %u\n", (U32)compressedSize);
572        if (compressedSize >= litSize) {
573            DISPLAYLEVEL(5, "     trying again\n");
574            /* if we have to try again, reset the stats so we don't accidentally
575             * try to repeat a distribution we just made */
576            frame->stats = frame->oldStats;
577        } else {
578            break;
579        }
580    } while (1);
581
582    /* write header */
583    switch (sizeFormat) {
584    case 0: /* fall through, size is the same as case 1 */
585    case 1: {
586        U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
587                           ((U32)compressedSize << 14);
588        MEM_writeLE24(ostart, header);
589        break;
590    }
591    case 2: {
592        U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
593                           ((U32)compressedSize << 18);
594        MEM_writeLE32(ostart, header);
595        break;
596    }
597    case 3: {
598        U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
599                           ((U32)compressedSize << 22);
600        MEM_writeLE32(ostart, header);
601        ostart[4] = (BYTE)(compressedSize >> 10);
602        break;
603    }
604    default:; /* impossible */
605    }
606
607    frame->data = op;
608    return litSize;
609}
610
611static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
612{
613    /* only do compressed for larger segments to avoid compressibility issues */
614    if (RAND(seed) & 7 && contentSize >= 64) {
615        return writeLiteralsBlockCompressed(seed, frame, contentSize);
616    } else {
617        return writeLiteralsBlockSimple(seed, frame, contentSize);
618    }
619}
620
621static inline void initSeqStore(seqStore_t *seqStore) {
622    seqStore->sequencesStart = SEQUENCE_BUFFER;
623    seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
624    seqStore->llCode = SEQUENCE_LLCODE;
625    seqStore->mlCode = SEQUENCE_MLCODE;
626    seqStore->ofCode = SEQUENCE_OFCODE;
627
628    ZSTD_resetSeqStore(seqStore);
629}
630
/* Randomly generate sequence commands.
 *
 * Splits a block of `contentSize` bytes into `literalsSize` literal bytes
 * (taken in order from LITERAL_BUFFER) and matches covering the rest.
 * For every sequence, the literals are copied and the match is executed at
 * frame->src onwards, so the regenerated content is built as a side effect,
 * and the sequence is recorded in `seqStore` through ZSTD_storeSeq().
 * Literals left over after the last sequence are appended at the end.
 * The repeat-offset history in frame->stats.rep is updated along the way.
 *
 * When info.useDict is set, some offsets reach before frame->srcStart and
 * are resolved against the end of info.dictContent.
 *
 * The remaining match length (contentSize - literalsSize) is expected to be
 * either 0 or at least MIN_SEQ_LEN: any other value would make the modulus
 * in the sequence count computation zero.
 *
 * Returns the number of sequences generated. */
static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
                                size_t contentSize, size_t literalsSize, dictInfo info)
{
    /* The total length of all the matches */
    size_t const remainingMatch = contentSize - literalsSize;
    size_t excessMatch = 0;
    U32 numSequences = 0;

    U32 i;


    const BYTE* literals = LITERAL_BUFFER;
    BYTE* srcPtr = frame->src;

    if (literalsSize != contentSize) {
        /* each match must be at least MIN_SEQ_LEN, so this is the maximum
         * number of sequences we can have */
        U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
        numSequences = (RAND(seed) % maxSequences) + 1;

        /* the extra match lengths we have to allocate to each sequence */
        excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
    }

    DISPLAYLEVEL(5, "    total match lengths: %u\n", (U32)remainingMatch);
    for (i = 0; i < numSequences; i++) {
        /* Generate match and literal lengths by exponential distribution to
         * ensure nice numbers */
        U32 matchLen =
                MIN_SEQ_LEN +
                ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
        U32 literalLen =
                (RAND(seed) & 7)
                        ? ROUND(RAND_exp(seed,
                                         literalsSize /
                                                 (double)(numSequences - i)))
                        : 0;
        /* actual offset, code to send, and point to copy up to when shifting
         * codes in the repeat offsets history */
        U32 offset, offsetCode, repIndex;

        /* bounds checks */
        matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
        literalLen = MIN(literalLen, (U32) literalsSize);
        /* the very first sequence of a frame needs at least one literal, so
         * that there is something for its match to refer to */
        if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
        /* the last sequence takes all the match length that is left */
        if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;

        memcpy(srcPtr, literals, literalLen);
        srcPtr += literalLen;
        /* pick an offset, retrying until it is non-zero and (without a
         * dictionary) does not reach before the start of the content */
        do {
            if (RAND(seed) & 7) {
                /* do a normal offset */
                U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
                offset = (RAND(seed) %
                          MIN(frame->header.windowSize,
                              (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
                         1;
                if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
                    /* need to occasionally generate offsets that go past the start */
                    /* the last sequence is excluded (i+1 != numSequences) because it has to adhere to the predetermined contentSize */
                    U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
                    offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
                    if (offset > frame->header.windowSize) {
                        if (lenPastStart < MIN_SEQ_LEN) {
                            /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
                            /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
                            /* make sure lenPastStart does not go past dictionary start though */
                            lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
                            offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
                        }
                        {
                            U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
                            matchLen = MIN(matchLen, matchLenBound);
                        }
                    }
                }
                /* codes below 3 are taken by the repeat offsets handled in
                 * the other branch, so a normal offset is shifted by
                 * ZSTD_REP_MOVE */
                offsetCode = offset + ZSTD_REP_MOVE;
                repIndex = 2;
            } else {
                /* do a repeat offset */
                offsetCode = RAND(seed) % 3;
                if (literalLen > 0) {
                    offset = frame->stats.rep[offsetCode];
                    repIndex = offsetCode;
                } else {
                    /* special case: without literals the repeat codes are
                     * shifted by one, and the last code means rep[0] - 1 */
                    offset = offsetCode == 2 ? frame->stats.rep[0] - 1
                                           : frame->stats.rep[offsetCode + 1];
                    repIndex = MIN(2, offsetCode + 1);
                }
            }
        } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);

        /* execute the match byte by byte, so that overlapping matches
         * (offset smaller than the match length) replicate correctly */
        {
            size_t j;
            BYTE* const dictEnd = info.dictContent + info.dictContentSize;
            for (j = 0; j < matchLen; j++) {
                if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
                    /* copy from dictionary instead of literals */
                    size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
                    *srcPtr = *(dictEnd - dictOffset);
                }
                else {
                    *srcPtr = *(srcPtr-offset);
                }
                srcPtr++;
            }
        }

        /* shift the repeat offsets history down to repIndex and put the
         * offset just used in front */
        {   int r;
            for (r = repIndex; r > 0; r--) {
                frame->stats.rep[r] = frame->stats.rep[r - 1];
            }
            frame->stats.rep[0] = offset;
        }

        DISPLAYLEVEL(6, "      LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen);
        DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
                     (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i);
        DISPLAYLEVEL(6, "\n");
        if (offsetCode < 3) {
            DISPLAYLEVEL(7, "        repeat offset: %d\n", repIndex);
        }
        /* use libzstd sequence handling */
        ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
                      matchLen - MINMATCH);

        /* account for what this sequence consumed */
        literalsSize -= literalLen;
        excessMatch -= (matchLen - MIN_SEQ_LEN);
        literals += literalLen;
    }

    /* literals not used by any sequence end the block */
    memcpy(srcPtr, literals, literalsSize);
    srcPtr += literalsSize;
    DISPLAYLEVEL(6, "      excess literals: %5u", (U32)literalsSize);
    DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
    DISPLAYLEVEL(6, "\n");

    return numSequences;
}
772
773static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
774{
775    size_t i;
776
777    memset(set, 0, (size_t)maxSymbolValue+1);
778
779    for (i = 0; i < len; i++) {
780        set[symbols[i]] = 1;
781    }
782}
783
784static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
785{
786    size_t i;
787
788    for (i = 0; i < len; i++) {
789        if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
790            return 0;
791        }
792    }
793    return 1;
794}
795
796static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
797                             size_t nbSeq)
798{
799    /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
800    U32 count[MaxSeq+1];
801    S16 norm[MaxSeq+1];
802    FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
803    FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
804    FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
805    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
806    const seqDef* const sequences = seqStorePtr->sequencesStart;
807    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
808    const BYTE* const llCodeTable = seqStorePtr->llCode;
809    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
810    BYTE* const oend = (BYTE*)frame->dataEnd;
811    BYTE* op = (BYTE*)frame->data;
812    BYTE* seqHead;
813    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
814
815    /* literals compressing block removed so that can be done separately */
816
817    /* Sequences Header */
818    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
819    if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
820    else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
821    else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
822
823    /* seqHead : flags for FSE encoding type */
824    seqHead = op++;
825
826    if (nbSeq==0) {
827        frame->data = op;
828
829        return 0;
830    }
831
832    /* convert length/distances into codes */
833    ZSTD_seqToCodes(seqStorePtr);
834
835    /* CTable for Literal Lengths */
836    {   U32 max = MaxLL;
837        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
838        if (mostFrequent == nbSeq) {
839            /* do RLE if we have the chance */
840            *op++ = llCodeTable[0];
841            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
842            LLtype = set_rle;
843        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
844                   isSymbolSubset(llCodeTable, nbSeq,
845                                  frame->stats.litlengthSymbolSet, 35)) {
846            /* maybe do repeat mode if we're allowed to */
847            LLtype = set_repeat;
848        } else if (!(RAND(seed) & 3)) {
849            /* maybe use the default distribution */
850            FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
851            LLtype = set_basic;
852        } else {
853            /* fall back on a full table */
854            size_t nbSeq_1 = nbSeq;
855            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
856            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
857            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
858            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
859              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
860              op += NCountSize; }
861            FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
862            LLtype = set_compressed;
863    }   }
864
865    /* CTable for Offsets */
866    /* see Literal Lengths for descriptions of mode choices */
867    {   U32 max = MaxOff;
868        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
869        if (mostFrequent == nbSeq) {
870            *op++ = ofCodeTable[0];
871            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
872            Offtype = set_rle;
873        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
874                   isSymbolSubset(ofCodeTable, nbSeq,
875                                  frame->stats.offsetSymbolSet, 28)) {
876            Offtype = set_repeat;
877        } else if (!(RAND(seed) & 3)) {
878            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
879            Offtype = set_basic;
880        } else {
881            size_t nbSeq_1 = nbSeq;
882            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
883            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
884            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
885            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
886              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
887              op += NCountSize; }
888            FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
889            Offtype = set_compressed;
890    }   }
891
892    /* CTable for MatchLengths */
893    /* see Literal Lengths for descriptions of mode choices */
894    {   U32 max = MaxML;
895        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
896        if (mostFrequent == nbSeq) {
897            *op++ = *mlCodeTable;
898            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
899            MLtype = set_rle;
900        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
901                   isSymbolSubset(mlCodeTable, nbSeq,
902                                  frame->stats.matchlengthSymbolSet, 52)) {
903            MLtype = set_repeat;
904        } else if (!(RAND(seed) & 3)) {
905            /* sometimes do default distribution */
906            FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
907            MLtype = set_basic;
908        } else {
909            /* fall back on table */
910            size_t nbSeq_1 = nbSeq;
911            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
912            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
913            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
914            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
915              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
916              op += NCountSize; }
917            FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
918            MLtype = set_compressed;
919    }   }
920    frame->stats.fseInit = 1;
921    initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
922    initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
923    initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
924
925    DISPLAYLEVEL(5, "    LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype);
926
927    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
928
929    /* Encoding Sequences */
930    {   BIT_CStream_t blockStream;
931        FSE_CState_t  stateMatchLength;
932        FSE_CState_t  stateOffsetBits;
933        FSE_CState_t  stateLitLength;
934
935        CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
936
937        /* first symbols */
938        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
939        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
940        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
941        BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
942        if (MEM_32bits()) BIT_flushBits(&blockStream);
943        BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
944        if (MEM_32bits()) BIT_flushBits(&blockStream);
945        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
946        BIT_flushBits(&blockStream);
947
948        {   size_t n;
949            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
950                BYTE const llCode = llCodeTable[n];
951                BYTE const ofCode = ofCodeTable[n];
952                BYTE const mlCode = mlCodeTable[n];
953                U32  const llBits = LL_bits[llCode];
954                U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
955                U32  const mlBits = ML_bits[mlCode];
956                                                                                /* (7)*/  /* (7)*/
957                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
958                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
959                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
960                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
961                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
962                    BIT_flushBits(&blockStream);                                /* (7)*/
963                BIT_addBits(&blockStream, sequences[n].litLength, llBits);
964                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
965                BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
966                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
967                BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
968                BIT_flushBits(&blockStream);                                    /* (7)*/
969        }   }
970
971        FSE_flushCState(&blockStream, &stateMatchLength);
972        FSE_flushCState(&blockStream, &stateOffsetBits);
973        FSE_flushCState(&blockStream, &stateLitLength);
974
975        {   size_t const streamSize = BIT_closeCStream(&blockStream);
976            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
977            op += streamSize;
978    }   }
979
980    frame->data = op;
981
982    return 0;
983}
984
985static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
986                                  size_t literalsSize, dictInfo info)
987{
988    seqStore_t seqStore;
989    size_t numSequences;
990
991
992    initSeqStore(&seqStore);
993
994    /* randomly generate sequences */
995    numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
996    /* write them out to the frame data */
997    CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
998
999    return numSequences;
1000}
1001
1002static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
1003{
1004    BYTE* const blockStart = (BYTE*)frame->data;
1005    size_t literalsSize;
1006    size_t nbSeq;
1007
1008    DISPLAYLEVEL(4, "  compressed block:\n");
1009
1010    literalsSize = writeLiteralsBlock(seed, frame, contentSize);
1011
1012    DISPLAYLEVEL(4, "   literals size: %u\n", (U32)literalsSize);
1013
1014    nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
1015
1016    DISPLAYLEVEL(4, "   number of sequences: %u\n", (U32)nbSeq);
1017
1018    return (BYTE*)frame->data - blockStart;
1019}
1020
1021static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
1022                       int lastBlock, dictInfo info)
1023{
1024    int const blockTypeDesc = RAND(seed) % 8;
1025    size_t blockSize;
1026    int blockType;
1027
1028    BYTE *const header = (BYTE*)frame->data;
1029    BYTE *op = header + 3;
1030
1031    DISPLAYLEVEL(4, " block:\n");
1032    DISPLAYLEVEL(4, "  block content size: %u\n", (U32)contentSize);
1033    DISPLAYLEVEL(4, "  last block: %s\n", lastBlock ? "yes" : "no");
1034
1035    if (blockTypeDesc == 0) {
1036        /* Raw data frame */
1037
1038        RAND_buffer(seed, frame->src, contentSize);
1039        memcpy(op, frame->src, contentSize);
1040
1041        op += contentSize;
1042        blockType = 0;
1043        blockSize = contentSize;
1044    } else if (blockTypeDesc == 1) {
1045        /* RLE */
1046        BYTE const symbol = RAND(seed) & 0xff;
1047
1048        op[0] = symbol;
1049        memset(frame->src, symbol, contentSize);
1050
1051        op++;
1052        blockType = 1;
1053        blockSize = contentSize;
1054    } else {
1055        /* compressed, most common */
1056        size_t compressedSize;
1057        blockType = 2;
1058
1059        frame->oldStats = frame->stats;
1060
1061        frame->data = op;
1062        compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
1063        if (compressedSize >= contentSize) {   /* compressed block must be strictly smaller than uncompressed one */
1064            blockType = 0;
1065            memcpy(op, frame->src, contentSize);
1066
1067            op += contentSize;
1068            blockSize = contentSize; /* fall back on raw block if data doesn't
1069                                        compress */
1070
1071            frame->stats = frame->oldStats; /* don't update the stats */
1072        } else {
1073            op += compressedSize;
1074            blockSize = compressedSize;
1075        }
1076    }
1077    frame->src = (BYTE*)frame->src + contentSize;
1078
1079    DISPLAYLEVEL(4, "  block type: %s\n", BLOCK_TYPES[blockType]);
1080    DISPLAYLEVEL(4, "  block size field: %u\n", (U32)blockSize);
1081
1082    header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
1083    MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
1084
1085    frame->data = op;
1086}
1087
1088static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
1089{
1090    size_t contentLeft = frame->header.contentSize;
1091    size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1092    while (1) {
1093        /* 1 in 4 chance of ending frame */
1094        int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
1095        size_t blockContentSize;
1096        if (lastBlock) {
1097            blockContentSize = contentLeft;
1098        } else {
1099            if (contentLeft > 0 && (RAND(seed) & 7)) {
1100                /* some variable size block */
1101                blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
1102            } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
1103                /* some full size block */
1104                blockContentSize = maxBlockSize;
1105            } else {
1106                /* some empty block */
1107                blockContentSize = 0;
1108            }
1109        }
1110
1111        writeBlock(seed, frame, blockContentSize, lastBlock, info);
1112
1113        contentLeft -= blockContentSize;
1114        if (lastBlock) break;
1115    }
1116}
1117
1118static void writeChecksum(frame_t* frame)
1119{
1120    /* write checksum so implementations can verify their output */
1121    U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
1122    DISPLAYLEVEL(3, "  checksum: %08x\n", (U32)digest);
1123    MEM_writeLE32(frame->data, (U32)digest);
1124    frame->data = (BYTE*)frame->data + 4;
1125}
1126
1127static void outputBuffer(const void* buf, size_t size, const char* const path)
1128{
1129    /* write data out to file */
1130    const BYTE* ip = (const BYTE*)buf;
1131    FILE* out;
1132    if (path) {
1133        out = fopen(path, "wb");
1134    } else {
1135        out = stdout;
1136    }
1137    if (!out) {
1138        fprintf(stderr, "Failed to open file at %s: ", path);
1139        perror(NULL);
1140        exit(1);
1141    }
1142
1143    {   size_t fsize = size;
1144        size_t written = 0;
1145        while (written < fsize) {
1146            written += fwrite(ip + written, 1, fsize - written, out);
1147            if (ferror(out)) {
1148                fprintf(stderr, "Failed to write to file at %s: ", path);
1149                perror(NULL);
1150                exit(1);
1151            }
1152        }
1153    }
1154
1155    if (path) {
1156        fclose(out);
1157    }
1158}
1159
1160static void initFrame(frame_t* fr)
1161{
1162    memset(fr, 0, sizeof(*fr));
1163    fr->data = fr->dataStart = FRAME_BUFFER;
1164    fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
1165    fr->src = fr->srcStart = CONTENT_BUFFER;
1166    fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
1167
1168    /* init repeat codes */
1169    fr->stats.rep[0] = 1;
1170    fr->stats.rep[1] = 4;
1171    fr->stats.rep[2] = 8;
1172}
1173
1174/**
1175 * Generated a single zstd compressed block with no block/frame header.
1176 * Returns the final seed.
1177 */
1178static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
1179{
1180    size_t blockContentSize;
1181    int blockWritten = 0;
1182    BYTE* op;
1183    DISPLAYLEVEL(4, "block seed: %u\n", seed);
1184    initFrame(frame);
1185    op = (BYTE*)frame->data;
1186
1187    while (!blockWritten) {
1188        size_t cSize;
1189        /* generate window size */
1190        {   int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
1191            int const mantissa = RAND(&seed) % 8;
1192            frame->header.windowSize = (1U << (exponent + 10));
1193            frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
1194        }
1195
1196        /* generate content size */
1197        {   size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1198            if (RAND(&seed) & 15) {
1199                /* some full size blocks */
1200                blockContentSize = maxBlockSize;
1201            } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
1202                /* some small blocks <= 128 bytes*/
1203                blockContentSize = RAND(&seed) % (1U << 7);
1204            } else {
1205                /* some variable size blocks */
1206                blockContentSize = RAND(&seed) % maxBlockSize;
1207            }
1208        }
1209
1210        /* try generating a compressed block */
1211        frame->oldStats = frame->stats;
1212        frame->data = op;
1213        cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
1214        if (cSize >= blockContentSize) {  /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
1215            /* data doesn't compress -- try again */
1216            frame->stats = frame->oldStats; /* don't update the stats */
1217            DISPLAYLEVEL(5, "   can't compress block : try again \n");
1218        } else {
1219            blockWritten = 1;
1220            DISPLAYLEVEL(4, "   block size: %u \n", (U32)cSize);
1221            frame->src = (BYTE*)frame->src + blockContentSize;
1222        }
1223    }
1224    return seed;
1225}
1226
1227/* Return the final seed */
1228static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
1229{
1230    /* generate a complete frame */
1231    DISPLAYLEVEL(3, "frame seed: %u\n", seed);
1232    initFrame(fr);
1233
1234    writeFrameHeader(&seed, fr, info);
1235    writeBlocks(&seed, fr, info);
1236    writeChecksum(fr);
1237
1238    return seed;
1239}
1240
1241/*_*******************************************************
1242*  Dictionary Helper Functions
1243*********************************************************/
1244/* returns 0 if successful, otherwise returns 1 upon error */
1245static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
1246{
1247    /* allocate space for samples */
1248    int ret = 0;
1249    unsigned const numSamples = 4;
1250    size_t sampleSizes[4];
1251    BYTE* const samples = malloc(5000*sizeof(BYTE));
1252    if (samples == NULL) {
1253        DISPLAY("Error: could not allocate space for samples\n");
1254        return 1;
1255    }
1256
1257    /* generate samples */
1258    {   unsigned literalValue = 1;
1259        unsigned samplesPos = 0;
1260        size_t currSize = 1;
1261        while (literalValue <= 4) {
1262            sampleSizes[literalValue - 1] = currSize;
1263            {   size_t k;
1264                for (k = 0; k < currSize; k++) {
1265                    *(samples + (samplesPos++)) = (BYTE)literalValue;
1266            }   }
1267            literalValue++;
1268            currSize *= 16;
1269    }   }
1270
1271    {   size_t dictWriteSize = 0;
1272        ZDICT_params_t zdictParams;
1273        size_t const headerSize = MAX(dictSize/4, 256);
1274        size_t const dictContentSize = dictSize - headerSize;
1275        BYTE* const dictContent = fullDict + headerSize;
1276        if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
1277            DISPLAY("Error: dictionary size is too small\n");
1278            ret = 1;
1279            goto exitGenRandomDict;
1280        }
1281
1282        /* init dictionary params */
1283        memset(&zdictParams, 0, sizeof(zdictParams));
1284        zdictParams.dictID = dictID;
1285        zdictParams.notificationLevel = 1;
1286
1287        /* fill in dictionary content */
1288        RAND_buffer(&seed, (void*)dictContent, dictContentSize);
1289
1290        /* finalize dictionary with random samples */
1291        dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
1292                                    dictContent, dictContentSize,
1293                                    samples, sampleSizes, numSamples,
1294                                    zdictParams);
1295
1296        if (ZDICT_isError(dictWriteSize)) {
1297            DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
1298            ret = 1;
1299        }
1300    }
1301
1302exitGenRandomDict:
1303    free(samples);
1304    return ret;
1305}
1306
1307static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
1308    /* allocate space statically */
1309    dictInfo dictOp;
1310    memset(&dictOp, 0, sizeof(dictOp));
1311    dictOp.useDict = useDict;
1312    dictOp.dictContentSize = dictContentSize;
1313    dictOp.dictContent = dictContent;
1314    dictOp.dictID = dictID;
1315    return dictOp;
1316}
1317
1318/*-*******************************************************
1319*  Test Mode
1320*********************************************************/
1321
/* Destination buffer shared by the test-mode decoders below :
 * each test decompresses into it, then compares the result
 * against the original content recorded in the frame. */
BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
1323
1324static size_t testDecodeSimple(frame_t* fr)
1325{
1326    /* test decoding the generated data with the simple API */
1327    size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1328                           fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1329
1330    if (ZSTD_isError(ret)) return ret;
1331
1332    if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1333               (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1334        return ERROR(corruption_detected);
1335    }
1336
1337    return ret;
1338}
1339
1340static size_t testDecodeStreaming(frame_t* fr)
1341{
1342    /* test decoding the generated data with the streaming API */
1343    ZSTD_DStream* zd = ZSTD_createDStream();
1344    ZSTD_inBuffer in;
1345    ZSTD_outBuffer out;
1346    size_t ret;
1347
1348    if (!zd) return ERROR(memory_allocation);
1349
1350    in.src = fr->dataStart;
1351    in.pos = 0;
1352    in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
1353
1354    out.dst = DECOMPRESSED_BUFFER;
1355    out.pos = 0;
1356    out.size = ZSTD_DStreamOutSize();
1357
1358    ZSTD_initDStream(zd);
1359    while (1) {
1360        ret = ZSTD_decompressStream(zd, &out, &in);
1361        if (ZSTD_isError(ret)) goto cleanup; /* error */
1362        if (ret == 0) break; /* frame is done */
1363
1364        /* force decoding to be done in chunks */
1365        out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
1366    }
1367
1368    ret = out.pos;
1369
1370    if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
1371        return ERROR(corruption_detected);
1372    }
1373
1374cleanup:
1375    ZSTD_freeDStream(zd);
1376    return ret;
1377}
1378
1379static size_t testDecodeWithDict(U32 seed, genType_e genType)
1380{
1381    /* create variables */
1382    size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
1383    U32 const dictID = RAND(&seed);
1384    size_t errorDetected = 0;
1385    BYTE* const fullDict = malloc(dictSize);
1386    if (fullDict == NULL) {
1387        return ERROR(GENERIC);
1388    }
1389
1390    /* generate random dictionary */
1391    if (genRandomDict(dictID, seed, dictSize, fullDict)) {  /* return 0 on success */
1392        errorDetected = ERROR(GENERIC);
1393        goto dictTestCleanup;
1394    }
1395
1396
1397    {   frame_t fr;
1398        dictInfo info;
1399        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
1400        size_t ret;
1401
1402        /* get dict info */
1403        {   size_t const headerSize = MAX(dictSize/4, 256);
1404            size_t const dictContentSize = dictSize-headerSize;
1405            BYTE* const dictContent = fullDict+headerSize;
1406            info = initDictInfo(1, dictContentSize, dictContent, dictID);
1407        }
1408
1409        /* manually decompress and check difference */
1410        if (genType == gt_frame) {
1411            /* Test frame */
1412            generateFrame(seed, &fr, info);
1413            ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1414                                            fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
1415                                            fullDict, dictSize);
1416        } else {
1417            /* Test block */
1418            generateCompressedBlock(seed, &fr, info);
1419            ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
1420            if (ZSTD_isError(ret)) {
1421                errorDetected = ret;
1422                ZSTD_freeDCtx(dctx);
1423                goto dictTestCleanup;
1424            }
1425            ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1426                                       fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
1427        }
1428        ZSTD_freeDCtx(dctx);
1429
1430        if (ZSTD_isError(ret)) {
1431            errorDetected = ret;
1432            goto dictTestCleanup;
1433        }
1434
1435        if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
1436            errorDetected = ERROR(corruption_detected);
1437            goto dictTestCleanup;
1438        }
1439    }
1440
1441dictTestCleanup:
1442    free(fullDict);
1443    return errorDetected;
1444}
1445
1446static size_t testDecodeRawBlock(frame_t* fr)
1447{
1448    ZSTD_DCtx* dctx = ZSTD_createDCtx();
1449    size_t ret = ZSTD_decompressBegin(dctx);
1450    if (ZSTD_isError(ret)) return ret;
1451
1452    ret = ZSTD_decompressBlock(
1453            dctx,
1454            DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1455            fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1456    ZSTD_freeDCtx(dctx);
1457    if (ZSTD_isError(ret)) return ret;
1458
1459    if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1460               (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1461        return ERROR(corruption_detected);
1462    }
1463
1464    return ret;
1465}
1466
1467static int runBlockTest(U32* seed)
1468{
1469    frame_t fr;
1470    U32 const seedCopy = *seed;
1471    {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
1472        *seed = generateCompressedBlock(*seed, &fr, info);
1473    }
1474
1475    {   size_t const r = testDecodeRawBlock(&fr);
1476        if (ZSTD_isError(r)) {
1477            DISPLAY("Error in block mode on test seed %u: %s\n", seedCopy,
1478                    ZSTD_getErrorName(r));
1479            return 1;
1480        }
1481    }
1482
1483    {   size_t const r = testDecodeWithDict(*seed, gt_block);
1484        if (ZSTD_isError(r)) {
1485            DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
1486                    seedCopy, ZSTD_getErrorName(r));
1487            return 1;
1488        }
1489    }
1490    return 0;
1491}
1492
1493static int runFrameTest(U32* seed)
1494{
1495    frame_t fr;
1496    U32 const seedCopy = *seed;
1497    {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
1498        *seed = generateFrame(*seed, &fr, info);
1499    }
1500
1501    {   size_t const r = testDecodeSimple(&fr);
1502        if (ZSTD_isError(r)) {
1503            DISPLAY("Error in simple mode on test seed %u: %s\n",
1504                    seedCopy, ZSTD_getErrorName(r));
1505            return 1;
1506        }
1507    }
1508    {   size_t const r = testDecodeStreaming(&fr);
1509        if (ZSTD_isError(r)) {
1510            DISPLAY("Error in streaming mode on test seed %u: %s\n",
1511                    seedCopy, ZSTD_getErrorName(r));
1512            return 1;
1513        }
1514    }
1515    {   size_t const r = testDecodeWithDict(*seed, gt_frame);  /* avoid big dictionaries */
1516        if (ZSTD_isError(r)) {
1517            DISPLAY("Error in dictionary mode on test seed %u: %s\n",
1518                    seedCopy, ZSTD_getErrorName(r));
1519            return 1;
1520        }
1521    }
1522    return 0;
1523}
1524
1525static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
1526                       genType_e genType)
1527{
1528    unsigned fnum;
1529
1530    UTIL_time_t const startClock = UTIL_getTime();
1531    U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
1532
1533    if (numFiles == 0 && !testDurationS) numFiles = 1;
1534
1535    DISPLAY("seed: %u\n", seed);
1536
1537    for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) {
1538        if (fnum < numFiles)
1539            DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1540        else
1541            DISPLAYUPDATE("\r%u           ", fnum);
1542
1543        {   int const ret = (genType == gt_frame) ?
1544                            runFrameTest(&seed) :
1545                            runBlockTest(&seed);
1546            if (ret) return ret;
1547        }
1548    }
1549
1550    DISPLAY("\r%u tests completed: ", fnum);
1551    DISPLAY("OK\n");
1552
1553    return 0;
1554}
1555
1556/*-*******************************************************
1557*  File I/O
1558*********************************************************/
1559
1560static int generateFile(U32 seed, const char* const path,
1561                        const char* const origPath, genType_e genType)
1562{
1563    frame_t fr;
1564
1565    DISPLAY("seed: %u\n", seed);
1566
1567    {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
1568        if (genType == gt_frame) {
1569            generateFrame(seed, &fr, info);
1570        } else {
1571            generateCompressedBlock(seed, &fr, info);
1572        }
1573    }
1574    outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1575    if (origPath) {
1576        outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1577    }
1578    return 0;
1579}
1580
1581static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
1582                          const char* const origPath, genType_e genType)
1583{
1584    char outPath[MAX_PATH];
1585    unsigned fnum;
1586
1587    DISPLAY("seed: %u\n", seed);
1588
1589    for (fnum = 0; fnum < numFiles; fnum++) {
1590        frame_t fr;
1591
1592        DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1593
1594        {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
1595            if (genType == gt_frame) {
1596                seed = generateFrame(seed, &fr, info);
1597            } else {
1598                seed = generateCompressedBlock(seed, &fr, info);
1599            }
1600        }
1601
1602        if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1603            DISPLAY("Error: path too long\n");
1604            return 1;
1605        }
1606        outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1607
1608        if (origPath) {
1609            if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1610                DISPLAY("Error: path too long\n");
1611                return 1;
1612            }
1613            outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1614        }
1615    }
1616
1617    DISPLAY("\r%u/%u      \n", fnum, numFiles);
1618
1619    return 0;
1620}
1621
1622static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
1623                                  const char* const origPath, const size_t dictSize,
1624                                  genType_e genType)
1625{
1626    char outPath[MAX_PATH];
1627    BYTE* fullDict;
1628    U32 const dictID = RAND(&seed);
1629    int errorDetected = 0;
1630
1631    if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1632        DISPLAY("Error: path too long\n");
1633        return 1;
1634    }
1635
1636    /* allocate space for the dictionary */
1637    fullDict = malloc(dictSize);
1638    if (fullDict == NULL) {
1639        DISPLAY("Error: could not allocate space for full dictionary.\n");
1640        return 1;
1641    }
1642
1643    /* randomly generate the dictionary */
1644    {   int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
1645        if (ret != 0) {
1646            errorDetected = ret;
1647            goto dictCleanup;
1648        }
1649    }
1650
1651    /* write out dictionary */
1652    if (numFiles != 0) {
1653        if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1654            DISPLAY("Error: dictionary path too long\n");
1655            errorDetected = 1;
1656            goto dictCleanup;
1657        }
1658        outputBuffer(fullDict, dictSize, outPath);
1659    }
1660    else {
1661        outputBuffer(fullDict, dictSize, "dictionary");
1662    }
1663
1664    /* generate random compressed/decompressed files */
1665    {   unsigned fnum;
1666        for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
1667            frame_t fr;
1668            DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1669            {
1670                size_t const headerSize = MAX(dictSize/4, 256);
1671                size_t const dictContentSize = dictSize-headerSize;
1672                BYTE* const dictContent = fullDict+headerSize;
1673                dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
1674                if (genType == gt_frame) {
1675                    seed = generateFrame(seed, &fr, info);
1676                } else {
1677                    seed = generateCompressedBlock(seed, &fr, info);
1678                }
1679            }
1680
1681            if (numFiles != 0) {
1682                if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1683                    DISPLAY("Error: path too long\n");
1684                    errorDetected = 1;
1685                    goto dictCleanup;
1686                }
1687                outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1688
1689                if (origPath) {
1690                    if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1691                        DISPLAY("Error: path too long\n");
1692                        errorDetected = 1;
1693                        goto dictCleanup;
1694                    }
1695                    outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1696                }
1697            }
1698            else {
1699                outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1700                if (origPath) {
1701                    outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1702                }
1703            }
1704        }
1705    }
1706
1707dictCleanup:
1708    free(fullDict);
1709    return errorDetected;
1710}
1711
1712
1713/*_*******************************************************
1714*  Command line
1715*********************************************************/
1716static U32 makeSeed(void)
1717{
1718    U32 t = (U32) time(NULL);
1719    return XXH32(&t, sizeof(t), 0) % 65536;
1720}
1721
/*! readInt() :
 *  Reads a run of decimal digits starting at `*argument`.
 *  `*argument` is advanced to the first character which is not a digit.
 *  @return : the decimal value read, 0 when there is no leading digit.
 *  Note : no overflow detection, the result wraps on very long digit strings */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = (total * 10) + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
1732
/*! usage() :
 *  Displays the short help message (basic arguments) on stderr.
 *  `programName` is shown as the command to invoke. */
static void usage(const char* programName)
{
    DISPLAY( "Usage :\n");
    DISPLAY( "      %s [args]\n", programName);
    DISPLAY( "\n");
    DISPLAY( "Arguments :\n");
    /* main() rejects a missing path whenever test mode (-t) is not selected */
    DISPLAY( " -p<path> : select output path (required in file generation mode)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -s#      : select seed (default:random based on time)\n");
    DISPLAY( " -n#      : number of files to generate (default:1)\n");
    DISPLAY( " -t       : activate test mode (test files against libzstd instead of outputting them)\n");
    DISPLAY( " -T#      : length of time to run tests for\n");
    /* g_displayLevel starts at 2, each -v increments it */
    DISPLAY( " -v       : increase verbosity level (default:2, max:7)\n");
    DISPLAY( " -h/H     : display help/long help and exit\n");
}
1750
/*! advancedUsage() :
 *  Displays the long help message : the short help from usage(),
 *  followed by the list of advanced (long-form) arguments. */
static void advancedUsage(const char* programName)
{
    usage(programName);
    /* adjacent literals are concatenated : emitted as a single message */
    DISPLAY( "\n"
             "Advanced arguments        :\n"
             " --content-size           : always include the content size in the frame header\n"
             " --use-dict=#             : include a dictionary used to decompress the corpus\n"
             " --gen-blocks             : generate raw compressed blocks without block/frame headers\n"
             " --max-block-size-log=#   : max block size log, must be in range [2, 17]\n"
             " --max-content-size-log=# : max content size log, must be <= 20\n"
             "                            (this is ignored with gen-blocks)\n");
}
1763
/*! readU32FromChar() :
 *  Parses an unsigned decimal number at `*stringPtr`, optionally followed by a
 *  size suffix : K, KB, KiB (value * 1024) or M, MB, MiB (value * 1024 * 1024).
 *  `*stringPtr` is advanced to the first character left unread.
 *  @return : the value read, 0 when there is no leading digit.
 *  Note : no overflow detection, the result wraps when it exceeds UINT_MAX */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    unsigned value = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value = (value * 10) + (unsigned)(*cursor - '0');
    }

    if ((*cursor == 'K') || (*cursor == 'M')) {
        int const isMega = (*cursor == 'M');
        value <<= 10;
        if (isMega) value <<= 10;
        cursor++;
        /* optional "i" then optional "B", each consumed independently */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value;
}
1783
/** longCommandWArg() :
 *  checks whether *stringPtr starts with longCommand.
 *  On match, advances *stringPtr just past longCommand and @return 1.
 *  Otherwise, leaves *stringPtr untouched and @return 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
1796
1797int main(int argc, char** argv)
1798{
1799    U32 seed = 0;
1800    int seedset = 0;
1801    unsigned numFiles = 0;
1802    unsigned testDuration = 0;
1803    int testMode = 0;
1804    const char* path = NULL;
1805    const char* origPath = NULL;
1806    int useDict = 0;
1807    unsigned dictSize = (10 << 10); /* 10 kB default */
1808    genType_e genType = gt_frame;
1809
1810    int argNb;
1811
1812    /* Check command line */
1813    for (argNb=1; argNb<argc; argNb++) {
1814        const char* argument = argv[argNb];
1815        if(!argument) continue;   /* Protection if argument empty */
1816
1817        /* Handle commands. Aggregated commands are allowed */
1818        if (argument[0]=='-') {
1819            argument++;
1820            while (*argument!=0) {
1821                switch(*argument)
1822                {
1823                case 'h':
1824                    usage(argv[0]);
1825                    return 0;
1826                case 'H':
1827                    advancedUsage(argv[0]);
1828                    return 0;
1829                case 'v':
1830                    argument++;
1831                    g_displayLevel++;
1832                    break;
1833                case 's':
1834                    argument++;
1835                    seedset=1;
1836                    seed = readInt(&argument);
1837                    break;
1838                case 'n':
1839                    argument++;
1840                    numFiles = readInt(&argument);
1841                    break;
1842                case 'T':
1843                    argument++;
1844                    testDuration = readInt(&argument);
1845                    if (*argument == 'm') {
1846                        testDuration *= 60;
1847                        argument++;
1848                        if (*argument == 'n') argument++;
1849                    }
1850                    break;
1851                case 'o':
1852                    argument++;
1853                    origPath = argument;
1854                    argument += strlen(argument);
1855                    break;
1856                case 'p':
1857                    argument++;
1858                    path = argument;
1859                    argument += strlen(argument);
1860                    break;
1861                case 't':
1862                    argument++;
1863                    testMode = 1;
1864                    break;
1865                case '-':
1866                    argument++;
1867                    if (strcmp(argument, "content-size") == 0) {
1868                        opts.contentSize = 1;
1869                    } else if (longCommandWArg(&argument, "use-dict=")) {
1870                        dictSize = readU32FromChar(&argument);
1871                        useDict = 1;
1872                    } else if (strcmp(argument, "gen-blocks") == 0) {
1873                        genType = gt_block;
1874                    } else if (longCommandWArg(&argument, "max-block-size-log=")) {
1875                        U32 value = readU32FromChar(&argument);
1876                        if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
1877                            g_maxBlockSize = 1U << value;
1878                        }
1879                    } else if (longCommandWArg(&argument, "max-content-size-log=")) {
1880                        U32 value = readU32FromChar(&argument);
1881                        g_maxDecompressedSizeLog =
1882                                MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
1883                    } else {
1884                        advancedUsage(argv[0]);
1885                        return 1;
1886                    }
1887                    argument += strlen(argument);
1888                    break;
1889                default:
1890                    usage(argv[0]);
1891                    return 1;
1892    }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
1893
1894    if (!seedset) {
1895        seed = makeSeed();
1896    }
1897
1898    if (testMode) {
1899        return runTestMode(seed, numFiles, testDuration, genType);
1900    } else {
1901        if (testDuration) {
1902            DISPLAY("Error: -T requires test mode (-t)\n\n");
1903            usage(argv[0]);
1904            return 1;
1905        }
1906    }
1907
1908    if (!path) {
1909        DISPLAY("Error: path is required in file generation mode\n");
1910        usage(argv[0]);
1911        return 1;
1912    }
1913
1914    if (numFiles == 0 && useDict == 0) {
1915        return generateFile(seed, path, origPath, genType);
1916    } else if (useDict == 0){
1917        return generateCorpus(seed, numFiles, path, origPath, genType);
1918    } else {
1919        /* should generate files with a dictionary */
1920        return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
1921    }
1922
1923}
1924