1/* 2 * Copyright (c) 2017-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11#include <limits.h> 12#include <math.h> 13#include <stddef.h> 14#include <stdio.h> 15#include <stdlib.h> 16#include <string.h> 17 18#include "util.h" 19#include "zstd.h" 20#include "zstd_internal.h" 21#include "mem.h" 22#define ZDICT_STATIC_LINKING_ONLY 23#include "zdict.h" 24 25// Direct access to internal compression functions is required 26#include "zstd_compress.c" 27 28#define XXH_STATIC_LINKING_ONLY 29#include "xxhash.h" /* XXH64 */ 30 31#ifndef MIN 32 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 33#endif 34 35#ifndef MAX_PATH 36 #ifdef PATH_MAX 37 #define MAX_PATH PATH_MAX 38 #else 39 #define MAX_PATH 256 40 #endif 41#endif 42 43/*-************************************ 44* DISPLAY Macros 45**************************************/ 46#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 47#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } 48static U32 g_displayLevel = 2; 49 50#define DISPLAYUPDATE(...) \ 51 do { \ 52 if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \ 53 (g_displayLevel >= 4)) { \ 54 g_displayClock = UTIL_getTime(); \ 55 DISPLAY(__VA_ARGS__); \ 56 if (g_displayLevel >= 4) fflush(stderr); \ 57 } \ 58 } while (0) 59 60static const U64 g_refreshRate = SEC_TO_MICRO / 6; 61static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 62 63#define CHECKERR(code) \ 64 do { \ 65 if (ZSTD_isError(code)) { \ 66 DISPLAY("Error occurred while generating data: %s\n", \ 67 ZSTD_getErrorName(code)); \ 68 exit(1); \ 69 } \ 70 } while (0) 71 72/*-******************************************************* 73* Random function 74*********************************************************/ 75static unsigned RAND(unsigned* src) 76{ 77#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) 78 static const U32 prime1 = 2654435761U; 79 static const U32 prime2 = 2246822519U; 80 U32 rand32 = *src; 81 rand32 *= prime1; 82 rand32 += prime2; 83 rand32 = RAND_rotl32(rand32, 13); 84 *src = rand32; 85 return RAND_rotl32(rand32, 27); 86#undef RAND_rotl32 87} 88 89#define DISTSIZE (8192) 90 91/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ 92static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) 93{ 94 size_t i; 95 BYTE* op = ptr; 96 97 for (i = 0; i < size; i++) { 98 op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); 99 } 100} 101 102/* Write `size` random bytes into `ptr` */ 103static void RAND_buffer(U32* seed, void* ptr, size_t size) 104{ 105 size_t i; 106 BYTE* op = ptr; 107 108 for (i = 0; i + 4 <= size; i += 4) { 109 MEM_writeLE32(op + i, RAND(seed)); 110 } 111 for (; i < size; i++) { 112 op[i] = RAND(seed) & 0xff; 113 } 114} 115 116/* Write `size` bytes into `ptr` following the distribution `dist` */ 117static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) 118{ 119 size_t i; 120 BYTE* op = ptr; 121 122 for (i = 0; i < size; i++) { 123 op[i] = dist[RAND(seed) % DISTSIZE]; 124 } 125} 126 127/* Generate a random distribution where the frequency of each symbol follows a 128 * geometric distribution defined by `weight` 129 * `dist` should have size at least `DISTSIZE` */ 130static void RAND_genDist(U32* seed, BYTE* dist, double weight) 131{ 132 size_t i = 0; 133 size_t statesLeft = DISTSIZE; 134 BYTE symb = (BYTE) (RAND(seed) % 256); 135 BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ 136 137 while (i < DISTSIZE) { 138 size_t states = ((size_t)(weight * statesLeft)) + 1; 139 size_t j; 140 for (j = 0; j < states && i < DISTSIZE; j++, i++) { 141 dist[i] = symb; 142 } 143 144 symb += step; 145 statesLeft -= states; 146 } 147} 148 149/* Generates a random number in the range [min, max) */ 150static inline U32 RAND_range(U32* seed, U32 min, U32 max) 151{ 152 return (RAND(seed) % (max-min)) + min; 153} 154 155#define ROUND(x) ((U32)(x + 0.5)) 156 157/* Generates a random number in an exponential distribution with mean `mean` */ 158static double RAND_exp(U32* seed, double mean) 159{ 160 double const u = RAND(seed) / (double) UINT_MAX; 161 return log(1-u) * (-mean); 162} 163 164/*-******************************************************* 165* Constants and Structs 166*********************************************************/ 167const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"}; 168 169#define MAX_DECOMPRESSED_SIZE_LOG 20 170#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) 171 172#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ 173 174#define MIN_SEQ_LEN (3) 175#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) 176 177BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; 178BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; 179BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; 180 181seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; 182BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ 183BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; 184BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; 185BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; 186 187unsigned WKSP[1024]; 188 189typedef struct { 190 size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */ 191 unsigned windowSize; /* contentSize >= windowSize means single segment */ 192} frameHeader_t; 193 194/* For repeat modes */ 195typedef struct { 196 U32 rep[ZSTD_REP_NUM]; 197 198 int hufInit; 199 /* the distribution used in the previous block for repeat mode */ 200 BYTE hufDist[DISTSIZE]; 201 U32 hufTable [256]; /* HUF_CElt is an incomplete type */ 202 203 int fseInit; 204 FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; 205 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; 206 FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; 207 208 /* Symbols that were present in the previous distribution, for use with 209 * set_repeat */ 210 BYTE litlengthSymbolSet[36]; 211 BYTE offsetSymbolSet[29]; 212 BYTE matchlengthSymbolSet[53]; 213} cblockStats_t; 214 215typedef struct { 216 void* data; 217 void* dataStart; 218 void* dataEnd; 219 220 void* src; 221 void* srcStart; 222 void* srcEnd; 223 224 frameHeader_t header; 225 226 cblockStats_t stats; 227 cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ 228} frame_t; 229 230typedef struct { 231 int useDict; 232 U32 dictID; 233 size_t dictContentSize; 234 BYTE* dictContent; 235} dictInfo; 236 237typedef enum { 238 gt_frame = 0, /* generate frames */ 239 gt_block, /* generate compressed blocks without block/frame headers */ 240} genType_e; 241 242/*-******************************************************* 243* Global variables (set from command line) 244*********************************************************/ 245U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ 246U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ 247 248/*-******************************************************* 249* Generator Functions 250*********************************************************/ 251 252struct { 253 int contentSize; /* force the content size to be present */ 254} opts; /* advanced options on generation */ 255 256/* Generate and write a random frame header */ 257static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) 258{ 259 BYTE* const op = frame->data; 260 size_t pos = 0; 261 frameHeader_t fh; 262 263 BYTE windowByte = 0; 264 265 int singleSegment = 0; 266 int contentSizeFlag = 0; 267 int fcsCode = 0; 268 269 memset(&fh, 0, sizeof(fh)); 270 271 /* generate window size */ 272 { 273 /* Follow window algorithm from specification */ 274 int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); 275 int const mantissa = RAND(seed) % 8; 276 windowByte = (BYTE) ((exponent << 3) | mantissa); 277 fh.windowSize = (1U << (exponent + 10)); 278 fh.windowSize += fh.windowSize / 8 * mantissa; 279 } 280 281 { 282 /* Generate random content size */ 283 size_t highBit; 284 if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { 285 /* do content of at least 128 bytes */ 286 highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); 287 } else if (RAND(seed) & 3) { 288 /* do small content */ 289 highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); 290 } else { 291 /* 0 size frame */ 292 highBit = 0; 293 } 294 fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; 295 296 /* provide size sometimes */ 297 contentSizeFlag = opts.contentSize | (RAND(seed) & 1); 298 299 if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { 300 /* do single segment sometimes */ 301 fh.windowSize = (U32) fh.contentSize; 302 singleSegment = 1; 303 } 304 } 305 306 if (contentSizeFlag) { 307 /* Determine how large fcs field has to be */ 308 int minFcsCode = (fh.contentSize >= 256) + 309 (fh.contentSize >= 65536 + 256) + 310 (fh.contentSize > 0xFFFFFFFFU); 311 if (!singleSegment && !minFcsCode) { 312 minFcsCode = 1; 313 } 314 fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); 315 if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; 316 } 317 318 /* write out the header */ 319 MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); 320 pos += 4; 321 322 { 323 /* 324 * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) 325 * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5) 326 * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) 327 * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) 328 * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header 329 */ 330 int const dictBits = info.useDict ? 3 : 0; 331 BYTE const frameHeaderDescriptor = 332 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); 333 op[pos++] = frameHeaderDescriptor; 334 } 335 336 if (!singleSegment) { 337 op[pos++] = windowByte; 338 } 339 if (info.useDict) { 340 MEM_writeLE32(op + pos, (U32) info.dictID); 341 pos += 4; 342 } 343 if (contentSizeFlag) { 344 switch (fcsCode) { 345 default: /* Impossible */ 346 case 0: op[pos++] = (BYTE) fh.contentSize; break; 347 case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; 348 case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; 349 case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; 350 } 351 } 352 353 DISPLAYLEVEL(3, " frame content size:\t%u\n", (U32)fh.contentSize); 354 DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); 355 DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); 356 DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); 357 358 frame->data = op + pos; 359 frame->header = fh; 360} 361 362/* Write a literal block in either raw or RLE form, return the literals size */ 363static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) 364{ 365 BYTE* op = (BYTE*)frame->data; 366 int const type = RAND(seed) % 2; 367 int const sizeFormatDesc = RAND(seed) % 8; 368 size_t litSize; 369 size_t maxLitSize = MIN(contentSize, g_maxBlockSize); 370 371 if (sizeFormatDesc == 0) { 372 /* Size_FormatDesc = ?0 */ 373 maxLitSize = MIN(maxLitSize, 31); 374 } else if (sizeFormatDesc <= 4) { 375 /* Size_FormatDesc = 01 */ 376 maxLitSize = MIN(maxLitSize, 4095); 377 } else { 378 /* Size_Format = 11 */ 379 maxLitSize = MIN(maxLitSize, 1048575); 380 } 381 382 litSize = RAND(seed) % (maxLitSize + 1); 383 if (frame->src == frame->srcStart && litSize == 0) { 384 litSize = 1; /* no empty literals if there's nothing preceding this block */ 385 } 386 if (litSize + 3 > contentSize) { 387 litSize = contentSize; /* no matches shorter than 3 are allowed */ 388 } 389 /* use smallest size format that fits */ 390 if (litSize < 32) { 391 op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; 392 op += 1; 393 } else if (litSize < 4096) { 394 op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; 395 op[1] = (litSize >> 4) & 0xff; 396 op += 2; 397 } else { 398 op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; 399 op[1] = (litSize >> 4) & 0xff; 400 op[2] = (litSize >> 12) & 0xff; 401 op += 3; 402 } 403 404 if (type == 0) { 405 /* Raw literals */ 406 DISPLAYLEVEL(4, " raw literals\n"); 407 408 RAND_buffer(seed, LITERAL_BUFFER, litSize); 409 memcpy(op, LITERAL_BUFFER, litSize); 410 op += litSize; 411 } else { 412 /* RLE literals */ 413 BYTE const symb = (BYTE) (RAND(seed) % 256); 414 415 DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb); 416 417 memset(LITERAL_BUFFER, symb, litSize); 418 op[0] = symb; 419 op++; 420 } 421 422 frame->data = op; 423 424 return litSize; 425} 426 427/* Generate a Huffman header for the given source */ 428static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, 429 const void* src, size_t srcSize) 430{ 431 BYTE* const ostart = (BYTE*)dst; 432 BYTE* op = ostart; 433 434 unsigned huffLog = 11; 435 U32 maxSymbolValue = 255; 436 437 U32 count[HUF_SYMBOLVALUE_MAX+1]; 438 439 /* Scan input and build symbol stats */ 440 { size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP); 441 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ 442 if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ 443 } 444 445 /* Build Huffman Tree */ 446 /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ 447 huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); 448 DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); 449 { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); 450 CHECKERR(maxBits); 451 huffLog = (U32)maxBits; 452 } 453 454 /* Write table description header */ 455 { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog); 456 if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ 457 op += hSize; 458 } 459 460 return op - ostart; 461} 462 463/* Write a Huffman coded literals block and return the literals size */ 464static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) 465{ 466 BYTE* origop = (BYTE*)frame->data; 467 BYTE* opend = (BYTE*)frame->dataEnd; 468 BYTE* op; 469 BYTE* const ostart = origop; 470 int const sizeFormat = RAND(seed) % 4; 471 size_t litSize; 472 size_t hufHeaderSize = 0; 473 size_t compressedSize = 0; 474 size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); 475 476 symbolEncodingType_e hType; 477 478 if (contentSize < 64) { 479 /* make sure we get reasonably-sized literals for compression */ 480 return ERROR(GENERIC); 481 } 482 483 DISPLAYLEVEL(4, " compressed literals\n"); 484 485 switch (sizeFormat) { 486 case 0: /* fall through, size is the same as case 1 */ 487 case 1: 488 maxLitSize = MIN(maxLitSize, 1023); 489 origop += 3; 490 break; 491 case 2: 492 maxLitSize = MIN(maxLitSize, 16383); 493 origop += 4; 494 break; 495 case 3: 496 maxLitSize = MIN(maxLitSize, 262143); 497 origop += 5; 498 break; 499 default:; /* impossible */ 500 } 501 502 do { 503 op = origop; 504 do { 505 litSize = RAND(seed) % (maxLitSize + 1); 506 } while (litSize < 32); /* avoid small literal sizes */ 507 if (litSize + 3 > contentSize) { 508 litSize = contentSize; /* no matches shorter than 3 are allowed */ 509 } 510 511 /* most of the time generate a new distribution */ 512 if ((RAND(seed) & 3) || !frame->stats.hufInit) { 513 do { 514 if (RAND(seed) & 3) { 515 /* add 10 to ensure some compressability */ 516 double const weight = ((RAND(seed) % 90) + 10) / 100.0; 517 518 DISPLAYLEVEL(5, " distribution weight: %d%%\n", 519 (int)(weight * 100)); 520 521 RAND_genDist(seed, frame->stats.hufDist, weight); 522 } else { 523 /* sometimes do restricted range literals to force 524 * non-huffman headers */ 525 DISPLAYLEVEL(5, " small range literals\n"); 526 RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, 527 15); 528 } 529 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 530 litSize); 531 532 /* generate the header from the distribution instead of the 533 * actual data to avoid bugs with symbols that were in the 534 * distribution but never showed up in the output */ 535 hufHeaderSize = writeHufHeader( 536 seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op, 537 frame->stats.hufDist, DISTSIZE); 538 CHECKERR(hufHeaderSize); 539 /* repeat until a valid header is written */ 540 } while (hufHeaderSize == 0); 541 op += hufHeaderSize; 542 hType = set_compressed; 543 544 frame->stats.hufInit = 1; 545 } else { 546 /* repeat the distribution/table from last time */ 547 DISPLAYLEVEL(5, " huffman repeat stats\n"); 548 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 549 litSize); 550 hufHeaderSize = 0; 551 hType = set_repeat; 552 } 553 554 do { 555 compressedSize = 556 sizeFormat == 0 557 ? HUF_compress1X_usingCTable( 558 op, opend - op, LITERAL_BUFFER, litSize, 559 (HUF_CElt*)frame->stats.hufTable) 560 : HUF_compress4X_usingCTable( 561 op, opend - op, LITERAL_BUFFER, litSize, 562 (HUF_CElt*)frame->stats.hufTable); 563 CHECKERR(compressedSize); 564 /* this only occurs when it could not compress or similar */ 565 } while (compressedSize <= 0); 566 567 op += compressedSize; 568 569 compressedSize += hufHeaderSize; 570 DISPLAYLEVEL(5, " regenerated size: %u\n", (U32)litSize); 571 DISPLAYLEVEL(5, " compressed size: %u\n", (U32)compressedSize); 572 if (compressedSize >= litSize) { 573 DISPLAYLEVEL(5, " trying again\n"); 574 /* if we have to try again, reset the stats so we don't accidentally 575 * try to repeat a distribution we just made */ 576 frame->stats = frame->oldStats; 577 } else { 578 break; 579 } 580 } while (1); 581 582 /* write header */ 583 switch (sizeFormat) { 584 case 0: /* fall through, size is the same as case 1 */ 585 case 1: { 586 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 587 ((U32)compressedSize << 14); 588 MEM_writeLE24(ostart, header); 589 break; 590 } 591 case 2: { 592 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 593 ((U32)compressedSize << 18); 594 MEM_writeLE32(ostart, header); 595 break; 596 } 597 case 3: { 598 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 599 ((U32)compressedSize << 22); 600 MEM_writeLE32(ostart, header); 601 ostart[4] = (BYTE)(compressedSize >> 10); 602 break; 603 } 604 default:; /* impossible */ 605 } 606 607 frame->data = op; 608 return litSize; 609} 610 611static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) 612{ 613 /* only do compressed for larger segments to avoid compressibility issues */ 614 if (RAND(seed) & 7 && contentSize >= 64) { 615 return writeLiteralsBlockCompressed(seed, frame, contentSize); 616 } else { 617 return writeLiteralsBlockSimple(seed, frame, contentSize); 618 } 619} 620 621static inline void initSeqStore(seqStore_t *seqStore) { 622 seqStore->sequencesStart = SEQUENCE_BUFFER; 623 seqStore->litStart = SEQUENCE_LITERAL_BUFFER; 624 seqStore->llCode = SEQUENCE_LLCODE; 625 seqStore->mlCode = SEQUENCE_MLCODE; 626 seqStore->ofCode = SEQUENCE_OFCODE; 627 628 ZSTD_resetSeqStore(seqStore); 629} 630 631/* Randomly generate sequence commands */ 632static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, 633 size_t contentSize, size_t literalsSize, dictInfo info) 634{ 635 /* The total length of all the matches */ 636 size_t const remainingMatch = contentSize - literalsSize; 637 size_t excessMatch = 0; 638 U32 numSequences = 0; 639 640 U32 i; 641 642 643 const BYTE* literals = LITERAL_BUFFER; 644 BYTE* srcPtr = frame->src; 645 646 if (literalsSize != contentSize) { 647 /* each match must be at least MIN_SEQ_LEN, so this is the maximum 648 * number of sequences we can have */ 649 U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; 650 numSequences = (RAND(seed) % maxSequences) + 1; 651 652 /* the extra match lengths we have to allocate to each sequence */ 653 excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; 654 } 655 656 DISPLAYLEVEL(5, " total match lengths: %u\n", (U32)remainingMatch); 657 for (i = 0; i < numSequences; i++) { 658 /* Generate match and literal lengths by exponential distribution to 659 * ensure nice numbers */ 660 U32 matchLen = 661 MIN_SEQ_LEN + 662 ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i))); 663 U32 literalLen = 664 (RAND(seed) & 7) 665 ? ROUND(RAND_exp(seed, 666 literalsSize / 667 (double)(numSequences - i))) 668 : 0; 669 /* actual offset, code to send, and point to copy up to when shifting 670 * codes in the repeat offsets history */ 671 U32 offset, offsetCode, repIndex; 672 673 /* bounds checks */ 674 matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); 675 literalLen = MIN(literalLen, (U32) literalsSize); 676 if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; 677 if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; 678 679 memcpy(srcPtr, literals, literalLen); 680 srcPtr += literalLen; 681 do { 682 if (RAND(seed) & 7) { 683 /* do a normal offset */ 684 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); 685 offset = (RAND(seed) % 686 MIN(frame->header.windowSize, 687 (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + 688 1; 689 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { 690 /* need to occasionally generate offsets that go past the start */ 691 /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ 692 U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; 693 offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; 694 if (offset > frame->header.windowSize) { 695 if (lenPastStart < MIN_SEQ_LEN) { 696 /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ 697 /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ 698 /* make sure lenPastStart does not go past dictionary start though */ 699 lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); 700 offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; 701 } 702 { 703 U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); 704 matchLen = MIN(matchLen, matchLenBound); 705 } 706 } 707 } 708 offsetCode = offset + ZSTD_REP_MOVE; 709 repIndex = 2; 710 } else { 711 /* do a repeat offset */ 712 offsetCode = RAND(seed) % 3; 713 if (literalLen > 0) { 714 offset = frame->stats.rep[offsetCode]; 715 repIndex = offsetCode; 716 } else { 717 /* special case */ 718 offset = offsetCode == 2 ? frame->stats.rep[0] - 1 719 : frame->stats.rep[offsetCode + 1]; 720 repIndex = MIN(2, offsetCode + 1); 721 } 722 } 723 } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); 724 725 { 726 size_t j; 727 BYTE* const dictEnd = info.dictContent + info.dictContentSize; 728 for (j = 0; j < matchLen; j++) { 729 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { 730 /* copy from dictionary instead of literals */ 731 size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); 732 *srcPtr = *(dictEnd - dictOffset); 733 } 734 else { 735 *srcPtr = *(srcPtr-offset); 736 } 737 srcPtr++; 738 } 739 } 740 741 { int r; 742 for (r = repIndex; r > 0; r--) { 743 frame->stats.rep[r] = frame->stats.rep[r - 1]; 744 } 745 frame->stats.rep[0] = offset; 746 } 747 748 DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen); 749 DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", 750 (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i); 751 DISPLAYLEVEL(6, "\n"); 752 if (offsetCode < 3) { 753 DISPLAYLEVEL(7, " repeat offset: %d\n", repIndex); 754 } 755 /* use libzstd sequence handling */ 756 ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode, 757 matchLen - MINMATCH); 758 759 literalsSize -= literalLen; 760 excessMatch -= (matchLen - MIN_SEQ_LEN); 761 literals += literalLen; 762 } 763 764 memcpy(srcPtr, literals, literalsSize); 765 srcPtr += literalsSize; 766 DISPLAYLEVEL(6, " excess literals: %5u", (U32)literalsSize); 767 DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); 768 DISPLAYLEVEL(6, "\n"); 769 770 return numSequences; 771} 772 773static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) 774{ 775 size_t i; 776 777 memset(set, 0, (size_t)maxSymbolValue+1); 778 779 for (i = 0; i < len; i++) { 780 set[symbols[i]] = 1; 781 } 782} 783 784static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) 785{ 786 size_t i; 787 788 for (i = 0; i < len; i++) { 789 if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { 790 return 0; 791 } 792 } 793 return 1; 794} 795 796static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, 797 size_t nbSeq) 798{ 799 /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ 800 U32 count[MaxSeq+1]; 801 S16 norm[MaxSeq+1]; 802 FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; 803 FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; 804 FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; 805 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 806 const seqDef* const sequences = seqStorePtr->sequencesStart; 807 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 808 const BYTE* const llCodeTable = seqStorePtr->llCode; 809 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 810 BYTE* const oend = (BYTE*)frame->dataEnd; 811 BYTE* op = (BYTE*)frame->data; 812 BYTE* seqHead; 813 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; 814 815 /* literals compressing block removed so that can be done separately */ 816 817 /* Sequences Header */ 818 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); 819 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; 820 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 821 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; 822 823 /* seqHead : flags for FSE encoding type */ 824 seqHead = op++; 825 826 if (nbSeq==0) { 827 frame->data = op; 828 829 return 0; 830 } 831 832 /* convert length/distances into codes */ 833 ZSTD_seqToCodes(seqStorePtr); 834 835 /* CTable for Literal Lengths */ 836 { U32 max = MaxLL; 837 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); 838 if (mostFrequent == nbSeq) { 839 /* do RLE if we have the chance */ 840 *op++ = llCodeTable[0]; 841 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); 842 LLtype = set_rle; 843 } else if (frame->stats.fseInit && !(RAND(seed) & 3) && 844 isSymbolSubset(llCodeTable, nbSeq, 845 frame->stats.litlengthSymbolSet, 35)) { 846 /* maybe do repeat mode if we're allowed to */ 847 LLtype = set_repeat; 848 } else if (!(RAND(seed) & 3)) { 849 /* maybe use the default distribution */ 850 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 851 LLtype = set_basic; 852 } else { 853 /* fall back on a full table */ 854 size_t nbSeq_1 = nbSeq; 855 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); 856 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } 857 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); 858 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 859 if (FSE_isError(NCountSize)) return ERROR(GENERIC); 860 op += NCountSize; } 861 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 862 LLtype = set_compressed; 863 } } 864 865 /* CTable for Offsets */ 866 /* see Literal Lengths for descriptions of mode choices */ 867 { U32 max = MaxOff; 868 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); 869 if (mostFrequent == nbSeq) { 870 *op++ = ofCodeTable[0]; 871 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); 872 Offtype = set_rle; 873 } else if (frame->stats.fseInit && !(RAND(seed) & 3) && 874 isSymbolSubset(ofCodeTable, nbSeq, 875 frame->stats.offsetSymbolSet, 28)) { 876 Offtype = set_repeat; 877 } else if (!(RAND(seed) & 3)) { 878 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 879 Offtype = set_basic; 880 } else { 881 size_t nbSeq_1 = nbSeq; 882 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); 883 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } 884 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); 885 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 886 if (FSE_isError(NCountSize)) return ERROR(GENERIC); 887 op += NCountSize; } 888 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 889 Offtype = set_compressed; 890 } } 891 892 /* CTable for MatchLengths */ 893 /* see Literal Lengths for descriptions of mode choices */ 894 { U32 max = MaxML; 895 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); 896 if (mostFrequent == nbSeq) { 897 *op++ = *mlCodeTable; 898 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); 899 MLtype = set_rle; 900 } else if (frame->stats.fseInit && !(RAND(seed) & 3) && 901 isSymbolSubset(mlCodeTable, nbSeq, 902 frame->stats.matchlengthSymbolSet, 52)) { 903 MLtype = set_repeat; 904 } else if (!(RAND(seed) & 3)) { 905 /* sometimes do default distribution */ 906 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 907 MLtype = set_basic; 908 } else { 909 /* fall back on table */ 910 size_t nbSeq_1 = nbSeq; 911 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); 912 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } 913 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); 914 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 915 if (FSE_isError(NCountSize)) return ERROR(GENERIC); 916 op += NCountSize; } 917 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 918 MLtype = set_compressed; 919 } } 920 frame->stats.fseInit = 1; 921 initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); 922 initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); 923 initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); 924 925 DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype); 926 927 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 928 929 /* Encoding Sequences */ 930 { BIT_CStream_t blockStream; 931 FSE_CState_t stateMatchLength; 932 FSE_CState_t stateOffsetBits; 933 FSE_CState_t stateLitLength; 934 935 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ 936 937 /* first symbols */ 938 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); 939 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); 940 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); 941 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); 942 if (MEM_32bits()) BIT_flushBits(&blockStream); 943 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); 944 if (MEM_32bits()) BIT_flushBits(&blockStream); 945 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); 946 BIT_flushBits(&blockStream); 947 948 { size_t n; 949 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ 950 BYTE const llCode = llCodeTable[n]; 951 BYTE const ofCode = ofCodeTable[n]; 952 BYTE const mlCode = mlCodeTable[n]; 953 U32 const llBits = LL_bits[llCode]; 954 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ 955 U32 const mlBits = ML_bits[mlCode]; 956 /* (7)*/ /* (7)*/ 957 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ 958 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ 959 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 960 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ 961 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) 962 BIT_flushBits(&blockStream); /* (7)*/ 963 BIT_addBits(&blockStream, sequences[n].litLength, llBits); 964 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 965 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); 966 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 967 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ 968 BIT_flushBits(&blockStream); /* (7)*/ 969 } } 970 971 FSE_flushCState(&blockStream, &stateMatchLength); 972 FSE_flushCState(&blockStream, &stateOffsetBits); 973 FSE_flushCState(&blockStream, &stateLitLength); 974 975 { size_t const streamSize = BIT_closeCStream(&blockStream); 976 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ 977 op += streamSize; 978 } } 979 980 frame->data = op; 981 982 return 0; 983} 984 985static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, 986 size_t literalsSize, dictInfo info) 987{ 988 seqStore_t seqStore; 989 size_t numSequences; 990 991 992 initSeqStore(&seqStore); 993 994 /* randomly generate sequences */ 995 numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); 996 /* write them out to the frame data */ 997 CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); 998 999 return numSequences; 1000} 1001 1002static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) 1003{ 1004 BYTE* const blockStart = (BYTE*)frame->data; 1005 size_t literalsSize; 1006 size_t nbSeq; 1007 1008 DISPLAYLEVEL(4, " compressed block:\n"); 1009 1010 literalsSize = writeLiteralsBlock(seed, frame, contentSize); 1011 1012 DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize); 1013 1014 nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); 1015 1016 DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq); 1017 1018 return (BYTE*)frame->data - blockStart; 1019} 1020 1021static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, 1022 int lastBlock, dictInfo info) 1023{ 1024 int const blockTypeDesc = RAND(seed) % 8; 1025 size_t blockSize; 1026 int blockType; 1027 1028 BYTE *const header = (BYTE*)frame->data; 1029 BYTE *op = header + 3; 1030 1031 DISPLAYLEVEL(4, " block:\n"); 1032 DISPLAYLEVEL(4, " block content size: %u\n", (U32)contentSize); 1033 DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); 1034 1035 if (blockTypeDesc == 0) { 1036 /* Raw data frame */ 1037 1038 RAND_buffer(seed, frame->src, contentSize); 1039 memcpy(op, frame->src, contentSize); 1040 1041 op += contentSize; 1042 blockType = 0; 1043 blockSize = contentSize; 1044 } else if (blockTypeDesc == 1) { 1045 /* RLE */ 1046 BYTE const symbol = RAND(seed) & 0xff; 1047 1048 op[0] = symbol; 1049 memset(frame->src, symbol, contentSize); 1050 1051 op++; 1052 blockType = 1; 1053 blockSize = contentSize; 1054 } else { 1055 /* compressed, most common */ 1056 size_t compressedSize; 1057 blockType = 2; 1058 1059 frame->oldStats = frame->stats; 1060 1061 frame->data = op; 1062 compressedSize = writeCompressedBlock(seed, frame, contentSize, info); 1063 if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ 1064 blockType = 0; 1065 memcpy(op, frame->src, contentSize); 1066 1067 op += contentSize; 1068 blockSize = contentSize; /* fall back on raw block if data doesn't 1069 compress */ 1070 1071 frame->stats = frame->oldStats; /* don't update the stats */ 1072 } else { 1073 op += compressedSize; 1074 blockSize = compressedSize; 1075 } 1076 } 1077 frame->src = (BYTE*)frame->src + contentSize; 1078 1079 DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); 1080 DISPLAYLEVEL(4, " block size field: %u\n", (U32)blockSize); 1081 1082 header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); 1083 MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); 1084 1085 frame->data = op; 1086} 1087 1088static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) 1089{ 1090 size_t contentLeft = frame->header.contentSize; 1091 size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1092 while (1) { 1093 /* 1 in 4 chance of ending frame */ 1094 int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3); 1095 size_t blockContentSize; 1096 if (lastBlock) { 1097 blockContentSize = contentLeft; 1098 } else { 1099 if (contentLeft > 0 && (RAND(seed) & 7)) { 1100 /* some variable size block */ 1101 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); 1102 } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { 1103 /* some full size block */ 1104 blockContentSize = maxBlockSize; 1105 } else { 1106 /* some empty block */ 1107 blockContentSize = 0; 1108 } 1109 } 1110 1111 writeBlock(seed, frame, blockContentSize, lastBlock, info); 1112 1113 contentLeft -= blockContentSize; 1114 if (lastBlock) break; 1115 } 1116} 1117 1118static void writeChecksum(frame_t* frame) 1119{ 1120 /* write checksum so implementations can verify their output */ 1121 U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); 1122 DISPLAYLEVEL(3, " checksum: %08x\n", (U32)digest); 1123 MEM_writeLE32(frame->data, (U32)digest); 1124 frame->data = (BYTE*)frame->data + 4; 1125} 1126 1127static void outputBuffer(const void* buf, size_t size, const char* const path) 1128{ 1129 /* write data out to file */ 1130 const BYTE* ip = (const BYTE*)buf; 1131 FILE* out; 1132 if (path) { 1133 out = fopen(path, "wb"); 1134 } else { 1135 out = stdout; 1136 } 1137 if (!out) { 1138 fprintf(stderr, "Failed to open file at %s: ", path); 1139 perror(NULL); 1140 exit(1); 1141 } 1142 1143 { size_t fsize = size; 1144 size_t written = 0; 1145 while (written < fsize) { 1146 written += fwrite(ip + written, 1, fsize - written, out); 1147 if (ferror(out)) { 1148 fprintf(stderr, "Failed to write to file at %s: ", path); 1149 perror(NULL); 1150 exit(1); 1151 } 1152 } 1153 } 1154 1155 if (path) { 1156 fclose(out); 1157 } 1158} 1159 1160static void initFrame(frame_t* fr) 1161{ 1162 memset(fr, 0, sizeof(*fr)); 1163 fr->data = fr->dataStart = FRAME_BUFFER; 1164 fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); 1165 fr->src = fr->srcStart = CONTENT_BUFFER; 1166 fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); 1167 1168 /* init repeat codes */ 1169 fr->stats.rep[0] = 1; 1170 fr->stats.rep[1] = 4; 1171 fr->stats.rep[2] = 8; 1172} 1173 1174/** 1175 * Generated a single zstd compressed block with no block/frame header. 1176 * Returns the final seed. 1177 */ 1178static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info) 1179{ 1180 size_t blockContentSize; 1181 int blockWritten = 0; 1182 BYTE* op; 1183 DISPLAYLEVEL(4, "block seed: %u\n", seed); 1184 initFrame(frame); 1185 op = (BYTE*)frame->data; 1186 1187 while (!blockWritten) { 1188 size_t cSize; 1189 /* generate window size */ 1190 { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10); 1191 int const mantissa = RAND(&seed) % 8; 1192 frame->header.windowSize = (1U << (exponent + 10)); 1193 frame->header.windowSize += (frame->header.windowSize / 8) * mantissa; 1194 } 1195 1196 /* generate content size */ 1197 { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1198 if (RAND(&seed) & 15) { 1199 /* some full size blocks */ 1200 blockContentSize = maxBlockSize; 1201 } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) { 1202 /* some small blocks <= 128 bytes*/ 1203 blockContentSize = RAND(&seed) % (1U << 7); 1204 } else { 1205 /* some variable size blocks */ 1206 blockContentSize = RAND(&seed) % maxBlockSize; 1207 } 1208 } 1209 1210 /* try generating a compressed block */ 1211 frame->oldStats = frame->stats; 1212 frame->data = op; 1213 cSize = writeCompressedBlock(&seed, frame, blockContentSize, info); 1214 if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */ 1215 /* data doesn't compress -- try again */ 1216 frame->stats = frame->oldStats; /* don't update the stats */ 1217 DISPLAYLEVEL(5, " can't compress block : try again \n"); 1218 } else { 1219 blockWritten = 1; 1220 DISPLAYLEVEL(4, " block size: %u \n", (U32)cSize); 1221 frame->src = (BYTE*)frame->src + blockContentSize; 1222 } 1223 } 1224 return seed; 1225} 1226 1227/* Return the final seed */ 1228static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info) 1229{ 1230 /* generate a complete frame */ 1231 DISPLAYLEVEL(3, "frame seed: %u\n", seed); 1232 initFrame(fr); 1233 1234 writeFrameHeader(&seed, fr, info); 1235 writeBlocks(&seed, fr, info); 1236 writeChecksum(fr); 1237 1238 return seed; 1239} 1240 1241/*_******************************************************* 1242* Dictionary Helper Functions 1243*********************************************************/ 1244/* returns 0 if successful, otherwise returns 1 upon error */ 1245static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) 1246{ 1247 /* allocate space for samples */ 1248 int ret = 0; 1249 unsigned const numSamples = 4; 1250 size_t sampleSizes[4]; 1251 BYTE* const samples = malloc(5000*sizeof(BYTE)); 1252 if (samples == NULL) { 1253 DISPLAY("Error: could not allocate space for samples\n"); 1254 return 1; 1255 } 1256 1257 /* generate samples */ 1258 { unsigned literalValue = 1; 1259 unsigned samplesPos = 0; 1260 size_t currSize = 1; 1261 while (literalValue <= 4) { 1262 sampleSizes[literalValue - 1] = currSize; 1263 { size_t k; 1264 for (k = 0; k < currSize; k++) { 1265 *(samples + (samplesPos++)) = (BYTE)literalValue; 1266 } } 1267 literalValue++; 1268 currSize *= 16; 1269 } } 1270 1271 { size_t dictWriteSize = 0; 1272 ZDICT_params_t zdictParams; 1273 size_t const headerSize = MAX(dictSize/4, 256); 1274 size_t const dictContentSize = dictSize - headerSize; 1275 BYTE* const dictContent = fullDict + headerSize; 1276 if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { 1277 DISPLAY("Error: dictionary size is too small\n"); 1278 ret = 1; 1279 goto exitGenRandomDict; 1280 } 1281 1282 /* init dictionary params */ 1283 memset(&zdictParams, 0, sizeof(zdictParams)); 1284 zdictParams.dictID = dictID; 1285 zdictParams.notificationLevel = 1; 1286 1287 /* fill in dictionary content */ 1288 RAND_buffer(&seed, (void*)dictContent, dictContentSize); 1289 1290 /* finalize dictionary with random samples */ 1291 dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, 1292 dictContent, dictContentSize, 1293 samples, sampleSizes, numSamples, 1294 zdictParams); 1295 1296 if (ZDICT_isError(dictWriteSize)) { 1297 DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); 1298 ret = 1; 1299 } 1300 } 1301 1302exitGenRandomDict: 1303 free(samples); 1304 return ret; 1305} 1306 1307static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ 1308 /* allocate space statically */ 1309 dictInfo dictOp; 1310 memset(&dictOp, 0, sizeof(dictOp)); 1311 dictOp.useDict = useDict; 1312 dictOp.dictContentSize = dictContentSize; 1313 dictOp.dictContent = dictContent; 1314 dictOp.dictID = dictID; 1315 return dictOp; 1316} 1317 1318/*-******************************************************* 1319* Test Mode 1320*********************************************************/ 1321 1322BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; 1323 1324static size_t testDecodeSimple(frame_t* fr) 1325{ 1326 /* test decoding the generated data with the simple API */ 1327 size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1328 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1329 1330 if (ZSTD_isError(ret)) return ret; 1331 1332 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1333 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1334 return ERROR(corruption_detected); 1335 } 1336 1337 return ret; 1338} 1339 1340static size_t testDecodeStreaming(frame_t* fr) 1341{ 1342 /* test decoding the generated data with the streaming API */ 1343 ZSTD_DStream* zd = ZSTD_createDStream(); 1344 ZSTD_inBuffer in; 1345 ZSTD_outBuffer out; 1346 size_t ret; 1347 1348 if (!zd) return ERROR(memory_allocation); 1349 1350 in.src = fr->dataStart; 1351 in.pos = 0; 1352 in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; 1353 1354 out.dst = DECOMPRESSED_BUFFER; 1355 out.pos = 0; 1356 out.size = ZSTD_DStreamOutSize(); 1357 1358 ZSTD_initDStream(zd); 1359 while (1) { 1360 ret = ZSTD_decompressStream(zd, &out, &in); 1361 if (ZSTD_isError(ret)) goto cleanup; /* error */ 1362 if (ret == 0) break; /* frame is done */ 1363 1364 /* force decoding to be done in chunks */ 1365 out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); 1366 } 1367 1368 ret = out.pos; 1369 1370 if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { 1371 return ERROR(corruption_detected); 1372 } 1373 1374cleanup: 1375 ZSTD_freeDStream(zd); 1376 return ret; 1377} 1378 1379static size_t testDecodeWithDict(U32 seed, genType_e genType) 1380{ 1381 /* create variables */ 1382 size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; 1383 U32 const dictID = RAND(&seed); 1384 size_t errorDetected = 0; 1385 BYTE* const fullDict = malloc(dictSize); 1386 if (fullDict == NULL) { 1387 return ERROR(GENERIC); 1388 } 1389 1390 /* generate random dictionary */ 1391 if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ 1392 errorDetected = ERROR(GENERIC); 1393 goto dictTestCleanup; 1394 } 1395 1396 1397 { frame_t fr; 1398 dictInfo info; 1399 ZSTD_DCtx* const dctx = ZSTD_createDCtx(); 1400 size_t ret; 1401 1402 /* get dict info */ 1403 { size_t const headerSize = MAX(dictSize/4, 256); 1404 size_t const dictContentSize = dictSize-headerSize; 1405 BYTE* const dictContent = fullDict+headerSize; 1406 info = initDictInfo(1, dictContentSize, dictContent, dictID); 1407 } 1408 1409 /* manually decompress and check difference */ 1410 if (genType == gt_frame) { 1411 /* Test frame */ 1412 generateFrame(seed, &fr, info); 1413 ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1414 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, 1415 fullDict, dictSize); 1416 } else { 1417 /* Test block */ 1418 generateCompressedBlock(seed, &fr, info); 1419 ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); 1420 if (ZSTD_isError(ret)) { 1421 errorDetected = ret; 1422 ZSTD_freeDCtx(dctx); 1423 goto dictTestCleanup; 1424 } 1425 ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1426 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); 1427 } 1428 ZSTD_freeDCtx(dctx); 1429 1430 if (ZSTD_isError(ret)) { 1431 errorDetected = ret; 1432 goto dictTestCleanup; 1433 } 1434 1435 if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { 1436 errorDetected = ERROR(corruption_detected); 1437 goto dictTestCleanup; 1438 } 1439 } 1440 1441dictTestCleanup: 1442 free(fullDict); 1443 return errorDetected; 1444} 1445 1446static size_t testDecodeRawBlock(frame_t* fr) 1447{ 1448 ZSTD_DCtx* dctx = ZSTD_createDCtx(); 1449 size_t ret = ZSTD_decompressBegin(dctx); 1450 if (ZSTD_isError(ret)) return ret; 1451 1452 ret = ZSTD_decompressBlock( 1453 dctx, 1454 DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1455 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1456 ZSTD_freeDCtx(dctx); 1457 if (ZSTD_isError(ret)) return ret; 1458 1459 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1460 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1461 return ERROR(corruption_detected); 1462 } 1463 1464 return ret; 1465} 1466 1467static int runBlockTest(U32* seed) 1468{ 1469 frame_t fr; 1470 U32 const seedCopy = *seed; 1471 { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1472 *seed = generateCompressedBlock(*seed, &fr, info); 1473 } 1474 1475 { size_t const r = testDecodeRawBlock(&fr); 1476 if (ZSTD_isError(r)) { 1477 DISPLAY("Error in block mode on test seed %u: %s\n", seedCopy, 1478 ZSTD_getErrorName(r)); 1479 return 1; 1480 } 1481 } 1482 1483 { size_t const r = testDecodeWithDict(*seed, gt_block); 1484 if (ZSTD_isError(r)) { 1485 DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", 1486 seedCopy, ZSTD_getErrorName(r)); 1487 return 1; 1488 } 1489 } 1490 return 0; 1491} 1492 1493static int runFrameTest(U32* seed) 1494{ 1495 frame_t fr; 1496 U32 const seedCopy = *seed; 1497 { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1498 *seed = generateFrame(*seed, &fr, info); 1499 } 1500 1501 { size_t const r = testDecodeSimple(&fr); 1502 if (ZSTD_isError(r)) { 1503 DISPLAY("Error in simple mode on test seed %u: %s\n", 1504 seedCopy, ZSTD_getErrorName(r)); 1505 return 1; 1506 } 1507 } 1508 { size_t const r = testDecodeStreaming(&fr); 1509 if (ZSTD_isError(r)) { 1510 DISPLAY("Error in streaming mode on test seed %u: %s\n", 1511 seedCopy, ZSTD_getErrorName(r)); 1512 return 1; 1513 } 1514 } 1515 { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ 1516 if (ZSTD_isError(r)) { 1517 DISPLAY("Error in dictionary mode on test seed %u: %s\n", 1518 seedCopy, ZSTD_getErrorName(r)); 1519 return 1; 1520 } 1521 } 1522 return 0; 1523} 1524 1525static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, 1526 genType_e genType) 1527{ 1528 unsigned fnum; 1529 1530 UTIL_time_t const startClock = UTIL_getTime(); 1531 U64 const maxClockSpan = testDurationS * SEC_TO_MICRO; 1532 1533 if (numFiles == 0 && !testDurationS) numFiles = 1; 1534 1535 DISPLAY("seed: %u\n", seed); 1536 1537 for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) { 1538 if (fnum < numFiles) 1539 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1540 else 1541 DISPLAYUPDATE("\r%u ", fnum); 1542 1543 { int const ret = (genType == gt_frame) ? 1544 runFrameTest(&seed) : 1545 runBlockTest(&seed); 1546 if (ret) return ret; 1547 } 1548 } 1549 1550 DISPLAY("\r%u tests completed: ", fnum); 1551 DISPLAY("OK\n"); 1552 1553 return 0; 1554} 1555 1556/*-******************************************************* 1557* File I/O 1558*********************************************************/ 1559 1560static int generateFile(U32 seed, const char* const path, 1561 const char* const origPath, genType_e genType) 1562{ 1563 frame_t fr; 1564 1565 DISPLAY("seed: %u\n", seed); 1566 1567 { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1568 if (genType == gt_frame) { 1569 generateFrame(seed, &fr, info); 1570 } else { 1571 generateCompressedBlock(seed, &fr, info); 1572 } 1573 } 1574 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1575 if (origPath) { 1576 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1577 } 1578 return 0; 1579} 1580 1581static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, 1582 const char* const origPath, genType_e genType) 1583{ 1584 char outPath[MAX_PATH]; 1585 unsigned fnum; 1586 1587 DISPLAY("seed: %u\n", seed); 1588 1589 for (fnum = 0; fnum < numFiles; fnum++) { 1590 frame_t fr; 1591 1592 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1593 1594 { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1595 if (genType == gt_frame) { 1596 seed = generateFrame(seed, &fr, info); 1597 } else { 1598 seed = generateCompressedBlock(seed, &fr, info); 1599 } 1600 } 1601 1602 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1603 DISPLAY("Error: path too long\n"); 1604 return 1; 1605 } 1606 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1607 1608 if (origPath) { 1609 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1610 DISPLAY("Error: path too long\n"); 1611 return 1; 1612 } 1613 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1614 } 1615 } 1616 1617 DISPLAY("\r%u/%u \n", fnum, numFiles); 1618 1619 return 0; 1620} 1621 1622static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, 1623 const char* const origPath, const size_t dictSize, 1624 genType_e genType) 1625{ 1626 char outPath[MAX_PATH]; 1627 BYTE* fullDict; 1628 U32 const dictID = RAND(&seed); 1629 int errorDetected = 0; 1630 1631 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1632 DISPLAY("Error: path too long\n"); 1633 return 1; 1634 } 1635 1636 /* allocate space for the dictionary */ 1637 fullDict = malloc(dictSize); 1638 if (fullDict == NULL) { 1639 DISPLAY("Error: could not allocate space for full dictionary.\n"); 1640 return 1; 1641 } 1642 1643 /* randomly generate the dictionary */ 1644 { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); 1645 if (ret != 0) { 1646 errorDetected = ret; 1647 goto dictCleanup; 1648 } 1649 } 1650 1651 /* write out dictionary */ 1652 if (numFiles != 0) { 1653 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1654 DISPLAY("Error: dictionary path too long\n"); 1655 errorDetected = 1; 1656 goto dictCleanup; 1657 } 1658 outputBuffer(fullDict, dictSize, outPath); 1659 } 1660 else { 1661 outputBuffer(fullDict, dictSize, "dictionary"); 1662 } 1663 1664 /* generate random compressed/decompressed files */ 1665 { unsigned fnum; 1666 for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { 1667 frame_t fr; 1668 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1669 { 1670 size_t const headerSize = MAX(dictSize/4, 256); 1671 size_t const dictContentSize = dictSize-headerSize; 1672 BYTE* const dictContent = fullDict+headerSize; 1673 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); 1674 if (genType == gt_frame) { 1675 seed = generateFrame(seed, &fr, info); 1676 } else { 1677 seed = generateCompressedBlock(seed, &fr, info); 1678 } 1679 } 1680 1681 if (numFiles != 0) { 1682 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1683 DISPLAY("Error: path too long\n"); 1684 errorDetected = 1; 1685 goto dictCleanup; 1686 } 1687 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1688 1689 if (origPath) { 1690 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1691 DISPLAY("Error: path too long\n"); 1692 errorDetected = 1; 1693 goto dictCleanup; 1694 } 1695 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1696 } 1697 } 1698 else { 1699 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1700 if (origPath) { 1701 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1702 } 1703 } 1704 } 1705 } 1706 1707dictCleanup: 1708 free(fullDict); 1709 return errorDetected; 1710} 1711 1712 1713/*_******************************************************* 1714* Command line 1715*********************************************************/ 1716static U32 makeSeed(void) 1717{ 1718 U32 t = (U32) time(NULL); 1719 return XXH32(&t, sizeof(t), 0) % 65536; 1720} 1721 1722static unsigned readInt(const char** argument) 1723{ 1724 unsigned val = 0; 1725 while ((**argument>='0') && (**argument<='9')) { 1726 val *= 10; 1727 val += **argument - '0'; 1728 (*argument)++; 1729 } 1730 return val; 1731} 1732 1733static void usage(const char* programName) 1734{ 1735 DISPLAY( "Usage :\n"); 1736 DISPLAY( " %s [args]\n", programName); 1737 DISPLAY( "\n"); 1738 DISPLAY( "Arguments :\n"); 1739 DISPLAY( " -p<path> : select output path (default:stdout)\n"); 1740 DISPLAY( " in multiple files mode this should be a directory\n"); 1741 DISPLAY( " -o<path> : select path to output original file (default:no output)\n"); 1742 DISPLAY( " in multiple files mode this should be a directory\n"); 1743 DISPLAY( " -s# : select seed (default:random based on time)\n"); 1744 DISPLAY( " -n# : number of files to generate (default:1)\n"); 1745 DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"); 1746 DISPLAY( " -T# : length of time to run tests for\n"); 1747 DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"); 1748 DISPLAY( " -h/H : display help/long help and exit\n"); 1749} 1750 1751static void advancedUsage(const char* programName) 1752{ 1753 usage(programName); 1754 DISPLAY( "\n"); 1755 DISPLAY( "Advanced arguments :\n"); 1756 DISPLAY( " --content-size : always include the content size in the frame header\n"); 1757 DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n"); 1758 DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n"); 1759 DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n"); 1760 DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n"); 1761 DISPLAY( " (this is ignored with gen-blocks)\n"); 1762} 1763 1764/*! readU32FromChar() : 1765 @return : unsigned integer value read from input in `char` format 1766 allows and interprets K, KB, KiB, M, MB and MiB suffix. 1767 Will also modify `*stringPtr`, advancing it to position where it stopped reading. 1768 Note : function result can overflow if digit string > MAX_UINT */ 1769static unsigned readU32FromChar(const char** stringPtr) 1770{ 1771 unsigned result = 0; 1772 while ((**stringPtr >='0') && (**stringPtr <='9')) 1773 result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; 1774 if ((**stringPtr=='K') || (**stringPtr=='M')) { 1775 result <<= 10; 1776 if (**stringPtr=='M') result <<= 10; 1777 (*stringPtr)++ ; 1778 if (**stringPtr=='i') (*stringPtr)++; 1779 if (**stringPtr=='B') (*stringPtr)++; 1780 } 1781 return result; 1782} 1783 1784/** longCommandWArg() : 1785 * check if *stringPtr is the same as longCommand. 1786 * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. 1787 * @return 0 and doesn't modify *stringPtr otherwise. 1788 */ 1789static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) 1790{ 1791 size_t const comSize = strlen(longCommand); 1792 int const result = !strncmp(*stringPtr, longCommand, comSize); 1793 if (result) *stringPtr += comSize; 1794 return result; 1795} 1796 1797int main(int argc, char** argv) 1798{ 1799 U32 seed = 0; 1800 int seedset = 0; 1801 unsigned numFiles = 0; 1802 unsigned testDuration = 0; 1803 int testMode = 0; 1804 const char* path = NULL; 1805 const char* origPath = NULL; 1806 int useDict = 0; 1807 unsigned dictSize = (10 << 10); /* 10 kB default */ 1808 genType_e genType = gt_frame; 1809 1810 int argNb; 1811 1812 /* Check command line */ 1813 for (argNb=1; argNb<argc; argNb++) { 1814 const char* argument = argv[argNb]; 1815 if(!argument) continue; /* Protection if argument empty */ 1816 1817 /* Handle commands. Aggregated commands are allowed */ 1818 if (argument[0]=='-') { 1819 argument++; 1820 while (*argument!=0) { 1821 switch(*argument) 1822 { 1823 case 'h': 1824 usage(argv[0]); 1825 return 0; 1826 case 'H': 1827 advancedUsage(argv[0]); 1828 return 0; 1829 case 'v': 1830 argument++; 1831 g_displayLevel++; 1832 break; 1833 case 's': 1834 argument++; 1835 seedset=1; 1836 seed = readInt(&argument); 1837 break; 1838 case 'n': 1839 argument++; 1840 numFiles = readInt(&argument); 1841 break; 1842 case 'T': 1843 argument++; 1844 testDuration = readInt(&argument); 1845 if (*argument == 'm') { 1846 testDuration *= 60; 1847 argument++; 1848 if (*argument == 'n') argument++; 1849 } 1850 break; 1851 case 'o': 1852 argument++; 1853 origPath = argument; 1854 argument += strlen(argument); 1855 break; 1856 case 'p': 1857 argument++; 1858 path = argument; 1859 argument += strlen(argument); 1860 break; 1861 case 't': 1862 argument++; 1863 testMode = 1; 1864 break; 1865 case '-': 1866 argument++; 1867 if (strcmp(argument, "content-size") == 0) { 1868 opts.contentSize = 1; 1869 } else if (longCommandWArg(&argument, "use-dict=")) { 1870 dictSize = readU32FromChar(&argument); 1871 useDict = 1; 1872 } else if (strcmp(argument, "gen-blocks") == 0) { 1873 genType = gt_block; 1874 } else if (longCommandWArg(&argument, "max-block-size-log=")) { 1875 U32 value = readU32FromChar(&argument); 1876 if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) { 1877 g_maxBlockSize = 1U << value; 1878 } 1879 } else if (longCommandWArg(&argument, "max-content-size-log=")) { 1880 U32 value = readU32FromChar(&argument); 1881 g_maxDecompressedSizeLog = 1882 MIN(MAX_DECOMPRESSED_SIZE_LOG, value); 1883 } else { 1884 advancedUsage(argv[0]); 1885 return 1; 1886 } 1887 argument += strlen(argument); 1888 break; 1889 default: 1890 usage(argv[0]); 1891 return 1; 1892 } } } } /* for (argNb=1; argNb<argc; argNb++) */ 1893 1894 if (!seedset) { 1895 seed = makeSeed(); 1896 } 1897 1898 if (testMode) { 1899 return runTestMode(seed, numFiles, testDuration, genType); 1900 } else { 1901 if (testDuration) { 1902 DISPLAY("Error: -T requires test mode (-t)\n\n"); 1903 usage(argv[0]); 1904 return 1; 1905 } 1906 } 1907 1908 if (!path) { 1909 DISPLAY("Error: path is required in file generation mode\n"); 1910 usage(argv[0]); 1911 return 1; 1912 } 1913 1914 if (numFiles == 0 && useDict == 0) { 1915 return generateFile(seed, path, origPath, genType); 1916 } else if (useDict == 0){ 1917 return generateCorpus(seed, numFiles, path, origPath, genType); 1918 } else { 1919 /* should generate files with a dictionary */ 1920 return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); 1921 } 1922 1923} 1924