/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"


/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : the candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/* *********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/* ZSTD_hufCTablesMetadata_t :
 *  Stores the Literals Block Type for a super-block in hType, and
 *  the Huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of the Huffman tree description, in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/* ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  the FSE tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of the FSE tables, in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/* ZSTD_buildBlockEntropyStats() :
 *  Builds entropy statistics for the block.
 *  @return : 0 on success, or an error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);

/* *******************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
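
/* Consumption sketch (illustrative only, not part of the library API) :
 * readers advance `pos` / `posInSequence` rather than mutating the sequences,
 * so a hypothetical loop draining whole sequences could look like :
 *     while (rss->pos < rss->size) {
 *         rawSeq const s = rss->seq[rss->pos++];
 *         ... consume s.litLength literals, then a match of s.matchLength ...
 *     }
 */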

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable. */
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/*
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/*
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    struct xxh64_state xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */

    /* Tracing */

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}
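
/* Worked example (illustrative) : litLength=40 falls in the table range and maps
 * to LL code 23; litLength=100 is > 63, so ZSTD_highbit32(100)=6 yields code
 * 6 + LL_deltaCode = 25, consistent with the literals-length code table of the format. */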

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because that's the format used in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
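
/* Worked example (illustrative) : a match of length 53 stores mlBase = 53 - MINMATCH
 * = 50, which the table maps to ML code 38; mlBase=200 is > 127, so
 * ZSTD_highbit32(200)=7 yields code 7 + ML_deltaCode = 43. */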

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes an uncompressed block from src into the dst buffer.
 * Returns the size of the block (header included) */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
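
/* Worked example (illustrative) : the 3-byte block header packs lastBlock in bit 0,
 * the block type in bits 1-2, and the block size starting at bit 3. For a last raw
 * block of 100 bytes : 1 + (bt_raw<<1) + (100<<3) = 0x321, written little-endian
 * as bytes 21 03 00. */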

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression gain required
 * to generate a compressed block or a compressed literals section.
 * note : use the same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}
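
/* Worked example (illustrative) : for srcSize=4096 with a strategy below ZSTD_btultra,
 * minlog=6, so a compressed form must save at least (4096>>6)+2 = 66 bytes to be kept;
 * with ZSTD_btultra2 (==9), minlog=8 lowers the bar to (4096>>8)+2 = 18 bytes. */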

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
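
/* Worked example (illustrative) : with ZSTD_REP_NUM==3, ZSTD_REP_MOVE==2, the stored
 * sum-type packs repcodes and real offsets into a single U32 :
 *   STORE_REPCODE_2    == 1     (repcode ID 2 : STORED_REPCODE(1) recovers 2)
 *   STORE_OFFSET(1000) == 1002  (real offset  : STORED_OFFSET(1002) recovers 1000)
 * STORED_TO_OFFBASE() then shifts the whole range up by one before it is written
 * into seqStore->sequences[].offBase. */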

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
 *  @offBase_minus1 : Users should employ the macros STORE_REPCODE_X and STORE_OFFSET().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to over-read literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase_minus1,
              size_t matchLength)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case : we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);

    /* match Length */
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (mlBase>0xFFFF) {
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}
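
/* Usage sketch (illustrative only) : a block compressor that found a 48-byte match
 * at real offset 1024, preceded by 7 literals starting at `anchor`, would store :
 *     ZSTD_storeSeq(seqStore, 7, anchor, iend, STORE_OFFSET(1024), 48);
 * and a repeat of the most recent offset, with no preceding literals :
 *     ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
 * `anchor`, `iend` and `matchLength` are hypothetical local variables here. */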

/* ZSTD_updateRep() :
 * updates in-place @rep (array of repeat offsets)
 * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
 */
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = STORED_OFFSET(offBase_minus1);
    } else {   /* repcode */
        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            /* nothing to do */
        }
    }
}
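
/* Worked example (illustrative) : starting from rep = {8, 16, 24} :
 *   - a full offset STORE_OFFSET(100) shifts the history : rep becomes {100, 8, 16}
 *   - STORE_REPCODE_2 with ll0==0 gives repCode 1 : rep becomes {16, 8, 24}
 *   - STORE_REPCODE_3 with ll0==1 gives repCode 3 (==ZSTD_REP_NUM), the special
 *     "rep[0] - 1" case : rep becomes {7, 8, 16}. */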

typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    repcodes_t newReps;
    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
    return newReps;
}


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bit mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}
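
/* Worked example (illustrative) : `val` is the XOR of two same-size loads, so on a
 * little-endian CPU its low-order zero bytes are exactly the bytes that matched in
 * memory order. If the first two bytes match and the third differs, the low 16 bits
 * of the XOR are zero, ctz(val) >= 16, and ctz(val)>>3 reports 2 common bytes. */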


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
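
/* Illustrative note : ZSTD_count() returns the length of the common prefix of
 * pIn and pMatch, never reading at or past pInLimit. E.g. comparing "abcdXY"
 * against "abcdZQ" with pInLimit = pIn+6 returns 4. */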

/* ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, the match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
 *  Hashes
 ***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
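
/* Usage sketch (illustrative only) : each ZSTD_hashN() keeps the first N bytes of
 * the little-endian load (via the left shift), multiplies by a large odd prime, and
 * keeps the top hBits bits, yielding an index into a table of 1<<hBits entries.
 * A hypothetical matchfinder step could look like :
 *     size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
 *     U32 const candidate = ms->hashTable[h];
 *     ms->hashTable[h] = (U32)(ip - ms->window.base);
 */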

/* ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}
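
/* Worked example (illustrative) : this is exponentiation by squaring, so
 * ZSTD_ipow(3, 5) evaluates as 3^1 * 3^4 = 243 in O(log exponent) multiplies
 * (all arithmetic implicitly mod 2^64). */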

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/* ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/* ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/* ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of `length` bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/* ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
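
/* Rolling invariant (illustrative sketch, not part of the library API) :
 * sliding the window one byte to the right via ZSTD_rollingHash_rotate() must
 * equal recomputing the hash from scratch over the shifted window.
 * The hypothetical helper below assumes buf has at least windowLen+1 readable bytes. */
MEM_STATIC int ZSTD_rollingHash_rotateExample(BYTE const* buf, U32 windowLen)
{
    U64 const primePower = ZSTD_rollingHash_primePower(windowLen);
    U64 const hash = ZSTD_rollingHash_compute(buf, windowLen);
    U64 const rotated = ZSTD_rollingHash_rotate(hash, buf[0], buf[windowLen], primePower);
    return rotated == ZSTD_rollingHash_compute(buf + 1, windowLen);   /* always 1 */
}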

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/*
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/*
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/*
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/*
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/*
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}

/*
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
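
/* Worked example (illustrative) : with cycleLog=20 (cycleSize=1u<<20), maxDist=1u<<27
 * and curr=0xF0000123 : currentCycle = 0x123, so newCurrent = 0x123 + (1u<<27) =
 * 0x8000123 and correction = 0xE8000000. The low 20 bits of curr and newCurrent
 * agree, which is what keeps the chain tables / binary tree consistent. */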

/*
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates the dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}

/*
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the previous extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/*
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/*
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32    const maxDistance = 1U << windowLog;
    U32    const lowestValid = ms->window.dictLimit;
    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32    const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}



/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif



/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/* ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/* ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */