1/*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11/* This header contains definitions
12 * that shall **only** be used by modules within lib/compress.
13 */
14
15#ifndef ZSTD_COMPRESS_H
16#define ZSTD_COMPRESS_H
17
18/*-*************************************
19*  Dependencies
20***************************************/
21#include "zstd_internal.h"
22#ifdef ZSTD_MULTITHREAD
23#  include "zstdmt_compress.h"
24#endif
25
26#if defined (__cplusplus)
27extern "C" {
28#endif
29
30/*-*************************************
31*  Constants
32***************************************/
#define kSearchStrength      8       /* heuristic : search-depth budget shared by match finders */
#define HASH_READ_SIZE       8       /* nb of bytes a hash read may consume ; also min usable extDict size (see ZSTD_window_update) */
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
41
42
43/*-*************************************
44*  Context memory management
45***************************************/
/* lifecycle stage of a compression context */
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
/* stage of the streaming state machine (load input, then flush output) */
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

/* Reference to a raw-content prefix used as a single-usage dictionary (see ZSTD_CCtx_s.prefixDict) */
typedef struct ZSTD_prefixDict_s {
    const void* dict;                       /* start of prefix content */
    size_t dictSize;                        /* nb of bytes at `dict` */
    ZSTD_dictContentType_e dictContentType; /* how to interpret the content */
} ZSTD_prefixDict;
54
/* Entropy coding tables : one Huffman table for literals,
 * three FSE tables for the sequence symbol alphabets,
 * plus their repeat-mode flags (HUF_repeat / FSE_repeat). */
typedef struct {
    U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];                         /* Huffman table for literals */
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    HUF_repeat hufCTable_repeatMode;
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_entropyCTables_t;
65
/* One match candidate reported by the match finder (consumed by the optimal parser) */
typedef struct {
    U32 off;    /* match offset */
    U32 len;    /* match length */
} ZSTD_match_t;

/* One entry of the optimal parser's price table (one per source position) */
typedef struct {
    int price;                /* cost to reach this position — presumably in fractional bits; confirm in zstd_opt */
    U32 off;                  /* offset of the match ending here */
    U32 mlen;                 /* length of the match ending here */
    U32 litlen;               /* nb of literals preceding the match */
    U32 rep[ZSTD_REP_NUM];    /* repeat-offset history at this position */
} ZSTD_optimal_t;
78
/* State of the optimal parser's statistical price model. */
typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    U32* litFreq;               /* table of literals statistics, of size 256 */
    U32* litLengthFreq;         /* table of litLength statistics, of size (MaxLL+1) */
    U32* matchLengthFreq;       /* table of matchLength statistics, of size (MaxML+1) */
    U32* offCodeFreq;           /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;   /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    /* begin updated by ZSTD_setLog2Prices */
    U32  log2litSum;             /* pow2 to compare log2(litfreq) to */
    U32  log2litLengthSum;       /* pow2 to compare log2(llfreq) to */
    U32  log2matchLengthSum;     /* pow2 to compare log2(mlfreq) to */
    U32  log2offCodeSum;         /* pow2 to compare log2(offreq) to */
    /* end : updated by ZSTD_setLog2Prices */
    U32  staticPrices;           /* prices follow a pre-defined cost structure, statistics are irrelevant */
} optState_t;
100
/* Per-block entropy state + repeat offsets, carried from one block to the next */
typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];    /* repeat-offset history */
} ZSTD_compressedBlockState_t;

/* Round-buffer window : tracks prefix (current segment) and extDict (previous segment).
 * Indexes are expressed relative to `base` ; extDict indexes relative to `dictBase`. */
typedef struct {
    BYTE const* nextSrc;    /* next block here to continue on current prefix */
    BYTE const* base;       /* All regular indexes relative to this position */
    BYTE const* dictBase;   /* extDict indexes relative to this position */
    U32 dictLimit;          /* below that point, need extDict */
    U32 lowLimit;           /* below that point, no more data */
} ZSTD_window_t;

/* Match-finder state : window, hash/chain tables, and optimal-parser state */
typedef struct {
    ZSTD_window_t window;      /* State for window round buffer management */
    U32 loadedDictEnd;         /* index of end of dictionary */
    U32 nextToUpdate;          /* index from which to continue table update */
    U32 nextToUpdate3;         /* index from which to continue table update */
    U32 hashLog3;              /* dispatch table : larger == faster, more memory */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    optState_t opt;         /* optimal parser state */
} ZSTD_matchState_t;
125
/* Double-buffered block state : `prev` feeds repeat-mode decisions, `next` receives updates */
typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

/* One entry of the long-distance-matching hash table */
typedef struct {
    U32 offset;     /* index of the hashed position */
    U32 checksum;   /* checksum of the hashed content, to filter collisions */
} ldmEntry_t;

/* Long-distance matcher state */
typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    U64 hashPower;          /* Used to compute the rolling hash.
                             * Depends on ldmParams.minMatchLength */
} ldmState_t;

/* User-tunable long-distance matching parameters */
typedef struct {
    U32 enableLdm;          /* 1 if enable long distance matching */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashEveryLog;       /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

/* A sequence in raw (unencoded) form */
typedef struct {
    U32 offset;         /* match offset — NOTE(review): presumably raw distance, not offset code; confirm against producer */
    U32 litLength;      /* nb of literals before the match */
    U32 matchLength;    /* nb of matched bytes */
} rawSeq;

/* A consumable store of raw sequences */
typedef struct {
  rawSeq* seq;     /* The start of the sequences */
  size_t pos;      /* The position where reading stopped. <= size. */
  size_t size;     /* The number of sequences. <= capacity. */
  size_t capacity; /* The capacity of the `seq` pointer */
} rawSeqStore_t;
166
/* All compression parameters a user can set through the advanced API. */
struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;               /* frame format selector */
    ZSTD_compressionParameters cParams; /* low-level compression parameters */
    ZSTD_frameParameters fParams;       /* frame header content options */

    int compressionLevel;
    int disableLiteralCompression;      /* if != 0, literals are emitted uncompressed */
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */

    /* Multithreading: used to pass parameters to mtctx */
    unsigned nbWorkers;
    unsigned jobSize;
    unsigned overlapSizeLog;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
188
/* Main compression context.
 * Owns the workspace, entropy/match state, streaming buffers,
 * and (optionally) the multi-threading context. */
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;    /* params as requested by user; applied at next (re)init */
    ZSTD_CCtx_params appliedParams;      /* params currently in effect */
    U32   dictID;                        /* id of loaded dictionary (0 == none) */
    void* workSpace;                     /* single allocation holding all internal tables/buffers */
    size_t workSpaceSize;
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;              /* running content checksum */
    ZSTD_customMem customMem;
    size_t staticSize;                   /* != 0 when cctx is statically allocated (not resizable) */

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;      /* position of next block to compress within inBuff */
    size_t inBuffPos;         /* write position within inBuff */
    size_t inBuffTarget;      /* fill target before triggering a block compression */
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Dictionary */
    ZSTD_CDict* cdictLocal;       /* owned cdict, created from user-provided dict content */
    const ZSTD_CDict* cdict;      /* cdict in use (may point at cdictLocal or a user cdict) */
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif
};
237
238
/* ZSTD_blockCompressor :
 * common signature of all block compressors (one implementation per strategy).
 * Consumes [src, src+srcSize), appends sequences into `seqStore`,
 * and updates the repeat-offset history `rep`. */
typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
/* selects the block compressor matching `strat`, in prefix or extDict variant */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
243
244
245MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
246{
247    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
248                                       8,  9, 10, 11, 12, 13, 14, 15,
249                                      16, 16, 17, 17, 18, 18, 19, 19,
250                                      20, 20, 20, 20, 21, 21, 21, 21,
251                                      22, 22, 22, 22, 22, 22, 22, 22,
252                                      23, 23, 23, 23, 23, 23, 23, 23,
253                                      24, 24, 24, 24, 24, 24, 24, 24,
254                                      24, 24, 24, 24, 24, 24, 24, 24 };
255    static const U32 LL_deltaCode = 19;
256    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
257}
258
259/* ZSTD_MLcode() :
260 * note : mlBase = matchLength - MINMATCH;
261 *        because it's the format it's stored in seqStore->sequences */
262MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
263{
264    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
265                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
266                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
267                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
268                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
269                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
270                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
271                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
272    static const U32 ML_deltaCode = 36;
273    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
274}
275
/*! ZSTD_storeSeq() :
 *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
 *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Lengths exceeding 16 bits are recorded via the longLength mechanism :
 *  at most one such length per block, flagged by longLengthID + longLengthPos.
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
{
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
    }
#endif
    /* copy Literals */
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);   /* wildcopy may over-read/over-write within the buffer bound asserted above */
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 1;          /* 1 means "long literal length" */
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;   /* truncation is safe : real value recoverable via longLength */

    /* match offset */
    seqStorePtr->sequences[0].offset = offsetCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 2;          /* 2 means "long match length" */
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}
317
318
319/*-*************************************
320*  Match length counter
321***************************************/
322static unsigned ZSTD_NbCommonBytes (size_t val)
323{
324    if (MEM_isLittleEndian()) {
325        if (MEM_64bits()) {
326#       if defined(_MSC_VER) && defined(_WIN64)
327            unsigned long r = 0;
328            _BitScanForward64( &r, (U64)val );
329            return (unsigned)(r>>3);
330#       elif defined(__GNUC__) && (__GNUC__ >= 4)
331            return (__builtin_ctzll((U64)val) >> 3);
332#       else
333            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
334                                                     0, 3, 1, 3, 1, 4, 2, 7,
335                                                     0, 2, 3, 6, 1, 5, 3, 5,
336                                                     1, 3, 4, 4, 2, 5, 6, 7,
337                                                     7, 0, 1, 2, 3, 3, 4, 6,
338                                                     2, 6, 5, 5, 3, 4, 5, 6,
339                                                     7, 1, 2, 4, 6, 4, 4, 5,
340                                                     7, 2, 6, 5, 7, 6, 7, 7 };
341            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
342#       endif
343        } else { /* 32 bits */
344#       if defined(_MSC_VER)
345            unsigned long r=0;
346            _BitScanForward( &r, (U32)val );
347            return (unsigned)(r>>3);
348#       elif defined(__GNUC__) && (__GNUC__ >= 3)
349            return (__builtin_ctz((U32)val) >> 3);
350#       else
351            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
352                                                     3, 2, 2, 1, 3, 2, 0, 1,
353                                                     3, 3, 1, 2, 2, 2, 2, 0,
354                                                     3, 1, 2, 0, 1, 0, 1, 1 };
355            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
356#       endif
357        }
358    } else {  /* Big Endian CPU */
359        if (MEM_64bits()) {
360#       if defined(_MSC_VER) && defined(_WIN64)
361            unsigned long r = 0;
362            _BitScanReverse64( &r, val );
363            return (unsigned)(r>>3);
364#       elif defined(__GNUC__) && (__GNUC__ >= 4) && __has_builtin(__builtin_clzll)
365            return (__builtin_clzll(val) >> 3);
366#       else
367            unsigned r;
368            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
369            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
370            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
371            r += (!val);
372            return r;
373#       endif
374        } else { /* 32 bits */
375#       if defined(_MSC_VER)
376            unsigned long r = 0;
377            _BitScanReverse( &r, (unsigned long)val );
378            return (unsigned)(r>>3);
379#       elif defined(__GNUC__) && (__GNUC__ >= 3) && __has_builtin(__builtin_clz)
380            return (__builtin_clz((U32)val) >> 3);
381#       else
382            unsigned r;
383            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
384            r += (!val);
385            return r;
386#       endif
387    }   }
388}
389
390
/* ZSTD_count() :
 * counts the length of the common prefix of `pIn` and `pMatch`,
 * comparing one machine word at a time,
 * never reading `pIn` at or beyond `pInLimit`.
 * @return : nb of equal bytes */
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);   /* last position where a full-word read stays in bounds */

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }   /* first words differ : count matching bytes within them */
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    /* tail : fewer than sizeof(size_t) bytes remain ; compare with narrowing reads */
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
411
412/** ZSTD_count_2segments() :
413 *  can count match length with `ip` & `match` in 2 different segments.
414 *  convention : on reaching mEnd, match count continue starting from iStart
415 */
416MEM_STATIC size_t
417ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
418                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
419{
420    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
421    size_t const matchLength = ZSTD_count(ip, match, vEnd);
422    if (match + matchLength != mEnd) return matchLength;
423    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
424}
425
426
427/*-*************************************
428 *  Hashes
429 ***************************************/
/* Multiplicative hash functions, one per read width (3..8 bytes).
 * Each reads a little-endian value, keeps only its N lowest bytes
 * (via the left shift), multiplies by a width-specific prime,
 * and keeps the top `h` bits as hash value. */
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
453
454MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
455{
456    switch(mls)
457    {
458    default:
459    case 4: return ZSTD_hash4Ptr(p, hBits);
460    case 5: return ZSTD_hash5Ptr(p, hBits);
461    case 6: return ZSTD_hash6Ptr(p, hBits);
462    case 7: return ZSTD_hash7Ptr(p, hBits);
463    case 8: return ZSTD_hash8Ptr(p, hBits);
464    }
465}
466
467/*-*************************************
468*  Round buffer management
469***************************************/
/* Max index value ("current") allowed before overflow correction must run
 * (see ZSTD_window_needOverflowCorrection / ZSTD_window_correctOverflow) */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
476
477/**
478 * ZSTD_window_clear():
479 * Clears the window containing the history by simply setting it to empty.
480 */
481MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
482{
483    size_t const endT = (size_t)(window->nextSrc - window->base);
484    U32 const end = (U32)endT;
485
486    window->lowLimit = end;
487    window->dictLimit = end;
488}
489
490/**
491 * ZSTD_window_hasExtDict():
492 * Returns non-zero if the window has a non-empty extDict.
493 */
494MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
495{
496    return window.lowLimit < window.dictLimit;
497}
498
499/**
500 * ZSTD_window_needOverflowCorrection():
501 * Returns non-zero if the indices are getting too large and need overflow
502 * protection.
503 */
504MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
505                                                  void const* srcEnd)
506{
507    U32 const current = (U32)((BYTE const*)srcEnd - window.base);
508    return current > ZSTD_CURRENT_MAX;
509}
510
511/**
512 * ZSTD_window_correctOverflow():
513 * Reduces the indices to protect from index overflow.
514 * Returns the correction made to the indices, which must be applied to every
515 * stored index.
516 *
517 * The least significant cycleLog bits of the indices must remain the same,
518 * which may be 0. Every index up to maxDist in the past must be valid.
519 * NOTE: (maxDist & cycleMask) must be zero.
520 */
521MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
522                                           U32 maxDist, void const* src)
523{
524    /* preemptive overflow correction:
525     * 1. correction is large enough:
526     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
527     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
528     *
529     *    current - newCurrent
530     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
531     *    > (3<<29) - (1<<chainLog)
532     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
533     *    > 1<<29
534     *
535     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
536     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
537     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
538     *    In 32-bit mode we are safe, because (chainLog <= 29), so
539     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
540     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
541     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
542     */
543    U32 const cycleMask = (1U << cycleLog) - 1;
544    U32 const current = (U32)((BYTE const*)src - window->base);
545    U32 const newCurrent = (current & cycleMask) + maxDist;
546    U32 const correction = current - newCurrent;
547    assert((maxDist & cycleMask) == 0);
548    assert(current > newCurrent);
549    /* Loose bound, should be around 1<<29 (see above) */
550    assert(correction > 1<<28);
551
552    window->base += correction;
553    window->dictBase += correction;
554    window->lowLimit -= correction;
555    window->dictLimit -= correction;
556
557    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
558             window->lowLimit);
559    return correction;
560}
561
562/**
563 * ZSTD_window_enforceMaxDist():
564 * Updates lowLimit so that:
565 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
566 * This allows a simple check that index >= lowLimit to see if index is valid.
567 * This must be called before a block compression call, with srcEnd as the block
568 * source end.
569 * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
570 * This is because dictionaries are allowed to be referenced as long as the last
571 * byte of the dictionary is in the window, but once they are out of range,
572 * they cannot be referenced. If loadedDictEndPtr is NULL, we use
573 * loadedDictEnd == 0.
574 */
575MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
576                                           void const* srcEnd, U32 maxDist,
577                                           U32* loadedDictEndPtr)
578{
579    U32 const current = (U32)((BYTE const*)srcEnd - window->base);
580    U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
581    if (current > maxDist + loadedDictEnd) {
582        U32 const newLowLimit = current - maxDist;
583        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
584        if (window->dictLimit < window->lowLimit) {
585            DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
586                     window->lowLimit);
587            window->dictLimit = window->lowLimit;
588        }
589        if (loadedDictEndPtr)
590            *loadedDictEndPtr = 0;
591    }
592}
593
594/**
595 * ZSTD_window_update():
596 * Updates the window by appending [src, src + srcSize) to the window.
597 * If it is not contiguous, the current prefix becomes the extDict, and we
598 * forget about the extDict. Handles overlap of the prefix and extDict.
599 * Returns non-zero if the segment is contiguous.
600 */
601MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
602                                  void const* src, size_t srcSize)
603{
604    BYTE const* const ip = (BYTE const*)src;
605    U32 contiguous = 1;
606    /* Check if blocks follow each other */
607    if (src != window->nextSrc) {
608        /* not contiguous */
609        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
610        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
611                 window->dictLimit);
612        window->lowLimit = window->dictLimit;
613        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
614        window->dictLimit = (U32)distanceFromBase;
615        window->dictBase = window->base;
616        window->base = ip - distanceFromBase;
617        // ms->nextToUpdate = window->dictLimit;
618        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
619        contiguous = 0;
620    }
621    window->nextSrc = ip + srcSize;
622    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
623    if ( (ip+srcSize > window->dictBase + window->lowLimit)
624       & (ip < window->dictBase + window->dictLimit)) {
625        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
626        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
627        window->lowLimit = lowLimitMax;
628    }
629    return contiguous;
630}
631
632#if defined (__cplusplus)
633}
634#endif
635
636
637/* ==============================================================
638 * Private declarations
639 * These prototypes shall only be called from within lib/compress
640 * ============================================================== */
641
/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);

/*! ZSTD_compressStream_generic() :
 *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                   ZSTD_outBuffer* output,
                                   ZSTD_inBuffer* input,
                                   ZSTD_EndDirective const flushMode);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies : extracts the compression parameters stored in `cdict` */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    const ZSTD_CDict* cdict,
                                    ZSTD_CCtx_params params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
                                 ZSTD_CCtx_params params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
707
708
709#endif /* ZSTD_COMPRESS_H */
710