1/*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12/******************************************
13*  Includes
14******************************************/
15#include <stddef.h>    /* size_t, ptrdiff_t */
16#include "zstd_v01.h"
17#include "../common/error_private.h"
18
19
20/******************************************
21*  Static allocation
22******************************************/
23/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
25
26/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
28#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
30
31
32/******************************************
33*  Error Management
34******************************************/
35#define FSE_LIST_ERRORS(ITEM) \
36        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39        ITEM(FSE_ERROR_corruptionDetected) \
40        ITEM(FSE_ERROR_maxCode)
41
42#define FSE_GENERATE_ENUM(ENUM) ENUM,
43typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44
45
46/******************************************
47*  FSE symbol compression API
48******************************************/
49/*
50   This API consists of small unitary functions, which highly benefit from being inlined.
51   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52   Visual seems to do it automatically.
53   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54   If none of these solutions is applicable, include "fse.c" directly.
55*/
56
57typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
58typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
59
60typedef struct
61{
62    size_t bitContainer;
63    int    bitPos;
64    char*  startPtr;
65    char*  ptr;
66    char*  endPtr;
67} FSE_CStream_t;
68
69typedef struct
70{
71    ptrdiff_t   value;
72    const void* stateTable;
73    const void* symbolTT;
74    unsigned    stateLog;
75} FSE_CState_t;
76
77typedef struct
78{
79    size_t   bitContainer;
80    unsigned bitsConsumed;
81    const char* ptr;
82    const char* start;
83} FSE_DStream_t;
84
85typedef struct
86{
87    size_t      state;
88    const void* table;   /* precise table may vary, depending on U16 */
89} FSE_DState_t;
90
91typedef enum { FSE_DStream_unfinished = 0,
92               FSE_DStream_endOfBuffer = 1,
93               FSE_DStream_completed = 2,
94               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
95               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
96
97
98/****************************************************************
99*  Tuning parameters
100****************************************************************/
101/* MEMORY_USAGE :
102*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103*  Increasing memory usage improves compression ratio
104*  Reduced memory usage can improve speed, due to cache effect
105*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106#define FSE_MAX_MEMORY_USAGE 14
107#define FSE_DEFAULT_MEMORY_USAGE 13
108
109/* FSE_MAX_SYMBOL_VALUE :
110*  Maximum symbol value authorized.
111*  Required for proper stack allocation */
112#define FSE_MAX_SYMBOL_VALUE 255
113
114
115/****************************************************************
116*  template functions type & suffix
117****************************************************************/
118#define FSE_FUNCTION_TYPE BYTE
119#define FSE_FUNCTION_EXTENSION
120
121
122/****************************************************************
123*  Byte symbol type
124****************************************************************/
125typedef struct
126{
127    unsigned short newState;
128    unsigned char  symbol;
129    unsigned char  nbBits;
130} FSE_decode_t;   /* size == U32 */
131
132
133
134/****************************************************************
135*  Compiler specifics
136****************************************************************/
137#ifdef _MSC_VER    /* Visual Studio */
138#  define FORCE_INLINE static __forceinline
139#  include <intrin.h>                    /* For Visual 2005 */
140#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
141#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
142#else
143#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
145#    ifdef __GNUC__
146#      define FORCE_INLINE static inline __attribute__((always_inline))
147#    else
148#      define FORCE_INLINE static inline
149#    endif
150#  else
151#    define FORCE_INLINE static
152#  endif /* __STDC_VERSION__ */
153#endif
154
155
156/****************************************************************
157*  Includes
158****************************************************************/
159#include <stdlib.h>     /* malloc, free, qsort */
160#include <string.h>     /* memcpy, memset */
161#include <stdio.h>      /* printf (debug) */
162
163
164#ifndef MEM_ACCESS_MODULE
165#define MEM_ACCESS_MODULE
166/****************************************************************
167*  Basic Types
168*****************************************************************/
169#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
170# include <stdint.h>
171typedef  uint8_t BYTE;
172typedef uint16_t U16;
173typedef  int16_t S16;
174typedef uint32_t U32;
175typedef  int32_t S32;
176typedef uint64_t U64;
177typedef  int64_t S64;
178#else
179typedef unsigned char       BYTE;
180typedef unsigned short      U16;
181typedef   signed short      S16;
182typedef unsigned int        U32;
183typedef   signed int        S32;
184typedef unsigned long long  U64;
185typedef   signed long long  S64;
186#endif
187
188#endif   /* MEM_ACCESS_MODULE */
189
190/****************************************************************
191*  Memory I/O
192*****************************************************************/
193/* FSE_FORCE_MEMORY_ACCESS
194 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
195 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196 * The below switch allow to select different access method for improved performance.
197 * Method 0 (default) : use `memcpy()`. Safe and portable.
198 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
199 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
200 * Method 2 : direct access. This method is portable but violate C standard.
201 *            It can generate buggy code on targets generating assembly depending on alignment.
202 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
203 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204 * Prefer these methods in priority order (0 > 1 > 2)
205 */
206#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
207#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
208#    define FSE_FORCE_MEMORY_ACCESS 1
209#  endif
210#endif
211
212
213static unsigned FSE_32bits(void)
214{
215    return sizeof(void*)==4;
216}
217
218static unsigned FSE_isLittleEndian(void)
219{
220    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
221    return one.c[0];
222}
223
224#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
225
226static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
227static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
228static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
229
230#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
231
232/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
233/* currently only defined for gcc and icc */
234typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
235
236static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
237static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
238static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
239
240#else
241
242static U16 FSE_read16(const void* memPtr)
243{
244    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
245}
246
247static U32 FSE_read32(const void* memPtr)
248{
249    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
250}
251
252static U64 FSE_read64(const void* memPtr)
253{
254    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
255}
256
257#endif /* FSE_FORCE_MEMORY_ACCESS */
258
259static U16 FSE_readLE16(const void* memPtr)
260{
261    if (FSE_isLittleEndian())
262        return FSE_read16(memPtr);
263    else
264    {
265        const BYTE* p = (const BYTE*)memPtr;
266        return (U16)(p[0] + (p[1]<<8));
267    }
268}
269
270static U32 FSE_readLE32(const void* memPtr)
271{
272    if (FSE_isLittleEndian())
273        return FSE_read32(memPtr);
274    else
275    {
276        const BYTE* p = (const BYTE*)memPtr;
277        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
278    }
279}
280
281
282static U64 FSE_readLE64(const void* memPtr)
283{
284    if (FSE_isLittleEndian())
285        return FSE_read64(memPtr);
286    else
287    {
288        const BYTE* p = (const BYTE*)memPtr;
289        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
290                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
291    }
292}
293
294static size_t FSE_readLEST(const void* memPtr)
295{
296    if (FSE_32bits())
297        return (size_t)FSE_readLE32(memPtr);
298    else
299        return (size_t)FSE_readLE64(memPtr);
300}
301
302
303
304/****************************************************************
305*  Constants
306*****************************************************************/
307#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
308#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
309#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
310#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
311#define FSE_MIN_TABLELOG 5
312
313#define FSE_TABLELOG_ABSOLUTE_MAX 15
314#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
315#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
316#endif
317
318
319/****************************************************************
320*  Error Management
321****************************************************************/
322#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
323
324
325/****************************************************************
326*  Complex types
327****************************************************************/
328typedef struct
329{
330    int deltaFindState;
331    U32 deltaNbBits;
332} FSE_symbolCompressionTransform; /* total 8 bytes */
333
334typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
335
336/****************************************************************
337*  Internal functions
338****************************************************************/
339FORCE_INLINE unsigned FSE_highbit32 (U32 val)
340{
341#   if defined(_MSC_VER)   /* Visual */
342    unsigned long r;
343    return _BitScanReverse(&r, val) ? (unsigned)r : 0;
344#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
345    return __builtin_clz (val) ^ 31;
346#   else   /* Software version */
347    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
348    U32 v = val;
349    unsigned r;
350    v |= v >> 1;
351    v |= v >> 2;
352    v |= v >> 4;
353    v |= v >> 8;
354    v |= v >> 16;
355    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
356    return r;
357#   endif
358}
359
360
361/****************************************************************
362*  Templates
363****************************************************************/
364/*
365  designed to be included
366  for type-specific functions (template emulation in C)
367  Objective is to write these functions only once, for improved maintenance
368*/
369
370/* safety checks */
371#ifndef FSE_FUNCTION_EXTENSION
372#  error "FSE_FUNCTION_EXTENSION must be defined"
373#endif
374#ifndef FSE_FUNCTION_TYPE
375#  error "FSE_FUNCTION_TYPE must be defined"
376#endif
377
378/* Function names */
379#define FSE_CAT(X,Y) X##Y
380#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
381#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
382
383
384
385static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
386
387#define FSE_DECODE_TYPE FSE_decode_t
388
389
390typedef struct {
391    U16 tableLog;
392    U16 fastMode;
393} FSE_DTableHeader;   /* sizeof U32 */
394
395static size_t FSE_buildDTable
396(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
397{
398    void* ptr = dt;
399    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
400    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
401    const U32 tableSize = 1 << tableLog;
402    const U32 tableMask = tableSize-1;
403    const U32 step = FSE_tableStep(tableSize);
404    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
405    U32 position = 0;
406    U32 highThreshold = tableSize-1;
407    const S16 largeLimit= (S16)(1 << (tableLog-1));
408    U32 noLarge = 1;
409    U32 s;
410
411    /* Sanity Checks */
412    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
413    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
414
415    /* Init, lay down lowprob symbols */
416    DTableH[0].tableLog = (U16)tableLog;
417    for (s=0; s<=maxSymbolValue; s++)
418    {
419        if (normalizedCounter[s]==-1)
420        {
421            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
422            symbolNext[s] = 1;
423        }
424        else
425        {
426            if (normalizedCounter[s] >= largeLimit) noLarge=0;
427            symbolNext[s] = normalizedCounter[s];
428        }
429    }
430
431    /* Spread symbols */
432    for (s=0; s<=maxSymbolValue; s++)
433    {
434        int i;
435        for (i=0; i<normalizedCounter[s]; i++)
436        {
437            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
438            position = (position + step) & tableMask;
439            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
440        }
441    }
442
443    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
444
445    /* Build Decoding table */
446    {
447        U32 i;
448        for (i=0; i<tableSize; i++)
449        {
450            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
451            U16 nextState = symbolNext[symbol]++;
452            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
453            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
454        }
455    }
456
457    DTableH->fastMode = (U16)noLarge;
458    return 0;
459}
460
461
462/******************************************
463*  FSE byte symbol
464******************************************/
465#ifndef FSE_COMMONDEFS_ONLY
466
467static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
468
469static short FSE_abs(short a)
470{
471    return a<0? -a : a;
472}
473
474
475/****************************************************************
476*  Header bitstream management
477****************************************************************/
478static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
479                 const void* headerBuffer, size_t hbSize)
480{
481    const BYTE* const istart = (const BYTE*) headerBuffer;
482    const BYTE* const iend = istart + hbSize;
483    const BYTE* ip = istart;
484    int nbBits;
485    int remaining;
486    int threshold;
487    U32 bitStream;
488    int bitCount;
489    unsigned charnum = 0;
490    int previous0 = 0;
491
492    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
493    bitStream = FSE_readLE32(ip);
494    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
495    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
496    bitStream >>= 4;
497    bitCount = 4;
498    *tableLogPtr = nbBits;
499    remaining = (1<<nbBits)+1;
500    threshold = 1<<nbBits;
501    nbBits++;
502
503    while ((remaining>1) && (charnum<=*maxSVPtr))
504    {
505        if (previous0)
506        {
507            unsigned n0 = charnum;
508            while ((bitStream & 0xFFFF) == 0xFFFF)
509            {
510                n0+=24;
511                if (ip < iend-5)
512                {
513                    ip+=2;
514                    bitStream = FSE_readLE32(ip) >> bitCount;
515                }
516                else
517                {
518                    bitStream >>= 16;
519                    bitCount+=16;
520                }
521            }
522            while ((bitStream & 3) == 3)
523            {
524                n0+=3;
525                bitStream>>=2;
526                bitCount+=2;
527            }
528            n0 += bitStream & 3;
529            bitCount += 2;
530            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
531            while (charnum < n0) normalizedCounter[charnum++] = 0;
532            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
533            {
534                ip += bitCount>>3;
535                bitCount &= 7;
536                bitStream = FSE_readLE32(ip) >> bitCount;
537            }
538            else
539                bitStream >>= 2;
540        }
541        {
542            const short max = (short)((2*threshold-1)-remaining);
543            short count;
544
545            if ((bitStream & (threshold-1)) < (U32)max)
546            {
547                count = (short)(bitStream & (threshold-1));
548                bitCount   += nbBits-1;
549            }
550            else
551            {
552                count = (short)(bitStream & (2*threshold-1));
553                if (count >= threshold) count -= max;
554                bitCount   += nbBits;
555            }
556
557            count--;   /* extra accuracy */
558            remaining -= FSE_abs(count);
559            normalizedCounter[charnum++] = count;
560            previous0 = !count;
561            while (remaining < threshold)
562            {
563                nbBits--;
564                threshold >>= 1;
565            }
566
567            {
568                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
569                {
570                    ip += bitCount>>3;
571                    bitCount &= 7;
572                }
573                else
574                {
575                    bitCount -= (int)(8 * (iend - 4 - ip));
576                    ip = iend - 4;
577                }
578                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
579            }
580        }
581    }
582    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
583    *maxSVPtr = charnum-1;
584
585    ip += (bitCount+7)>>3;
586    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
587    return ip-istart;
588}
589
590
591/*********************************************************
592*  Decompression (Byte symbols)
593*********************************************************/
594static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
595{
596    void* ptr = dt;
597    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
598    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
599
600    DTableH->tableLog = 0;
601    DTableH->fastMode = 0;
602
603    cell->newState = 0;
604    cell->symbol = symbolValue;
605    cell->nbBits = 0;
606
607    return 0;
608}
609
610
611static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
612{
613    void* ptr = dt;
614    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
615    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
616    const unsigned tableSize = 1 << nbBits;
617    const unsigned tableMask = tableSize - 1;
618    const unsigned maxSymbolValue = tableMask;
619    unsigned s;
620
621    /* Sanity checks */
622    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
623
624    /* Build Decoding Table */
625    DTableH->tableLog = (U16)nbBits;
626    DTableH->fastMode = 1;
627    for (s=0; s<=maxSymbolValue; s++)
628    {
629        dinfo[s].newState = 0;
630        dinfo[s].symbol = (BYTE)s;
631        dinfo[s].nbBits = (BYTE)nbBits;
632    }
633
634    return 0;
635}
636
637
638/* FSE_initDStream
639 * Initialize a FSE_DStream_t.
640 * srcBuffer must point at the beginning of an FSE block.
641 * The function result is the size of the FSE_block (== srcSize).
642 * If srcSize is too small, the function will return an errorCode;
643 */
644static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
645{
646    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
647
648    if (srcSize >=  sizeof(size_t))
649    {
650        U32 contain32;
651        bitD->start = (const char*)srcBuffer;
652        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
653        bitD->bitContainer = FSE_readLEST(bitD->ptr);
654        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
655        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
656        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
657    }
658    else
659    {
660        U32 contain32;
661        bitD->start = (const char*)srcBuffer;
662        bitD->ptr   = bitD->start;
663        bitD->bitContainer = *(const BYTE*)(bitD->start);
664        switch(srcSize)
665        {
666            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
667                    /* fallthrough */
668            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
669                    /* fallthrough */
670            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
671                    /* fallthrough */
672            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
673                    /* fallthrough */
674            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
675                    /* fallthrough */
676            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
677                    /* fallthrough */
678            default:;
679        }
680        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
681        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
682        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
683        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
684    }
685
686    return srcSize;
687}
688
689
690/*!FSE_lookBits
691 * Provides next n bits from the bitContainer.
692 * bitContainer is not modified (bits are still present for next read/look)
693 * On 32-bits, maxNbBits==25
694 * On 64-bits, maxNbBits==57
695 * return : value extracted.
696 */
697static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
698{
699    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
700    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
701}
702
703static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
704{
705    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
706    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
707}
708
709static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
710{
711    bitD->bitsConsumed += nbBits;
712}
713
714
715/*!FSE_readBits
716 * Read next n bits from the bitContainer.
717 * On 32-bits, don't read more than maxNbBits==25
718 * On 64-bits, don't read more than maxNbBits==57
719 * Use the fast variant *only* if n >= 1.
720 * return : value extracted.
721 */
722static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
723{
724    size_t value = FSE_lookBits(bitD, nbBits);
725    FSE_skipBits(bitD, nbBits);
726    return value;
727}
728
729static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
730{
731    size_t value = FSE_lookBitsFast(bitD, nbBits);
732    FSE_skipBits(bitD, nbBits);
733    return value;
734}
735
736static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
737{
738    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
739        return FSE_DStream_tooFar;
740
741    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
742    {
743        bitD->ptr -= bitD->bitsConsumed >> 3;
744        bitD->bitsConsumed &= 7;
745        bitD->bitContainer = FSE_readLEST(bitD->ptr);
746        return FSE_DStream_unfinished;
747    }
748    if (bitD->ptr == bitD->start)
749    {
750        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
751        return FSE_DStream_completed;
752    }
753    {
754        U32 nbBytes = bitD->bitsConsumed >> 3;
755        U32 result = FSE_DStream_unfinished;
756        if (bitD->ptr - nbBytes < bitD->start)
757        {
758            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
759            result = FSE_DStream_endOfBuffer;
760        }
761        bitD->ptr -= nbBytes;
762        bitD->bitsConsumed -= nbBytes*8;
763        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
764        return result;
765    }
766}
767
768
769static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
770{
771    const void* ptr = dt;
772    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
773    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
774    FSE_reloadDStream(bitD);
775    DStatePtr->table = dt + 1;
776}
777
778static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
779{
780    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
781    const U32  nbBits = DInfo.nbBits;
782    BYTE symbol = DInfo.symbol;
783    size_t lowBits = FSE_readBits(bitD, nbBits);
784
785    DStatePtr->state = DInfo.newState + lowBits;
786    return symbol;
787}
788
789static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
790{
791    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
792    const U32 nbBits = DInfo.nbBits;
793    BYTE symbol = DInfo.symbol;
794    size_t lowBits = FSE_readBitsFast(bitD, nbBits);
795
796    DStatePtr->state = DInfo.newState + lowBits;
797    return symbol;
798}
799
800/* FSE_endOfDStream
801   Tells if bitD has reached end of bitStream or not */
802
803static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
804{
805    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
806}
807
808static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
809{
810    return DStatePtr->state == 0;
811}
812
813
814FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
815          void* dst, size_t maxDstSize,
816    const void* cSrc, size_t cSrcSize,
817    const FSE_DTable* dt, const unsigned fast)
818{
819    BYTE* const ostart = (BYTE*) dst;
820    BYTE* op = ostart;
821    BYTE* const omax = op + maxDstSize;
822    BYTE* const olimit = omax-3;
823
824    FSE_DStream_t bitD;
825    FSE_DState_t state1;
826    FSE_DState_t state2;
827    size_t errorCode;
828
829    /* Init */
830    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
831    if (FSE_isError(errorCode)) return errorCode;
832
833    FSE_initDState(&state1, &bitD, dt);
834    FSE_initDState(&state2, &bitD, dt);
835
836#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
837
838    /* 4 symbols per loop */
839    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
840    {
841        op[0] = FSE_GETSYMBOL(&state1);
842
843        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
844            FSE_reloadDStream(&bitD);
845
846        op[1] = FSE_GETSYMBOL(&state2);
847
848        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
849            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
850
851        op[2] = FSE_GETSYMBOL(&state1);
852
853        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
854            FSE_reloadDStream(&bitD);
855
856        op[3] = FSE_GETSYMBOL(&state2);
857    }
858
859    /* tail */
860    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
861    while (1)
862    {
863        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
864            break;
865
866        *op++ = FSE_GETSYMBOL(&state1);
867
868        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
869            break;
870
871        *op++ = FSE_GETSYMBOL(&state2);
872    }
873
874    /* end ? */
875    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
876        return op-ostart;
877
878    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
879
880    return (size_t)-FSE_ERROR_corruptionDetected;
881}
882
883
884static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
885                            const void* cSrc, size_t cSrcSize,
886                            const FSE_DTable* dt)
887{
888    FSE_DTableHeader DTableH;
889    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
890
891    /* select fast mode (static) */
892    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
893    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
894}
895
896
897static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
898{
899    const BYTE* const istart = (const BYTE*)cSrc;
900    const BYTE* ip = istart;
901    short counting[FSE_MAX_SYMBOL_VALUE+1];
902    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
903    unsigned tableLog;
904    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
905    size_t errorCode;
906
907    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
908
909    /* normal FSE decoding mode */
910    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
911    if (FSE_isError(errorCode)) return errorCode;
912    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
913    ip += errorCode;
914    cSrcSize -= errorCode;
915
916    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
917    if (FSE_isError(errorCode)) return errorCode;
918
919    /* always return, even if it is an error code */
920    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
921}
922
923
924
925/* *******************************************************
926*  Huff0 : Huffman block compression
927*********************************************************/
928#define HUF_MAX_SYMBOL_VALUE 255
929#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
930#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
931#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
932#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
933#  error "HUF_MAX_TABLELOG is too large !"
934#endif
935
936typedef struct HUF_CElt_s {
937  U16  val;
938  BYTE nbBits;
939} HUF_CElt ;
940
941typedef struct nodeElt_s {
942    U32 count;
943    U16 parent;
944    BYTE byte;
945    BYTE nbBits;
946} nodeElt;
947
948
949/* *******************************************************
950*  Huff0 : Huffman block decompression
951*********************************************************/
952typedef struct {
953    BYTE byte;
954    BYTE nbBits;
955} HUF_DElt;
956
957static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
958{
959    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
960    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
961    U32 weightTotal;
962    U32 maxBits;
963    const BYTE* ip = (const BYTE*) src;
964    size_t iSize;
965    size_t oSize;
966    U32 n;
967    U32 nextRankStart;
968    void* ptr = DTable+1;
969    HUF_DElt* const dt = (HUF_DElt*)ptr;
970
971    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
972    iSize = ip[0];
973
974    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
975    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
976    if (iSize >= 128)  /* special header */
977    {
978        if (iSize >= (242))   /* RLE */
979        {
980            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
981            oSize = l[iSize-242];
982            memset(huffWeight, 1, sizeof(huffWeight));
983            iSize = 0;
984        }
985        else   /* Incompressible */
986        {
987            oSize = iSize - 127;
988            iSize = ((oSize+1)/2);
989            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
990            ip += 1;
991            for (n=0; n<oSize; n+=2)
992            {
993                huffWeight[n]   = ip[n/2] >> 4;
994                huffWeight[n+1] = ip[n/2] & 15;
995            }
996        }
997    }
998    else  /* header compressed with FSE (normal case) */
999    {
1000        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1001        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1002        if (FSE_isError(oSize)) return oSize;
1003    }
1004
1005    /* collect weight stats */
1006    memset(rankVal, 0, sizeof(rankVal));
1007    weightTotal = 0;
1008    for (n=0; n<oSize; n++)
1009    {
1010        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1011        rankVal[huffWeight[n]]++;
1012        weightTotal += (1 << huffWeight[n]) >> 1;
1013    }
1014    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1015
1016    /* get last non-null symbol weight (implied, total must be 2^n) */
1017    maxBits = FSE_highbit32(weightTotal) + 1;
1018    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1019    DTable[0] = (U16)maxBits;
1020    {
1021        U32 total = 1 << maxBits;
1022        U32 rest = total - weightTotal;
1023        U32 verif = 1 << FSE_highbit32(rest);
1024        U32 lastWeight = FSE_highbit32(rest) + 1;
1025        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1026        huffWeight[oSize] = (BYTE)lastWeight;
1027        rankVal[lastWeight]++;
1028    }
1029
1030    /* check tree construction validity */
1031    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1032
1033    /* Prepare ranks */
1034    nextRankStart = 0;
1035    for (n=1; n<=maxBits; n++)
1036    {
1037        U32 current = nextRankStart;
1038        nextRankStart += (rankVal[n] << (n-1));
1039        rankVal[n] = current;
1040    }
1041
1042    /* fill DTable */
1043    for (n=0; n<=oSize; n++)
1044    {
1045        const U32 w = huffWeight[n];
1046        const U32 length = (1 << w) >> 1;
1047        U32 i;
1048        HUF_DElt D;
1049        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1050        for (i = rankVal[w]; i < rankVal[w] + length; i++)
1051            dt[i] = D;
1052        rankVal[w] += length;
1053    }
1054
1055    return iSize+1;
1056}
1057
1058
1059static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1060{
1061        const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1062        const BYTE c = dt[val].byte;
1063        FSE_skipBits(Dstream, dt[val].nbBits);
1064        return c;
1065}
1066
1067static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1068          void* dst, size_t maxDstSize,
1069    const void* cSrc, size_t cSrcSize,
1070    const U16* DTable)
1071{
1072    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1073    {
1074        BYTE* const ostart = (BYTE*) dst;
1075        BYTE* op = ostart;
1076        BYTE* const omax = op + maxDstSize;
1077        BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1078
1079        const void* ptr = DTable;
1080        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1081        const U32 dtLog = DTable[0];
1082        size_t errorCode;
1083        U32 reloadStatus;
1084
1085        /* Init */
1086
1087        const U16* jumpTable = (const U16*)cSrc;
1088        const size_t length1 = FSE_readLE16(jumpTable);
1089        const size_t length2 = FSE_readLE16(jumpTable+1);
1090        const size_t length3 = FSE_readLE16(jumpTable+2);
1091        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* check coherency !! */
1092        const char* const start1 = (const char*)(cSrc) + 6;
1093        const char* const start2 = start1 + length1;
1094        const char* const start3 = start2 + length2;
1095        const char* const start4 = start3 + length3;
1096        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1097
1098        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1099
1100        errorCode = FSE_initDStream(&bitD1, start1, length1);
1101        if (FSE_isError(errorCode)) return errorCode;
1102        errorCode = FSE_initDStream(&bitD2, start2, length2);
1103        if (FSE_isError(errorCode)) return errorCode;
1104        errorCode = FSE_initDStream(&bitD3, start3, length3);
1105        if (FSE_isError(errorCode)) return errorCode;
1106        errorCode = FSE_initDStream(&bitD4, start4, length4);
1107        if (FSE_isError(errorCode)) return errorCode;
1108
1109        reloadStatus=FSE_reloadDStream(&bitD2);
1110
1111        /* 16 symbols per loop */
1112        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1113            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1114        {
1115    #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1116            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1117
1118    #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1119            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1120            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1121
1122    #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1123            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1124            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1125
1126            HUF_DECODE_SYMBOL_1( 0, bitD1);
1127            HUF_DECODE_SYMBOL_1( 1, bitD2);
1128            HUF_DECODE_SYMBOL_1( 2, bitD3);
1129            HUF_DECODE_SYMBOL_1( 3, bitD4);
1130            HUF_DECODE_SYMBOL_2( 4, bitD1);
1131            HUF_DECODE_SYMBOL_2( 5, bitD2);
1132            HUF_DECODE_SYMBOL_2( 6, bitD3);
1133            HUF_DECODE_SYMBOL_2( 7, bitD4);
1134            HUF_DECODE_SYMBOL_1( 8, bitD1);
1135            HUF_DECODE_SYMBOL_1( 9, bitD2);
1136            HUF_DECODE_SYMBOL_1(10, bitD3);
1137            HUF_DECODE_SYMBOL_1(11, bitD4);
1138            HUF_DECODE_SYMBOL_0(12, bitD1);
1139            HUF_DECODE_SYMBOL_0(13, bitD2);
1140            HUF_DECODE_SYMBOL_0(14, bitD3);
1141            HUF_DECODE_SYMBOL_0(15, bitD4);
1142        }
1143
1144        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1145            return (size_t)-FSE_ERROR_corruptionDetected;
1146
1147        /* tail */
1148        {
1149            /* bitTail = bitD1; */   /* *much* slower : -20% !??! */
1150            FSE_DStream_t bitTail;
1151            bitTail.ptr = bitD1.ptr;
1152            bitTail.bitsConsumed = bitD1.bitsConsumed;
1153            bitTail.bitContainer = bitD1.bitContainer;   /* required in case of FSE_DStream_endOfBuffer */
1154            bitTail.start = start1;
1155            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1156            {
1157                HUF_DECODE_SYMBOL_0(0, bitTail);
1158            }
1159
1160            if (FSE_endOfDStream(&bitTail))
1161                return op-ostart;
1162        }
1163
1164        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1165
1166        return (size_t)-FSE_ERROR_corruptionDetected;
1167    }
1168}
1169
1170
1171static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1172{
1173    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1174    const BYTE* ip = (const BYTE*) cSrc;
1175    size_t errorCode;
1176
1177    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1178    if (FSE_isError(errorCode)) return errorCode;
1179    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1180    ip += errorCode;
1181    cSrcSize -= errorCode;
1182
1183    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1184}
1185
1186
1187#endif   /* FSE_COMMONDEFS_ONLY */
1188
1189/*
1190    zstd - standard compression library
1191    Copyright (C) 2014-2015, Yann Collet.
1192
1193    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1194
1195    Redistribution and use in source and binary forms, with or without
1196    modification, are permitted provided that the following conditions are
1197    met:
1198    * Redistributions of source code must retain the above copyright
1199    notice, this list of conditions and the following disclaimer.
1200    * Redistributions in binary form must reproduce the above
1201    copyright notice, this list of conditions and the following disclaimer
1202    in the documentation and/or other materials provided with the
1203    distribution.
1204    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1205    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1206    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1207    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1208    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1209    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1210    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1211    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1212    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1213    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1214    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1215
1216    You can contact the author at :
1217    - zstd source repository : https://github.com/Cyan4973/zstd
1218    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1219*/
1220
1221/****************************************************************
1222*  Tuning parameters
1223*****************************************************************/
1224/* MEMORY_USAGE :
1225*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1226*  Increasing memory usage improves compression ratio
1227*  Reduced memory usage can improve speed, due to cache effect */
1228#define ZSTD_MEMORY_USAGE 17
1229
1230
1231/**************************************
1232   CPU Feature Detection
1233**************************************/
1234/*
1235 * Automated efficient unaligned memory access detection
1236 * Based on known hardware architectures
1237 * This list will be updated thanks to feedbacks
1238 */
1239#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1240    || defined(__ARM_FEATURE_UNALIGNED) \
1241    || defined(__i386__) || defined(__x86_64__) \
1242    || defined(_M_IX86) || defined(_M_X64) \
1243    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1244    || (defined(_M_ARM) && (_M_ARM >= 7))
1245#  define ZSTD_UNALIGNED_ACCESS 1
1246#else
1247#  define ZSTD_UNALIGNED_ACCESS 0
1248#endif
1249
1250
1251/********************************************************
1252*  Includes
1253*********************************************************/
1254#include <stdlib.h>      /* calloc */
1255#include <string.h>      /* memcpy, memmove */
1256#include <stdio.h>       /* debug : printf */
1257
1258
1259/********************************************************
1260*  Compiler specifics
1261*********************************************************/
1262#ifdef __AVX2__
1263#  include <immintrin.h>   /* AVX2 intrinsics */
1264#endif
1265
1266#ifdef _MSC_VER    /* Visual Studio */
1267#  include <intrin.h>                    /* For Visual 2005 */
1268#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1269#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1270#endif
1271
1272
1273#ifndef MEM_ACCESS_MODULE
1274#define MEM_ACCESS_MODULE
1275/********************************************************
1276*  Basic Types
1277*********************************************************/
1278#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1279# if defined(_AIX)
1280#  include <inttypes.h>
1281# else
1282#  include <stdint.h> /* intptr_t */
1283# endif
1284typedef  uint8_t BYTE;
1285typedef uint16_t U16;
1286typedef  int16_t S16;
1287typedef uint32_t U32;
1288typedef  int32_t S32;
1289typedef uint64_t U64;
1290#else
1291typedef unsigned char       BYTE;
1292typedef unsigned short      U16;
1293typedef   signed short      S16;
1294typedef unsigned int        U32;
1295typedef   signed int        S32;
1296typedef unsigned long long  U64;
1297#endif
1298
1299#endif   /* MEM_ACCESS_MODULE */
1300
1301
1302/********************************************************
1303*  Constants
1304*********************************************************/
1305static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1306
1307#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1308#define HASH_TABLESIZE (1 << HASH_LOG)
1309#define HASH_MASK (HASH_TABLESIZE - 1)
1310
1311#define KNUTH 2654435761
1312
1313#define BIT7 128
1314#define BIT6  64
1315#define BIT5  32
1316#define BIT4  16
1317
1318#define KB *(1 <<10)
1319#define MB *(1 <<20)
1320#define GB *(1U<<30)
1321
1322#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1323
1324#define WORKPLACESIZE (BLOCKSIZE*3)
1325#define MINMATCH 4
1326#define MLbits   7
1327#define LLbits   6
1328#define Offbits  5
1329#define MaxML  ((1<<MLbits )-1)
1330#define MaxLL  ((1<<LLbits )-1)
1331#define MaxOff ((1<<Offbits)-1)
1332#define LitFSELog  11
1333#define MLFSELog   10
1334#define LLFSELog   10
1335#define OffFSELog   9
1336#define MAX(a,b) ((a)<(b)?(b):(a))
1337#define MaxSeq MAX(MaxLL, MaxML)
1338
1339#define LITERAL_NOENTROPY 63
1340#define COMMAND_NOENTROPY 7   /* to remove */
1341
1342#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
1343
1344static const size_t ZSTD_blockHeaderSize = 3;
1345static const size_t ZSTD_frameHeaderSize = 4;
1346
1347
1348/********************************************************
1349*  Memory operations
1350*********************************************************/
1351static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1352
1353static unsigned ZSTD_isLittleEndian(void)
1354{
1355    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1356    return one.c[0];
1357}
1358
1359static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1360
1361static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1362
1363static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1364
1365#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1366
1367static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1368{
1369    const BYTE* ip = (const BYTE*)src;
1370    BYTE* op = (BYTE*)dst;
1371    BYTE* const oend = op + length;
1372    while (op < oend) COPY8(op, ip);
1373}
1374
1375static U16 ZSTD_readLE16(const void* memPtr)
1376{
1377    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1378    else
1379    {
1380        const BYTE* p = (const BYTE*)memPtr;
1381        return (U16)((U16)p[0] + ((U16)p[1]<<8));
1382    }
1383}
1384
1385static U32 ZSTD_readLE24(const void* memPtr)
1386{
1387    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1388}
1389
1390static U32 ZSTD_readBE32(const void* memPtr)
1391{
1392    const BYTE* p = (const BYTE*)memPtr;
1393    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1394}
1395
1396
1397/**************************************
1398*  Local structures
1399***************************************/
1400typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1401
1402typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1403
1404typedef struct
1405{
1406    blockType_t blockType;
1407    U32 origSize;
1408} blockProperties_t;
1409
1410typedef struct {
1411    void* buffer;
1412    U32*  offsetStart;
1413    U32*  offset;
1414    BYTE* offCodeStart;
1415    BYTE* offCode;
1416    BYTE* litStart;
1417    BYTE* lit;
1418    BYTE* litLengthStart;
1419    BYTE* litLength;
1420    BYTE* matchLengthStart;
1421    BYTE* matchLength;
1422    BYTE* dumpsStart;
1423    BYTE* dumps;
1424} seqStore_t;
1425
1426
1427typedef struct ZSTD_Cctx_s
1428{
1429    const BYTE* base;
1430    U32 current;
1431    U32 nextUpdate;
1432    seqStore_t seqStore;
1433#ifdef __AVX2__
1434    __m256i hashTable[HASH_TABLESIZE>>3];
1435#else
1436    U32 hashTable[HASH_TABLESIZE];
1437#endif
1438    BYTE buffer[WORKPLACESIZE];
1439} cctxi_t;
1440
1441
1442
1443
1444/**************************************
1445*  Error Management
1446**************************************/
1447/* published entry point */
1448unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1449
1450
1451/**************************************
1452*  Tool functions
1453**************************************/
1454#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1455#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1456#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1457#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1458
1459/**************************************************************
1460*   Decompression code
1461**************************************************************/
1462
1463static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1464{
1465    const BYTE* const in = (const BYTE* const)src;
1466    BYTE headerFlags;
1467    U32 cSize;
1468
1469    if (srcSize < 3) return ERROR(srcSize_wrong);
1470
1471    headerFlags = *in;
1472    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1473
1474    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1475    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1476
1477    if (bpPtr->blockType == bt_end) return 0;
1478    if (bpPtr->blockType == bt_rle) return 1;
1479    return cSize;
1480}
1481
1482
1483static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1484{
1485    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1486    if (srcSize > 0) {
1487        memcpy(dst, src, srcSize);
1488    }
1489    return srcSize;
1490}
1491
1492
1493static size_t ZSTD_decompressLiterals(void* ctx,
1494                                      void* dst, size_t maxDstSize,
1495                                const void* src, size_t srcSize)
1496{
1497    BYTE* op = (BYTE*)dst;
1498    BYTE* const oend = op + maxDstSize;
1499    const BYTE* ip = (const BYTE*)src;
1500    size_t errorCode;
1501    size_t litSize;
1502
1503    /* check : minimum 2, for litSize, +1, for content */
1504    if (srcSize <= 3) return ERROR(corruption_detected);
1505
1506    litSize = ip[1] + (ip[0]<<8);
1507    litSize += ((ip[-3] >> 3) & 7) << 16;   /* mmmmh.... */
1508    op = oend - litSize;
1509
1510    (void)ctx;
1511    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1512    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1513    if (FSE_isError(errorCode)) return ERROR(GENERIC);
1514    return litSize;
1515}
1516
1517
1518static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1519                                void* dst, size_t maxDstSize,
1520                          const BYTE** litStart, size_t* litSize,
1521                          const void* src, size_t srcSize)
1522{
1523    const BYTE* const istart = (const BYTE* const)src;
1524    const BYTE* ip = istart;
1525    BYTE* const ostart = (BYTE* const)dst;
1526    BYTE* const oend = ostart + maxDstSize;
1527    blockProperties_t litbp;
1528
1529    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1530    if (ZSTDv01_isError(litcSize)) return litcSize;
1531    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1532    ip += ZSTD_blockHeaderSize;
1533
1534    switch(litbp.blockType)
1535    {
1536    case bt_raw:
1537        *litStart = ip;
1538        ip += litcSize;
1539        *litSize = litcSize;
1540        break;
1541    case bt_rle:
1542        {
1543            size_t rleSize = litbp.origSize;
1544            if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1545            if (!srcSize) return ERROR(srcSize_wrong);
1546            if (rleSize > 0) {
1547                memset(oend - rleSize, *ip, rleSize);
1548            }
1549            *litStart = oend - rleSize;
1550            *litSize = rleSize;
1551            ip++;
1552            break;
1553        }
1554    case bt_compressed:
1555        {
1556            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1557            if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1558            *litStart = oend - decodedLitSize;
1559            *litSize = decodedLitSize;
1560            ip += litcSize;
1561            break;
1562        }
1563    case bt_end:
1564    default:
1565        return ERROR(GENERIC);
1566    }
1567
1568    return ip-istart;
1569}
1570
1571
1572static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1573                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1574                         const void* src, size_t srcSize)
1575{
1576    const BYTE* const istart = (const BYTE* const)src;
1577    const BYTE* ip = istart;
1578    const BYTE* const iend = istart + srcSize;
1579    U32 LLtype, Offtype, MLtype;
1580    U32 LLlog, Offlog, MLlog;
1581    size_t dumpsLength;
1582
1583    /* check */
1584    if (srcSize < 5) return ERROR(srcSize_wrong);
1585
1586    /* SeqHead */
1587    *nbSeq = ZSTD_readLE16(ip); ip+=2;
1588    LLtype  = *ip >> 6;
1589    Offtype = (*ip >> 4) & 3;
1590    MLtype  = (*ip >> 2) & 3;
1591    if (*ip & 2)
1592    {
1593        dumpsLength  = ip[2];
1594        dumpsLength += ip[1] << 8;
1595        ip += 3;
1596    }
1597    else
1598    {
1599        dumpsLength  = ip[1];
1600        dumpsLength += (ip[0] & 1) << 8;
1601        ip += 2;
1602    }
1603    *dumpsPtr = ip;
1604    ip += dumpsLength;
1605    *dumpsLengthPtr = dumpsLength;
1606
1607    /* check */
1608    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1609
1610    /* sequences */
1611    {
1612        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1613        size_t headerSize;
1614
1615        /* Build DTables */
1616        switch(LLtype)
1617        {
1618        case bt_rle :
1619            LLlog = 0;
1620            FSE_buildDTable_rle(DTableLL, *ip++); break;
1621        case bt_raw :
1622            LLlog = LLbits;
1623            FSE_buildDTable_raw(DTableLL, LLbits); break;
1624        default :
1625            {   U32 max = MaxLL;
1626                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1627                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1628                if (LLlog > LLFSELog) return ERROR(corruption_detected);
1629                ip += headerSize;
1630                FSE_buildDTable(DTableLL, norm, max, LLlog);
1631        }   }
1632
1633        switch(Offtype)
1634        {
1635        case bt_rle :
1636            Offlog = 0;
1637            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1638            FSE_buildDTable_rle(DTableOffb, *ip++); break;
1639        case bt_raw :
1640            Offlog = Offbits;
1641            FSE_buildDTable_raw(DTableOffb, Offbits); break;
1642        default :
1643            {   U32 max = MaxOff;
1644                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1645                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1646                if (Offlog > OffFSELog) return ERROR(corruption_detected);
1647                ip += headerSize;
1648                FSE_buildDTable(DTableOffb, norm, max, Offlog);
1649        }   }
1650
1651        switch(MLtype)
1652        {
1653        case bt_rle :
1654            MLlog = 0;
1655            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1656            FSE_buildDTable_rle(DTableML, *ip++); break;
1657        case bt_raw :
1658            MLlog = MLbits;
1659            FSE_buildDTable_raw(DTableML, MLbits); break;
1660        default :
1661            {   U32 max = MaxML;
1662                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1663                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1664                if (MLlog > MLFSELog) return ERROR(corruption_detected);
1665                ip += headerSize;
1666                FSE_buildDTable(DTableML, norm, max, MLlog);
1667    }   }   }
1668
1669    return ip-istart;
1670}
1671
1672
1673typedef struct {
1674    size_t litLength;
1675    size_t offset;
1676    size_t matchLength;
1677} seq_t;
1678
1679typedef struct {
1680    FSE_DStream_t DStream;
1681    FSE_DState_t stateLL;
1682    FSE_DState_t stateOffb;
1683    FSE_DState_t stateML;
1684    size_t prevOffset;
1685    const BYTE* dumps;
1686    const BYTE* dumpsEnd;
1687} seqState_t;
1688
1689
1690static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1691{
1692    size_t litLength;
1693    size_t prevOffset;
1694    size_t offset;
1695    size_t matchLength;
1696    const BYTE* dumps = seqState->dumps;
1697    const BYTE* const de = seqState->dumpsEnd;
1698
1699    /* Literal length */
1700    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1701    prevOffset = litLength ? seq->offset : seqState->prevOffset;
1702    seqState->prevOffset = seq->offset;
1703    if (litLength == MaxLL)
1704    {
1705        const U32 add = dumps<de ? *dumps++ : 0;
1706        if (add < 255) litLength += add;
1707        else
1708        {
1709            if (dumps<=(de-3))
1710            {
1711                litLength = ZSTD_readLE24(dumps);
1712                dumps += 3;
1713            }
1714        }
1715    }
1716
1717    /* Offset */
1718    {
1719        U32 offsetCode, nbBits;
1720        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1721        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1722        nbBits = offsetCode - 1;
1723        if (offsetCode==0) nbBits = 0;   /* cmove */
1724        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1725        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1726        if (offsetCode==0) offset = prevOffset;
1727    }
1728
1729    /* MatchLength */
1730    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1731    if (matchLength == MaxML)
1732    {
1733        const U32 add = dumps<de ? *dumps++ : 0;
1734        if (add < 255) matchLength += add;
1735        else
1736        {
1737            if (dumps<=(de-3))
1738            {
1739                matchLength = ZSTD_readLE24(dumps);
1740                dumps += 3;
1741            }
1742        }
1743    }
1744    matchLength += MINMATCH;
1745
1746    /* save result */
1747    seq->litLength = litLength;
1748    seq->offset = offset;
1749    seq->matchLength = matchLength;
1750    seqState->dumps = dumps;
1751}
1752
1753
1754static size_t ZSTD_execSequence(BYTE* op,
1755                                seq_t sequence,
1756                                const BYTE** litPtr, const BYTE* const litLimit,
1757                                BYTE* const base, BYTE* const oend)
1758{
1759    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1760    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
1761    const BYTE* const ostart = op;
1762    const size_t litLength = sequence.litLength;
1763    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1764    const BYTE* const litEnd = *litPtr + litLength;
1765
1766    /* check */
1767    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1768    if (litEnd > litLimit) return ERROR(corruption_detected);
1769    if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
1770
1771    /* copy Literals */
1772    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1773        memmove(op, *litPtr, litLength);   /* overwrite risk */
1774    else
1775        ZSTD_wildcopy(op, *litPtr, litLength);
1776    op += litLength;
1777    *litPtr = litEnd;   /* update for next sequence */
1778
1779    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1780    if (oend-op < 8) return ERROR(dstSize_tooSmall);
1781
1782    /* copy Match */
1783    {
1784        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1785        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1786        size_t qutt = 12;
1787        U64 saved[2];
1788
1789        /* check */
1790        if (match < base) return ERROR(corruption_detected);
1791        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1792
1793        /* save beginning of literal sequence, in case of write overlap */
1794        if (overlapRisk)
1795        {
1796            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1797            memcpy(saved, endMatch, qutt);
1798        }
1799
1800        if (sequence.offset < 8)
1801        {
1802            const int dec64 = dec64table[sequence.offset];
1803            op[0] = match[0];
1804            op[1] = match[1];
1805            op[2] = match[2];
1806            op[3] = match[3];
1807            match += dec32table[sequence.offset];
1808            ZSTD_copy4(op+4, match);
1809            match -= dec64;
1810        } else { ZSTD_copy8(op, match); }
1811        op += 8; match += 8;
1812
1813        if (endMatch > oend-(16-MINMATCH))
1814        {
1815            if (op < oend-8)
1816            {
1817                ZSTD_wildcopy(op, match, (oend-8) - op);
1818                match += (oend-8) - op;
1819                op = oend-8;
1820            }
1821            while (op<endMatch) *op++ = *match++;
1822        }
1823        else
1824            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1825
1826        /* restore, in case of overlap */
1827        if (overlapRisk) memcpy(endMatch, saved, qutt);
1828    }
1829
1830    return endMatch-ostart;
1831}
1832
1833typedef struct ZSTDv01_Dctx_s
1834{
1835    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1836    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1837    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1838    void* previousDstEnd;
1839    void* base;
1840    size_t expected;
1841    blockType_t bType;
1842    U32 phase;
1843} dctx_t;
1844
1845
1846static size_t ZSTD_decompressSequences(
1847                               void* ctx,
1848                               void* dst, size_t maxDstSize,
1849                         const void* seqStart, size_t seqSize,
1850                         const BYTE* litStart, size_t litSize)
1851{
1852    dctx_t* dctx = (dctx_t*)ctx;
1853    const BYTE* ip = (const BYTE*)seqStart;
1854    const BYTE* const iend = ip + seqSize;
1855    BYTE* const ostart = (BYTE* const)dst;
1856    BYTE* op = ostart;
1857    BYTE* const oend = ostart + maxDstSize;
1858    size_t errorCode, dumpsLength;
1859    const BYTE* litPtr = litStart;
1860    const BYTE* const litEnd = litStart + litSize;
1861    int nbSeq;
1862    const BYTE* dumps;
1863    U32* DTableLL = dctx->LLTable;
1864    U32* DTableML = dctx->MLTable;
1865    U32* DTableOffb = dctx->OffTable;
1866    BYTE* const base = (BYTE*) (dctx->base);
1867
1868    /* Build Decoding Tables */
1869    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1870                                      DTableLL, DTableML, DTableOffb,
1871                                      ip, iend-ip);
1872    if (ZSTDv01_isError(errorCode)) return errorCode;
1873    ip += errorCode;
1874
1875    /* Regen sequences */
1876    {
1877        seq_t sequence;
1878        seqState_t seqState;
1879
1880        memset(&sequence, 0, sizeof(sequence));
1881        seqState.dumps = dumps;
1882        seqState.dumpsEnd = dumps + dumpsLength;
1883        seqState.prevOffset = 1;
1884        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1885        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1886        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1887        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1888        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1889
1890        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1891        {
1892            size_t oneSeqSize;
1893            nbSeq--;
1894            ZSTD_decodeSequence(&sequence, &seqState);
1895            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1896            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1897            op += oneSeqSize;
1898        }
1899
1900        /* check if reached exact end */
1901        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1902        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1903
1904        /* last literal segment */
1905        {
1906            size_t lastLLSize = litEnd - litPtr;
1907            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1908            if (lastLLSize > 0) {
1909                if (op != litPtr) memmove(op, litPtr, lastLLSize);
1910                op += lastLLSize;
1911            }
1912        }
1913    }
1914
1915    return op-ostart;
1916}
1917
1918
1919static size_t ZSTD_decompressBlock(
1920                            void* ctx,
1921                            void* dst, size_t maxDstSize,
1922                      const void* src, size_t srcSize)
1923{
1924    /* blockType == blockCompressed, srcSize is trusted */
1925    const BYTE* ip = (const BYTE*)src;
1926    const BYTE* litPtr = NULL;
1927    size_t litSize = 0;
1928    size_t errorCode;
1929
1930    /* Decode literals sub-block */
1931    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1932    if (ZSTDv01_isError(errorCode)) return errorCode;
1933    ip += errorCode;
1934    srcSize -= errorCode;
1935
1936    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1937}
1938
1939
1940size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1941{
1942    const BYTE* ip = (const BYTE*)src;
1943    const BYTE* iend = ip + srcSize;
1944    BYTE* const ostart = (BYTE* const)dst;
1945    BYTE* op = ostart;
1946    BYTE* const oend = ostart + maxDstSize;
1947    size_t remainingSize = srcSize;
1948    U32 magicNumber;
1949    size_t errorCode=0;
1950    blockProperties_t blockProperties;
1951
1952    /* Frame Header */
1953    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1954    magicNumber = ZSTD_readBE32(src);
1955    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1956    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1957
1958    /* Loop on each block */
1959    while (1)
1960    {
1961        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1962        if (ZSTDv01_isError(blockSize)) return blockSize;
1963
1964        ip += ZSTD_blockHeaderSize;
1965        remainingSize -= ZSTD_blockHeaderSize;
1966        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1967
1968        switch(blockProperties.blockType)
1969        {
1970        case bt_compressed:
1971            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1972            break;
1973        case bt_raw :
1974            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1975            break;
1976        case bt_rle :
1977            return ERROR(GENERIC);   /* not yet supported */
1978            break;
1979        case bt_end :
1980            /* end of frame */
1981            if (remainingSize) return ERROR(srcSize_wrong);
1982            break;
1983        default:
1984            return ERROR(GENERIC);
1985        }
1986        if (blockSize == 0) break;   /* bt_end */
1987
1988        if (ZSTDv01_isError(errorCode)) return errorCode;
1989        op += errorCode;
1990        ip += blockSize;
1991        remainingSize -= blockSize;
1992    }
1993
1994    return op-ostart;
1995}
1996
1997size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1998{
1999    dctx_t ctx;
2000    ctx.base = dst;
2001    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
2002}
2003
2004/* ZSTD_errorFrameSizeInfoLegacy() :
2005   assumes `cSize` and `dBound` are _not_ NULL */
2006static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2007{
2008    *cSize = ret;
2009    *dBound = ZSTD_CONTENTSIZE_ERROR;
2010}
2011
2012void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
2013{
2014    const BYTE* ip = (const BYTE*)src;
2015    size_t remainingSize = srcSize;
2016    size_t nbBlocks = 0;
2017    U32 magicNumber;
2018    blockProperties_t blockProperties;
2019
2020    /* Frame Header */
2021    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2022        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2023        return;
2024    }
2025    magicNumber = ZSTD_readBE32(src);
2026    if (magicNumber != ZSTD_magicNumber) {
2027        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2028        return;
2029    }
2030    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2031
2032    /* Loop on each block */
2033    while (1)
2034    {
2035        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2036        if (ZSTDv01_isError(blockSize)) {
2037            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2038            return;
2039        }
2040
2041        ip += ZSTD_blockHeaderSize;
2042        remainingSize -= ZSTD_blockHeaderSize;
2043        if (blockSize > remainingSize) {
2044            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2045            return;
2046        }
2047
2048        if (blockSize == 0) break;   /* bt_end */
2049
2050        ip += blockSize;
2051        remainingSize -= blockSize;
2052        nbBlocks++;
2053    }
2054
2055    *cSize = ip - (const BYTE*)src;
2056    *dBound = nbBlocks * BLOCKSIZE;
2057}
2058
2059/*******************************
2060*  Streaming Decompression API
2061*******************************/
2062
2063size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2064{
2065    dctx->expected = ZSTD_frameHeaderSize;
2066    dctx->phase = 0;
2067    dctx->previousDstEnd = NULL;
2068    dctx->base = NULL;
2069    return 0;
2070}
2071
2072ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2073{
2074    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2075    if (dctx==NULL) return NULL;
2076    ZSTDv01_resetDCtx(dctx);
2077    return dctx;
2078}
2079
2080size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2081{
2082    free(dctx);
2083    return 0;
2084}
2085
2086size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2087{
2088    return ((dctx_t*)dctx)->expected;
2089}
2090
2091size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2092{
2093    dctx_t* ctx = (dctx_t*)dctx;
2094
2095    /* Sanity check */
2096    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2097    if (dst != ctx->previousDstEnd)  /* not contiguous */
2098        ctx->base = dst;
2099
2100    /* Decompress : frame header */
2101    if (ctx->phase == 0)
2102    {
2103        /* Check frame magic header */
2104        U32 magicNumber = ZSTD_readBE32(src);
2105        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2106        ctx->phase = 1;
2107        ctx->expected = ZSTD_blockHeaderSize;
2108        return 0;
2109    }
2110
2111    /* Decompress : block header */
2112    if (ctx->phase == 1)
2113    {
2114        blockProperties_t bp;
2115        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2116        if (ZSTDv01_isError(blockSize)) return blockSize;
2117        if (bp.blockType == bt_end)
2118        {
2119            ctx->expected = 0;
2120            ctx->phase = 0;
2121        }
2122        else
2123        {
2124            ctx->expected = blockSize;
2125            ctx->bType = bp.blockType;
2126            ctx->phase = 2;
2127        }
2128
2129        return 0;
2130    }
2131
2132    /* Decompress : block content */
2133    {
2134        size_t rSize;
2135        switch(ctx->bType)
2136        {
2137        case bt_compressed:
2138            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2139            break;
2140        case bt_raw :
2141            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2142            break;
2143        case bt_rle :
2144            return ERROR(GENERIC);   /* not yet handled */
2145            break;
2146        case bt_end :   /* should never happen (filtered at phase 1) */
2147            rSize = 0;
2148            break;
2149        default:
2150            return ERROR(GENERIC);
2151        }
2152        ctx->phase = 1;
2153        ctx->expected = ZSTD_blockHeaderSize;
2154        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2155        return rSize;
2156    }
2157
2158}
2159