1/*
2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12/******************************************
13*  Includes
14******************************************/
15#include <stddef.h>    /* size_t, ptrdiff_t */
16#include "zstd_v01.h"
17#include "../common/error_private.h"
18
19
20/******************************************
21*  Static allocation
22******************************************/
23/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
25
26/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
28#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
30
31
32/******************************************
33*  Error Management
34******************************************/
35#define FSE_LIST_ERRORS(ITEM) \
36        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39        ITEM(FSE_ERROR_corruptionDetected) \
40        ITEM(FSE_ERROR_maxCode)
41
42#define FSE_GENERATE_ENUM(ENUM) ENUM,
43typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44
45
46/******************************************
47*  FSE symbol compression API
48******************************************/
49/*
50   This API consists of small unitary functions, which highly benefit from being inlined.
51   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52   Visual seems to do it automatically.
53   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54   If none of these solutions is applicable, include "fse.c" directly.
55*/
56
57typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
58typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
59
60typedef struct
61{
62    size_t bitContainer;
63    int    bitPos;
64    char*  startPtr;
65    char*  ptr;
66    char*  endPtr;
67} FSE_CStream_t;
68
69typedef struct
70{
71    ptrdiff_t   value;
72    const void* stateTable;
73    const void* symbolTT;
74    unsigned    stateLog;
75} FSE_CState_t;
76
77typedef struct
78{
79    size_t   bitContainer;
80    unsigned bitsConsumed;
81    const char* ptr;
82    const char* start;
83} FSE_DStream_t;
84
85typedef struct
86{
87    size_t      state;
88    const void* table;   /* precise table may vary, depending on U16 */
89} FSE_DState_t;
90
91typedef enum { FSE_DStream_unfinished = 0,
92               FSE_DStream_endOfBuffer = 1,
93               FSE_DStream_completed = 2,
94               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
95               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
96
97
98/****************************************************************
99*  Tuning parameters
100****************************************************************/
101/* MEMORY_USAGE :
102*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103*  Increasing memory usage improves compression ratio
104*  Reduced memory usage can improve speed, due to cache effect
105*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106#define FSE_MAX_MEMORY_USAGE 14
107#define FSE_DEFAULT_MEMORY_USAGE 13
108
109/* FSE_MAX_SYMBOL_VALUE :
110*  Maximum symbol value authorized.
111*  Required for proper stack allocation */
112#define FSE_MAX_SYMBOL_VALUE 255
113
114
115/****************************************************************
116*  template functions type & suffix
117****************************************************************/
118#define FSE_FUNCTION_TYPE BYTE
119#define FSE_FUNCTION_EXTENSION
120
121
122/****************************************************************
123*  Byte symbol type
124****************************************************************/
125typedef struct
126{
127    unsigned short newState;
128    unsigned char  symbol;
129    unsigned char  nbBits;
130} FSE_decode_t;   /* size == U32 */
131
132
133
134/****************************************************************
135*  Compiler specifics
136****************************************************************/
137#ifdef _MSC_VER    /* Visual Studio */
138#  define FORCE_INLINE static __forceinline
139#  include <intrin.h>                    /* For Visual 2005 */
140#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
141#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
142#else
143#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
145#    ifdef __GNUC__
146#      define FORCE_INLINE static inline __attribute__((always_inline))
147#    else
148#      define FORCE_INLINE static inline
149#    endif
150#  else
151#    define FORCE_INLINE static
152#  endif /* __STDC_VERSION__ */
153#endif
154
155
156/****************************************************************
157*  Includes
158****************************************************************/
159#include <stdlib.h>     /* malloc, free, qsort */
160#include <string.h>     /* memcpy, memset */
161#include <stdio.h>      /* printf (debug) */
162
163
164#ifndef MEM_ACCESS_MODULE
165#define MEM_ACCESS_MODULE
166/****************************************************************
167*  Basic Types
168*****************************************************************/
169#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
170# include <stdint.h>
171typedef  uint8_t BYTE;
172typedef uint16_t U16;
173typedef  int16_t S16;
174typedef uint32_t U32;
175typedef  int32_t S32;
176typedef uint64_t U64;
177typedef  int64_t S64;
178#else
179typedef unsigned char       BYTE;
180typedef unsigned short      U16;
181typedef   signed short      S16;
182typedef unsigned int        U32;
183typedef   signed int        S32;
184typedef unsigned long long  U64;
185typedef   signed long long  S64;
186#endif
187
188#endif   /* MEM_ACCESS_MODULE */
189
190/****************************************************************
191*  Memory I/O
192*****************************************************************/
193/* FSE_FORCE_MEMORY_ACCESS
194 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
195 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196 * The below switch allow to select different access method for improved performance.
197 * Method 0 (default) : use `memcpy()`. Safe and portable.
198 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
199 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
200 * Method 2 : direct access. This method is portable but violate C standard.
201 *            It can generate buggy code on targets generating assembly depending on alignment.
202 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
203 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204 * Prefer these methods in priority order (0 > 1 > 2)
205 */
206#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
207#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208#    define FSE_FORCE_MEMORY_ACCESS 2
209#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
211#    define FSE_FORCE_MEMORY_ACCESS 1
212#  endif
213#endif
214
215
216static unsigned FSE_32bits(void)
217{
218    return sizeof(void*)==4;
219}
220
221static unsigned FSE_isLittleEndian(void)
222{
223    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
224    return one.c[0];
225}
226
227#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
228
229static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
230static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
231static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
232
233#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
234
235/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
236/* currently only defined for gcc and icc */
237typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
238
239static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
240static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
241static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
242
243#else
244
245static U16 FSE_read16(const void* memPtr)
246{
247    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
248}
249
250static U32 FSE_read32(const void* memPtr)
251{
252    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
253}
254
255static U64 FSE_read64(const void* memPtr)
256{
257    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
258}
259
260#endif /* FSE_FORCE_MEMORY_ACCESS */
261
262static U16 FSE_readLE16(const void* memPtr)
263{
264    if (FSE_isLittleEndian())
265        return FSE_read16(memPtr);
266    else
267    {
268        const BYTE* p = (const BYTE*)memPtr;
269        return (U16)(p[0] + (p[1]<<8));
270    }
271}
272
273static U32 FSE_readLE32(const void* memPtr)
274{
275    if (FSE_isLittleEndian())
276        return FSE_read32(memPtr);
277    else
278    {
279        const BYTE* p = (const BYTE*)memPtr;
280        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
281    }
282}
283
284
285static U64 FSE_readLE64(const void* memPtr)
286{
287    if (FSE_isLittleEndian())
288        return FSE_read64(memPtr);
289    else
290    {
291        const BYTE* p = (const BYTE*)memPtr;
292        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
293                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
294    }
295}
296
297static size_t FSE_readLEST(const void* memPtr)
298{
299    if (FSE_32bits())
300        return (size_t)FSE_readLE32(memPtr);
301    else
302        return (size_t)FSE_readLE64(memPtr);
303}
304
305
306
307/****************************************************************
308*  Constants
309*****************************************************************/
310#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
311#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
312#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
313#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
314#define FSE_MIN_TABLELOG 5
315
316#define FSE_TABLELOG_ABSOLUTE_MAX 15
317#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
318#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
319#endif
320
321
322/****************************************************************
323*  Error Management
324****************************************************************/
325#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
326
327
328/****************************************************************
329*  Complex types
330****************************************************************/
331typedef struct
332{
333    int deltaFindState;
334    U32 deltaNbBits;
335} FSE_symbolCompressionTransform; /* total 8 bytes */
336
337typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
338
339/****************************************************************
340*  Internal functions
341****************************************************************/
342FORCE_INLINE unsigned FSE_highbit32 (U32 val)
343{
344#   if defined(_MSC_VER)   /* Visual */
345    unsigned long r;
346    _BitScanReverse ( &r, val );
347    return (unsigned) r;
348#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
349    return __builtin_clz (val) ^ 31;
350#   else   /* Software version */
351    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
352    U32 v = val;
353    unsigned r;
354    v |= v >> 1;
355    v |= v >> 2;
356    v |= v >> 4;
357    v |= v >> 8;
358    v |= v >> 16;
359    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
360    return r;
361#   endif
362}
363
364
365/****************************************************************
366*  Templates
367****************************************************************/
368/*
369  designed to be included
370  for type-specific functions (template emulation in C)
371  Objective is to write these functions only once, for improved maintenance
372*/
373
374/* safety checks */
375#ifndef FSE_FUNCTION_EXTENSION
376#  error "FSE_FUNCTION_EXTENSION must be defined"
377#endif
378#ifndef FSE_FUNCTION_TYPE
379#  error "FSE_FUNCTION_TYPE must be defined"
380#endif
381
382/* Function names */
383#define FSE_CAT(X,Y) X##Y
384#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
385#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
386
387
388
389static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
390
391#define FSE_DECODE_TYPE FSE_decode_t
392
393
394typedef struct {
395    U16 tableLog;
396    U16 fastMode;
397} FSE_DTableHeader;   /* sizeof U32 */
398
399static size_t FSE_buildDTable
400(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
401{
402    void* ptr = dt;
403    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
404    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
405    const U32 tableSize = 1 << tableLog;
406    const U32 tableMask = tableSize-1;
407    const U32 step = FSE_tableStep(tableSize);
408    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
409    U32 position = 0;
410    U32 highThreshold = tableSize-1;
411    const S16 largeLimit= (S16)(1 << (tableLog-1));
412    U32 noLarge = 1;
413    U32 s;
414
415    /* Sanity Checks */
416    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
417    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
418
419    /* Init, lay down lowprob symbols */
420    DTableH[0].tableLog = (U16)tableLog;
421    for (s=0; s<=maxSymbolValue; s++)
422    {
423        if (normalizedCounter[s]==-1)
424        {
425            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
426            symbolNext[s] = 1;
427        }
428        else
429        {
430            if (normalizedCounter[s] >= largeLimit) noLarge=0;
431            symbolNext[s] = normalizedCounter[s];
432        }
433    }
434
435    /* Spread symbols */
436    for (s=0; s<=maxSymbolValue; s++)
437    {
438        int i;
439        for (i=0; i<normalizedCounter[s]; i++)
440        {
441            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
442            position = (position + step) & tableMask;
443            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
444        }
445    }
446
447    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
448
449    /* Build Decoding table */
450    {
451        U32 i;
452        for (i=0; i<tableSize; i++)
453        {
454            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
455            U16 nextState = symbolNext[symbol]++;
456            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
457            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
458        }
459    }
460
461    DTableH->fastMode = (U16)noLarge;
462    return 0;
463}
464
465
466/******************************************
467*  FSE byte symbol
468******************************************/
469#ifndef FSE_COMMONDEFS_ONLY
470
471static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
472
473static short FSE_abs(short a)
474{
475    return a<0? -a : a;
476}
477
478
479/****************************************************************
480*  Header bitstream management
481****************************************************************/
482static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
483                 const void* headerBuffer, size_t hbSize)
484{
485    const BYTE* const istart = (const BYTE*) headerBuffer;
486    const BYTE* const iend = istart + hbSize;
487    const BYTE* ip = istart;
488    int nbBits;
489    int remaining;
490    int threshold;
491    U32 bitStream;
492    int bitCount;
493    unsigned charnum = 0;
494    int previous0 = 0;
495
496    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
497    bitStream = FSE_readLE32(ip);
498    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
499    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
500    bitStream >>= 4;
501    bitCount = 4;
502    *tableLogPtr = nbBits;
503    remaining = (1<<nbBits)+1;
504    threshold = 1<<nbBits;
505    nbBits++;
506
507    while ((remaining>1) && (charnum<=*maxSVPtr))
508    {
509        if (previous0)
510        {
511            unsigned n0 = charnum;
512            while ((bitStream & 0xFFFF) == 0xFFFF)
513            {
514                n0+=24;
515                if (ip < iend-5)
516                {
517                    ip+=2;
518                    bitStream = FSE_readLE32(ip) >> bitCount;
519                }
520                else
521                {
522                    bitStream >>= 16;
523                    bitCount+=16;
524                }
525            }
526            while ((bitStream & 3) == 3)
527            {
528                n0+=3;
529                bitStream>>=2;
530                bitCount+=2;
531            }
532            n0 += bitStream & 3;
533            bitCount += 2;
534            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
535            while (charnum < n0) normalizedCounter[charnum++] = 0;
536            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
537            {
538                ip += bitCount>>3;
539                bitCount &= 7;
540                bitStream = FSE_readLE32(ip) >> bitCount;
541            }
542            else
543                bitStream >>= 2;
544        }
545        {
546            const short max = (short)((2*threshold-1)-remaining);
547            short count;
548
549            if ((bitStream & (threshold-1)) < (U32)max)
550            {
551                count = (short)(bitStream & (threshold-1));
552                bitCount   += nbBits-1;
553            }
554            else
555            {
556                count = (short)(bitStream & (2*threshold-1));
557                if (count >= threshold) count -= max;
558                bitCount   += nbBits;
559            }
560
561            count--;   /* extra accuracy */
562            remaining -= FSE_abs(count);
563            normalizedCounter[charnum++] = count;
564            previous0 = !count;
565            while (remaining < threshold)
566            {
567                nbBits--;
568                threshold >>= 1;
569            }
570
571            {
572                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
573                {
574                    ip += bitCount>>3;
575                    bitCount &= 7;
576                }
577                else
578                {
579                    bitCount -= (int)(8 * (iend - 4 - ip));
580                    ip = iend - 4;
581                }
582                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
583            }
584        }
585    }
586    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
587    *maxSVPtr = charnum-1;
588
589    ip += (bitCount+7)>>3;
590    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
591    return ip-istart;
592}
593
594
595/*********************************************************
596*  Decompression (Byte symbols)
597*********************************************************/
598static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
599{
600    void* ptr = dt;
601    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
602    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
603
604    DTableH->tableLog = 0;
605    DTableH->fastMode = 0;
606
607    cell->newState = 0;
608    cell->symbol = symbolValue;
609    cell->nbBits = 0;
610
611    return 0;
612}
613
614
615static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
616{
617    void* ptr = dt;
618    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
619    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
620    const unsigned tableSize = 1 << nbBits;
621    const unsigned tableMask = tableSize - 1;
622    const unsigned maxSymbolValue = tableMask;
623    unsigned s;
624
625    /* Sanity checks */
626    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
627
628    /* Build Decoding Table */
629    DTableH->tableLog = (U16)nbBits;
630    DTableH->fastMode = 1;
631    for (s=0; s<=maxSymbolValue; s++)
632    {
633        dinfo[s].newState = 0;
634        dinfo[s].symbol = (BYTE)s;
635        dinfo[s].nbBits = (BYTE)nbBits;
636    }
637
638    return 0;
639}
640
641
642/* FSE_initDStream
643 * Initialize a FSE_DStream_t.
644 * srcBuffer must point at the beginning of an FSE block.
645 * The function result is the size of the FSE_block (== srcSize).
646 * If srcSize is too small, the function will return an errorCode;
647 */
648static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
649{
650    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
651
652    if (srcSize >=  sizeof(size_t))
653    {
654        U32 contain32;
655        bitD->start = (const char*)srcBuffer;
656        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
657        bitD->bitContainer = FSE_readLEST(bitD->ptr);
658        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
659        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
660        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
661    }
662    else
663    {
664        U32 contain32;
665        bitD->start = (const char*)srcBuffer;
666        bitD->ptr   = bitD->start;
667        bitD->bitContainer = *(const BYTE*)(bitD->start);
668        switch(srcSize)
669        {
670            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671                    /* fallthrough */
672            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
673                    /* fallthrough */
674            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
675                    /* fallthrough */
676            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
677                    /* fallthrough */
678            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
679                    /* fallthrough */
680            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
681                    /* fallthrough */
682            default:;
683        }
684        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
685        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
686        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
687        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
688    }
689
690    return srcSize;
691}
692
693
694/*!FSE_lookBits
695 * Provides next n bits from the bitContainer.
696 * bitContainer is not modified (bits are still present for next read/look)
697 * On 32-bits, maxNbBits==25
698 * On 64-bits, maxNbBits==57
699 * return : value extracted.
700 */
701static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
702{
703    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
704    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
705}
706
707static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
708{
709    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
710    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
711}
712
713static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
714{
715    bitD->bitsConsumed += nbBits;
716}
717
718
719/*!FSE_readBits
720 * Read next n bits from the bitContainer.
721 * On 32-bits, don't read more than maxNbBits==25
722 * On 64-bits, don't read more than maxNbBits==57
723 * Use the fast variant *only* if n >= 1.
724 * return : value extracted.
725 */
726static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
727{
728    size_t value = FSE_lookBits(bitD, nbBits);
729    FSE_skipBits(bitD, nbBits);
730    return value;
731}
732
733static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
734{
735    size_t value = FSE_lookBitsFast(bitD, nbBits);
736    FSE_skipBits(bitD, nbBits);
737    return value;
738}
739
740static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
741{
742    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
743        return FSE_DStream_tooFar;
744
745    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
746    {
747        bitD->ptr -= bitD->bitsConsumed >> 3;
748        bitD->bitsConsumed &= 7;
749        bitD->bitContainer = FSE_readLEST(bitD->ptr);
750        return FSE_DStream_unfinished;
751    }
752    if (bitD->ptr == bitD->start)
753    {
754        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
755        return FSE_DStream_completed;
756    }
757    {
758        U32 nbBytes = bitD->bitsConsumed >> 3;
759        U32 result = FSE_DStream_unfinished;
760        if (bitD->ptr - nbBytes < bitD->start)
761        {
762            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
763            result = FSE_DStream_endOfBuffer;
764        }
765        bitD->ptr -= nbBytes;
766        bitD->bitsConsumed -= nbBytes*8;
767        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
768        return result;
769    }
770}
771
772
773static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
774{
775    const void* ptr = dt;
776    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
777    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
778    FSE_reloadDStream(bitD);
779    DStatePtr->table = dt + 1;
780}
781
782static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
783{
784    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
785    const U32  nbBits = DInfo.nbBits;
786    BYTE symbol = DInfo.symbol;
787    size_t lowBits = FSE_readBits(bitD, nbBits);
788
789    DStatePtr->state = DInfo.newState + lowBits;
790    return symbol;
791}
792
793static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
794{
795    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
796    const U32 nbBits = DInfo.nbBits;
797    BYTE symbol = DInfo.symbol;
798    size_t lowBits = FSE_readBitsFast(bitD, nbBits);
799
800    DStatePtr->state = DInfo.newState + lowBits;
801    return symbol;
802}
803
804/* FSE_endOfDStream
805   Tells if bitD has reached end of bitStream or not */
806
807static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
808{
809    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
810}
811
812static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
813{
814    return DStatePtr->state == 0;
815}
816
817
818FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
819          void* dst, size_t maxDstSize,
820    const void* cSrc, size_t cSrcSize,
821    const FSE_DTable* dt, const unsigned fast)
822{
823    BYTE* const ostart = (BYTE*) dst;
824    BYTE* op = ostart;
825    BYTE* const omax = op + maxDstSize;
826    BYTE* const olimit = omax-3;
827
828    FSE_DStream_t bitD;
829    FSE_DState_t state1;
830    FSE_DState_t state2;
831    size_t errorCode;
832
833    /* Init */
834    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
835    if (FSE_isError(errorCode)) return errorCode;
836
837    FSE_initDState(&state1, &bitD, dt);
838    FSE_initDState(&state2, &bitD, dt);
839
840#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
841
842    /* 4 symbols per loop */
843    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
844    {
845        op[0] = FSE_GETSYMBOL(&state1);
846
847        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
848            FSE_reloadDStream(&bitD);
849
850        op[1] = FSE_GETSYMBOL(&state2);
851
852        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
853            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
854
855        op[2] = FSE_GETSYMBOL(&state1);
856
857        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
858            FSE_reloadDStream(&bitD);
859
860        op[3] = FSE_GETSYMBOL(&state2);
861    }
862
863    /* tail */
864    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
865    while (1)
866    {
867        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
868            break;
869
870        *op++ = FSE_GETSYMBOL(&state1);
871
872        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
873            break;
874
875        *op++ = FSE_GETSYMBOL(&state2);
876    }
877
878    /* end ? */
879    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
880        return op-ostart;
881
882    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
883
884    return (size_t)-FSE_ERROR_corruptionDetected;
885}
886
887
888static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
889                            const void* cSrc, size_t cSrcSize,
890                            const FSE_DTable* dt)
891{
892    FSE_DTableHeader DTableH;
893    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
894
895    /* select fast mode (static) */
896    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
897    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
898}
899
900
901static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
902{
903    const BYTE* const istart = (const BYTE*)cSrc;
904    const BYTE* ip = istart;
905    short counting[FSE_MAX_SYMBOL_VALUE+1];
906    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
907    unsigned tableLog;
908    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
909    size_t errorCode;
910
911    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
912
913    /* normal FSE decoding mode */
914    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
915    if (FSE_isError(errorCode)) return errorCode;
916    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
917    ip += errorCode;
918    cSrcSize -= errorCode;
919
920    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
921    if (FSE_isError(errorCode)) return errorCode;
922
923    /* always return, even if it is an error code */
924    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
925}
926
927
928
929/* *******************************************************
930*  Huff0 : Huffman block compression
931*********************************************************/
932#define HUF_MAX_SYMBOL_VALUE 255
933#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
934#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
935#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
936#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
937#  error "HUF_MAX_TABLELOG is too large !"
938#endif
939
940typedef struct HUF_CElt_s {
941  U16  val;
942  BYTE nbBits;
943} HUF_CElt ;
944
945typedef struct nodeElt_s {
946    U32 count;
947    U16 parent;
948    BYTE byte;
949    BYTE nbBits;
950} nodeElt;
951
952
953/* *******************************************************
954*  Huff0 : Huffman block decompression
955*********************************************************/
956typedef struct {
957    BYTE byte;
958    BYTE nbBits;
959} HUF_DElt;
960
961static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
962{
963    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
964    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
965    U32 weightTotal;
966    U32 maxBits;
967    const BYTE* ip = (const BYTE*) src;
968    size_t iSize;
969    size_t oSize;
970    U32 n;
971    U32 nextRankStart;
972    void* ptr = DTable+1;
973    HUF_DElt* const dt = (HUF_DElt*)ptr;
974
975    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
976    iSize = ip[0];
977
978    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
979    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
980    if (iSize >= 128)  /* special header */
981    {
982        if (iSize >= (242))   /* RLE */
983        {
984            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
985            oSize = l[iSize-242];
986            memset(huffWeight, 1, sizeof(huffWeight));
987            iSize = 0;
988        }
989        else   /* Incompressible */
990        {
991            oSize = iSize - 127;
992            iSize = ((oSize+1)/2);
993            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
994            ip += 1;
995            for (n=0; n<oSize; n+=2)
996            {
997                huffWeight[n]   = ip[n/2] >> 4;
998                huffWeight[n+1] = ip[n/2] & 15;
999            }
1000        }
1001    }
1002    else  /* header compressed with FSE (normal case) */
1003    {
1004        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1005        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1006        if (FSE_isError(oSize)) return oSize;
1007    }
1008
1009    /* collect weight stats */
1010    memset(rankVal, 0, sizeof(rankVal));
1011    weightTotal = 0;
1012    for (n=0; n<oSize; n++)
1013    {
1014        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1015        rankVal[huffWeight[n]]++;
1016        weightTotal += (1 << huffWeight[n]) >> 1;
1017    }
1018    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1019
1020    /* get last non-null symbol weight (implied, total must be 2^n) */
1021    maxBits = FSE_highbit32(weightTotal) + 1;
1022    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1023    DTable[0] = (U16)maxBits;
1024    {
1025        U32 total = 1 << maxBits;
1026        U32 rest = total - weightTotal;
1027        U32 verif = 1 << FSE_highbit32(rest);
1028        U32 lastWeight = FSE_highbit32(rest) + 1;
1029        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1030        huffWeight[oSize] = (BYTE)lastWeight;
1031        rankVal[lastWeight]++;
1032    }
1033
1034    /* check tree construction validity */
1035    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1036
1037    /* Prepare ranks */
1038    nextRankStart = 0;
1039    for (n=1; n<=maxBits; n++)
1040    {
1041        U32 current = nextRankStart;
1042        nextRankStart += (rankVal[n] << (n-1));
1043        rankVal[n] = current;
1044    }
1045
1046    /* fill DTable */
1047    for (n=0; n<=oSize; n++)
1048    {
1049        const U32 w = huffWeight[n];
1050        const U32 length = (1 << w) >> 1;
1051        U32 i;
1052        HUF_DElt D;
1053        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1054        for (i = rankVal[w]; i < rankVal[w] + length; i++)
1055            dt[i] = D;
1056        rankVal[w] += length;
1057    }
1058
1059    return iSize+1;
1060}
1061
1062
1063static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1064{
1065        const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1066        const BYTE c = dt[val].byte;
1067        FSE_skipBits(Dstream, dt[val].nbBits);
1068        return c;
1069}
1070
1071static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1072          void* dst, size_t maxDstSize,
1073    const void* cSrc, size_t cSrcSize,
1074    const U16* DTable)
1075{
1076    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1077    {
1078        BYTE* const ostart = (BYTE*) dst;
1079        BYTE* op = ostart;
1080        BYTE* const omax = op + maxDstSize;
1081        BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1082
1083        const void* ptr = DTable;
1084        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1085        const U32 dtLog = DTable[0];
1086        size_t errorCode;
1087        U32 reloadStatus;
1088
1089        /* Init */
1090
1091        const U16* jumpTable = (const U16*)cSrc;
1092        const size_t length1 = FSE_readLE16(jumpTable);
1093        const size_t length2 = FSE_readLE16(jumpTable+1);
1094        const size_t length3 = FSE_readLE16(jumpTable+2);
1095        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* check coherency !! */
1096        const char* const start1 = (const char*)(cSrc) + 6;
1097        const char* const start2 = start1 + length1;
1098        const char* const start3 = start2 + length2;
1099        const char* const start4 = start3 + length3;
1100        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1101
1102        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1103
1104        errorCode = FSE_initDStream(&bitD1, start1, length1);
1105        if (FSE_isError(errorCode)) return errorCode;
1106        errorCode = FSE_initDStream(&bitD2, start2, length2);
1107        if (FSE_isError(errorCode)) return errorCode;
1108        errorCode = FSE_initDStream(&bitD3, start3, length3);
1109        if (FSE_isError(errorCode)) return errorCode;
1110        errorCode = FSE_initDStream(&bitD4, start4, length4);
1111        if (FSE_isError(errorCode)) return errorCode;
1112
1113        reloadStatus=FSE_reloadDStream(&bitD2);
1114
1115        /* 16 symbols per loop */
1116        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1117            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1118        {
1119    #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1120            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1121
1122    #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1123            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1124            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1125
1126    #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1127            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1128            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1129
1130            HUF_DECODE_SYMBOL_1( 0, bitD1);
1131            HUF_DECODE_SYMBOL_1( 1, bitD2);
1132            HUF_DECODE_SYMBOL_1( 2, bitD3);
1133            HUF_DECODE_SYMBOL_1( 3, bitD4);
1134            HUF_DECODE_SYMBOL_2( 4, bitD1);
1135            HUF_DECODE_SYMBOL_2( 5, bitD2);
1136            HUF_DECODE_SYMBOL_2( 6, bitD3);
1137            HUF_DECODE_SYMBOL_2( 7, bitD4);
1138            HUF_DECODE_SYMBOL_1( 8, bitD1);
1139            HUF_DECODE_SYMBOL_1( 9, bitD2);
1140            HUF_DECODE_SYMBOL_1(10, bitD3);
1141            HUF_DECODE_SYMBOL_1(11, bitD4);
1142            HUF_DECODE_SYMBOL_0(12, bitD1);
1143            HUF_DECODE_SYMBOL_0(13, bitD2);
1144            HUF_DECODE_SYMBOL_0(14, bitD3);
1145            HUF_DECODE_SYMBOL_0(15, bitD4);
1146        }
1147
1148        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1149            return (size_t)-FSE_ERROR_corruptionDetected;
1150
1151        /* tail */
1152        {
1153            /* bitTail = bitD1; */   /* *much* slower : -20% !??! */
1154            FSE_DStream_t bitTail;
1155            bitTail.ptr = bitD1.ptr;
1156            bitTail.bitsConsumed = bitD1.bitsConsumed;
1157            bitTail.bitContainer = bitD1.bitContainer;   /* required in case of FSE_DStream_endOfBuffer */
1158            bitTail.start = start1;
1159            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1160            {
1161                HUF_DECODE_SYMBOL_0(0, bitTail);
1162            }
1163
1164            if (FSE_endOfDStream(&bitTail))
1165                return op-ostart;
1166        }
1167
1168        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1169
1170        return (size_t)-FSE_ERROR_corruptionDetected;
1171    }
1172}
1173
1174
1175static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1176{
1177    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1178    const BYTE* ip = (const BYTE*) cSrc;
1179    size_t errorCode;
1180
1181    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1182    if (FSE_isError(errorCode)) return errorCode;
1183    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1184    ip += errorCode;
1185    cSrcSize -= errorCode;
1186
1187    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1188}
1189
1190
1191#endif   /* FSE_COMMONDEFS_ONLY */
1192
1193/*
1194    zstd - standard compression library
1195    Copyright (C) 2014-2015, Yann Collet.
1196
1197    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1198
1199    Redistribution and use in source and binary forms, with or without
1200    modification, are permitted provided that the following conditions are
1201    met:
1202    * Redistributions of source code must retain the above copyright
1203    notice, this list of conditions and the following disclaimer.
1204    * Redistributions in binary form must reproduce the above
1205    copyright notice, this list of conditions and the following disclaimer
1206    in the documentation and/or other materials provided with the
1207    distribution.
1208    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1209    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1210    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1211    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1212    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1213    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1214    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1215    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1216    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1217    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1218    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1219
1220    You can contact the author at :
1221    - zstd source repository : https://github.com/Cyan4973/zstd
1222    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1223*/
1224
1225/****************************************************************
1226*  Tuning parameters
1227*****************************************************************/
1228/* MEMORY_USAGE :
1229*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1230*  Increasing memory usage improves compression ratio
1231*  Reduced memory usage can improve speed, due to cache effect */
1232#define ZSTD_MEMORY_USAGE 17
1233
1234
1235/**************************************
1236   CPU Feature Detection
1237**************************************/
1238/*
1239 * Automated efficient unaligned memory access detection
1240 * Based on known hardware architectures
1241 * This list will be updated thanks to feedbacks
1242 */
1243#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1244    || defined(__ARM_FEATURE_UNALIGNED) \
1245    || defined(__i386__) || defined(__x86_64__) \
1246    || defined(_M_IX86) || defined(_M_X64) \
1247    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1248    || (defined(_M_ARM) && (_M_ARM >= 7))
1249#  define ZSTD_UNALIGNED_ACCESS 1
1250#else
1251#  define ZSTD_UNALIGNED_ACCESS 0
1252#endif
1253
1254
1255/********************************************************
1256*  Includes
1257*********************************************************/
1258#include <stdlib.h>      /* calloc */
1259#include <string.h>      /* memcpy, memmove */
1260#include <stdio.h>       /* debug : printf */
1261
1262
1263/********************************************************
1264*  Compiler specifics
1265*********************************************************/
1266#ifdef __AVX2__
1267#  include <immintrin.h>   /* AVX2 intrinsics */
1268#endif
1269
1270#ifdef _MSC_VER    /* Visual Studio */
1271#  include <intrin.h>                    /* For Visual 2005 */
1272#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1273#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1274#endif
1275
1276
1277#ifndef MEM_ACCESS_MODULE
1278#define MEM_ACCESS_MODULE
1279/********************************************************
1280*  Basic Types
1281*********************************************************/
1282#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1283# if defined(_AIX)
1284#  include <inttypes.h>
1285# else
1286#  include <stdint.h> /* intptr_t */
1287# endif
1288typedef  uint8_t BYTE;
1289typedef uint16_t U16;
1290typedef  int16_t S16;
1291typedef uint32_t U32;
1292typedef  int32_t S32;
1293typedef uint64_t U64;
1294#else
1295typedef unsigned char       BYTE;
1296typedef unsigned short      U16;
1297typedef   signed short      S16;
1298typedef unsigned int        U32;
1299typedef   signed int        S32;
1300typedef unsigned long long  U64;
1301#endif
1302
1303#endif   /* MEM_ACCESS_MODULE */
1304
1305
1306/********************************************************
1307*  Constants
1308*********************************************************/
1309static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1310
1311#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1312#define HASH_TABLESIZE (1 << HASH_LOG)
1313#define HASH_MASK (HASH_TABLESIZE - 1)
1314
1315#define KNUTH 2654435761
1316
1317#define BIT7 128
1318#define BIT6  64
1319#define BIT5  32
1320#define BIT4  16
1321
1322#define KB *(1 <<10)
1323#define MB *(1 <<20)
1324#define GB *(1U<<30)
1325
1326#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1327
1328#define WORKPLACESIZE (BLOCKSIZE*3)
1329#define MINMATCH 4
1330#define MLbits   7
1331#define LLbits   6
1332#define Offbits  5
1333#define MaxML  ((1<<MLbits )-1)
1334#define MaxLL  ((1<<LLbits )-1)
1335#define MaxOff ((1<<Offbits)-1)
1336#define LitFSELog  11
1337#define MLFSELog   10
1338#define LLFSELog   10
1339#define OffFSELog   9
1340#define MAX(a,b) ((a)<(b)?(b):(a))
1341#define MaxSeq MAX(MaxLL, MaxML)
1342
1343#define LITERAL_NOENTROPY 63
1344#define COMMAND_NOENTROPY 7   /* to remove */
1345
1346#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
1347
1348static const size_t ZSTD_blockHeaderSize = 3;
1349static const size_t ZSTD_frameHeaderSize = 4;
1350
1351
1352/********************************************************
1353*  Memory operations
1354*********************************************************/
1355static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1356
1357static unsigned ZSTD_isLittleEndian(void)
1358{
1359    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1360    return one.c[0];
1361}
1362
1363static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1364
1365static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1366
1367static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1368
1369#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1370
1371static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1372{
1373    const BYTE* ip = (const BYTE*)src;
1374    BYTE* op = (BYTE*)dst;
1375    BYTE* const oend = op + length;
1376    while (op < oend) COPY8(op, ip);
1377}
1378
1379static U16 ZSTD_readLE16(const void* memPtr)
1380{
1381    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1382    else
1383    {
1384        const BYTE* p = (const BYTE*)memPtr;
1385        return (U16)((U16)p[0] + ((U16)p[1]<<8));
1386    }
1387}
1388
1389static U32 ZSTD_readLE24(const void* memPtr)
1390{
1391    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1392}
1393
1394static U32 ZSTD_readBE32(const void* memPtr)
1395{
1396    const BYTE* p = (const BYTE*)memPtr;
1397    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1398}
1399
1400
1401/**************************************
1402*  Local structures
1403***************************************/
1404typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1405
1406typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1407
1408typedef struct
1409{
1410    blockType_t blockType;
1411    U32 origSize;
1412} blockProperties_t;
1413
1414typedef struct {
1415    void* buffer;
1416    U32*  offsetStart;
1417    U32*  offset;
1418    BYTE* offCodeStart;
1419    BYTE* offCode;
1420    BYTE* litStart;
1421    BYTE* lit;
1422    BYTE* litLengthStart;
1423    BYTE* litLength;
1424    BYTE* matchLengthStart;
1425    BYTE* matchLength;
1426    BYTE* dumpsStart;
1427    BYTE* dumps;
1428} seqStore_t;
1429
1430
1431typedef struct ZSTD_Cctx_s
1432{
1433    const BYTE* base;
1434    U32 current;
1435    U32 nextUpdate;
1436    seqStore_t seqStore;
1437#ifdef __AVX2__
1438    __m256i hashTable[HASH_TABLESIZE>>3];
1439#else
1440    U32 hashTable[HASH_TABLESIZE];
1441#endif
1442    BYTE buffer[WORKPLACESIZE];
1443} cctxi_t;
1444
1445
1446
1447
1448/**************************************
1449*  Error Management
1450**************************************/
1451/* published entry point */
1452unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1453
1454
1455/**************************************
1456*  Tool functions
1457**************************************/
1458#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1459#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1460#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1461#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1462
1463/**************************************************************
1464*   Decompression code
1465**************************************************************/
1466
1467static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1468{
1469    const BYTE* const in = (const BYTE* const)src;
1470    BYTE headerFlags;
1471    U32 cSize;
1472
1473    if (srcSize < 3) return ERROR(srcSize_wrong);
1474
1475    headerFlags = *in;
1476    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1477
1478    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1479    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1480
1481    if (bpPtr->blockType == bt_end) return 0;
1482    if (bpPtr->blockType == bt_rle) return 1;
1483    return cSize;
1484}
1485
1486
1487static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1488{
1489    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1490    if (srcSize > 0) {
1491        memcpy(dst, src, srcSize);
1492    }
1493    return srcSize;
1494}
1495
1496
1497static size_t ZSTD_decompressLiterals(void* ctx,
1498                                      void* dst, size_t maxDstSize,
1499                                const void* src, size_t srcSize)
1500{
1501    BYTE* op = (BYTE*)dst;
1502    BYTE* const oend = op + maxDstSize;
1503    const BYTE* ip = (const BYTE*)src;
1504    size_t errorCode;
1505    size_t litSize;
1506
1507    /* check : minimum 2, for litSize, +1, for content */
1508    if (srcSize <= 3) return ERROR(corruption_detected);
1509
1510    litSize = ip[1] + (ip[0]<<8);
1511    litSize += ((ip[-3] >> 3) & 7) << 16;   /* mmmmh.... */
1512    op = oend - litSize;
1513
1514    (void)ctx;
1515    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1516    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1517    if (FSE_isError(errorCode)) return ERROR(GENERIC);
1518    return litSize;
1519}
1520
1521
1522static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1523                                void* dst, size_t maxDstSize,
1524                          const BYTE** litStart, size_t* litSize,
1525                          const void* src, size_t srcSize)
1526{
1527    const BYTE* const istart = (const BYTE* const)src;
1528    const BYTE* ip = istart;
1529    BYTE* const ostart = (BYTE* const)dst;
1530    BYTE* const oend = ostart + maxDstSize;
1531    blockProperties_t litbp;
1532
1533    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1534    if (ZSTDv01_isError(litcSize)) return litcSize;
1535    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1536    ip += ZSTD_blockHeaderSize;
1537
1538    switch(litbp.blockType)
1539    {
1540    case bt_raw:
1541        *litStart = ip;
1542        ip += litcSize;
1543        *litSize = litcSize;
1544        break;
1545    case bt_rle:
1546        {
1547            size_t rleSize = litbp.origSize;
1548            if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1549            if (!srcSize) return ERROR(srcSize_wrong);
1550            if (rleSize > 0) {
1551                memset(oend - rleSize, *ip, rleSize);
1552            }
1553            *litStart = oend - rleSize;
1554            *litSize = rleSize;
1555            ip++;
1556            break;
1557        }
1558    case bt_compressed:
1559        {
1560            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1561            if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1562            *litStart = oend - decodedLitSize;
1563            *litSize = decodedLitSize;
1564            ip += litcSize;
1565            break;
1566        }
1567    case bt_end:
1568    default:
1569        return ERROR(GENERIC);
1570    }
1571
1572    return ip-istart;
1573}
1574
1575
1576static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1577                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1578                         const void* src, size_t srcSize)
1579{
1580    const BYTE* const istart = (const BYTE* const)src;
1581    const BYTE* ip = istart;
1582    const BYTE* const iend = istart + srcSize;
1583    U32 LLtype, Offtype, MLtype;
1584    U32 LLlog, Offlog, MLlog;
1585    size_t dumpsLength;
1586
1587    /* check */
1588    if (srcSize < 5) return ERROR(srcSize_wrong);
1589
1590    /* SeqHead */
1591    *nbSeq = ZSTD_readLE16(ip); ip+=2;
1592    LLtype  = *ip >> 6;
1593    Offtype = (*ip >> 4) & 3;
1594    MLtype  = (*ip >> 2) & 3;
1595    if (*ip & 2)
1596    {
1597        dumpsLength  = ip[2];
1598        dumpsLength += ip[1] << 8;
1599        ip += 3;
1600    }
1601    else
1602    {
1603        dumpsLength  = ip[1];
1604        dumpsLength += (ip[0] & 1) << 8;
1605        ip += 2;
1606    }
1607    *dumpsPtr = ip;
1608    ip += dumpsLength;
1609    *dumpsLengthPtr = dumpsLength;
1610
1611    /* check */
1612    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1613
1614    /* sequences */
1615    {
1616        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1617        size_t headerSize;
1618
1619        /* Build DTables */
1620        switch(LLtype)
1621        {
1622        case bt_rle :
1623            LLlog = 0;
1624            FSE_buildDTable_rle(DTableLL, *ip++); break;
1625        case bt_raw :
1626            LLlog = LLbits;
1627            FSE_buildDTable_raw(DTableLL, LLbits); break;
1628        default :
1629            {   U32 max = MaxLL;
1630                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1631                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1632                if (LLlog > LLFSELog) return ERROR(corruption_detected);
1633                ip += headerSize;
1634                FSE_buildDTable(DTableLL, norm, max, LLlog);
1635        }   }
1636
1637        switch(Offtype)
1638        {
1639        case bt_rle :
1640            Offlog = 0;
1641            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1642            FSE_buildDTable_rle(DTableOffb, *ip++); break;
1643        case bt_raw :
1644            Offlog = Offbits;
1645            FSE_buildDTable_raw(DTableOffb, Offbits); break;
1646        default :
1647            {   U32 max = MaxOff;
1648                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1649                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1650                if (Offlog > OffFSELog) return ERROR(corruption_detected);
1651                ip += headerSize;
1652                FSE_buildDTable(DTableOffb, norm, max, Offlog);
1653        }   }
1654
1655        switch(MLtype)
1656        {
1657        case bt_rle :
1658            MLlog = 0;
1659            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1660            FSE_buildDTable_rle(DTableML, *ip++); break;
1661        case bt_raw :
1662            MLlog = MLbits;
1663            FSE_buildDTable_raw(DTableML, MLbits); break;
1664        default :
1665            {   U32 max = MaxML;
1666                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1667                if (FSE_isError(headerSize)) return ERROR(GENERIC);
1668                if (MLlog > MLFSELog) return ERROR(corruption_detected);
1669                ip += headerSize;
1670                FSE_buildDTable(DTableML, norm, max, MLlog);
1671    }   }   }
1672
1673    return ip-istart;
1674}
1675
1676
1677typedef struct {
1678    size_t litLength;
1679    size_t offset;
1680    size_t matchLength;
1681} seq_t;
1682
1683typedef struct {
1684    FSE_DStream_t DStream;
1685    FSE_DState_t stateLL;
1686    FSE_DState_t stateOffb;
1687    FSE_DState_t stateML;
1688    size_t prevOffset;
1689    const BYTE* dumps;
1690    const BYTE* dumpsEnd;
1691} seqState_t;
1692
1693
1694static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1695{
1696    size_t litLength;
1697    size_t prevOffset;
1698    size_t offset;
1699    size_t matchLength;
1700    const BYTE* dumps = seqState->dumps;
1701    const BYTE* const de = seqState->dumpsEnd;
1702
1703    /* Literal length */
1704    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1705    prevOffset = litLength ? seq->offset : seqState->prevOffset;
1706    seqState->prevOffset = seq->offset;
1707    if (litLength == MaxLL)
1708    {
1709        const U32 add = dumps<de ? *dumps++ : 0;
1710        if (add < 255) litLength += add;
1711        else
1712        {
1713            if (dumps<=(de-3))
1714            {
1715                litLength = ZSTD_readLE24(dumps);
1716                dumps += 3;
1717            }
1718        }
1719    }
1720
1721    /* Offset */
1722    {
1723        U32 offsetCode, nbBits;
1724        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1725        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1726        nbBits = offsetCode - 1;
1727        if (offsetCode==0) nbBits = 0;   /* cmove */
1728        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1729        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1730        if (offsetCode==0) offset = prevOffset;
1731    }
1732
1733    /* MatchLength */
1734    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1735    if (matchLength == MaxML)
1736    {
1737        const U32 add = dumps<de ? *dumps++ : 0;
1738        if (add < 255) matchLength += add;
1739        else
1740        {
1741            if (dumps<=(de-3))
1742            {
1743                matchLength = ZSTD_readLE24(dumps);
1744                dumps += 3;
1745            }
1746        }
1747    }
1748    matchLength += MINMATCH;
1749
1750    /* save result */
1751    seq->litLength = litLength;
1752    seq->offset = offset;
1753    seq->matchLength = matchLength;
1754    seqState->dumps = dumps;
1755}
1756
1757
1758static size_t ZSTD_execSequence(BYTE* op,
1759                                seq_t sequence,
1760                                const BYTE** litPtr, const BYTE* const litLimit,
1761                                BYTE* const base, BYTE* const oend)
1762{
1763    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1764    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
1765    const BYTE* const ostart = op;
1766    const size_t litLength = sequence.litLength;
1767    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1768    const BYTE* const litEnd = *litPtr + litLength;
1769
1770    /* check */
1771    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1772    if (litEnd > litLimit) return ERROR(corruption_detected);
1773    if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
1774
1775    /* copy Literals */
1776    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1777        memmove(op, *litPtr, litLength);   /* overwrite risk */
1778    else
1779        ZSTD_wildcopy(op, *litPtr, litLength);
1780    op += litLength;
1781    *litPtr = litEnd;   /* update for next sequence */
1782
1783    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1784    if (oend-op < 8) return ERROR(dstSize_tooSmall);
1785
1786    /* copy Match */
1787    {
1788        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1789        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1790        size_t qutt = 12;
1791        U64 saved[2];
1792
1793        /* check */
1794        if (match < base) return ERROR(corruption_detected);
1795        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1796
1797        /* save beginning of literal sequence, in case of write overlap */
1798        if (overlapRisk)
1799        {
1800            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1801            memcpy(saved, endMatch, qutt);
1802        }
1803
1804        if (sequence.offset < 8)
1805        {
1806            const int dec64 = dec64table[sequence.offset];
1807            op[0] = match[0];
1808            op[1] = match[1];
1809            op[2] = match[2];
1810            op[3] = match[3];
1811            match += dec32table[sequence.offset];
1812            ZSTD_copy4(op+4, match);
1813            match -= dec64;
1814        } else { ZSTD_copy8(op, match); }
1815        op += 8; match += 8;
1816
1817        if (endMatch > oend-(16-MINMATCH))
1818        {
1819            if (op < oend-8)
1820            {
1821                ZSTD_wildcopy(op, match, (oend-8) - op);
1822                match += (oend-8) - op;
1823                op = oend-8;
1824            }
1825            while (op<endMatch) *op++ = *match++;
1826        }
1827        else
1828            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1829
1830        /* restore, in case of overlap */
1831        if (overlapRisk) memcpy(endMatch, saved, qutt);
1832    }
1833
1834    return endMatch-ostart;
1835}
1836
1837typedef struct ZSTDv01_Dctx_s
1838{
1839    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1840    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1841    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1842    void* previousDstEnd;
1843    void* base;
1844    size_t expected;
1845    blockType_t bType;
1846    U32 phase;
1847} dctx_t;
1848
1849
1850static size_t ZSTD_decompressSequences(
1851                               void* ctx,
1852                               void* dst, size_t maxDstSize,
1853                         const void* seqStart, size_t seqSize,
1854                         const BYTE* litStart, size_t litSize)
1855{
1856    dctx_t* dctx = (dctx_t*)ctx;
1857    const BYTE* ip = (const BYTE*)seqStart;
1858    const BYTE* const iend = ip + seqSize;
1859    BYTE* const ostart = (BYTE* const)dst;
1860    BYTE* op = ostart;
1861    BYTE* const oend = ostart + maxDstSize;
1862    size_t errorCode, dumpsLength;
1863    const BYTE* litPtr = litStart;
1864    const BYTE* const litEnd = litStart + litSize;
1865    int nbSeq;
1866    const BYTE* dumps;
1867    U32* DTableLL = dctx->LLTable;
1868    U32* DTableML = dctx->MLTable;
1869    U32* DTableOffb = dctx->OffTable;
1870    BYTE* const base = (BYTE*) (dctx->base);
1871
1872    /* Build Decoding Tables */
1873    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1874                                      DTableLL, DTableML, DTableOffb,
1875                                      ip, iend-ip);
1876    if (ZSTDv01_isError(errorCode)) return errorCode;
1877    ip += errorCode;
1878
1879    /* Regen sequences */
1880    {
1881        seq_t sequence;
1882        seqState_t seqState;
1883
1884        memset(&sequence, 0, sizeof(sequence));
1885        seqState.dumps = dumps;
1886        seqState.dumpsEnd = dumps + dumpsLength;
1887        seqState.prevOffset = 1;
1888        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1889        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1890        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1891        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1892        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1893
1894        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1895        {
1896            size_t oneSeqSize;
1897            nbSeq--;
1898            ZSTD_decodeSequence(&sequence, &seqState);
1899            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1900            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1901            op += oneSeqSize;
1902        }
1903
1904        /* check if reached exact end */
1905        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1906        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1907
1908        /* last literal segment */
1909        {
1910            size_t lastLLSize = litEnd - litPtr;
1911            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1912            if (lastLLSize > 0) {
1913                if (op != litPtr) memmove(op, litPtr, lastLLSize);
1914                op += lastLLSize;
1915            }
1916        }
1917    }
1918
1919    return op-ostart;
1920}
1921
1922
1923static size_t ZSTD_decompressBlock(
1924                            void* ctx,
1925                            void* dst, size_t maxDstSize,
1926                      const void* src, size_t srcSize)
1927{
1928    /* blockType == blockCompressed, srcSize is trusted */
1929    const BYTE* ip = (const BYTE*)src;
1930    const BYTE* litPtr = NULL;
1931    size_t litSize = 0;
1932    size_t errorCode;
1933
1934    /* Decode literals sub-block */
1935    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1936    if (ZSTDv01_isError(errorCode)) return errorCode;
1937    ip += errorCode;
1938    srcSize -= errorCode;
1939
1940    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1941}
1942
1943
1944size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1945{
1946    const BYTE* ip = (const BYTE*)src;
1947    const BYTE* iend = ip + srcSize;
1948    BYTE* const ostart = (BYTE* const)dst;
1949    BYTE* op = ostart;
1950    BYTE* const oend = ostart + maxDstSize;
1951    size_t remainingSize = srcSize;
1952    U32 magicNumber;
1953    size_t errorCode=0;
1954    blockProperties_t blockProperties;
1955
1956    /* Frame Header */
1957    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1958    magicNumber = ZSTD_readBE32(src);
1959    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1960    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1961
1962    /* Loop on each block */
1963    while (1)
1964    {
1965        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1966        if (ZSTDv01_isError(blockSize)) return blockSize;
1967
1968        ip += ZSTD_blockHeaderSize;
1969        remainingSize -= ZSTD_blockHeaderSize;
1970        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1971
1972        switch(blockProperties.blockType)
1973        {
1974        case bt_compressed:
1975            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1976            break;
1977        case bt_raw :
1978            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1979            break;
1980        case bt_rle :
1981            return ERROR(GENERIC);   /* not yet supported */
1982            break;
1983        case bt_end :
1984            /* end of frame */
1985            if (remainingSize) return ERROR(srcSize_wrong);
1986            break;
1987        default:
1988            return ERROR(GENERIC);
1989        }
1990        if (blockSize == 0) break;   /* bt_end */
1991
1992        if (ZSTDv01_isError(errorCode)) return errorCode;
1993        op += errorCode;
1994        ip += blockSize;
1995        remainingSize -= blockSize;
1996    }
1997
1998    return op-ostart;
1999}
2000
2001size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2002{
2003    dctx_t ctx;
2004    ctx.base = dst;
2005    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
2006}
2007
2008/* ZSTD_errorFrameSizeInfoLegacy() :
2009   assumes `cSize` and `dBound` are _not_ NULL */
2010static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2011{
2012    *cSize = ret;
2013    *dBound = ZSTD_CONTENTSIZE_ERROR;
2014}
2015
2016void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
2017{
2018    const BYTE* ip = (const BYTE*)src;
2019    size_t remainingSize = srcSize;
2020    size_t nbBlocks = 0;
2021    U32 magicNumber;
2022    blockProperties_t blockProperties;
2023
2024    /* Frame Header */
2025    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2026        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2027        return;
2028    }
2029    magicNumber = ZSTD_readBE32(src);
2030    if (magicNumber != ZSTD_magicNumber) {
2031        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2032        return;
2033    }
2034    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2035
2036    /* Loop on each block */
2037    while (1)
2038    {
2039        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2040        if (ZSTDv01_isError(blockSize)) {
2041            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2042            return;
2043        }
2044
2045        ip += ZSTD_blockHeaderSize;
2046        remainingSize -= ZSTD_blockHeaderSize;
2047        if (blockSize > remainingSize) {
2048            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2049            return;
2050        }
2051
2052        if (blockSize == 0) break;   /* bt_end */
2053
2054        ip += blockSize;
2055        remainingSize -= blockSize;
2056        nbBlocks++;
2057    }
2058
2059    *cSize = ip - (const BYTE*)src;
2060    *dBound = nbBlocks * BLOCKSIZE;
2061}
2062
2063/*******************************
2064*  Streaming Decompression API
2065*******************************/
2066
2067size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2068{
2069    dctx->expected = ZSTD_frameHeaderSize;
2070    dctx->phase = 0;
2071    dctx->previousDstEnd = NULL;
2072    dctx->base = NULL;
2073    return 0;
2074}
2075
2076ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2077{
2078    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2079    if (dctx==NULL) return NULL;
2080    ZSTDv01_resetDCtx(dctx);
2081    return dctx;
2082}
2083
2084size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2085{
2086    free(dctx);
2087    return 0;
2088}
2089
2090size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2091{
2092    return ((dctx_t*)dctx)->expected;
2093}
2094
2095size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2096{
2097    dctx_t* ctx = (dctx_t*)dctx;
2098
2099    /* Sanity check */
2100    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2101    if (dst != ctx->previousDstEnd)  /* not contiguous */
2102        ctx->base = dst;
2103
2104    /* Decompress : frame header */
2105    if (ctx->phase == 0)
2106    {
2107        /* Check frame magic header */
2108        U32 magicNumber = ZSTD_readBE32(src);
2109        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2110        ctx->phase = 1;
2111        ctx->expected = ZSTD_blockHeaderSize;
2112        return 0;
2113    }
2114
2115    /* Decompress : block header */
2116    if (ctx->phase == 1)
2117    {
2118        blockProperties_t bp;
2119        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2120        if (ZSTDv01_isError(blockSize)) return blockSize;
2121        if (bp.blockType == bt_end)
2122        {
2123            ctx->expected = 0;
2124            ctx->phase = 0;
2125        }
2126        else
2127        {
2128            ctx->expected = blockSize;
2129            ctx->bType = bp.blockType;
2130            ctx->phase = 2;
2131        }
2132
2133        return 0;
2134    }
2135
2136    /* Decompress : block content */
2137    {
2138        size_t rSize;
2139        switch(ctx->bType)
2140        {
2141        case bt_compressed:
2142            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2143            break;
2144        case bt_raw :
2145            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2146            break;
2147        case bt_rle :
2148            return ERROR(GENERIC);   /* not yet handled */
2149            break;
2150        case bt_end :   /* should never happen (filtered at phase 1) */
2151            rSize = 0;
2152            break;
2153        default:
2154            return ERROR(GENERIC);
2155        }
2156        ctx->phase = 1;
2157        ctx->expected = ZSTD_blockHeaderSize;
2158        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2159        return rSize;
2160    }
2161
2162}
2163