1/* ******************************************************************
2 * huff0 huffman decoder,
3 * part of Finite State Entropy library
4 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5 *
6 *  You can contact the author at :
7 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8 *
9 * This source code is licensed under both the BSD-style license (found in the
10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 * in the COPYING file in the root directory of this source tree).
12 * You may select, at your option, one of the above-listed licenses.
13****************************************************************** */
14
15/* **************************************************************
16*  Dependencies
17****************************************************************/
18#include <string.h>     /* memcpy, memset */
19#include "../common/compiler.h"
20#include "../common/bitstream.h"  /* BIT_* */
21#include "../common/fse.h"        /* to compress headers */
22#define HUF_STATIC_LINKING_ONLY
23#include "../common/huf.h"
24#include "../common/error_private.h"
25
26/* **************************************************************
27*  Macros
28****************************************************************/
29
30/* These two optional macros force the use one way or another of the two
31 * Huffman decompression implementations. You can't force in both directions
32 * at the same time.
33 */
34#if defined(HUF_FORCE_DECOMPRESS_X1) && \
35    defined(HUF_FORCE_DECOMPRESS_X2)
36#error "Cannot force the use of the X1 and X2 decoders at the same time!"
37#endif
38
39
40/* **************************************************************
41*  Error Management
42****************************************************************/
43#define HUF_isError ERR_isError
44
45
46/* **************************************************************
47*  Byte alignment for workSpace management
48****************************************************************/
49#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
50#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
51
52
53/* **************************************************************
54*  BMI2 Variant Wrappers
55****************************************************************/
56#if DYNAMIC_BMI2
57
58#define HUF_DGEN(fn)                                                        \
59                                                                            \
60    static size_t fn##_default(                                             \
61                  void* dst,  size_t dstSize,                               \
62            const void* cSrc, size_t cSrcSize,                              \
63            const HUF_DTable* DTable)                                       \
64    {                                                                       \
65        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
66    }                                                                       \
67                                                                            \
68    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
69                  void* dst,  size_t dstSize,                               \
70            const void* cSrc, size_t cSrcSize,                              \
71            const HUF_DTable* DTable)                                       \
72    {                                                                       \
73        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
74    }                                                                       \
75                                                                            \
76    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
77                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
78    {                                                                       \
79        if (bmi2) {                                                         \
80            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
81        }                                                                   \
82        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
83    }
84
85#else
86
87#define HUF_DGEN(fn)                                                        \
88    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
89                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
90    {                                                                       \
91        (void)bmi2;                                                         \
92        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
93    }
94
95#endif
96
97
98/*-***************************/
99/*  generic DTableDesc       */
100/*-***************************/
101typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
102
103static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
104{
105    DTableDesc dtd;
106    memcpy(&dtd, table, sizeof(dtd));
107    return dtd;
108}
109
110
111#ifndef HUF_FORCE_DECOMPRESS_X2
112
113/*-***************************/
114/*  single-symbol decoding   */
115/*-***************************/
116typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
117
118size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
119{
120    U32 tableLog = 0;
121    U32 nbSymbols = 0;
122    size_t iSize;
123    void* const dtPtr = DTable + 1;
124    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
125
126    U32* rankVal;
127    BYTE* huffWeight;
128    size_t spaceUsed32 = 0;
129
130    rankVal = (U32 *)workSpace + spaceUsed32;
131    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
132    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
133    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
134
135    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
136
137    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
138    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
139
140    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
141    if (HUF_isError(iSize)) return iSize;
142
143    /* Table header */
144    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
145        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
146        dtd.tableType = 0;
147        dtd.tableLog = (BYTE)tableLog;
148        memcpy(DTable, &dtd, sizeof(dtd));
149    }
150
151    /* Calculate starting value for each rank */
152    {   U32 n, nextRankStart = 0;
153        for (n=1; n<tableLog+1; n++) {
154            U32 const current = nextRankStart;
155            nextRankStart += (rankVal[n] << (n-1));
156            rankVal[n] = current;
157    }   }
158
159    /* fill DTable */
160    {   U32 n;
161        size_t const nEnd = nbSymbols;
162        for (n=0; n<nEnd; n++) {
163            size_t const w = huffWeight[n];
164            size_t const length = (1 << w) >> 1;
165            size_t const uStart = rankVal[w];
166            size_t const uEnd = uStart + length;
167            size_t u;
168            HUF_DEltX1 D;
169            D.byte = (BYTE)n;
170            D.nbBits = (BYTE)(tableLog + 1 - w);
171            rankVal[w] = (U32)uEnd;
172            if (length < 4) {
173                /* Use length in the loop bound so the compiler knows it is short. */
174                for (u = 0; u < length; ++u)
175                    dt[uStart + u] = D;
176            } else {
177                /* Unroll the loop 4 times, we know it is a power of 2. */
178                for (u = uStart; u < uEnd; u += 4) {
179                    dt[u + 0] = D;
180                    dt[u + 1] = D;
181                    dt[u + 2] = D;
182                    dt[u + 3] = D;
183    }   }   }   }
184    return iSize;
185}
186
187size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
188{
189    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
190    return HUF_readDTableX1_wksp(DTable, src, srcSize,
191                                 workSpace, sizeof(workSpace));
192}
193
194FORCE_INLINE_TEMPLATE BYTE
195HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
196{
197    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
198    BYTE const c = dt[val].byte;
199    BIT_skipBits(Dstream, dt[val].nbBits);
200    return c;
201}
202
203#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
204    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
205
206#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
207    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
208        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
209
210#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
211    if (MEM_64bits()) \
212        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
213
214HINT_INLINE size_t
215HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
216{
217    BYTE* const pStart = p;
218
219    /* up to 4 symbols at a time */
220    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
221        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
222        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
223        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
224        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
225    }
226
227    /* [0-3] symbols remaining */
228    if (MEM_32bits())
229        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
230            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
231
232    /* no more data to retrieve from bitstream, no need to reload */
233    while (p < pEnd)
234        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
235
236    return pEnd-pStart;
237}
238
239FORCE_INLINE_TEMPLATE size_t
240HUF_decompress1X1_usingDTable_internal_body(
241          void* dst,  size_t dstSize,
242    const void* cSrc, size_t cSrcSize,
243    const HUF_DTable* DTable)
244{
245    BYTE* op = (BYTE*)dst;
246    BYTE* const oend = op + dstSize;
247    const void* dtPtr = DTable + 1;
248    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
249    BIT_DStream_t bitD;
250    DTableDesc const dtd = HUF_getDTableDesc(DTable);
251    U32 const dtLog = dtd.tableLog;
252
253    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
254
255    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
256
257    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
258
259    return dstSize;
260}
261
262FORCE_INLINE_TEMPLATE size_t
263HUF_decompress4X1_usingDTable_internal_body(
264          void* dst,  size_t dstSize,
265    const void* cSrc, size_t cSrcSize,
266    const HUF_DTable* DTable)
267{
268    /* Check */
269    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
270
271    {   const BYTE* const istart = (const BYTE*) cSrc;
272        BYTE* const ostart = (BYTE*) dst;
273        BYTE* const oend = ostart + dstSize;
274        BYTE* const olimit = oend - 3;
275        const void* const dtPtr = DTable + 1;
276        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
277
278        /* Init */
279        BIT_DStream_t bitD1;
280        BIT_DStream_t bitD2;
281        BIT_DStream_t bitD3;
282        BIT_DStream_t bitD4;
283        size_t const length1 = MEM_readLE16(istart);
284        size_t const length2 = MEM_readLE16(istart+2);
285        size_t const length3 = MEM_readLE16(istart+4);
286        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
287        const BYTE* const istart1 = istart + 6;  /* jumpTable */
288        const BYTE* const istart2 = istart1 + length1;
289        const BYTE* const istart3 = istart2 + length2;
290        const BYTE* const istart4 = istart3 + length3;
291        const size_t segmentSize = (dstSize+3) / 4;
292        BYTE* const opStart2 = ostart + segmentSize;
293        BYTE* const opStart3 = opStart2 + segmentSize;
294        BYTE* const opStart4 = opStart3 + segmentSize;
295        BYTE* op1 = ostart;
296        BYTE* op2 = opStart2;
297        BYTE* op3 = opStart3;
298        BYTE* op4 = opStart4;
299        DTableDesc const dtd = HUF_getDTableDesc(DTable);
300        U32 const dtLog = dtd.tableLog;
301        U32 endSignal = 1;
302
303        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
304        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
305        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
306        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
307        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
308
309        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
310        for ( ; (endSignal) & (op4 < olimit) ; ) {
311            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
312            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
313            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
314            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
315            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
316            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
317            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
318            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
319            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
320            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
321            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
322            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
323            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
324            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
325            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
326            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
327            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
328            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
329            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
330            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
331        }
332
333        /* check corruption */
334        /* note : should not be necessary : op# advance in lock step, and we control op4.
335         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
336        if (op1 > opStart2) return ERROR(corruption_detected);
337        if (op2 > opStart3) return ERROR(corruption_detected);
338        if (op3 > opStart4) return ERROR(corruption_detected);
339        /* note : op4 supposed already verified within main loop */
340
341        /* finish bitStreams one by one */
342        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
343        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
344        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
345        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
346
347        /* check */
348        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
349          if (!endCheck) return ERROR(corruption_detected); }
350
351        /* decoded size */
352        return dstSize;
353    }
354}
355
356
357typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
358                                               const void *cSrc,
359                                               size_t cSrcSize,
360                                               const HUF_DTable *DTable);
361
362HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
363HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
364
365
366
367size_t HUF_decompress1X1_usingDTable(
368          void* dst,  size_t dstSize,
369    const void* cSrc, size_t cSrcSize,
370    const HUF_DTable* DTable)
371{
372    DTableDesc dtd = HUF_getDTableDesc(DTable);
373    if (dtd.tableType != 0) return ERROR(GENERIC);
374    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
375}
376
377size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
378                                   const void* cSrc, size_t cSrcSize,
379                                   void* workSpace, size_t wkspSize)
380{
381    const BYTE* ip = (const BYTE*) cSrc;
382
383    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
384    if (HUF_isError(hSize)) return hSize;
385    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
386    ip += hSize; cSrcSize -= hSize;
387
388    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
389}
390
391
392size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
393                              const void* cSrc, size_t cSrcSize)
394{
395    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
396    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
397                                       workSpace, sizeof(workSpace));
398}
399
400size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
401{
402    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
403    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
404}
405
406size_t HUF_decompress4X1_usingDTable(
407          void* dst,  size_t dstSize,
408    const void* cSrc, size_t cSrcSize,
409    const HUF_DTable* DTable)
410{
411    DTableDesc dtd = HUF_getDTableDesc(DTable);
412    if (dtd.tableType != 0) return ERROR(GENERIC);
413    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
414}
415
416static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
417                                   const void* cSrc, size_t cSrcSize,
418                                   void* workSpace, size_t wkspSize, int bmi2)
419{
420    const BYTE* ip = (const BYTE*) cSrc;
421
422    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
423                                                workSpace, wkspSize);
424    if (HUF_isError(hSize)) return hSize;
425    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
426    ip += hSize; cSrcSize -= hSize;
427
428    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
429}
430
431size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
432                                   const void* cSrc, size_t cSrcSize,
433                                   void* workSpace, size_t wkspSize)
434{
435    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
436}
437
438
439size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
440{
441    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
442    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
443                                       workSpace, sizeof(workSpace));
444}
445size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
446{
447    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
448    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
449}
450
451#endif /* HUF_FORCE_DECOMPRESS_X2 */
452
453
454#ifndef HUF_FORCE_DECOMPRESS_X1
455
456/* *************************/
457/* double-symbols decoding */
458/* *************************/
459
460typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
461typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
462typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
463typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
464
465
466/* HUF_fillDTableX2Level2() :
467 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
468static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
469                           const U32* rankValOrigin, const int minWeight,
470                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
471                           U32 nbBitsBaseline, U16 baseSeq)
472{
473    HUF_DEltX2 DElt;
474    U32 rankVal[HUF_TABLELOG_MAX + 1];
475
476    /* get pre-calculated rankVal */
477    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
478
479    /* fill skipped values */
480    if (minWeight>1) {
481        U32 i, skipSize = rankVal[minWeight];
482        MEM_writeLE16(&(DElt.sequence), baseSeq);
483        DElt.nbBits   = (BYTE)(consumed);
484        DElt.length   = 1;
485        for (i = 0; i < skipSize; i++)
486            DTable[i] = DElt;
487    }
488
489    /* fill DTable */
490    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
491            const U32 symbol = sortedSymbols[s].symbol;
492            const U32 weight = sortedSymbols[s].weight;
493            const U32 nbBits = nbBitsBaseline - weight;
494            const U32 length = 1 << (sizeLog-nbBits);
495            const U32 start = rankVal[weight];
496            U32 i = start;
497            const U32 end = start + length;
498
499            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
500            DElt.nbBits = (BYTE)(nbBits + consumed);
501            DElt.length = 2;
502            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
503
504            rankVal[weight] += length;
505    }   }
506}
507
508
509static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
510                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
511                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
512                           const U32 nbBitsBaseline)
513{
514    U32 rankVal[HUF_TABLELOG_MAX + 1];
515    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
516    const U32 minBits  = nbBitsBaseline - maxWeight;
517    U32 s;
518
519    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
520
521    /* fill DTable */
522    for (s=0; s<sortedListSize; s++) {
523        const U16 symbol = sortedList[s].symbol;
524        const U32 weight = sortedList[s].weight;
525        const U32 nbBits = nbBitsBaseline - weight;
526        const U32 start = rankVal[weight];
527        const U32 length = 1 << (targetLog-nbBits);
528
529        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
530            U32 sortedRank;
531            int minWeight = nbBits + scaleLog;
532            if (minWeight < 1) minWeight = 1;
533            sortedRank = rankStart[minWeight];
534            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
535                           rankValOrigin[nbBits], minWeight,
536                           sortedList+sortedRank, sortedListSize-sortedRank,
537                           nbBitsBaseline, symbol);
538        } else {
539            HUF_DEltX2 DElt;
540            MEM_writeLE16(&(DElt.sequence), symbol);
541            DElt.nbBits = (BYTE)(nbBits);
542            DElt.length = 1;
543            {   U32 const end = start + length;
544                U32 u;
545                for (u = start; u < end; u++) DTable[u] = DElt;
546        }   }
547        rankVal[weight] += length;
548    }
549}
550
551size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
552                       const void* src, size_t srcSize,
553                             void* workSpace, size_t wkspSize)
554{
555    U32 tableLog, maxW, sizeOfSort, nbSymbols;
556    DTableDesc dtd = HUF_getDTableDesc(DTable);
557    U32 const maxTableLog = dtd.maxTableLog;
558    size_t iSize;
559    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
560    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
561    U32 *rankStart;
562
563    rankValCol_t* rankVal;
564    U32* rankStats;
565    U32* rankStart0;
566    sortedSymbol_t* sortedSymbol;
567    BYTE* weightList;
568    size_t spaceUsed32 = 0;
569
570    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
571    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
572    rankStats = (U32 *)workSpace + spaceUsed32;
573    spaceUsed32 += HUF_TABLELOG_MAX + 1;
574    rankStart0 = (U32 *)workSpace + spaceUsed32;
575    spaceUsed32 += HUF_TABLELOG_MAX + 2;
576    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
577    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
578    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
579    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
580
581    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
582
583    rankStart = rankStart0 + 1;
584    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
585
586    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
587    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
588    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
589
590    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
591    if (HUF_isError(iSize)) return iSize;
592
593    /* check result */
594    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
595
596    /* find maxWeight */
597    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
598
599    /* Get start index of each weight */
600    {   U32 w, nextRankStart = 0;
601        for (w=1; w<maxW+1; w++) {
602            U32 current = nextRankStart;
603            nextRankStart += rankStats[w];
604            rankStart[w] = current;
605        }
606        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
607        sizeOfSort = nextRankStart;
608    }
609
610    /* sort symbols by weight */
611    {   U32 s;
612        for (s=0; s<nbSymbols; s++) {
613            U32 const w = weightList[s];
614            U32 const r = rankStart[w]++;
615            sortedSymbol[r].symbol = (BYTE)s;
616            sortedSymbol[r].weight = (BYTE)w;
617        }
618        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
619    }
620
621    /* Build rankVal */
622    {   U32* const rankVal0 = rankVal[0];
623        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
624            U32 nextRankVal = 0;
625            U32 w;
626            for (w=1; w<maxW+1; w++) {
627                U32 current = nextRankVal;
628                nextRankVal += rankStats[w] << (w+rescale);
629                rankVal0[w] = current;
630        }   }
631        {   U32 const minBits = tableLog+1 - maxW;
632            U32 consumed;
633            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
634                U32* const rankValPtr = rankVal[consumed];
635                U32 w;
636                for (w = 1; w < maxW+1; w++) {
637                    rankValPtr[w] = rankVal0[w] >> consumed;
638    }   }   }   }
639
640    HUF_fillDTableX2(dt, maxTableLog,
641                   sortedSymbol, sizeOfSort,
642                   rankStart0, rankVal, maxW,
643                   tableLog+1);
644
645    dtd.tableLog = (BYTE)maxTableLog;
646    dtd.tableType = 1;
647    memcpy(DTable, &dtd, sizeof(dtd));
648    return iSize;
649}
650
651size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
652{
653  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
654  return HUF_readDTableX2_wksp(DTable, src, srcSize,
655                               workSpace, sizeof(workSpace));
656}
657
658
659FORCE_INLINE_TEMPLATE U32
660HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
661{
662    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
663    memcpy(op, dt+val, 2);
664    BIT_skipBits(DStream, dt[val].nbBits);
665    return dt[val].length;
666}
667
668FORCE_INLINE_TEMPLATE U32
669HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
670{
671    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
672    memcpy(op, dt+val, 1);
673    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
674    else {
675        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
676            BIT_skipBits(DStream, dt[val].nbBits);
677            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
678                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
679                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
680    }   }
681    return 1;
682}
683
684#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
685    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
686
687#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
688    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
689        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
690
691#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
692    if (MEM_64bits()) \
693        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
694
695HINT_INLINE size_t
696HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
697                const HUF_DEltX2* const dt, const U32 dtLog)
698{
699    BYTE* const pStart = p;
700
701    /* up to 8 symbols at a time */
702    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
703        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
704        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
705        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
706        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
707    }
708
709    /* closer to end : up to 2 symbols at a time */
710    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
711        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
712
713    while (p <= pEnd-2)
714        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
715
716    if (p < pEnd)
717        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
718
719    return p-pStart;
720}
721
722FORCE_INLINE_TEMPLATE size_t
723HUF_decompress1X2_usingDTable_internal_body(
724          void* dst,  size_t dstSize,
725    const void* cSrc, size_t cSrcSize,
726    const HUF_DTable* DTable)
727{
728    BIT_DStream_t bitD;
729
730    /* Init */
731    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
732
733    /* decode */
734    {   BYTE* const ostart = (BYTE*) dst;
735        BYTE* const oend = ostart + dstSize;
736        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
737        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
738        DTableDesc const dtd = HUF_getDTableDesc(DTable);
739        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
740    }
741
742    /* check */
743    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
744
745    /* decoded size */
746    return dstSize;
747}
748
749FORCE_INLINE_TEMPLATE size_t
750HUF_decompress4X2_usingDTable_internal_body(
751          void* dst,  size_t dstSize,
752    const void* cSrc, size_t cSrcSize,
753    const HUF_DTable* DTable)
754{
755    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
756
757    {   const BYTE* const istart = (const BYTE*) cSrc;
758        BYTE* const ostart = (BYTE*) dst;
759        BYTE* const oend = ostart + dstSize;
760        BYTE* const olimit = oend - (sizeof(size_t)-1);
761        const void* const dtPtr = DTable+1;
762        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
763
764        /* Init */
765        BIT_DStream_t bitD1;
766        BIT_DStream_t bitD2;
767        BIT_DStream_t bitD3;
768        BIT_DStream_t bitD4;
769        size_t const length1 = MEM_readLE16(istart);
770        size_t const length2 = MEM_readLE16(istart+2);
771        size_t const length3 = MEM_readLE16(istart+4);
772        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
773        const BYTE* const istart1 = istart + 6;  /* jumpTable */
774        const BYTE* const istart2 = istart1 + length1;
775        const BYTE* const istart3 = istart2 + length2;
776        const BYTE* const istart4 = istart3 + length3;
777        size_t const segmentSize = (dstSize+3) / 4;
778        BYTE* const opStart2 = ostart + segmentSize;
779        BYTE* const opStart3 = opStart2 + segmentSize;
780        BYTE* const opStart4 = opStart3 + segmentSize;
781        BYTE* op1 = ostart;
782        BYTE* op2 = opStart2;
783        BYTE* op3 = opStart3;
784        BYTE* op4 = opStart4;
785        U32 endSignal = 1;
786        DTableDesc const dtd = HUF_getDTableDesc(DTable);
787        U32 const dtLog = dtd.tableLog;
788
789        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
790        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
791        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
792        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
793        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
794
795        /* 16-32 symbols per loop (4-8 symbols per stream) */
796        for ( ; (endSignal) & (op4 < olimit); ) {
797#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
798            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
799            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
800            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
801            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
802            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
803            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
804            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
805            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
806            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
807            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
808            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
809            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
810            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
811            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
812            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
813            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
814            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
815            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
816            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
817            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
818#else
819            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
820            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
821            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
822            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
823            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
824            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
825            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
826            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
827            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
828            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
829            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
830            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
831            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
832            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
833            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
834            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
835            endSignal = (U32)LIKELY(
836                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
837                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
838                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
839                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
840#endif
841        }
842
843        /* check corruption */
844        if (op1 > opStart2) return ERROR(corruption_detected);
845        if (op2 > opStart3) return ERROR(corruption_detected);
846        if (op3 > opStart4) return ERROR(corruption_detected);
847        /* note : op4 already verified within main loop */
848
849        /* finish bitStreams one by one */
850        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
851        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
852        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
853        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
854
855        /* check */
856        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
857          if (!endCheck) return ERROR(corruption_detected); }
858
859        /* decoded size */
860        return dstSize;
861    }
862}
863
864HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
865HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
866
867size_t HUF_decompress1X2_usingDTable(
868          void* dst,  size_t dstSize,
869    const void* cSrc, size_t cSrcSize,
870    const HUF_DTable* DTable)
871{
872    DTableDesc dtd = HUF_getDTableDesc(DTable);
873    if (dtd.tableType != 1) return ERROR(GENERIC);
874    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875}
876
877size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
878                                   const void* cSrc, size_t cSrcSize,
879                                   void* workSpace, size_t wkspSize)
880{
881    const BYTE* ip = (const BYTE*) cSrc;
882
883    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
884                                               workSpace, wkspSize);
885    if (HUF_isError(hSize)) return hSize;
886    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887    ip += hSize; cSrcSize -= hSize;
888
889    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
890}
891
892
893size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
894                              const void* cSrc, size_t cSrcSize)
895{
896    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
897    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
898                                       workSpace, sizeof(workSpace));
899}
900
901size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
902{
903    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
904    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
905}
906
907size_t HUF_decompress4X2_usingDTable(
908          void* dst,  size_t dstSize,
909    const void* cSrc, size_t cSrcSize,
910    const HUF_DTable* DTable)
911{
912    DTableDesc dtd = HUF_getDTableDesc(DTable);
913    if (dtd.tableType != 1) return ERROR(GENERIC);
914    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
915}
916
917static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
918                                   const void* cSrc, size_t cSrcSize,
919                                   void* workSpace, size_t wkspSize, int bmi2)
920{
921    const BYTE* ip = (const BYTE*) cSrc;
922
923    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
924                                         workSpace, wkspSize);
925    if (HUF_isError(hSize)) return hSize;
926    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
927    ip += hSize; cSrcSize -= hSize;
928
929    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
930}
931
932size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
933                                   const void* cSrc, size_t cSrcSize,
934                                   void* workSpace, size_t wkspSize)
935{
936    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
937}
938
939
940size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
941                              const void* cSrc, size_t cSrcSize)
942{
943    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
944    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
945                                       workSpace, sizeof(workSpace));
946}
947
948size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
949{
950    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
951    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
952}
953
954#endif /* HUF_FORCE_DECOMPRESS_X1 */
955
956
957/* ***********************************/
958/* Universal decompression selectors */
959/* ***********************************/
960
961size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
962                                    const void* cSrc, size_t cSrcSize,
963                                    const HUF_DTable* DTable)
964{
965    DTableDesc const dtd = HUF_getDTableDesc(DTable);
966#if defined(HUF_FORCE_DECOMPRESS_X1)
967    (void)dtd;
968    assert(dtd.tableType == 0);
969    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
970#elif defined(HUF_FORCE_DECOMPRESS_X2)
971    (void)dtd;
972    assert(dtd.tableType == 1);
973    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
974#else
975    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
976                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977#endif
978}
979
980size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
981                                    const void* cSrc, size_t cSrcSize,
982                                    const HUF_DTable* DTable)
983{
984    DTableDesc const dtd = HUF_getDTableDesc(DTable);
985#if defined(HUF_FORCE_DECOMPRESS_X1)
986    (void)dtd;
987    assert(dtd.tableType == 0);
988    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
989#elif defined(HUF_FORCE_DECOMPRESS_X2)
990    (void)dtd;
991    assert(dtd.tableType == 1);
992    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
993#else
994    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
995                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
996#endif
997}
998
999
1000#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1001typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1002static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1003{
1004    /* single, double, quad */
1005    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
1006    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
1007    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
1008    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
1009    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
1010    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
1011    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
1012    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
1013    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
1014    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
1015    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
1016    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
1017    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
1018    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
1019    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
1020    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
1021};
1022#endif
1023
1024/** HUF_selectDecoder() :
1025 *  Tells which decoder is likely to decode faster,
1026 *  based on a set of pre-computed metrics.
1027 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1028 *  Assumption : 0 < dstSize <= 128 KB */
1029U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1030{
1031    assert(dstSize > 0);
1032    assert(dstSize <= 128*1024);
1033#if defined(HUF_FORCE_DECOMPRESS_X1)
1034    (void)dstSize;
1035    (void)cSrcSize;
1036    return 0;
1037#elif defined(HUF_FORCE_DECOMPRESS_X2)
1038    (void)dstSize;
1039    (void)cSrcSize;
1040    return 1;
1041#else
1042    /* decoder timing evaluation */
1043    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
1044        U32 const D256 = (U32)(dstSize >> 8);
1045        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1046        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1047        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
1048        return DTime1 < DTime0;
1049    }
1050#endif
1051}
1052
1053
1054typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1055
1056size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1057{
1058#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1059    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1060#endif
1061
1062    /* validation checks */
1063    if (dstSize == 0) return ERROR(dstSize_tooSmall);
1064    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1065    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1066    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1067
1068    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1069#if defined(HUF_FORCE_DECOMPRESS_X1)
1070        (void)algoNb;
1071        assert(algoNb == 0);
1072        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1073#elif defined(HUF_FORCE_DECOMPRESS_X2)
1074        (void)algoNb;
1075        assert(algoNb == 1);
1076        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1077#else
1078        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1079#endif
1080    }
1081}
1082
1083size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1084{
1085    /* validation checks */
1086    if (dstSize == 0) return ERROR(dstSize_tooSmall);
1087    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1088    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1089    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1090
1091    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1092#if defined(HUF_FORCE_DECOMPRESS_X1)
1093        (void)algoNb;
1094        assert(algoNb == 0);
1095        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1096#elif defined(HUF_FORCE_DECOMPRESS_X2)
1097        (void)algoNb;
1098        assert(algoNb == 1);
1099        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1100#else
1101        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1102                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1103#endif
1104    }
1105}
1106
1107size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1108{
1109    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1110    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1111                                         workSpace, sizeof(workSpace));
1112}
1113
1114
1115size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1116                                     size_t dstSize, const void* cSrc,
1117                                     size_t cSrcSize, void* workSpace,
1118                                     size_t wkspSize)
1119{
1120    /* validation checks */
1121    if (dstSize == 0) return ERROR(dstSize_tooSmall);
1122    if (cSrcSize == 0) return ERROR(corruption_detected);
1123
1124    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1125#if defined(HUF_FORCE_DECOMPRESS_X1)
1126        (void)algoNb;
1127        assert(algoNb == 0);
1128        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1129#elif defined(HUF_FORCE_DECOMPRESS_X2)
1130        (void)algoNb;
1131        assert(algoNb == 1);
1132        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1133#else
1134        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1135                            cSrcSize, workSpace, wkspSize):
1136                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1137#endif
1138    }
1139}
1140
1141size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1142                                  const void* cSrc, size_t cSrcSize,
1143                                  void* workSpace, size_t wkspSize)
1144{
1145    /* validation checks */
1146    if (dstSize == 0) return ERROR(dstSize_tooSmall);
1147    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1148    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1149    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1150
1151    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1152#if defined(HUF_FORCE_DECOMPRESS_X1)
1153        (void)algoNb;
1154        assert(algoNb == 0);
1155        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1156                                cSrcSize, workSpace, wkspSize);
1157#elif defined(HUF_FORCE_DECOMPRESS_X2)
1158        (void)algoNb;
1159        assert(algoNb == 1);
1160        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1161                                cSrcSize, workSpace, wkspSize);
1162#else
1163        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1164                                cSrcSize, workSpace, wkspSize):
1165                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1166                                cSrcSize, workSpace, wkspSize);
1167#endif
1168    }
1169}
1170
1171size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1172                             const void* cSrc, size_t cSrcSize)
1173{
1174    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1175    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1176                                      workSpace, sizeof(workSpace));
1177}
1178
1179
1180size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1181{
1182    DTableDesc const dtd = HUF_getDTableDesc(DTable);
1183#if defined(HUF_FORCE_DECOMPRESS_X1)
1184    (void)dtd;
1185    assert(dtd.tableType == 0);
1186    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1187#elif defined(HUF_FORCE_DECOMPRESS_X2)
1188    (void)dtd;
1189    assert(dtd.tableType == 1);
1190    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1191#else
1192    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1193                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1194#endif
1195}
1196
1197#ifndef HUF_FORCE_DECOMPRESS_X2
1198size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1199{
1200    const BYTE* ip = (const BYTE*) cSrc;
1201
1202    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1203    if (HUF_isError(hSize)) return hSize;
1204    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1205    ip += hSize; cSrcSize -= hSize;
1206
1207    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1208}
1209#endif
1210
1211size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1212{
1213    DTableDesc const dtd = HUF_getDTableDesc(DTable);
1214#if defined(HUF_FORCE_DECOMPRESS_X1)
1215    (void)dtd;
1216    assert(dtd.tableType == 0);
1217    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1218#elif defined(HUF_FORCE_DECOMPRESS_X2)
1219    (void)dtd;
1220    assert(dtd.tableType == 1);
1221    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1222#else
1223    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1224                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1225#endif
1226}
1227
1228size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1229{
1230    /* validation checks */
1231    if (dstSize == 0) return ERROR(dstSize_tooSmall);
1232    if (cSrcSize == 0) return ERROR(corruption_detected);
1233
1234    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1235#if defined(HUF_FORCE_DECOMPRESS_X1)
1236        (void)algoNb;
1237        assert(algoNb == 0);
1238        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1239#elif defined(HUF_FORCE_DECOMPRESS_X2)
1240        (void)algoNb;
1241        assert(algoNb == 1);
1242        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1243#else
1244        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1245                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1246#endif
1247    }
1248}
1249