117651Speter/* inftrees.c -- generate Huffman trees for efficient decoding
2313796Sdelphij * Copyright (C) 1995-2017 Mark Adler
3131377Stjr * For conditions of distribution and use, see copyright notice in zlib.h
417651Speter */
517651Speter
617651Speter#include "zutil.h"
717651Speter#include "inftrees.h"
817651Speter
9131377Stjr#define MAXBITS 15
1042468Speter
1133904Ssteveconst char inflate_copyright[] =
12313796Sdelphij   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
1317651Speter/*
1417651Speter  If you use the zlib library in a product, an acknowledgment is welcome
1517651Speter  in the documentation of your product. If for some reason you cannot
1617651Speter  include such an acknowledgment, I would appreciate that you keep this
1717651Speter  copyright string in the executable of your product.
1817651Speter */
1917651Speter
20131377Stjr/*
21131377Stjr   Build a set of tables to decode the provided canonical Huffman code.
22131377Stjr   The code lengths are lens[0..codes-1].  The result starts at *table,
23131377Stjr   whose indices are 0..2^bits-1.  work is a writable array of at least
24131377Stjr   lens shorts, which is used as a work area.  type is the type of code
25131377Stjr   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26131377Stjr   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27131377Stjr   on return points to the next available entry's address.  bits is the
28131377Stjr   requested root table index bits, and on return it is the actual root
29131377Stjr   table index bits.  It will differ if the request is greater than the
30131377Stjr   longest code or if it is less than the shortest code.
31131377Stjr */
32206905Sdelphijint ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
33131377Stjrcodetype type;
34131377Stjrunsigned short FAR *lens;
35131377Stjrunsigned codes;
36131377Stjrcode FAR * FAR *table;
37131377Stjrunsigned FAR *bits;
38131377Stjrunsigned short FAR *work;
39131377Stjr{
40131377Stjr    unsigned len;               /* a code's length in bits */
41131377Stjr    unsigned sym;               /* index of code symbols */
42131377Stjr    unsigned min, max;          /* minimum and maximum code lengths */
43131377Stjr    unsigned root;              /* number of index bits for root table */
44131377Stjr    unsigned curr;              /* number of index bits for current table */
45131377Stjr    unsigned drop;              /* code bits to drop for sub-table */
46131377Stjr    int left;                   /* number of prefix codes available */
47131377Stjr    unsigned used;              /* code entries in table used */
48131377Stjr    unsigned huff;              /* Huffman code */
49131377Stjr    unsigned incr;              /* for incrementing code, index */
50131377Stjr    unsigned fill;              /* index for replicating entries */
51131377Stjr    unsigned low;               /* low bits for current root entry */
52131377Stjr    unsigned mask;              /* mask for low root bits */
53205194Sdelphij    code here;                  /* table entry for duplication */
54131377Stjr    code FAR *next;             /* next available space in table */
55131377Stjr    const unsigned short FAR *base;     /* base value table to use */
56131377Stjr    const unsigned short FAR *extra;    /* extra bits table to use */
57313796Sdelphij    unsigned match;             /* use base and extra for symbol >= match */
58131377Stjr    unsigned short count[MAXBITS+1];    /* number of codes of each length */
59131377Stjr    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
60131377Stjr    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
6117651Speter        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
6217651Speter        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
63131377Stjr    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
64131377Stjr        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
65313796Sdelphij        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
66131377Stjr    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
6717651Speter        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
6817651Speter        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
69131377Stjr        8193, 12289, 16385, 24577, 0, 0};
70131377Stjr    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
71131377Stjr        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
72131377Stjr        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
73131377Stjr        28, 28, 29, 29, 64, 64};
7417651Speter
75131377Stjr    /*
76131377Stjr       Process a set of code lengths to create a canonical Huffman code.  The
77131377Stjr       code lengths are lens[0..codes-1].  Each length corresponds to the
78131377Stjr       symbols 0..codes-1.  The Huffman code is generated by first sorting the
79131377Stjr       symbols by length from short to long, and retaining the symbol order
80131377Stjr       for codes with equal lengths.  Then the code starts with all zero bits
81131377Stjr       for the first code of the shortest length, and the codes are integer
82131377Stjr       increments for the same length, and zeros are appended as the length
83131377Stjr       increases.  For the deflate format, these bits are stored backwards
84131377Stjr       from their more natural integer increment ordering, and so when the
85131377Stjr       decoding tables are built in the large loop below, the integer codes
86131377Stjr       are incremented backwards.
8717651Speter
88131377Stjr       This routine assumes, but does not check, that all of the entries in
89131377Stjr       lens[] are in the range 0..MAXBITS.  The caller must assure this.
90131377Stjr       1..MAXBITS is interpreted as that code length.  zero means that that
91131377Stjr       symbol does not occur in this code.
9217651Speter
93131377Stjr       The codes are sorted by computing a count of codes for each length,
94131377Stjr       creating from that a table of starting indices for each length in the
95131377Stjr       sorted table, and then entering the symbols in order in the sorted
96131377Stjr       table.  The sorted table is work[], with that space being provided by
97131377Stjr       the caller.
9817651Speter
99131377Stjr       The length counts are used for other purposes as well, i.e. finding
100131377Stjr       the minimum and maximum length codes, determining if there are any
101131377Stjr       codes at all, checking for a valid set of lengths, and looking ahead
102131377Stjr       at length counts to determine sub-table sizes when building the
103131377Stjr       decoding tables.
104131377Stjr     */
10517651Speter
106131377Stjr    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
107131377Stjr    for (len = 0; len <= MAXBITS; len++)
108131377Stjr        count[len] = 0;
109131377Stjr    for (sym = 0; sym < codes; sym++)
110131377Stjr        count[lens[sym]]++;
11117651Speter
112131377Stjr    /* bound code lengths, force root to be within code lengths */
113131377Stjr    root = *bits;
114131377Stjr    for (max = MAXBITS; max >= 1; max--)
115131377Stjr        if (count[max] != 0) break;
116131377Stjr    if (root > max) root = max;
117145474Skientzle    if (max == 0) {                     /* no symbols to code at all */
118205194Sdelphij        here.op = (unsigned char)64;    /* invalid code marker */
119205194Sdelphij        here.bits = (unsigned char)1;
120205194Sdelphij        here.val = (unsigned short)0;
121205194Sdelphij        *(*table)++ = here;             /* make a table to force an error */
122205194Sdelphij        *(*table)++ = here;
123145474Skientzle        *bits = 1;
124145474Skientzle        return 0;     /* no symbols, but wait for decoding to report error */
125145474Skientzle    }
126205194Sdelphij    for (min = 1; min < max; min++)
127131377Stjr        if (count[min] != 0) break;
128131377Stjr    if (root < min) root = min;
12917651Speter
130131377Stjr    /* check for an over-subscribed or incomplete set of lengths */
131131377Stjr    left = 1;
132131377Stjr    for (len = 1; len <= MAXBITS; len++) {
133131377Stjr        left <<= 1;
134131377Stjr        left -= count[len];
135131377Stjr        if (left < 0) return -1;        /* over-subscribed */
136131377Stjr    }
137157043Sdes    if (left > 0 && (type == CODES || max != 1))
138131377Stjr        return -1;                      /* incomplete set */
13917651Speter
140131377Stjr    /* generate offsets into symbol table for each length for sorting */
141131377Stjr    offs[1] = 0;
142131377Stjr    for (len = 1; len < MAXBITS; len++)
143131377Stjr        offs[len + 1] = offs[len] + count[len];
14417651Speter
145131377Stjr    /* sort symbols by length, by symbol order within each length */
146131377Stjr    for (sym = 0; sym < codes; sym++)
147131377Stjr        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
14817651Speter
149131377Stjr    /*
150131377Stjr       Create and fill in decoding tables.  In this loop, the table being
151131377Stjr       filled is at next and has curr index bits.  The code being used is huff
152131377Stjr       with length len.  That code is converted to an index by dropping drop
153131377Stjr       bits off of the bottom.  For codes where len is less than drop + curr,
154131377Stjr       those top drop + curr - len bits are incremented through all values to
155131377Stjr       fill the table with replicated entries.
15617651Speter
157131377Stjr       root is the number of index bits for the root table.  When len exceeds
158131377Stjr       root, sub-tables are created pointed to by the root entry with an index
159131377Stjr       of the low root bits of huff.  This is saved in low to check for when a
160131377Stjr       new sub-table should be started.  drop is zero when the root table is
161131377Stjr       being filled, and drop is root when sub-tables are being filled.
16217651Speter
163131377Stjr       When a new sub-table is needed, it is necessary to look ahead in the
164131377Stjr       code lengths to determine what size sub-table is needed.  The length
165131377Stjr       counts are used for this, and so count[] is decremented as codes are
166131377Stjr       entered in the tables.
16717651Speter
168131377Stjr       used keeps track of how many table entries have been allocated from the
169205194Sdelphij       provided *table space.  It is checked for LENS and DIST tables against
170205194Sdelphij       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
171205194Sdelphij       the initial root table size constants.  See the comments in inftrees.h
172205194Sdelphij       for more information.
17317651Speter
174131377Stjr       sym increments through all symbols, and the loop terminates when
175131377Stjr       all codes of length max, i.e. all codes, have been processed.  This
176131377Stjr       routine permits incomplete codes, so another loop after this one fills
177131377Stjr       in the rest of the decoding tables with invalid code markers.
178131377Stjr     */
17917651Speter
180131377Stjr    /* set up for code type */
181131377Stjr    switch (type) {
182131377Stjr    case CODES:
183131377Stjr        base = extra = work;    /* dummy value--not used */
184313796Sdelphij        match = 20;
185131377Stjr        break;
186131377Stjr    case LENS:
187131377Stjr        base = lbase;
188131377Stjr        extra = lext;
189313796Sdelphij        match = 257;
190131377Stjr        break;
191313796Sdelphij    default:    /* DISTS */
192131377Stjr        base = dbase;
193131377Stjr        extra = dext;
194313796Sdelphij        match = 0;
195131377Stjr    }
19617651Speter
197131377Stjr    /* initialize state for loop */
198131377Stjr    huff = 0;                   /* starting code */
199131377Stjr    sym = 0;                    /* starting code symbol */
200131377Stjr    len = min;                  /* starting code length */
201131377Stjr    next = *table;              /* current table to fill in */
202131377Stjr    curr = root;                /* current table index bits */
203131377Stjr    drop = 0;                   /* current bits to drop from code for index */
204131377Stjr    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
205131377Stjr    used = 1U << root;          /* use root table entries */
206131377Stjr    mask = used - 1;            /* mask for comparing low */
20717651Speter
208131377Stjr    /* check available table space */
209250224Sdelphij    if ((type == LENS && used > ENOUGH_LENS) ||
210250224Sdelphij        (type == DISTS && used > ENOUGH_DISTS))
211131377Stjr        return 1;
21217651Speter
213131377Stjr    /* process all codes and make table entries */
214131377Stjr    for (;;) {
215131377Stjr        /* create table entry */
216205194Sdelphij        here.bits = (unsigned char)(len - drop);
217313796Sdelphij        if (work[sym] + 1U < match) {
218205194Sdelphij            here.op = (unsigned char)0;
219205194Sdelphij            here.val = work[sym];
22017651Speter        }
221313796Sdelphij        else if (work[sym] >= match) {
222313796Sdelphij            here.op = (unsigned char)(extra[work[sym] - match]);
223313796Sdelphij            here.val = base[work[sym] - match];
224131377Stjr        }
225131377Stjr        else {
226205194Sdelphij            here.op = (unsigned char)(32 + 64);         /* end of block */
227205194Sdelphij            here.val = 0;
228131377Stjr        }
22917651Speter
230131377Stjr        /* replicate for those indices with low len bits equal to huff */
231131377Stjr        incr = 1U << (len - drop);
232131377Stjr        fill = 1U << curr;
233157043Sdes        min = fill;                 /* save offset to next table */
234131377Stjr        do {
235131377Stjr            fill -= incr;
236205194Sdelphij            next[(huff >> drop) + fill] = here;
237131377Stjr        } while (fill != 0);
23817651Speter
239131377Stjr        /* backwards increment the len-bit code huff */
240131377Stjr        incr = 1U << (len - 1);
241131377Stjr        while (huff & incr)
242131377Stjr            incr >>= 1;
243131377Stjr        if (incr != 0) {
244131377Stjr            huff &= incr - 1;
245131377Stjr            huff += incr;
24617651Speter        }
24742468Speter        else
248131377Stjr            huff = 0;
24917651Speter
250131377Stjr        /* go to next symbol, update count, len */
251131377Stjr        sym++;
252131377Stjr        if (--(count[len]) == 0) {
253131377Stjr            if (len == max) break;
254131377Stjr            len = lens[work[sym]];
255131377Stjr        }
25617651Speter
257131377Stjr        /* create new sub-table if needed */
258131377Stjr        if (len > root && (huff & mask) != low) {
259131377Stjr            /* if first time, transition to sub-tables */
260131377Stjr            if (drop == 0)
261131377Stjr                drop = root;
26217651Speter
263131377Stjr            /* increment past last table */
264157043Sdes            next += min;            /* here min is 1 << curr */
26517651Speter
266131377Stjr            /* determine length of next table */
267131377Stjr            curr = len - drop;
268131377Stjr            left = (int)(1 << curr);
269131377Stjr            while (curr + drop < max) {
270131377Stjr                left -= count[curr + drop];
271131377Stjr                if (left <= 0) break;
272131377Stjr                curr++;
273131377Stjr                left <<= 1;
274131377Stjr            }
27517651Speter
276131377Stjr            /* check for enough space */
277131377Stjr            used += 1U << curr;
278250224Sdelphij            if ((type == LENS && used > ENOUGH_LENS) ||
279250224Sdelphij                (type == DISTS && used > ENOUGH_DISTS))
280131377Stjr                return 1;
28117651Speter
282131377Stjr            /* point entry in root table to sub-table */
283131377Stjr            low = huff & mask;
284131377Stjr            (*table)[low].op = (unsigned char)curr;
285131377Stjr            (*table)[low].bits = (unsigned char)root;
286131377Stjr            (*table)[low].val = (unsigned short)(next - *table);
287131377Stjr        }
28817651Speter    }
28917651Speter
290230837Sdelphij    /* fill in remaining table entry if code is incomplete (guaranteed to have
291230837Sdelphij       at most one remaining entry, since if the code is incomplete, the
292230837Sdelphij       maximum code length that was allowed to get this far is one bit) */
293230837Sdelphij    if (huff != 0) {
294230837Sdelphij        here.op = (unsigned char)64;            /* invalid code marker */
295230837Sdelphij        here.bits = (unsigned char)(len - drop);
296230837Sdelphij        here.val = (unsigned short)0;
297230837Sdelphij        next[huff] = here;
29833904Ssteve    }
29917651Speter
300131377Stjr    /* set return parameters */
301131377Stjr    *table += used;
302131377Stjr    *bits = root;
303131377Stjr    return 0;
30417651Speter}
305