117651Speter/* inftrees.c -- generate Huffman trees for efficient decoding 2313796Sdelphij * Copyright (C) 1995-2017 Mark Adler 3131377Stjr * For conditions of distribution and use, see copyright notice in zlib.h 417651Speter */ 517651Speter 617651Speter#include "zutil.h" 717651Speter#include "inftrees.h" 817651Speter 9131377Stjr#define MAXBITS 15 1042468Speter 1133904Ssteveconst char inflate_copyright[] = 12313796Sdelphij " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; 1317651Speter/* 1417651Speter If you use the zlib library in a product, an acknowledgment is welcome 1517651Speter in the documentation of your product. If for some reason you cannot 1617651Speter include such an acknowledgment, I would appreciate that you keep this 1717651Speter copyright string in the executable of your product. 1817651Speter */ 1917651Speter 20131377Stjr/* 21131377Stjr Build a set of tables to decode the provided canonical Huffman code. 22131377Stjr The code lengths are lens[0..codes-1]. The result starts at *table, 23131377Stjr whose indices are 0..2^bits-1. work is a writable array of at least 24131377Stjr lens shorts, which is used as a work area. type is the type of code 25131377Stjr to be generated, CODES, LENS, or DISTS. On return, zero is success, 26131377Stjr -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 27131377Stjr on return points to the next available entry's address. bits is the 28131377Stjr requested root table index bits, and on return it is the actual root 29131377Stjr table index bits. It will differ if the request is greater than the 30131377Stjr longest code or if it is less than the shortest code. 31131377Stjr */ 32206905Sdelphijint ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) 33131377Stjrcodetype type; 34131377Stjrunsigned short FAR *lens; 35131377Stjrunsigned codes; 36131377Stjrcode FAR * FAR *table; 37131377Stjrunsigned FAR *bits; 38131377Stjrunsigned short FAR *work; 39131377Stjr{ 40131377Stjr unsigned len; /* a code's length in bits */ 41131377Stjr unsigned sym; /* index of code symbols */ 42131377Stjr unsigned min, max; /* minimum and maximum code lengths */ 43131377Stjr unsigned root; /* number of index bits for root table */ 44131377Stjr unsigned curr; /* number of index bits for current table */ 45131377Stjr unsigned drop; /* code bits to drop for sub-table */ 46131377Stjr int left; /* number of prefix codes available */ 47131377Stjr unsigned used; /* code entries in table used */ 48131377Stjr unsigned huff; /* Huffman code */ 49131377Stjr unsigned incr; /* for incrementing code, index */ 50131377Stjr unsigned fill; /* index for replicating entries */ 51131377Stjr unsigned low; /* low bits for current root entry */ 52131377Stjr unsigned mask; /* mask for low root bits */ 53205194Sdelphij code here; /* table entry for duplication */ 54131377Stjr code FAR *next; /* next available space in table */ 55131377Stjr const unsigned short FAR *base; /* base value table to use */ 56131377Stjr const unsigned short FAR *extra; /* extra bits table to use */ 57313796Sdelphij unsigned match; /* use base and extra for symbol >= match */ 58131377Stjr unsigned short count[MAXBITS+1]; /* number of codes of each length */ 59131377Stjr unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 60131377Stjr static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 6117651Speter 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 6217651Speter 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 63131377Stjr static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 64131377Stjr 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 65313796Sdelphij 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; 66131377Stjr static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 6717651Speter 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 6817651Speter 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 69131377Stjr 8193, 12289, 16385, 24577, 0, 0}; 70131377Stjr static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ 71131377Stjr 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 72131377Stjr 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 73131377Stjr 28, 28, 29, 29, 64, 64}; 7417651Speter 75131377Stjr /* 76131377Stjr Process a set of code lengths to create a canonical Huffman code. The 77131377Stjr code lengths are lens[0..codes-1]. Each length corresponds to the 78131377Stjr symbols 0..codes-1. The Huffman code is generated by first sorting the 79131377Stjr symbols by length from short to long, and retaining the symbol order 80131377Stjr for codes with equal lengths. Then the code starts with all zero bits 81131377Stjr for the first code of the shortest length, and the codes are integer 82131377Stjr increments for the same length, and zeros are appended as the length 83131377Stjr increases. For the deflate format, these bits are stored backwards 84131377Stjr from their more natural integer increment ordering, and so when the 85131377Stjr decoding tables are built in the large loop below, the integer codes 86131377Stjr are incremented backwards. 8717651Speter 88131377Stjr This routine assumes, but does not check, that all of the entries in 89131377Stjr lens[] are in the range 0..MAXBITS. The caller must assure this. 90131377Stjr 1..MAXBITS is interpreted as that code length. zero means that that 91131377Stjr symbol does not occur in this code. 9217651Speter 93131377Stjr The codes are sorted by computing a count of codes for each length, 94131377Stjr creating from that a table of starting indices for each length in the 95131377Stjr sorted table, and then entering the symbols in order in the sorted 96131377Stjr table. The sorted table is work[], with that space being provided by 97131377Stjr the caller. 9817651Speter 99131377Stjr The length counts are used for other purposes as well, i.e. finding 100131377Stjr the minimum and maximum length codes, determining if there are any 101131377Stjr codes at all, checking for a valid set of lengths, and looking ahead 102131377Stjr at length counts to determine sub-table sizes when building the 103131377Stjr decoding tables. 104131377Stjr */ 10517651Speter 106131377Stjr /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 107131377Stjr for (len = 0; len <= MAXBITS; len++) 108131377Stjr count[len] = 0; 109131377Stjr for (sym = 0; sym < codes; sym++) 110131377Stjr count[lens[sym]]++; 11117651Speter 112131377Stjr /* bound code lengths, force root to be within code lengths */ 113131377Stjr root = *bits; 114131377Stjr for (max = MAXBITS; max >= 1; max--) 115131377Stjr if (count[max] != 0) break; 116131377Stjr if (root > max) root = max; 117145474Skientzle if (max == 0) { /* no symbols to code at all */ 118205194Sdelphij here.op = (unsigned char)64; /* invalid code marker */ 119205194Sdelphij here.bits = (unsigned char)1; 120205194Sdelphij here.val = (unsigned short)0; 121205194Sdelphij *(*table)++ = here; /* make a table to force an error */ 122205194Sdelphij *(*table)++ = here; 123145474Skientzle *bits = 1; 124145474Skientzle return 0; /* no symbols, but wait for decoding to report error */ 125145474Skientzle } 126205194Sdelphij for (min = 1; min < max; min++) 127131377Stjr if (count[min] != 0) break; 128131377Stjr if (root < min) root = min; 12917651Speter 130131377Stjr /* check for an over-subscribed or incomplete set of lengths */ 131131377Stjr left = 1; 132131377Stjr for (len = 1; len <= MAXBITS; len++) { 133131377Stjr left <<= 1; 134131377Stjr left -= count[len]; 135131377Stjr if (left < 0) return -1; /* over-subscribed */ 136131377Stjr } 137157043Sdes if (left > 0 && (type == CODES || max != 1)) 138131377Stjr return -1; /* incomplete set */ 13917651Speter 140131377Stjr /* generate offsets into symbol table for each length for sorting */ 141131377Stjr offs[1] = 0; 142131377Stjr for (len = 1; len < MAXBITS; len++) 143131377Stjr offs[len + 1] = offs[len] + count[len]; 14417651Speter 145131377Stjr /* sort symbols by length, by symbol order within each length */ 146131377Stjr for (sym = 0; sym < codes; sym++) 147131377Stjr if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 14817651Speter 149131377Stjr /* 150131377Stjr Create and fill in decoding tables. In this loop, the table being 151131377Stjr filled is at next and has curr index bits. The code being used is huff 152131377Stjr with length len. That code is converted to an index by dropping drop 153131377Stjr bits off of the bottom. For codes where len is less than drop + curr, 154131377Stjr those top drop + curr - len bits are incremented through all values to 155131377Stjr fill the table with replicated entries. 15617651Speter 157131377Stjr root is the number of index bits for the root table. When len exceeds 158131377Stjr root, sub-tables are created pointed to by the root entry with an index 159131377Stjr of the low root bits of huff. This is saved in low to check for when a 160131377Stjr new sub-table should be started. drop is zero when the root table is 161131377Stjr being filled, and drop is root when sub-tables are being filled. 16217651Speter 163131377Stjr When a new sub-table is needed, it is necessary to look ahead in the 164131377Stjr code lengths to determine what size sub-table is needed. The length 165131377Stjr counts are used for this, and so count[] is decremented as codes are 166131377Stjr entered in the tables. 16717651Speter 168131377Stjr used keeps track of how many table entries have been allocated from the 169205194Sdelphij provided *table space. It is checked for LENS and DIST tables against 170205194Sdelphij the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 171205194Sdelphij the initial root table size constants. See the comments in inftrees.h 172205194Sdelphij for more information. 17317651Speter 174131377Stjr sym increments through all symbols, and the loop terminates when 175131377Stjr all codes of length max, i.e. all codes, have been processed. This 176131377Stjr routine permits incomplete codes, so another loop after this one fills 177131377Stjr in the rest of the decoding tables with invalid code markers. 178131377Stjr */ 17917651Speter 180131377Stjr /* set up for code type */ 181131377Stjr switch (type) { 182131377Stjr case CODES: 183131377Stjr base = extra = work; /* dummy value--not used */ 184313796Sdelphij match = 20; 185131377Stjr break; 186131377Stjr case LENS: 187131377Stjr base = lbase; 188131377Stjr extra = lext; 189313796Sdelphij match = 257; 190131377Stjr break; 191313796Sdelphij default: /* DISTS */ 192131377Stjr base = dbase; 193131377Stjr extra = dext; 194313796Sdelphij match = 0; 195131377Stjr } 19617651Speter 197131377Stjr /* initialize state for loop */ 198131377Stjr huff = 0; /* starting code */ 199131377Stjr sym = 0; /* starting code symbol */ 200131377Stjr len = min; /* starting code length */ 201131377Stjr next = *table; /* current table to fill in */ 202131377Stjr curr = root; /* current table index bits */ 203131377Stjr drop = 0; /* current bits to drop from code for index */ 204131377Stjr low = (unsigned)(-1); /* trigger new sub-table when len > root */ 205131377Stjr used = 1U << root; /* use root table entries */ 206131377Stjr mask = used - 1; /* mask for comparing low */ 20717651Speter 208131377Stjr /* check available table space */ 209250224Sdelphij if ((type == LENS && used > ENOUGH_LENS) || 210250224Sdelphij (type == DISTS && used > ENOUGH_DISTS)) 211131377Stjr return 1; 21217651Speter 213131377Stjr /* process all codes and make table entries */ 214131377Stjr for (;;) { 215131377Stjr /* create table entry */ 216205194Sdelphij here.bits = (unsigned char)(len - drop); 217313796Sdelphij if (work[sym] + 1U < match) { 218205194Sdelphij here.op = (unsigned char)0; 219205194Sdelphij here.val = work[sym]; 22017651Speter } 221313796Sdelphij else if (work[sym] >= match) { 222313796Sdelphij here.op = (unsigned char)(extra[work[sym] - match]); 223313796Sdelphij here.val = base[work[sym] - match]; 224131377Stjr } 225131377Stjr else { 226205194Sdelphij here.op = (unsigned char)(32 + 64); /* end of block */ 227205194Sdelphij here.val = 0; 228131377Stjr } 22917651Speter 230131377Stjr /* replicate for those indices with low len bits equal to huff */ 231131377Stjr incr = 1U << (len - drop); 232131377Stjr fill = 1U << curr; 233157043Sdes min = fill; /* save offset to next table */ 234131377Stjr do { 235131377Stjr fill -= incr; 236205194Sdelphij next[(huff >> drop) + fill] = here; 237131377Stjr } while (fill != 0); 23817651Speter 239131377Stjr /* backwards increment the len-bit code huff */ 240131377Stjr incr = 1U << (len - 1); 241131377Stjr while (huff & incr) 242131377Stjr incr >>= 1; 243131377Stjr if (incr != 0) { 244131377Stjr huff &= incr - 1; 245131377Stjr huff += incr; 24617651Speter } 24742468Speter else 248131377Stjr huff = 0; 24917651Speter 250131377Stjr /* go to next symbol, update count, len */ 251131377Stjr sym++; 252131377Stjr if (--(count[len]) == 0) { 253131377Stjr if (len == max) break; 254131377Stjr len = lens[work[sym]]; 255131377Stjr } 25617651Speter 257131377Stjr /* create new sub-table if needed */ 258131377Stjr if (len > root && (huff & mask) != low) { 259131377Stjr /* if first time, transition to sub-tables */ 260131377Stjr if (drop == 0) 261131377Stjr drop = root; 26217651Speter 263131377Stjr /* increment past last table */ 264157043Sdes next += min; /* here min is 1 << curr */ 26517651Speter 266131377Stjr /* determine length of next table */ 267131377Stjr curr = len - drop; 268131377Stjr left = (int)(1 << curr); 269131377Stjr while (curr + drop < max) { 270131377Stjr left -= count[curr + drop]; 271131377Stjr if (left <= 0) break; 272131377Stjr curr++; 273131377Stjr left <<= 1; 274131377Stjr } 27517651Speter 276131377Stjr /* check for enough space */ 277131377Stjr used += 1U << curr; 278250224Sdelphij if ((type == LENS && used > ENOUGH_LENS) || 279250224Sdelphij (type == DISTS && used > ENOUGH_DISTS)) 280131377Stjr return 1; 28117651Speter 282131377Stjr /* point entry in root table to sub-table */ 283131377Stjr low = huff & mask; 284131377Stjr (*table)[low].op = (unsigned char)curr; 285131377Stjr (*table)[low].bits = (unsigned char)root; 286131377Stjr (*table)[low].val = (unsigned short)(next - *table); 287131377Stjr } 28817651Speter } 28917651Speter 290230837Sdelphij /* fill in remaining table entry if code is incomplete (guaranteed to have 291230837Sdelphij at most one remaining entry, since if the code is incomplete, the 292230837Sdelphij maximum code length that was allowed to get this far is one bit) */ 293230837Sdelphij if (huff != 0) { 294230837Sdelphij here.op = (unsigned char)64; /* invalid code marker */ 295230837Sdelphij here.bits = (unsigned char)(len - drop); 296230837Sdelphij here.val = (unsigned short)0; 297230837Sdelphij next[huff] = here; 29833904Ssteve } 29917651Speter 300131377Stjr /* set return parameters */ 301131377Stjr *table += used; 302131377Stjr *bits = root; 303131377Stjr return 0; 30417651Speter} 305