inflate.c revision 3417
1/*
2 * Parts of this file are not covered by:
3 * ----------------------------------------------------------------------------
4 * "THE BEER-WARE LICENSE" (Revision 42):
5 * <phk@login.dknet.dk> wrote this file.  As long as you retain this notice you
6 * can do whatever you want with this stuff. If we meet some day, and you think
7 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
8 * ----------------------------------------------------------------------------
9 *
10 * $Id: imgact_gzip.c,v 1.4 1994/10/04 06:51:42 phk Exp $
11 *
12 * This module handles execution of a.out files which have been run through
13 * "gzip -9".
14 *
15 * For now you need to use exactly this command to compress the binaries:
16 *
17 *		gzip -9 -v < /bin/sh > /tmp/sh
18 *
19 * TODO:
20 *	text-segments should be made R/O after being filled
21 *	is the vm-stuff safe ?
22 * 	should handle the entire header of gzip'ed stuff.
23 *	inflate isn't quite reentrant yet...
24 *	error-handling is a mess...
25 *	so is the rest...
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/resourcevar.h>
31#include <sys/exec.h>
32#include <sys/inflate.h>
33#include <sys/mman.h>
34#include <sys/malloc.h>
35#include <sys/imgact.h>
36#include <sys/imgact_aout.h>
37#include <sys/kernel.h>
38#include <sys/sysent.h>
39
40#include <vm/vm.h>
41#include <vm/vm_kern.h>
42
43/* Stuff to make inflate() work */
44#  define uch u_char
45#  define ush u_short
46#  define ulg u_long
47#  define memzero(dest,len)      bzero(dest,len)
48#  define NOMEMCPY
49#define FPRINTF printf
50
51#define EOF -1
52#define CHECK_EOF
53static int
54NextByte(struct gzip *gz)
55{
56	int error;
57
58	if(gz->idx >= gz->len) {
59	    gz->where = __LINE__;
60	    return EOF;
61	}
62
63	if((!gz->inbuf) || gz->idx >= (gz->offset+PAGE_SIZE)) {
64		if(gz->inbuf) {
65		    error = vm_deallocate(kernel_map,
66		      (vm_offset_t)gz->inbuf, PAGE_SIZE);
67		    if(error) {
68			gz->where = __LINE__;
69			gz->error = error;
70			return EOF;
71		    }
72		}
73
74		gz->offset += PAGE_SIZE;
75
76		error = vm_mmap(kernel_map,             /* map */
77                        (vm_offset_t *)&gz->inbuf,       /* address */
78                        PAGE_SIZE,                      /* size */
79                        VM_PROT_READ,                   /* protection */
80                        VM_PROT_READ,                   /* max protection */
81                        0,                              /* flags */
82                        (caddr_t)gz->ip->vnodep,        /* vnode */
83                        gz->offset);                    /* offset */
84		if(error) {
85		    gz->where = __LINE__;
86		    gz->error = error;
87		    return EOF;
88		}
89
90	}
91	return gz->inbuf[(gz->idx++) - gz->offset];
92}
93
94#define NEXTBYTE NextByte(gz)
95
96static int
97Flush(struct gzip *gz,u_long siz)
98{
99    u_char *p = slide,*q;
100    int i;
101
102    /* First, find a a.out-header */
103    if(gz->output < sizeof gz->a_out) {
104	q = (u_char*) &gz->a_out;
105	i = min(siz,sizeof gz->a_out - gz->output);
106	bcopy(p,q+gz->output,i);
107	gz->output += i;
108	p += i;
109	siz -= i;
110	if(gz->output == sizeof gz->a_out) {
111	    i = do_aout_hdr(gz);
112	    if (i == -1) {
113		gz->where = __LINE__;
114		gz->error = ENOEXEC;
115		return ENOEXEC;
116	    } else if (i) {
117		gz->where = __LINE__;
118		gz->error = i;
119		return ENOEXEC;
120	    }
121	    if(gz->file_offset < sizeof gz->a_out) {
122		q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
123		bcopy(&gz->a_out,q,sizeof gz->a_out - gz->file_offset);
124	    }
125	}
126    }
127    /* Skip over zero-padded first PAGE if needed */
128    if(gz->output < gz->file_offset && (gz->output+siz) > gz->file_offset) {
129	i = min(siz, gz->file_offset - gz->output);
130	gz->output += i;
131	p += i;
132	siz -= i;
133    }
134    if(gz->output >= gz->file_offset && gz->output < gz->file_end) {
135	i = min(siz, gz->file_end - gz->output);
136	q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
137	bcopy(p,q,i);
138	gz->output += i;
139	p += i;
140	siz -= i;
141    }
142    gz->output += siz;
143    return 0;
144}
145
146#define FLUSH(x,y) {int foo = Flush(x,y); if (foo) return foo;}
147static
148void *
149myalloc(u_long size)
150{
151	return malloc(size, M_GZIP, M_NOWAIT);
152}
153#define malloc myalloc
154
155static
156void
157myfree(void * ptr)
158{
159	free(ptr,M_GZIP);
160}
161#define free myfree
162
163static const int qflag = 0;
164#define Trace(x) /* */
165
166
167/* This came from unzip-5.12.  I have changed it to pass a "gz" pointer
168 * around, thus hopefully making it re-entrant.  Poul-Henningi
169 */
170
171/* inflate.c -- put in the public domain by Mark Adler
172   version c14o, 23 August 1994 */
173
174/* You can do whatever you like with this source file, though I would
175   prefer that if you modify it and redistribute it that you include
176   comments to that effect with your name and the date.  Thank you.
177
178   History:
179   vers    date          who           what
180   ----  ---------  --------------  ------------------------------------
181    a    ~~ Feb 92  M. Adler        used full (large, one-step) lookup table
182    b1   21 Mar 92  M. Adler        first version with partial lookup tables
183    b2   21 Mar 92  M. Adler        fixed bug in fixed-code blocks
184    b3   22 Mar 92  M. Adler        sped up match copies, cleaned up some
185    b4   25 Mar 92  M. Adler        added prototypes; removed window[] (now
186                                    is the responsibility of unzip.h--also
187                                    changed name to slide[]), so needs diffs
188                                    for unzip.c and unzip.h (this allows
189                                    compiling in the small model on MSDOS);
190                                    fixed cast of q in huft_build();
191    b5   26 Mar 92  M. Adler        got rid of unintended macro recursion.
192    b6   27 Mar 92  M. Adler        got rid of nextbyte() routine.  fixed
193                                    bug in inflate_fixed().
194    c1   30 Mar 92  M. Adler        removed lbits, dbits environment variables.
195                                    changed BMAX to 16 for explode.  Removed
196                                    OUTB usage, and replaced it with flush()--
197                                    this was a 20% speed improvement!  Added
198                                    an explode.c (to replace unimplod.c) that
199                                    uses the huft routines here.  Removed
200                                    register union.
201    c2    4 Apr 92  M. Adler        fixed bug for file sizes a multiple of 32k.
202    c3   10 Apr 92  M. Adler        reduced memory of code tables made by
203                                    huft_build significantly (factor of two to
204                                    three).
205    c4   15 Apr 92  M. Adler        added NOMEMCPY do kill use of memcpy().
206                                    worked around a Turbo C optimization bug.
207    c5   21 Apr 92  M. Adler        added the WSIZE #define to allow reducing
208                                    the 32K window size for specialized
209                                    applications.
210    c6   31 May 92  M. Adler        added some typecasts to eliminate warnings
211    c7   27 Jun 92  G. Roelofs      added some more typecasts (444:  MSC bug).
212    c8    5 Oct 92  J-l. Gailly     added ifdef'd code to deal with PKZIP bug.
213    c9    9 Oct 92  M. Adler        removed a memory error message (~line 416).
214    c10  17 Oct 92  G. Roelofs      changed ULONG/UWORD/byte to ulg/ush/uch,
215                                    removed old inflate, renamed inflate_entry
216                                    to inflate, added Mark's fix to a comment.
217   c10.5 14 Dec 92  M. Adler        fix up error messages for incomplete trees.
218    c11   2 Jan 93  M. Adler        fixed bug in detection of incomplete
219                                    tables, and removed assumption that EOB is
220                                    the longest code (bad assumption).
221    c12   3 Jan 93  M. Adler        make tables for fixed blocks only once.
222    c13   5 Jan 93  M. Adler        allow all zero length codes (pkzip 2.04c
223                                    outputs one zero length code for an empty
224                                    distance tree).
225    c14  12 Mar 93  M. Adler        made inflate.c standalone with the
226                                    introduction of inflate.h.
227   c14b  16 Jul 93  G. Roelofs      added (unsigned) typecast to w at 470.
228   c14c  19 Jul 93  J. Bush         changed v[N_MAX], l[288], ll[28x+3x] arrays
229                                    to static for Amiga.
230   c14d  13 Aug 93  J-l. Gailly     de-complicatified Mark's c[*p++]++ thing.
231   c14e   8 Oct 93  G. Roelofs      changed memset() to memzero().
232   c14f  22 Oct 93  G. Roelofs      renamed quietflg to qflag; made Trace()
233                                    conditional; added inflate_free().
234   c14g  28 Oct 93  G. Roelofs      changed l/(lx+1) macro to pointer (Cray bug)
235   c14h   7 Dec 93  C. Ghisler      huft_build() optimizations.
236   c14i   9 Jan 94  A. Verheijen    set fixed_t{d,l} to NULL after freeing;
237                    G. Roelofs      check NEXTBYTE macro for EOF.
238   c14j  23 Jan 94  G. Roelofs      removed Ghisler "optimizations"; ifdef'd
239                                    EOF check.
240   c14k  27 Feb 94  G. Roelofs      added some typecasts to avoid warnings.
241   c14l   9 Apr 94  G. Roelofs      fixed split comments on preprocessor lines
242                                    to avoid bug in Encore compiler.
243   c14m   7 Jul 94  P. Kienitz      modified to allow assembler version of
244                                    inflate_codes() (define ASM_INFLATECODES)
245   c14n  22 Jul 94  G. Roelofs      changed fprintf to FPRINTF for DLL versions
246   c14o  23 Aug 94  C. Spieler      added a newline to a debug statement;
247                    G. Roelofs      added another typecast to avoid MSC warning
248 */
249
250
251/*
252   Inflate deflated (PKZIP's method 8 compressed) data.  The compression
253   method searches for as much of the current string of bytes (up to a
254   length of 258) in the previous 32K bytes.  If it doesn't find any
255   matches (of at least length 3), it codes the next byte.  Otherwise, it
256   codes the length of the matched string and its distance backwards from
257   the current position.  There is a single Huffman code that codes both
258   single bytes (called "literals") and match lengths.  A second Huffman
259   code codes the distance information, which follows a length code.  Each
260   length or distance code actually represents a base value and a number
261   of "extra" (sometimes zero) bits to get to add to the base value.  At
262   the end of each deflated block is a special end-of-block (EOB) literal/
263   length code.  The decoding process is basically: get a literal/length
264   code; if EOB then done; if a literal, emit the decoded byte; if a
265   length then get the distance and emit the referred-to bytes from the
266   sliding window of previously emitted data.
267
268   There are (currently) three kinds of inflate blocks: stored, fixed, and
269   dynamic.  The compressor outputs a chunk of data at a time and decides
270   which method to use on a chunk-by-chunk basis.  A chunk might typically
271   be 32K to 64K, uncompressed.  If the chunk is uncompressible, then the
272   "stored" method is used.  In this case, the bytes are simply stored as
273   is, eight bits per byte, with none of the above coding.  The bytes are
274   preceded by a count, since there is no longer an EOB code.
275
276   If the data is compressible, then either the fixed or dynamic methods
277   are used.  In the dynamic method, the compressed data is preceded by
278   an encoding of the literal/length and distance Huffman codes that are
279   to be used to decode this block.  The representation is itself Huffman
280   coded, and so is preceded by a description of that code.  These code
281   descriptions take up a little space, and so for small blocks, there is
282   a predefined set of codes, called the fixed codes.  The fixed method is
283   used if the block ends up smaller that way (usually for quite small
284   chunks); otherwise the dynamic method is used.  In the latter case, the
285   codes are customized to the probabilities in the current block and so
286   can code it much better than the pre-determined fixed codes can.
287
288   The Huffman codes themselves are decoded using a mutli-level table
289   lookup, in order to maximize the speed of decoding plus the speed of
290   building the decoding tables.  See the comments below that precede the
291   lbits and dbits tuning parameters.
292 */
293
294
295/*
296   Notes beyond the 1.93a appnote.txt:
297
298   1. Distance pointers never point before the beginning of the output
299      stream.
300   2. Distance pointers can point back across blocks, up to 32k away.
301   3. There is an implied maximum of 7 bits for the bit length table and
302      15 bits for the actual data.
303   4. If only one code exists, then it is encoded using one bit.  (Zero
304      would be more efficient, but perhaps a little confusing.)  If two
305      codes exist, they are coded using one bit each (0 and 1).
306   5. There is no way of sending zero distance codes--a dummy must be
307      sent if there are none.  (History: a pre 2.0 version of PKZIP would
308      store blocks with no distance codes, but this was discovered to be
309      too harsh a criterion.)  Valid only for 1.93a.  2.04c does allow
310      zero distance codes, which is sent as one code of zero bits in
311      length.
312   6. There are up to 286 literal/length codes.  Code 256 represents the
313      end-of-block.  Note however that the static length tree defines
314      288 codes just to fill out the Huffman codes.  Codes 286 and 287
315      cannot be used though, since there is no length base or extra bits
316      defined for them.  Similarily, there are up to 30 distance codes.
317      However, static trees define 32 codes (all 5 bits) to fill out the
318      Huffman codes, but the last two had better not show up in the data.
319   7. Unzip can check dynamic Huffman blocks for complete code sets.
320      The exception is that a single code would not be complete (see #4).
321   8. The five bits following the block type is really the number of
322      literal codes sent minus 257.
323   9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
324      (1+6+6).  Therefore, to output three times the length, you output
325      three codes (1+1+1), whereas to output four times the same length,
326      you only need two codes (1+3).  Hmm.
327  10. In the tree reconstruction algorithm, Code = Code + Increment
328      only if BitLength(i) is not zero.  (Pretty obvious.)
329  11. Correction: 4 Bits: # of Bit Length codes - 4     (4 - 19)
330  12. Note: length code 284 can represent 227-258, but length code 285
331      really is 258.  The last length deserves its own, short code
332      since it gets used a lot in very redundant files.  The length
333      258 is special since 258 - 3 (the min match length) is 255.
334  13. The literal/length and distance code bit lengths are read as a
335      single stream of lengths.  It is possible (and advantageous) for
336      a repeat code (16, 17, or 18) to go across the boundary between
337      the two sets of lengths.
338 */
339
340
341#define PKZIP_BUG_WORKAROUND    /* PKZIP 1.93a problem--live with it */
342
343/*
344    inflate.h must supply the uch slide[WSIZE] array and the NEXTBYTE,
345    FLUSH() and memzero macros.  If the window size is not 32K, it
346    should also define WSIZE.  If INFMOD is defined, it can include
347    compiled functions to support the NEXTBYTE and/or FLUSH() macros.
348    There are defaults for NEXTBYTE and FLUSH() below for use as
349    examples of what those functions need to do.  Normally, you would
350    also want FLUSH() to compute a crc on the data.  inflate.h also
351    needs to provide these typedefs:
352
353        typedef unsigned char uch;
354        typedef unsigned short ush;
355        typedef unsigned long ulg;
356
357    This module uses the external functions malloc() and free() (and
358    probably memset() or bzero() in the memzero() macro).  Their
359    prototypes are normally found in <string.h> and <stdlib.h>.
360 */
361#define INFMOD          /* tell inflate.h to include code to be compiled */
362
363/* Huffman code lookup table entry--this entry is four bytes for machines
364   that have 16-bit pointers (e.g. PC's in the small or medium model).
365   Valid extra bits are 0..13.  e == 15 is EOB (end of block), e == 16
366   means that v is a literal, 16 < e < 32 means that v is a pointer to
367   the next table, which codes e - 16 bits, and lastly e == 99 indicates
368   an unused code.  If a code with e == 99 is looked up, this implies an
369   error in the data. */
370struct huft {
371  uch e;                /* number of extra bits or operation */
372  uch b;                /* number of bits in this code or subcode */
373  union {
374    ush n;              /* literal, length base, or distance base */
375    struct huft *t;     /* pointer to next level of table */
376  } v;
377};
378
379
380/* Function prototypes */
381#ifndef OF
382#  ifdef __STDC__
383#    define OF(a) a
384#  else /* !__STDC__ */
385#    define OF(a) ()
386#  endif /* ?__STDC__ */
387#endif
388static int huft_build OF((struct gzip *,unsigned *, unsigned, unsigned, const ush *, const ush *,
389                   struct huft **, int *));
390static int huft_free OF((struct gzip *,struct huft *));
391static int inflate_codes OF((struct gzip *,struct huft *, struct huft *, int, int));
392static int inflate_stored OF((struct gzip *));
393static int inflate_fixed OF((struct gzip *));
394static int inflate_dynamic OF((struct gzip *));
395static int inflate_block OF((struct gzip *,int *));
396static int inflate_free OF((struct gzip *));
397
398
399/* The inflate algorithm uses a sliding 32K byte window on the uncompressed
400   stream to find repeated byte strings.  This is implemented here as a
401   circular buffer.  The index is updated simply by incrementing and then
402   and'ing with 0x7fff (32K-1). */
403/* It is left to other modules to supply the 32K area.  It is assumed
404   to be usable as if it were declared "uch slide[32768];" or as just
405   "uch *slide;" and then malloc'ed in the latter case.  The definition
406   must be in unzip.h, included above. */
407
408
409/* Tables for deflate from PKZIP's appnote.txt. */
410static const unsigned border[] = {    /* Order of the bit length code lengths */
411        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
412static const ush cplens[] = {         /* Copy lengths for literal codes 257..285 */
413        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
414        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
415        /* note: see note #13 above about the 258 in this list. */
416static const ush cplext[] = {         /* Extra bits for literal codes 257..285 */
417        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
418        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
419static const ush cpdist[] = {         /* Copy offsets for distance codes 0..29 */
420        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
421        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
422        8193, 12289, 16385, 24577};
423static const ush cpdext[] = {         /* Extra bits for distance codes */
424        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
425        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
426        12, 12, 13, 13};
427
428/* And'ing with mask[n] masks the lower n bits */
429const ush mask[] = {
430    0x0000,
431    0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
432    0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
433};
434
435
436/* Macros for inflate() bit peeking and grabbing.
437   The usage is:
438
439        NEEDBITS(j)
440        x = b & mask[j];
441        DUMPBITS(j)
442
443   where NEEDBITS makes sure that b has at least j bits in it, and
444   DUMPBITS removes the bits from b.  The macros use the variable k
445   for the number of bits in b.  Normally, b and k are register
446   variables for speed, and are initialized at the begining of a
447   routine that uses these macros from a global bit buffer and count.
448
449   In order to not ask for more bits than there are in the compressed
450   stream, the Huffman tables are constructed to only ask for just
451   enough bits to make up the end-of-block code (value 256).  Then no
452   bytes need to be "returned" to the buffer at the end of the last
453   block.  See the huft_build() routine.
454 */
455
456/* !!! XXX !!! */
457ulg bb;                         /* bit buffer */
458unsigned bk;                    /* bits in bit buffer */
459
460#ifndef CHECK_EOF
461#  define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE)<<k;k+=8;}}
462#else
463#  define NEEDBITS(n) {while(k<(n)){int c=NEXTBYTE;if(c==EOF)return 1;\
464    b|=((ulg)c)<<k;k+=8;}}
465#endif                      /* Piet Plomp:  change "return 1" to "break" */
466
467#define DUMPBITS(n) {b>>=(n);k-=(n);}
468
469
470/*
471   Huffman code decoding is performed using a multi-level table lookup.
472   The fastest way to decode is to simply build a lookup table whose
473   size is determined by the longest code.  However, the time it takes
474   to build this table can also be a factor if the data being decoded
475   is not very long.  The most common codes are necessarily the
476   shortest codes, so those codes dominate the decoding time, and hence
477   the speed.  The idea is you can have a shorter table that decodes the
478   shorter, more probable codes, and then point to subsidiary tables for
479   the longer codes.  The time it costs to decode the longer codes is
480   then traded against the time it takes to make longer tables.
481
482   This results of this trade are in the variables lbits and dbits
483   below.  lbits is the number of bits the first level table for literal/
484   length codes can decode in one step, and dbits is the same thing for
485   the distance codes.  Subsequent tables are also less than or equal to
486   those sizes.  These values may be adjusted either when all of the
487   codes are shorter than that, in which case the longest code length in
488   bits is used, or when the shortest code is *longer* than the requested
489   table size, in which case the length of the shortest code in bits is
490   used.
491
492   There are two different values for the two tables, since they code a
493   different number of possibilities each.  The literal/length table
494   codes 286 possible values, or in a flat code, a little over eight
495   bits.  The distance table codes 30 possible values, or a little less
496   than five bits, flat.  The optimum values for speed end up being
497   about one bit more than those, so lbits is 8+1 and dbits is 5+1.
498   The optimum values may differ though from machine to machine, and
499   possibly even between compilers.  Your mileage may vary.
500 */
501
502
503const int lbits = 9;          /* bits in base literal/length lookup table */
504const int dbits = 6;          /* bits in base distance lookup table */
505
506
507/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
508#define BMAX 16         /* maximum bit length of any code (16 for explode) */
509#define N_MAX 288       /* maximum number of codes in any set */
510
511/* !!! XXX !!! */
512unsigned hufts;         /* track memory usage */
513
514
515static int huft_build(gz,b, n, s, d, e, t, m)
516struct gzip *gz;
517unsigned *b;            /* code lengths in bits (all assumed <= BMAX) */
518unsigned n;             /* number of codes (assumed <= N_MAX) */
519unsigned s;             /* number of simple-valued codes (0..s-1) */
520const ush *d;           /* list of base values for non-simple codes */
521const ush *e;           /* list of extra bits for non-simple codes */
522struct huft **t;        /* result: starting table */
523int *m;                 /* maximum lookup bits, returns actual */
524/* Given a list of code lengths and a maximum table size, make a set of
525   tables to decode that set of codes.  Return zero on success, one if
526   the given code set is incomplete (the tables are still built in this
527   case), two if the input is invalid (all zero length codes or an
528   oversubscribed set of lengths), and three if not enough memory.
529   The code with value 256 is special, and the tables are constructed
530   so that no bits beyond that code are fetched when that code is
531   decoded. */
532{
533  unsigned a;                   /* counter for codes of length k */
534  unsigned c[BMAX+1];           /* bit length count table */
535  unsigned el;                  /* length of EOB code (value 256) */
536  unsigned f;                   /* i repeats in table every f entries */
537  int g;                        /* maximum code length */
538  int h;                        /* table level */
539  register unsigned i;          /* counter, current code */
540  register unsigned j;          /* counter */
541  register int k;               /* number of bits in current code */
542  int lx[BMAX+1];               /* memory for l[-1..BMAX-1] */
543  int *l = lx+1;                /* stack of bits per table */
544  register unsigned *p;         /* pointer into c[], b[], or v[] */
545  register struct huft *q;      /* points to current table */
546  struct huft r;                /* table entry for structure assignment */
547  struct huft *u[BMAX];         /* table stack */
548  static unsigned v[N_MAX];     /* values in order of bit length */
549  register int w;               /* bits before this table == (l * h) */
550  unsigned x[BMAX+1];           /* bit offsets, then code stack */
551  unsigned *xp;                 /* pointer into x */
552  int y;                        /* number of dummy codes added */
553  unsigned z;                   /* number of entries in current table */
554
555
556  /* Generate counts for each bit length */
557  el = n > 256 ? b[256] : BMAX; /* set length of EOB code, if any */
558  memzero((char *)c, sizeof(c));
559  p = b;  i = n;
560  do {
561    c[*p]++; p++;               /* assume all entries <= BMAX */
562  } while (--i);
563  if (c[0] == n)                /* null input--all zero length codes */
564  {
565    *t = (struct huft *)NULL;
566    *m = 0;
567    return 0;
568  }
569
570
571  /* Find minimum and maximum length, bound *m by those */
572  for (j = 1; j <= BMAX; j++)
573    if (c[j])
574      break;
575  k = j;                        /* minimum code length */
576  if ((unsigned)*m < j)
577    *m = j;
578  for (i = BMAX; i; i--)
579    if (c[i])
580      break;
581  g = i;                        /* maximum code length */
582  if ((unsigned)*m > i)
583    *m = i;
584
585
586  /* Adjust last length count to fill out codes, if needed */
587  for (y = 1 << j; j < i; j++, y <<= 1)
588    if ((y -= c[j]) < 0)
589      return 2;                 /* bad input: more codes than bits */
590  if ((y -= c[i]) < 0)
591    return 2;
592  c[i] += y;
593
594
595  /* Generate starting offsets into the value table for each length */
596  x[1] = j = 0;
597  p = c + 1;  xp = x + 2;
598  while (--i) {                 /* note that i == g from above */
599    *xp++ = (j += *p++);
600  }
601
602
603  /* Make a table of values in order of bit lengths */
604  p = b;  i = 0;
605  do {
606    if ((j = *p++) != 0)
607      v[x[j]++] = i;
608  } while (++i < n);
609
610
611  /* Generate the Huffman codes and for each, make the table entries */
612  x[0] = i = 0;                 /* first Huffman code is zero */
613  p = v;                        /* grab values in bit order */
614  h = -1;                       /* no tables yet--level -1 */
615  w = l[-1] = 0;                /* no bits decoded yet */
616  u[0] = (struct huft *)NULL;   /* just to keep compilers happy */
617  q = (struct huft *)NULL;      /* ditto */
618  z = 0;                        /* ditto */
619
620  /* go through the bit lengths (k already is bits in shortest code) */
621  for (; k <= g; k++)
622  {
623    a = c[k];
624    while (a--)
625    {
626      /* here i is the Huffman code of length k bits for value *p */
627      /* make tables up to required level */
628      while (k > w + l[h])
629      {
630        w += l[h++];            /* add bits already decoded */
631
632        /* compute minimum size table less than or equal to *m bits */
633        z = (z = g - w) > (unsigned)*m ? *m : z;        /* upper limit */
634        if ((f = 1 << (j = k - w)) > a + 1)     /* try a k-w bit table */
635        {                       /* too few codes for k-w bit table */
636          f -= a + 1;           /* deduct codes from patterns left */
637          xp = c + k;
638          while (++j < z)       /* try smaller tables up to z bits */
639          {
640            if ((f <<= 1) <= *++xp)
641              break;            /* enough codes to use up j bits */
642            f -= *xp;           /* else deduct codes from patterns */
643          }
644        }
645        if ((unsigned)w + j > el && (unsigned)w < el)
646          j = el - w;           /* make EOB code end at table */
647        z = 1 << j;             /* table entries for j-bit table */
648        l[h] = j;               /* set table size in stack */
649
650        /* allocate and link in new table */
651        if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
652            (struct huft *)NULL)
653        {
654          if (h)
655            huft_free(gz,u[0]);
656          return 3;             /* not enough memory */
657        }
658        hufts += z + 1;         /* track memory usage */
659        *t = q + 1;             /* link to list for huft_free() */
660        *(t = &(q->v.t)) = (struct huft *)NULL;
661        u[h] = ++q;             /* table starts after link */
662
663        /* connect to last table, if there is one */
664        if (h)
665        {
666          x[h] = i;             /* save pattern for backing up */
667          r.b = (uch)l[h-1];    /* bits to dump before this table */
668          r.e = (uch)(16 + j);  /* bits in this table */
669          r.v.t = q;            /* pointer to this table */
670          j = (i & ((1 << w) - 1)) >> (w - l[h-1]);
671          u[h-1][j] = r;        /* connect to last table */
672        }
673      }
674
675      /* set up table entry in r */
676      r.b = (uch)(k - w);
677      if (p >= v + n)
678        r.e = 99;               /* out of values--invalid code */
679      else if (*p < s)
680      {
681        r.e = (uch)(*p < 256 ? 16 : 15);    /* 256 is end-of-block code */
682        r.v.n = *p++;           /* simple code is just the value */
683      }
684      else
685      {
686        r.e = (uch)e[*p - s];   /* non-simple--look up in lists */
687        r.v.n = d[*p++ - s];
688      }
689
690      /* fill code-like entries with r */
691      f = 1 << (k - w);
692      for (j = i >> w; j < z; j += f)
693        q[j] = r;
694
695      /* backwards increment the k-bit code i */
696      for (j = 1 << (k - 1); i & j; j >>= 1)
697        i ^= j;
698      i ^= j;
699
700      /* backup over finished tables */
701      while ((i & ((1 << w) - 1)) != x[h])
702        w -= l[--h];            /* don't need to update q */
703    }
704  }
705
706
707  /* return actual size of base table */
708  *m = l[0];
709
710
711  /* Return true (1) if we were given an incomplete table */
712  return y != 0 && g != 1;
713}
714
715
716
717static int huft_free(gz,t)
718struct gzip *gz;
719struct huft *t;         /* table to free */
720/* Free the malloc'ed tables built by huft_build(), which makes a linked
721   list of the tables it made, with the links in a dummy first entry of
722   each table. */
723{
724  register struct huft *p, *q;
725
726
727  /* Go through linked list, freeing from the malloced (t[-1]) address. */
728  p = t;
729  while (p != (struct huft *)NULL)
730  {
731    q = (--p)->v.t;
732    free(p);
733    p = q;
734  }
735  return 0;
736}
737
738
739
740#ifdef ASM_INFLATECODES
741#  define inflate_codes(tl,td,bl,bd)  flate_codes(tl,td,bl,bd,(uch *)slide)
742   int flate_codes OF((struct huft *, struct huft *, int, int, uch *));
743
744#else
745
746static int inflate_codes(gz,tl, td, bl, bd)
747struct gzip *gz;
748struct huft *tl, *td;   /* literal/length and distance decoder tables */
749int bl, bd;             /* number of bits decoded by tl[] and td[] */
750/* inflate (decompress) the codes in a deflated (compressed) block.
751   Return an error code or zero if it all goes ok. */
752{
753  register unsigned e;  /* table entry flag/number of extra bits */
754  unsigned n, d;        /* length and index for copy */
755  unsigned w;           /* current window position */
756  struct huft *t;       /* pointer to table entry */
757  unsigned ml, md;      /* masks for bl and bd bits */
758  register ulg b;       /* bit buffer */
759  register unsigned k;  /* number of bits in bit buffer */
760
761
762  /* make local copies of globals */
763  b = bb;                       /* initialize bit buffer */
764  k = bk;
765  w = wp;                       /* initialize window position */
766
767
768  /* inflate the coded data */
769  ml = mask[bl];           /* precompute masks for speed */
770  md = mask[bd];
771  while (1)                     /* do until end of block */
772  {
773    NEEDBITS((unsigned)bl)
774    if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
775      do {
776        if (e == 99)
777          return 1;
778        DUMPBITS(t->b)
779        e -= 16;
780        NEEDBITS(e)
781      } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
782    DUMPBITS(t->b)
783    if (e == 16)                /* then it's a literal */
784    {
785      slide[w++] = (uch)t->v.n;
786      if (w == WSIZE)
787      {
788        FLUSH(gz,w);
789        w = 0;
790      }
791    }
792    else                        /* it's an EOB or a length */
793    {
794      /* exit if end of block */
795      if (e == 15)
796        break;
797
798      /* get length of block to copy */
799      NEEDBITS(e)
800      n = t->v.n + ((unsigned)b & mask[e]);
801      DUMPBITS(e);
802
803      /* decode distance of block to copy */
804      NEEDBITS((unsigned)bd)
805      if ((e = (t = td + ((unsigned)b & md))->e) > 16)
806        do {
807          if (e == 99)
808            return 1;
809          DUMPBITS(t->b)
810          e -= 16;
811          NEEDBITS(e)
812        } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
813      DUMPBITS(t->b)
814      NEEDBITS(e)
815      d = w - t->v.n - ((unsigned)b & mask[e]);
816      DUMPBITS(e)
817
818      /* do the copy */
819      do {
820        n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
821#ifndef NOMEMCPY
822        if (w - d >= e)         /* (this test assumes unsigned comparison) */
823        {
824          memcpy(slide + w, slide + d, e);
825          w += e;
826          d += e;
827        }
828        else                      /* do it slow to avoid memcpy() overlap */
829#endif /* !NOMEMCPY */
830          do {
831            slide[w++] = slide[d++];
832          } while (--e);
833        if (w == WSIZE)
834        {
835          FLUSH(gz,w);
836          w = 0;
837        }
838      } while (n);
839    }
840  }
841
842
843  /* restore the globals from the locals */
844  wp = w;                       /* restore global window pointer */
845  bb = b;                       /* restore global bit buffer */
846  bk = k;
847
848
849  /* done */
850  return 0;
851}
852
853#endif /* ASM_INFLATECODES */
854
855
856
857static int inflate_stored(gz)
858struct gzip *gz;
859/* "decompress" an inflated type 0 (stored) block. */
860{
861  unsigned n;           /* number of bytes in block */
862  unsigned w;           /* current window position */
863  register ulg b;       /* bit buffer */
864  register unsigned k;  /* number of bits in bit buffer */
865
866
867  /* make local copies of globals */
868  Trace((stderr, "\nstored block"));
869  b = bb;                       /* initialize bit buffer */
870  k = bk;
871  w = wp;                       /* initialize window position */
872
873
874  /* go to byte boundary */
875  n = k & 7;
876  DUMPBITS(n);
877
878
879  /* get the length and its complement */
880  NEEDBITS(16)
881  n = ((unsigned)b & 0xffff);
882  DUMPBITS(16)
883  NEEDBITS(16)
884  if (n != (unsigned)((~b) & 0xffff))
885    return 1;                   /* error in compressed data */
886  DUMPBITS(16)
887
888
889  /* read and output the compressed data */
890  while (n--)
891  {
892    NEEDBITS(8)
893    slide[w++] = (uch)b;
894    if (w == WSIZE)
895    {
896      FLUSH(gz,w);
897      w = 0;
898    }
899    DUMPBITS(8)
900  }
901
902
903  /* restore the globals from the locals */
904  wp = w;                       /* restore global window pointer */
905  bb = b;                       /* restore global bit buffer */
906  bk = k;
907  return 0;
908}
909
910
911/* Globals for literal tables (built once) */
912/* !!! XXX !!! */
913struct huft *fixed_tl = (struct huft *)NULL;
914struct huft *fixed_td;
915int fixed_bl, fixed_bd;
916
917static int inflate_fixed(gz)
918struct gzip *gz;
919/* decompress an inflated type 1 (fixed Huffman codes) block.  We should
920   either replace this with a custom decoder, or at least precompute the
921   Huffman tables. */
922{
923  /* if first time, set up tables for fixed blocks */
924  Trace((stderr, "\nliteral block"));
925  if (fixed_tl == (struct huft *)NULL)
926  {
927    int i;                /* temporary variable */
928    static unsigned l[288]; /* length list for huft_build */
929
930    /* literal table */
931    for (i = 0; i < 144; i++)
932      l[i] = 8;
933    for (; i < 256; i++)
934      l[i] = 9;
935    for (; i < 280; i++)
936      l[i] = 7;
937    for (; i < 288; i++)          /* make a complete, but wrong code set */
938      l[i] = 8;
939    fixed_bl = 7;
940    if ((i = huft_build(gz,l, 288, 257, cplens, cplext,
941                        &fixed_tl, &fixed_bl)) != 0)
942    {
943      fixed_tl = (struct huft *)NULL;
944      return i;
945    }
946
947    /* distance table */
948    for (i = 0; i < 30; i++)      /* make an incomplete code set */
949      l[i] = 5;
950    fixed_bd = 5;
951    if ((i = huft_build(gz,l, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd)) > 1)
952    {
953      huft_free(gz,fixed_tl);
954      fixed_tl = (struct huft *)NULL;
955      return i;
956    }
957  }
958
959
960  /* decompress until an end-of-block code */
961  return inflate_codes(gz,fixed_tl, fixed_td, fixed_bl, fixed_bd) != 0;
962}
963
964
965
966static int inflate_dynamic(gz)
967struct gzip *gz;
968/* decompress an inflated type 2 (dynamic Huffman codes) block. */
969{
970  int i;                /* temporary variables */
971  unsigned j;
972  unsigned l;           /* last length */
973  unsigned m;           /* mask for bit lengths table */
974  unsigned n;           /* number of lengths to get */
975  struct huft *tl;      /* literal/length code table */
976  struct huft *td;      /* distance code table */
977  int bl;               /* lookup bits for tl */
978  int bd;               /* lookup bits for td */
979  unsigned nb;          /* number of bit length codes */
980  unsigned nl;          /* number of literal/length codes */
981  unsigned nd;          /* number of distance codes */
982#ifdef PKZIP_BUG_WORKAROUND
983  static unsigned ll[288+32]; /* literal/length and distance code lengths */
984#else
985  static unsigned ll[286+30]; /* literal/length and distance code lengths */
986#endif
987  register ulg b;       /* bit buffer */
988  register unsigned k;  /* number of bits in bit buffer */
989
990
991  /* make local bit buffer */
992  Trace((stderr, "\ndynamic block"));
993  b = bb;
994  k = bk;
995
996
997  /* read in table lengths */
998  NEEDBITS(5)
999  nl = 257 + ((unsigned)b & 0x1f);      /* number of literal/length codes */
1000  DUMPBITS(5)
1001  NEEDBITS(5)
1002  nd = 1 + ((unsigned)b & 0x1f);        /* number of distance codes */
1003  DUMPBITS(5)
1004  NEEDBITS(4)
1005  nb = 4 + ((unsigned)b & 0xf);         /* number of bit length codes */
1006  DUMPBITS(4)
1007#ifdef PKZIP_BUG_WORKAROUND
1008  if (nl > 288 || nd > 32)
1009#else
1010  if (nl > 286 || nd > 30)
1011#endif
1012    return 1;                   /* bad lengths */
1013
1014
1015  /* read in bit-length-code lengths */
1016  for (j = 0; j < nb; j++)
1017  {
1018    NEEDBITS(3)
1019    ll[border[j]] = (unsigned)b & 7;
1020    DUMPBITS(3)
1021  }
1022  for (; j < 19; j++)
1023    ll[border[j]] = 0;
1024
1025
1026  /* build decoding table for trees--single level, 7 bit lookup */
1027  bl = 7;
1028  if ((i = huft_build(gz,ll, 19, 19, NULL, NULL, &tl, &bl)) != 0)
1029  {
1030    if (i == 1)
1031      huft_free(gz,tl);
1032    return i;                   /* incomplete code set */
1033  }
1034
1035
1036  /* read in literal and distance code lengths */
1037  n = nl + nd;
1038  m = mask[bl];
1039  i = l = 0;
1040  while ((unsigned)i < n)
1041  {
1042    NEEDBITS((unsigned)bl)
1043    j = (td = tl + ((unsigned)b & m))->b;
1044    DUMPBITS(j)
1045    j = td->v.n;
1046    if (j < 16)                 /* length of code in bits (0..15) */
1047      ll[i++] = l = j;          /* save last length in l */
1048    else if (j == 16)           /* repeat last length 3 to 6 times */
1049    {
1050      NEEDBITS(2)
1051      j = 3 + ((unsigned)b & 3);
1052      DUMPBITS(2)
1053      if ((unsigned)i + j > n)
1054        return 1;
1055      while (j--)
1056        ll[i++] = l;
1057    }
1058    else if (j == 17)           /* 3 to 10 zero length codes */
1059    {
1060      NEEDBITS(3)
1061      j = 3 + ((unsigned)b & 7);
1062      DUMPBITS(3)
1063      if ((unsigned)i + j > n)
1064        return 1;
1065      while (j--)
1066        ll[i++] = 0;
1067      l = 0;
1068    }
1069    else                        /* j == 18: 11 to 138 zero length codes */
1070    {
1071      NEEDBITS(7)
1072      j = 11 + ((unsigned)b & 0x7f);
1073      DUMPBITS(7)
1074      if ((unsigned)i + j > n)
1075        return 1;
1076      while (j--)
1077        ll[i++] = 0;
1078      l = 0;
1079    }
1080  }
1081
1082
1083  /* free decoding table for trees */
1084  huft_free(gz,tl);
1085
1086
1087  /* restore the global bit buffer */
1088  bb = b;
1089  bk = k;
1090
1091
1092  /* build the decoding tables for literal/length and distance codes */
1093  bl = lbits;
1094  if ((i = huft_build(gz,ll, nl, 257, cplens, cplext, &tl, &bl)) != 0)
1095  {
1096    if (i == 1 && !qflag) {
1097      FPRINTF( "(incomplete l-tree)  ");
1098      huft_free(gz,tl);
1099    }
1100    return i;                   /* incomplete code set */
1101  }
1102  bd = dbits;
1103  if ((i = huft_build(gz,ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0)
1104  {
1105    if (i == 1 && !qflag) {
1106      FPRINTF( "(incomplete d-tree)  ");
1107#ifdef PKZIP_BUG_WORKAROUND
1108      i = 0;
1109    }
1110#else
1111      huft_free(gz,td);
1112    }
1113    huft_free(gz,tl);
1114    return i;                   /* incomplete code set */
1115#endif
1116  }
1117
1118
1119  /* decompress until an end-of-block code */
1120  if (inflate_codes(gz,tl, td, bl, bd))
1121    return 1;
1122
1123
1124  /* free the decoding tables, return */
1125  huft_free(gz,tl);
1126  huft_free(gz,td);
1127  return 0;
1128}
1129
1130
1131
1132static int inflate_block(gz,e)
1133struct gzip *gz;
1134int *e;                 /* last block flag */
1135/* decompress an inflated block */
1136{
1137  unsigned t;           /* block type */
1138  register ulg b;       /* bit buffer */
1139  register unsigned k;  /* number of bits in bit buffer */
1140
1141
1142  /* make local bit buffer */
1143  b = bb;
1144  k = bk;
1145
1146
1147  /* read in last block bit */
1148  NEEDBITS(1)
1149  *e = (int)b & 1;
1150  DUMPBITS(1)
1151
1152
1153  /* read in block type */
1154  NEEDBITS(2)
1155  t = (unsigned)b & 3;
1156  DUMPBITS(2)
1157
1158
1159  /* restore the global bit buffer */
1160  bb = b;
1161  bk = k;
1162
1163
1164  /* inflate that block type */
1165  if (t == 2)
1166    return inflate_dynamic(gz);
1167  if (t == 0)
1168    return inflate_stored(gz);
1169  if (t == 1)
1170    return inflate_fixed(gz);
1171
1172
1173  /* bad block type */
1174  return 2;
1175}
1176
1177
1178
1179int inflate(gz, glbl)
1180struct gzip *gz;
1181struct gz_global * glbl;
1182/* decompress an inflated entry */
1183{
1184  int e;                /* last block flag */
1185  int r;                /* result code */
1186  unsigned h;           /* maximum struct huft's malloc'ed */
1187
1188
1189  /* initialize window, bit buffer */
1190  wp = 0;
1191  bk = 0;
1192  bb = 0;
1193
1194
1195  /* decompress until the last block */
1196  h = 0;
1197  do {
1198    hufts = 0;
1199    if ((r = inflate_block(gz,&e)) != 0)
1200      return r;
1201    if (hufts > h)
1202      h = hufts;
1203  } while (!e);
1204
1205
1206  /* flush out slide */
1207  FLUSH(gz,wp);
1208
1209
1210  /* return success */
1211  Trace((stderr, "\n%u bytes in Huffman tables (%d/entry)\n",
1212         h * sizeof(struct huft), sizeof(struct huft)));
1213  return 0;
1214}
1215
1216
1217
1218static int inflate_free(gz)
1219struct gzip *gz;
1220{
1221  if (fixed_tl != (struct huft *)NULL)
1222  {
1223    huft_free(gz,fixed_td);
1224    huft_free(gz,fixed_tl);
1225    fixed_td = fixed_tl = (struct huft *)NULL;
1226  }
1227  return 0;
1228}
1229