1/*
2 * Parts of this file are not covered by:
3 * ----------------------------------------------------------------------------
4 * "THE BEER-WARE LICENSE" (Revision 42):
5 * <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
6 * can do whatever you want with this stuff. If we meet some day, and you think
7 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
8 * ----------------------------------------------------------------------------
9 *
10 * $Id: inflate.c,v 1.2 1994/10/07 23:18:09 csgr Exp $
11 *
12 * This module handles execution of a.out files which have been run through
13 * "gzip -9".
14 *
15 * For now you need to use exactly this command to compress the binaries:
16 *
17 * gzip -9 -v < /bin/sh > /tmp/sh
18 *
19 * TODO:
20 * text-segments should be made R/O after being filled
21 * is the vm-stuff safe ?
22 * should handle the entire header of gzip'ed stuff.
23 * inflate isn't quite reentrant yet...
24 * error-handling is a mess...
25 * so is the rest...
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/inflate.h>
31#include <sys/mman.h>
32#include <sys/malloc.h>
33
34#include <vm/vm.h>
35#include <vm/vm_kern.h>
36
37/* Stuff to make inflate() work */
38# define memzero(dest,len) bzero(dest,len)
39# define NOMEMCPY
40#define FPRINTF printf
41
42#define EOF -1
43#define CHECK_EOF
44static int
45NextByte(struct gzip *gz)
46{
47 int error;
48
49 if(gz->idx >= gz->len) {
50 gz->where = __LINE__;
51 return EOF;
52 }
53
54 if((!gz->inbuf) || gz->idx >= (gz->offset+PAGE_SIZE)) {
55 if(gz->inbuf) {
56 error = vm_deallocate(kernel_map,
57 (vm_offset_t)gz->inbuf, PAGE_SIZE);
58 if(error) {
59 gz->where = __LINE__;
60 gz->error = error;
61 return EOF;
62 }
63 }
64
65 gz->offset += PAGE_SIZE;
66
67 error = vm_mmap(kernel_map, /* map */
68 (vm_offset_t *)&gz->inbuf, /* address */
69 PAGE_SIZE, /* size */
70 VM_PROT_READ, /* protection */
71 VM_PROT_READ, /* max protection */
72 0, /* flags */
73 (caddr_t)gz->ip->vnodep, /* vnode */
74 gz->offset); /* offset */
75 if(error) {
76 gz->where = __LINE__;
77 gz->error = error;
78 return EOF;
79 }
80
81 }
82 return gz->inbuf[(gz->idx++) - gz->offset];
83}
84
85#define NEXTBYTE NextByte(gz)
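
/*
 * NextByte() feeds the borrowed inflate code below: it demand-maps the
 * input vnode into kernel_map one PAGE_SIZE chunk at a time and returns
 * EOF both at end of input and on a VM error, recording the failure in
 * gz->where and gz->error.  The CHECK_EOF variant of NEEDBITS further
 * down turns that EOF into an error return from the decoding routine.
 */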
86
87static int
88Flush(struct gzip *gz,u_long siz)
89{
90 u_char *p = slide,*q;
91 int i;
92
93 /* First, find an a.out header */
94 if(gz->output < sizeof gz->a_out) {
95 q = (u_char*) &gz->a_out;
96 i = min(siz,sizeof gz->a_out - gz->output);
97 bcopy(p,q+gz->output,i);
98 gz->output += i;
99 p += i;
100 siz -= i;
101 if(gz->output == sizeof gz->a_out) {
102 i = do_aout_hdr(gz);
103 if (i == -1) {
104 gz->where = __LINE__;
105 gz->error = ENOEXEC;
106 return ENOEXEC;
107 } else if (i) {
108 gz->where = __LINE__;
109 gz->error = i;
110 return ENOEXEC;
111 }
112 if(gz->file_offset < sizeof gz->a_out) {
113 q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
114 bcopy(&gz->a_out,q,sizeof gz->a_out - gz->file_offset);
115 }
116 }
117 }
118 /* Skip over zero-padded first PAGE if needed */
119 if(gz->output < gz->file_offset && (gz->output+siz) > gz->file_offset) {
120 i = min(siz, gz->file_offset - gz->output);
121 gz->output += i;
122 p += i;
123 siz -= i;
124 }
125 if(gz->output >= gz->file_offset && gz->output < gz->file_end) {
126 i = min(siz, gz->file_end - gz->output);
127 q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
128 bcopy(p,q,i);
129 gz->output += i;
130 p += i;
131 siz -= i;
132 }
133 gz->output += siz;
134 return 0;
135}
136
137#define FLUSH(x,y) {int foo = Flush(x,y); if (foo) return foo;}
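/*
 * Flush() above plays the role of the "write output" callback for the
 * decompressor: it first accumulates an a.out header in gz->a_out and runs
 * do_aout_hdr(), skips the zero padding up to gz->file_offset, copies the
 * file body into the mapped region at gz->virtual_offset, and simply counts
 * any trailing bytes.  Note that the FLUSH() wrapper returns from the
 * *calling* function when Flush() fails, so every routine that uses it
 * must return an int error code.
 */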
138static
139void *
140myalloc(u_long size)
141{
142 return malloc(size, M_GZIP, M_NOWAIT);
143}
144#define malloc myalloc
145
146static
147void
148myfree(void * ptr)
149{
150 free(ptr,M_GZIP);
151}
152#define free myfree
153
154static const int qflag = 0;
155#define Trace(x) /* */
156
157
158/* This came from unzip-5.12. I have changed it to pass a "gz" pointer
159 * around, thus hopefully making it re-entrant. Poul-Henning
160 */
161
162/* inflate.c -- put in the public domain by Mark Adler
163 version c14o, 23 August 1994 */
164
165/* You can do whatever you like with this source file, though I would
166 prefer that if you modify it and redistribute it that you include
167 comments to that effect with your name and the date. Thank you.
168
169 History:
170 vers date who what
171 ---- --------- -------------- ------------------------------------
172 a ~~ Feb 92 M. Adler used full (large, one-step) lookup table
173 b1 21 Mar 92 M. Adler first version with partial lookup tables
174 b2 21 Mar 92 M. Adler fixed bug in fixed-code blocks
175 b3 22 Mar 92 M. Adler sped up match copies, cleaned up some
176 b4 25 Mar 92 M. Adler added prototypes; removed window[] (now
177 is the responsibility of unzip.h--also
178 changed name to slide[]), so needs diffs
179 for unzip.c and unzip.h (this allows
180 compiling in the small model on MSDOS);
181 fixed cast of q in huft_build();
182 b5 26 Mar 92 M. Adler got rid of unintended macro recursion.
183 b6 27 Mar 92 M. Adler got rid of nextbyte() routine. fixed
184 bug in inflate_fixed().
185 c1 30 Mar 92 M. Adler removed lbits, dbits environment variables.
186 changed BMAX to 16 for explode. Removed
187 OUTB usage, and replaced it with flush()--
188 this was a 20% speed improvement! Added
189 an explode.c (to replace unimplod.c) that
190 uses the huft routines here. Removed
191 register union.
192 c2 4 Apr 92 M. Adler fixed bug for file sizes a multiple of 32k.
193 c3 10 Apr 92 M. Adler reduced memory of code tables made by
194 huft_build significantly (factor of two to
195 three).
196 c4 15 Apr 92 M. Adler added NOMEMCPY do kill use of memcpy().
197 worked around a Turbo C optimization bug.
198 c5 21 Apr 92 M. Adler added the WSIZE #define to allow reducing
199 the 32K window size for specialized
200 applications.
201 c6 31 May 92 M. Adler added some typecasts to eliminate warnings
202 c7 27 Jun 92 G. Roelofs added some more typecasts (444: MSC bug).
203 c8 5 Oct 92 J-l. Gailly added ifdef'd code to deal with PKZIP bug.
204 c9 9 Oct 92 M. Adler removed a memory error message (~line 416).
205 c10 17 Oct 92 G. Roelofs changed ULONG/UWORD/byte to ulg/ush/uch,
206 removed old inflate, renamed inflate_entry
207 to inflate, added Mark's fix to a comment.
208 c10.5 14 Dec 92 M. Adler fix up error messages for incomplete trees.
209 c11 2 Jan 93 M. Adler fixed bug in detection of incomplete
210 tables, and removed assumption that EOB is
211 the longest code (bad assumption).
212 c12 3 Jan 93 M. Adler make tables for fixed blocks only once.
213 c13 5 Jan 93 M. Adler allow all zero length codes (pkzip 2.04c
214 outputs one zero length code for an empty
215 distance tree).
216 c14 12 Mar 93 M. Adler made inflate.c standalone with the
217 introduction of inflate.h.
218 c14b 16 Jul 93 G. Roelofs added (unsigned) typecast to w at 470.
219 c14c 19 Jul 93 J. Bush changed v[N_MAX], l[288], ll[28x+3x] arrays
220 to static for Amiga.
221 c14d 13 Aug 93 J-l. Gailly de-complicatified Mark's c[*p++]++ thing.
222 c14e 8 Oct 93 G. Roelofs changed memset() to memzero().
223 c14f 22 Oct 93 G. Roelofs renamed quietflg to qflag; made Trace()
224 conditional; added inflate_free().
225 c14g 28 Oct 93 G. Roelofs changed l/(lx+1) macro to pointer (Cray bug)
226 c14h 7 Dec 93 C. Ghisler huft_build() optimizations.
227 c14i 9 Jan 94 A. Verheijen set fixed_t{d,l} to NULL after freeing;
228 G. Roelofs check NEXTBYTE macro for EOF.
229 c14j 23 Jan 94 G. Roelofs removed Ghisler "optimizations"; ifdef'd
230 EOF check.
231 c14k 27 Feb 94 G. Roelofs added some typecasts to avoid warnings.
232 c14l 9 Apr 94 G. Roelofs fixed split comments on preprocessor lines
233 to avoid bug in Encore compiler.
234 c14m 7 Jul 94 P. Kienitz modified to allow assembler version of
235 inflate_codes() (define ASM_INFLATECODES)
236 c14n 22 Jul 94 G. Roelofs changed fprintf to FPRINTF for DLL versions
237 c14o 23 Aug 94 C. Spieler added a newline to a debug statement;
238 G. Roelofs added another typecast to avoid MSC warning
239 */
240
241
242/*
243 Inflate deflated (PKZIP's method 8 compressed) data. The compression
244 method searches for as much of the current string of bytes (up to a
245 length of 258) in the previous 32K bytes. If it doesn't find any
246 matches (of at least length 3), it codes the next byte. Otherwise, it
247 codes the length of the matched string and its distance backwards from
248 the current position. There is a single Huffman code that codes both
249 single bytes (called "literals") and match lengths. A second Huffman
250 code codes the distance information, which follows a length code. Each
251 length or distance code actually represents a base value and a number
252 of "extra" (sometimes zero) bits to add to the base value. At
253 the end of each deflated block is a special end-of-block (EOB) literal/
254 length code. The decoding process is basically: get a literal/length
255 code; if EOB then done; if a literal, emit the decoded byte; if a
256 length then get the distance and emit the referred-to bytes from the
257 sliding window of previously emitted data.
258
259 There are (currently) three kinds of inflate blocks: stored, fixed, and
260 dynamic. The compressor outputs a chunk of data at a time and decides
261 which method to use on a chunk-by-chunk basis. A chunk might typically
262 be 32K to 64K, uncompressed. If the chunk is uncompressible, then the
263 "stored" method is used. In this case, the bytes are simply stored as
264 is, eight bits per byte, with none of the above coding. The bytes are
265 preceded by a count, since there is no longer an EOB code.
266
267 If the data is compressible, then either the fixed or dynamic methods
268 are used. In the dynamic method, the compressed data is preceded by
269 an encoding of the literal/length and distance Huffman codes that are
270 to be used to decode this block. The representation is itself Huffman
271 coded, and so is preceded by a description of that code. These code
272 descriptions take up a little space, and so for small blocks, there is
273 a predefined set of codes, called the fixed codes. The fixed method is
274 used if the block ends up smaller that way (usually for quite small
275 chunks); otherwise the dynamic method is used. In the latter case, the
276 codes are customized to the probabilities in the current block and so
277 can code it much better than the pre-determined fixed codes can.
278
279 The Huffman codes themselves are decoded using a multi-level table
280 lookup, in order to maximize the speed of decoding plus the speed of
281 building the decoding tables. See the comments below that precede the
282 lbits and dbits tuning parameters.
283 */
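
/*
 * The match-copy step described above is worth seeing in isolation.  The
 * sketch below is purely illustrative and is not used by this file; it
 * ignores the circular-window wrap-around and the FLUSH() calls that
 * inflate_codes() has to perform, and just shows why an overlapping copy
 * (dist < len) must be done byte by byte: each copied byte may itself be
 * a source byte for the ones that follow, which is how short repeated
 * runs are encoded.
 */
#if 0
static void
copy_match(unsigned char *window, unsigned w, unsigned dist, unsigned len)
{
	while (len--) {
		window[w] = window[w - dist];	/* re-emit an earlier byte */
		w++;
	}
}
#endif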
284
285
286/*
287 Notes beyond the 1.93a appnote.txt:
288
289 1. Distance pointers never point before the beginning of the output
290 stream.
291 2. Distance pointers can point back across blocks, up to 32k away.
292 3. There is an implied maximum of 7 bits for the bit length table and
293 15 bits for the actual data.
294 4. If only one code exists, then it is encoded using one bit. (Zero
295 would be more efficient, but perhaps a little confusing.) If two
296 codes exist, they are coded using one bit each (0 and 1).
297 5. There is no way of sending zero distance codes--a dummy must be
298 sent if there are none. (History: a pre 2.0 version of PKZIP would
299 store blocks with no distance codes, but this was discovered to be
300 too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
301 zero distance codes, which is sent as one code of zero bits in
302 length.
303 6. There are up to 286 literal/length codes. Code 256 represents the
304 end-of-block. Note however that the static length tree defines
305 288 codes just to fill out the Huffman codes. Codes 286 and 287
306 cannot be used though, since there is no length base or extra bits
307 defined for them. Similarly, there are up to 30 distance codes.
308 However, static trees define 32 codes (all 5 bits) to fill out the
309 Huffman codes, but the last two had better not show up in the data.
310 7. Unzip can check dynamic Huffman blocks for complete code sets.
311 The exception is that a single code would not be complete (see #4).
312 8. The five bits following the block type are really the number of
313 literal codes sent minus 257.
314 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
315 (1+6+6). Therefore, to output three times the length, you output
316 three codes (1+1+1), whereas to output four times the same length,
317 you only need two codes (1+3). Hmm.
318 10. In the tree reconstruction algorithm, Code = Code + Increment
319 only if BitLength(i) is not zero. (Pretty obvious.)
320 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
321 12. Note: length code 284 can represent 227-258, but length code 285
322 really is 258. The last length deserves its own, short code
323 since it gets used a lot in very redundant files. The length
324 258 is special since 258 - 3 (the min match length) is 255.
325 13. The literal/length and distance code bit lengths are read as a
326 single stream of lengths. It is possible (and advantageous) for
327 a repeat code (16, 17, or 18) to go across the boundary between
328 the two sets of lengths.
329 */
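
/*
 * A worked instance of note 12, using the cplens[] and cplext[] tables
 * below: length code 284 has base 227 and 5 extra bits, so it covers
 * lengths 227..258, while code 285 has base 258 and no extra bits.  The
 * decoder always forms length = base + extra, which is what the line
 *
 *	n = t->v.n + ((unsigned)b & mask[e]);
 *
 * in inflate_codes() computes.
 */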
330
331
332#define PKZIP_BUG_WORKAROUND /* PKZIP 1.93a problem--live with it */
333
334/*
335 inflate.h must supply the uch slide[WSIZE] array and the NEXTBYTE,
336 FLUSH() and memzero macros. If the window size is not 32K, it
337 should also define WSIZE. If INFMOD is defined, it can include
338 compiled functions to support the NEXTBYTE and/or FLUSH() macros.
339 In this file, NEXTBYTE and FLUSH() are provided above by NextByte() and
340 Flush(), which show what those functions need to do. Normally, you would
341 also want FLUSH() to compute a crc on the data. inflate.h also
342 needs to provide these typedefs:
343
344 typedef unsigned char uch;
345 typedef unsigned short ush;
346 typedef unsigned long ulg;
347
348 This module uses the external functions malloc() and free() (and
349 probably memset() or bzero() in the memzero() macro). Their
350 prototypes are normally found in <string.h> and <stdlib.h>.
351 */
352#define INFMOD /* tell inflate.h to include code to be compiled */
353
354/* Huffman code lookup table entry--this entry is four bytes for machines
355 that have 16-bit pointers (e.g. PC's in the small or medium model).
356 Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
357 means that v is a literal, 16 < e < 32 means that v is a pointer to
358 the next table, which codes e - 16 bits, and lastly e == 99 indicates
359 an unused code. If a code with e == 99 is looked up, this implies an
360 error in the data. */
361struct huft {
362 uch e; /* number of extra bits or operation */
363 uch b; /* number of bits in this code or subcode */
364 union {
365 ush n; /* literal, length base, or distance base */
366 struct huft *t; /* pointer to next level of table */
367 } v;
368};
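
/*
 * An illustrative sketch (not used by this file) of how one symbol is
 * pulled out of a table built by huft_build() below.  inflate_codes()
 * performs the same walk, but interleaved with the NEEDBITS/DUMPBITS
 * bit-buffer macros; here "b" is simply assumed to already hold enough
 * low-order bits of the stream.
 */
#if 0
static struct huft *
decode_one(struct huft *tl, int bl, unsigned long b)
{
	struct huft *t;
	unsigned e;

	t = tl + (b & ((1u << bl) - 1));	/* first level: bl bits */
	while ((e = t->e) > 16) {
		if (e == 99)
			return NULL;		/* invalid code in the stream */
		b >>= t->b;			/* drop the bits just used */
		t = t->v.t + (b & ((1u << (e - 16)) - 1));
	}
	return t;	/* e == 15: EOB, e == 16: literal, else extra bits */
}
#endif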
369
370
371/* Function prototypes */
372#ifndef OF
373# ifdef __STDC__
374# define OF(a) a
375# else /* !__STDC__ */
376# define OF(a) ()
377# endif /* ?__STDC__ */
378#endif
379static int huft_build OF((struct gzip *,unsigned *, unsigned, unsigned, const ush *, const ush *, struct huft **, int *, struct gz_global *));
380static int huft_free OF((struct gzip *,struct huft *));
381static int inflate_codes OF((struct gzip *,struct huft *, struct huft *, int, int, struct gz_global *));
382static int inflate_stored OF((struct gzip *, struct gz_global *));
383static int inflate_fixed OF((struct gzip *, struct gz_global *));
384static int inflate_dynamic OF((struct gzip *, struct gz_global *));
385static int inflate_block OF((struct gzip *,int *, struct gz_global *));
386
387#if 0
388/* never used - why not ? */
389static int inflate_free OF((struct gzip *, struct gz_global *));
390#endif
391
392
393/* The inflate algorithm uses a sliding 32K byte window on the uncompressed
394 stream to find repeated byte strings. This is implemented here as a
395 circular buffer. The index is updated simply by incrementing and then
396 and'ing with 0x7fff (32K-1). */
397/* It is left to other modules to supply the 32K area. It is assumed
398 to be usable as if it were declared "uch slide[32768];" or as just
399 "uch *slide;" and then malloc'ed in the latter case. The definition
400 must be in inflate.h, included above. */
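
/* The write side of that circular window is the same everywhere in this
   file: store a byte into slide[] and hand a full window to FLUSH()
   before wrapping, i.e.

	slide[w++] = (uch)c;
	if (w == WSIZE)
	{
	  FLUSH(gz,w);
	  w = 0;
	}

   which is the pattern inflate_codes() and inflate_stored() both use. */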
401
402
403/* Tables for deflate from PKZIP's appnote.txt. */
404static const unsigned border[] = { /* Order of the bit length code lengths */
405 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
406static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */
407 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
408 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
409 /* note: see note #13 above about the 258 in this list. */
410static const ush cplext[] = { /* Extra bits for literal codes 257..285 */
411 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
412 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
413static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */
414 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
415 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
416 8193, 12289, 16385, 24577};
417static const ush cpdext[] = { /* Extra bits for distance codes */
418 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
419 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
420 12, 12, 13, 13};
421
422/* And'ing with mask[n] masks the lower n bits */
423static const ush mask[] = {
424 0x0000,
425 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
426 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
427};
428
429
430/* Macros for inflate() bit peeking and grabbing.
431 The usage is:
432
433 NEEDBITS(j)
434 x = b & mask[j];
435 DUMPBITS(j)
436
437 where NEEDBITS makes sure that b has at least j bits in it, and
438 DUMPBITS removes the bits from b. The macros use the variable k
439 for the number of bits in b. Normally, b and k are register
440 variables for speed, and are initialized at the beginning of a
441 routine that uses these macros from a global bit buffer and count.
442
443 In order to not ask for more bits than there are in the compressed
444 stream, the Huffman tables are constructed to only ask for just
445 enough bits to make up the end-of-block code (value 256). Then no
446 bytes need to be "returned" to the buffer at the end of the last
447 block. See the huft_build() routine.
448 */
449
450/*
451 * The following 2 were global variables.
452 * They are now fields of the gz_global structure.
453 */
454#define bb (glbl->gz_bb) /* bit buffer */
455#define bk (glbl->gz_bk) /* bits in bit buffer */
456
457#ifndef CHECK_EOF
458# define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE)<<k;k+=8;}}
459#else
460# define NEEDBITS(n) {while(k<(n)){int c=NEXTBYTE;if(c==EOF)return 1;\
461 b|=((ulg)c)<<k;k+=8;}}
462#endif /* Piet Plomp: change "return 1" to "break" */
463
464#define DUMPBITS(n) {b>>=(n);k-=(n);}
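
/*
 * Typical use of the two macros, exactly as it appears in inflate_dynamic()
 * below when the 5-bit literal/length-code count is read from a dynamic
 * block header (b and k are the routine's local copies of bb and bk):
 *
 *	NEEDBITS(5)
 *	nl = 257 + ((unsigned)b & 0x1f);
 *	DUMPBITS(5)
 */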
465
466
467/*
468 Huffman code decoding is performed using a multi-level table lookup.
469 The fastest way to decode is to simply build a lookup table whose
470 size is determined by the longest code. However, the time it takes
471 to build this table can also be a factor if the data being decoded
472 is not very long. The most common codes are necessarily the
473 shortest codes, so those codes dominate the decoding time, and hence
474 the speed. The idea is you can have a shorter table that decodes the
475 shorter, more probable codes, and then point to subsidiary tables for
476 the longer codes. The time it costs to decode the longer codes is
477 then traded against the time it takes to make longer tables.
478
479 The results of this trade-off are in the variables lbits and dbits
480 below. lbits is the number of bits the first level table for literal/
481 length codes can decode in one step, and dbits is the same thing for
482 the distance codes. Subsequent tables are also less than or equal to
483 those sizes. These values may be adjusted either when all of the
484 codes are shorter than that, in which case the longest code length in
485 bits is used, or when the shortest code is *longer* than the requested
486 table size, in which case the length of the shortest code in bits is
487 used.
488
489 There are two different values for the two tables, since they code a
490 different number of possibilities each. The literal/length table
491 codes 286 possible values, or in a flat code, a little over eight
492 bits. The distance table codes 30 possible values, or a little less
493 than five bits, flat. The optimum values for speed end up being
494 about one bit more than those, so lbits is 8+1 and dbits is 5+1.
495 The optimum values may differ though from machine to machine, and
496 possibly even between compilers. Your mileage may vary.
497 */
498
499
500static const int lbits = 9; /* bits in base literal/length lookup table */
501static const int dbits = 6; /* bits in base distance lookup table */
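
/* As an example of the trade-off: with lbits == 9, a 12-bit literal/length
   code is decoded in two steps.  The first-level lookup consumes 9 bits and
   yields a link entry (e of the form 16 + j, here typically 16 + 3) that
   points at a sub-table indexed by the remaining bits, while the common
   codes of 9 bits or fewer are still decoded in a single lookup. */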
502
503
504/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
505#define BMAX 16 /* maximum bit length of any code (16 for explode) */
506#define N_MAX 288 /* maximum number of codes in any set */
507
508/*
509 * This also used to be a global variable
510 */
511#define hufts (glbl->gz_hufts)
512
513
514static int huft_build(gz,b, n, s, d, e, t, m, glbl)
515struct gzip *gz;
516unsigned *b; /* code lengths in bits (all assumed <= BMAX) */
517unsigned n; /* number of codes (assumed <= N_MAX) */
518unsigned s; /* number of simple-valued codes (0..s-1) */
519const ush *d; /* list of base values for non-simple codes */
520const ush *e; /* list of extra bits for non-simple codes */
521struct huft **t; /* result: starting table */
522int *m; /* maximum lookup bits, returns actual */
523struct gz_global * glbl;
524/* Given a list of code lengths and a maximum table size, make a set of
525 tables to decode that set of codes. Return zero on success, one if
526 the given code set is incomplete (the tables are still built in this
527 case), two if the input is invalid (all zero length codes or an
528 oversubscribed set of lengths), and three if not enough memory.
529 The code with value 256 is special, and the tables are constructed
530 so that no bits beyond that code are fetched when that code is
531 decoded. */
532{
533 unsigned a; /* counter for codes of length k */
534 unsigned c[BMAX+1]; /* bit length count table */
535 unsigned el; /* length of EOB code (value 256) */
536 unsigned f; /* i repeats in table every f entries */
537 int g; /* maximum code length */
538 int h; /* table level */
539 register unsigned i; /* counter, current code */
540 register unsigned j; /* counter */
541 register int k; /* number of bits in current code */
542 int lx[BMAX+1]; /* memory for l[-1..BMAX-1] */
543 int *l = lx+1; /* stack of bits per table */
544 register unsigned *p; /* pointer into c[], b[], or v[] */
545 register struct huft *q; /* points to current table */
546 struct huft r; /* table entry for structure assignment */
547 struct huft *u[BMAX]; /* table stack */
548 unsigned v[N_MAX]; /* values in order of bit length */
549 register int w; /* bits before this table == (l * h) */
550 unsigned x[BMAX+1]; /* bit offsets, then code stack */
551 unsigned *xp; /* pointer into x */
552 int y; /* number of dummy codes added */
553 unsigned z; /* number of entries in current table */
554
555
556 /* Generate counts for each bit length */
557 el = n > 256 ? b[256] : BMAX; /* set length of EOB code, if any */
558 memzero((char *)c, sizeof(c));
559 p = b; i = n;
560 do {
561 c[*p]++; p++; /* assume all entries <= BMAX */
562 } while (--i);
563 if (c[0] == n) /* null input--all zero length codes */
564 {
565 *t = (struct huft *)NULL;
566 *m = 0;
567 return 0;
568 }
569
570
571 /* Find minimum and maximum length, bound *m by those */
572 for (j = 1; j <= BMAX; j++)
573 if (c[j])
574 break;
575 k = j; /* minimum code length */
576 if ((unsigned)*m < j)
577 *m = j;
578 for (i = BMAX; i; i--)
579 if (c[i])
580 break;
581 g = i; /* maximum code length */
582 if ((unsigned)*m > i)
583 *m = i;
584
585
586 /* Adjust last length count to fill out codes, if needed */
587 for (y = 1 << j; j < i; j++, y <<= 1)
588 if ((y -= c[j]) < 0)
589 return 2; /* bad input: more codes than bits */
590 if ((y -= c[i]) < 0)
591 return 2;
592 c[i] += y;
593
594
595 /* Generate starting offsets into the value table for each length */
596 x[1] = j = 0;
597 p = c + 1; xp = x + 2;
598 while (--i) { /* note that i == g from above */
599 *xp++ = (j += *p++);
600 }
601
602
603 /* Make a table of values in order of bit lengths */
604 p = b; i = 0;
605 do {
606 if ((j = *p++) != 0)
607 v[x[j]++] = i;
608 } while (++i < n);
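
  /* For example (illustration only): code lengths b[] = {2, 1, 3, 3} give
     counts c[1..3] = {1, 1, 2}, offsets x[1..3] = {0, 1, 2}, and therefore
     v[] = {1, 0, 2, 3}: the coded values sorted by code length, ready for
     the code assignment below. */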
609
610
611 /* Generate the Huffman codes and for each, make the table entries */
612 x[0] = i = 0; /* first Huffman code is zero */
613 p = v; /* grab values in bit order */
614 h = -1; /* no tables yet--level -1 */
615 w = l[-1] = 0; /* no bits decoded yet */
616 u[0] = (struct huft *)NULL; /* just to keep compilers happy */
617 q = (struct huft *)NULL; /* ditto */
618 z = 0; /* ditto */
619
620 /* go through the bit lengths (k already is bits in shortest code) */
621 for (; k <= g; k++)
622 {
623 a = c[k];
624 while (a--)
625 {
626 /* here i is the Huffman code of length k bits for value *p */
627 /* make tables up to required level */
628 while (k > w + l[h])
629 {
630 w += l[h++]; /* add bits already decoded */
631
632 /* compute minimum size table less than or equal to *m bits */
633 z = (z = g - w) > (unsigned)*m ? *m : z; /* upper limit */
634 if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
635 { /* too few codes for k-w bit table */
636 f -= a + 1; /* deduct codes from patterns left */
637 xp = c + k;
638 while (++j < z) /* try smaller tables up to z bits */
639 {
640 if ((f <<= 1) <= *++xp)
641 break; /* enough codes to use up j bits */
642 f -= *xp; /* else deduct codes from patterns */
643 }
644 }
645 if ((unsigned)w + j > el && (unsigned)w < el)
646 j = el - w; /* make EOB code end at table */
647 z = 1 << j; /* table entries for j-bit table */
648 l[h] = j; /* set table size in stack */
649
650 /* allocate and link in new table */
651 if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
652 (struct huft *)NULL)
653 {
654 if (h)
655 huft_free(gz,u[0]);
656 return 3; /* not enough memory */
657 }
658 hufts += z + 1; /* track memory usage */
659 *t = q + 1; /* link to list for huft_free() */
660 *(t = &(q->v.t)) = (struct huft *)NULL;
661 u[h] = ++q; /* table starts after link */
662
663 /* connect to last table, if there is one */
664 if (h)
665 {
666 x[h] = i; /* save pattern for backing up */
667 r.b = (uch)l[h-1]; /* bits to dump before this table */
668 r.e = (uch)(16 + j); /* bits in this table */
669 r.v.t = q; /* pointer to this table */
670 j = (i & ((1 << w) - 1)) >> (w - l[h-1]);
671 u[h-1][j] = r; /* connect to last table */
672 }
673 }
674
675 /* set up table entry in r */
676 r.b = (uch)(k - w);
677 if (p >= v + n)
678 r.e = 99; /* out of values--invalid code */
679 else if (*p < s)
680 {
681 r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */
682 r.v.n = *p++; /* simple code is just the value */
683 }
684 else
685 {
686 r.e = (uch)e[*p - s]; /* non-simple--look up in lists */
687 r.v.n = d[*p++ - s];
688 }
689
690 /* fill code-like entries with r */
691 f = 1 << (k - w);
692 for (j = i >> w; j < z; j += f)
693 q[j] = r;
694
695 /* backwards increment the k-bit code i */
696 for (j = 1 << (k - 1); i & j; j >>= 1)
697 i ^= j;
698 i ^= j;
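
	/* e.g. for k == 3, i steps through 000, 100, 010, 110, 001, 101,
	   011, 111: normal counting on the bit-reversed value.  The codes
	   are generated in this reversed order because the table is indexed
	   by bits taken from the low end of the bit buffer. */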
699
700 /* backup over finished tables */
701 while ((i & ((1 << w) - 1)) != x[h])
702 w -= l[--h]; /* don't need to update q */
703 }
704 }
705
706
707 /* return actual size of base table */
708 *m = l[0];
709
710
711 /* Return true (1) if we were given an incomplete table */
712 return y != 0 && g != 1;
713}
714
715
716
717static int huft_free(gz,t)
718struct gzip *gz;
719struct huft *t; /* table to free */
720/* Free the malloc'ed tables built by huft_build(), which makes a linked
721 list of the tables it made, with the links in a dummy first entry of
722 each table. */
723{
724 register struct huft *p, *q;
725
726
727 /* Go through linked list, freeing from the malloced (t[-1]) address. */
728 p = t;
729 while (p != (struct huft *)NULL)
730 {
731 q = (--p)->v.t;
732 free(p);
733 p = q;
734 }
735 return 0;
736}
737
738
739
740#ifdef ASM_INFLATECODES
741# define inflate_codes(tl,td,bl,bd) flate_codes(tl,td,bl,bd,(uch *)slide)
742 int flate_codes OF((struct huft *, struct huft *, int, int, uch *));
743
744#else
745
746static int inflate_codes(gz,tl, td, bl, bd, glbl)
747struct gzip *gz;
748struct huft *tl, *td; /* literal/length and distance decoder tables */
749int bl, bd; /* number of bits decoded by tl[] and td[] */
750struct gz_global * glbl;/* global variables */
751/* inflate (decompress) the codes in a deflated (compressed) block.
752 Return an error code or zero if it all goes ok. */
753{
754 register unsigned e; /* table entry flag/number of extra bits */
755 unsigned n, d; /* length and index for copy */
756 unsigned w; /* current window position */
757 struct huft *t; /* pointer to table entry */
758 unsigned ml, md; /* masks for bl and bd bits */
759 register ulg b; /* bit buffer */
760 register unsigned k; /* number of bits in bit buffer */
761
762
763 /* make local copies of globals */
764 b = bb; /* initialize bit buffer */
765 k = bk;
766 w = wp; /* initialize window position */
767
768
769 /* inflate the coded data */
770 ml = mask[bl]; /* precompute masks for speed */
771 md = mask[bd];
772 while (1) /* do until end of block */
773 {
774 NEEDBITS((unsigned)bl)
775 if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
776 do {
777 if (e == 99)
778 return 1;
779 DUMPBITS(t->b)
780 e -= 16;
781 NEEDBITS(e)
782 } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
783 DUMPBITS(t->b)
784 if (e == 16) /* then it's a literal */
785 {
786 slide[w++] = (uch)t->v.n;
787 if (w == WSIZE)
788 {
789 FLUSH(gz,w);
790 w = 0;
791 }
792 }
793 else /* it's an EOB or a length */
794 {
795 /* exit if end of block */
796 if (e == 15)
797 break;
798
799 /* get length of block to copy */
800 NEEDBITS(e)
801 n = t->v.n + ((unsigned)b & mask[e]);
802 DUMPBITS(e);
803
804 /* decode distance of block to copy */
805 NEEDBITS((unsigned)bd)
806 if ((e = (t = td + ((unsigned)b & md))->e) > 16)
807 do {
808 if (e == 99)
809 return 1;
810 DUMPBITS(t->b)
811 e -= 16;
812 NEEDBITS(e)
813 } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
814 DUMPBITS(t->b)
815 NEEDBITS(e)
816 d = w - t->v.n - ((unsigned)b & mask[e]);
817 DUMPBITS(e)
818
819 /* do the copy */
820 do {
821 n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
822#ifndef NOMEMCPY
823 if (w - d >= e) /* (this test assumes unsigned comparison) */
824 {
825 memcpy(slide + w, slide + d, e);
826 w += e;
827 d += e;
828 }
829 else /* do it slow to avoid memcpy() overlap */
830#endif /* !NOMEMCPY */
831 do {
832 slide[w++] = slide[d++];
833 } while (--e);
834 if (w == WSIZE)
835 {
836 FLUSH(gz,w);
837 w = 0;
838 }
839 } while (n);
840 }
841 }
842
843
844 /* restore the globals from the locals */
845 wp = w; /* restore global window pointer */
846 bb = b; /* restore global bit buffer */
847 bk = k;
848
849
850 /* done */
851 return 0;
852}
853
854#endif /* ASM_INFLATECODES */
855
856
857
858static int inflate_stored(gz, glbl)
859struct gzip *gz;
860struct gz_global * glbl;
861/* "decompress" an inflated type 0 (stored) block. */
862{
863 unsigned n; /* number of bytes in block */
864 unsigned w; /* current window position */
865 register ulg b; /* bit buffer */
866 register unsigned k; /* number of bits in bit buffer */
867
868
869 /* make local copies of globals */
870 Trace((stderr, "\nstored block"));
871 b = bb; /* initialize bit buffer */
872 k = bk;
873 w = wp; /* initialize window position */
874
875
876 /* go to byte boundary */
877 n = k & 7;
878 DUMPBITS(n);
879
880
881 /* get the length and its complement */
882 NEEDBITS(16)
883 n = ((unsigned)b & 0xffff);
884 DUMPBITS(16)
885 NEEDBITS(16)
886 if (n != (unsigned)((~b) & 0xffff))
887 return 1; /* error in compressed data */
888 DUMPBITS(16)
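
  /* e.g. a stored block of 0x1234 bytes carries LEN = 0x1234 immediately
     followed by its one's complement NLEN = 0xedcb, which is what the
     check above verifies. */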
889
890
891 /* read and output the compressed data */
892 while (n--)
893 {
894 NEEDBITS(8)
895 slide[w++] = (uch)b;
896 if (w == WSIZE)
897 {
898 FLUSH(gz,w);
899 w = 0;
900 }
901 DUMPBITS(8)
902 }
903
904
905 /* restore the globals from the locals */
906 wp = w; /* restore global window pointer */
907 bb = b; /* restore global bit buffer */
908 bk = k;
909 return 0;
910}
911
912
913/* Globals for literal tables (built once) */
914/* The real variables are now in the struct gz_global */
915#define fixed_tl (glbl->gz_fixed_tl)
916#define fixed_td (glbl->gz_fixed_td)
917#define fixed_bl (glbl->gz_fixed_bl)
918#define fixed_bd (glbl->gz_fixed_bd)
919
920static int inflate_fixed(gz, glbl)
921struct gzip *gz;
922struct gz_global *glbl;
923/* decompress an inflated type 1 (fixed Huffman codes) block. We should
924 either replace this with a custom decoder, or at least precompute the
925 Huffman tables. */
926{
927 /* if first time, set up tables for fixed blocks */
928 Trace((stderr, "\nliteral block"));
929 if (fixed_tl == (struct huft *)NULL)
930 {
931 int i; /* temporary variable */
932 static unsigned l[288]; /* length list for huft_build */
933
934 /* literal table */
935 for (i = 0; i < 144; i++)
936 l[i] = 8;
937 for (; i < 256; i++)
938 l[i] = 9;
939 for (; i < 280; i++)
940 l[i] = 7;
941 for (; i < 288; i++) /* make a complete, but wrong code set */
942 l[i] = 8;
943 fixed_bl = 7;
944 if ((i = huft_build(gz,l, 288, 257, cplens, cplext,
945 &fixed_tl, &fixed_bl, glbl)) != 0)
946 {
947 fixed_tl = (struct huft *)NULL;
948 return i;
949 }
950
951 /* distance table */
952 for (i = 0; i < 30; i++) /* make an incomplete code set */
953 l[i] = 5;
954 fixed_bd = 5;
955 if ((i = huft_build(gz,l, 30, 0, cpdist, cpdext,
956 &fixed_td, &fixed_bd, glbl)) > 1)
957 {
958 huft_free(gz,fixed_tl);
959 fixed_tl = (struct huft *)NULL;
960 return i;
961 }
962 }
963
964
965 /* decompress until an end-of-block code */
966 return inflate_codes(gz,fixed_tl, fixed_td, fixed_bl, fixed_bd, glbl) != 0;
967}
968
969
970
971static int inflate_dynamic(gz, glbl)
972struct gzip *gz;
973struct gz_global * glbl;
974/* decompress an inflated type 2 (dynamic Huffman codes) block. */
975{
976 int i; /* temporary variables */
977 unsigned j;
978 unsigned l; /* last length */
979 unsigned m; /* mask for bit lengths table */
980 unsigned n; /* number of lengths to get */
981 struct huft *tl; /* literal/length code table */
982 struct huft *td; /* distance code table */
983 int bl; /* lookup bits for tl */
984 int bd; /* lookup bits for td */
985 unsigned nb; /* number of bit length codes */
986 unsigned nl; /* number of literal/length codes */
987 unsigned nd; /* number of distance codes */
988#ifdef PKZIP_BUG_WORKAROUND
989 unsigned ll[288+32]; /* literal/length and distance code lengths */
990#else
991 unsigned ll[286+30]; /* literal/length and distance code lengths */
992#endif
993 register ulg b; /* bit buffer */
994 register unsigned k; /* number of bits in bit buffer */
995
996
997 /* make local bit buffer */
998 Trace((stderr, "\ndynamic block"));
999 b = bb;
1000 k = bk;
1001
1002
1003 /* read in table lengths */
1004 NEEDBITS(5)
1005 nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */
1006 DUMPBITS(5)
1007 NEEDBITS(5)
1008 nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */
1009 DUMPBITS(5)
1010 NEEDBITS(4)
1011 nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */
1012 DUMPBITS(4)
1013#ifdef PKZIP_BUG_WORKAROUND
1014 if (nl > 288 || nd > 32)
1015#else
1016 if (nl > 286 || nd > 30)
1017#endif
1018 return 1; /* bad lengths */
1019
1020
1021 /* read in bit-length-code lengths */
1022 for (j = 0; j < nb; j++)
1023 {
1024 NEEDBITS(3)
1025 ll[border[j]] = (unsigned)b & 7;
1026 DUMPBITS(3)
1027 }
1028 for (; j < 19; j++)
1029 ll[border[j]] = 0;
1030
1031
1032 /* build decoding table for trees--single level, 7 bit lookup */
1033 bl = 7;
1034 if ((i = huft_build(gz,ll, 19, 19, NULL, NULL, &tl, &bl, glbl)) != 0)
1035 {
1036 if (i == 1)
1037 huft_free(gz,tl);
1038 return i; /* incomplete code set */
1039 }
1040
1041
1042 /* read in literal and distance code lengths */
1043 n = nl + nd;
1044 m = mask[bl];
1045 i = l = 0;
1046 while ((unsigned)i < n)
1047 {
1048 NEEDBITS((unsigned)bl)
1049 j = (td = tl + ((unsigned)b & m))->b;
1050 DUMPBITS(j)
1051 j = td->v.n;
1052 if (j < 16) /* length of code in bits (0..15) */
1053 ll[i++] = l = j; /* save last length in l */
1054 else if (j == 16) /* repeat last length 3 to 6 times */
1055 {
1056 NEEDBITS(2)
1057 j = 3 + ((unsigned)b & 3);
1058 DUMPBITS(2)
1059 if ((unsigned)i + j > n)
1060 return 1;
1061 while (j--)
1062 ll[i++] = l;
1063 }
1064 else if (j == 17) /* 3 to 10 zero length codes */
1065 {
1066 NEEDBITS(3)
1067 j = 3 + ((unsigned)b & 7);
1068 DUMPBITS(3)
1069 if ((unsigned)i + j > n)
1070 return 1;
1071 while (j--)
1072 ll[i++] = 0;
1073 l = 0;
1074 }
1075 else /* j == 18: 11 to 138 zero length codes */
1076 {
1077 NEEDBITS(7)
1078 j = 11 + ((unsigned)b & 0x7f);
1079 DUMPBITS(7)
1080 if ((unsigned)i + j > n)
1081 return 1;
1082 while (j--)
1083 ll[i++] = 0;
1084 l = 0;
1085 }
1086 }
1087
1088
1089 /* free decoding table for trees */
1090 huft_free(gz,tl);
1091
1092
1093 /* restore the global bit buffer */
1094 bb = b;
1095 bk = k;
1096
1097
1098 /* build the decoding tables for literal/length and distance codes */
1099 bl = lbits;
1100 if ((i = huft_build(gz,ll, nl, 257, cplens, cplext, &tl, &bl, glbl)) != 0)
1101 {
1102 if (i == 1 && !qflag) {
1103 FPRINTF( "(incomplete l-tree) ");
1104 huft_free(gz,tl);
1105 }
1106 return i; /* incomplete code set */
1107 }
1108 bd = dbits;
1109 if ((i = huft_build(gz,ll + nl, nd, 0, cpdist, cpdext, &td, &bd, glbl)) != 0)
1110 {
1111 if (i == 1 && !qflag) {
1112 FPRINTF( "(incomplete d-tree) ");
1113#ifdef PKZIP_BUG_WORKAROUND
1114 i = 0;
1115 }
1116#else
1117 huft_free(gz,td);
1118 }
1119 huft_free(gz,tl);
1120 return i; /* incomplete code set */
1121#endif
1122 }
1123
1124
1125 /* decompress until an end-of-block code */
1126 if (inflate_codes(gz,tl, td, bl, bd, glbl))
1127 return 1;
1128
1129
1130 /* free the decoding tables, return */
1131 huft_free(gz,tl);
1132 huft_free(gz,td);
1133 return 0;
1134}
1135
1136
1137
1138static int inflate_block(gz,e, glbl)
1139struct gzip *gz;
1140int *e; /* last block flag */
1141struct gz_global * glbl;
1142/* decompress an inflated block */
1143{
1144 unsigned t; /* block type */
1145 register ulg b; /* bit buffer */
1146 register unsigned k; /* number of bits in bit buffer */
1147
1148
1149 /* make local bit buffer */
1150 b = bb;
1151 k = bk;
1152
1153
1154 /* read in last block bit */
1155 NEEDBITS(1)
1156 *e = (int)b & 1;
1157 DUMPBITS(1)
1158
1159
1160 /* read in block type */
1161 NEEDBITS(2)
1162 t = (unsigned)b & 3;
1163 DUMPBITS(2)
1164
1165
1166 /* restore the global bit buffer */
1167 bb = b;
1168 bk = k;
1169
1170
1171 /* inflate that block type */
1172 if (t == 2)
1173 return inflate_dynamic(gz, glbl);
1174 if (t == 0)
1175 return inflate_stored(gz, glbl);
1176 if (t == 1)
1177 return inflate_fixed(gz, glbl);
1178
1179
1180 /* bad block type */
1181 return 2;
1182}
1183
1184
1185
1186int inflate(gz, glbl)
1187struct gzip *gz;
1188struct gz_global * glbl;
1189/* decompress an inflated entry */
1190{
1191 int e; /* last block flag */
1192 int r; /* result code */
1193 unsigned h; /* maximum struct huft's malloc'ed */
1194
1195
1196 fixed_tl = (struct huft *)NULL;
1197
1198 /* initialize window, bit buffer */
1199 wp = 0;
1200 bk = 0;
1201 bb = 0;
1202
1203
1204 /* decompress until the last block */
1205 h = 0;
1206 do {
1207 hufts = 0;
1208 if ((r = inflate_block(gz,&e, glbl)) != 0)
1209 return r;
1210 if (hufts > h)
1211 h = hufts;
1212 } while (!e);
1213
1214
1215 /* flush out slide */
1216 FLUSH(gz,wp);
1217
1218
1219 /* return success */
1220 Trace((stderr, "\n%u bytes in Huffman tables (%d/entry)\n",
1221 h * sizeof(struct huft), sizeof(struct huft)));
1222 return 0;
1223}
1224
1225
1226#if 0
1227/* Nobody uses this - why not? */
1228static int inflate_free(gz, glbl)
1229struct gzip *gz;
1230struct gz_global * glbl;
1231{
1232 if (fixed_tl != (struct huft *)NULL)
1233 {
1234 huft_free(gz,fixed_td);
1235 huft_free(gz,fixed_tl);
1236 fixed_td = fixed_tl = (struct huft *)NULL;
1237 }
1238 return 0;
1239}
1240#endif