imgact_gzip.c revision 3344
1284388Sngie/*
2284388Sngie * Parts of this file are not covered by:
3284388Sngie * ----------------------------------------------------------------------------
4284388Sngie * "THE BEER-WARE LICENSE" (Revision 42):
5284388Sngie * <phk@login.dknet.dk> wrote this file.  As long as you retain this notice you
6284388Sngie * can do whatever you want with this stuff. If we meet some day, and you think
7284388Sngie * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
8284388Sngie * ----------------------------------------------------------------------------
9284388Sngie *
10284388Sngie * $Id: imgact_gzip.c,v 1.1 1994/10/03 05:23:01 phk Exp $
11284388Sngie *
12284388Sngie * This module handles execution of a.out files which have been run through
13284388Sngie * "gzip -9".
14284388Sngie *
15284388Sngie * For now you need to use exactly this command to compress the binaries:
16284388Sngie *
17284388Sngie *		gzip -9 -v < /bin/sh > /tmp/sh
18284388Sngie *
19284388Sngie * TODO:
20284388Sngie *	text-segments should be made R/O after being filled
21284388Sngie *	is the vm-stuff safe ?
22284388Sngie * 	should handle the entire header of gzip'ed stuff.
23284388Sngie *	inflate isn't quite reentrant yet...
24284388Sngie *	error-handling is a mess...
25284388Sngie *	so is the rest...
26284388Sngie */
27284388Sngie
28284388Sngie#include <sys/param.h>
29284388Sngie#include <sys/systm.h>
30288330Sngie#include <sys/resourcevar.h>
31284388Sngie#include <sys/exec.h>
32288330Sngie#include <sys/mman.h>
33284388Sngie#include <sys/malloc.h>
34288330Sngie#include <sys/imgact.h>
35288330Sngie#include <sys/imgact_aout.h>
36284388Sngie#include <sys/kernel.h>
37284388Sngie#include <sys/sysent.h>
38284388Sngie
39284388Sngie#include <vm/vm.h>
40284388Sngie#include <vm/vm_kern.h>
41284388Sngie
42288330Sngie#define WSIZE 0x8000
43288330Sngie
44288330Sngiestruct gzip {
45288330Sngie	struct	image_params *ip;
46288330Sngie	struct  exec a_out;
47288330Sngie	int	error;
48289965Sngie	int	where;
49284388Sngie	u_char  *inbuf;
50284388Sngie	u_long	offset;
51284388Sngie	u_long	output;
52284388Sngie	u_long	len;
53284388Sngie	int	idx;
54284388Sngie	u_long	virtual_offset, file_offset, bss_size;
55288330Sngie        unsigned gz_wp;
56288330Sngie	u_char	gz_slide[WSIZE];
57288330Sngie};
58288330Sngie
59289965Sngieint inflate __P((struct gzip *));
60288330Sngie
61288330Sngieextern struct sysentvec aout_sysvec;
62288330Sngie
63288330Sngie#define slide (gz->gz_slide)
64288330Sngie#define wp    (gz->gz_wp)
65288330Sngie
66288330Sngieint
67288330Sngieexec_gzip_imgact(iparams)
68288330Sngie	struct image_params *iparams;
69288330Sngie{
70288330Sngie	int error,error2=0;
71288330Sngie	u_char *p = (u_char *) iparams->image_header;
72288330Sngie	struct gzip *gz;
73288330Sngie	struct vattr vattr;
74288330Sngie
75284388Sngie
76284388Sngie	if(p[0]  != 0x1f) return -1; /* Simply magic */
77289965Sngie	if(p[1]  != 0x8b) return -1; /* Simply magic */
78289965Sngie	if(p[2]  != 0x08) return -1; /* Compression method */
79289965Sngie	if(p[3]  != 0x00) return -1; /* Flags */
80289965Sngie				     /* 4 bytes timestamp */
81289965Sngie	if(p[8]  != 0x02) return -1; /* Extra flags */
82288330Sngie	if(p[9]  != 0x03) return -1; /* OS compressed on */
83289965Sngie
84288330Sngie	gz = malloc(sizeof *gz,M_TEMP,M_NOWAIT);
85288330Sngie	if(!gz)
86288330Sngie		return ENOMEM;
87289965Sngie	bzero(gz,sizeof *gz);
88289965Sngie
89289965Sngie	gz->ip = iparams;
90289965Sngie	gz->error = 0;
91289965Sngie	gz->idx = 10;
92289965Sngie
93289965Sngie	gz->len = gz->ip->attr->va_size;
94288330Sngie
95289965Sngie	error = inflate(gz);
96289965Sngie	if (gz->inbuf) {
97289965Sngie	    error2 =
98289965Sngie		vm_deallocate(kernel_map, (vm_offset_t)gz->inbuf, PAGE_SIZE);
99289965Sngie	}
100288330Sngie	printf("Output=%lu\n",gz->output);
101288330Sngie	printf("Inflate_error=%d gz->error=%d error2=%d where=%d\n",
102288330Sngie		error,gz->error,error2,gz->where);
103284388Sngie	if(error)
104284388Sngie		return ENOEXEC;
105284388Sngie
106288330Sngie	error = gz->error;
107284388Sngie	free(gz,M_TEMP);
108284388Sngie	return error;
109284388Sngie}
110284388Sngie
111288330Sngieint
112288330Sngiedo_aout_hdr(struct gzip *gz)
113288330Sngie{
114288330Sngie    int error;
115284388Sngie    struct vmspace *vmspace = gz->ip->proc->p_vmspace;
116284388Sngie    u_long vmaddr;
117284388Sngie
118284388Sngie    /*
119284388Sngie     * Set file/virtual offset based on a.out variant.
120284388Sngie     *	We do two cases: host byte order and network byte order
121284388Sngie     *	(for NetBSD compatibility)
122284388Sngie     */
123284388Sngie    switch ((int)(gz->a_out.a_magic & 0xffff)) {
124284388Sngie    case ZMAGIC:
125284388Sngie	gz->virtual_offset = 0;
126284388Sngie	if (gz->a_out.a_text) {
127284388Sngie	    gz->file_offset = NBPG;
128284388Sngie	} else {
129284388Sngie	    /* Bill's "screwball mode" */
130284388Sngie	    gz->file_offset = 0;
131284388Sngie	}
132284388Sngie	break;
133284388Sngie    case QMAGIC:
134284388Sngie	gz->virtual_offset = NBPG;
135284388Sngie	gz->file_offset = 0;
136284388Sngie	break;
137284388Sngie    default:
138284388Sngie	/* NetBSD compatibility */
139284388Sngie	switch ((int)(ntohl(gz->a_out.a_magic) & 0xffff)) {
140284388Sngie	case ZMAGIC:
141284388Sngie	case QMAGIC:
142284388Sngie	    gz->virtual_offset = NBPG;
143284388Sngie	    gz->file_offset = 0;
144284388Sngie	    break;
145288330Sngie	default:
146288330Sngie	    gz->where = __LINE__;
147288330Sngie	    return (-1);
148288330Sngie	}
149284388Sngie    }
150284388Sngie
151284388Sngie    gz->bss_size = roundup(gz->a_out.a_bss, NBPG);
152284388Sngie
153284388Sngie    /*
154284388Sngie     * Check various fields in header for validity/bounds.
155284388Sngie     */
156284388Sngie    if (/* entry point must lay with text region */
157284388Sngie	gz->a_out.a_entry < gz->virtual_offset ||
158284388Sngie	gz->a_out.a_entry >= gz->virtual_offset + gz->a_out.a_text ||
159284388Sngie
160284388Sngie	/* text and data size must each be page rounded */
161284388Sngie	gz->a_out.a_text % NBPG ||
162284388Sngie	gz->a_out.a_data % NBPG) {
163284388Sngie	    gz->where = __LINE__;
164284388Sngie	    return (-1);
165288330Sngie    }
166288330Sngie
167288330Sngie
168288330Sngie    /*
169288330Sngie     * text/data/bss must not exceed limits
170288330Sngie     */
171288330Sngie    if (/* text can't exceed maximum text size */
172288330Sngie	gz->a_out.a_text > MAXTSIZ ||
173288330Sngie
174288330Sngie	/* data + bss can't exceed maximum data size */
175288330Sngie	gz->a_out.a_data + gz->bss_size > MAXDSIZ ||
176288330Sngie
177288330Sngie	/* data + bss can't exceed rlimit */
178288330Sngie	gz->a_out.a_data + gz->bss_size >
179288330Sngie	    gz->ip->proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
180288330Sngie		    gz->where = __LINE__;
181288330Sngie		    return (ENOMEM);
182288330Sngie    }
183289965Sngie
184289965Sngie    /* copy in arguments and/or environment from old process */
185289965Sngie    error = exec_extract_strings(gz->ip);
186289965Sngie    if (error) {
187289965Sngie	gz->where = __LINE__;
188289965Sngie	return (error);
189289965Sngie    }
190289965Sngie
191289965Sngie    /*
192289965Sngie     * Destroy old process VM and create a new one (with a new stack)
193289965Sngie     */
194289965Sngie    exec_new_vmspace(gz->ip);
195289965Sngie
196289965Sngie    vmaddr = gz->virtual_offset;
197289965Sngie
198288330Sngie    error = vm_mmap(&vmspace->vm_map,           /* map */
199288330Sngie	&vmaddr,                                /* address */
200288330Sngie	gz->a_out.a_text,                      /* size */
201288330Sngie	VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_WRITE,  /* protection */
202288330Sngie	VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_WRITE,
203288330Sngie	MAP_ANON | MAP_FIXED,                /* flags */
204289965Sngie	0,				        /* vnode */
205288330Sngie	0);                                     /* offset */
206288330Sngie
207288330Sngie    if (error) {
208288330Sngie	gz->where = __LINE__;
209288330Sngie	return (error);
210289965Sngie    }
211289965Sngie
212289965Sngie    vmaddr = gz->virtual_offset + gz->a_out.a_text;
213289965Sngie
214289965Sngie    /*
215289965Sngie     * Map data read/write (if text is 0, assume text is in data area
216289965Sngie     *      [Bill's screwball mode])
217289965Sngie     */
218289965Sngie
219289965Sngie    error = vm_mmap(&vmspace->vm_map,
220289965Sngie	&vmaddr,
221289965Sngie	gz->a_out.a_data,
222289965Sngie	VM_PROT_READ | VM_PROT_WRITE | (gz->a_out.a_text ? 0 : VM_PROT_EXECUTE),
223289965Sngie	VM_PROT_ALL, MAP_ANON | MAP_FIXED,
224289965Sngie	0,
225289965Sngie	0);
226289965Sngie
227289965Sngie    if (error) {
228289965Sngie	gz->where = __LINE__;
229289965Sngie	return (error);
230288330Sngie    }
231288330Sngie
232289965Sngie    /*
233289965Sngie     * Allocate demand-zeroed area for uninitialized data
234289965Sngie     * "bss" = 'block started by symbol' - named after the IBM 7090
235289965Sngie     *      instruction of the same name.
236289965Sngie     */
237289965Sngie    vmaddr = gz->virtual_offset + gz->a_out.a_text + gz->a_out.a_data;
238289965Sngie    error = vm_allocate(&vmspace->vm_map, &vmaddr, gz->bss_size, FALSE);
239289965Sngie    if (error) {
240289965Sngie	gz->where = __LINE__;
241289965Sngie	return (error);
242289965Sngie    }
243289965Sngie
244289965Sngie    /* Fill in process VM information */
245289965Sngie    vmspace->vm_tsize = gz->a_out.a_text >> PAGE_SHIFT;
246289965Sngie    vmspace->vm_dsize = (gz->a_out.a_data + gz->bss_size) >> PAGE_SHIFT;
247289965Sngie    vmspace->vm_taddr = (caddr_t) gz->virtual_offset;
248289965Sngie    vmspace->vm_daddr = (caddr_t) gz->virtual_offset + gz->a_out.a_text;
249289965Sngie
250289965Sngie    /* Fill in image_params */
251289965Sngie    gz->ip->interpreted = 0;
252289965Sngie    gz->ip->entry_addr = gz->a_out.a_entry;
253289965Sngie
254289965Sngie    gz->ip->proc->p_sysent = &aout_sysvec;
255289965Sngieprintf("a.out ok, entry=%08x\n",gz->ip->entry_addr);
256289965Sngie    return 0;
257289965Sngie}
258289965Sngie
259289965Sngie/*
260289965Sngie * Tell kern_execve.c about it, with a little help from the linker.
261289965Sngie * Since `const' objects end up in the text segment, TEXT_SET is the
262289965Sngie * correct directive to use.
263289965Sngie */
264289965Sngiestatic const struct execsw gzip_execsw = { exec_gzip_imgact, "gzip" };
265289965SngieTEXT_SET(execsw_set, gzip_execsw);
266289965Sngie
267289965Sngie/* Stuff to make inflate() work */
268289965Sngie#  define uch u_char
269289965Sngie#  define ush u_short
270289965Sngie#  define ulg u_long
271289965Sngie#  define memzero(dest,len)      bzero(dest,len)
272289965Sngie#  define NOMEMCPY
273289965Sngie#define FPRINTF printf
274289965Sngie
275289965Sngie#define EOF -1
276289965Sngie#define CHECK_EOF
277289965Sngiestatic int
278289965SngieNextByte(struct gzip *gz)
279289965Sngie{
280288330Sngie	int error;
281288330Sngie
282288330Sngie	if(gz->idx >= gz->len)
283288330Sngie	    return EOF;
284288330Sngie
285288330Sngie	if((!gz->inbuf) || gz->idx >= (gz->offset+PAGE_SIZE)) {
286288330Sngie		if(gz->inbuf) {
287288330Sngie		    error = vm_deallocate(kernel_map,
288288330Sngie		      (vm_offset_t)gz->inbuf, PAGE_SIZE);
289288330Sngie		    if(error) {
290288330Sngie			gz->where = __LINE__;
291288330Sngie			gz->error = error;
292288330Sngie			printf("exec_gzip: Error %d in vm)deallocate",error);
293288330Sngie			return EOF;
294288330Sngie		    }
295288330Sngie		}
296288330Sngie
297288330Sngie		gz->offset += PAGE_SIZE;
298288330Sngie
299288330Sngie		error = vm_mmap(kernel_map,             /* map */
300288330Sngie                        (vm_offset_t *)&gz->inbuf,       /* address */
301288330Sngie                        PAGE_SIZE,                      /* size */
302288330Sngie                        VM_PROT_READ,                   /* protection */
303288330Sngie                        VM_PROT_READ,                   /* max protection */
304288330Sngie                        0,                              /* flags */
305288330Sngie                        (caddr_t)gz->ip->vnodep,        /* vnode */
306288330Sngie                        gz->offset);                    /* offset */
307288330Sngie		if(error) {
308288330Sngie		    gz->where = __LINE__;
309288330Sngie		    gz->error = error;
310288330Sngie		    printf("exec_gzip: Error %d in vm_mmap",error);
311288330Sngie		    return EOF;
312288330Sngie		}
313288330Sngie
314288330Sngie	}
315288330Sngie	return gz->inbuf[(gz->idx++) - gz->offset];
316288330Sngie}
317288330Sngie
318288330Sngie#define NEXTBYTE NextByte(gz)
319288330Sngie
320288330Sngiestatic int
321288330SngieFlush(struct gzip *gz,u_long siz)
322289965Sngie{
323289965Sngie    u_char *p = slide,*q;
324289965Sngie    int i;
325289965Sngie
326289965Sngie#if 0
327289965Sngie    int i;
328289965Sngie    printf("<");
329289965Sngie    for(i=0;i<siz;i++)
330289965Sngie	printf("%02x",slide[i]);
331289965Sngie    printf(">\n");
332289965Sngie#endif
333289965Sngie
334289965Sngie    printf("<%lu>",siz);
335289965Sngie
336289965Sngie    /* First, find a a.out-header */
337289965Sngie    if(gz->output < sizeof gz->a_out) {
338289965Sngie	q = (u_char*) &gz->a_out;
339289965Sngie	i = min(siz,sizeof gz->a_out - gz->output);
340289965Sngie	bcopy(p,q+gz->output,i);
341289965Sngie	gz->output += i;
342289965Sngie	p += i;
343289965Sngie	siz -= i;
344289965Sngie	if(gz->output == sizeof gz->a_out) {
345289965Sngie	    for(i=0;i<sizeof gz->a_out;i+=4)
346289965Sngie		printf("%02x%02x%02x%02x ",
347289965Sngie		    q[i+0],q[i+1],q[i+2],q[i+3]);
348289965Sngie	    printf("\n");
349289965Sngie	    i = do_aout_hdr(gz);
350289965Sngie	    printf("file_offset=%lx virtual_offset=%lx",
351289965Sngie		gz->file_offset,gz->virtual_offset);
352289965Sngie	    if(i) {
353289965Sngie		gz->error = i;
354289965Sngie		return i;
355289965Sngie	    }
356289965Sngie	    if(gz->file_offset < sizeof gz->a_out) {
357289965Sngie		q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
358289965Sngie		bcopy(&gz->a_out,q,sizeof gz->a_out);
359289965Sngie	    }
360289965Sngie	}
361289965Sngie    }
362289965Sngie    if(gz->output >= gz->file_offset &&
363289965Sngie		gz->output < (gz->file_offset+
364289965Sngie			gz->a_out.a_text+
365289965Sngie			gz->a_out.a_data)) {
366289965Sngie
367289965Sngie	i = min(siz,
368289965Sngie	    (gz->file_offset+
369289965Sngie		gz->a_out.a_text+
370289965Sngie		gz->a_out.a_data)
371289965Sngie		-gz->output);
372289965Sngie	q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset;
373289965Sngie	bcopy(p,q,i);
374289965Sngie	gz->output += i;
375289965Sngie	p += i;
376289965Sngie	siz -= i;
377289965Sngie    }
378289965Sngie    if(!siz) return 0;
379289965Sngie    gz->output += siz;
380289965Sngie    return 0;
381289965Sngie}
382289965Sngie
383289965Sngie#define FLUSH(x,y) {int foo = Flush(x,y); if (foo) return foo;}
384289965Sngiestatic
385289965Sngievoid *
386289965Sngiemyalloc(u_long size)
387289965Sngie{
388289965Sngie	return malloc(size, M_TEMP, M_NOWAIT);
389289965Sngie}
390289965Sngie#define malloc myalloc
391289965Sngie
392289965Sngiestatic
393289965Sngievoid
394289965Sngiemyfree(void * ptr)
395289965Sngie{
396289965Sngie	free(ptr,M_TEMP);
397289965Sngie}
398289965Sngie#define free myfree
399289965Sngie
400289965Sngiestatic int qflag;
401289965Sngie#define Trace(x) /* */
402289965Sngie
403289965Sngie
404289965Sngie/* This came from unzip-5.12.  I have changed it to pass a "gz" pointer
405289965Sngie * around, thus hopefully making it re-entrant.  Poul-Henningi
406289965Sngie */
407289965Sngie
408289965Sngie/* inflate.c -- put in the public domain by Mark Adler
409289965Sngie   version c14o, 23 August 1994 */
410289965Sngie
411289965Sngie/* You can do whatever you like with this source file, though I would
412289965Sngie   prefer that if you modify it and redistribute it that you include
413289965Sngie   comments to that effect with your name and the date.  Thank you.
414289965Sngie
415289965Sngie   History:
416289965Sngie   vers    date          who           what
417289965Sngie   ----  ---------  --------------  ------------------------------------
418289965Sngie    a    ~~ Feb 92  M. Adler        used full (large, one-step) lookup table
419289965Sngie    b1   21 Mar 92  M. Adler        first version with partial lookup tables
420289965Sngie    b2   21 Mar 92  M. Adler        fixed bug in fixed-code blocks
421289965Sngie    b3   22 Mar 92  M. Adler        sped up match copies, cleaned up some
422289965Sngie    b4   25 Mar 92  M. Adler        added prototypes; removed window[] (now
423289965Sngie                                    is the responsibility of unzip.h--also
424289965Sngie                                    changed name to slide[]), so needs diffs
425289965Sngie                                    for unzip.c and unzip.h (this allows
426289965Sngie                                    compiling in the small model on MSDOS);
427289965Sngie                                    fixed cast of q in huft_build();
428289965Sngie    b5   26 Mar 92  M. Adler        got rid of unintended macro recursion.
429289965Sngie    b6   27 Mar 92  M. Adler        got rid of nextbyte() routine.  fixed
430289965Sngie                                    bug in inflate_fixed().
431289965Sngie    c1   30 Mar 92  M. Adler        removed lbits, dbits environment variables.
432289965Sngie                                    changed BMAX to 16 for explode.  Removed
433289965Sngie                                    OUTB usage, and replaced it with flush()--
434289965Sngie                                    this was a 20% speed improvement!  Added
435289965Sngie                                    an explode.c (to replace unimplod.c) that
436289965Sngie                                    uses the huft routines here.  Removed
437289965Sngie                                    register union.
438289965Sngie    c2    4 Apr 92  M. Adler        fixed bug for file sizes a multiple of 32k.
439289965Sngie    c3   10 Apr 92  M. Adler        reduced memory of code tables made by
440289965Sngie                                    huft_build significantly (factor of two to
441289965Sngie                                    three).
442289965Sngie    c4   15 Apr 92  M. Adler        added NOMEMCPY do kill use of memcpy().
443289965Sngie                                    worked around a Turbo C optimization bug.
444289965Sngie    c5   21 Apr 92  M. Adler        added the WSIZE #define to allow reducing
445289965Sngie                                    the 32K window size for specialized
446289965Sngie                                    applications.
447289965Sngie    c6   31 May 92  M. Adler        added some typecasts to eliminate warnings
448289965Sngie    c7   27 Jun 92  G. Roelofs      added some more typecasts (444:  MSC bug).
449289965Sngie    c8    5 Oct 92  J-l. Gailly     added ifdef'd code to deal with PKZIP bug.
450289965Sngie    c9    9 Oct 92  M. Adler        removed a memory error message (~line 416).
451289965Sngie    c10  17 Oct 92  G. Roelofs      changed ULONG/UWORD/byte to ulg/ush/uch,
452289965Sngie                                    removed old inflate, renamed inflate_entry
453289965Sngie                                    to inflate, added Mark's fix to a comment.
454289965Sngie   c10.5 14 Dec 92  M. Adler        fix up error messages for incomplete trees.
455289965Sngie    c11   2 Jan 93  M. Adler        fixed bug in detection of incomplete
456289965Sngie                                    tables, and removed assumption that EOB is
457289965Sngie                                    the longest code (bad assumption).
458289965Sngie    c12   3 Jan 93  M. Adler        make tables for fixed blocks only once.
459289965Sngie    c13   5 Jan 93  M. Adler        allow all zero length codes (pkzip 2.04c
460289965Sngie                                    outputs one zero length code for an empty
461289965Sngie                                    distance tree).
462289965Sngie    c14  12 Mar 93  M. Adler        made inflate.c standalone with the
463289965Sngie                                    introduction of inflate.h.
464289965Sngie   c14b  16 Jul 93  G. Roelofs      added (unsigned) typecast to w at 470.
465289965Sngie   c14c  19 Jul 93  J. Bush         changed v[N_MAX], l[288], ll[28x+3x] arrays
466289965Sngie                                    to static for Amiga.
467289965Sngie   c14d  13 Aug 93  J-l. Gailly     de-complicatified Mark's c[*p++]++ thing.
468289965Sngie   c14e   8 Oct 93  G. Roelofs      changed memset() to memzero().
469289965Sngie   c14f  22 Oct 93  G. Roelofs      renamed quietflg to qflag; made Trace()
470289965Sngie                                    conditional; added inflate_free().
471289965Sngie   c14g  28 Oct 93  G. Roelofs      changed l/(lx+1) macro to pointer (Cray bug)
472289965Sngie   c14h   7 Dec 93  C. Ghisler      huft_build() optimizations.
473289965Sngie   c14i   9 Jan 94  A. Verheijen    set fixed_t{d,l} to NULL after freeing;
474289965Sngie                    G. Roelofs      check NEXTBYTE macro for EOF.
475289965Sngie   c14j  23 Jan 94  G. Roelofs      removed Ghisler "optimizations"; ifdef'd
476289965Sngie                                    EOF check.
477289965Sngie   c14k  27 Feb 94  G. Roelofs      added some typecasts to avoid warnings.
478289965Sngie   c14l   9 Apr 94  G. Roelofs      fixed split comments on preprocessor lines
479289965Sngie                                    to avoid bug in Encore compiler.
480289965Sngie   c14m   7 Jul 94  P. Kienitz      modified to allow assembler version of
481289965Sngie                                    inflate_codes() (define ASM_INFLATECODES)
482289965Sngie   c14n  22 Jul 94  G. Roelofs      changed fprintf to FPRINTF for DLL versions
483289965Sngie   c14o  23 Aug 94  C. Spieler      added a newline to a debug statement;
484289965Sngie                    G. Roelofs      added another typecast to avoid MSC warning
485289965Sngie */
486289965Sngie
487289965Sngie
488289965Sngie/*
489289965Sngie   Inflate deflated (PKZIP's method 8 compressed) data.  The compression
490289965Sngie   method searches for as much of the current string of bytes (up to a
491289965Sngie   length of 258) in the previous 32K bytes.  If it doesn't find any
492289965Sngie   matches (of at least length 3), it codes the next byte.  Otherwise, it
493289965Sngie   codes the length of the matched string and its distance backwards from
494289965Sngie   the current position.  There is a single Huffman code that codes both
495289965Sngie   single bytes (called "literals") and match lengths.  A second Huffman
496289965Sngie   code codes the distance information, which follows a length code.  Each
497289965Sngie   length or distance code actually represents a base value and a number
498289965Sngie   of "extra" (sometimes zero) bits to get to add to the base value.  At
499289965Sngie   the end of each deflated block is a special end-of-block (EOB) literal/
500289965Sngie   length code.  The decoding process is basically: get a literal/length
501289965Sngie   code; if EOB then done; if a literal, emit the decoded byte; if a
502289965Sngie   length then get the distance and emit the referred-to bytes from the
503289965Sngie   sliding window of previously emitted data.
504289965Sngie
505289965Sngie   There are (currently) three kinds of inflate blocks: stored, fixed, and
506289965Sngie   dynamic.  The compressor outputs a chunk of data at a time and decides
507289965Sngie   which method to use on a chunk-by-chunk basis.  A chunk might typically
508289965Sngie   be 32K to 64K, uncompressed.  If the chunk is uncompressible, then the
509289965Sngie   "stored" method is used.  In this case, the bytes are simply stored as
510289965Sngie   is, eight bits per byte, with none of the above coding.  The bytes are
511289965Sngie   preceded by a count, since there is no longer an EOB code.
512289965Sngie
513289965Sngie   If the data is compressible, then either the fixed or dynamic methods
514289965Sngie   are used.  In the dynamic method, the compressed data is preceded by
515289965Sngie   an encoding of the literal/length and distance Huffman codes that are
516289965Sngie   to be used to decode this block.  The representation is itself Huffman
517289965Sngie   coded, and so is preceded by a description of that code.  These code
518289965Sngie   descriptions take up a little space, and so for small blocks, there is
519289965Sngie   a predefined set of codes, called the fixed codes.  The fixed method is
520289965Sngie   used if the block ends up smaller that way (usually for quite small
521289965Sngie   chunks); otherwise the dynamic method is used.  In the latter case, the
522289965Sngie   codes are customized to the probabilities in the current block and so
523289965Sngie   can code it much better than the pre-determined fixed codes can.
524289965Sngie
525289965Sngie   The Huffman codes themselves are decoded using a mutli-level table
526289965Sngie   lookup, in order to maximize the speed of decoding plus the speed of
527289965Sngie   building the decoding tables.  See the comments below that precede the
528289965Sngie   lbits and dbits tuning parameters.
529289965Sngie */
530289965Sngie
531289965Sngie
532289965Sngie/*
533289965Sngie   Notes beyond the 1.93a appnote.txt:
534289965Sngie
535289965Sngie   1. Distance pointers never point before the beginning of the output
536289965Sngie      stream.
537289965Sngie   2. Distance pointers can point back across blocks, up to 32k away.
538301627Sngie   3. There is an implied maximum of 7 bits for the bit length table and
539289965Sngie      15 bits for the actual data.
540289965Sngie   4. If only one code exists, then it is encoded using one bit.  (Zero
541289965Sngie      would be more efficient, but perhaps a little confusing.)  If two
542289965Sngie      codes exist, they are coded using one bit each (0 and 1).
543289965Sngie   5. There is no way of sending zero distance codes--a dummy must be
544289965Sngie      sent if there are none.  (History: a pre 2.0 version of PKZIP would
545289965Sngie      store blocks with no distance codes, but this was discovered to be
546289965Sngie      too harsh a criterion.)  Valid only for 1.93a.  2.04c does allow
547289965Sngie      zero distance codes, which is sent as one code of zero bits in
548289965Sngie      length.
549289965Sngie   6. There are up to 286 literal/length codes.  Code 256 represents the
550289965Sngie      end-of-block.  Note however that the static length tree defines
551289965Sngie      288 codes just to fill out the Huffman codes.  Codes 286 and 287
552289965Sngie      cannot be used though, since there is no length base or extra bits
553289965Sngie      defined for them.  Similarily, there are up to 30 distance codes.
554289965Sngie      However, static trees define 32 codes (all 5 bits) to fill out the
555289965Sngie      Huffman codes, but the last two had better not show up in the data.
556289965Sngie   7. Unzip can check dynamic Huffman blocks for complete code sets.
557289965Sngie      The exception is that a single code would not be complete (see #4).
558289965Sngie   8. The five bits following the block type is really the number of
559289965Sngie      literal codes sent minus 257.
560289965Sngie   9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
561289965Sngie      (1+6+6).  Therefore, to output three times the length, you output
562289965Sngie      three codes (1+1+1), whereas to output four times the same length,
563289965Sngie      you only need two codes (1+3).  Hmm.
564289965Sngie  10. In the tree reconstruction algorithm, Code = Code + Increment
565289965Sngie      only if BitLength(i) is not zero.  (Pretty obvious.)
566289965Sngie  11. Correction: 4 Bits: # of Bit Length codes - 4     (4 - 19)
567289965Sngie  12. Note: length code 284 can represent 227-258, but length code 285
568289965Sngie      really is 258.  The last length deserves its own, short code
569289965Sngie      since it gets used a lot in very redundant files.  The length
570289965Sngie      258 is special since 258 - 3 (the min match length) is 255.
571289965Sngie  13. The literal/length and distance code bit lengths are read as a
572289965Sngie      single stream of lengths.  It is possible (and advantageous) for
573289965Sngie      a repeat code (16, 17, or 18) to go across the boundary between
574289965Sngie      the two sets of lengths.
575289965Sngie */
576289965Sngie
577289965Sngie
578289965Sngie#define PKZIP_BUG_WORKAROUND    /* PKZIP 1.93a problem--live with it */
579289965Sngie
580289965Sngie/*
581289965Sngie    inflate.h must supply the uch slide[WSIZE] array and the NEXTBYTE,
582289965Sngie    FLUSH() and memzero macros.  If the window size is not 32K, it
583289965Sngie    should also define WSIZE.  If INFMOD is defined, it can include
584289965Sngie    compiled functions to support the NEXTBYTE and/or FLUSH() macros.
585289965Sngie    There are defaults for NEXTBYTE and FLUSH() below for use as
586289965Sngie    examples of what those functions need to do.  Normally, you would
587289965Sngie    also want FLUSH() to compute a crc on the data.  inflate.h also
588289965Sngie    needs to provide these typedefs:
589289965Sngie
590289965Sngie        typedef unsigned char uch;
591289965Sngie        typedef unsigned short ush;
592289965Sngie        typedef unsigned long ulg;
593289965Sngie
594289965Sngie    This module uses the external functions malloc() and free() (and
595289965Sngie    probably memset() or bzero() in the memzero() macro).  Their
596289965Sngie    prototypes are normally found in <string.h> and <stdlib.h>.
597289965Sngie */
598289965Sngie#define INFMOD          /* tell inflate.h to include code to be compiled */
599289965Sngie
600289965Sngie/* Huffman code lookup table entry--this entry is four bytes for machines
601289965Sngie   that have 16-bit pointers (e.g. PC's in the small or medium model).
602289965Sngie   Valid extra bits are 0..13.  e == 15 is EOB (end of block), e == 16
603289965Sngie   means that v is a literal, 16 < e < 32 means that v is a pointer to
604289965Sngie   the next table, which codes e - 16 bits, and lastly e == 99 indicates
605289965Sngie   an unused code.  If a code with e == 99 is looked up, this implies an
606289965Sngie   error in the data. */
607289965Sngiestruct huft {
608289965Sngie  uch e;                /* number of extra bits or operation */
609289965Sngie  uch b;                /* number of bits in this code or subcode */
610289965Sngie  union {
611289965Sngie    ush n;              /* literal, length base, or distance base */
612289965Sngie    struct huft *t;     /* pointer to next level of table */
613289965Sngie  } v;
614289965Sngie};
615289965Sngie
616289965Sngie
617289965Sngie/* Function prototypes */
618289965Sngie#ifndef OF
619289965Sngie#  ifdef __STDC__
620289965Sngie#    define OF(a) a
621289965Sngie#  else /* !__STDC__ */
622289965Sngie#    define OF(a) ()
623289965Sngie#  endif /* ?__STDC__ */
624289965Sngie#endif
625289965Sngieint huft_build OF((struct gzip *,unsigned *, unsigned, unsigned, ush *, ush *,
626289965Sngie                   struct huft **, int *));
627289965Sngieint huft_free OF((struct gzip *,struct huft *));
628289965Sngieint inflate_codes OF((struct gzip *,struct huft *, struct huft *, int, int));
629289965Sngieint inflate_stored OF((struct gzip *));
630289965Sngieint inflate_fixed OF((struct gzip *));
631289965Sngieint inflate_dynamic OF((struct gzip *));
632289965Sngieint inflate_block OF((struct gzip *,int *));
633289965Sngieint inflate_free OF((struct gzip *));
634288330Sngie
635288330Sngie
636289965Sngie/* The inflate algorithm uses a sliding 32K byte window on the uncompressed
637288330Sngie   stream to find repeated byte strings.  This is implemented here as a
638288330Sngie   circular buffer.  The index is updated simply by incrementing and then
639288330Sngie   and'ing with 0x7fff (32K-1). */
640288330Sngie/* It is left to other modules to supply the 32K area.  It is assumed
641288330Sngie   to be usable as if it were declared "uch slide[32768];" or as just
642288330Sngie   "uch *slide;" and then malloc'ed in the latter case.  The definition
643288330Sngie   must be in unzip.h, included above. */
644288330Sngie
645288330Sngie
646288330Sngie/* Tables for deflate from PKZIP's appnote.txt. */
647288330Sngiestatic unsigned border[] = {    /* Order of the bit length code lengths */
648289965Sngie        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
649289965Sngiestatic ush cplens[] = {         /* Copy lengths for literal codes 257..285 */
650289965Sngie        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
651289965Sngie        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
652289965Sngie        /* note: see note #13 above about the 258 in this list. */
653289965Sngiestatic ush cplext[] = {         /* Extra bits for literal codes 257..285 */
654289965Sngie        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
655289965Sngie        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
656289965Sngiestatic ush cpdist[] = {         /* Copy offsets for distance codes 0..29 */
657289965Sngie        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
658289965Sngie        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
659289965Sngie        8193, 12289, 16385, 24577};
660289965Sngiestatic ush cpdext[] = {         /* Extra bits for distance codes */
661289965Sngie        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
662289965Sngie        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
663289965Sngie        12, 12, 13, 13};
664289965Sngie
665289965Sngie/* And'ing with mask[n] masks the lower n bits */
666289965Sngieush mask[] = {
667289965Sngie    0x0000,
668289965Sngie    0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
669289965Sngie    0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
670289965Sngie};
671289965Sngie
672289965Sngie
673289965Sngie/* Macros for inflate() bit peeking and grabbing.
674289965Sngie   The usage is:
675289965Sngie
676289965Sngie        NEEDBITS(j)
677289965Sngie        x = b & mask[j];
678289965Sngie        DUMPBITS(j)
679289965Sngie
680289965Sngie   where NEEDBITS makes sure that b has at least j bits in it, and
681289965Sngie   DUMPBITS removes the bits from b.  The macros use the variable k
682289965Sngie   for the number of bits in b.  Normally, b and k are register
683289965Sngie   variables for speed, and are initialized at the begining of a
684289965Sngie   routine that uses these macros from a global bit buffer and count.
685289965Sngie
686289965Sngie   In order to not ask for more bits than there are in the compressed
687289965Sngie   stream, the Huffman tables are constructed to only ask for just
688289965Sngie   enough bits to make up the end-of-block code (value 256).  Then no
689289965Sngie   bytes need to be "returned" to the buffer at the end of the last
690289965Sngie   block.  See the huft_build() routine.
691289965Sngie */
692289965Sngie
693289965Sngieulg bb;                         /* bit buffer */
694289965Sngieunsigned bk;                    /* bits in bit buffer */
695289965Sngie
696289965Sngie#ifndef CHECK_EOF
697289965Sngie#  define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE)<<k;k+=8;}}
698289965Sngie#else
699289965Sngie#  define NEEDBITS(n) {while(k<(n)){int c=NEXTBYTE;if(c==EOF)return 1;\
700289965Sngie    b|=((ulg)c)<<k;k+=8;}}
701289965Sngie#endif                      /* Piet Plomp:  change "return 1" to "break" */
702289965Sngie
703289965Sngie#define DUMPBITS(n) {b>>=(n);k-=(n);}
704289965Sngie
705311998Sasomers
706289965Sngie/*
707289965Sngie   Huffman code decoding is performed using a multi-level table lookup.
708289965Sngie   The fastest way to decode is to simply build a lookup table whose
709289965Sngie   size is determined by the longest code.  However, the time it takes
710289965Sngie   to build this table can also be a factor if the data being decoded
711289965Sngie   is not very long.  The most common codes are necessarily the
712289965Sngie   shortest codes, so those codes dominate the decoding time, and hence
713289965Sngie   the speed.  The idea is you can have a shorter table that decodes the
714289965Sngie   shorter, more probable codes, and then point to subsidiary tables for
715289965Sngie   the longer codes.  The time it costs to decode the longer codes is
716289965Sngie   then traded against the time it takes to make longer tables.
717289965Sngie
718289965Sngie   This results of this trade are in the variables lbits and dbits
719289965Sngie   below.  lbits is the number of bits the first level table for literal/
720289965Sngie   length codes can decode in one step, and dbits is the same thing for
721289965Sngie   the distance codes.  Subsequent tables are also less than or equal to
722289965Sngie   those sizes.  These values may be adjusted either when all of the
723289965Sngie   codes are shorter than that, in which case the longest code length in
724289965Sngie   bits is used, or when the shortest code is *longer* than the requested
725289965Sngie   table size, in which case the length of the shortest code in bits is
726289965Sngie   used.
727289965Sngie
728289965Sngie   There are two different values for the two tables, since they code a
729289965Sngie   different number of possibilities each.  The literal/length table
730289965Sngie   codes 286 possible values, or in a flat code, a little over eight
731289965Sngie   bits.  The distance table codes 30 possible values, or a little less
732289965Sngie   than five bits, flat.  The optimum values for speed end up being
733289965Sngie   about one bit more than those, so lbits is 8+1 and dbits is 5+1.
734289965Sngie   The optimum values may differ though from machine to machine, and
735289965Sngie   possibly even between compilers.  Your mileage may vary.
736289965Sngie */
737289965Sngie
738289965Sngie
739289965Sngieint lbits = 9;          /* bits in base literal/length lookup table */
740289965Sngieint dbits = 6;          /* bits in base distance lookup table */
741289965Sngie
742289965Sngie
743289965Sngie/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
744289965Sngie#define BMAX 16         /* maximum bit length of any code (16 for explode) */
745289965Sngie#define N_MAX 288       /* maximum number of codes in any set */
746289965Sngie
747289965Sngie
748289965Sngieunsigned hufts;         /* track memory usage */
749289965Sngie
750289965Sngie
751289965Sngieint huft_build(gz,b, n, s, d, e, t, m)
752289965Sngiestruct gzip *gz;
753289965Sngieunsigned *b;            /* code lengths in bits (all assumed <= BMAX) */
754289965Sngieunsigned n;             /* number of codes (assumed <= N_MAX) */
755289965Sngieunsigned s;             /* number of simple-valued codes (0..s-1) */
756289965Sngieush *d;                 /* list of base values for non-simple codes */
757289965Sngieush *e;                 /* list of extra bits for non-simple codes */
758289965Sngiestruct huft **t;        /* result: starting table */
759289965Sngieint *m;                 /* maximum lookup bits, returns actual */
760289965Sngie/* Given a list of code lengths and a maximum table size, make a set of
761289965Sngie   tables to decode that set of codes.  Return zero on success, one if
762289965Sngie   the given code set is incomplete (the tables are still built in this
763289965Sngie   case), two if the input is invalid (all zero length codes or an
764289965Sngie   oversubscribed set of lengths), and three if not enough memory.
765289965Sngie   The code with value 256 is special, and the tables are constructed
766289965Sngie   so that no bits beyond that code are fetched when that code is
767289965Sngie   decoded. */
768289965Sngie{
769289965Sngie  unsigned a;                   /* counter for codes of length k */
770289965Sngie  unsigned c[BMAX+1];           /* bit length count table */
771289965Sngie  unsigned el;                  /* length of EOB code (value 256) */
772289965Sngie  unsigned f;                   /* i repeats in table every f entries */
773289965Sngie  int g;                        /* maximum code length */
774289965Sngie  int h;                        /* table level */
775289965Sngie  register unsigned i;          /* counter, current code */
776289965Sngie  register unsigned j;          /* counter */
777289965Sngie  register int k;               /* number of bits in current code */
778289965Sngie  int lx[BMAX+1];               /* memory for l[-1..BMAX-1] */
779289965Sngie  int *l = lx+1;                /* stack of bits per table */
780289965Sngie  register unsigned *p;         /* pointer into c[], b[], or v[] */
781289965Sngie  register struct huft *q;      /* points to current table */
782289965Sngie  struct huft r;                /* table entry for structure assignment */
783289965Sngie  struct huft *u[BMAX];         /* table stack */
784289965Sngie  static unsigned v[N_MAX];     /* values in order of bit length */
785289965Sngie  register int w;               /* bits before this table == (l * h) */
786289965Sngie  unsigned x[BMAX+1];           /* bit offsets, then code stack */
787289965Sngie  unsigned *xp;                 /* pointer into x */
788289965Sngie  int y;                        /* number of dummy codes added */
789289965Sngie  unsigned z;                   /* number of entries in current table */
790289965Sngie
791289965Sngie
792289965Sngie  /* Generate counts for each bit length */
793289965Sngie  el = n > 256 ? b[256] : BMAX; /* set length of EOB code, if any */
794289965Sngie  memzero((char *)c, sizeof(c));
795289965Sngie  p = b;  i = n;
796289965Sngie  do {
797289965Sngie    c[*p]++; p++;               /* assume all entries <= BMAX */
798289965Sngie  } while (--i);
799289965Sngie  if (c[0] == n)                /* null input--all zero length codes */
800289965Sngie  {
801289965Sngie    *t = (struct huft *)NULL;
802289965Sngie    *m = 0;
803289965Sngie    return 0;
804289965Sngie  }
805289965Sngie
806289965Sngie
807289965Sngie  /* Find minimum and maximum length, bound *m by those */
808289965Sngie  for (j = 1; j <= BMAX; j++)
809289965Sngie    if (c[j])
810289965Sngie      break;
811289965Sngie  k = j;                        /* minimum code length */
812289965Sngie  if ((unsigned)*m < j)
813289965Sngie    *m = j;
814289965Sngie  for (i = BMAX; i; i--)
815289965Sngie    if (c[i])
816289965Sngie      break;
817289965Sngie  g = i;                        /* maximum code length */
818289965Sngie  if ((unsigned)*m > i)
819289965Sngie    *m = i;
820289965Sngie
821289965Sngie
822289965Sngie  /* Adjust last length count to fill out codes, if needed */
823289965Sngie  for (y = 1 << j; j < i; j++, y <<= 1)
824289965Sngie    if ((y -= c[j]) < 0)
825289965Sngie      return 2;                 /* bad input: more codes than bits */
826289965Sngie  if ((y -= c[i]) < 0)
827289965Sngie    return 2;
828289965Sngie  c[i] += y;
829289965Sngie
830289965Sngie
831289965Sngie  /* Generate starting offsets into the value table for each length */
832289965Sngie  x[1] = j = 0;
833289965Sngie  p = c + 1;  xp = x + 2;
834289965Sngie  while (--i) {                 /* note that i == g from above */
835289965Sngie    *xp++ = (j += *p++);
836289965Sngie  }
837289965Sngie
838289965Sngie
839289965Sngie  /* Make a table of values in order of bit lengths */
840289965Sngie  p = b;  i = 0;
841289965Sngie  do {
842288330Sngie    if ((j = *p++) != 0)
843288330Sngie      v[x[j]++] = i;
844289965Sngie  } while (++i < n);
845288330Sngie
846288330Sngie
847288330Sngie  /* Generate the Huffman codes and for each, make the table entries */
848288330Sngie  x[0] = i = 0;                 /* first Huffman code is zero */
849288330Sngie  p = v;                        /* grab values in bit order */
850288330Sngie  h = -1;                       /* no tables yet--level -1 */
851288330Sngie  w = l[-1] = 0;                /* no bits decoded yet */
852288330Sngie  u[0] = (struct huft *)NULL;   /* just to keep compilers happy */
853288330Sngie  q = (struct huft *)NULL;      /* ditto */
854288330Sngie  z = 0;                        /* ditto */
855288330Sngie
856288330Sngie  /* go through the bit lengths (k already is bits in shortest code) */
857288330Sngie  for (; k <= g; k++)
858288330Sngie  {
859288330Sngie    a = c[k];
860288330Sngie    while (a--)
861289965Sngie    {
862288330Sngie      /* here i is the Huffman code of length k bits for value *p */
863288330Sngie      /* make tables up to required level */
864289965Sngie      while (k > w + l[h])
865289965Sngie      {
866289965Sngie        w += l[h++];            /* add bits already decoded */
867289965Sngie
868289965Sngie        /* compute minimum size table less than or equal to *m bits */
869289965Sngie        z = (z = g - w) > (unsigned)*m ? *m : z;        /* upper limit */
870289965Sngie        if ((f = 1 << (j = k - w)) > a + 1)     /* try a k-w bit table */
871289965Sngie        {                       /* too few codes for k-w bit table */
872289965Sngie          f -= a + 1;           /* deduct codes from patterns left */
873289965Sngie          xp = c + k;
874289965Sngie          while (++j < z)       /* try smaller tables up to z bits */
875289965Sngie          {
876289965Sngie            if ((f <<= 1) <= *++xp)
877289965Sngie              break;            /* enough codes to use up j bits */
878289965Sngie            f -= *xp;           /* else deduct codes from patterns */
879289965Sngie          }
880289965Sngie        }
881289965Sngie        if ((unsigned)w + j > el && (unsigned)w < el)
882289965Sngie          j = el - w;           /* make EOB code end at table */
883289965Sngie        z = 1 << j;             /* table entries for j-bit table */
884289965Sngie        l[h] = j;               /* set table size in stack */
885289965Sngie
886289965Sngie        /* allocate and link in new table */
887289965Sngie        if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
888289965Sngie            (struct huft *)NULL)
889288330Sngie        {
890288330Sngie          if (h)
891288330Sngie            huft_free(gz,u[0]);
892288330Sngie          return 3;             /* not enough memory */
893288330Sngie        }
894288330Sngie        hufts += z + 1;         /* track memory usage */
895288330Sngie        *t = q + 1;             /* link to list for huft_free() */
896288330Sngie        *(t = &(q->v.t)) = (struct huft *)NULL;
897288330Sngie        u[h] = ++q;             /* table starts after link */
898288330Sngie
899288330Sngie        /* connect to last table, if there is one */
900288330Sngie        if (h)
901288330Sngie        {
902288330Sngie          x[h] = i;             /* save pattern for backing up */
903288330Sngie          r.b = (uch)l[h-1];    /* bits to dump before this table */
904288330Sngie          r.e = (uch)(16 + j);  /* bits in this table */
905288330Sngie          r.v.t = q;            /* pointer to this table */
906288330Sngie          j = (i & ((1 << w) - 1)) >> (w - l[h-1]);
907288330Sngie          u[h-1][j] = r;        /* connect to last table */
908288330Sngie        }
909288330Sngie      }
910288330Sngie
911288330Sngie      /* set up table entry in r */
912288330Sngie      r.b = (uch)(k - w);
913284388Sngie      if (p >= v + n)
914284388Sngie        r.e = 99;               /* out of values--invalid code */
915289965Sngie      else if (*p < s)
916284388Sngie      {
917284388Sngie        r.e = (uch)(*p < 256 ? 16 : 15);    /* 256 is end-of-block code */
918284388Sngie        r.v.n = *p++;           /* simple code is just the value */
919288330Sngie      }
920288330Sngie      else
921289965Sngie      {
922289965Sngie        r.e = (uch)e[*p - s];   /* non-simple--look up in lists */
923288330Sngie        r.v.n = d[*p++ - s];
924289965Sngie      }
925288330Sngie
926288330Sngie      /* fill code-like entries with r */
927289965Sngie      f = 1 << (k - w);
928288330Sngie      for (j = i >> w; j < z; j += f)
929289965Sngie        q[j] = r;
930289965Sngie
931289965Sngie      /* backwards increment the k-bit code i */
932288330Sngie      for (j = 1 << (k - 1); i & j; j >>= 1)
933288330Sngie        i ^= j;
934288330Sngie      i ^= j;
935288330Sngie
936289965Sngie      /* backup over finished tables */
937288330Sngie      while ((i & ((1 << w) - 1)) != x[h])
938289965Sngie        w -= l[--h];            /* don't need to update q */
939289965Sngie    }
940289965Sngie  }
941289965Sngie
942289965Sngie
943289965Sngie  /* return actual size of base table */
944289965Sngie  *m = l[0];
945288330Sngie
946289965Sngie
947289965Sngie  /* Return true (1) if we were given an incomplete table */
948289965Sngie  return y != 0 && g != 1;
949289965Sngie}
950289965Sngie
951289965Sngie
952289965Sngie
953289965Sngieint huft_free(gz,t)
954289965Sngiestruct gzip *gz;
955288330Sngiestruct huft *t;         /* table to free */
956289965Sngie/* Free the malloc'ed tables built by huft_build(), which makes a linked
957288330Sngie   list of the tables it made, with the links in a dummy first entry of
958284388Sngie   each table. */
959{
960  register struct huft *p, *q;
961
962
963  /* Go through linked list, freeing from the malloced (t[-1]) address. */
964  p = t;
965  while (p != (struct huft *)NULL)
966  {
967    q = (--p)->v.t;
968    free(p);
969    p = q;
970  }
971  return 0;
972}
973
974
975
976#ifdef ASM_INFLATECODES
977#  define inflate_codes(tl,td,bl,bd)  flate_codes(tl,td,bl,bd,(uch *)slide)
978   int flate_codes OF((struct huft *, struct huft *, int, int, uch *));
979
980#else
981
982int inflate_codes(gz,tl, td, bl, bd)
983struct gzip *gz;
984struct huft *tl, *td;   /* literal/length and distance decoder tables */
985int bl, bd;             /* number of bits decoded by tl[] and td[] */
986/* inflate (decompress) the codes in a deflated (compressed) block.
987   Return an error code or zero if it all goes ok. */
988{
989  register unsigned e;  /* table entry flag/number of extra bits */
990  unsigned n, d;        /* length and index for copy */
991  unsigned w;           /* current window position */
992  struct huft *t;       /* pointer to table entry */
993  unsigned ml, md;      /* masks for bl and bd bits */
994  register ulg b;       /* bit buffer */
995  register unsigned k;  /* number of bits in bit buffer */
996
997
998  /* make local copies of globals */
999  b = bb;                       /* initialize bit buffer */
1000  k = bk;
1001  w = wp;                       /* initialize window position */
1002
1003
1004  /* inflate the coded data */
1005  ml = mask[bl];           /* precompute masks for speed */
1006  md = mask[bd];
1007  while (1)                     /* do until end of block */
1008  {
1009    NEEDBITS((unsigned)bl)
1010    if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
1011      do {
1012        if (e == 99)
1013          return 1;
1014        DUMPBITS(t->b)
1015        e -= 16;
1016        NEEDBITS(e)
1017      } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
1018    DUMPBITS(t->b)
1019    if (e == 16)                /* then it's a literal */
1020    {
1021      slide[w++] = (uch)t->v.n;
1022      if (w == WSIZE)
1023      {
1024        FLUSH(gz,w);
1025        w = 0;
1026      }
1027    }
1028    else                        /* it's an EOB or a length */
1029    {
1030      /* exit if end of block */
1031      if (e == 15)
1032        break;
1033
1034      /* get length of block to copy */
1035      NEEDBITS(e)
1036      n = t->v.n + ((unsigned)b & mask[e]);
1037      DUMPBITS(e);
1038
1039      /* decode distance of block to copy */
1040      NEEDBITS((unsigned)bd)
1041      if ((e = (t = td + ((unsigned)b & md))->e) > 16)
1042        do {
1043          if (e == 99)
1044            return 1;
1045          DUMPBITS(t->b)
1046          e -= 16;
1047          NEEDBITS(e)
1048        } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16);
1049      DUMPBITS(t->b)
1050      NEEDBITS(e)
1051      d = w - t->v.n - ((unsigned)b & mask[e]);
1052      DUMPBITS(e)
1053
1054      /* do the copy */
1055      do {
1056        n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
1057#ifndef NOMEMCPY
1058        if (w - d >= e)         /* (this test assumes unsigned comparison) */
1059        {
1060          memcpy(slide + w, slide + d, e);
1061          w += e;
1062          d += e;
1063        }
1064        else                      /* do it slow to avoid memcpy() overlap */
1065#endif /* !NOMEMCPY */
1066          do {
1067            slide[w++] = slide[d++];
1068          } while (--e);
1069        if (w == WSIZE)
1070        {
1071          FLUSH(gz,w);
1072          w = 0;
1073        }
1074      } while (n);
1075    }
1076  }
1077
1078
1079  /* restore the globals from the locals */
1080  wp = w;                       /* restore global window pointer */
1081  bb = b;                       /* restore global bit buffer */
1082  bk = k;
1083
1084
1085  /* done */
1086  return 0;
1087}
1088
1089#endif /* ASM_INFLATECODES */
1090
1091
1092
1093int inflate_stored(gz)
1094struct gzip *gz;
1095/* "decompress" an inflated type 0 (stored) block. */
1096{
1097  unsigned n;           /* number of bytes in block */
1098  unsigned w;           /* current window position */
1099  register ulg b;       /* bit buffer */
1100  register unsigned k;  /* number of bits in bit buffer */
1101
1102
1103  /* make local copies of globals */
1104  Trace((stderr, "\nstored block"));
1105  b = bb;                       /* initialize bit buffer */
1106  k = bk;
1107  w = wp;                       /* initialize window position */
1108
1109
1110  /* go to byte boundary */
1111  n = k & 7;
1112  DUMPBITS(n);
1113
1114
1115  /* get the length and its complement */
1116  NEEDBITS(16)
1117  n = ((unsigned)b & 0xffff);
1118  DUMPBITS(16)
1119  NEEDBITS(16)
1120  if (n != (unsigned)((~b) & 0xffff))
1121    return 1;                   /* error in compressed data */
1122  DUMPBITS(16)
1123
1124
1125  /* read and output the compressed data */
1126  while (n--)
1127  {
1128    NEEDBITS(8)
1129    slide[w++] = (uch)b;
1130    if (w == WSIZE)
1131    {
1132      FLUSH(gz,w);
1133      w = 0;
1134    }
1135    DUMPBITS(8)
1136  }
1137
1138
1139  /* restore the globals from the locals */
1140  wp = w;                       /* restore global window pointer */
1141  bb = b;                       /* restore global bit buffer */
1142  bk = k;
1143  return 0;
1144}
1145
1146
1147/* Globals for literal tables (built once) */
1148struct huft *fixed_tl = (struct huft *)NULL;
1149struct huft *fixed_td;
1150int fixed_bl, fixed_bd;
1151
1152int inflate_fixed(gz)
1153struct gzip *gz;
1154/* decompress an inflated type 1 (fixed Huffman codes) block.  We should
1155   either replace this with a custom decoder, or at least precompute the
1156   Huffman tables. */
1157{
1158  /* if first time, set up tables for fixed blocks */
1159  Trace((stderr, "\nliteral block"));
1160  if (fixed_tl == (struct huft *)NULL)
1161  {
1162    int i;                /* temporary variable */
1163    static unsigned l[288]; /* length list for huft_build */
1164
1165    /* literal table */
1166    for (i = 0; i < 144; i++)
1167      l[i] = 8;
1168    for (; i < 256; i++)
1169      l[i] = 9;
1170    for (; i < 280; i++)
1171      l[i] = 7;
1172    for (; i < 288; i++)          /* make a complete, but wrong code set */
1173      l[i] = 8;
1174    fixed_bl = 7;
1175    if ((i = huft_build(gz,l, 288, 257, cplens, cplext,
1176                        &fixed_tl, &fixed_bl)) != 0)
1177    {
1178      fixed_tl = (struct huft *)NULL;
1179      return i;
1180    }
1181
1182    /* distance table */
1183    for (i = 0; i < 30; i++)      /* make an incomplete code set */
1184      l[i] = 5;
1185    fixed_bd = 5;
1186    if ((i = huft_build(gz,l, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd)) > 1)
1187    {
1188      huft_free(gz,fixed_tl);
1189      fixed_tl = (struct huft *)NULL;
1190      return i;
1191    }
1192  }
1193
1194
1195  /* decompress until an end-of-block code */
1196  return inflate_codes(gz,fixed_tl, fixed_td, fixed_bl, fixed_bd) != 0;
1197}
1198
1199
1200
1201int inflate_dynamic(gz)
1202struct gzip *gz;
1203/* decompress an inflated type 2 (dynamic Huffman codes) block. */
1204{
1205  int i;                /* temporary variables */
1206  unsigned j;
1207  unsigned l;           /* last length */
1208  unsigned m;           /* mask for bit lengths table */
1209  unsigned n;           /* number of lengths to get */
1210  struct huft *tl;      /* literal/length code table */
1211  struct huft *td;      /* distance code table */
1212  int bl;               /* lookup bits for tl */
1213  int bd;               /* lookup bits for td */
1214  unsigned nb;          /* number of bit length codes */
1215  unsigned nl;          /* number of literal/length codes */
1216  unsigned nd;          /* number of distance codes */
1217#ifdef PKZIP_BUG_WORKAROUND
1218  static unsigned ll[288+32]; /* literal/length and distance code lengths */
1219#else
1220  static unsigned ll[286+30]; /* literal/length and distance code lengths */
1221#endif
1222  register ulg b;       /* bit buffer */
1223  register unsigned k;  /* number of bits in bit buffer */
1224
1225
1226  /* make local bit buffer */
1227  Trace((stderr, "\ndynamic block"));
1228  b = bb;
1229  k = bk;
1230
1231
1232  /* read in table lengths */
1233  NEEDBITS(5)
1234  nl = 257 + ((unsigned)b & 0x1f);      /* number of literal/length codes */
1235  DUMPBITS(5)
1236  NEEDBITS(5)
1237  nd = 1 + ((unsigned)b & 0x1f);        /* number of distance codes */
1238  DUMPBITS(5)
1239  NEEDBITS(4)
1240  nb = 4 + ((unsigned)b & 0xf);         /* number of bit length codes */
1241  DUMPBITS(4)
1242#ifdef PKZIP_BUG_WORKAROUND
1243  if (nl > 288 || nd > 32)
1244#else
1245  if (nl > 286 || nd > 30)
1246#endif
1247    return 1;                   /* bad lengths */
1248
1249
1250  /* read in bit-length-code lengths */
1251  for (j = 0; j < nb; j++)
1252  {
1253    NEEDBITS(3)
1254    ll[border[j]] = (unsigned)b & 7;
1255    DUMPBITS(3)
1256  }
1257  for (; j < 19; j++)
1258    ll[border[j]] = 0;
1259
1260
1261  /* build decoding table for trees--single level, 7 bit lookup */
1262  bl = 7;
1263  if ((i = huft_build(gz,ll, 19, 19, NULL, NULL, &tl, &bl)) != 0)
1264  {
1265    if (i == 1)
1266      huft_free(gz,tl);
1267    return i;                   /* incomplete code set */
1268  }
1269
1270
1271  /* read in literal and distance code lengths */
1272  n = nl + nd;
1273  m = mask[bl];
1274  i = l = 0;
1275  while ((unsigned)i < n)
1276  {
1277    NEEDBITS((unsigned)bl)
1278    j = (td = tl + ((unsigned)b & m))->b;
1279    DUMPBITS(j)
1280    j = td->v.n;
1281    if (j < 16)                 /* length of code in bits (0..15) */
1282      ll[i++] = l = j;          /* save last length in l */
1283    else if (j == 16)           /* repeat last length 3 to 6 times */
1284    {
1285      NEEDBITS(2)
1286      j = 3 + ((unsigned)b & 3);
1287      DUMPBITS(2)
1288      if ((unsigned)i + j > n)
1289        return 1;
1290      while (j--)
1291        ll[i++] = l;
1292    }
1293    else if (j == 17)           /* 3 to 10 zero length codes */
1294    {
1295      NEEDBITS(3)
1296      j = 3 + ((unsigned)b & 7);
1297      DUMPBITS(3)
1298      if ((unsigned)i + j > n)
1299        return 1;
1300      while (j--)
1301        ll[i++] = 0;
1302      l = 0;
1303    }
1304    else                        /* j == 18: 11 to 138 zero length codes */
1305    {
1306      NEEDBITS(7)
1307      j = 11 + ((unsigned)b & 0x7f);
1308      DUMPBITS(7)
1309      if ((unsigned)i + j > n)
1310        return 1;
1311      while (j--)
1312        ll[i++] = 0;
1313      l = 0;
1314    }
1315  }
1316
1317
1318  /* free decoding table for trees */
1319  huft_free(gz,tl);
1320
1321
1322  /* restore the global bit buffer */
1323  bb = b;
1324  bk = k;
1325
1326
1327  /* build the decoding tables for literal/length and distance codes */
1328  bl = lbits;
1329  if ((i = huft_build(gz,ll, nl, 257, cplens, cplext, &tl, &bl)) != 0)
1330  {
1331    if (i == 1 && !qflag) {
1332      FPRINTF( "(incomplete l-tree)  ");
1333      huft_free(gz,tl);
1334    }
1335    return i;                   /* incomplete code set */
1336  }
1337  bd = dbits;
1338  if ((i = huft_build(gz,ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0)
1339  {
1340    if (i == 1 && !qflag) {
1341      FPRINTF( "(incomplete d-tree)  ");
1342#ifdef PKZIP_BUG_WORKAROUND
1343      i = 0;
1344    }
1345#else
1346      huft_free(gz,td);
1347    }
1348    huft_free(gz,tl);
1349    return i;                   /* incomplete code set */
1350#endif
1351  }
1352
1353
1354  /* decompress until an end-of-block code */
1355  if (inflate_codes(gz,tl, td, bl, bd))
1356    return 1;
1357
1358
1359  /* free the decoding tables, return */
1360  huft_free(gz,tl);
1361  huft_free(gz,td);
1362  return 0;
1363}
1364
1365
1366
1367int inflate_block(gz,e)
1368struct gzip *gz;
1369int *e;                 /* last block flag */
1370/* decompress an inflated block */
1371{
1372  unsigned t;           /* block type */
1373  register ulg b;       /* bit buffer */
1374  register unsigned k;  /* number of bits in bit buffer */
1375
1376
1377  /* make local bit buffer */
1378  b = bb;
1379  k = bk;
1380
1381
1382  /* read in last block bit */
1383  NEEDBITS(1)
1384  *e = (int)b & 1;
1385  DUMPBITS(1)
1386
1387
1388  /* read in block type */
1389  NEEDBITS(2)
1390  t = (unsigned)b & 3;
1391  DUMPBITS(2)
1392
1393
1394  /* restore the global bit buffer */
1395  bb = b;
1396  bk = k;
1397
1398
1399  /* inflate that block type */
1400  if (t == 2)
1401    return inflate_dynamic(gz);
1402  if (t == 0)
1403    return inflate_stored(gz);
1404  if (t == 1)
1405    return inflate_fixed(gz);
1406
1407
1408  /* bad block type */
1409  return 2;
1410}
1411
1412
1413
1414int inflate(gz)
1415struct gzip *gz;
1416/* decompress an inflated entry */
1417{
1418  int e;                /* last block flag */
1419  int r;                /* result code */
1420  unsigned h;           /* maximum struct huft's malloc'ed */
1421
1422
1423  /* initialize window, bit buffer */
1424  wp = 0;
1425  bk = 0;
1426  bb = 0;
1427
1428
1429  /* decompress until the last block */
1430  h = 0;
1431  do {
1432    hufts = 0;
1433    if ((r = inflate_block(gz,&e)) != 0)
1434      return r;
1435    if (hufts > h)
1436      h = hufts;
1437  } while (!e);
1438
1439
1440  /* flush out slide */
1441  FLUSH(gz,wp);
1442
1443
1444  /* return success */
1445  Trace((stderr, "\n%u bytes in Huffman tables (%d/entry)\n",
1446         h * sizeof(struct huft), sizeof(struct huft)));
1447  return 0;
1448}
1449
1450
1451
1452int inflate_free(gz)
1453struct gzip *gz;
1454{
1455  if (fixed_tl != (struct huft *)NULL)
1456  {
1457    huft_free(gz,fixed_td);
1458    huft_free(gz,fixed_tl);
1459    fixed_td = fixed_tl = (struct huft *)NULL;
1460  }
1461  return 0;
1462}
1463