/* imgact_gzip.c */
1284388Sngie/* 2284388Sngie * Parts of this file are not covered by: 3284388Sngie * ---------------------------------------------------------------------------- 4284388Sngie * "THE BEER-WARE LICENSE" (Revision 42): 5284388Sngie * <phk@login.dknet.dk> wrote this file. As long as you retain this notice you 6284388Sngie * can do whatever you want with this stuff. If we meet some day, and you think 7284388Sngie * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 8284388Sngie * ---------------------------------------------------------------------------- 9284388Sngie * 10284388Sngie * $Id: imgact_gzip.c,v 1.1 1994/10/03 05:23:01 phk Exp $ 11284388Sngie * 12284388Sngie * This module handles execution of a.out files which have been run through 13284388Sngie * "gzip -9". 14284388Sngie * 15284388Sngie * For now you need to use exactly this command to compress the binaries: 16284388Sngie * 17284388Sngie * gzip -9 -v < /bin/sh > /tmp/sh 18284388Sngie * 19284388Sngie * TODO: 20284388Sngie * text-segments should be made R/O after being filled 21284388Sngie * is the vm-stuff safe ? 22284388Sngie * should handle the entire header of gzip'ed stuff. 23284388Sngie * inflate isn't quite reentrant yet... 24284388Sngie * error-handling is a mess... 25284388Sngie * so is the rest... 
26284388Sngie */ 27284388Sngie 28284388Sngie#include <sys/param.h> 29284388Sngie#include <sys/systm.h> 30288330Sngie#include <sys/resourcevar.h> 31284388Sngie#include <sys/exec.h> 32288330Sngie#include <sys/mman.h> 33284388Sngie#include <sys/malloc.h> 34288330Sngie#include <sys/imgact.h> 35288330Sngie#include <sys/imgact_aout.h> 36284388Sngie#include <sys/kernel.h> 37284388Sngie#include <sys/sysent.h> 38284388Sngie 39284388Sngie#include <vm/vm.h> 40284388Sngie#include <vm/vm_kern.h> 41284388Sngie 42288330Sngie#define WSIZE 0x8000 43288330Sngie 44288330Sngiestruct gzip { 45288330Sngie struct image_params *ip; 46288330Sngie struct exec a_out; 47288330Sngie int error; 48289965Sngie int where; 49284388Sngie u_char *inbuf; 50284388Sngie u_long offset; 51284388Sngie u_long output; 52284388Sngie u_long len; 53284388Sngie int idx; 54284388Sngie u_long virtual_offset, file_offset, bss_size; 55288330Sngie unsigned gz_wp; 56288330Sngie u_char gz_slide[WSIZE]; 57288330Sngie}; 58288330Sngie 59289965Sngieint inflate __P((struct gzip *)); 60288330Sngie 61288330Sngieextern struct sysentvec aout_sysvec; 62288330Sngie 63288330Sngie#define slide (gz->gz_slide) 64288330Sngie#define wp (gz->gz_wp) 65288330Sngie 66288330Sngieint 67288330Sngieexec_gzip_imgact(iparams) 68288330Sngie struct image_params *iparams; 69288330Sngie{ 70288330Sngie int error,error2=0; 71288330Sngie u_char *p = (u_char *) iparams->image_header; 72288330Sngie struct gzip *gz; 73288330Sngie struct vattr vattr; 74288330Sngie 75284388Sngie 76284388Sngie if(p[0] != 0x1f) return -1; /* Simply magic */ 77289965Sngie if(p[1] != 0x8b) return -1; /* Simply magic */ 78289965Sngie if(p[2] != 0x08) return -1; /* Compression method */ 79289965Sngie if(p[3] != 0x00) return -1; /* Flags */ 80289965Sngie /* 4 bytes timestamp */ 81289965Sngie if(p[8] != 0x02) return -1; /* Extra flags */ 82288330Sngie if(p[9] != 0x03) return -1; /* OS compressed on */ 83289965Sngie 84288330Sngie gz = malloc(sizeof *gz,M_TEMP,M_NOWAIT); 
85288330Sngie if(!gz) 86288330Sngie return ENOMEM; 87289965Sngie bzero(gz,sizeof *gz); 88289965Sngie 89289965Sngie gz->ip = iparams; 90289965Sngie gz->error = 0; 91289965Sngie gz->idx = 10; 92289965Sngie 93289965Sngie gz->len = gz->ip->attr->va_size; 94288330Sngie 95289965Sngie error = inflate(gz); 96289965Sngie if (gz->inbuf) { 97289965Sngie error2 = 98289965Sngie vm_deallocate(kernel_map, (vm_offset_t)gz->inbuf, PAGE_SIZE); 99289965Sngie } 100288330Sngie printf("Output=%lu\n",gz->output); 101288330Sngie printf("Inflate_error=%d gz->error=%d error2=%d where=%d\n", 102288330Sngie error,gz->error,error2,gz->where); 103284388Sngie if(error) 104284388Sngie return ENOEXEC; 105284388Sngie 106288330Sngie error = gz->error; 107284388Sngie free(gz,M_TEMP); 108284388Sngie return error; 109284388Sngie} 110284388Sngie 111288330Sngieint 112288330Sngiedo_aout_hdr(struct gzip *gz) 113288330Sngie{ 114288330Sngie int error; 115284388Sngie struct vmspace *vmspace = gz->ip->proc->p_vmspace; 116284388Sngie u_long vmaddr; 117284388Sngie 118284388Sngie /* 119284388Sngie * Set file/virtual offset based on a.out variant. 
120284388Sngie * We do two cases: host byte order and network byte order 121284388Sngie * (for NetBSD compatibility) 122284388Sngie */ 123284388Sngie switch ((int)(gz->a_out.a_magic & 0xffff)) { 124284388Sngie case ZMAGIC: 125284388Sngie gz->virtual_offset = 0; 126284388Sngie if (gz->a_out.a_text) { 127284388Sngie gz->file_offset = NBPG; 128284388Sngie } else { 129284388Sngie /* Bill's "screwball mode" */ 130284388Sngie gz->file_offset = 0; 131284388Sngie } 132284388Sngie break; 133284388Sngie case QMAGIC: 134284388Sngie gz->virtual_offset = NBPG; 135284388Sngie gz->file_offset = 0; 136284388Sngie break; 137284388Sngie default: 138284388Sngie /* NetBSD compatibility */ 139284388Sngie switch ((int)(ntohl(gz->a_out.a_magic) & 0xffff)) { 140284388Sngie case ZMAGIC: 141284388Sngie case QMAGIC: 142284388Sngie gz->virtual_offset = NBPG; 143284388Sngie gz->file_offset = 0; 144284388Sngie break; 145288330Sngie default: 146288330Sngie gz->where = __LINE__; 147288330Sngie return (-1); 148288330Sngie } 149284388Sngie } 150284388Sngie 151284388Sngie gz->bss_size = roundup(gz->a_out.a_bss, NBPG); 152284388Sngie 153284388Sngie /* 154284388Sngie * Check various fields in header for validity/bounds. 
155284388Sngie */ 156284388Sngie if (/* entry point must lay with text region */ 157284388Sngie gz->a_out.a_entry < gz->virtual_offset || 158284388Sngie gz->a_out.a_entry >= gz->virtual_offset + gz->a_out.a_text || 159284388Sngie 160284388Sngie /* text and data size must each be page rounded */ 161284388Sngie gz->a_out.a_text % NBPG || 162284388Sngie gz->a_out.a_data % NBPG) { 163284388Sngie gz->where = __LINE__; 164284388Sngie return (-1); 165288330Sngie } 166288330Sngie 167288330Sngie 168288330Sngie /* 169288330Sngie * text/data/bss must not exceed limits 170288330Sngie */ 171288330Sngie if (/* text can't exceed maximum text size */ 172288330Sngie gz->a_out.a_text > MAXTSIZ || 173288330Sngie 174288330Sngie /* data + bss can't exceed maximum data size */ 175288330Sngie gz->a_out.a_data + gz->bss_size > MAXDSIZ || 176288330Sngie 177288330Sngie /* data + bss can't exceed rlimit */ 178288330Sngie gz->a_out.a_data + gz->bss_size > 179288330Sngie gz->ip->proc->p_rlimit[RLIMIT_DATA].rlim_cur) { 180288330Sngie gz->where = __LINE__; 181288330Sngie return (ENOMEM); 182288330Sngie } 183289965Sngie 184289965Sngie /* copy in arguments and/or environment from old process */ 185289965Sngie error = exec_extract_strings(gz->ip); 186289965Sngie if (error) { 187289965Sngie gz->where = __LINE__; 188289965Sngie return (error); 189289965Sngie } 190289965Sngie 191289965Sngie /* 192289965Sngie * Destroy old process VM and create a new one (with a new stack) 193289965Sngie */ 194289965Sngie exec_new_vmspace(gz->ip); 195289965Sngie 196289965Sngie vmaddr = gz->virtual_offset; 197289965Sngie 198288330Sngie error = vm_mmap(&vmspace->vm_map, /* map */ 199288330Sngie &vmaddr, /* address */ 200288330Sngie gz->a_out.a_text, /* size */ 201288330Sngie VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_WRITE, /* protection */ 202288330Sngie VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_WRITE, 203288330Sngie MAP_ANON | MAP_FIXED, /* flags */ 204289965Sngie 0, /* vnode */ 205288330Sngie 0); /* offset */ 
206288330Sngie 207288330Sngie if (error) { 208288330Sngie gz->where = __LINE__; 209288330Sngie return (error); 210289965Sngie } 211289965Sngie 212289965Sngie vmaddr = gz->virtual_offset + gz->a_out.a_text; 213289965Sngie 214289965Sngie /* 215289965Sngie * Map data read/write (if text is 0, assume text is in data area 216289965Sngie * [Bill's screwball mode]) 217289965Sngie */ 218289965Sngie 219289965Sngie error = vm_mmap(&vmspace->vm_map, 220289965Sngie &vmaddr, 221289965Sngie gz->a_out.a_data, 222289965Sngie VM_PROT_READ | VM_PROT_WRITE | (gz->a_out.a_text ? 0 : VM_PROT_EXECUTE), 223289965Sngie VM_PROT_ALL, MAP_ANON | MAP_FIXED, 224289965Sngie 0, 225289965Sngie 0); 226289965Sngie 227289965Sngie if (error) { 228289965Sngie gz->where = __LINE__; 229289965Sngie return (error); 230288330Sngie } 231288330Sngie 232289965Sngie /* 233289965Sngie * Allocate demand-zeroed area for uninitialized data 234289965Sngie * "bss" = 'block started by symbol' - named after the IBM 7090 235289965Sngie * instruction of the same name. 
236289965Sngie */ 237289965Sngie vmaddr = gz->virtual_offset + gz->a_out.a_text + gz->a_out.a_data; 238289965Sngie error = vm_allocate(&vmspace->vm_map, &vmaddr, gz->bss_size, FALSE); 239289965Sngie if (error) { 240289965Sngie gz->where = __LINE__; 241289965Sngie return (error); 242289965Sngie } 243289965Sngie 244289965Sngie /* Fill in process VM information */ 245289965Sngie vmspace->vm_tsize = gz->a_out.a_text >> PAGE_SHIFT; 246289965Sngie vmspace->vm_dsize = (gz->a_out.a_data + gz->bss_size) >> PAGE_SHIFT; 247289965Sngie vmspace->vm_taddr = (caddr_t) gz->virtual_offset; 248289965Sngie vmspace->vm_daddr = (caddr_t) gz->virtual_offset + gz->a_out.a_text; 249289965Sngie 250289965Sngie /* Fill in image_params */ 251289965Sngie gz->ip->interpreted = 0; 252289965Sngie gz->ip->entry_addr = gz->a_out.a_entry; 253289965Sngie 254289965Sngie gz->ip->proc->p_sysent = &aout_sysvec; 255289965Sngieprintf("a.out ok, entry=%08x\n",gz->ip->entry_addr); 256289965Sngie return 0; 257289965Sngie} 258289965Sngie 259289965Sngie/* 260289965Sngie * Tell kern_execve.c about it, with a little help from the linker. 261289965Sngie * Since `const' objects end up in the text segment, TEXT_SET is the 262289965Sngie * correct directive to use. 
263289965Sngie */ 264289965Sngiestatic const struct execsw gzip_execsw = { exec_gzip_imgact, "gzip" }; 265289965SngieTEXT_SET(execsw_set, gzip_execsw); 266289965Sngie 267289965Sngie/* Stuff to make inflate() work */ 268289965Sngie# define uch u_char 269289965Sngie# define ush u_short 270289965Sngie# define ulg u_long 271289965Sngie# define memzero(dest,len) bzero(dest,len) 272289965Sngie# define NOMEMCPY 273289965Sngie#define FPRINTF printf 274289965Sngie 275289965Sngie#define EOF -1 276289965Sngie#define CHECK_EOF 277289965Sngiestatic int 278289965SngieNextByte(struct gzip *gz) 279289965Sngie{ 280288330Sngie int error; 281288330Sngie 282288330Sngie if(gz->idx >= gz->len) 283288330Sngie return EOF; 284288330Sngie 285288330Sngie if((!gz->inbuf) || gz->idx >= (gz->offset+PAGE_SIZE)) { 286288330Sngie if(gz->inbuf) { 287288330Sngie error = vm_deallocate(kernel_map, 288288330Sngie (vm_offset_t)gz->inbuf, PAGE_SIZE); 289288330Sngie if(error) { 290288330Sngie gz->where = __LINE__; 291288330Sngie gz->error = error; 292288330Sngie printf("exec_gzip: Error %d in vm)deallocate",error); 293288330Sngie return EOF; 294288330Sngie } 295288330Sngie } 296288330Sngie 297288330Sngie gz->offset += PAGE_SIZE; 298288330Sngie 299288330Sngie error = vm_mmap(kernel_map, /* map */ 300288330Sngie (vm_offset_t *)&gz->inbuf, /* address */ 301288330Sngie PAGE_SIZE, /* size */ 302288330Sngie VM_PROT_READ, /* protection */ 303288330Sngie VM_PROT_READ, /* max protection */ 304288330Sngie 0, /* flags */ 305288330Sngie (caddr_t)gz->ip->vnodep, /* vnode */ 306288330Sngie gz->offset); /* offset */ 307288330Sngie if(error) { 308288330Sngie gz->where = __LINE__; 309288330Sngie gz->error = error; 310288330Sngie printf("exec_gzip: Error %d in vm_mmap",error); 311288330Sngie return EOF; 312288330Sngie } 313288330Sngie 314288330Sngie } 315288330Sngie return gz->inbuf[(gz->idx++) - gz->offset]; 316288330Sngie} 317288330Sngie 318288330Sngie#define NEXTBYTE NextByte(gz) 319288330Sngie 320288330Sngiestatic int 
321288330SngieFlush(struct gzip *gz,u_long siz) 322289965Sngie{ 323289965Sngie u_char *p = slide,*q; 324289965Sngie int i; 325289965Sngie 326289965Sngie#if 0 327289965Sngie int i; 328289965Sngie printf("<"); 329289965Sngie for(i=0;i<siz;i++) 330289965Sngie printf("%02x",slide[i]); 331289965Sngie printf(">\n"); 332289965Sngie#endif 333289965Sngie 334289965Sngie printf("<%lu>",siz); 335289965Sngie 336289965Sngie /* First, find a a.out-header */ 337289965Sngie if(gz->output < sizeof gz->a_out) { 338289965Sngie q = (u_char*) &gz->a_out; 339289965Sngie i = min(siz,sizeof gz->a_out - gz->output); 340289965Sngie bcopy(p,q+gz->output,i); 341289965Sngie gz->output += i; 342289965Sngie p += i; 343289965Sngie siz -= i; 344289965Sngie if(gz->output == sizeof gz->a_out) { 345289965Sngie for(i=0;i<sizeof gz->a_out;i+=4) 346289965Sngie printf("%02x%02x%02x%02x ", 347289965Sngie q[i+0],q[i+1],q[i+2],q[i+3]); 348289965Sngie printf("\n"); 349289965Sngie i = do_aout_hdr(gz); 350289965Sngie printf("file_offset=%lx virtual_offset=%lx", 351289965Sngie gz->file_offset,gz->virtual_offset); 352289965Sngie if(i) { 353289965Sngie gz->error = i; 354289965Sngie return i; 355289965Sngie } 356289965Sngie if(gz->file_offset < sizeof gz->a_out) { 357289965Sngie q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset; 358289965Sngie bcopy(&gz->a_out,q,sizeof gz->a_out); 359289965Sngie } 360289965Sngie } 361289965Sngie } 362289965Sngie if(gz->output >= gz->file_offset && 363289965Sngie gz->output < (gz->file_offset+ 364289965Sngie gz->a_out.a_text+ 365289965Sngie gz->a_out.a_data)) { 366289965Sngie 367289965Sngie i = min(siz, 368289965Sngie (gz->file_offset+ 369289965Sngie gz->a_out.a_text+ 370289965Sngie gz->a_out.a_data) 371289965Sngie -gz->output); 372289965Sngie q = (u_char*) gz->virtual_offset + gz->output - gz->file_offset; 373289965Sngie bcopy(p,q,i); 374289965Sngie gz->output += i; 375289965Sngie p += i; 376289965Sngie siz -= i; 377289965Sngie } 378289965Sngie if(!siz) return 0; 
379289965Sngie gz->output += siz; 380289965Sngie return 0; 381289965Sngie} 382289965Sngie 383289965Sngie#define FLUSH(x,y) {int foo = Flush(x,y); if (foo) return foo;} 384289965Sngiestatic 385289965Sngievoid * 386289965Sngiemyalloc(u_long size) 387289965Sngie{ 388289965Sngie return malloc(size, M_TEMP, M_NOWAIT); 389289965Sngie} 390289965Sngie#define malloc myalloc 391289965Sngie 392289965Sngiestatic 393289965Sngievoid 394289965Sngiemyfree(void * ptr) 395289965Sngie{ 396289965Sngie free(ptr,M_TEMP); 397289965Sngie} 398289965Sngie#define free myfree 399289965Sngie 400289965Sngiestatic int qflag; 401289965Sngie#define Trace(x) /* */ 402289965Sngie 403289965Sngie 404289965Sngie/* This came from unzip-5.12. I have changed it to pass a "gz" pointer 405289965Sngie * around, thus hopefully making it re-entrant. Poul-Henningi 406289965Sngie */ 407289965Sngie 408289965Sngie/* inflate.c -- put in the public domain by Mark Adler 409289965Sngie version c14o, 23 August 1994 */ 410289965Sngie 411289965Sngie/* You can do whatever you like with this source file, though I would 412289965Sngie prefer that if you modify it and redistribute it that you include 413289965Sngie comments to that effect with your name and the date. Thank you. 414289965Sngie 415289965Sngie History: 416289965Sngie vers date who what 417289965Sngie ---- --------- -------------- ------------------------------------ 418289965Sngie a ~~ Feb 92 M. Adler used full (large, one-step) lookup table 419289965Sngie b1 21 Mar 92 M. Adler first version with partial lookup tables 420289965Sngie b2 21 Mar 92 M. Adler fixed bug in fixed-code blocks 421289965Sngie b3 22 Mar 92 M. Adler sped up match copies, cleaned up some 422289965Sngie b4 25 Mar 92 M. 
Adler added prototypes; removed window[] (now 423289965Sngie is the responsibility of unzip.h--also 424289965Sngie changed name to slide[]), so needs diffs 425289965Sngie for unzip.c and unzip.h (this allows 426289965Sngie compiling in the small model on MSDOS); 427289965Sngie fixed cast of q in huft_build(); 428289965Sngie b5 26 Mar 92 M. Adler got rid of unintended macro recursion. 429289965Sngie b6 27 Mar 92 M. Adler got rid of nextbyte() routine. fixed 430289965Sngie bug in inflate_fixed(). 431289965Sngie c1 30 Mar 92 M. Adler removed lbits, dbits environment variables. 432289965Sngie changed BMAX to 16 for explode. Removed 433289965Sngie OUTB usage, and replaced it with flush()-- 434289965Sngie this was a 20% speed improvement! Added 435289965Sngie an explode.c (to replace unimplod.c) that 436289965Sngie uses the huft routines here. Removed 437289965Sngie register union. 438289965Sngie c2 4 Apr 92 M. Adler fixed bug for file sizes a multiple of 32k. 439289965Sngie c3 10 Apr 92 M. Adler reduced memory of code tables made by 440289965Sngie huft_build significantly (factor of two to 441289965Sngie three). 442289965Sngie c4 15 Apr 92 M. Adler added NOMEMCPY do kill use of memcpy(). 443289965Sngie worked around a Turbo C optimization bug. 444289965Sngie c5 21 Apr 92 M. Adler added the WSIZE #define to allow reducing 445289965Sngie the 32K window size for specialized 446289965Sngie applications. 447289965Sngie c6 31 May 92 M. Adler added some typecasts to eliminate warnings 448289965Sngie c7 27 Jun 92 G. Roelofs added some more typecasts (444: MSC bug). 449289965Sngie c8 5 Oct 92 J-l. Gailly added ifdef'd code to deal with PKZIP bug. 450289965Sngie c9 9 Oct 92 M. Adler removed a memory error message (~line 416). 451289965Sngie c10 17 Oct 92 G. Roelofs changed ULONG/UWORD/byte to ulg/ush/uch, 452289965Sngie removed old inflate, renamed inflate_entry 453289965Sngie to inflate, added Mark's fix to a comment. 454289965Sngie c10.5 14 Dec 92 M. 
Adler fix up error messages for incomplete trees. 455289965Sngie c11 2 Jan 93 M. Adler fixed bug in detection of incomplete 456289965Sngie tables, and removed assumption that EOB is 457289965Sngie the longest code (bad assumption). 458289965Sngie c12 3 Jan 93 M. Adler make tables for fixed blocks only once. 459289965Sngie c13 5 Jan 93 M. Adler allow all zero length codes (pkzip 2.04c 460289965Sngie outputs one zero length code for an empty 461289965Sngie distance tree). 462289965Sngie c14 12 Mar 93 M. Adler made inflate.c standalone with the 463289965Sngie introduction of inflate.h. 464289965Sngie c14b 16 Jul 93 G. Roelofs added (unsigned) typecast to w at 470. 465289965Sngie c14c 19 Jul 93 J. Bush changed v[N_MAX], l[288], ll[28x+3x] arrays 466289965Sngie to static for Amiga. 467289965Sngie c14d 13 Aug 93 J-l. Gailly de-complicatified Mark's c[*p++]++ thing. 468289965Sngie c14e 8 Oct 93 G. Roelofs changed memset() to memzero(). 469289965Sngie c14f 22 Oct 93 G. Roelofs renamed quietflg to qflag; made Trace() 470289965Sngie conditional; added inflate_free(). 471289965Sngie c14g 28 Oct 93 G. Roelofs changed l/(lx+1) macro to pointer (Cray bug) 472289965Sngie c14h 7 Dec 93 C. Ghisler huft_build() optimizations. 473289965Sngie c14i 9 Jan 94 A. Verheijen set fixed_t{d,l} to NULL after freeing; 474289965Sngie G. Roelofs check NEXTBYTE macro for EOF. 475289965Sngie c14j 23 Jan 94 G. Roelofs removed Ghisler "optimizations"; ifdef'd 476289965Sngie EOF check. 477289965Sngie c14k 27 Feb 94 G. Roelofs added some typecasts to avoid warnings. 478289965Sngie c14l 9 Apr 94 G. Roelofs fixed split comments on preprocessor lines 479289965Sngie to avoid bug in Encore compiler. 480289965Sngie c14m 7 Jul 94 P. Kienitz modified to allow assembler version of 481289965Sngie inflate_codes() (define ASM_INFLATECODES) 482289965Sngie c14n 22 Jul 94 G. Roelofs changed fprintf to FPRINTF for DLL versions 483289965Sngie c14o 23 Aug 94 C. 
Spieler added a newline to a debug statement; 484289965Sngie G. Roelofs added another typecast to avoid MSC warning 485289965Sngie */ 486289965Sngie 487289965Sngie 488289965Sngie/* 489289965Sngie Inflate deflated (PKZIP's method 8 compressed) data. The compression 490289965Sngie method searches for as much of the current string of bytes (up to a 491289965Sngie length of 258) in the previous 32K bytes. If it doesn't find any 492289965Sngie matches (of at least length 3), it codes the next byte. Otherwise, it 493289965Sngie codes the length of the matched string and its distance backwards from 494289965Sngie the current position. There is a single Huffman code that codes both 495289965Sngie single bytes (called "literals") and match lengths. A second Huffman 496289965Sngie code codes the distance information, which follows a length code. Each 497289965Sngie length or distance code actually represents a base value and a number 498289965Sngie of "extra" (sometimes zero) bits to get to add to the base value. At 499289965Sngie the end of each deflated block is a special end-of-block (EOB) literal/ 500289965Sngie length code. The decoding process is basically: get a literal/length 501289965Sngie code; if EOB then done; if a literal, emit the decoded byte; if a 502289965Sngie length then get the distance and emit the referred-to bytes from the 503289965Sngie sliding window of previously emitted data. 504289965Sngie 505289965Sngie There are (currently) three kinds of inflate blocks: stored, fixed, and 506289965Sngie dynamic. The compressor outputs a chunk of data at a time and decides 507289965Sngie which method to use on a chunk-by-chunk basis. A chunk might typically 508289965Sngie be 32K to 64K, uncompressed. If the chunk is uncompressible, then the 509289965Sngie "stored" method is used. In this case, the bytes are simply stored as 510289965Sngie is, eight bits per byte, with none of the above coding. 
The bytes are 511289965Sngie preceded by a count, since there is no longer an EOB code. 512289965Sngie 513289965Sngie If the data is compressible, then either the fixed or dynamic methods 514289965Sngie are used. In the dynamic method, the compressed data is preceded by 515289965Sngie an encoding of the literal/length and distance Huffman codes that are 516289965Sngie to be used to decode this block. The representation is itself Huffman 517289965Sngie coded, and so is preceded by a description of that code. These code 518289965Sngie descriptions take up a little space, and so for small blocks, there is 519289965Sngie a predefined set of codes, called the fixed codes. The fixed method is 520289965Sngie used if the block ends up smaller that way (usually for quite small 521289965Sngie chunks); otherwise the dynamic method is used. In the latter case, the 522289965Sngie codes are customized to the probabilities in the current block and so 523289965Sngie can code it much better than the pre-determined fixed codes can. 524289965Sngie 525289965Sngie The Huffman codes themselves are decoded using a mutli-level table 526289965Sngie lookup, in order to maximize the speed of decoding plus the speed of 527289965Sngie building the decoding tables. See the comments below that precede the 528289965Sngie lbits and dbits tuning parameters. 529289965Sngie */ 530289965Sngie 531289965Sngie 532289965Sngie/* 533289965Sngie Notes beyond the 1.93a appnote.txt: 534289965Sngie 535289965Sngie 1. Distance pointers never point before the beginning of the output 536289965Sngie stream. 537289965Sngie 2. Distance pointers can point back across blocks, up to 32k away. 538301627Sngie 3. There is an implied maximum of 7 bits for the bit length table and 539289965Sngie 15 bits for the actual data. 540289965Sngie 4. If only one code exists, then it is encoded using one bit. (Zero 541289965Sngie would be more efficient, but perhaps a little confusing.) 
If two 542289965Sngie codes exist, they are coded using one bit each (0 and 1). 543289965Sngie 5. There is no way of sending zero distance codes--a dummy must be 544289965Sngie sent if there are none. (History: a pre 2.0 version of PKZIP would 545289965Sngie store blocks with no distance codes, but this was discovered to be 546289965Sngie too harsh a criterion.) Valid only for 1.93a. 2.04c does allow 547289965Sngie zero distance codes, which is sent as one code of zero bits in 548289965Sngie length. 549289965Sngie 6. There are up to 286 literal/length codes. Code 256 represents the 550289965Sngie end-of-block. Note however that the static length tree defines 551289965Sngie 288 codes just to fill out the Huffman codes. Codes 286 and 287 552289965Sngie cannot be used though, since there is no length base or extra bits 553289965Sngie defined for them. Similarily, there are up to 30 distance codes. 554289965Sngie However, static trees define 32 codes (all 5 bits) to fill out the 555289965Sngie Huffman codes, but the last two had better not show up in the data. 556289965Sngie 7. Unzip can check dynamic Huffman blocks for complete code sets. 557289965Sngie The exception is that a single code would not be complete (see #4). 558289965Sngie 8. The five bits following the block type is really the number of 559289965Sngie literal codes sent minus 257. 560289965Sngie 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits 561289965Sngie (1+6+6). Therefore, to output three times the length, you output 562289965Sngie three codes (1+1+1), whereas to output four times the same length, 563289965Sngie you only need two codes (1+3). Hmm. 564289965Sngie 10. In the tree reconstruction algorithm, Code = Code + Increment 565289965Sngie only if BitLength(i) is not zero. (Pretty obvious.) 566289965Sngie 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) 567289965Sngie 12. Note: length code 284 can represent 227-258, but length code 285 568289965Sngie really is 258. 
The last length deserves its own, short code 569289965Sngie since it gets used a lot in very redundant files. The length 570289965Sngie 258 is special since 258 - 3 (the min match length) is 255. 571289965Sngie 13. The literal/length and distance code bit lengths are read as a 572289965Sngie single stream of lengths. It is possible (and advantageous) for 573289965Sngie a repeat code (16, 17, or 18) to go across the boundary between 574289965Sngie the two sets of lengths. 575289965Sngie */ 576289965Sngie 577289965Sngie 578289965Sngie#define PKZIP_BUG_WORKAROUND /* PKZIP 1.93a problem--live with it */ 579289965Sngie 580289965Sngie/* 581289965Sngie inflate.h must supply the uch slide[WSIZE] array and the NEXTBYTE, 582289965Sngie FLUSH() and memzero macros. If the window size is not 32K, it 583289965Sngie should also define WSIZE. If INFMOD is defined, it can include 584289965Sngie compiled functions to support the NEXTBYTE and/or FLUSH() macros. 585289965Sngie There are defaults for NEXTBYTE and FLUSH() below for use as 586289965Sngie examples of what those functions need to do. Normally, you would 587289965Sngie also want FLUSH() to compute a crc on the data. inflate.h also 588289965Sngie needs to provide these typedefs: 589289965Sngie 590289965Sngie typedef unsigned char uch; 591289965Sngie typedef unsigned short ush; 592289965Sngie typedef unsigned long ulg; 593289965Sngie 594289965Sngie This module uses the external functions malloc() and free() (and 595289965Sngie probably memset() or bzero() in the memzero() macro). Their 596289965Sngie prototypes are normally found in <string.h> and <stdlib.h>. 597289965Sngie */ 598289965Sngie#define INFMOD /* tell inflate.h to include code to be compiled */ 599289965Sngie 600289965Sngie/* Huffman code lookup table entry--this entry is four bytes for machines 601289965Sngie that have 16-bit pointers (e.g. PC's in the small or medium model). 602289965Sngie Valid extra bits are 0..13. 
   e == 15 is EOB (end of block), e == 16
   means that v is a literal, 16 < e < 32 means that v is a pointer to
   the next table, which codes e - 16 bits, and lastly e == 99 indicates
   an unused code.  If a code with e == 99 is looked up, this implies an
   error in the data. */
struct huft {
  uch e;                /* number of extra bits or operation */
  uch b;                /* number of bits in this code or subcode */
  union {
    ush n;              /* literal, length base, or distance base */
    struct huft *t;     /* pointer to next level of table */
  } v;
};


/* Function prototypes */
#ifndef OF
#  ifdef __STDC__
#    define OF(a) a
#  else /* !__STDC__ */
#    define OF(a) ()
#  endif /* ?__STDC__ */
#endif
int huft_build OF((struct gzip *,unsigned *, unsigned, unsigned, ush *, ush *,
                   struct huft **, int *));
int huft_free OF((struct gzip *,struct huft *));
int inflate_codes OF((struct gzip *,struct huft *, struct huft *, int, int));
int inflate_stored OF((struct gzip *));
int inflate_fixed OF((struct gzip *));
int inflate_dynamic OF((struct gzip *));
int inflate_block OF((struct gzip *,int *));
int inflate_free OF((struct gzip *));


/* The inflate algorithm uses a sliding 32K byte window on the uncompressed
   stream to find repeated byte strings.  This is implemented here as a
   circular buffer.  The index is updated simply by incrementing and then
   and'ing with 0x7fff (32K-1). */
/* It is left to other modules to supply the 32K area.  It is assumed
   to be usable as if it were declared "uch slide[32768];" or as just
   "uch *slide;" and then malloc'ed in the latter case.  The definition
   must be in unzip.h, included above. */


/* Tables for deflate from PKZIP's appnote.txt. */
static unsigned border[] = {    /* Order of the bit length code lengths */
        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
static ush cplens[] = {         /* Copy lengths for literal codes 257..285 */
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
        /* note: see note #13 above about the 258 in this list. */
static ush cplext[] = {         /* Extra bits for literal codes 257..285 */
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
static ush cpdist[] = {         /* Copy offsets for distance codes 0..29 */
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
        8193, 12289, 16385, 24577};
static ush cpdext[] = {         /* Extra bits for distance codes */
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
        12, 12, 13, 13};

/* And'ing with mask[n] masks the lower n bits */
ush mask[] = {
    0x0000,
    0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
    0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};


/* Macros for inflate() bit peeking and grabbing.
   The usage is:

        NEEDBITS(j)
        x = b & mask[j];
        DUMPBITS(j)

   where NEEDBITS makes sure that b has at least j bits in it, and
   DUMPBITS removes the bits from b.  The macros use the variable k
   for the number of bits in b.  Normally, b and k are register
   variables for speed, and are initialized at the begining of a
   routine that uses these macros from a global bit buffer and count.

   In order to not ask for more bits than there are in the compressed
   stream, the Huffman tables are constructed to only ask for just
   enough bits to make up the end-of-block code (value 256).  Then no
   bytes need to be "returned" to the buffer at the end of the last
   block.  See the huft_build() routine.
 */

/* NOTE(review): the NEEDBITS/DUMPBITS macros operate on caller locals
   b and k; these globals appear to be where the routines park the bit
   buffer between calls (see comment above) -- TODO confirm they are
   not shared across concurrent execs, cf. "inflate isn't quite
   reentrant yet" in the file header. */
ulg bb;                         /* bit buffer */
unsigned bk;                    /* bits in bit buffer */

#ifndef CHECK_EOF
#  define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE)<<k;k+=8;}}
#else
#  define NEEDBITS(n) {while(k<(n)){int c=NEXTBYTE;if(c==EOF)return 1;\
    b|=((ulg)c)<<k;k+=8;}}
#endif                          /* Piet Plomp:  change "return 1" to "break" */

#define DUMPBITS(n) {b>>=(n);k-=(n);}


/*
   Huffman code decoding is performed using a multi-level table lookup.
   The fastest way to decode is to simply build a lookup table whose
   size is determined by the longest code.  However, the time it takes
   to build this table can also be a factor if the data being decoded
   is not very long.  The most common codes are necessarily the
   shortest codes, so those codes dominate the decoding time, and hence
   the speed.  The idea is you can have a shorter table that decodes the
   shorter, more probable codes, and then point to subsidiary tables for
   the longer codes.  The time it costs to decode the longer codes is
   then traded against the time it takes to make longer tables.

   This results of this trade are in the variables lbits and dbits
   below.  lbits is the number of bits the first level table for literal/
   length codes can decode in one step, and dbits is the same thing for
   the distance codes.  Subsequent tables are also less than or equal to
   those sizes.  These values may be adjusted either when all of the
   codes are shorter than that, in which case the longest code length in
   bits is used, or when the shortest code is *longer* than the requested
   table size, in which case the length of the shortest code in bits is
   used.

   There are two different values for the two tables, since they code a
   different number of possibilities each.  The literal/length table
   codes 286 possible values, or in a flat code, a little over eight
   bits.  The distance table codes 30 possible values, or a little less
   than five bits, flat.  The optimum values for speed end up being
   about one bit more than those, so lbits is 8+1 and dbits is 5+1.
   The optimum values may differ though from machine to machine, and
   possibly even between compilers.  Your mileage may vary.
736289965Sngie */ 737289965Sngie 738289965Sngie 739289965Sngieint lbits = 9; /* bits in base literal/length lookup table */ 740289965Sngieint dbits = 6; /* bits in base distance lookup table */ 741289965Sngie 742289965Sngie 743289965Sngie/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ 744289965Sngie#define BMAX 16 /* maximum bit length of any code (16 for explode) */ 745289965Sngie#define N_MAX 288 /* maximum number of codes in any set */ 746289965Sngie 747289965Sngie 748289965Sngieunsigned hufts; /* track memory usage */ 749289965Sngie 750289965Sngie 751289965Sngieint huft_build(gz,b, n, s, d, e, t, m) 752289965Sngiestruct gzip *gz; 753289965Sngieunsigned *b; /* code lengths in bits (all assumed <= BMAX) */ 754289965Sngieunsigned n; /* number of codes (assumed <= N_MAX) */ 755289965Sngieunsigned s; /* number of simple-valued codes (0..s-1) */ 756289965Sngieush *d; /* list of base values for non-simple codes */ 757289965Sngieush *e; /* list of extra bits for non-simple codes */ 758289965Sngiestruct huft **t; /* result: starting table */ 759289965Sngieint *m; /* maximum lookup bits, returns actual */ 760289965Sngie/* Given a list of code lengths and a maximum table size, make a set of 761289965Sngie tables to decode that set of codes. Return zero on success, one if 762289965Sngie the given code set is incomplete (the tables are still built in this 763289965Sngie case), two if the input is invalid (all zero length codes or an 764289965Sngie oversubscribed set of lengths), and three if not enough memory. 765289965Sngie The code with value 256 is special, and the tables are constructed 766289965Sngie so that no bits beyond that code are fetched when that code is 767289965Sngie decoded. 
*/ 768289965Sngie{ 769289965Sngie unsigned a; /* counter for codes of length k */ 770289965Sngie unsigned c[BMAX+1]; /* bit length count table */ 771289965Sngie unsigned el; /* length of EOB code (value 256) */ 772289965Sngie unsigned f; /* i repeats in table every f entries */ 773289965Sngie int g; /* maximum code length */ 774289965Sngie int h; /* table level */ 775289965Sngie register unsigned i; /* counter, current code */ 776289965Sngie register unsigned j; /* counter */ 777289965Sngie register int k; /* number of bits in current code */ 778289965Sngie int lx[BMAX+1]; /* memory for l[-1..BMAX-1] */ 779289965Sngie int *l = lx+1; /* stack of bits per table */ 780289965Sngie register unsigned *p; /* pointer into c[], b[], or v[] */ 781289965Sngie register struct huft *q; /* points to current table */ 782289965Sngie struct huft r; /* table entry for structure assignment */ 783289965Sngie struct huft *u[BMAX]; /* table stack */ 784289965Sngie static unsigned v[N_MAX]; /* values in order of bit length */ 785289965Sngie register int w; /* bits before this table == (l * h) */ 786289965Sngie unsigned x[BMAX+1]; /* bit offsets, then code stack */ 787289965Sngie unsigned *xp; /* pointer into x */ 788289965Sngie int y; /* number of dummy codes added */ 789289965Sngie unsigned z; /* number of entries in current table */ 790289965Sngie 791289965Sngie 792289965Sngie /* Generate counts for each bit length */ 793289965Sngie el = n > 256 ? 
b[256] : BMAX; /* set length of EOB code, if any */ 794289965Sngie memzero((char *)c, sizeof(c)); 795289965Sngie p = b; i = n; 796289965Sngie do { 797289965Sngie c[*p]++; p++; /* assume all entries <= BMAX */ 798289965Sngie } while (--i); 799289965Sngie if (c[0] == n) /* null input--all zero length codes */ 800289965Sngie { 801289965Sngie *t = (struct huft *)NULL; 802289965Sngie *m = 0; 803289965Sngie return 0; 804289965Sngie } 805289965Sngie 806289965Sngie 807289965Sngie /* Find minimum and maximum length, bound *m by those */ 808289965Sngie for (j = 1; j <= BMAX; j++) 809289965Sngie if (c[j]) 810289965Sngie break; 811289965Sngie k = j; /* minimum code length */ 812289965Sngie if ((unsigned)*m < j) 813289965Sngie *m = j; 814289965Sngie for (i = BMAX; i; i--) 815289965Sngie if (c[i]) 816289965Sngie break; 817289965Sngie g = i; /* maximum code length */ 818289965Sngie if ((unsigned)*m > i) 819289965Sngie *m = i; 820289965Sngie 821289965Sngie 822289965Sngie /* Adjust last length count to fill out codes, if needed */ 823289965Sngie for (y = 1 << j; j < i; j++, y <<= 1) 824289965Sngie if ((y -= c[j]) < 0) 825289965Sngie return 2; /* bad input: more codes than bits */ 826289965Sngie if ((y -= c[i]) < 0) 827289965Sngie return 2; 828289965Sngie c[i] += y; 829289965Sngie 830289965Sngie 831289965Sngie /* Generate starting offsets into the value table for each length */ 832289965Sngie x[1] = j = 0; 833289965Sngie p = c + 1; xp = x + 2; 834289965Sngie while (--i) { /* note that i == g from above */ 835289965Sngie *xp++ = (j += *p++); 836289965Sngie } 837289965Sngie 838289965Sngie 839289965Sngie /* Make a table of values in order of bit lengths */ 840289965Sngie p = b; i = 0; 841289965Sngie do { 842288330Sngie if ((j = *p++) != 0) 843288330Sngie v[x[j]++] = i; 844289965Sngie } while (++i < n); 845288330Sngie 846288330Sngie 847288330Sngie /* Generate the Huffman codes and for each, make the table entries */ 848288330Sngie x[0] = i = 0; /* first Huffman code is zero */ 
849288330Sngie p = v; /* grab values in bit order */ 850288330Sngie h = -1; /* no tables yet--level -1 */ 851288330Sngie w = l[-1] = 0; /* no bits decoded yet */ 852288330Sngie u[0] = (struct huft *)NULL; /* just to keep compilers happy */ 853288330Sngie q = (struct huft *)NULL; /* ditto */ 854288330Sngie z = 0; /* ditto */ 855288330Sngie 856288330Sngie /* go through the bit lengths (k already is bits in shortest code) */ 857288330Sngie for (; k <= g; k++) 858288330Sngie { 859288330Sngie a = c[k]; 860288330Sngie while (a--) 861289965Sngie { 862288330Sngie /* here i is the Huffman code of length k bits for value *p */ 863288330Sngie /* make tables up to required level */ 864289965Sngie while (k > w + l[h]) 865289965Sngie { 866289965Sngie w += l[h++]; /* add bits already decoded */ 867289965Sngie 868289965Sngie /* compute minimum size table less than or equal to *m bits */ 869289965Sngie z = (z = g - w) > (unsigned)*m ? *m : z; /* upper limit */ 870289965Sngie if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ 871289965Sngie { /* too few codes for k-w bit table */ 872289965Sngie f -= a + 1; /* deduct codes from patterns left */ 873289965Sngie xp = c + k; 874289965Sngie while (++j < z) /* try smaller tables up to z bits */ 875289965Sngie { 876289965Sngie if ((f <<= 1) <= *++xp) 877289965Sngie break; /* enough codes to use up j bits */ 878289965Sngie f -= *xp; /* else deduct codes from patterns */ 879289965Sngie } 880289965Sngie } 881289965Sngie if ((unsigned)w + j > el && (unsigned)w < el) 882289965Sngie j = el - w; /* make EOB code end at table */ 883289965Sngie z = 1 << j; /* table entries for j-bit table */ 884289965Sngie l[h] = j; /* set table size in stack */ 885289965Sngie 886289965Sngie /* allocate and link in new table */ 887289965Sngie if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == 888289965Sngie (struct huft *)NULL) 889288330Sngie { 890288330Sngie if (h) 891288330Sngie huft_free(gz,u[0]); 892288330Sngie return 3; /* not enough 
memory */ 893288330Sngie } 894288330Sngie hufts += z + 1; /* track memory usage */ 895288330Sngie *t = q + 1; /* link to list for huft_free() */ 896288330Sngie *(t = &(q->v.t)) = (struct huft *)NULL; 897288330Sngie u[h] = ++q; /* table starts after link */ 898288330Sngie 899288330Sngie /* connect to last table, if there is one */ 900288330Sngie if (h) 901288330Sngie { 902288330Sngie x[h] = i; /* save pattern for backing up */ 903288330Sngie r.b = (uch)l[h-1]; /* bits to dump before this table */ 904288330Sngie r.e = (uch)(16 + j); /* bits in this table */ 905288330Sngie r.v.t = q; /* pointer to this table */ 906288330Sngie j = (i & ((1 << w) - 1)) >> (w - l[h-1]); 907288330Sngie u[h-1][j] = r; /* connect to last table */ 908288330Sngie } 909288330Sngie } 910288330Sngie 911288330Sngie /* set up table entry in r */ 912288330Sngie r.b = (uch)(k - w); 913284388Sngie if (p >= v + n) 914284388Sngie r.e = 99; /* out of values--invalid code */ 915289965Sngie else if (*p < s) 916284388Sngie { 917284388Sngie r.e = (uch)(*p < 256 ? 
16 : 15); /* 256 is end-of-block code */ 918284388Sngie r.v.n = *p++; /* simple code is just the value */ 919288330Sngie } 920288330Sngie else 921289965Sngie { 922289965Sngie r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ 923288330Sngie r.v.n = d[*p++ - s]; 924289965Sngie } 925288330Sngie 926288330Sngie /* fill code-like entries with r */ 927289965Sngie f = 1 << (k - w); 928288330Sngie for (j = i >> w; j < z; j += f) 929289965Sngie q[j] = r; 930289965Sngie 931289965Sngie /* backwards increment the k-bit code i */ 932288330Sngie for (j = 1 << (k - 1); i & j; j >>= 1) 933288330Sngie i ^= j; 934288330Sngie i ^= j; 935288330Sngie 936289965Sngie /* backup over finished tables */ 937288330Sngie while ((i & ((1 << w) - 1)) != x[h]) 938289965Sngie w -= l[--h]; /* don't need to update q */ 939289965Sngie } 940289965Sngie } 941289965Sngie 942289965Sngie 943289965Sngie /* return actual size of base table */ 944289965Sngie *m = l[0]; 945288330Sngie 946289965Sngie 947289965Sngie /* Return true (1) if we were given an incomplete table */ 948289965Sngie return y != 0 && g != 1; 949289965Sngie} 950289965Sngie 951289965Sngie 952289965Sngie 953289965Sngieint huft_free(gz,t) 954289965Sngiestruct gzip *gz; 955288330Sngiestruct huft *t; /* table to free */ 956289965Sngie/* Free the malloc'ed tables built by huft_build(), which makes a linked 957288330Sngie list of the tables it made, with the links in a dummy first entry of 958284388Sngie each table. */ 959{ 960 register struct huft *p, *q; 961 962 963 /* Go through linked list, freeing from the malloced (t[-1]) address. 
*/ 964 p = t; 965 while (p != (struct huft *)NULL) 966 { 967 q = (--p)->v.t; 968 free(p); 969 p = q; 970 } 971 return 0; 972} 973 974 975 976#ifdef ASM_INFLATECODES 977# define inflate_codes(tl,td,bl,bd) flate_codes(tl,td,bl,bd,(uch *)slide) 978 int flate_codes OF((struct huft *, struct huft *, int, int, uch *)); 979 980#else 981 982int inflate_codes(gz,tl, td, bl, bd) 983struct gzip *gz; 984struct huft *tl, *td; /* literal/length and distance decoder tables */ 985int bl, bd; /* number of bits decoded by tl[] and td[] */ 986/* inflate (decompress) the codes in a deflated (compressed) block. 987 Return an error code or zero if it all goes ok. */ 988{ 989 register unsigned e; /* table entry flag/number of extra bits */ 990 unsigned n, d; /* length and index for copy */ 991 unsigned w; /* current window position */ 992 struct huft *t; /* pointer to table entry */ 993 unsigned ml, md; /* masks for bl and bd bits */ 994 register ulg b; /* bit buffer */ 995 register unsigned k; /* number of bits in bit buffer */ 996 997 998 /* make local copies of globals */ 999 b = bb; /* initialize bit buffer */ 1000 k = bk; 1001 w = wp; /* initialize window position */ 1002 1003 1004 /* inflate the coded data */ 1005 ml = mask[bl]; /* precompute masks for speed */ 1006 md = mask[bd]; 1007 while (1) /* do until end of block */ 1008 { 1009 NEEDBITS((unsigned)bl) 1010 if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) 1011 do { 1012 if (e == 99) 1013 return 1; 1014 DUMPBITS(t->b) 1015 e -= 16; 1016 NEEDBITS(e) 1017 } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16); 1018 DUMPBITS(t->b) 1019 if (e == 16) /* then it's a literal */ 1020 { 1021 slide[w++] = (uch)t->v.n; 1022 if (w == WSIZE) 1023 { 1024 FLUSH(gz,w); 1025 w = 0; 1026 } 1027 } 1028 else /* it's an EOB or a length */ 1029 { 1030 /* exit if end of block */ 1031 if (e == 15) 1032 break; 1033 1034 /* get length of block to copy */ 1035 NEEDBITS(e) 1036 n = t->v.n + ((unsigned)b & mask[e]); 1037 DUMPBITS(e); 1038 1039 /* 
decode distance of block to copy */ 1040 NEEDBITS((unsigned)bd) 1041 if ((e = (t = td + ((unsigned)b & md))->e) > 16) 1042 do { 1043 if (e == 99) 1044 return 1; 1045 DUMPBITS(t->b) 1046 e -= 16; 1047 NEEDBITS(e) 1048 } while ((e = (t = t->v.t + ((unsigned)b & mask[e]))->e) > 16); 1049 DUMPBITS(t->b) 1050 NEEDBITS(e) 1051 d = w - t->v.n - ((unsigned)b & mask[e]); 1052 DUMPBITS(e) 1053 1054 /* do the copy */ 1055 do { 1056 n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e); 1057#ifndef NOMEMCPY 1058 if (w - d >= e) /* (this test assumes unsigned comparison) */ 1059 { 1060 memcpy(slide + w, slide + d, e); 1061 w += e; 1062 d += e; 1063 } 1064 else /* do it slow to avoid memcpy() overlap */ 1065#endif /* !NOMEMCPY */ 1066 do { 1067 slide[w++] = slide[d++]; 1068 } while (--e); 1069 if (w == WSIZE) 1070 { 1071 FLUSH(gz,w); 1072 w = 0; 1073 } 1074 } while (n); 1075 } 1076 } 1077 1078 1079 /* restore the globals from the locals */ 1080 wp = w; /* restore global window pointer */ 1081 bb = b; /* restore global bit buffer */ 1082 bk = k; 1083 1084 1085 /* done */ 1086 return 0; 1087} 1088 1089#endif /* ASM_INFLATECODES */ 1090 1091 1092 1093int inflate_stored(gz) 1094struct gzip *gz; 1095/* "decompress" an inflated type 0 (stored) block. 
*/ 1096{ 1097 unsigned n; /* number of bytes in block */ 1098 unsigned w; /* current window position */ 1099 register ulg b; /* bit buffer */ 1100 register unsigned k; /* number of bits in bit buffer */ 1101 1102 1103 /* make local copies of globals */ 1104 Trace((stderr, "\nstored block")); 1105 b = bb; /* initialize bit buffer */ 1106 k = bk; 1107 w = wp; /* initialize window position */ 1108 1109 1110 /* go to byte boundary */ 1111 n = k & 7; 1112 DUMPBITS(n); 1113 1114 1115 /* get the length and its complement */ 1116 NEEDBITS(16) 1117 n = ((unsigned)b & 0xffff); 1118 DUMPBITS(16) 1119 NEEDBITS(16) 1120 if (n != (unsigned)((~b) & 0xffff)) 1121 return 1; /* error in compressed data */ 1122 DUMPBITS(16) 1123 1124 1125 /* read and output the compressed data */ 1126 while (n--) 1127 { 1128 NEEDBITS(8) 1129 slide[w++] = (uch)b; 1130 if (w == WSIZE) 1131 { 1132 FLUSH(gz,w); 1133 w = 0; 1134 } 1135 DUMPBITS(8) 1136 } 1137 1138 1139 /* restore the globals from the locals */ 1140 wp = w; /* restore global window pointer */ 1141 bb = b; /* restore global bit buffer */ 1142 bk = k; 1143 return 0; 1144} 1145 1146 1147/* Globals for literal tables (built once) */ 1148struct huft *fixed_tl = (struct huft *)NULL; 1149struct huft *fixed_td; 1150int fixed_bl, fixed_bd; 1151 1152int inflate_fixed(gz) 1153struct gzip *gz; 1154/* decompress an inflated type 1 (fixed Huffman codes) block. We should 1155 either replace this with a custom decoder, or at least precompute the 1156 Huffman tables. 
*/ 1157{ 1158 /* if first time, set up tables for fixed blocks */ 1159 Trace((stderr, "\nliteral block")); 1160 if (fixed_tl == (struct huft *)NULL) 1161 { 1162 int i; /* temporary variable */ 1163 static unsigned l[288]; /* length list for huft_build */ 1164 1165 /* literal table */ 1166 for (i = 0; i < 144; i++) 1167 l[i] = 8; 1168 for (; i < 256; i++) 1169 l[i] = 9; 1170 for (; i < 280; i++) 1171 l[i] = 7; 1172 for (; i < 288; i++) /* make a complete, but wrong code set */ 1173 l[i] = 8; 1174 fixed_bl = 7; 1175 if ((i = huft_build(gz,l, 288, 257, cplens, cplext, 1176 &fixed_tl, &fixed_bl)) != 0) 1177 { 1178 fixed_tl = (struct huft *)NULL; 1179 return i; 1180 } 1181 1182 /* distance table */ 1183 for (i = 0; i < 30; i++) /* make an incomplete code set */ 1184 l[i] = 5; 1185 fixed_bd = 5; 1186 if ((i = huft_build(gz,l, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd)) > 1) 1187 { 1188 huft_free(gz,fixed_tl); 1189 fixed_tl = (struct huft *)NULL; 1190 return i; 1191 } 1192 } 1193 1194 1195 /* decompress until an end-of-block code */ 1196 return inflate_codes(gz,fixed_tl, fixed_td, fixed_bl, fixed_bd) != 0; 1197} 1198 1199 1200 1201int inflate_dynamic(gz) 1202struct gzip *gz; 1203/* decompress an inflated type 2 (dynamic Huffman codes) block. 
*/ 1204{ 1205 int i; /* temporary variables */ 1206 unsigned j; 1207 unsigned l; /* last length */ 1208 unsigned m; /* mask for bit lengths table */ 1209 unsigned n; /* number of lengths to get */ 1210 struct huft *tl; /* literal/length code table */ 1211 struct huft *td; /* distance code table */ 1212 int bl; /* lookup bits for tl */ 1213 int bd; /* lookup bits for td */ 1214 unsigned nb; /* number of bit length codes */ 1215 unsigned nl; /* number of literal/length codes */ 1216 unsigned nd; /* number of distance codes */ 1217#ifdef PKZIP_BUG_WORKAROUND 1218 static unsigned ll[288+32]; /* literal/length and distance code lengths */ 1219#else 1220 static unsigned ll[286+30]; /* literal/length and distance code lengths */ 1221#endif 1222 register ulg b; /* bit buffer */ 1223 register unsigned k; /* number of bits in bit buffer */ 1224 1225 1226 /* make local bit buffer */ 1227 Trace((stderr, "\ndynamic block")); 1228 b = bb; 1229 k = bk; 1230 1231 1232 /* read in table lengths */ 1233 NEEDBITS(5) 1234 nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ 1235 DUMPBITS(5) 1236 NEEDBITS(5) 1237 nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ 1238 DUMPBITS(5) 1239 NEEDBITS(4) 1240 nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ 1241 DUMPBITS(4) 1242#ifdef PKZIP_BUG_WORKAROUND 1243 if (nl > 288 || nd > 32) 1244#else 1245 if (nl > 286 || nd > 30) 1246#endif 1247 return 1; /* bad lengths */ 1248 1249 1250 /* read in bit-length-code lengths */ 1251 for (j = 0; j < nb; j++) 1252 { 1253 NEEDBITS(3) 1254 ll[border[j]] = (unsigned)b & 7; 1255 DUMPBITS(3) 1256 } 1257 for (; j < 19; j++) 1258 ll[border[j]] = 0; 1259 1260 1261 /* build decoding table for trees--single level, 7 bit lookup */ 1262 bl = 7; 1263 if ((i = huft_build(gz,ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) 1264 { 1265 if (i == 1) 1266 huft_free(gz,tl); 1267 return i; /* incomplete code set */ 1268 } 1269 1270 1271 /* read in literal and distance code lengths */ 1272 n = 
nl + nd; 1273 m = mask[bl]; 1274 i = l = 0; 1275 while ((unsigned)i < n) 1276 { 1277 NEEDBITS((unsigned)bl) 1278 j = (td = tl + ((unsigned)b & m))->b; 1279 DUMPBITS(j) 1280 j = td->v.n; 1281 if (j < 16) /* length of code in bits (0..15) */ 1282 ll[i++] = l = j; /* save last length in l */ 1283 else if (j == 16) /* repeat last length 3 to 6 times */ 1284 { 1285 NEEDBITS(2) 1286 j = 3 + ((unsigned)b & 3); 1287 DUMPBITS(2) 1288 if ((unsigned)i + j > n) 1289 return 1; 1290 while (j--) 1291 ll[i++] = l; 1292 } 1293 else if (j == 17) /* 3 to 10 zero length codes */ 1294 { 1295 NEEDBITS(3) 1296 j = 3 + ((unsigned)b & 7); 1297 DUMPBITS(3) 1298 if ((unsigned)i + j > n) 1299 return 1; 1300 while (j--) 1301 ll[i++] = 0; 1302 l = 0; 1303 } 1304 else /* j == 18: 11 to 138 zero length codes */ 1305 { 1306 NEEDBITS(7) 1307 j = 11 + ((unsigned)b & 0x7f); 1308 DUMPBITS(7) 1309 if ((unsigned)i + j > n) 1310 return 1; 1311 while (j--) 1312 ll[i++] = 0; 1313 l = 0; 1314 } 1315 } 1316 1317 1318 /* free decoding table for trees */ 1319 huft_free(gz,tl); 1320 1321 1322 /* restore the global bit buffer */ 1323 bb = b; 1324 bk = k; 1325 1326 1327 /* build the decoding tables for literal/length and distance codes */ 1328 bl = lbits; 1329 if ((i = huft_build(gz,ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) 1330 { 1331 if (i == 1 && !qflag) { 1332 FPRINTF( "(incomplete l-tree) "); 1333 huft_free(gz,tl); 1334 } 1335 return i; /* incomplete code set */ 1336 } 1337 bd = dbits; 1338 if ((i = huft_build(gz,ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) 1339 { 1340 if (i == 1 && !qflag) { 1341 FPRINTF( "(incomplete d-tree) "); 1342#ifdef PKZIP_BUG_WORKAROUND 1343 i = 0; 1344 } 1345#else 1346 huft_free(gz,td); 1347 } 1348 huft_free(gz,tl); 1349 return i; /* incomplete code set */ 1350#endif 1351 } 1352 1353 1354 /* decompress until an end-of-block code */ 1355 if (inflate_codes(gz,tl, td, bl, bd)) 1356 return 1; 1357 1358 1359 /* free the decoding tables, return */ 1360 huft_free(gz,tl); 1361 
huft_free(gz,td); 1362 return 0; 1363} 1364 1365 1366 1367int inflate_block(gz,e) 1368struct gzip *gz; 1369int *e; /* last block flag */ 1370/* decompress an inflated block */ 1371{ 1372 unsigned t; /* block type */ 1373 register ulg b; /* bit buffer */ 1374 register unsigned k; /* number of bits in bit buffer */ 1375 1376 1377 /* make local bit buffer */ 1378 b = bb; 1379 k = bk; 1380 1381 1382 /* read in last block bit */ 1383 NEEDBITS(1) 1384 *e = (int)b & 1; 1385 DUMPBITS(1) 1386 1387 1388 /* read in block type */ 1389 NEEDBITS(2) 1390 t = (unsigned)b & 3; 1391 DUMPBITS(2) 1392 1393 1394 /* restore the global bit buffer */ 1395 bb = b; 1396 bk = k; 1397 1398 1399 /* inflate that block type */ 1400 if (t == 2) 1401 return inflate_dynamic(gz); 1402 if (t == 0) 1403 return inflate_stored(gz); 1404 if (t == 1) 1405 return inflate_fixed(gz); 1406 1407 1408 /* bad block type */ 1409 return 2; 1410} 1411 1412 1413 1414int inflate(gz) 1415struct gzip *gz; 1416/* decompress an inflated entry */ 1417{ 1418 int e; /* last block flag */ 1419 int r; /* result code */ 1420 unsigned h; /* maximum struct huft's malloc'ed */ 1421 1422 1423 /* initialize window, bit buffer */ 1424 wp = 0; 1425 bk = 0; 1426 bb = 0; 1427 1428 1429 /* decompress until the last block */ 1430 h = 0; 1431 do { 1432 hufts = 0; 1433 if ((r = inflate_block(gz,&e)) != 0) 1434 return r; 1435 if (hufts > h) 1436 h = hufts; 1437 } while (!e); 1438 1439 1440 /* flush out slide */ 1441 FLUSH(gz,wp); 1442 1443 1444 /* return success */ 1445 Trace((stderr, "\n%u bytes in Huffman tables (%d/entry)\n", 1446 h * sizeof(struct huft), sizeof(struct huft))); 1447 return 0; 1448} 1449 1450 1451 1452int inflate_free(gz) 1453struct gzip *gz; 1454{ 1455 if (fixed_tl != (struct huft *)NULL) 1456 { 1457 huft_free(gz,fixed_td); 1458 huft_free(gz,fixed_tl); 1459 fixed_td = fixed_tl = (struct huft *)NULL; 1460 } 1461 return 0; 1462} 1463