128328Ssos/* $NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */ 228328Ssos 328328Ssos/*- 428328Ssos * Copyright (c) 1985, 1986, 1992, 1993 528328Ssos * The Regents of the University of California. All rights reserved. 628328Ssos * 728328Ssos * This code is derived from software contributed to Berkeley by 828328Ssos * Diomidis Spinellis and James A. Woods, derived from original 928328Ssos * work by Spencer Thomas and Joseph Orost. 1028328Ssos * 1128328Ssos * Redistribution and use in source and binary forms, with or without 1228328Ssos * modification, are permitted provided that the following conditions 1328328Ssos * are met: 1428328Ssos * 1. Redistributions of source code must retain the above copyright 1528328Ssos * notice, this list of conditions and the following disclaimer. 1628328Ssos * 2. Redistributions in binary form must reproduce the above copyright 1728328Ssos * notice, this list of conditions and the following disclaimer in the 1828328Ssos * documentation and/or other materials provided with the distribution. 1928328Ssos * 3. Neither the name of the University nor the names of its contributors 2028328Ssos * may be used to endorse or promote products derived from this software 2128328Ssos * without specific prior written permission. 2228328Ssos * 2328328Ssos * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2428328Ssos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2528328Ssos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2628328Ssos * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2728328Ssos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2850476Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2928328Ssos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3028328Ssos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3128328Ssos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3266834Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3328328Ssos * SUCH DAMAGE. 3428328Ssos * 3528328Ssos * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp 3628328Ssos * $FreeBSD$ 3728328Ssos */ 3828328Ssos 3928328Ssos/* This file is #included by gzip.c */ 4028328Ssos 4153013Syokotastatic int zread(void *, char *, int); 4253013Syokota 4328328Ssos#define tab_prefixof(i) (zs->zs_codetab[i]) 4428328Ssos#define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i] 4528328Ssos#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) 4628328Ssos 4753013Syokota#define BITS 16 /* Default bits. */ 4853013Syokota#define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */ 4928328Ssos#define BIT_MASK 0x1f /* Defines for third byte of header. */ 5053013Syokota#define BLOCK_MASK 0x80 5128328Ssos#define CHECK_GAP 10000 /* Ratio check interval. */ 5228328Ssos#define BUFSIZE (64 * 1024) 5328328Ssos 5428328Ssos/* 5553013Syokota * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 5628328Ssos * a fourth header byte (for expansion). 5753013Syokota */ 5853013Syokota#define INIT_BITS 9 /* Initial number of bits/code. */ 5953013Syokota 6028328Ssos/* 6128328Ssos * the next two codes should not be changed lightly, as they must not 6228328Ssos * lie within the contiguous general code space. 6353013Syokota */ 6428328Ssos#define FIRST 257 /* First free entry. */ 6553013Syokota#define CLEAR 256 /* Table clear output code. */ 6653013Syokota 6753013Syokota 6828328Ssos#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 6953013Syokota 7053013Syokotatypedef long code_int; 7153013Syokotatypedef long count_int; 7253013Syokotatypedef u_char char_type; 7353013Syokota 7453013Syokotastatic char_type magic_header[] = 7553013Syokota {'\037', '\235'}; /* 1F 9D */ 7628328Ssos 7728328Ssosstatic char_type rmask[9] = 7828328Ssos {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 7928328Ssos 8028328Ssosstatic off_t total_compressed_bytes; 8128328Ssosstatic size_t compressed_prelen; 8228328Ssosstatic char *compressed_pre; 8328328Ssos 8428328Ssosstruct s_zstate { 8553013Syokota FILE *zs_fp; /* File stream for I/O */ 8653013Syokota char zs_mode; /* r or w */ 8753013Syokota enum { 8853013Syokota S_START, S_MIDDLE, S_EOF 8953013Syokota } zs_state; /* State of computation */ 9053013Syokota int zs_n_bits; /* Number of bits/code. */ 9153013Syokota int zs_maxbits; /* User settable max # bits/code. */ 9228328Ssos code_int zs_maxcode; /* Maximum code, given n_bits. */ 9353013Syokota code_int zs_maxmaxcode; /* Should NEVER generate this code. */ 9453013Syokota count_int zs_htab [HSIZE]; 9528328Ssos u_short zs_codetab [HSIZE]; 9628328Ssos code_int zs_hsize; /* For dynamic table sizing. */ 9728328Ssos code_int zs_free_ent; /* First unused entry. */ 9853013Syokota /* 9953013Syokota * Block compression parameters -- after all codes are used up, 10053013Syokota * and compression rate changes, start over. 10128328Ssos */ 10228328Ssos int zs_block_compress; 10353013Syokota int zs_clear_flg; 10453013Syokota long zs_ratio; 10553013Syokota count_int zs_checkpoint; 10653013Syokota int zs_offset; 10728328Ssos long zs_in_count; /* Length of input. */ 10853013Syokota long zs_bytes_out; /* Length of compressed output. */ 10953013Syokota long zs_out_count; /* # of codes output (for debugging). */ 11053013Syokota char_type zs_buf[BITS]; 11153013Syokota union { 11253013Syokota struct { 11353013Syokota long zs_fcode; 11453013Syokota code_int zs_ent; 11553013Syokota code_int zs_hsize_reg; 11653013Syokota int zs_hshift; 11753013Syokota } w; /* Write parameters */ 11853013Syokota struct { 11953013Syokota char_type *zs_stackp; 12053013Syokota int zs_finchar; 12128328Ssos code_int zs_code, zs_oldcode, zs_incode; 12228328Ssos int zs_roffset, zs_size; 12328328Ssos char_type zs_gbuf[BITS]; 12428328Ssos } r; /* Read parameters */ 12528328Ssos } u; 12628328Ssos}; 12728328Ssos 12828328Ssosstatic code_int getcode(struct s_zstate *zs); 12928328Ssos 13028328Ssosstatic off_t 13128328Ssoszuncompress(FILE *in, FILE *out, char *pre, size_t prelen, 13228328Ssos off_t *compressed_bytes) 13328328Ssos{ 13428328Ssos off_t bin, bout = 0; 13528328Ssos char *buf; 13628328Ssos 13728328Ssos buf = malloc(BUFSIZE); 13828328Ssos if (buf == NULL) 13928328Ssos return -1; 14028328Ssos 14128328Ssos /* XXX */ 14228328Ssos compressed_prelen = prelen; 14328328Ssos if (prelen != 0) 14428328Ssos compressed_pre = pre; 14528328Ssos else 14628328Ssos compressed_pre = NULL; 14728328Ssos 14828328Ssos while ((bin = fread(buf, 1, BUFSIZE, in)) != 0) { 14928328Ssos if (tflag == 0 && (off_t)fwrite(buf, 1, bin, out) != bin) { 15028328Ssos free(buf); 15128328Ssos return -1; 15228328Ssos } 15328328Ssos bout += bin; 15428328Ssos } 15528328Ssos 15628328Ssos if (compressed_bytes) 15728328Ssos *compressed_bytes = total_compressed_bytes; 15828328Ssos 15928328Ssos free(buf); 16028328Ssos return bout; 16128328Ssos} 16228328Ssos 16328328Ssosstatic int 16428328Ssoszclose(void *zs) 16528328Ssos{ 16628328Ssos free(zs); 16728328Ssos /* We leave the caller to close the fd passed to zdopen() */ 16828328Ssos return 0; 16928328Ssos} 17028328Ssos 17128328SsosFILE * 17228328Ssoszdopen(int fd) 17328328Ssos{ 17428328Ssos struct s_zstate *zs; 17528328Ssos 17628328Ssos if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) 17728328Ssos return (NULL); 17828328Ssos 17928328Ssos zs->zs_state = S_START; 18028328Ssos 18128328Ssos /* XXX we can get rid of some of these */ 18228328Ssos zs->zs_hsize = HSIZE; /* For dynamic table sizing. */ 18328328Ssos zs->zs_free_ent = 0; /* First unused entry. */ 18428328Ssos zs->zs_block_compress = BLOCK_MASK; 18528328Ssos zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */ 18628328Ssos zs->zs_ratio = 0; 18728328Ssos zs->zs_checkpoint = CHECK_GAP; 18828328Ssos zs->zs_in_count = 1; /* Length of input. */ 18928328Ssos zs->zs_out_count = 0; /* # of codes output (for debugging). */ 19028328Ssos zs->u.r.zs_roffset = 0; 19128328Ssos zs->u.r.zs_size = 0; 19228328Ssos 19328328Ssos /* 19428328Ssos * Layering compress on top of stdio in order to provide buffering, 19528328Ssos * and ensure that reads and write work with the data specified. 19628328Ssos */ 19728328Ssos if ((zs->zs_fp = fdopen(fd, "r")) == NULL) { 19828328Ssos free(zs); 19928328Ssos return NULL; 20028328Ssos } 20128328Ssos 20228328Ssos return funopen(zs, zread, NULL, NULL, zclose); 20328328Ssos} 20428328Ssos 20528328Ssos/* 20628328Ssos * Decompress read. This routine adapts to the codes in the file building 20728328Ssos * the "string" table on-the-fly; requiring no table to be stored in the 20828328Ssos * compressed file. The tables used herein are shared with those of the 20928328Ssos * compress() routine. See the definitions above. 21028328Ssos */ 21128328Ssosstatic int 21228328Ssoszread(void *cookie, char *rbp, int num) 21328328Ssos{ 21428328Ssos u_int count, i; 21528328Ssos struct s_zstate *zs; 21628328Ssos u_char *bp, header[3]; 21728328Ssos 21828328Ssos if (num == 0) 21928328Ssos return (0); 22028328Ssos 22128328Ssos zs = cookie; 22228328Ssos count = num; 22328328Ssos bp = (u_char *)rbp; 22428328Ssos switch (zs->zs_state) { 22528328Ssos case S_START: 22628328Ssos zs->zs_state = S_MIDDLE; 22728328Ssos break; 22828328Ssos case S_MIDDLE: 22928328Ssos goto middle; 23028328Ssos case S_EOF: 23128328Ssos goto eof; 23228328Ssos } 23328328Ssos 23428328Ssos /* Check the magic number */ 23528328Ssos for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--) 23628328Ssos header[i] = *compressed_pre++; 23728328Ssos 23828328Ssos if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) != 23928328Ssos sizeof(header) - i || 24028328Ssos memcmp(header, magic_header, sizeof(magic_header)) != 0) { 24128328Ssos errno = EFTYPE; 24228328Ssos return (-1); 24328328Ssos } 24428328Ssos total_compressed_bytes = 0; 24528328Ssos zs->zs_maxbits = header[2]; /* Set -b from file. */ 24628328Ssos zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK; 24728328Ssos zs->zs_maxbits &= BIT_MASK; 24828328Ssos zs->zs_maxmaxcode = 1L << zs->zs_maxbits; 24928328Ssos if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) { 25028328Ssos errno = EFTYPE; 25128328Ssos return (-1); 25228328Ssos } 25328328Ssos /* As above, initialize the first 256 entries in the table. */ 25428328Ssos zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); 25528328Ssos for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) { 25653013Syokota tab_prefixof(zs->u.r.zs_code) = 0; 25753013Syokota tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code; 25853013Syokota } 25928328Ssos zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256; 26028328Ssos 26128328Ssos zs->u.r.zs_oldcode = -1; 26228328Ssos zs->u.r.zs_stackp = de_stack; 26328328Ssos 26453013Syokota while ((zs->u.r.zs_code = getcode(zs)) > -1) { 26528328Ssos 26653013Syokota if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) { 26753013Syokota for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; 26853013Syokota zs->u.r.zs_code--) 26953013Syokota tab_prefixof(zs->u.r.zs_code) = 0; 27053013Syokota zs->zs_clear_flg = 1; 27153013Syokota zs->zs_free_ent = FIRST; 27253013Syokota zs->u.r.zs_oldcode = -1; 27353013Syokota continue; 27453013Syokota } 27553013Syokota zs->u.r.zs_incode = zs->u.r.zs_code; 27653013Syokota 27728328Ssos /* Special case for KwKwK string. */ 27828328Ssos if (zs->u.r.zs_code >= zs->zs_free_ent) { 27953013Syokota if (zs->u.r.zs_code > zs->zs_free_ent || 28028328Ssos zs->u.r.zs_oldcode == -1) { 28153013Syokota /* Bad stream. */ 28228328Ssos errno = EINVAL; 28328328Ssos return (-1); 28428328Ssos } 28553013Syokota *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar; 28628328Ssos zs->u.r.zs_code = zs->u.r.zs_oldcode; 28753013Syokota } 28853013Syokota /* 28953013Syokota * The above condition ensures that code < free_ent. 29053013Syokota * The construction of tab_prefixof in turn guarantees that 29153013Syokota * each iteration decreases code and therefore stack usage is 29253013Syokota * bound by 1 << BITS - 256. 29353013Syokota */ 29453013Syokota 29553013Syokota /* Generate output characters in reverse order. */ 29653013Syokota while (zs->u.r.zs_code >= 256) { 29753013Syokota *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code); 29853013Syokota zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code); 29928328Ssos } 30028328Ssos *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code); 30128328Ssos 30228328Ssos /* And put them out in forward order. */ 30328328Ssosmiddle: do { 30428328Ssos if (count-- == 0) 30528328Ssos return (num); 30628328Ssos *bp++ = *--zs->u.r.zs_stackp; 30728328Ssos } while (zs->u.r.zs_stackp > de_stack); 30828328Ssos 30928328Ssos /* Generate the new entry. */ 31028328Ssos if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode && 31128328Ssos zs->u.r.zs_oldcode != -1) { 31228328Ssos tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode; 31328328Ssos tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar; 31428328Ssos zs->zs_free_ent = zs->u.r.zs_code + 1; 31528328Ssos } 31628328Ssos 31728328Ssos /* Remember previous code. */ 31828328Ssos zs->u.r.zs_oldcode = zs->u.r.zs_incode; 31928328Ssos } 32028328Ssos zs->zs_state = S_EOF; 32128328Ssoseof: return (num - count); 32228328Ssos} 32328328Ssos 32428328Ssos/*- 32528328Ssos * Read one code from the standard input. If EOF, return -1. 32628328Ssos * Inputs: 32728328Ssos * stdin 32828328Ssos * Outputs: 32928328Ssos * code or -1 is returned. 33028328Ssos */ 33128328Ssosstatic code_int 33228328Ssosgetcode(struct s_zstate *zs) 33328328Ssos{ 33428328Ssos code_int gcode; 33528328Ssos int r_off, bits, i; 33628328Ssos char_type *bp; 33728328Ssos 33828328Ssos bp = zs->u.r.zs_gbuf; 33928328Ssos if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size || 34028328Ssos zs->zs_free_ent > zs->zs_maxcode) { 34128328Ssos /* 34228328Ssos * If the next entry will be too big for the current gcode 34328328Ssos * size, then we must increase the size. This implies reading 34428328Ssos * a new buffer full, too. 34528328Ssos */ 34628328Ssos if (zs->zs_free_ent > zs->zs_maxcode) { 34728328Ssos zs->zs_n_bits++; 34828328Ssos if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */ 34928328Ssos zs->zs_maxcode = zs->zs_maxmaxcode; 35028328Ssos else 35128328Ssos zs->zs_maxcode = MAXCODE(zs->zs_n_bits); 35228328Ssos } 35328328Ssos if (zs->zs_clear_flg > 0) { 35428328Ssos zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); 35528328Ssos zs->zs_clear_flg = 0; 35628328Ssos } 35728328Ssos /* XXX */ 35828328Ssos for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--) 35928328Ssos zs->u.r.zs_gbuf[i] = *compressed_pre++; 36028328Ssos zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp); 36128328Ssos zs->u.r.zs_size += i; 36228328Ssos if (zs->u.r.zs_size <= 0) /* End of file. */ 36328328Ssos return (-1); 36428328Ssos zs->u.r.zs_roffset = 0; 36528328Ssos 36628328Ssos total_compressed_bytes += zs->u.r.zs_size; 36728328Ssos 36828328Ssos /* Round size down to integral number of codes. */ 36928328Ssos zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1); 37028328Ssos } 37128328Ssos r_off = zs->u.r.zs_roffset; 37228328Ssos bits = zs->zs_n_bits; 37328328Ssos 37428328Ssos /* Get to the first byte. */ 37528328Ssos bp += (r_off >> 3); 37628328Ssos r_off &= 7; 37728328Ssos 37828328Ssos /* Get first part (low order bits). */ 37928328Ssos gcode = (*bp++ >> r_off); 38028328Ssos bits -= (8 - r_off); 38128328Ssos r_off = 8 - r_off; /* Now, roffset into gcode word. */ 38228328Ssos 38328328Ssos /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 38428328Ssos if (bits >= 8) { 38528328Ssos gcode |= *bp++ << r_off; 38628328Ssos r_off += 8; 38728328Ssos bits -= 8; 38828328Ssos } 38928328Ssos 39028328Ssos /* High order bits. */ 39128328Ssos gcode |= (*bp & rmask[bits]) << r_off; 39228328Ssos zs->u.r.zs_roffset += zs->zs_n_bits; 39328328Ssos 39428328Ssos return (gcode); 39528328Ssos} 39628328Ssos 39728328Ssos