128328Ssos/*	$NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */
228328Ssos
328328Ssos/*-
428328Ssos * Copyright (c) 1985, 1986, 1992, 1993
528328Ssos *	The Regents of the University of California.  All rights reserved.
628328Ssos *
728328Ssos * This code is derived from software contributed to Berkeley by
828328Ssos * Diomidis Spinellis and James A. Woods, derived from original
928328Ssos * work by Spencer Thomas and Joseph Orost.
1028328Ssos *
1128328Ssos * Redistribution and use in source and binary forms, with or without
1228328Ssos * modification, are permitted provided that the following conditions
1328328Ssos * are met:
1428328Ssos * 1. Redistributions of source code must retain the above copyright
1528328Ssos *    notice, this list of conditions and the following disclaimer.
1628328Ssos * 2. Redistributions in binary form must reproduce the above copyright
1728328Ssos *    notice, this list of conditions and the following disclaimer in the
1828328Ssos *    documentation and/or other materials provided with the distribution.
1928328Ssos * 3. Neither the name of the University nor the names of its contributors
2028328Ssos *    may be used to endorse or promote products derived from this software
2128328Ssos *    without specific prior written permission.
2228328Ssos *
2328328Ssos * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2428328Ssos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2528328Ssos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2628328Ssos * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2728328Ssos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2850476Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2928328Ssos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3028328Ssos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3128328Ssos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3266834Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3328328Ssos * SUCH DAMAGE.
3428328Ssos *
3528328Ssos * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
3628328Ssos * $FreeBSD$
3728328Ssos */
3828328Ssos
3928328Ssos/* This file is #included by gzip.c */
4028328Ssos
4153013Syokotastatic int	zread(void *, char *, int);
4253013Syokota
4328328Ssos#define	tab_prefixof(i)	(zs->zs_codetab[i])
4428328Ssos#define	tab_suffixof(i)	((char_type *)(zs->zs_htab))[i]
4528328Ssos#define	de_stack	((char_type *)&tab_suffixof(1 << BITS))
4628328Ssos
4753013Syokota#define BITS		16		/* Default bits. */
4853013Syokota#define HSIZE		69001		/* 95% occupancy */ /* XXX may not need HSIZE */
4928328Ssos#define BIT_MASK	0x1f		/* Defines for third byte of header. */
5053013Syokota#define BLOCK_MASK	0x80
5128328Ssos#define CHECK_GAP	10000		/* Ratio check interval. */
5228328Ssos#define BUFSIZE		(64 * 1024)
5328328Ssos
5428328Ssos/*
5553013Syokota * Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
5628328Ssos * a fourth header byte (for expansion).
5753013Syokota */
5853013Syokota#define INIT_BITS	9	/* Initial number of bits/code. */
5953013Syokota
6028328Ssos/*
6128328Ssos * the next two codes should not be changed lightly, as they must not
6228328Ssos * lie within the contiguous general code space.
6353013Syokota */
6428328Ssos#define	FIRST	257		/* First free entry. */
6553013Syokota#define	CLEAR	256		/* Table clear output code. */
6653013Syokota
6753013Syokota
6828328Ssos#define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
6953013Syokota
7053013Syokotatypedef long	code_int;
7153013Syokotatypedef long	count_int;
7253013Syokotatypedef u_char	char_type;
7353013Syokota
7453013Syokotastatic char_type magic_header[] =
7553013Syokota	{'\037', '\235'};	/* 1F 9D */
7628328Ssos
7728328Ssosstatic char_type rmask[9] =
7828328Ssos	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
7928328Ssos
8028328Ssosstatic off_t total_compressed_bytes;
8128328Ssosstatic size_t compressed_prelen;
8228328Ssosstatic char *compressed_pre;
8328328Ssos
8428328Ssosstruct s_zstate {
8553013Syokota	FILE *zs_fp;			/* File stream for I/O */
8653013Syokota	char zs_mode;			/* r or w */
8753013Syokota	enum {
8853013Syokota		S_START, S_MIDDLE, S_EOF
8953013Syokota	} zs_state;			/* State of computation */
9053013Syokota	int zs_n_bits;			/* Number of bits/code. */
9153013Syokota	int zs_maxbits;			/* User settable max # bits/code. */
9228328Ssos	code_int zs_maxcode;		/* Maximum code, given n_bits. */
9353013Syokota	code_int zs_maxmaxcode;		/* Should NEVER generate this code. */
9453013Syokota	count_int zs_htab [HSIZE];
9528328Ssos	u_short zs_codetab [HSIZE];
9628328Ssos	code_int zs_hsize;		/* For dynamic table sizing. */
9728328Ssos	code_int zs_free_ent;		/* First unused entry. */
9853013Syokota	/*
9953013Syokota	 * Block compression parameters -- after all codes are used up,
10053013Syokota	 * and compression rate changes, start over.
10128328Ssos	 */
10228328Ssos	int zs_block_compress;
10353013Syokota	int zs_clear_flg;
10453013Syokota	long zs_ratio;
10553013Syokota	count_int zs_checkpoint;
10653013Syokota	int zs_offset;
10728328Ssos	long zs_in_count;		/* Length of input. */
10853013Syokota	long zs_bytes_out;		/* Length of compressed output. */
10953013Syokota	long zs_out_count;		/* # of codes output (for debugging). */
11053013Syokota	char_type zs_buf[BITS];
11153013Syokota	union {
11253013Syokota		struct {
11353013Syokota			long zs_fcode;
11453013Syokota			code_int zs_ent;
11553013Syokota			code_int zs_hsize_reg;
11653013Syokota			int zs_hshift;
11753013Syokota		} w;			/* Write parameters */
11853013Syokota		struct {
11953013Syokota			char_type *zs_stackp;
12053013Syokota			int zs_finchar;
12128328Ssos			code_int zs_code, zs_oldcode, zs_incode;
12228328Ssos			int zs_roffset, zs_size;
12328328Ssos			char_type zs_gbuf[BITS];
12428328Ssos		} r;			/* Read parameters */
12528328Ssos	} u;
12628328Ssos};
12728328Ssos
12828328Ssosstatic code_int	getcode(struct s_zstate *zs);
12928328Ssos
13028328Ssosstatic off_t
13128328Ssoszuncompress(FILE *in, FILE *out, char *pre, size_t prelen,
13228328Ssos	    off_t *compressed_bytes)
13328328Ssos{
13428328Ssos	off_t bin, bout = 0;
13528328Ssos	char *buf;
13628328Ssos
13728328Ssos	buf = malloc(BUFSIZE);
13828328Ssos	if (buf == NULL)
13928328Ssos		return -1;
14028328Ssos
14128328Ssos	/* XXX */
14228328Ssos	compressed_prelen = prelen;
14328328Ssos	if (prelen != 0)
14428328Ssos		compressed_pre = pre;
14528328Ssos	else
14628328Ssos		compressed_pre = NULL;
14728328Ssos
14828328Ssos	while ((bin = fread(buf, 1, BUFSIZE, in)) != 0) {
14928328Ssos		if (tflag == 0 && (off_t)fwrite(buf, 1, bin, out) != bin) {
15028328Ssos			free(buf);
15128328Ssos			return -1;
15228328Ssos		}
15328328Ssos		bout += bin;
15428328Ssos	}
15528328Ssos
15628328Ssos	if (compressed_bytes)
15728328Ssos		*compressed_bytes = total_compressed_bytes;
15828328Ssos
15928328Ssos	free(buf);
16028328Ssos	return bout;
16128328Ssos}
16228328Ssos
16328328Ssosstatic int
16428328Ssoszclose(void *zs)
16528328Ssos{
16628328Ssos	free(zs);
16728328Ssos	/* We leave the caller to close the fd passed to zdopen() */
16828328Ssos	return 0;
16928328Ssos}
17028328Ssos
17128328SsosFILE *
17228328Ssoszdopen(int fd)
17328328Ssos{
17428328Ssos	struct s_zstate *zs;
17528328Ssos
17628328Ssos	if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
17728328Ssos		return (NULL);
17828328Ssos
17928328Ssos	zs->zs_state = S_START;
18028328Ssos
18128328Ssos	/* XXX we can get rid of some of these */
18228328Ssos	zs->zs_hsize = HSIZE;			/* For dynamic table sizing. */
18328328Ssos	zs->zs_free_ent = 0;			/* First unused entry. */
18428328Ssos	zs->zs_block_compress = BLOCK_MASK;
18528328Ssos	zs->zs_clear_flg = 0;			/* XXX we calloc()'d this structure why = 0? */
18628328Ssos	zs->zs_ratio = 0;
18728328Ssos	zs->zs_checkpoint = CHECK_GAP;
18828328Ssos	zs->zs_in_count = 1;			/* Length of input. */
18928328Ssos	zs->zs_out_count = 0;			/* # of codes output (for debugging). */
19028328Ssos	zs->u.r.zs_roffset = 0;
19128328Ssos	zs->u.r.zs_size = 0;
19228328Ssos
19328328Ssos	/*
19428328Ssos	 * Layering compress on top of stdio in order to provide buffering,
19528328Ssos	 * and ensure that reads and write work with the data specified.
19628328Ssos	 */
19728328Ssos	if ((zs->zs_fp = fdopen(fd, "r")) == NULL) {
19828328Ssos		free(zs);
19928328Ssos		return NULL;
20028328Ssos	}
20128328Ssos
20228328Ssos	return funopen(zs, zread, NULL, NULL, zclose);
20328328Ssos}
20428328Ssos
20528328Ssos/*
20628328Ssos * Decompress read.  This routine adapts to the codes in the file building
20728328Ssos * the "string" table on-the-fly; requiring no table to be stored in the
20828328Ssos * compressed file.  The tables used herein are shared with those of the
20928328Ssos * compress() routine.  See the definitions above.
21028328Ssos */
21128328Ssosstatic int
21228328Ssoszread(void *cookie, char *rbp, int num)
21328328Ssos{
21428328Ssos	u_int count, i;
21528328Ssos	struct s_zstate *zs;
21628328Ssos	u_char *bp, header[3];
21728328Ssos
21828328Ssos	if (num == 0)
21928328Ssos		return (0);
22028328Ssos
22128328Ssos	zs = cookie;
22228328Ssos	count = num;
22328328Ssos	bp = (u_char *)rbp;
22428328Ssos	switch (zs->zs_state) {
22528328Ssos	case S_START:
22628328Ssos		zs->zs_state = S_MIDDLE;
22728328Ssos		break;
22828328Ssos	case S_MIDDLE:
22928328Ssos		goto middle;
23028328Ssos	case S_EOF:
23128328Ssos		goto eof;
23228328Ssos	}
23328328Ssos
23428328Ssos	/* Check the magic number */
23528328Ssos	for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--)
23628328Ssos		header[i] = *compressed_pre++;
23728328Ssos
23828328Ssos	if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) !=
23928328Ssos		  sizeof(header) - i ||
24028328Ssos	    memcmp(header, magic_header, sizeof(magic_header)) != 0) {
24128328Ssos		errno = EFTYPE;
24228328Ssos		return (-1);
24328328Ssos	}
24428328Ssos	total_compressed_bytes = 0;
24528328Ssos	zs->zs_maxbits = header[2];	/* Set -b from file. */
24628328Ssos	zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
24728328Ssos	zs->zs_maxbits &= BIT_MASK;
24828328Ssos	zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
24928328Ssos	if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) {
25028328Ssos		errno = EFTYPE;
25128328Ssos		return (-1);
25228328Ssos	}
25328328Ssos	/* As above, initialize the first 256 entries in the table. */
25428328Ssos	zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
25528328Ssos	for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
25653013Syokota		tab_prefixof(zs->u.r.zs_code) = 0;
25753013Syokota		tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
25853013Syokota	}
25928328Ssos	zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
26028328Ssos
26128328Ssos	zs->u.r.zs_oldcode = -1;
26228328Ssos	zs->u.r.zs_stackp = de_stack;
26328328Ssos
26453013Syokota	while ((zs->u.r.zs_code = getcode(zs)) > -1) {
26528328Ssos
26653013Syokota		if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
26753013Syokota			for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
26853013Syokota			    zs->u.r.zs_code--)
26953013Syokota				tab_prefixof(zs->u.r.zs_code) = 0;
27053013Syokota			zs->zs_clear_flg = 1;
27153013Syokota			zs->zs_free_ent = FIRST;
27253013Syokota			zs->u.r.zs_oldcode = -1;
27353013Syokota			continue;
27453013Syokota		}
27553013Syokota		zs->u.r.zs_incode = zs->u.r.zs_code;
27653013Syokota
27728328Ssos		/* Special case for KwKwK string. */
27828328Ssos		if (zs->u.r.zs_code >= zs->zs_free_ent) {
27953013Syokota			if (zs->u.r.zs_code > zs->zs_free_ent ||
28028328Ssos			    zs->u.r.zs_oldcode == -1) {
28153013Syokota				/* Bad stream. */
28228328Ssos				errno = EINVAL;
28328328Ssos				return (-1);
28428328Ssos			}
28553013Syokota			*zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
28628328Ssos			zs->u.r.zs_code = zs->u.r.zs_oldcode;
28753013Syokota		}
28853013Syokota		/*
28953013Syokota		 * The above condition ensures that code < free_ent.
29053013Syokota		 * The construction of tab_prefixof in turn guarantees that
29153013Syokota		 * each iteration decreases code and therefore stack usage is
29253013Syokota		 * bound by 1 << BITS - 256.
29353013Syokota		 */
29453013Syokota
29553013Syokota		/* Generate output characters in reverse order. */
29653013Syokota		while (zs->u.r.zs_code >= 256) {
29753013Syokota			*zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
29853013Syokota			zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
29928328Ssos		}
30028328Ssos		*zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
30128328Ssos
30228328Ssos		/* And put them out in forward order.  */
30328328Ssosmiddle:		do {
30428328Ssos			if (count-- == 0)
30528328Ssos				return (num);
30628328Ssos			*bp++ = *--zs->u.r.zs_stackp;
30728328Ssos		} while (zs->u.r.zs_stackp > de_stack);
30828328Ssos
30928328Ssos		/* Generate the new entry. */
31028328Ssos		if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode &&
31128328Ssos		    zs->u.r.zs_oldcode != -1) {
31228328Ssos			tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
31328328Ssos			tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
31428328Ssos			zs->zs_free_ent = zs->u.r.zs_code + 1;
31528328Ssos		}
31628328Ssos
31728328Ssos		/* Remember previous code. */
31828328Ssos		zs->u.r.zs_oldcode = zs->u.r.zs_incode;
31928328Ssos	}
32028328Ssos	zs->zs_state = S_EOF;
32128328Ssoseof:	return (num - count);
32228328Ssos}
32328328Ssos
32428328Ssos/*-
32528328Ssos * Read one code from the standard input.  If EOF, return -1.
32628328Ssos * Inputs:
32728328Ssos * 	stdin
32828328Ssos * Outputs:
32928328Ssos * 	code or -1 is returned.
33028328Ssos */
33128328Ssosstatic code_int
33228328Ssosgetcode(struct s_zstate *zs)
33328328Ssos{
33428328Ssos	code_int gcode;
33528328Ssos	int r_off, bits, i;
33628328Ssos	char_type *bp;
33728328Ssos
33828328Ssos	bp = zs->u.r.zs_gbuf;
33928328Ssos	if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
34028328Ssos	    zs->zs_free_ent > zs->zs_maxcode) {
34128328Ssos		/*
34228328Ssos		 * If the next entry will be too big for the current gcode
34328328Ssos		 * size, then we must increase the size.  This implies reading
34428328Ssos		 * a new buffer full, too.
34528328Ssos		 */
34628328Ssos		if (zs->zs_free_ent > zs->zs_maxcode) {
34728328Ssos			zs->zs_n_bits++;
34828328Ssos			if (zs->zs_n_bits == zs->zs_maxbits)	/* Won't get any bigger now. */
34928328Ssos				zs->zs_maxcode = zs->zs_maxmaxcode;
35028328Ssos			else
35128328Ssos				zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
35228328Ssos		}
35328328Ssos		if (zs->zs_clear_flg > 0) {
35428328Ssos			zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
35528328Ssos			zs->zs_clear_flg = 0;
35628328Ssos		}
35728328Ssos		/* XXX */
35828328Ssos		for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--)
35928328Ssos			zs->u.r.zs_gbuf[i] = *compressed_pre++;
36028328Ssos		zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp);
36128328Ssos		zs->u.r.zs_size += i;
36228328Ssos		if (zs->u.r.zs_size <= 0)			/* End of file. */
36328328Ssos			return (-1);
36428328Ssos		zs->u.r.zs_roffset = 0;
36528328Ssos
36628328Ssos		total_compressed_bytes += zs->u.r.zs_size;
36728328Ssos
36828328Ssos		/* Round size down to integral number of codes. */
36928328Ssos		zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
37028328Ssos	}
37128328Ssos	r_off = zs->u.r.zs_roffset;
37228328Ssos	bits = zs->zs_n_bits;
37328328Ssos
37428328Ssos	/* Get to the first byte. */
37528328Ssos	bp += (r_off >> 3);
37628328Ssos	r_off &= 7;
37728328Ssos
37828328Ssos	/* Get first part (low order bits). */
37928328Ssos	gcode = (*bp++ >> r_off);
38028328Ssos	bits -= (8 - r_off);
38128328Ssos	r_off = 8 - r_off;	/* Now, roffset into gcode word. */
38228328Ssos
38328328Ssos	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
38428328Ssos	if (bits >= 8) {
38528328Ssos		gcode |= *bp++ << r_off;
38628328Ssos		r_off += 8;
38728328Ssos		bits -= 8;
38828328Ssos	}
38928328Ssos
39028328Ssos	/* High order bits. */
39128328Ssos	gcode |= (*bp & rmask[bits]) << r_off;
39228328Ssos	zs->u.r.zs_roffset += zs->zs_n_bits;
39328328Ssos
39428328Ssos	return (gcode);
39528328Ssos}
39628328Ssos
39728328Ssos