1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm/*
27228753Smm * This code borrows heavily from "compress" source code, which is
28228753Smm * protected by the following copyright.  (Clause 3 dropped by request
29228753Smm * of the Regents.)
30228753Smm */
31228753Smm
32228753Smm/*-
33228753Smm * Copyright (c) 1985, 1986, 1992, 1993
34228753Smm *	The Regents of the University of California.  All rights reserved.
35228753Smm *
36228753Smm * This code is derived from software contributed to Berkeley by
37228753Smm * Diomidis Spinellis and James A. Woods, derived from original
38228753Smm * work by Spencer Thomas and Joseph Orost.
39228753Smm *
40228753Smm * Redistribution and use in source and binary forms, with or without
41228753Smm * modification, are permitted provided that the following conditions
42228753Smm * are met:
43228753Smm * 1. Redistributions of source code must retain the above copyright
44228753Smm *    notice, this list of conditions and the following disclaimer.
45228753Smm * 2. Redistributions in binary form must reproduce the above copyright
46228753Smm *    notice, this list of conditions and the following disclaimer in the
47228753Smm *    documentation and/or other materials provided with the distribution.
48228753Smm * 4. Neither the name of the University nor the names of its contributors
49228753Smm *    may be used to endorse or promote products derived from this software
50228753Smm *    without specific prior written permission.
51228753Smm *
52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55228753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62228753Smm * SUCH DAMAGE.
63228753Smm */
64228753Smm
65228753Smm
66228753Smm#include "archive_platform.h"
67231200Smm__FBSDID("$FreeBSD$");
68228753Smm
69228753Smm#ifdef HAVE_ERRNO_H
70228753Smm#include <errno.h>
71228753Smm#endif
72228753Smm#ifdef HAVE_STDLIB_H
73228753Smm#include <stdlib.h>
74228753Smm#endif
75228753Smm#ifdef HAVE_STRING_H
76228753Smm#include <string.h>
77228753Smm#endif
78228753Smm#ifdef HAVE_UNISTD_H
79228753Smm#include <unistd.h>
80228753Smm#endif
81228753Smm
82228753Smm#include "archive.h"
83228753Smm#include "archive_private.h"
84228753Smm#include "archive_read_private.h"
85228753Smm
86228753Smm/*
87228753Smm * Because LZW decompression is pretty simple, I've just implemented
88228753Smm * the whole decompressor here (cribbing from "compress" source code,
89228753Smm * of course), rather than relying on an external library.  I have
90228753Smm * made an effort to clarify and simplify the algorithm, so the
91228753Smm * names and structure here don't exactly match those used by compress.
92228753Smm */
93228753Smm
94228753Smmstruct private_data {
95228753Smm	/* Input variables. */
96228753Smm	const unsigned char	*next_in;
97228753Smm	size_t			 avail_in;
98231200Smm	size_t			 consume_unnotified;
99228753Smm	int			 bit_buffer;
100228753Smm	int			 bits_avail;
101228753Smm	size_t			 bytes_in_section;
102228753Smm
103228753Smm	/* Output variables. */
104228753Smm	size_t			 out_block_size;
105228753Smm	void			*out_block;
106228753Smm
107228753Smm	/* Decompression status variables. */
108228753Smm	int			 use_reset_code;
109228753Smm	int			 end_of_stream;	/* EOF status. */
110228753Smm	int			 maxcode;	/* Largest code. */
111228753Smm	int			 maxcode_bits;	/* Length of largest code. */
112228753Smm	int			 section_end_code; /* When to increase bits. */
113228753Smm	int			 bits;		/* Current code length. */
114228753Smm	int			 oldcode;	/* Previous code. */
115228753Smm	int			 finbyte;	/* Last byte of prev code. */
116228753Smm
117228753Smm	/* Dictionary. */
118228753Smm	int			 free_ent;       /* Next dictionary entry. */
119228753Smm	unsigned char		 suffix[65536];
120228753Smm	uint16_t		 prefix[65536];
121228753Smm
122228753Smm	/*
123228753Smm	 * Scratch area for expanding dictionary entries.  Note:
124228753Smm	 * "worst" case here comes from compressing /dev/zero: the
125228753Smm	 * last code in the dictionary will code a sequence of
126228753Smm	 * 65536-256 zero bytes.  Thus, we need stack space to expand
127228753Smm	 * a 65280-byte dictionary entry.  (Of course, 32640:1
128228753Smm	 * compression could also be considered the "best" case. ;-)
129228753Smm	 */
130228753Smm	unsigned char		*stackp;
131228753Smm	unsigned char		 stack[65300];
132228753Smm};
133228753Smm
134228753Smmstatic int	compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
135228753Smmstatic int	compress_bidder_init(struct archive_read_filter *);
136228753Smmstatic int	compress_bidder_free(struct archive_read_filter_bidder *);
137228753Smm
138228753Smmstatic ssize_t	compress_filter_read(struct archive_read_filter *, const void **);
139228753Smmstatic int	compress_filter_close(struct archive_read_filter *);
140228753Smm
141228753Smmstatic int	getbits(struct archive_read_filter *, int n);
142228753Smmstatic int	next_code(struct archive_read_filter *);
143228753Smm
144231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000
145231200Smm/* Deprecated; remove in libarchive 4.0 */
146228753Smmint
147231200Smmarchive_read_support_compression_compress(struct archive *a)
148228753Smm{
149231200Smm	return archive_read_support_filter_compress(a);
150231200Smm}
151231200Smm#endif
152231200Smm
153231200Smmint
154231200Smmarchive_read_support_filter_compress(struct archive *_a)
155231200Smm{
156228753Smm	struct archive_read *a = (struct archive_read *)_a;
157231200Smm	struct archive_read_filter_bidder *bidder;
158228753Smm
159231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
160231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_compress");
161231200Smm
162231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
163228753Smm		return (ARCHIVE_FATAL);
164228753Smm
165228753Smm	bidder->data = NULL;
166248616Smm	bidder->name = "compress (.Z)";
167228753Smm	bidder->bid = compress_bidder_bid;
168228753Smm	bidder->init = compress_bidder_init;
169228753Smm	bidder->options = NULL;
170228753Smm	bidder->free = compress_bidder_free;
171228753Smm	return (ARCHIVE_OK);
172228753Smm}
173228753Smm
174228753Smm/*
175228753Smm * Test whether we can handle this data.
176231200Smm * This logic returns zero if any part of the signature fails.
177228753Smm */
178228753Smmstatic int
179228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self,
180228753Smm    struct archive_read_filter *filter)
181228753Smm{
182228753Smm	const unsigned char *buffer;
183228753Smm	ssize_t avail;
184228753Smm	int bits_checked;
185228753Smm
186228753Smm	(void)self; /* UNUSED */
187228753Smm
188299529Smm	/* Shortest valid compress file is 3 bytes. */
189299529Smm	buffer = __archive_read_filter_ahead(filter, 3, &avail);
190228753Smm
191228753Smm	if (buffer == NULL)
192228753Smm		return (0);
193228753Smm
194228753Smm	bits_checked = 0;
195299529Smm	/* First two bytes are the magic value */
196231200Smm	if (buffer[0] != 0x1F || buffer[1] != 0x9D)
197228753Smm		return (0);
198299529Smm	/* Third byte holds compression parameters. */
199299529Smm	if (buffer[2] & 0x20) /* Reserved bit, must be zero. */
200299529Smm		return (0);
201299529Smm	if (buffer[2] & 0x40) /* Reserved bit, must be zero. */
202299529Smm		return (0);
203299529Smm	bits_checked += 18;
204228753Smm
205228753Smm	return (bits_checked);
206228753Smm}
207228753Smm
208228753Smm/*
209228753Smm * Setup the callbacks.
210228753Smm */
211228753Smmstatic int
212228753Smmcompress_bidder_init(struct archive_read_filter *self)
213228753Smm{
214228753Smm	struct private_data *state;
215228753Smm	static const size_t out_block_size = 64 * 1024;
216228753Smm	void *out_block;
217228753Smm	int code;
218228753Smm
219248616Smm	self->code = ARCHIVE_FILTER_COMPRESS;
220228753Smm	self->name = "compress (.Z)";
221228753Smm
222228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
223228753Smm	out_block = malloc(out_block_size);
224228753Smm	if (state == NULL || out_block == NULL) {
225228753Smm		free(out_block);
226228753Smm		free(state);
227228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
228228753Smm		    "Can't allocate data for %s decompression",
229228753Smm		    self->name);
230228753Smm		return (ARCHIVE_FATAL);
231228753Smm	}
232228753Smm
233228753Smm	self->data = state;
234228753Smm	state->out_block_size = out_block_size;
235228753Smm	state->out_block = out_block;
236228753Smm	self->read = compress_filter_read;
237228753Smm	self->skip = NULL; /* not supported */
238228753Smm	self->close = compress_filter_close;
239228753Smm
240228753Smm	/* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */
241228753Smm
242228753Smm	(void)getbits(self, 8); /* Skip first signature byte. */
243228753Smm	(void)getbits(self, 8); /* Skip second signature byte. */
244228753Smm
245299529Smm	/* Get compression parameters. */
246228753Smm	code = getbits(self, 8);
247299529Smm	if ((code & 0x1f) > 16) {
248299529Smm		archive_set_error(&self->archive->archive, -1,
249299529Smm		    "Invalid compressed data");
250299529Smm		return (ARCHIVE_FATAL);
251299529Smm	}
252228753Smm	state->maxcode_bits = code & 0x1f;
253228753Smm	state->maxcode = (1 << state->maxcode_bits);
254228753Smm	state->use_reset_code = code & 0x80;
255228753Smm
256228753Smm	/* Initialize decompressor. */
257228753Smm	state->free_ent = 256;
258228753Smm	state->stackp = state->stack;
259228753Smm	if (state->use_reset_code)
260228753Smm		state->free_ent++;
261228753Smm	state->bits = 9;
262228753Smm	state->section_end_code = (1<<state->bits) - 1;
263228753Smm	state->oldcode = -1;
264228753Smm	for (code = 255; code >= 0; code--) {
265228753Smm		state->prefix[code] = 0;
266228753Smm		state->suffix[code] = code;
267228753Smm	}
268228753Smm	next_code(self);
269228753Smm
270228753Smm	return (ARCHIVE_OK);
271228753Smm}
272228753Smm
273228753Smm/*
274228753Smm * Return a block of data from the decompression buffer.  Decompress more
275228753Smm * as necessary.
276228753Smm */
277228753Smmstatic ssize_t
278228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock)
279228753Smm{
280228753Smm	struct private_data *state;
281228753Smm	unsigned char *p, *start, *end;
282228753Smm	int ret;
283228753Smm
284228753Smm	state = (struct private_data *)self->data;
285228753Smm	if (state->end_of_stream) {
286228753Smm		*pblock = NULL;
287228753Smm		return (0);
288228753Smm	}
289228753Smm	p = start = (unsigned char *)state->out_block;
290228753Smm	end = start + state->out_block_size;
291228753Smm
292228753Smm	while (p < end && !state->end_of_stream) {
293228753Smm		if (state->stackp > state->stack) {
294228753Smm			*p++ = *--state->stackp;
295228753Smm		} else {
296228753Smm			ret = next_code(self);
297228753Smm			if (ret == -1)
298228753Smm				state->end_of_stream = ret;
299228753Smm			else if (ret != ARCHIVE_OK)
300228753Smm				return (ret);
301228753Smm		}
302228753Smm	}
303228753Smm
304228753Smm	*pblock = start;
305228753Smm	return (p - start);
306228753Smm}
307228753Smm
308228753Smm/*
309228753Smm * Clean up the reader.
310228753Smm */
311228753Smmstatic int
312228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self)
313228753Smm{
314228753Smm	self->data = NULL;
315228753Smm	return (ARCHIVE_OK);
316228753Smm}
317228753Smm
318228753Smm/*
319228753Smm * Close and release the filter.
320228753Smm */
321228753Smmstatic int
322228753Smmcompress_filter_close(struct archive_read_filter *self)
323228753Smm{
324228753Smm	struct private_data *state = (struct private_data *)self->data;
325228753Smm
326228753Smm	free(state->out_block);
327228753Smm	free(state);
328228753Smm	return (ARCHIVE_OK);
329228753Smm}
330228753Smm
331228753Smm/*
332228753Smm * Process the next code and fill the stack with the expansion
333228753Smm * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
334228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
335228753Smm */
336228753Smmstatic int
337228753Smmnext_code(struct archive_read_filter *self)
338228753Smm{
339228753Smm	struct private_data *state = (struct private_data *)self->data;
340228753Smm	int code, newcode;
341228753Smm
342228753Smm	static int debug_buff[1024];
343228753Smm	static unsigned debug_index;
344228753Smm
345228753Smm	code = newcode = getbits(self, state->bits);
346228753Smm	if (code < 0)
347228753Smm		return (code);
348228753Smm
349228753Smm	debug_buff[debug_index++] = code;
350228753Smm	if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
351228753Smm		debug_index = 0;
352228753Smm
353228753Smm	/* If it's a reset code, reset the dictionary. */
354228753Smm	if ((code == 256) && state->use_reset_code) {
355228753Smm		/*
356228753Smm		 * The original 'compress' implementation blocked its
357228753Smm		 * I/O in a manner that resulted in junk bytes being
358228753Smm		 * inserted after every reset.  The next section skips
359228753Smm		 * this junk.  (Yes, the number of *bytes* to skip is
360228753Smm		 * a function of the current *bit* length.)
361228753Smm		 */
362228753Smm		int skip_bytes =  state->bits -
363228753Smm		    (state->bytes_in_section % state->bits);
364228753Smm		skip_bytes %= state->bits;
365228753Smm		state->bits_avail = 0; /* Discard rest of this byte. */
366228753Smm		while (skip_bytes-- > 0) {
367228753Smm			code = getbits(self, 8);
368228753Smm			if (code < 0)
369228753Smm				return (code);
370228753Smm		}
371228753Smm		/* Now, actually do the reset. */
372228753Smm		state->bytes_in_section = 0;
373228753Smm		state->bits = 9;
374228753Smm		state->section_end_code = (1 << state->bits) - 1;
375228753Smm		state->free_ent = 257;
376228753Smm		state->oldcode = -1;
377228753Smm		return (next_code(self));
378228753Smm	}
379228753Smm
380299529Smm	if (code > state->free_ent
381299529Smm	    || (code == state->free_ent && state->oldcode < 0)) {
382228753Smm		/* An invalid code is a fatal error. */
383228753Smm		archive_set_error(&(self->archive->archive), -1,
384228753Smm		    "Invalid compressed data");
385228753Smm		return (ARCHIVE_FATAL);
386228753Smm	}
387228753Smm
388228753Smm	/* Special case for KwKwK string. */
389228753Smm	if (code >= state->free_ent) {
390228753Smm		*state->stackp++ = state->finbyte;
391228753Smm		code = state->oldcode;
392228753Smm	}
393228753Smm
394228753Smm	/* Generate output characters in reverse order. */
395228753Smm	while (code >= 256) {
396228753Smm		*state->stackp++ = state->suffix[code];
397228753Smm		code = state->prefix[code];
398228753Smm	}
399228753Smm	*state->stackp++ = state->finbyte = code;
400228753Smm
401228753Smm	/* Generate the new entry. */
402228753Smm	code = state->free_ent;
403228753Smm	if (code < state->maxcode && state->oldcode >= 0) {
404228753Smm		state->prefix[code] = state->oldcode;
405228753Smm		state->suffix[code] = state->finbyte;
406228753Smm		++state->free_ent;
407228753Smm	}
408228753Smm	if (state->free_ent > state->section_end_code) {
409228753Smm		state->bits++;
410228753Smm		state->bytes_in_section = 0;
411228753Smm		if (state->bits == state->maxcode_bits)
412228753Smm			state->section_end_code = state->maxcode;
413228753Smm		else
414228753Smm			state->section_end_code = (1 << state->bits) - 1;
415228753Smm	}
416228753Smm
417228753Smm	/* Remember previous code. */
418228753Smm	state->oldcode = newcode;
419228753Smm	return (ARCHIVE_OK);
420228753Smm}
421228753Smm
422228753Smm/*
423228753Smm * Return next 'n' bits from stream.
424228753Smm *
425228753Smm * -1 indicates end of available data.
426228753Smm */
427228753Smmstatic int
428228753Smmgetbits(struct archive_read_filter *self, int n)
429228753Smm{
430228753Smm	struct private_data *state = (struct private_data *)self->data;
431228753Smm	int code;
432228753Smm	ssize_t ret;
433228753Smm	static const int mask[] = {
434228753Smm		0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
435228753Smm		0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
436228753Smm	};
437228753Smm
438228753Smm	while (state->bits_avail < n) {
439228753Smm		if (state->avail_in <= 0) {
440231200Smm			if (state->consume_unnotified) {
441231200Smm				__archive_read_filter_consume(self->upstream,
442231200Smm					state->consume_unnotified);
443231200Smm				state->consume_unnotified = 0;
444231200Smm			}
445228753Smm			state->next_in
446228753Smm			    = __archive_read_filter_ahead(self->upstream,
447228753Smm				1, &ret);
448228753Smm			if (ret == 0)
449228753Smm				return (-1);
450228753Smm			if (ret < 0 || state->next_in == NULL)
451228753Smm				return (ARCHIVE_FATAL);
452231200Smm			state->consume_unnotified = state->avail_in = ret;
453228753Smm		}
454228753Smm		state->bit_buffer |= *state->next_in++ << state->bits_avail;
455228753Smm		state->avail_in--;
456228753Smm		state->bits_avail += 8;
457228753Smm		state->bytes_in_section++;
458228753Smm	}
459228753Smm
460228753Smm	code = state->bit_buffer;
461228753Smm	state->bit_buffer >>= n;
462228753Smm	state->bits_avail -= n;
463228753Smm
464228753Smm	return (code & mask[n]);
465228753Smm}
466