1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm/*
27228753Smm * This code borrows heavily from "compress" source code, which is
28228753Smm * protected by the following copyright.  (Clause 3 dropped by request
29228753Smm * of the Regents.)
30228753Smm */
31228753Smm
32228753Smm/*-
33228753Smm * Copyright (c) 1985, 1986, 1992, 1993
34228753Smm *	The Regents of the University of California.  All rights reserved.
35228753Smm *
36228753Smm * This code is derived from software contributed to Berkeley by
37228753Smm * Diomidis Spinellis and James A. Woods, derived from original
38228753Smm * work by Spencer Thomas and Joseph Orost.
39228753Smm *
40228753Smm * Redistribution and use in source and binary forms, with or without
41228753Smm * modification, are permitted provided that the following conditions
42228753Smm * are met:
43228753Smm * 1. Redistributions of source code must retain the above copyright
44228753Smm *    notice, this list of conditions and the following disclaimer.
45228753Smm * 2. Redistributions in binary form must reproduce the above copyright
46228753Smm *    notice, this list of conditions and the following disclaimer in the
47228753Smm *    documentation and/or other materials provided with the distribution.
48228753Smm * 4. Neither the name of the University nor the names of its contributors
49228753Smm *    may be used to endorse or promote products derived from this software
50228753Smm *    without specific prior written permission.
51228753Smm *
52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55228753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62228753Smm * SUCH DAMAGE.
63228753Smm */
64228753Smm
65228753Smm
66228753Smm#include "archive_platform.h"
67229592Smm__FBSDID("$FreeBSD$");
68228753Smm
69228753Smm#ifdef HAVE_ERRNO_H
70228753Smm#include <errno.h>
71228753Smm#endif
72228753Smm#ifdef HAVE_STDLIB_H
73228753Smm#include <stdlib.h>
74228753Smm#endif
75228753Smm#ifdef HAVE_STRING_H
76228753Smm#include <string.h>
77228753Smm#endif
78228753Smm#ifdef HAVE_UNISTD_H
79228753Smm#include <unistd.h>
80228753Smm#endif
81228753Smm
82228753Smm#include "archive.h"
83228753Smm#include "archive_private.h"
84228753Smm#include "archive_read_private.h"
85228753Smm
86228753Smm/*
87228753Smm * Because LZW decompression is pretty simple, I've just implemented
88228753Smm * the whole decompressor here (cribbing from "compress" source code,
89228753Smm * of course), rather than relying on an external library.  I have
90228753Smm * made an effort to clarify and simplify the algorithm, so the
91228753Smm * names and structure here don't exactly match those used by compress.
92228753Smm */
93228753Smm
94228753Smmstruct private_data {
95228753Smm	/* Input variables. */
96228753Smm	const unsigned char	*next_in;
97228753Smm	size_t			 avail_in;
98228753Smm	int			 bit_buffer;
99228753Smm	int			 bits_avail;
100228753Smm	size_t			 bytes_in_section;
101228753Smm
102228753Smm	/* Output variables. */
103228753Smm	size_t			 out_block_size;
104228753Smm	void			*out_block;
105228753Smm
106228753Smm	/* Decompression status variables. */
107228753Smm	int			 use_reset_code;
108228753Smm	int			 end_of_stream;	/* EOF status. */
109228753Smm	int			 maxcode;	/* Largest code. */
110228753Smm	int			 maxcode_bits;	/* Length of largest code. */
111228753Smm	int			 section_end_code; /* When to increase bits. */
112228753Smm	int			 bits;		/* Current code length. */
113228753Smm	int			 oldcode;	/* Previous code. */
114228753Smm	int			 finbyte;	/* Last byte of prev code. */
115228753Smm
116228753Smm	/* Dictionary. */
117228753Smm	int			 free_ent;       /* Next dictionary entry. */
118228753Smm	unsigned char		 suffix[65536];
119228753Smm	uint16_t		 prefix[65536];
120228753Smm
121228753Smm	/*
122228753Smm	 * Scratch area for expanding dictionary entries.  Note:
123228753Smm	 * "worst" case here comes from compressing /dev/zero: the
124228753Smm	 * last code in the dictionary will code a sequence of
125228753Smm	 * 65536-256 zero bytes.  Thus, we need stack space to expand
126228753Smm	 * a 65280-byte dictionary entry.  (Of course, 32640:1
127228753Smm	 * compression could also be considered the "best" case. ;-)
128228753Smm	 */
129228753Smm	unsigned char		*stackp;
130228753Smm	unsigned char		 stack[65300];
131228753Smm};
132228753Smm
133228753Smmstatic int	compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
134228753Smmstatic int	compress_bidder_init(struct archive_read_filter *);
135228753Smmstatic int	compress_bidder_free(struct archive_read_filter_bidder *);
136228753Smm
137228753Smmstatic ssize_t	compress_filter_read(struct archive_read_filter *, const void **);
138228753Smmstatic int	compress_filter_close(struct archive_read_filter *);
139228753Smm
140228753Smmstatic int	getbits(struct archive_read_filter *, int n);
141228753Smmstatic int	next_code(struct archive_read_filter *);
142228753Smm
143228753Smmint
144228753Smmarchive_read_support_compression_compress(struct archive *_a)
145228753Smm{
146228753Smm	struct archive_read *a = (struct archive_read *)_a;
147228753Smm	struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
148228753Smm
149228753Smm	if (bidder == NULL)
150228753Smm		return (ARCHIVE_FATAL);
151228753Smm
152228753Smm	bidder->data = NULL;
153228753Smm	bidder->bid = compress_bidder_bid;
154228753Smm	bidder->init = compress_bidder_init;
155228753Smm	bidder->options = NULL;
156228753Smm	bidder->free = compress_bidder_free;
157228753Smm	return (ARCHIVE_OK);
158228753Smm}
159228753Smm
160228753Smm/*
161228753Smm * Test whether we can handle this data.
162228753Smm *
163228753Smm * This logic returns zero if any part of the signature fails.  It
164228753Smm * also tries to Do The Right Thing if a very short buffer prevents us
165228753Smm * from verifying as much as we would like.
166228753Smm */
167228753Smmstatic int
168228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self,
169228753Smm    struct archive_read_filter *filter)
170228753Smm{
171228753Smm	const unsigned char *buffer;
172228753Smm	ssize_t avail;
173228753Smm	int bits_checked;
174228753Smm
175228753Smm	(void)self; /* UNUSED */
176228753Smm
177228753Smm	buffer = __archive_read_filter_ahead(filter, 2, &avail);
178228753Smm
179228753Smm	if (buffer == NULL)
180228753Smm		return (0);
181228753Smm
182228753Smm	bits_checked = 0;
183228753Smm	if (buffer[0] != 037)	/* Verify first ID byte. */
184228753Smm		return (0);
185228753Smm	bits_checked += 8;
186228753Smm
187228753Smm	if (buffer[1] != 0235)	/* Verify second ID byte. */
188228753Smm		return (0);
189228753Smm	bits_checked += 8;
190228753Smm
191228753Smm	/*
192228753Smm	 * TODO: Verify more.
193228753Smm	 */
194228753Smm
195228753Smm	return (bits_checked);
196228753Smm}
197228753Smm
198228753Smm/*
199228753Smm * Setup the callbacks.
200228753Smm */
201228753Smmstatic int
202228753Smmcompress_bidder_init(struct archive_read_filter *self)
203228753Smm{
204228753Smm	struct private_data *state;
205228753Smm	static const size_t out_block_size = 64 * 1024;
206228753Smm	void *out_block;
207228753Smm	int code;
208228753Smm
209228753Smm	self->code = ARCHIVE_COMPRESSION_COMPRESS;
210228753Smm	self->name = "compress (.Z)";
211228753Smm
212228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
213228753Smm	out_block = malloc(out_block_size);
214228753Smm	if (state == NULL || out_block == NULL) {
215228753Smm		free(out_block);
216228753Smm		free(state);
217228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
218228753Smm		    "Can't allocate data for %s decompression",
219228753Smm		    self->name);
220228753Smm		return (ARCHIVE_FATAL);
221228753Smm	}
222228753Smm
223228753Smm	self->data = state;
224228753Smm	state->out_block_size = out_block_size;
225228753Smm	state->out_block = out_block;
226228753Smm	self->read = compress_filter_read;
227228753Smm	self->skip = NULL; /* not supported */
228228753Smm	self->close = compress_filter_close;
229228753Smm
230228753Smm	/* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */
231228753Smm
232228753Smm	(void)getbits(self, 8); /* Skip first signature byte. */
233228753Smm	(void)getbits(self, 8); /* Skip second signature byte. */
234228753Smm
235228753Smm	code = getbits(self, 8);
236228753Smm	state->maxcode_bits = code & 0x1f;
237228753Smm	state->maxcode = (1 << state->maxcode_bits);
238228753Smm	state->use_reset_code = code & 0x80;
239228753Smm
240228753Smm	/* Initialize decompressor. */
241228753Smm	state->free_ent = 256;
242228753Smm	state->stackp = state->stack;
243228753Smm	if (state->use_reset_code)
244228753Smm		state->free_ent++;
245228753Smm	state->bits = 9;
246228753Smm	state->section_end_code = (1<<state->bits) - 1;
247228753Smm	state->oldcode = -1;
248228753Smm	for (code = 255; code >= 0; code--) {
249228753Smm		state->prefix[code] = 0;
250228753Smm		state->suffix[code] = code;
251228753Smm	}
252228753Smm	next_code(self);
253228753Smm
254228753Smm	return (ARCHIVE_OK);
255228753Smm}
256228753Smm
257228753Smm/*
258228753Smm * Return a block of data from the decompression buffer.  Decompress more
259228753Smm * as necessary.
260228753Smm */
261228753Smmstatic ssize_t
262228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock)
263228753Smm{
264228753Smm	struct private_data *state;
265228753Smm	unsigned char *p, *start, *end;
266228753Smm	int ret;
267228753Smm
268228753Smm	state = (struct private_data *)self->data;
269228753Smm	if (state->end_of_stream) {
270228753Smm		*pblock = NULL;
271228753Smm		return (0);
272228753Smm	}
273228753Smm	p = start = (unsigned char *)state->out_block;
274228753Smm	end = start + state->out_block_size;
275228753Smm
276228753Smm	while (p < end && !state->end_of_stream) {
277228753Smm		if (state->stackp > state->stack) {
278228753Smm			*p++ = *--state->stackp;
279228753Smm		} else {
280228753Smm			ret = next_code(self);
281228753Smm			if (ret == -1)
282228753Smm				state->end_of_stream = ret;
283228753Smm			else if (ret != ARCHIVE_OK)
284228753Smm				return (ret);
285228753Smm		}
286228753Smm	}
287228753Smm
288228753Smm	*pblock = start;
289228753Smm	return (p - start);
290228753Smm}
291228753Smm
292228753Smm/*
293228753Smm * Clean up the reader.
294228753Smm */
295228753Smmstatic int
296228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self)
297228753Smm{
298228753Smm	self->data = NULL;
299228753Smm	return (ARCHIVE_OK);
300228753Smm}
301228753Smm
302228753Smm/*
303228753Smm * Close and release the filter.
304228753Smm */
305228753Smmstatic int
306228753Smmcompress_filter_close(struct archive_read_filter *self)
307228753Smm{
308228753Smm	struct private_data *state = (struct private_data *)self->data;
309228753Smm
310228753Smm	free(state->out_block);
311228753Smm	free(state);
312228753Smm	return (ARCHIVE_OK);
313228753Smm}
314228753Smm
315228753Smm/*
316228753Smm * Process the next code and fill the stack with the expansion
317228753Smm * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
318228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
319228753Smm */
320228753Smmstatic int
321228753Smmnext_code(struct archive_read_filter *self)
322228753Smm{
323228753Smm	struct private_data *state = (struct private_data *)self->data;
324228753Smm	int code, newcode;
325228753Smm
326228753Smm	static int debug_buff[1024];
327228753Smm	static unsigned debug_index;
328228753Smm
329228753Smm	code = newcode = getbits(self, state->bits);
330228753Smm	if (code < 0)
331228753Smm		return (code);
332228753Smm
333228753Smm	debug_buff[debug_index++] = code;
334228753Smm	if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
335228753Smm		debug_index = 0;
336228753Smm
337228753Smm	/* If it's a reset code, reset the dictionary. */
338228753Smm	if ((code == 256) && state->use_reset_code) {
339228753Smm		/*
340228753Smm		 * The original 'compress' implementation blocked its
341228753Smm		 * I/O in a manner that resulted in junk bytes being
342228753Smm		 * inserted after every reset.  The next section skips
343228753Smm		 * this junk.  (Yes, the number of *bytes* to skip is
344228753Smm		 * a function of the current *bit* length.)
345228753Smm		 */
346228753Smm		int skip_bytes =  state->bits -
347228753Smm		    (state->bytes_in_section % state->bits);
348228753Smm		skip_bytes %= state->bits;
349228753Smm		state->bits_avail = 0; /* Discard rest of this byte. */
350228753Smm		while (skip_bytes-- > 0) {
351228753Smm			code = getbits(self, 8);
352228753Smm			if (code < 0)
353228753Smm				return (code);
354228753Smm		}
355228753Smm		/* Now, actually do the reset. */
356228753Smm		state->bytes_in_section = 0;
357228753Smm		state->bits = 9;
358228753Smm		state->section_end_code = (1 << state->bits) - 1;
359228753Smm		state->free_ent = 257;
360228753Smm		state->oldcode = -1;
361228753Smm		return (next_code(self));
362228753Smm	}
363228753Smm
364228753Smm	if (code > state->free_ent) {
365228753Smm		/* An invalid code is a fatal error. */
366228753Smm		archive_set_error(&(self->archive->archive), -1,
367228753Smm		    "Invalid compressed data");
368228753Smm		return (ARCHIVE_FATAL);
369228753Smm	}
370228753Smm
371228753Smm	/* Special case for KwKwK string. */
372228753Smm	if (code >= state->free_ent) {
373228753Smm		*state->stackp++ = state->finbyte;
374228753Smm		code = state->oldcode;
375228753Smm	}
376228753Smm
377228753Smm	/* Generate output characters in reverse order. */
378228753Smm	while (code >= 256) {
379228753Smm		*state->stackp++ = state->suffix[code];
380228753Smm		code = state->prefix[code];
381228753Smm	}
382228753Smm	*state->stackp++ = state->finbyte = code;
383228753Smm
384228753Smm	/* Generate the new entry. */
385228753Smm	code = state->free_ent;
386228753Smm	if (code < state->maxcode && state->oldcode >= 0) {
387228753Smm		state->prefix[code] = state->oldcode;
388228753Smm		state->suffix[code] = state->finbyte;
389228753Smm		++state->free_ent;
390228753Smm	}
391228753Smm	if (state->free_ent > state->section_end_code) {
392228753Smm		state->bits++;
393228753Smm		state->bytes_in_section = 0;
394228753Smm		if (state->bits == state->maxcode_bits)
395228753Smm			state->section_end_code = state->maxcode;
396228753Smm		else
397228753Smm			state->section_end_code = (1 << state->bits) - 1;
398228753Smm	}
399228753Smm
400228753Smm	/* Remember previous code. */
401228753Smm	state->oldcode = newcode;
402228753Smm	return (ARCHIVE_OK);
403228753Smm}
404228753Smm
405228753Smm/*
406228753Smm * Return next 'n' bits from stream.
407228753Smm *
408228753Smm * -1 indicates end of available data.
409228753Smm */
410228753Smmstatic int
411228753Smmgetbits(struct archive_read_filter *self, int n)
412228753Smm{
413228753Smm	struct private_data *state = (struct private_data *)self->data;
414228753Smm	int code;
415228753Smm	ssize_t ret;
416228753Smm	static const int mask[] = {
417228753Smm		0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
418228753Smm		0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
419228753Smm	};
420228753Smm
421228753Smm	while (state->bits_avail < n) {
422228753Smm		if (state->avail_in <= 0) {
423228753Smm			state->next_in
424228753Smm			    = __archive_read_filter_ahead(self->upstream,
425228753Smm				1, &ret);
426228753Smm			if (ret == 0)
427228753Smm				return (-1);
428228753Smm			if (ret < 0 || state->next_in == NULL)
429228753Smm				return (ARCHIVE_FATAL);
430228753Smm			state->avail_in = ret;
431228753Smm			__archive_read_filter_consume(self->upstream, ret);
432228753Smm		}
433228753Smm		state->bit_buffer |= *state->next_in++ << state->bits_avail;
434228753Smm		state->avail_in--;
435228753Smm		state->bits_avail += 8;
436228753Smm		state->bytes_in_section++;
437228753Smm	}
438228753Smm
439228753Smm	code = state->bit_buffer;
440228753Smm	state->bit_buffer >>= n;
441228753Smm	state->bits_avail -= n;
442228753Smm
443228753Smm	return (code & mask[n]);
444228753Smm}
445