1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27228753Smm
28229592Smm__FBSDID("$FreeBSD$");
29228753Smm
30228753Smm#ifdef HAVE_ERRNO_H
31228753Smm#include <errno.h>
32228753Smm#endif
33228753Smm#include <stdio.h>
34228753Smm#ifdef HAVE_STDLIB_H
35228753Smm#include <stdlib.h>
36228753Smm#endif
37228753Smm#ifdef HAVE_STRING_H
38228753Smm#include <string.h>
39228753Smm#endif
40228753Smm#ifdef HAVE_UNISTD_H
41228753Smm#include <unistd.h>
42228753Smm#endif
43228753Smm#ifdef HAVE_BZLIB_H
44228753Smm#include <bzlib.h>
45228753Smm#endif
46228753Smm
47228753Smm#include "archive.h"
48228753Smm#include "archive_private.h"
49228753Smm#include "archive_read_private.h"
50228753Smm
51228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR)
52228753Smmstruct private_data {
53228753Smm	bz_stream	 stream;
54228753Smm	char		*out_block;
55228753Smm	size_t		 out_block_size;
56228753Smm	char		 valid; /* True = decompressor is initialized */
57228753Smm	char		 eof; /* True = found end of compressed data. */
58228753Smm};
59228753Smm
/* Filter callbacks used when bzlib performs the decompression. */
static ssize_t	bzip2_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	bzip2_filter_close(struct archive_read_filter *self);
63228753Smm#endif
64228753Smm
/*
 * The bidder is compiled whether or not bzlib is available:
 * recognizing a bzip2 stream does not require being able to
 * decompress it, and recognizing it lets us give better error
 * messages.
 */
static int	bzip2_reader_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	bzip2_reader_init(struct archive_read_filter *self);
static int	bzip2_reader_free(struct archive_read_filter_bidder *self);
74228753Smm
75228753Smmint
76228753Smmarchive_read_support_compression_bzip2(struct archive *_a)
77228753Smm{
78228753Smm	struct archive_read *a = (struct archive_read *)_a;
79228753Smm	struct archive_read_filter_bidder *reader = __archive_read_get_bidder(a);
80228753Smm
81228753Smm	if (reader == NULL)
82228753Smm		return (ARCHIVE_FATAL);
83228753Smm
84228753Smm	reader->data = NULL;
85228753Smm	reader->bid = bzip2_reader_bid;
86228753Smm	reader->init = bzip2_reader_init;
87228753Smm	reader->options = NULL;
88228753Smm	reader->free = bzip2_reader_free;
89228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR)
90228753Smm	return (ARCHIVE_OK);
91228753Smm#else
92228753Smm	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
93228753Smm	    "Using external bunzip2 program");
94228753Smm	return (ARCHIVE_WARN);
95228753Smm#endif
96228753Smm}
97228753Smm
98228753Smmstatic int
99228753Smmbzip2_reader_free(struct archive_read_filter_bidder *self){
100228753Smm	(void)self; /* UNUSED */
101228753Smm	return (ARCHIVE_OK);
102228753Smm}
103228753Smm
/*
 * Decide whether the upcoming data looks like a bzip2 stream.
 *
 * Returns 0 when the 14-byte look-ahead fails (returns NULL) or when
 * any part of the signature does not match.  Otherwise returns the
 * number of signature bits that were verified.
 */
static int
bzip2_reader_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter)
{
	/* Marker that opens a data block. */
	static const unsigned char block_magic[6] =
	    { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
	/* Marker that signals end-of-data. */
	static const unsigned char end_magic[6] =
	    { 0x17, 0x72, 0x45, 0x38, 0x50, 0x90 };
	const unsigned char *hdr;
	ssize_t avail;

	(void)self; /* UNUSED */

	/* Minimal bzip2 archive is 14 bytes. */
	hdr = __archive_read_filter_ahead(filter, 14, &avail);
	if (hdr == NULL)
		return (0);

	/* The stream opens with the three bytes "BZh" (24 bits). */
	if (memcmp(hdr, "BZh", 3) != 0)
		return (0);

	/* Then an ASCII digit '1'..'9'; credited as 5 bits. */
	if (hdr[3] < '1' || hdr[3] > '9')
		return (0);

	/* Then either a data-block marker or an end-of-data
	 * marker (48 bits). */
	if (memcmp(hdr + 4, block_magic, 6) != 0 &&
	    memcmp(hdr + 4, end_magic, 6) != 0)
		return (0);

	return (24 + 5 + 48);
}
148228753Smm
149228753Smm#if !defined(HAVE_BZLIB_H) || !defined(BZ_CONFIG_ERROR)
150228753Smm
151228753Smm/*
152228753Smm * If we don't have the library on this system, we can't actually do the
153228753Smm * decompression.  We can, however, still detect compressed archives
154228753Smm * and emit a useful message.
155228753Smm */
156228753Smmstatic int
157228753Smmbzip2_reader_init(struct archive_read_filter *self)
158228753Smm{
159228753Smm	int r;
160228753Smm
161228753Smm	r = __archive_read_program(self, "bunzip2");
162228753Smm	/* Note: We set the format here even if __archive_read_program()
163228753Smm	 * above fails.  We do, after all, know what the format is
164228753Smm	 * even if we weren't able to read it. */
165228753Smm	self->code = ARCHIVE_COMPRESSION_BZIP2;
166228753Smm	self->name = "bzip2";
167228753Smm	return (r);
168228753Smm}
169228753Smm
170228753Smm
171228753Smm#else
172228753Smm
173228753Smm/*
174228753Smm * Setup the callbacks.
175228753Smm */
176228753Smmstatic int
177228753Smmbzip2_reader_init(struct archive_read_filter *self)
178228753Smm{
179228753Smm	static const size_t out_block_size = 64 * 1024;
180228753Smm	void *out_block;
181228753Smm	struct private_data *state;
182228753Smm
183228753Smm	self->code = ARCHIVE_COMPRESSION_BZIP2;
184228753Smm	self->name = "bzip2";
185228753Smm
186228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
187228753Smm	out_block = (unsigned char *)malloc(out_block_size);
188228753Smm	if (self == NULL || state == NULL || out_block == NULL) {
189228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
190228753Smm		    "Can't allocate data for bzip2 decompression");
191228753Smm		free(out_block);
192228753Smm		free(state);
193228753Smm		return (ARCHIVE_FATAL);
194228753Smm	}
195228753Smm
196228753Smm	self->data = state;
197228753Smm	state->out_block_size = out_block_size;
198228753Smm	state->out_block = out_block;
199228753Smm	self->read = bzip2_filter_read;
200228753Smm	self->skip = NULL; /* not supported */
201228753Smm	self->close = bzip2_filter_close;
202228753Smm
203228753Smm	return (ARCHIVE_OK);
204228753Smm}
205228753Smm
206228753Smm/*
207228753Smm * Return the next block of decompressed data.
208228753Smm */
209228753Smmstatic ssize_t
210228753Smmbzip2_filter_read(struct archive_read_filter *self, const void **p)
211228753Smm{
212228753Smm	struct private_data *state;
213228753Smm	size_t decompressed;
214228753Smm	const char *read_buf;
215228753Smm	ssize_t ret;
216228753Smm
217228753Smm	state = (struct private_data *)self->data;
218228753Smm
219228753Smm	if (state->eof) {
220228753Smm		*p = NULL;
221228753Smm		return (0);
222228753Smm	}
223228753Smm
224228753Smm	/* Empty our output buffer. */
225228753Smm	state->stream.next_out = state->out_block;
226228753Smm	state->stream.avail_out = state->out_block_size;
227228753Smm
228228753Smm	/* Try to fill the output buffer. */
229228753Smm	for (;;) {
230228753Smm		if (!state->valid) {
231228753Smm			if (bzip2_reader_bid(self->bidder, self->upstream) == 0) {
232228753Smm				state->eof = 1;
233228753Smm				*p = state->out_block;
234228753Smm				decompressed = state->stream.next_out
235228753Smm				    - state->out_block;
236228753Smm				return (decompressed);
237228753Smm			}
238228753Smm			/* Initialize compression library. */
239228753Smm			ret = BZ2_bzDecompressInit(&(state->stream),
240228753Smm					   0 /* library verbosity */,
241228753Smm					   0 /* don't use low-mem algorithm */);
242228753Smm
243228753Smm			/* If init fails, try low-memory algorithm instead. */
244228753Smm			if (ret == BZ_MEM_ERROR)
245228753Smm				ret = BZ2_bzDecompressInit(&(state->stream),
246228753Smm					   0 /* library verbosity */,
247228753Smm					   1 /* do use low-mem algo */);
248228753Smm
249228753Smm			if (ret != BZ_OK) {
250228753Smm				const char *detail = NULL;
251228753Smm				int err = ARCHIVE_ERRNO_MISC;
252228753Smm				switch (ret) {
253228753Smm				case BZ_PARAM_ERROR:
254228753Smm					detail = "invalid setup parameter";
255228753Smm					break;
256228753Smm				case BZ_MEM_ERROR:
257228753Smm					err = ENOMEM;
258228753Smm					detail = "out of memory";
259228753Smm					break;
260228753Smm				case BZ_CONFIG_ERROR:
261228753Smm					detail = "mis-compiled library";
262228753Smm					break;
263228753Smm				}
264228753Smm				archive_set_error(&self->archive->archive, err,
265228753Smm				    "Internal error initializing decompressor%s%s",
266228753Smm				    detail == NULL ? "" : ": ",
267228753Smm				    detail);
268228753Smm				return (ARCHIVE_FATAL);
269228753Smm			}
270228753Smm			state->valid = 1;
271228753Smm		}
272228753Smm
273228753Smm		/* stream.next_in is really const, but bzlib
274228753Smm		 * doesn't declare it so. <sigh> */
275228753Smm		read_buf =
276228753Smm		    __archive_read_filter_ahead(self->upstream, 1, &ret);
277228753Smm		if (read_buf == NULL)
278228753Smm			return (ARCHIVE_FATAL);
279228753Smm		state->stream.next_in = (char *)(uintptr_t)read_buf;
280228753Smm		state->stream.avail_in = ret;
281228753Smm		/* There is no more data, return whatever we have. */
282228753Smm		if (ret == 0) {
283228753Smm			state->eof = 1;
284228753Smm			*p = state->out_block;
285228753Smm			decompressed = state->stream.next_out
286228753Smm			    - state->out_block;
287228753Smm			return (decompressed);
288228753Smm		}
289228753Smm
290228753Smm		/* Decompress as much as we can in one pass. */
291228753Smm		ret = BZ2_bzDecompress(&(state->stream));
292228753Smm		__archive_read_filter_consume(self->upstream,
293228753Smm		    state->stream.next_in - read_buf);
294228753Smm
295228753Smm		switch (ret) {
296228753Smm		case BZ_STREAM_END: /* Found end of stream. */
297228753Smm			switch (BZ2_bzDecompressEnd(&(state->stream))) {
298228753Smm			case BZ_OK:
299228753Smm				break;
300228753Smm			default:
301228753Smm				archive_set_error(&(self->archive->archive),
302228753Smm					  ARCHIVE_ERRNO_MISC,
303228753Smm					  "Failed to clean up decompressor");
304228753Smm				return (ARCHIVE_FATAL);
305228753Smm			}
306228753Smm			state->valid = 0;
307228753Smm			/* FALLTHROUGH */
308228753Smm		case BZ_OK: /* Decompressor made some progress. */
309228753Smm			/* If we filled our buffer, update stats and return. */
310228753Smm			if (state->stream.avail_out == 0) {
311228753Smm				*p = state->out_block;
312228753Smm				decompressed = state->stream.next_out
313228753Smm				    - state->out_block;
314228753Smm				return (decompressed);
315228753Smm			}
316228753Smm			break;
317228753Smm		default: /* Return an error. */
318228753Smm			archive_set_error(&self->archive->archive,
319228753Smm			    ARCHIVE_ERRNO_MISC, "bzip decompression failed");
320228753Smm			return (ARCHIVE_FATAL);
321228753Smm		}
322228753Smm	}
323228753Smm}
324228753Smm
325228753Smm/*
326228753Smm * Clean up the decompressor.
327228753Smm */
328228753Smmstatic int
329228753Smmbzip2_filter_close(struct archive_read_filter *self)
330228753Smm{
331228753Smm	struct private_data *state;
332228753Smm	int ret = ARCHIVE_OK;
333228753Smm
334228753Smm	state = (struct private_data *)self->data;
335228753Smm
336228753Smm	if (state->valid) {
337228753Smm		switch (BZ2_bzDecompressEnd(&state->stream)) {
338228753Smm		case BZ_OK:
339228753Smm			break;
340228753Smm		default:
341228753Smm			archive_set_error(&self->archive->archive,
342228753Smm					  ARCHIVE_ERRNO_MISC,
343228753Smm					  "Failed to clean up decompressor");
344228753Smm			ret = ARCHIVE_FATAL;
345228753Smm		}
346228753Smm	}
347228753Smm
348228753Smm	free(state->out_block);
349228753Smm	free(state);
350228753Smm	return (ret);
351228753Smm}
352228753Smm
353228753Smm#endif /* HAVE_BZLIB_H && BZ_CONFIG_ERROR */
354