archive_read_support_filter_gzip.c revision 248616
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28__FBSDID("$FreeBSD$");
29
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40#ifdef HAVE_UNISTD_H
41#include <unistd.h>
42#endif
43#ifdef HAVE_ZLIB_H
44#include <zlib.h>
45#endif
46
47#include "archive.h"
48#include "archive_private.h"
49#include "archive_read_private.h"
50
51#ifdef HAVE_ZLIB_H
52struct private_data {
53	z_stream	 stream;
54	char		 in_stream;
55	unsigned char	*out_block;
56	size_t		 out_block_size;
57	int64_t		 total_out;
58	unsigned long	 crc;
59	char		 eof; /* True = found end of compressed data. */
60};
61
62/* Gzip Filter. */
63static ssize_t	gzip_filter_read(struct archive_read_filter *, const void **);
64static int	gzip_filter_close(struct archive_read_filter *);
65#endif
66
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() falls back to running
 * an external "gzip -d" program to do the decompression.
 */
77static int	gzip_bidder_bid(struct archive_read_filter_bidder *,
78		    struct archive_read_filter *);
79static int	gzip_bidder_init(struct archive_read_filter *);
80
81#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated spelling of archive_read_support_filter_gzip(); simply
 * forwards to it.  Scheduled for removal in libarchive 4.0.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	int status;

	status = archive_read_support_filter_gzip(a);
	return (status);
}
88#endif
89
90int
91archive_read_support_filter_gzip(struct archive *_a)
92{
93	struct archive_read *a = (struct archive_read *)_a;
94	struct archive_read_filter_bidder *bidder;
95
96	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
97	    ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip");
98
99	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
100		return (ARCHIVE_FATAL);
101
102	bidder->data = NULL;
103	bidder->name = "gzip";
104	bidder->bid = gzip_bidder_bid;
105	bidder->init = gzip_bidder_init;
106	bidder->options = NULL;
107	bidder->free = NULL; /* No data, so no cleanup necessary. */
108	/* Signal the extent of gzip support with the return value here. */
109#if HAVE_ZLIB_H
110	return (ARCHIVE_OK);
111#else
112	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
113	    "Using external gzip program");
114	return (ARCHIVE_WARN);
115#endif
116}
117
/*
 * Inspect (without consuming) the gzip member header at the current
 * read position of `filter`.
 *
 * Returns 0 when the bytes do not form an acceptable header or cannot
 * be read; otherwise returns the header length in bytes.  When pbits
 * is non-NULL and the header is accepted, *pbits receives the number
 * of bits that were actually checked, for use as a bid.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits)
{
	const unsigned char *hdr;
	ssize_t have, hdr_len;
	int verified = 0;
	int flags;

	/* The leading ten bytes have a fixed layout:
	 *   0-1  magic, 2 method, 3 flags, 4-7 mod time,
	 *   8    deflate flags, 9 OS. */
	hdr_len = 10;
	hdr = __archive_read_filter_ahead(filter, hdr_len, &have);
	if (hdr == NULL || have == 0)
		return (0);

	/* Magic number, then method 0x08 (deflate, the only one handled). */
	if (memcmp(hdr, "\x1F\x8B\x08", 3) != 0)
		return (0);
	verified += 24;

	/* The three reserved flag bits must all be clear. */
	if ((hdr[3] & 0xE0) != 0)
		return (0);
	verified += 3;
	flags = hdr[3];
	/* XXXX TODO: return deflate flags (byte 8) back to consume_header
	   for use in initializing the decompressor. */

	/* Flag 4: extra field, a little-endian 16-bit length followed by
	 * that many bytes of body. */
	if (flags & 4) {
		int extra_len;

		hdr = __archive_read_filter_ahead(filter, hdr_len + 2, &have);
		if (hdr == NULL)
			return (0);
		extra_len = ((int)hdr[hdr_len + 1] << 8) | (int)hdr[hdr_len];
		hdr_len += extra_len;
		hdr_len += 2;
	}

	/* Flag 8: original file name, terminated by a NUL byte. */
	if (flags & 8) {
		for (;;) {
			++hdr_len;
			/* Only re-peek when the scan runs past what the
			 * last peek made available. */
			if (have < hdr_len)
				hdr = __archive_read_filter_ahead(filter,
				    hdr_len, &have);
			if (hdr == NULL)
				return (0);
			if (hdr[hdr_len - 1] == 0)
				break;
		}
	}

	/* Flag 16: comment, terminated by a NUL byte. */
	if (flags & 16) {
		for (;;) {
			++hdr_len;
			if (have < hdr_len)
				hdr = __archive_read_filter_ahead(filter,
				    hdr_len, &have);
			if (hdr == NULL)
				return (0);
			if (hdr[hdr_len - 1] == 0)
				break;
		}
	}

	/* Flag 2: two-byte header CRC. */
	if ((flags & 2)) {
		hdr = __archive_read_filter_ahead(filter, hdr_len + 2, &have);
		if (hdr == NULL)
			return (0);
		/* XXX TODO: Compute the header CRC and compare it with the
		 * stored little-endian value; on a match, credit another
		 * 16 verified bits.  Until then the field is only skipped. */
		hdr_len += 2;
	}

	if (pbits != NULL)
		*pbits = verified;
	return (hdr_len);
}
205
/*
 * Bid callback: the bid is the number of header bits that
 * peek_at_header() was able to verify, or 0 if the header was rejected.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits;
	ssize_t header_len;

	(void)self; /* UNUSED */

	header_len = peek_at_header(filter, &verified_bits);
	if (header_len == 0)
		return (0);
	return (verified_bits);
}
221
222
223#ifndef HAVE_ZLIB_H
224
225/*
226 * If we don't have the library on this system, we can't do the
227 * decompression directly.  We can, however, try to run "gzip -d"
228 * in case that's available.
229 */
230static int
231gzip_bidder_init(struct archive_read_filter *self)
232{
233	int r;
234
235	r = __archive_read_program(self, "gzip -d");
236	/* Note: We set the format here even if __archive_read_program()
237	 * above fails.  We do, after all, know what the format is
238	 * even if we weren't able to read it. */
239	self->code = ARCHIVE_FILTER_GZIP;
240	self->name = "gzip";
241	return (r);
242}
243
244#else
245
246/*
247 * Initialize the filter object.
248 */
249static int
250gzip_bidder_init(struct archive_read_filter *self)
251{
252	struct private_data *state;
253	static const size_t out_block_size = 64 * 1024;
254	void *out_block;
255
256	self->code = ARCHIVE_FILTER_GZIP;
257	self->name = "gzip";
258
259	state = (struct private_data *)calloc(sizeof(*state), 1);
260	out_block = (unsigned char *)malloc(out_block_size);
261	if (state == NULL || out_block == NULL) {
262		free(out_block);
263		free(state);
264		archive_set_error(&self->archive->archive, ENOMEM,
265		    "Can't allocate data for gzip decompression");
266		return (ARCHIVE_FATAL);
267	}
268
269	self->data = state;
270	state->out_block_size = out_block_size;
271	state->out_block = out_block;
272	self->read = gzip_filter_read;
273	self->skip = NULL; /* not supported */
274	self->close = gzip_filter_close;
275
276	state->in_stream = 0; /* We're not actually within a stream yet. */
277
278	return (ARCHIVE_OK);
279}
280
281static int
282consume_header(struct archive_read_filter *self)
283{
284	struct private_data *state;
285	ssize_t avail;
286	size_t len;
287	int ret;
288
289	state = (struct private_data *)self->data;
290
291	/* If this is a real header, consume it. */
292	len = peek_at_header(self->upstream, NULL);
293	if (len == 0)
294		return (ARCHIVE_EOF);
295	__archive_read_filter_consume(self->upstream, len);
296
297	/* Initialize CRC accumulator. */
298	state->crc = crc32(0L, NULL, 0);
299
300	/* Initialize compression library. */
301	state->stream.next_in = (unsigned char *)(uintptr_t)
302	    __archive_read_filter_ahead(self->upstream, 1, &avail);
303	state->stream.avail_in = (uInt)avail;
304	ret = inflateInit2(&(state->stream),
305	    -15 /* Don't check for zlib header */);
306
307	/* Decipher the error code. */
308	switch (ret) {
309	case Z_OK:
310		state->in_stream = 1;
311		return (ARCHIVE_OK);
312	case Z_STREAM_ERROR:
313		archive_set_error(&self->archive->archive,
314		    ARCHIVE_ERRNO_MISC,
315		    "Internal error initializing compression library: "
316		    "invalid setup parameter");
317		break;
318	case Z_MEM_ERROR:
319		archive_set_error(&self->archive->archive, ENOMEM,
320		    "Internal error initializing compression library: "
321		    "out of memory");
322		break;
323	case Z_VERSION_ERROR:
324		archive_set_error(&self->archive->archive,
325		    ARCHIVE_ERRNO_MISC,
326		    "Internal error initializing compression library: "
327		    "invalid library version");
328		break;
329	default:
330		archive_set_error(&self->archive->archive,
331		    ARCHIVE_ERRNO_MISC,
332		    "Internal error initializing compression library: "
333		    " Zlib error %d", ret);
334		break;
335	}
336	return (ARCHIVE_FATAL);
337}
338
339static int
340consume_trailer(struct archive_read_filter *self)
341{
342	struct private_data *state;
343	const unsigned char *p;
344	ssize_t avail;
345
346	state = (struct private_data *)self->data;
347
348	state->in_stream = 0;
349	switch (inflateEnd(&(state->stream))) {
350	case Z_OK:
351		break;
352	default:
353		archive_set_error(&self->archive->archive,
354		    ARCHIVE_ERRNO_MISC,
355		    "Failed to clean up gzip decompressor");
356		return (ARCHIVE_FATAL);
357	}
358
359	/* GZip trailer is a fixed 8 byte structure. */
360	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
361	if (p == NULL || avail == 0)
362		return (ARCHIVE_FATAL);
363
364	/* XXX TODO: Verify the length and CRC. */
365
366	/* We've verified the trailer, so consume it now. */
367	__archive_read_filter_consume(self->upstream, 8);
368
369	return (ARCHIVE_OK);
370}
371
372static ssize_t
373gzip_filter_read(struct archive_read_filter *self, const void **p)
374{
375	struct private_data *state;
376	size_t decompressed;
377	ssize_t avail_in;
378	int ret;
379
380	state = (struct private_data *)self->data;
381
382	/* Empty our output buffer. */
383	state->stream.next_out = state->out_block;
384	state->stream.avail_out = (uInt)state->out_block_size;
385
386	/* Try to fill the output buffer. */
387	while (state->stream.avail_out > 0 && !state->eof) {
388		/* If we're not in a stream, read a header
389		 * and initialize the decompression library. */
390		if (!state->in_stream) {
391			ret = consume_header(self);
392			if (ret == ARCHIVE_EOF) {
393				state->eof = 1;
394				break;
395			}
396			if (ret < ARCHIVE_OK)
397				return (ret);
398		}
399
400		/* Peek at the next available data. */
401		/* ZLib treats stream.next_in as const but doesn't declare
402		 * it so, hence this ugly cast. */
403		state->stream.next_in = (unsigned char *)(uintptr_t)
404		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
405		if (state->stream.next_in == NULL) {
406			archive_set_error(&self->archive->archive,
407			    ARCHIVE_ERRNO_MISC,
408			    "truncated gzip input");
409			return (ARCHIVE_FATAL);
410		}
411		state->stream.avail_in = (uInt)avail_in;
412
413		/* Decompress and consume some of that data. */
414		ret = inflate(&(state->stream), 0);
415		switch (ret) {
416		case Z_OK: /* Decompressor made some progress. */
417			__archive_read_filter_consume(self->upstream,
418			    avail_in - state->stream.avail_in);
419			break;
420		case Z_STREAM_END: /* Found end of stream. */
421			__archive_read_filter_consume(self->upstream,
422			    avail_in - state->stream.avail_in);
423			/* Consume the stream trailer; release the
424			 * decompression library. */
425			ret = consume_trailer(self);
426			if (ret < ARCHIVE_OK)
427				return (ret);
428			break;
429		default:
430			/* Return an error. */
431			archive_set_error(&self->archive->archive,
432			    ARCHIVE_ERRNO_MISC,
433			    "gzip decompression failed");
434			return (ARCHIVE_FATAL);
435		}
436	}
437
438	/* We've read as much as we can. */
439	decompressed = state->stream.next_out - state->out_block;
440	state->total_out += decompressed;
441	if (decompressed == 0)
442		*p = NULL;
443	else
444		*p = state->out_block;
445	return (decompressed);
446}
447
448/*
449 * Clean up the decompressor.
450 */
451static int
452gzip_filter_close(struct archive_read_filter *self)
453{
454	struct private_data *state;
455	int ret;
456
457	state = (struct private_data *)self->data;
458	ret = ARCHIVE_OK;
459
460	if (state->in_stream) {
461		switch (inflateEnd(&(state->stream))) {
462		case Z_OK:
463			break;
464		default:
465			archive_set_error(&(self->archive->archive),
466			    ARCHIVE_ERRNO_MISC,
467			    "Failed to clean up gzip compressor");
468			ret = ARCHIVE_FATAL;
469		}
470	}
471
472	free(state->out_block);
473	free(state);
474	return (ret);
475}
476
477#endif /* HAVE_ZLIB_H */
478