archive_read_support_filter_gzip.c revision 348607
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28__FBSDID("$FreeBSD$");
29
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40#ifdef HAVE_LIMITS_H
41#include <limits.h>
42#endif
43#ifdef HAVE_UNISTD_H
44#include <unistd.h>
45#endif
46#ifdef HAVE_ZLIB_H
47#include <zlib.h>
48#endif
49
50#include "archive.h"
51#include "archive_entry.h"
52#include "archive_endian.h"
53#include "archive_private.h"
54#include "archive_read_private.h"
55
56#ifdef HAVE_ZLIB_H
57struct private_data {
58	z_stream	 stream;
59	char		 in_stream;
60	unsigned char	*out_block;
61	size_t		 out_block_size;
62	int64_t		 total_out;
63	unsigned long	 crc;
64	uint32_t	 mtime;
65	char		*name;
66	char		 eof; /* True = found end of compressed data. */
67};
68
69/* Gzip Filter. */
70static ssize_t	gzip_filter_read(struct archive_read_filter *, const void **);
71static int	gzip_filter_close(struct archive_read_filter *);
72#endif
73
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() falls back to running an
 * external "gzip -d" program to do the decompression.
 */
84static int	gzip_bidder_bid(struct archive_read_filter_bidder *,
85		    struct archive_read_filter *);
86static int	gzip_bidder_init(struct archive_read_filter *);
87
88#if ARCHIVE_VERSION_NUMBER < 4000000
89/* Deprecated; remove in libarchive 4.0 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	/* Legacy name: simply forwards to the filter-based entry point. */
	return (archive_read_support_filter_gzip(a));
}
95#endif
96
97int
98archive_read_support_filter_gzip(struct archive *_a)
99{
100	struct archive_read *a = (struct archive_read *)_a;
101	struct archive_read_filter_bidder *bidder;
102
103	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
104	    ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip");
105
106	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
107		return (ARCHIVE_FATAL);
108
109	bidder->data = NULL;
110	bidder->name = "gzip";
111	bidder->bid = gzip_bidder_bid;
112	bidder->init = gzip_bidder_init;
113	bidder->options = NULL;
114	bidder->free = NULL; /* No data, so no cleanup necessary. */
115	/* Signal the extent of gzip support with the return value here. */
116#if HAVE_ZLIB_H
117	return (ARCHIVE_OK);
118#else
119	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
120	    "Using external gzip program");
121	return (ARCHIVE_WARN);
122#endif
123}
124
/*
 * Advance past one NUL-terminated header string (file name or comment).
 *
 * `start` is the offset of the string's first byte.  *pp is the current
 * peek pointer and *avail the byte count that came with it; both are
 * refreshed whenever more input has to be requested.
 *
 * Returns the offset just past the terminating NUL, or zero if the
 * input ran out first or the string exceeds the sanity limit.
 */
static ssize_t
skip_header_string(struct archive_read_filter *filter,
    const unsigned char **pp, ssize_t *avail, ssize_t start)
{
	/* The gzip format sets no limit, but without one an unterminated
	 * string in hostile input makes us peek at the entire file. */
	enum { MAX_HEADER_STRING = 1024 * 1024 };
	const unsigned char *p = *pp;
	ssize_t len = start;

	do {
		++len;
		if (len - start > MAX_HEADER_STRING)
			return (0);
		if (*avail < len) {
			p = __archive_read_filter_ahead(filter, len, avail);
			if (p == NULL)
				return (0);
		}
	} while (p[len - 1] != 0);

	*pp = p;
	return (len);
}

/*
 * Read and verify the header.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder.
 *
 * If state is non-NULL (only meaningful when built with zlib), it
 * receives the header's modification time and a copy of the optional
 * file name.  The private state structure only exists in zlib builds,
 * so every use of it is guarded to keep the bidder compilable without
 * zlib.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	int bits = 0;
	int header_flags;

#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0) != 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
		ssize_t file_start = len;

		len = skip_header_string(filter, &p, &avail, file_start);
		if (len == 0)
			return (0);
#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[file_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		len = skip_header_string(filter, &p, &avail, len);
		if (len == 0)
			return (0);
	}

	/* Optional header CRC */
	if ((header_flags & 2)) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/* XXX TODO: Compute the header CRC, compare it with the
		 * stored 16-bit little-endian value at p[len], and count
		 * 16 more verified bits when it matches. */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
222
/*
 * Bid on the input: the bid is the number of header bits that
 * peek_at_header() verified, or zero when no valid gzip header is found.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits;

	(void)self; /* UNUSED */

	/* A zero header length means the header did not validate. */
	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
238
#ifdef HAVE_ZLIB_H
/*
 * Copy metadata captured from the gzip header into an archive entry.
 *
 * Only built with zlib: the private state it reads is defined only in
 * that configuration, and only the zlib gzip_bidder_init() installs
 * this callback.
 *
 * Always returns ARCHIVE_OK.
 */
static int
gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
{
	struct private_data *state;

	state = (struct private_data *)self->data;

	/* A mtime of 0 is considered invalid/missing. */
	if (state->mtime != 0)
		archive_entry_set_mtime(entry, state->mtime, 0);

	/* If the name is available, extract it. */
	if (state->name)
		archive_entry_set_pathname(entry, state->name);

	return (ARCHIVE_OK);
}
#endif /* HAVE_ZLIB_H */
256
257#ifndef HAVE_ZLIB_H
258
259/*
260 * If we don't have the library on this system, we can't do the
261 * decompression directly.  We can, however, try to run "gzip -d"
262 * in case that's available.
263 */
264static int
265gzip_bidder_init(struct archive_read_filter *self)
266{
267	int r;
268
269	r = __archive_read_program(self, "gzip -d");
270	/* Note: We set the format here even if __archive_read_program()
271	 * above fails.  We do, after all, know what the format is
272	 * even if we weren't able to read it. */
273	self->code = ARCHIVE_FILTER_GZIP;
274	self->name = "gzip";
275	return (r);
276}
277
278#else
279
280/*
281 * Initialize the filter object.
282 */
283static int
284gzip_bidder_init(struct archive_read_filter *self)
285{
286	struct private_data *state;
287	static const size_t out_block_size = 64 * 1024;
288	void *out_block;
289
290	self->code = ARCHIVE_FILTER_GZIP;
291	self->name = "gzip";
292
293	state = (struct private_data *)calloc(sizeof(*state), 1);
294	out_block = (unsigned char *)malloc(out_block_size);
295	if (state == NULL || out_block == NULL) {
296		free(out_block);
297		free(state);
298		archive_set_error(&self->archive->archive, ENOMEM,
299		    "Can't allocate data for gzip decompression");
300		return (ARCHIVE_FATAL);
301	}
302
303	self->data = state;
304	state->out_block_size = out_block_size;
305	state->out_block = out_block;
306	self->read = gzip_filter_read;
307	self->skip = NULL; /* not supported */
308	self->close = gzip_filter_close;
309	self->read_header = gzip_read_header;
310
311	state->in_stream = 0; /* We're not actually within a stream yet. */
312
313	return (ARCHIVE_OK);
314}
315
316static int
317consume_header(struct archive_read_filter *self)
318{
319	struct private_data *state;
320	ssize_t avail;
321	size_t len;
322	int ret;
323
324	state = (struct private_data *)self->data;
325
326	/* If this is a real header, consume it. */
327	len = peek_at_header(self->upstream, NULL, state);
328	if (len == 0)
329		return (ARCHIVE_EOF);
330	__archive_read_filter_consume(self->upstream, len);
331
332	/* Initialize CRC accumulator. */
333	state->crc = crc32(0L, NULL, 0);
334
335	/* Initialize compression library. */
336	state->stream.next_in = (unsigned char *)(uintptr_t)
337	    __archive_read_filter_ahead(self->upstream, 1, &avail);
338	state->stream.avail_in = (uInt)avail;
339	ret = inflateInit2(&(state->stream),
340	    -15 /* Don't check for zlib header */);
341
342	/* Decipher the error code. */
343	switch (ret) {
344	case Z_OK:
345		state->in_stream = 1;
346		return (ARCHIVE_OK);
347	case Z_STREAM_ERROR:
348		archive_set_error(&self->archive->archive,
349		    ARCHIVE_ERRNO_MISC,
350		    "Internal error initializing compression library: "
351		    "invalid setup parameter");
352		break;
353	case Z_MEM_ERROR:
354		archive_set_error(&self->archive->archive, ENOMEM,
355		    "Internal error initializing compression library: "
356		    "out of memory");
357		break;
358	case Z_VERSION_ERROR:
359		archive_set_error(&self->archive->archive,
360		    ARCHIVE_ERRNO_MISC,
361		    "Internal error initializing compression library: "
362		    "invalid library version");
363		break;
364	default:
365		archive_set_error(&self->archive->archive,
366		    ARCHIVE_ERRNO_MISC,
367		    "Internal error initializing compression library: "
368		    " Zlib error %d", ret);
369		break;
370	}
371	return (ARCHIVE_FATAL);
372}
373
374static int
375consume_trailer(struct archive_read_filter *self)
376{
377	struct private_data *state;
378	const unsigned char *p;
379	ssize_t avail;
380
381	state = (struct private_data *)self->data;
382
383	state->in_stream = 0;
384	switch (inflateEnd(&(state->stream))) {
385	case Z_OK:
386		break;
387	default:
388		archive_set_error(&self->archive->archive,
389		    ARCHIVE_ERRNO_MISC,
390		    "Failed to clean up gzip decompressor");
391		return (ARCHIVE_FATAL);
392	}
393
394	/* GZip trailer is a fixed 8 byte structure. */
395	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
396	if (p == NULL || avail == 0)
397		return (ARCHIVE_FATAL);
398
399	/* XXX TODO: Verify the length and CRC. */
400
401	/* We've verified the trailer, so consume it now. */
402	__archive_read_filter_consume(self->upstream, 8);
403
404	return (ARCHIVE_OK);
405}
406
407static ssize_t
408gzip_filter_read(struct archive_read_filter *self, const void **p)
409{
410	struct private_data *state;
411	size_t decompressed;
412	ssize_t avail_in, max_in;
413	int ret;
414
415	state = (struct private_data *)self->data;
416
417	/* Empty our output buffer. */
418	state->stream.next_out = state->out_block;
419	state->stream.avail_out = (uInt)state->out_block_size;
420
421	/* Try to fill the output buffer. */
422	while (state->stream.avail_out > 0 && !state->eof) {
423		/* If we're not in a stream, read a header
424		 * and initialize the decompression library. */
425		if (!state->in_stream) {
426			ret = consume_header(self);
427			if (ret == ARCHIVE_EOF) {
428				state->eof = 1;
429				break;
430			}
431			if (ret < ARCHIVE_OK)
432				return (ret);
433		}
434
435		/* Peek at the next available data. */
436		/* ZLib treats stream.next_in as const but doesn't declare
437		 * it so, hence this ugly cast. */
438		state->stream.next_in = (unsigned char *)(uintptr_t)
439		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
440		if (state->stream.next_in == NULL) {
441			archive_set_error(&self->archive->archive,
442			    ARCHIVE_ERRNO_MISC,
443			    "truncated gzip input");
444			return (ARCHIVE_FATAL);
445		}
446		if (UINT_MAX >= SSIZE_MAX)
447			max_in = SSIZE_MAX;
448		else
449			max_in = UINT_MAX;
450		if (avail_in > max_in)
451			avail_in = max_in;
452		state->stream.avail_in = (uInt)avail_in;
453
454		/* Decompress and consume some of that data. */
455		ret = inflate(&(state->stream), 0);
456		switch (ret) {
457		case Z_OK: /* Decompressor made some progress. */
458			__archive_read_filter_consume(self->upstream,
459			    avail_in - state->stream.avail_in);
460			break;
461		case Z_STREAM_END: /* Found end of stream. */
462			__archive_read_filter_consume(self->upstream,
463			    avail_in - state->stream.avail_in);
464			/* Consume the stream trailer; release the
465			 * decompression library. */
466			ret = consume_trailer(self);
467			if (ret < ARCHIVE_OK)
468				return (ret);
469			break;
470		default:
471			/* Return an error. */
472			archive_set_error(&self->archive->archive,
473			    ARCHIVE_ERRNO_MISC,
474			    "gzip decompression failed");
475			return (ARCHIVE_FATAL);
476		}
477	}
478
479	/* We've read as much as we can. */
480	decompressed = state->stream.next_out - state->out_block;
481	state->total_out += decompressed;
482	if (decompressed == 0)
483		*p = NULL;
484	else
485		*p = state->out_block;
486	return (decompressed);
487}
488
489/*
490 * Clean up the decompressor.
491 */
492static int
493gzip_filter_close(struct archive_read_filter *self)
494{
495	struct private_data *state;
496	int ret;
497
498	state = (struct private_data *)self->data;
499	ret = ARCHIVE_OK;
500
501	if (state->in_stream) {
502		switch (inflateEnd(&(state->stream))) {
503		case Z_OK:
504			break;
505		default:
506			archive_set_error(&(self->archive->archive),
507			    ARCHIVE_ERRNO_MISC,
508			    "Failed to clean up gzip compressor");
509			ret = ARCHIVE_FATAL;
510		}
511	}
512
513	free(state->name);
514	free(state->out_block);
515	free(state);
516	return (ret);
517}
518
519#endif /* HAVE_ZLIB_H */
520