1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28__FBSDID("$FreeBSD$");
29
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40#ifdef HAVE_LIMITS_H
41#include <limits.h>
42#endif
43#ifdef HAVE_UNISTD_H
44#include <unistd.h>
45#endif
46#ifdef HAVE_ZLIB_H
47#include <zlib.h>
48#endif
49
50#include "archive.h"
51#include "archive_entry.h"
52#include "archive_endian.h"
53#include "archive_private.h"
54#include "archive_read_private.h"
55
56#ifdef HAVE_ZLIB_H
57struct private_data {
58	z_stream	 stream;
59	char		 in_stream;
60	unsigned char	*out_block;
61	size_t		 out_block_size;
62	int64_t		 total_out;
63	unsigned long	 crc;
64	uint32_t	 mtime;
65	char		*name;
66	char		 eof; /* True = found end of compressed data. */
67};
68
69/* Gzip Filter. */
70static ssize_t	gzip_filter_read(struct archive_read_filter *, const void **);
71static int	gzip_filter_close(struct archive_read_filter *);
72#endif
73
74/*
75 * Note that we can detect gzip archives even if we can't decompress
76 * them.  (In fact, we like detecting them because we can give better
77 * error messages.)  So the bid framework here gets compiled even
78 * if zlib is unavailable.
79 *
 * If zlib is unavailable, gzip_bidder_init() falls back to running
 * an external "gzip -d" program to perform the decompression.
83 */
84static int	gzip_bidder_bid(struct archive_read_filter_bidder *,
85		    struct archive_read_filter *);
86static int	gzip_bidder_init(struct archive_read_filter *);
87
88#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated; remove in libarchive 4.0.
 * Forwards to archive_read_support_filter_gzip() and returns its result.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	int status = archive_read_support_filter_gzip(a);

	return (status);
}
95#endif
96
97int
98archive_read_support_filter_gzip(struct archive *_a)
99{
100	struct archive_read *a = (struct archive_read *)_a;
101	struct archive_read_filter_bidder *bidder;
102
103	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
104	    ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip");
105
106	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
107		return (ARCHIVE_FATAL);
108
109	bidder->data = NULL;
110	bidder->name = "gzip";
111	bidder->bid = gzip_bidder_bid;
112	bidder->init = gzip_bidder_init;
113	bidder->options = NULL;
114	bidder->free = NULL; /* No data, so no cleanup necessary. */
115	/* Signal the extent of gzip support with the return value here. */
116#if HAVE_ZLIB_H
117	return (ARCHIVE_OK);
118#else
119	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
120	    "Using external gzip program");
121	return (ARCHIVE_WARN);
122#endif
123}
124
/*
 * Read and verify the header.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder.
 *
 * The header is only examined through read-ahead; nothing is consumed
 * from `filter`.  When built with zlib and `state` is non-NULL, the
 * header's mtime and (if present) filename are stored into `state`
 * as a side effect.  The mtime is stored as soon as the fixed part
 * of the header has been accepted, even if a later optional field
 * turns out to be truncated.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0)!= 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
		/*
		 * Confirm the whole extra field is actually present.
		 * Without this, a header whose only optional field is a
		 * truncated extra block would be reported as valid with
		 * a length that runs past the available input.
		 */
		p = __archive_read_filter_ahead(filter, len, &avail);
		if (p == NULL)
			return (0);
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
#ifdef HAVE_ZLIB_H
		ssize_t file_start = len;
#endif
		/* Extend the read-ahead window one byte at a time until
		 * the terminating NUL has been seen. */
		do {
			++len;
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);

#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[file_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		do {
			++len;
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);
	}

	/* Optional header CRC */
	if ((header_flags & 2)) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/*
		 * XXX TODO: Compute the header CRC and compare it with
		 * the stored little-endian 16-bit value at p[len]; a
		 * match would let us count 16 more verified bits.
		 */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
236
/*
 * Bid callback: the bid is the number of header bits that
 * peek_at_header() was able to verify, or zero when the input does
 * not start with an acceptable gzip header.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits;

	(void)self; /* UNUSED */

	/* No state is passed, so the header is checked but not recorded. */
	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
252
253#ifndef HAVE_ZLIB_H
254
255/*
256 * If we don't have the library on this system, we can't do the
257 * decompression directly.  We can, however, try to run "gzip -d"
258 * in case that's available.
259 */
260static int
261gzip_bidder_init(struct archive_read_filter *self)
262{
263	int r;
264
265	r = __archive_read_program(self, "gzip -d");
266	/* Note: We set the format here even if __archive_read_program()
267	 * above fails.  We do, after all, know what the format is
268	 * even if we weren't able to read it. */
269	self->code = ARCHIVE_FILTER_GZIP;
270	self->name = "gzip";
271	return (r);
272}
273
274#else
275
276static int
277gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
278{
279	struct private_data *state;
280
281	state = (struct private_data *)self->data;
282
283	/* A mtime of 0 is considered invalid/missing. */
284	if (state->mtime != 0)
285		archive_entry_set_mtime(entry, state->mtime, 0);
286
287	/* If the name is available, extract it. */
288	if (state->name)
289		archive_entry_set_pathname(entry, state->name);
290
291	return (ARCHIVE_OK);
292}
293
294/*
295 * Initialize the filter object.
296 */
297static int
298gzip_bidder_init(struct archive_read_filter *self)
299{
300	struct private_data *state;
301	static const size_t out_block_size = 64 * 1024;
302	void *out_block;
303
304	self->code = ARCHIVE_FILTER_GZIP;
305	self->name = "gzip";
306
307	state = (struct private_data *)calloc(sizeof(*state), 1);
308	out_block = (unsigned char *)malloc(out_block_size);
309	if (state == NULL || out_block == NULL) {
310		free(out_block);
311		free(state);
312		archive_set_error(&self->archive->archive, ENOMEM,
313		    "Can't allocate data for gzip decompression");
314		return (ARCHIVE_FATAL);
315	}
316
317	self->data = state;
318	state->out_block_size = out_block_size;
319	state->out_block = out_block;
320	self->read = gzip_filter_read;
321	self->skip = NULL; /* not supported */
322	self->close = gzip_filter_close;
323#ifdef HAVE_ZLIB_H
324	self->read_header = gzip_read_header;
325#endif
326
327	state->in_stream = 0; /* We're not actually within a stream yet. */
328
329	return (ARCHIVE_OK);
330}
331
332static int
333consume_header(struct archive_read_filter *self)
334{
335	struct private_data *state;
336	ssize_t avail;
337	size_t len;
338	int ret;
339
340	state = (struct private_data *)self->data;
341
342	/* If this is a real header, consume it. */
343	len = peek_at_header(self->upstream, NULL, state);
344	if (len == 0)
345		return (ARCHIVE_EOF);
346	__archive_read_filter_consume(self->upstream, len);
347
348	/* Initialize CRC accumulator. */
349	state->crc = crc32(0L, NULL, 0);
350
351	/* Initialize compression library. */
352	state->stream.next_in = (unsigned char *)(uintptr_t)
353	    __archive_read_filter_ahead(self->upstream, 1, &avail);
354	state->stream.avail_in = (uInt)avail;
355	ret = inflateInit2(&(state->stream),
356	    -15 /* Don't check for zlib header */);
357
358	/* Decipher the error code. */
359	switch (ret) {
360	case Z_OK:
361		state->in_stream = 1;
362		return (ARCHIVE_OK);
363	case Z_STREAM_ERROR:
364		archive_set_error(&self->archive->archive,
365		    ARCHIVE_ERRNO_MISC,
366		    "Internal error initializing compression library: "
367		    "invalid setup parameter");
368		break;
369	case Z_MEM_ERROR:
370		archive_set_error(&self->archive->archive, ENOMEM,
371		    "Internal error initializing compression library: "
372		    "out of memory");
373		break;
374	case Z_VERSION_ERROR:
375		archive_set_error(&self->archive->archive,
376		    ARCHIVE_ERRNO_MISC,
377		    "Internal error initializing compression library: "
378		    "invalid library version");
379		break;
380	default:
381		archive_set_error(&self->archive->archive,
382		    ARCHIVE_ERRNO_MISC,
383		    "Internal error initializing compression library: "
384		    " Zlib error %d", ret);
385		break;
386	}
387	return (ARCHIVE_FATAL);
388}
389
390static int
391consume_trailer(struct archive_read_filter *self)
392{
393	struct private_data *state;
394	const unsigned char *p;
395	ssize_t avail;
396
397	state = (struct private_data *)self->data;
398
399	state->in_stream = 0;
400	switch (inflateEnd(&(state->stream))) {
401	case Z_OK:
402		break;
403	default:
404		archive_set_error(&self->archive->archive,
405		    ARCHIVE_ERRNO_MISC,
406		    "Failed to clean up gzip decompressor");
407		return (ARCHIVE_FATAL);
408	}
409
410	/* GZip trailer is a fixed 8 byte structure. */
411	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
412	if (p == NULL || avail == 0)
413		return (ARCHIVE_FATAL);
414
415	/* XXX TODO: Verify the length and CRC. */
416
417	/* We've verified the trailer, so consume it now. */
418	__archive_read_filter_consume(self->upstream, 8);
419
420	return (ARCHIVE_OK);
421}
422
423static ssize_t
424gzip_filter_read(struct archive_read_filter *self, const void **p)
425{
426	struct private_data *state;
427	size_t decompressed;
428	ssize_t avail_in, max_in;
429	int ret;
430
431	state = (struct private_data *)self->data;
432
433	/* Empty our output buffer. */
434	state->stream.next_out = state->out_block;
435	state->stream.avail_out = (uInt)state->out_block_size;
436
437	/* Try to fill the output buffer. */
438	while (state->stream.avail_out > 0 && !state->eof) {
439		/* If we're not in a stream, read a header
440		 * and initialize the decompression library. */
441		if (!state->in_stream) {
442			ret = consume_header(self);
443			if (ret == ARCHIVE_EOF) {
444				state->eof = 1;
445				break;
446			}
447			if (ret < ARCHIVE_OK)
448				return (ret);
449		}
450
451		/* Peek at the next available data. */
452		/* ZLib treats stream.next_in as const but doesn't declare
453		 * it so, hence this ugly cast. */
454		state->stream.next_in = (unsigned char *)(uintptr_t)
455		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
456		if (state->stream.next_in == NULL) {
457			archive_set_error(&self->archive->archive,
458			    ARCHIVE_ERRNO_MISC,
459			    "truncated gzip input");
460			return (ARCHIVE_FATAL);
461		}
462		if (UINT_MAX >= SSIZE_MAX)
463			max_in = SSIZE_MAX;
464		else
465			max_in = UINT_MAX;
466		if (avail_in > max_in)
467			avail_in = max_in;
468		state->stream.avail_in = (uInt)avail_in;
469
470		/* Decompress and consume some of that data. */
471		ret = inflate(&(state->stream), 0);
472		switch (ret) {
473		case Z_OK: /* Decompressor made some progress. */
474			__archive_read_filter_consume(self->upstream,
475			    avail_in - state->stream.avail_in);
476			break;
477		case Z_STREAM_END: /* Found end of stream. */
478			__archive_read_filter_consume(self->upstream,
479			    avail_in - state->stream.avail_in);
480			/* Consume the stream trailer; release the
481			 * decompression library. */
482			ret = consume_trailer(self);
483			if (ret < ARCHIVE_OK)
484				return (ret);
485			break;
486		default:
487			/* Return an error. */
488			archive_set_error(&self->archive->archive,
489			    ARCHIVE_ERRNO_MISC,
490			    "gzip decompression failed");
491			return (ARCHIVE_FATAL);
492		}
493	}
494
495	/* We've read as much as we can. */
496	decompressed = state->stream.next_out - state->out_block;
497	state->total_out += decompressed;
498	if (decompressed == 0)
499		*p = NULL;
500	else
501		*p = state->out_block;
502	return (decompressed);
503}
504
505/*
506 * Clean up the decompressor.
507 */
508static int
509gzip_filter_close(struct archive_read_filter *self)
510{
511	struct private_data *state;
512	int ret;
513
514	state = (struct private_data *)self->data;
515	ret = ARCHIVE_OK;
516
517	if (state->in_stream) {
518		switch (inflateEnd(&(state->stream))) {
519		case Z_OK:
520			break;
521		default:
522			archive_set_error(&(self->archive->archive),
523			    ARCHIVE_ERRNO_MISC,
524			    "Failed to clean up gzip compressor");
525			ret = ARCHIVE_FATAL;
526		}
527	}
528
529	free(state->name);
530	free(state->out_block);
531	free(state);
532	return (ret);
533}
534
535#endif /* HAVE_ZLIB_H */
536