archive_read_support_filter_xz.c revision 231200
1228753Smm/*-
2231200Smm * Copyright (c) 2009-2011 Michihiro NAKAJIMA
3228753Smm * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
4228753Smm * All rights reserved.
5228753Smm *
6228753Smm * Redistribution and use in source and binary forms, with or without
7228753Smm * modification, are permitted provided that the following conditions
8228753Smm * are met:
9228753Smm * 1. Redistributions of source code must retain the above copyright
10228753Smm *    notice, this list of conditions and the following disclaimer.
11228753Smm * 2. Redistributions in binary form must reproduce the above copyright
12228753Smm *    notice, this list of conditions and the following disclaimer in the
13228753Smm *    documentation and/or other materials provided with the distribution.
14228753Smm *
15228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25228753Smm */
26228753Smm
27228753Smm#include "archive_platform.h"
28228753Smm
29231200Smm__FBSDID("$FreeBSD$");
30228753Smm
31228753Smm#ifdef HAVE_ERRNO_H
32228753Smm#include <errno.h>
33228753Smm#endif
34228753Smm#include <stdio.h>
35228753Smm#ifdef HAVE_STDLIB_H
36228753Smm#include <stdlib.h>
37228753Smm#endif
38228753Smm#ifdef HAVE_STRING_H
39228753Smm#include <string.h>
40228753Smm#endif
41228753Smm#ifdef HAVE_UNISTD_H
42228753Smm#include <unistd.h>
43228753Smm#endif
44228753Smm#if HAVE_LZMA_H
45228753Smm#include <lzma.h>
46228753Smm#elif HAVE_LZMADEC_H
47228753Smm#include <lzmadec.h>
48228753Smm#endif
49228753Smm
50228753Smm#include "archive.h"
51228753Smm#include "archive_endian.h"
52228753Smm#include "archive_private.h"
53228753Smm#include "archive_read_private.h"
54228753Smm
55228753Smm#if HAVE_LZMA_H && HAVE_LIBLZMA
56228753Smm
57228753Smmstruct private_data {
58228753Smm	lzma_stream	 stream;
59228753Smm	unsigned char	*out_block;
60228753Smm	size_t		 out_block_size;
61228753Smm	int64_t		 total_out;
62228753Smm	char		 eof; /* True = found end of compressed data. */
63231200Smm	char		 in_stream;
64231200Smm
65231200Smm	/* Following variables are used for lzip only. */
66231200Smm	char		 lzip_ver;
67231200Smm	uint32_t	 crc32;
68231200Smm	int64_t		 member_in;
69231200Smm	int64_t		 member_out;
70228753Smm};
71228753Smm
/*
 * Memory limit handed to the liblzma decoders.
 */
#if LZMA_VERSION_MAJOR < 5
/* NOTE: This should be derived from the memory size of the running system. */
#define LZMA_MEMLIMIT	(1U << 30)
#else
/* Effectively disable the limiter. */
#define LZMA_MEMLIMIT	UINT64_MAX
#endif

/* Combined lzip/lzma/xz filter callbacks. */
static ssize_t	xz_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	xz_filter_close(struct archive_read_filter *self);
static int	xz_lzma_bidder_init(struct archive_read_filter *self);
84228753Smm
85228753Smm#elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
86228753Smm
87228753Smmstruct private_data {
88228753Smm	lzmadec_stream	 stream;
89228753Smm	unsigned char	*out_block;
90228753Smm	size_t		 out_block_size;
91228753Smm	int64_t		 total_out;
92228753Smm	char		 eof; /* True = found end of compressed data. */
93228753Smm};
94228753Smm
/* Callbacks of the lzma-only filter. */
static ssize_t	lzma_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	lzma_filter_close(struct archive_read_filter *self);
98228753Smm#endif
99228753Smm
/*
 * xz and lzma compressed files can be detected even when they cannot
 * be decompressed, and detecting them is worthwhile because it allows
 * better error messages.  The bid framework declared here is therefore
 * compiled even if no lzma library is available.
 */
static int	xz_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	xz_bidder_init(struct archive_read_filter *self);

static int	lzma_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	lzma_bidder_init(struct archive_read_filter *self);

static int	lzip_has_member(struct archive_read_filter *filter);
static int	lzip_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	lzip_bidder_init(struct archive_read_filter *self);
116228753Smm
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated spelling of archive_read_support_filter_xz(); to be
 * removed in libarchive 4.0.
 */
int
archive_read_support_compression_xz(struct archive *a)
{
	int r;

	r = archive_read_support_filter_xz(a);
	return (r);
}
#endif
125231200Smm
126231200Smmint
127231200Smmarchive_read_support_filter_xz(struct archive *_a)
128231200Smm{
129228753Smm	struct archive_read *a = (struct archive_read *)_a;
130231200Smm	struct archive_read_filter_bidder *bidder;
131228753Smm
132231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
133231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_xz");
134231200Smm
135231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
136228753Smm		return (ARCHIVE_FATAL);
137228753Smm
138228753Smm	bidder->data = NULL;
139228753Smm	bidder->bid = xz_bidder_bid;
140228753Smm	bidder->init = xz_bidder_init;
141228753Smm	bidder->options = NULL;
142228753Smm	bidder->free = NULL;
143228753Smm#if HAVE_LZMA_H && HAVE_LIBLZMA
144228753Smm	return (ARCHIVE_OK);
145228753Smm#else
146228753Smm	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
147228753Smm	    "Using external unxz program for xz decompression");
148228753Smm	return (ARCHIVE_WARN);
149228753Smm#endif
150228753Smm}
151228753Smm
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Older name kept while ARCHIVE_VERSION_NUMBER is below 4000000;
 * forwards to archive_read_support_filter_lzma().
 */
int
archive_read_support_compression_lzma(struct archive *a)
{
	int r;

	r = archive_read_support_filter_lzma(a);
	return (r);
}
#endif
159231200Smm
160231200Smmint
161231200Smmarchive_read_support_filter_lzma(struct archive *_a)
162231200Smm{
163228753Smm	struct archive_read *a = (struct archive_read *)_a;
164231200Smm	struct archive_read_filter_bidder *bidder;
165228753Smm
166231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
167231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_lzma");
168231200Smm
169231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
170228753Smm		return (ARCHIVE_FATAL);
171228753Smm
172228753Smm	bidder->data = NULL;
173228753Smm	bidder->bid = lzma_bidder_bid;
174228753Smm	bidder->init = lzma_bidder_init;
175228753Smm	bidder->options = NULL;
176228753Smm	bidder->free = NULL;
177228753Smm#if HAVE_LZMA_H && HAVE_LIBLZMA
178228753Smm	return (ARCHIVE_OK);
179228753Smm#elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
180228753Smm	return (ARCHIVE_OK);
181228753Smm#else
182228753Smm	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
183228753Smm	    "Using external unlzma program for lzma decompression");
184228753Smm	return (ARCHIVE_WARN);
185228753Smm#endif
186228753Smm}
187228753Smm
188231200Smm
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Older name kept while ARCHIVE_VERSION_NUMBER is below 4000000;
 * forwards to archive_read_support_filter_lzip().
 */
int
archive_read_support_compression_lzip(struct archive *a)
{
	int r;

	r = archive_read_support_filter_lzip(a);
	return (r);
}
#endif
196231200Smm
197231200Smmint
198231200Smmarchive_read_support_filter_lzip(struct archive *_a)
199231200Smm{
200231200Smm	struct archive_read *a = (struct archive_read *)_a;
201231200Smm	struct archive_read_filter_bidder *bidder;
202231200Smm
203231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
204231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_lzip");
205231200Smm
206231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
207231200Smm		return (ARCHIVE_FATAL);
208231200Smm
209231200Smm	bidder->data = NULL;
210231200Smm	bidder->bid = lzip_bidder_bid;
211231200Smm	bidder->init = lzip_bidder_init;
212231200Smm	bidder->options = NULL;
213231200Smm	bidder->free = NULL;
214231200Smm#if HAVE_LZMA_H && HAVE_LIBLZMA
215231200Smm	return (ARCHIVE_OK);
216231200Smm#else
217231200Smm	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
218231200Smm	    "Using external lzip program for lzip decompression");
219231200Smm	return (ARCHIVE_WARN);
220231200Smm#endif
221231200Smm}
222231200Smm
/*
 * Bid on a stream that starts with the xz header magic.
 *
 * Returns 48 (six matched bytes, eight bits each) when the first six
 * bytes are FD 37 7A 58 5A 00; returns 0 when they differ or cannot
 * be read.
 */
static int
xz_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	static const unsigned char xz_magic[6] = {
		0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00
	};
	const unsigned char *head;
	ssize_t got;

	(void)self; /* UNUSED */

	head = __archive_read_filter_ahead(filter, 6, &got);
	if (head != NULL && memcmp(head, xz_magic, sizeof(xz_magic)) == 0)
		return (48);
	return (0);
}
247228753Smm
248228753Smm/*
249228753Smm * Test whether we can handle this data.
250228753Smm *
251228753Smm * <sigh> LZMA has a rather poor file signature.  Zeros do not
252228753Smm * make good signature bytes as a rule, and the only non-zero byte
253228753Smm * here is an ASCII character.  For example, an uncompressed tar
254228753Smm * archive whose first file is ']' would satisfy this check.  It may
255228753Smm * be necessary to exclude LZMA from compression_all() because of
256228753Smm * this.  Clients of libarchive would then have to explicitly enable
257228753Smm * LZMA checking instead of (or in addition to) compression_all() when
258228753Smm * they have other evidence (file name, command-line option) to go on.
259228753Smm */
260228753Smmstatic int
261228753Smmlzma_bidder_bid(struct archive_read_filter_bidder *self,
262228753Smm    struct archive_read_filter *filter)
263228753Smm{
264228753Smm	const unsigned char *buffer;
265228753Smm	ssize_t avail;
266228753Smm	uint32_t dicsize;
267228753Smm	uint64_t uncompressed_size;
268228753Smm	int bits_checked;
269228753Smm
270228753Smm	(void)self; /* UNUSED */
271228753Smm
272228753Smm	buffer = __archive_read_filter_ahead(filter, 14, &avail);
273228753Smm	if (buffer == NULL)
274228753Smm		return (0);
275228753Smm
276228753Smm	/* First byte of raw LZMA stream is commonly 0x5d.
277228753Smm	 * The first byte is a special number, which consists of
278228753Smm	 * three parameters of LZMA compression, a number of literal
279228753Smm	 * context bits(which is from 0 to 8, default is 3), a number
280228753Smm	 * of literal pos bits(which is from 0 to 4, default is 0),
281228753Smm	 * a number of pos bits(which is from 0 to 4, default is 2).
282228753Smm	 * The first byte is made by
283228753Smm	 * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
284228753Smm	 * and so the default value in this field is
285228753Smm	 * (2 * 5 + 0) * 9 + 3 = 0x5d.
286228753Smm	 * lzma of LZMA SDK has options to change those parameters.
287228753Smm	 * It means a range of this field is from 0 to 224. And lzma of
288228753Smm	 * XZ Utils with option -e records 0x5e in this field. */
289228753Smm	/* NOTE: If this checking of the first byte increases false
290228753Smm	 * recognition, we should allow only 0x5d and 0x5e for the first
291228753Smm	 * byte of LZMA stream. */
292228753Smm	bits_checked = 0;
293228753Smm	if (buffer[0] > (4 * 5 + 4) * 9 + 8)
294228753Smm		return (0);
295228753Smm	/* Most likely value in the first byte of LZMA stream. */
296228753Smm	if (buffer[0] == 0x5d || buffer[0] == 0x5e)
297228753Smm		bits_checked += 8;
298228753Smm
299228753Smm	/* Sixth through fourteenth bytes are uncompressed size,
300228753Smm	 * stored in little-endian order. `-1' means uncompressed
301228753Smm	 * size is unknown and lzma of XZ Utils always records `-1'
302228753Smm	 * in this field. */
303228753Smm	uncompressed_size = archive_le64dec(buffer+5);
304228753Smm	if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
305228753Smm		bits_checked += 64;
306228753Smm
307228753Smm	/* Second through fifth bytes are dictionary size, stored in
308228753Smm	 * little-endian order. The minimum dictionary size is
309228753Smm	 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
310228753Smm	 * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
311228753Smm	 * which the one uses with option -d27.
312228753Smm	 * NOTE: A comment of LZMA SDK source code says this dictionary
313228753Smm	 * range is from 1 << 12 to 1 << 30. */
314228753Smm	dicsize = archive_le32dec(buffer+1);
315228753Smm	switch (dicsize) {
316228753Smm	case 0x00001000:/* lzma of LZMA SDK option -d12. */
317228753Smm	case 0x00002000:/* lzma of LZMA SDK option -d13. */
318228753Smm	case 0x00004000:/* lzma of LZMA SDK option -d14. */
319228753Smm	case 0x00008000:/* lzma of LZMA SDK option -d15. */
320228753Smm	case 0x00010000:/* lzma of XZ Utils option -0 and -1.
321228753Smm			 * lzma of LZMA SDK option -d16. */
322228753Smm	case 0x00020000:/* lzma of LZMA SDK option -d17. */
323228753Smm	case 0x00040000:/* lzma of LZMA SDK option -d18. */
324228753Smm	case 0x00080000:/* lzma of XZ Utils option -2.
325228753Smm			 * lzma of LZMA SDK option -d19. */
326228753Smm	case 0x00100000:/* lzma of XZ Utils option -3.
327228753Smm			 * lzma of LZMA SDK option -d20. */
328228753Smm	case 0x00200000:/* lzma of XZ Utils option -4.
329228753Smm			 * lzma of LZMA SDK option -d21. */
330228753Smm	case 0x00400000:/* lzma of XZ Utils option -5.
331228753Smm			 * lzma of LZMA SDK option -d22. */
332228753Smm	case 0x00800000:/* lzma of XZ Utils option -6.
333228753Smm			 * lzma of LZMA SDK option -d23. */
334228753Smm	case 0x01000000:/* lzma of XZ Utils option -7.
335228753Smm			 * lzma of LZMA SDK option -d24. */
336228753Smm	case 0x02000000:/* lzma of XZ Utils option -8.
337228753Smm			 * lzma of LZMA SDK option -d25. */
338228753Smm	case 0x04000000:/* lzma of XZ Utils option -9.
339228753Smm			 * lzma of LZMA SDK option -d26. */
340228753Smm	case 0x08000000:/* lzma of LZMA SDK option -d27. */
341228753Smm		bits_checked += 32;
342228753Smm		break;
343228753Smm	default:
344228753Smm		/* If a memory usage for encoding was not enough on
345228753Smm		 * the platform where LZMA stream was made, lzma of
346228753Smm		 * XZ Utils automatically decreased the dictionary
347228753Smm		 * size to enough memory for encoding by 1Mi bytes
348228753Smm		 * (1 << 20).*/
349228753Smm		if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
350228753Smm		    (dicsize & ((1 << 20)-1)) == 0 &&
351228753Smm		    bits_checked == 8 + 64) {
352228753Smm			bits_checked += 32;
353228753Smm			break;
354228753Smm		}
355228753Smm		/* Otherwise dictionary size is unlikely. But it is
356228753Smm		 * possible that someone makes lzma stream with
357228753Smm		 * liblzma/LZMA SDK in one's dictionary size. */
358228753Smm		return (0);
359228753Smm	}
360228753Smm
361228753Smm	/* TODO: The above test is still very weak.  It would be
362228753Smm	 * good to do better. */
363228753Smm
364228753Smm	return (bits_checked);
365228753Smm}
366228753Smm
/*
 * Check whether the next six bytes of `filter' look like an lzip
 * member header.
 *
 * Returns 48 (32 magic bits + 8 version bits + 8 dictionary bits)
 * when the header is plausible, 0 otherwise.  No input is consumed
 * by this function.
 */
static int
lzip_has_member(struct archive_read_filter *filter)
{
	const unsigned char *hdr;
	ssize_t got;
	int dict_log2;

	hdr = __archive_read_filter_ahead(filter, 6, &got);
	if (hdr == NULL)
		return (0);

	/* Header magic bytes: 4C 5A 49 50 (`LZIP'). */
	if (memcmp(hdr, "LZIP", 4) != 0)
		return (0);

	/* The version number must be 0 or 1. */
	if (hdr[4] > 1)
		return (0);

	/* Low five bits carry log2 of the dictionary size. */
	dict_log2 = hdr[5] & 0x1f;
	if (dict_log2 < 12 || dict_log2 > 27)
		return (0);

	return (32 + 8 + 8);
}
400231200Smm
/*
 * Bid callback for lzip: the bid is whatever lzip_has_member()
 * reports for the start of the stream.
 */
static int
lzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int bid;

	(void)self; /* UNUSED */

	bid = lzip_has_member(filter);
	return (bid);
}
409231200Smm
410228753Smm#if HAVE_LZMA_H && HAVE_LIBLZMA
411228753Smm
412228753Smm/*
413228753Smm * liblzma 4.999.7 and later support both lzma and xz streams.
414228753Smm */
415228753Smmstatic int
416228753Smmxz_bidder_init(struct archive_read_filter *self)
417228753Smm{
418228753Smm	self->code = ARCHIVE_COMPRESSION_XZ;
419228753Smm	self->name = "xz";
420228753Smm	return (xz_lzma_bidder_init(self));
421228753Smm}
422228753Smm
423228753Smmstatic int
424228753Smmlzma_bidder_init(struct archive_read_filter *self)
425228753Smm{
426228753Smm	self->code = ARCHIVE_COMPRESSION_LZMA;
427228753Smm	self->name = "lzma";
428228753Smm	return (xz_lzma_bidder_init(self));
429228753Smm}
430228753Smm
431231200Smmstatic int
432231200Smmlzip_bidder_init(struct archive_read_filter *self)
433231200Smm{
434231200Smm	self->code = ARCHIVE_COMPRESSION_LZIP;
435231200Smm	self->name = "lzip";
436231200Smm	return (xz_lzma_bidder_init(self));
437231200Smm}
438231200Smm
439228753Smm/*
440231200Smm * Set an error code and choose an error message
441231200Smm */
442231200Smmstatic void
443231200Smmset_error(struct archive_read_filter *self, int ret)
444231200Smm{
445231200Smm
446231200Smm	switch (ret) {
447231200Smm	case LZMA_STREAM_END: /* Found end of stream. */
448231200Smm	case LZMA_OK: /* Decompressor made some progress. */
449231200Smm		break;
450231200Smm	case LZMA_MEM_ERROR:
451231200Smm		archive_set_error(&self->archive->archive, ENOMEM,
452231200Smm		    "Lzma library error: Cannot allocate memory");
453231200Smm		break;
454231200Smm	case LZMA_MEMLIMIT_ERROR:
455231200Smm		archive_set_error(&self->archive->archive, ENOMEM,
456231200Smm		    "Lzma library error: Out of memory");
457231200Smm		break;
458231200Smm	case LZMA_FORMAT_ERROR:
459231200Smm		archive_set_error(&self->archive->archive,
460231200Smm		    ARCHIVE_ERRNO_MISC,
461231200Smm		    "Lzma library error: format not recognized");
462231200Smm		break;
463231200Smm	case LZMA_OPTIONS_ERROR:
464231200Smm		archive_set_error(&self->archive->archive,
465231200Smm		    ARCHIVE_ERRNO_MISC,
466231200Smm		    "Lzma library error: Invalid options");
467231200Smm		break;
468231200Smm	case LZMA_DATA_ERROR:
469231200Smm		archive_set_error(&self->archive->archive,
470231200Smm		    ARCHIVE_ERRNO_MISC,
471231200Smm		    "Lzma library error: Corrupted input data");
472231200Smm		break;
473231200Smm	case LZMA_BUF_ERROR:
474231200Smm		archive_set_error(&self->archive->archive,
475231200Smm		    ARCHIVE_ERRNO_MISC,
476231200Smm		    "Lzma library error:  No progress is possible");
477231200Smm		break;
478231200Smm	default:
479231200Smm		/* Return an error. */
480231200Smm		archive_set_error(&self->archive->archive,
481231200Smm		    ARCHIVE_ERRNO_MISC,
482231200Smm		    "Lzma decompression failed:  Unknown error");
483231200Smm		break;
484231200Smm	}
485231200Smm}
486231200Smm
487231200Smm/*
488228753Smm * Setup the callbacks.
489228753Smm */
490228753Smmstatic int
491228753Smmxz_lzma_bidder_init(struct archive_read_filter *self)
492228753Smm{
493228753Smm	static const size_t out_block_size = 64 * 1024;
494228753Smm	void *out_block;
495228753Smm	struct private_data *state;
496228753Smm	int ret;
497228753Smm
498228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
499228753Smm	out_block = (unsigned char *)malloc(out_block_size);
500228753Smm	if (state == NULL || out_block == NULL) {
501228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
502228753Smm		    "Can't allocate data for xz decompression");
503228753Smm		free(out_block);
504228753Smm		free(state);
505228753Smm		return (ARCHIVE_FATAL);
506228753Smm	}
507228753Smm
508228753Smm	self->data = state;
509228753Smm	state->out_block_size = out_block_size;
510228753Smm	state->out_block = out_block;
511228753Smm	self->read = xz_filter_read;
512228753Smm	self->skip = NULL; /* not supported */
513228753Smm	self->close = xz_filter_close;
514228753Smm
515228753Smm	state->stream.avail_in = 0;
516228753Smm
517228753Smm	state->stream.next_out = state->out_block;
518228753Smm	state->stream.avail_out = state->out_block_size;
519228753Smm
520231200Smm	state->crc32 = 0;
521231200Smm	if (self->code == ARCHIVE_COMPRESSION_LZIP) {
522231200Smm		/*
523231200Smm		 * We have to read a lzip header and use it to initialize
524231200Smm		 * compression library, thus we cannot initialize the
525231200Smm		 * library for lzip here.
526231200Smm		 */
527231200Smm		state->in_stream = 0;
528231200Smm		return (ARCHIVE_OK);
529231200Smm	} else
530231200Smm		state->in_stream = 1;
531231200Smm
532231200Smm	/* Initialize compression library. */
533228753Smm	if (self->code == ARCHIVE_COMPRESSION_XZ)
534228753Smm		ret = lzma_stream_decoder(&(state->stream),
535231200Smm		    LZMA_MEMLIMIT,/* memlimit */
536228753Smm		    LZMA_CONCATENATED);
537228753Smm	else
538228753Smm		ret = lzma_alone_decoder(&(state->stream),
539231200Smm		    LZMA_MEMLIMIT);/* memlimit */
540228753Smm
541228753Smm	if (ret == LZMA_OK)
542228753Smm		return (ARCHIVE_OK);
543228753Smm
544228753Smm	/* Library setup failed: Choose an error message and clean up. */
545231200Smm	set_error(self, ret);
546228753Smm
547228753Smm	free(state->out_block);
548228753Smm	free(state);
549228753Smm	self->data = NULL;
550228753Smm	return (ARCHIVE_FATAL);
551228753Smm}
552228753Smm
553231200Smmstatic int
554231200Smmlzip_init(struct archive_read_filter *self)
555231200Smm{
556231200Smm	struct private_data *state;
557231200Smm	const unsigned char *h;
558231200Smm	lzma_filter filters[2];
559231200Smm	unsigned char props[5];
560231200Smm	ssize_t avail_in;
561231200Smm	uint32_t dicsize;
562231200Smm	int log2dic, ret;
563231200Smm
564231200Smm	state = (struct private_data *)self->data;
565231200Smm	h = __archive_read_filter_ahead(self->upstream, 6, &avail_in);
566231200Smm	if (h == NULL)
567231200Smm		return (ARCHIVE_FATAL);
568231200Smm
569231200Smm	/* Get a version number. */
570231200Smm	state->lzip_ver = h[4];
571231200Smm
572231200Smm	/*
573231200Smm	 * Setup lzma property.
574231200Smm	 */
575231200Smm	props[0] = 0x5d;
576231200Smm
577231200Smm	/* Get dictionary size. */
578231200Smm	log2dic = h[5] & 0x1f;
579231200Smm	if (log2dic < 12 || log2dic > 27)
580231200Smm		return (ARCHIVE_FATAL);
581231200Smm	dicsize = 1U << log2dic;
582231200Smm	if (log2dic > 12)
583231200Smm		dicsize -= (dicsize / 16) * (h[5] >> 5);
584231200Smm	archive_le32enc(props+1, dicsize);
585231200Smm
586231200Smm	/* Consume lzip header. */
587231200Smm	__archive_read_filter_consume(self->upstream, 6);
588231200Smm	state->member_in = 6;
589231200Smm
590231200Smm	filters[0].id = LZMA_FILTER_LZMA1;
591231200Smm	filters[0].options = NULL;
592231200Smm	filters[1].id = LZMA_VLI_UNKNOWN;
593231200Smm	filters[1].options = NULL;
594231200Smm
595231200Smm	ret = lzma_properties_decode(&filters[0], NULL, props, sizeof(props));
596231200Smm	if (ret != LZMA_OK) {
597231200Smm		set_error(self, ret);
598231200Smm		return (ARCHIVE_FATAL);
599231200Smm	}
600231200Smm	ret = lzma_raw_decoder(&(state->stream), filters);
601231200Smm#if LZMA_VERSION < 50000030
602231200Smm	free(filters[0].options);
603231200Smm#endif
604231200Smm	if (ret != LZMA_OK) {
605231200Smm		set_error(self, ret);
606231200Smm		return (ARCHIVE_FATAL);
607231200Smm	}
608231200Smm	return (ARCHIVE_OK);
609231200Smm}
610231200Smm
611231200Smmstatic int
612231200Smmlzip_tail(struct archive_read_filter *self)
613231200Smm{
614231200Smm	struct private_data *state;
615231200Smm	const unsigned char *f;
616231200Smm	ssize_t avail_in;
617231200Smm	int tail;
618231200Smm
619231200Smm	state = (struct private_data *)self->data;
620231200Smm	if (state->lzip_ver == 0)
621231200Smm		tail = 12;
622231200Smm	else
623231200Smm		tail = 20;
624231200Smm	f = __archive_read_filter_ahead(self->upstream, tail, &avail_in);
625231200Smm	if (f == NULL && avail_in < 0)
626231200Smm		return (ARCHIVE_FATAL);
627231200Smm	if (avail_in < tail) {
628231200Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
629231200Smm		    "Lzip: Remaining data is less bytes");
630231200Smm		return (ARCHIVE_FAILED);
631231200Smm	}
632231200Smm
633231200Smm	/* Check the crc32 value of the uncompressed data of the current
634231200Smm	 * member */
635231200Smm	if (state->crc32 != archive_le32dec(f)) {
636231200Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
637231200Smm		    "Lzip: CRC32 error");
638231200Smm		return (ARCHIVE_FAILED);
639231200Smm	}
640231200Smm
641231200Smm	/* Check the uncompressed size of the current member */
642231200Smm	if ((uint64_t)state->member_out != archive_le64dec(f + 4)) {
643231200Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
644231200Smm		    "Lzip: Uncompressed size error");
645231200Smm		return (ARCHIVE_FAILED);
646231200Smm	}
647231200Smm
648231200Smm	/* Check the total size of the current member */
649231200Smm	if (state->lzip_ver == 1 &&
650231200Smm	    (uint64_t)state->member_in + tail != archive_le64dec(f + 12)) {
651231200Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
652231200Smm		    "Lzip: Member size error");
653231200Smm		return (ARCHIVE_FAILED);
654231200Smm	}
655231200Smm	__archive_read_filter_consume(self->upstream, tail);
656231200Smm
657231200Smm	/* If current lzip data consists of multi member, try decompressing
658231200Smm	 * a next member. */
659231200Smm	if (lzip_has_member(self->upstream) != 0) {
660231200Smm		state->in_stream = 0;
661231200Smm		state->crc32 = 0;
662231200Smm		state->member_out = 0;
663231200Smm		state->member_in = 0;
664231200Smm		state->eof = 0;
665231200Smm	}
666231200Smm	return (ARCHIVE_OK);
667231200Smm}
668231200Smm
669228753Smm/*
670228753Smm * Return the next block of decompressed data.
671228753Smm */
672228753Smmstatic ssize_t
673228753Smmxz_filter_read(struct archive_read_filter *self, const void **p)
674228753Smm{
675228753Smm	struct private_data *state;
676228753Smm	size_t decompressed;
677228753Smm	ssize_t avail_in;
678228753Smm	int ret;
679228753Smm
680228753Smm	state = (struct private_data *)self->data;
681228753Smm
682228753Smm	/* Empty our output buffer. */
683228753Smm	state->stream.next_out = state->out_block;
684228753Smm	state->stream.avail_out = state->out_block_size;
685228753Smm
686228753Smm	/* Try to fill the output buffer. */
687228753Smm	while (state->stream.avail_out > 0 && !state->eof) {
688231200Smm		if (!state->in_stream) {
689231200Smm			/*
690231200Smm			 * Initialize liblzma for lzip
691231200Smm			 */
692231200Smm			ret = lzip_init(self);
693231200Smm			if (ret != ARCHIVE_OK)
694231200Smm				return (ret);
695231200Smm			state->in_stream = 1;
696231200Smm		}
697228753Smm		state->stream.next_in =
698228753Smm		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
699231200Smm		if (state->stream.next_in == NULL && avail_in < 0) {
700231200Smm			archive_set_error(&self->archive->archive,
701231200Smm			    ARCHIVE_ERRNO_MISC,
702231200Smm			    "truncated input");
703228753Smm			return (ARCHIVE_FATAL);
704231200Smm		}
705228753Smm		state->stream.avail_in = avail_in;
706228753Smm
707228753Smm		/* Decompress as much as we can in one pass. */
708228753Smm		ret = lzma_code(&(state->stream),
709228753Smm		    (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
710228753Smm		switch (ret) {
711228753Smm		case LZMA_STREAM_END: /* Found end of stream. */
712228753Smm			state->eof = 1;
713228753Smm			/* FALL THROUGH */
714228753Smm		case LZMA_OK: /* Decompressor made some progress. */
715228753Smm			__archive_read_filter_consume(self->upstream,
716228753Smm			    avail_in - state->stream.avail_in);
717231200Smm			state->member_in +=
718231200Smm			    avail_in - state->stream.avail_in;
719228753Smm			break;
720228753Smm		default:
721231200Smm			set_error(self, ret);
722228753Smm			return (ARCHIVE_FATAL);
723228753Smm		}
724228753Smm	}
725228753Smm
726228753Smm	decompressed = state->stream.next_out - state->out_block;
727228753Smm	state->total_out += decompressed;
728231200Smm	state->member_out += decompressed;
729228753Smm	if (decompressed == 0)
730228753Smm		*p = NULL;
731231200Smm	else {
732228753Smm		*p = state->out_block;
733231200Smm		if (self->code == ARCHIVE_COMPRESSION_LZIP) {
734231200Smm			state->crc32 = lzma_crc32(state->out_block,
735231200Smm			    decompressed, state->crc32);
736231200Smm			if (state->eof) {
737231200Smm				ret = lzip_tail(self);
738231200Smm				if (ret != ARCHIVE_OK)
739231200Smm					return (ret);
740231200Smm			}
741231200Smm		}
742231200Smm	}
743228753Smm	return (decompressed);
744228753Smm}
745228753Smm
746228753Smm/*
747228753Smm * Clean up the decompressor.
748228753Smm */
749228753Smmstatic int
750228753Smmxz_filter_close(struct archive_read_filter *self)
751228753Smm{
752228753Smm	struct private_data *state;
753228753Smm
754228753Smm	state = (struct private_data *)self->data;
755228753Smm	lzma_end(&(state->stream));
756228753Smm	free(state->out_block);
757228753Smm	free(state);
758228753Smm	return (ARCHIVE_OK);
759228753Smm}
760228753Smm
761228753Smm#else
762228753Smm
763228753Smm#if HAVE_LZMADEC_H && HAVE_LIBLZMADEC
764228753Smm
765228753Smm/*
766228753Smm * If we have the older liblzmadec library, then we can handle
767228753Smm * LZMA streams but not XZ streams.
768228753Smm */
769228753Smm
770228753Smm/*
771228753Smm * Setup the callbacks.
772228753Smm */
773228753Smmstatic int
774228753Smmlzma_bidder_init(struct archive_read_filter *self)
775228753Smm{
776228753Smm	static const size_t out_block_size = 64 * 1024;
777228753Smm	void *out_block;
778228753Smm	struct private_data *state;
779228753Smm	ssize_t ret, avail_in;
780228753Smm
781228753Smm	self->code = ARCHIVE_COMPRESSION_LZMA;
782228753Smm	self->name = "lzma";
783228753Smm
784228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
785228753Smm	out_block = (unsigned char *)malloc(out_block_size);
786228753Smm	if (state == NULL || out_block == NULL) {
787228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
788228753Smm		    "Can't allocate data for lzma decompression");
789228753Smm		free(out_block);
790228753Smm		free(state);
791228753Smm		return (ARCHIVE_FATAL);
792228753Smm	}
793228753Smm
794228753Smm	self->data = state;
795228753Smm	state->out_block_size = out_block_size;
796228753Smm	state->out_block = out_block;
797228753Smm	self->read = lzma_filter_read;
798228753Smm	self->skip = NULL; /* not supported */
799228753Smm	self->close = lzma_filter_close;
800228753Smm
801228753Smm	/* Prime the lzma library with 18 bytes of input. */
802228753Smm	state->stream.next_in = (unsigned char *)(uintptr_t)
803228753Smm	    __archive_read_filter_ahead(self->upstream, 18, &avail_in);
804228753Smm	if (state->stream.next_in == NULL)
805228753Smm		return (ARCHIVE_FATAL);
806228753Smm	state->stream.avail_in = avail_in;
807228753Smm	state->stream.next_out = state->out_block;
808228753Smm	state->stream.avail_out = state->out_block_size;
809228753Smm
810228753Smm	/* Initialize compression library. */
811228753Smm	ret = lzmadec_init(&(state->stream));
812228753Smm	__archive_read_filter_consume(self->upstream,
813228753Smm	    avail_in - state->stream.avail_in);
814228753Smm	if (ret == LZMADEC_OK)
815228753Smm		return (ARCHIVE_OK);
816228753Smm
817228753Smm	/* Library setup failed: Clean up. */
818228753Smm	archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
819228753Smm	    "Internal error initializing lzma library");
820228753Smm
821228753Smm	/* Override the error message if we know what really went wrong. */
822228753Smm	switch (ret) {
823228753Smm	case LZMADEC_HEADER_ERROR:
824228753Smm		archive_set_error(&self->archive->archive,
825228753Smm		    ARCHIVE_ERRNO_MISC,
826228753Smm		    "Internal error initializing compression library: "
827228753Smm		    "invalid header");
828228753Smm		break;
829228753Smm	case LZMADEC_MEM_ERROR:
830228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
831228753Smm		    "Internal error initializing compression library: "
832228753Smm		    "out of memory");
833228753Smm		break;
834228753Smm	}
835228753Smm
836228753Smm	free(state->out_block);
837228753Smm	free(state);
838228753Smm	self->data = NULL;
839228753Smm	return (ARCHIVE_FATAL);
840228753Smm}
841228753Smm
842228753Smm/*
843228753Smm * Return the next block of decompressed data.
844228753Smm */
845228753Smmstatic ssize_t
846228753Smmlzma_filter_read(struct archive_read_filter *self, const void **p)
847228753Smm{
848228753Smm	struct private_data *state;
849228753Smm	size_t decompressed;
850228753Smm	ssize_t avail_in, ret;
851228753Smm
852228753Smm	state = (struct private_data *)self->data;
853228753Smm
854228753Smm	/* Empty our output buffer. */
855228753Smm	state->stream.next_out = state->out_block;
856228753Smm	state->stream.avail_out = state->out_block_size;
857228753Smm
858228753Smm	/* Try to fill the output buffer. */
859228753Smm	while (state->stream.avail_out > 0 && !state->eof) {
860228753Smm		state->stream.next_in = (unsigned char *)(uintptr_t)
861228753Smm		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
862231200Smm		if (state->stream.next_in == NULL && avail_in < 0) {
863231200Smm			archive_set_error(&self->archive->archive,
864231200Smm			    ARCHIVE_ERRNO_MISC,
865231200Smm			    "truncated lzma input");
866228753Smm			return (ARCHIVE_FATAL);
867231200Smm		}
868228753Smm		state->stream.avail_in = avail_in;
869228753Smm
870228753Smm		/* Decompress as much as we can in one pass. */
871228753Smm		ret = lzmadec_decode(&(state->stream), avail_in == 0);
872228753Smm		switch (ret) {
873228753Smm		case LZMADEC_STREAM_END: /* Found end of stream. */
874228753Smm			state->eof = 1;
875228753Smm			/* FALL THROUGH */
876228753Smm		case LZMADEC_OK: /* Decompressor made some progress. */
877228753Smm			__archive_read_filter_consume(self->upstream,
878228753Smm			    avail_in - state->stream.avail_in);
879228753Smm			break;
880228753Smm		case LZMADEC_BUF_ERROR: /* Insufficient input data? */
881228753Smm			archive_set_error(&self->archive->archive,
882228753Smm			    ARCHIVE_ERRNO_MISC,
883228753Smm			    "Insufficient compressed data");
884228753Smm			return (ARCHIVE_FATAL);
885228753Smm		default:
886228753Smm			/* Return an error. */
887228753Smm			archive_set_error(&self->archive->archive,
888228753Smm			    ARCHIVE_ERRNO_MISC,
889228753Smm			    "Lzma decompression failed");
890228753Smm			return (ARCHIVE_FATAL);
891228753Smm		}
892228753Smm	}
893228753Smm
894228753Smm	decompressed = state->stream.next_out - state->out_block;
895228753Smm	state->total_out += decompressed;
896228753Smm	if (decompressed == 0)
897228753Smm		*p = NULL;
898228753Smm	else
899228753Smm		*p = state->out_block;
900228753Smm	return (decompressed);
901228753Smm}
902228753Smm
903228753Smm/*
904228753Smm * Clean up the decompressor.
905228753Smm */
906228753Smmstatic int
907228753Smmlzma_filter_close(struct archive_read_filter *self)
908228753Smm{
909228753Smm	struct private_data *state;
910228753Smm	int ret;
911228753Smm
912228753Smm	state = (struct private_data *)self->data;
913228753Smm	ret = ARCHIVE_OK;
914228753Smm	switch (lzmadec_end(&(state->stream))) {
915228753Smm	case LZMADEC_OK:
916228753Smm		break;
917228753Smm	default:
918228753Smm		archive_set_error(&(self->archive->archive),
919228753Smm		    ARCHIVE_ERRNO_MISC,
920228753Smm		    "Failed to clean up %s compressor",
921228753Smm		    self->archive->archive.compression_name);
922228753Smm		ret = ARCHIVE_FATAL;
923228753Smm	}
924228753Smm
925228753Smm	free(state->out_block);
926228753Smm	free(state);
927228753Smm	return (ret);
928228753Smm}
929228753Smm
930228753Smm#else
931228753Smm
932228753Smm/*
933228753Smm *
934228753Smm * If we have no suitable library on this system, we can't actually do
935228753Smm * the decompression.  We can, however, still detect compressed
936228753Smm * archives and emit a useful message.
937228753Smm *
938228753Smm */
939228753Smmstatic int
940228753Smmlzma_bidder_init(struct archive_read_filter *self)
941228753Smm{
942228753Smm	int r;
943228753Smm
944228753Smm	r = __archive_read_program(self, "unlzma");
945228753Smm	/* Note: We set the format here even if __archive_read_program()
946228753Smm	 * above fails.  We do, after all, know what the format is
947228753Smm	 * even if we weren't able to read it. */
948228753Smm	self->code = ARCHIVE_COMPRESSION_LZMA;
949228753Smm	self->name = "lzma";
950228753Smm	return (r);
951228753Smm}
952228753Smm
953228753Smm#endif /* HAVE_LZMADEC_H */
954228753Smm
955228753Smm
956228753Smmstatic int
957228753Smmxz_bidder_init(struct archive_read_filter *self)
958228753Smm{
959228753Smm	int r;
960228753Smm
961228753Smm	r = __archive_read_program(self, "unxz");
962228753Smm	/* Note: We set the format here even if __archive_read_program()
963228753Smm	 * above fails.  We do, after all, know what the format is
964228753Smm	 * even if we weren't able to read it. */
965228753Smm	self->code = ARCHIVE_COMPRESSION_XZ;
966228753Smm	self->name = "xz";
967228753Smm	return (r);
968228753Smm}
969228753Smm
970231200Smmstatic int
971231200Smmlzip_bidder_init(struct archive_read_filter *self)
972231200Smm{
973231200Smm	int r;
974228753Smm
975231200Smm	r = __archive_read_program(self, "unlzip");
976231200Smm	/* Note: We set the format here even if __archive_read_program()
977231200Smm	 * above fails.  We do, after all, know what the format is
978231200Smm	 * even if we weren't able to read it. */
979231200Smm	self->code = ARCHIVE_COMPRESSION_LZIP;
980231200Smm	self->name = "lzip";
981231200Smm	return (r);
982231200Smm}
983231200Smm
984231200Smm
985228753Smm#endif /* HAVE_LZMA_H */
986