archive_write_add_filter_xz.c revision 313570
1/*-
2 * Copyright (c) 2003-2010 Tim Kientzle
3 * Copyright (c) 2009-2012 Michihiro NAKAJIMA
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "archive_platform.h"
28
29__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_xz.c 201108 2009-12-28 03:28:21Z kientzle $");
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40#include <time.h>
41#ifdef HAVE_LZMA_H
42#include <lzma.h>
43#endif
44
45#include "archive.h"
46#include "archive_endian.h"
47#include "archive_private.h"
48#include "archive_write_private.h"
49
50#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls
 * __archive_write_filters_free() on the handle and then adds the lzip
 * filter, returning whatever archive_write_add_filter_lzip() returns.
 */
int
archive_write_set_compression_lzip(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzip(a);
	return (status);
}
57
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls
 * __archive_write_filters_free() on the handle and then adds the lzma
 * filter, returning whatever archive_write_add_filter_lzma() returns.
 */
int
archive_write_set_compression_lzma(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzma(a);
	return (status);
}
64
/*
 * Compatibility entry point, compiled only while
 * ARCHIVE_VERSION_NUMBER < 4000000.  Calls
 * __archive_write_filters_free() on the handle and then adds the xz
 * filter, returning whatever archive_write_add_filter_xz() returns.
 */
int
archive_write_set_compression_xz(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_xz(a);
	return (status);
}
71
72#endif
73
74#ifndef HAVE_LZMA_H
75int
76archive_write_add_filter_xz(struct archive *a)
77{
78	archive_set_error(a, ARCHIVE_ERRNO_MISC,
79	    "xz compression not supported on this platform");
80	return (ARCHIVE_FATAL);
81}
82
83int
84archive_write_add_filter_lzma(struct archive *a)
85{
86	archive_set_error(a, ARCHIVE_ERRNO_MISC,
87	    "lzma compression not supported on this platform");
88	return (ARCHIVE_FATAL);
89}
90
91int
92archive_write_add_filter_lzip(struct archive *a)
93{
94	archive_set_error(a, ARCHIVE_ERRNO_MISC,
95	    "lzma compression not supported on this platform");
96	return (ARCHIVE_FATAL);
97}
98#else
99/* Don't compile this if we don't have liblzma. */
100
101struct private_data {
102	int		 compression_level;
103	uint32_t	 threads;
104	lzma_stream	 stream;
105	lzma_filter	 lzmafilters[2];
106	lzma_options_lzma lzma_opt;
107	int64_t		 total_in;
108	unsigned char	*compressed;
109	size_t		 compressed_buffer_size;
110	int64_t		 total_out;
111	/* the CRC32 value of uncompressed data for lzip */
112	uint32_t	 crc32;
113};
114
/* Filter callbacks and the shared compression driver defined below. */
static int	archive_compressor_xz_options(struct archive_write_filter *f,
		    const char *key, const char *value);
static int	archive_compressor_xz_open(struct archive_write_filter *f);
static int	archive_compressor_xz_write(struct archive_write_filter *f,
		    const void *buff, size_t length);
static int	archive_compressor_xz_close(struct archive_write_filter *f);
static int	archive_compressor_xz_free(struct archive_write_filter *f);
static int	drive_compressor(struct archive_write_filter *f,
		    struct private_data *data, int finishing);
124
125struct option_value {
126	uint32_t dict_size;
127	uint32_t nice_len;
128	lzma_match_finder mf;
129};
130static const struct option_value option_values[] = {
131	{ 1 << 16, 32, LZMA_MF_HC3},
132	{ 1 << 20, 32, LZMA_MF_HC3},
133	{ 3 << 19, 32, LZMA_MF_HC4},
134	{ 1 << 21, 32, LZMA_MF_BT4},
135	{ 3 << 20, 32, LZMA_MF_BT4},
136	{ 1 << 22, 32, LZMA_MF_BT4},
137	{ 1 << 23, 64, LZMA_MF_BT4},
138	{ 1 << 24, 64, LZMA_MF_BT4},
139	{ 3 << 23, 64, LZMA_MF_BT4},
140	{ 1 << 25, 64, LZMA_MF_BT4}
141};
142
143static int
144common_setup(struct archive_write_filter *f)
145{
146	struct private_data *data;
147	struct archive_write *a = (struct archive_write *)f->archive;
148	data = calloc(1, sizeof(*data));
149	if (data == NULL) {
150		archive_set_error(&a->archive, ENOMEM, "Out of memory");
151		return (ARCHIVE_FATAL);
152	}
153	f->data = data;
154	data->compression_level = LZMA_PRESET_DEFAULT;
155	data->threads = 1;
156	f->open = &archive_compressor_xz_open;
157	f->close = archive_compressor_xz_close;
158	f->free = archive_compressor_xz_free;
159	f->options = &archive_compressor_xz_options;
160	return (ARCHIVE_OK);
161}
162
163/*
164 * Add an xz compression filter to this write handle.
165 */
166int
167archive_write_add_filter_xz(struct archive *_a)
168{
169	struct archive_write_filter *f;
170	int r;
171
172	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
173	    ARCHIVE_STATE_NEW, "archive_write_add_filter_xz");
174	f = __archive_write_allocate_filter(_a);
175	r = common_setup(f);
176	if (r == ARCHIVE_OK) {
177		f->code = ARCHIVE_FILTER_XZ;
178		f->name = "xz";
179	}
180	return (r);
181}
182
183/* LZMA is handled identically, we just need a different compression
184 * code set.  (The liblzma setup looks at the code to determine
185 * the one place that XZ and LZMA require different handling.) */
186int
187archive_write_add_filter_lzma(struct archive *_a)
188{
189	struct archive_write_filter *f;
190	int r;
191
192	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
193	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzma");
194	f = __archive_write_allocate_filter(_a);
195	r = common_setup(f);
196	if (r == ARCHIVE_OK) {
197		f->code = ARCHIVE_FILTER_LZMA;
198		f->name = "lzma";
199	}
200	return (r);
201}
202
203int
204archive_write_add_filter_lzip(struct archive *_a)
205{
206	struct archive_write_filter *f;
207	int r;
208
209	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
210	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzip");
211	f = __archive_write_allocate_filter(_a);
212	r = common_setup(f);
213	if (r == ARCHIVE_OK) {
214		f->code = ARCHIVE_FILTER_LZIP;
215		f->name = "lzip";
216	}
217	return (r);
218}
219
220static int
221archive_compressor_xz_init_stream(struct archive_write_filter *f,
222    struct private_data *data)
223{
224	static const lzma_stream lzma_stream_init_data = LZMA_STREAM_INIT;
225	int ret;
226#ifdef HAVE_LZMA_STREAM_ENCODER_MT
227	lzma_mt mt_options;
228#endif
229
230	data->stream = lzma_stream_init_data;
231	data->stream.next_out = data->compressed;
232	data->stream.avail_out = data->compressed_buffer_size;
233	if (f->code == ARCHIVE_FILTER_XZ) {
234#ifdef HAVE_LZMA_STREAM_ENCODER_MT
235		if (data->threads != 1) {
236			memset(&mt_options, 0, sizeof(mt_options));
237			mt_options.threads = data->threads;
238			mt_options.timeout = 300;
239			mt_options.filters = data->lzmafilters;
240			mt_options.check = LZMA_CHECK_CRC64;
241			ret = lzma_stream_encoder_mt(&(data->stream),
242			    &mt_options);
243		} else
244#endif
245			ret = lzma_stream_encoder(&(data->stream),
246			    data->lzmafilters, LZMA_CHECK_CRC64);
247	} else if (f->code == ARCHIVE_FILTER_LZMA) {
248		ret = lzma_alone_encoder(&(data->stream), &data->lzma_opt);
249	} else {	/* ARCHIVE_FILTER_LZIP */
250		int dict_size = data->lzma_opt.dict_size;
251		int ds, log2dic, wedges;
252
253		/* Calculate a coded dictionary size */
254		if (dict_size < (1 << 12) || dict_size > (1 << 27)) {
255			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
256			    "Unacceptable dictionary size for lzip: %d",
257			    dict_size);
258			return (ARCHIVE_FATAL);
259		}
260		for (log2dic = 27; log2dic >= 12; log2dic--) {
261			if (dict_size & (1 << log2dic))
262				break;
263		}
264		if (dict_size > (1 << log2dic)) {
265			log2dic++;
266			wedges =
267			    ((1 << log2dic) - dict_size) / (1 << (log2dic - 4));
268		} else
269			wedges = 0;
270		ds = ((wedges << 5) & 0xe0) | (log2dic & 0x1f);
271
272		data->crc32 = 0;
273		/* Make a header */
274		data->compressed[0] = 0x4C;
275		data->compressed[1] = 0x5A;
276		data->compressed[2] = 0x49;
277		data->compressed[3] = 0x50;
278		data->compressed[4] = 1;/* Version */
279		data->compressed[5] = (unsigned char)ds;
280		data->stream.next_out += 6;
281		data->stream.avail_out -= 6;
282
283		ret = lzma_raw_encoder(&(data->stream), data->lzmafilters);
284	}
285	if (ret == LZMA_OK)
286		return (ARCHIVE_OK);
287
288	switch (ret) {
289	case LZMA_MEM_ERROR:
290		archive_set_error(f->archive, ENOMEM,
291		    "Internal error initializing compression library: "
292		    "Cannot allocate memory");
293		break;
294	default:
295		archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
296		    "Internal error initializing compression library: "
297		    "It's a bug in liblzma");
298		break;
299	}
300	return (ARCHIVE_FATAL);
301}
302
303/*
304 * Setup callback.
305 */
306static int
307archive_compressor_xz_open(struct archive_write_filter *f)
308{
309	struct private_data *data = f->data;
310	int ret;
311
312	ret = __archive_write_open_filter(f->next_filter);
313	if (ret != ARCHIVE_OK)
314		return (ret);
315
316	if (data->compressed == NULL) {
317		size_t bs = 65536, bpb;
318		if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
319			/* Buffer size should be a multiple number of the of bytes
320			 * per block for performance. */
321			bpb = archive_write_get_bytes_per_block(f->archive);
322			if (bpb > bs)
323				bs = bpb;
324			else if (bpb != 0)
325				bs -= bs % bpb;
326		}
327		data->compressed_buffer_size = bs;
328		data->compressed
329		    = (unsigned char *)malloc(data->compressed_buffer_size);
330		if (data->compressed == NULL) {
331			archive_set_error(f->archive, ENOMEM,
332			    "Can't allocate data for compression buffer");
333			return (ARCHIVE_FATAL);
334		}
335	}
336
337	f->write = archive_compressor_xz_write;
338
339	/* Initialize compression library. */
340	if (f->code == ARCHIVE_FILTER_LZIP) {
341		const struct option_value *val =
342		    &option_values[data->compression_level];
343
344		data->lzma_opt.dict_size = val->dict_size;
345		data->lzma_opt.preset_dict = NULL;
346		data->lzma_opt.preset_dict_size = 0;
347		data->lzma_opt.lc = LZMA_LC_DEFAULT;
348		data->lzma_opt.lp = LZMA_LP_DEFAULT;
349		data->lzma_opt.pb = LZMA_PB_DEFAULT;
350		data->lzma_opt.mode =
351		    data->compression_level<= 2? LZMA_MODE_FAST:LZMA_MODE_NORMAL;
352		data->lzma_opt.nice_len = val->nice_len;
353		data->lzma_opt.mf = val->mf;
354		data->lzma_opt.depth = 0;
355		data->lzmafilters[0].id = LZMA_FILTER_LZMA1;
356		data->lzmafilters[0].options = &data->lzma_opt;
357		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
358	} else {
359		if (lzma_lzma_preset(&data->lzma_opt, data->compression_level)) {
360			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
361			    "Internal error initializing compression library");
362		}
363		data->lzmafilters[0].id = LZMA_FILTER_LZMA2;
364		data->lzmafilters[0].options = &data->lzma_opt;
365		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
366	}
367	ret = archive_compressor_xz_init_stream(f, data);
368	if (ret == LZMA_OK) {
369		f->data = data;
370		return (0);
371	}
372	return (ARCHIVE_FATAL);
373}
374
375/*
376 * Set write options.
377 */
378static int
379archive_compressor_xz_options(struct archive_write_filter *f,
380    const char *key, const char *value)
381{
382	struct private_data *data = (struct private_data *)f->data;
383
384	if (strcmp(key, "compression-level") == 0) {
385		if (value == NULL || !(value[0] >= '0' && value[0] <= '9') ||
386		    value[1] != '\0')
387			return (ARCHIVE_WARN);
388		data->compression_level = value[0] - '0';
389		if (data->compression_level > 6)
390			data->compression_level = 6;
391		return (ARCHIVE_OK);
392	} else if (strcmp(key, "threads") == 0) {
393		if (value == NULL)
394			return (ARCHIVE_WARN);
395		data->threads = (int)strtoul(value, NULL, 10);
396		if (data->threads == 0 && errno != 0) {
397			data->threads = 1;
398			return (ARCHIVE_WARN);
399		}
400		if (data->threads == 0) {
401#ifdef HAVE_LZMA_STREAM_ENCODER_MT
402			data->threads = lzma_cputhreads();
403#else
404			data->threads = 1;
405#endif
406		}
407		return (ARCHIVE_OK);
408	}
409
410	/* Note: The "warn" return is just to inform the options
411	 * supervisor that we didn't handle it.  It will generate
412	 * a suitable error if no one used this option. */
413	return (ARCHIVE_WARN);
414}
415
416/*
417 * Write data to the compressed stream.
418 */
419static int
420archive_compressor_xz_write(struct archive_write_filter *f,
421    const void *buff, size_t length)
422{
423	struct private_data *data = (struct private_data *)f->data;
424	int ret;
425
426	/* Update statistics */
427	data->total_in += length;
428	if (f->code == ARCHIVE_FILTER_LZIP)
429		data->crc32 = lzma_crc32(buff, length, data->crc32);
430
431	/* Compress input data to output buffer */
432	data->stream.next_in = buff;
433	data->stream.avail_in = length;
434	if ((ret = drive_compressor(f, data, 0)) != ARCHIVE_OK)
435		return (ret);
436
437	return (ARCHIVE_OK);
438}
439
440
441/*
442 * Finish the compression...
443 */
444static int
445archive_compressor_xz_close(struct archive_write_filter *f)
446{
447	struct private_data *data = (struct private_data *)f->data;
448	int ret, r1;
449
450	ret = drive_compressor(f, data, 1);
451	if (ret == ARCHIVE_OK) {
452		data->total_out +=
453		    data->compressed_buffer_size - data->stream.avail_out;
454		ret = __archive_write_filter(f->next_filter,
455		    data->compressed,
456		    data->compressed_buffer_size - data->stream.avail_out);
457		if (f->code == ARCHIVE_FILTER_LZIP && ret == ARCHIVE_OK) {
458			archive_le32enc(data->compressed, data->crc32);
459			archive_le64enc(data->compressed+4, data->total_in);
460			archive_le64enc(data->compressed+12, data->total_out + 20);
461			ret = __archive_write_filter(f->next_filter,
462			    data->compressed, 20);
463		}
464	}
465	lzma_end(&(data->stream));
466	r1 = __archive_write_close_filter(f->next_filter);
467	return (r1 < ret ? r1 : ret);
468}
469
470static int
471archive_compressor_xz_free(struct archive_write_filter *f)
472{
473	struct private_data *data = (struct private_data *)f->data;
474	free(data->compressed);
475	free(data);
476	f->data = NULL;
477	return (ARCHIVE_OK);
478}
479
480/*
481 * Utility function to push input data through compressor,
482 * writing full output blocks as necessary.
483 *
484 * Note that this handles both the regular write case (finishing ==
485 * false) and the end-of-archive case (finishing == true).
486 */
487static int
488drive_compressor(struct archive_write_filter *f,
489    struct private_data *data, int finishing)
490{
491	int ret;
492
493	for (;;) {
494		if (data->stream.avail_out == 0) {
495			data->total_out += data->compressed_buffer_size;
496			ret = __archive_write_filter(f->next_filter,
497			    data->compressed,
498			    data->compressed_buffer_size);
499			if (ret != ARCHIVE_OK)
500				return (ARCHIVE_FATAL);
501			data->stream.next_out = data->compressed;
502			data->stream.avail_out = data->compressed_buffer_size;
503		}
504
505		/* If there's nothing to do, we're done. */
506		if (!finishing && data->stream.avail_in == 0)
507			return (ARCHIVE_OK);
508
509		ret = lzma_code(&(data->stream),
510		    finishing ? LZMA_FINISH : LZMA_RUN );
511
512		switch (ret) {
513		case LZMA_OK:
514			/* In non-finishing case, check if compressor
515			 * consumed everything */
516			if (!finishing && data->stream.avail_in == 0)
517				return (ARCHIVE_OK);
518			/* In finishing case, this return always means
519			 * there's more work */
520			break;
521		case LZMA_STREAM_END:
522			/* This return can only occur in finishing case. */
523			if (finishing)
524				return (ARCHIVE_OK);
525			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
526			    "lzma compression data error");
527			return (ARCHIVE_FATAL);
528		case LZMA_MEMLIMIT_ERROR:
529			archive_set_error(f->archive, ENOMEM,
530			    "lzma compression error: "
531			    "%ju MiB would have been needed",
532			    (uintmax_t)((lzma_memusage(&(data->stream))
533				    + 1024 * 1024 -1)
534				/ (1024 * 1024)));
535			return (ARCHIVE_FATAL);
536		default:
537			/* Any other return value indicates an error. */
538			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
539			    "lzma compression failed:"
540			    " lzma_code() call returned status %d",
541			    ret);
542			return (ARCHIVE_FATAL);
543		}
544	}
545}
546
547#endif /* HAVE_LZMA_H */
548