1/*	$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5 *
6 * Copyright (c) 2011 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation
10 * by Christos Zoulas.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/11/usr.bin/gzip/unxz.c 343251 2019-01-21 06:52:35Z delphij $");
35
36#include <stdarg.h>
37#include <errno.h>
38#include <stdio.h>
39#include <unistd.h>
40#include <lzma.h>
41
42static off_t
43unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
44{
45	lzma_stream strm = LZMA_STREAM_INIT;
46	static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
47	lzma_ret ret;
48	lzma_action action = LZMA_RUN;
49	off_t bytes_out, bp;
50	uint8_t ibuf[BUFSIZ];
51	uint8_t obuf[BUFSIZ];
52
53	if (bytes_in == NULL)
54		bytes_in = &bp;
55
56	strm.next_in = ibuf;
57	memcpy(ibuf, pre, prelen);
58	strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
59	if (strm.avail_in == (size_t)-1)
60		maybe_err("read failed");
61	infile_newdata(strm.avail_in);
62	strm.avail_in += prelen;
63	*bytes_in = strm.avail_in;
64
65	if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
66		maybe_errx("Can't initialize decoder (%d)", ret);
67
68	strm.next_out = NULL;
69	strm.avail_out = 0;
70	if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
71		maybe_errx("Can't read headers (%d)", ret);
72
73	bytes_out = 0;
74	strm.next_out = obuf;
75	strm.avail_out = sizeof(obuf);
76
77	for (;;) {
78		check_siginfo();
79		if (strm.avail_in == 0) {
80			strm.next_in = ibuf;
81			strm.avail_in = read(i, ibuf, sizeof(ibuf));
82			switch (strm.avail_in) {
83			case (size_t)-1:
84				maybe_err("read failed");
85				/*NOTREACHED*/
86			case 0:
87				action = LZMA_FINISH;
88				break;
89			default:
90				infile_newdata(strm.avail_in);
91				*bytes_in += strm.avail_in;
92				break;
93			}
94		}
95
96		ret = lzma_code(&strm, action);
97
98		// Write and check write error before checking decoder error.
99		// This way as much data as possible gets written to output
100		// even if decoder detected an error.
101		if (strm.avail_out == 0 || ret != LZMA_OK) {
102			const size_t write_size = sizeof(obuf) - strm.avail_out;
103
104			if (write(o, obuf, write_size) != (ssize_t)write_size)
105				maybe_err("write failed");
106
107			strm.next_out = obuf;
108			strm.avail_out = sizeof(obuf);
109			bytes_out += write_size;
110		}
111
112		if (ret != LZMA_OK) {
113			if (ret == LZMA_STREAM_END) {
114				// Check that there's no trailing garbage.
115				if (strm.avail_in != 0 || read(i, ibuf, 1))
116					ret = LZMA_DATA_ERROR;
117				else {
118					lzma_end(&strm);
119					return bytes_out;
120				}
121			}
122
123			const char *msg;
124			switch (ret) {
125			case LZMA_MEM_ERROR:
126				msg = strerror(ENOMEM);
127				break;
128
129			case LZMA_FORMAT_ERROR:
130				msg = "File format not recognized";
131				break;
132
133			case LZMA_OPTIONS_ERROR:
134				// FIXME: Better message?
135				msg = "Unsupported compression options";
136				break;
137
138			case LZMA_DATA_ERROR:
139				msg = "File is corrupt";
140				break;
141
142			case LZMA_BUF_ERROR:
143				msg = "Unexpected end of input";
144				break;
145
146			case LZMA_MEMLIMIT_ERROR:
147				msg = "Reached memory limit";
148				break;
149
150			default:
151				maybe_errx("Unknown error (%d)", ret);
152				break;
153			}
154			maybe_errx("%s", msg);
155
156		}
157	}
158}
159
160#include <stdbool.h>
161
162/*
163 * Copied various bits and pieces from xz support code or brute force
164 * replacements.
165 */
166
// Smaller of A and B. This is a function-like macro: the argument that
// is selected gets evaluated twice, so do not pass expressions with
// side effects.
#define	my_min(A,B)	((A)<(B)?(A):(B))

// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
// IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t)); the
// "& ~7U" below rounds BUFSIZ down to such a multiple.
#if BUFSIZ <= 1024
#       define IO_BUFFER_SIZE 8192
#else
#       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#endif
176
/// I/O buffer of IO_BUFFER_SIZE bytes that can be viewed as bytes, as
/// 32-bit words or as 64-bit words. A union is used to make sure that the
/// buffer is properly aligned for the word-sized views.
/// In this file parse_indexes() reads the u32 view; the is_sparse()
/// function that the xz code uses with the u64 view is not visible here.
typedef union {
        uint8_t u8[IO_BUFFER_SIZE];
        uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
        uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
184
185
186static bool
187io_pread(int fd, io_buf *buf, size_t size, off_t pos)
188{
189	// Using lseek() and read() is more portable than pread() and
190	// for us it is as good as real pread().
191	if (lseek(fd, pos, SEEK_SET) != pos) {
192		return true;
193	}
194
195	const size_t amount = read(fd, buf, size);
196	if (amount == SIZE_MAX)
197		return true;
198
199	if (amount != size) {
200		return true;
201	}
202
203	return false;
204}
205
206/*
207 * Most of the following is copied (mostly verbatim) from the xz
208 * distribution, from file src/xz/list.c
209 */
210
211///////////////////////////////////////////////////////////////////////////////
212//
213/// \file       list.c
214/// \brief      Listing information about .xz files
215//
216//  Author:     Lasse Collin
217//
218//  This file has been put into the public domain.
219//  You can do whatever you want with this file.
220//
221///////////////////////////////////////////////////////////////////////////////
222
223
224/// Information about a .xz file
225typedef struct {
226	/// Combined Index of all Streams in the file
227	lzma_index *idx;
228
229	/// Total amount of Stream Padding
230	uint64_t stream_padding;
231
232	/// Highest memory usage so far
233	uint64_t memusage_max;
234
235	/// True if all Blocks so far have Compressed Size and
236	/// Uncompressed Size fields
237	bool all_have_sizes;
238
239	/// Oldest XZ Utils version that will decompress the file
240	uint32_t min_version;
241
242} xz_file_info;
243
244#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
245
246
247/// \brief      Parse the Index(es) from the given .xz file
248///
249/// \param      xfi     Pointer to structure where the decoded information
250///                     is stored.
251/// \param      pair    Input file
252///
253/// \return     On success, false is returned. On error, true is returned.
254///
255// TODO: This function is pretty big. liblzma should have a function that
256// takes a callback function to parse the Index(es) from a .xz file to make
257// it easy for applications.
258static bool
259parse_indexes(xz_file_info *xfi, int src_fd)
260{
261	struct stat st;
262
263	fstat(src_fd, &st);
264	if (st.st_size <= 0) {
265		return true;
266	}
267
268	if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
269		return true;
270	}
271
272	io_buf buf;
273	lzma_stream_flags header_flags;
274	lzma_stream_flags footer_flags;
275	lzma_ret ret;
276
277	// lzma_stream for the Index decoder
278	lzma_stream strm = LZMA_STREAM_INIT;
279
280	// All Indexes decoded so far
281	lzma_index *combined_index = NULL;
282
283	// The Index currently being decoded
284	lzma_index *this_index = NULL;
285
286	// Current position in the file. We parse the file backwards so
287	// initialize it to point to the end of the file.
288	off_t pos = st.st_size;
289
290	// Each loop iteration decodes one Index.
291	do {
292		// Check that there is enough data left to contain at least
293		// the Stream Header and Stream Footer. This check cannot
294		// fail in the first pass of this loop.
295		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
296			goto error;
297		}
298
299		pos -= LZMA_STREAM_HEADER_SIZE;
300		lzma_vli stream_padding = 0;
301
302		// Locate the Stream Footer. There may be Stream Padding which
303		// we must skip when reading backwards.
304		while (true) {
305			if (pos < LZMA_STREAM_HEADER_SIZE) {
306				goto error;
307			}
308
309			if (io_pread(src_fd, &buf,
310					LZMA_STREAM_HEADER_SIZE, pos))
311				goto error;
312
313			// Stream Padding is always a multiple of four bytes.
314			int i = 2;
315			if (buf.u32[i] != 0)
316				break;
317
318			// To avoid calling io_pread() for every four bytes
319			// of Stream Padding, take advantage that we read
320			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
321			// check them too before calling io_pread() again.
322			do {
323				stream_padding += 4;
324				pos -= 4;
325				--i;
326			} while (i >= 0 && buf.u32[i] == 0);
327		}
328
329		// Decode the Stream Footer.
330		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
331		if (ret != LZMA_OK) {
332			goto error;
333		}
334
335		// Check that the Stream Footer doesn't specify something
336		// that we don't support. This can only happen if the xz
337		// version is older than liblzma and liblzma supports
338		// something new.
339		//
340		// It is enough to check Stream Footer. Stream Header must
341		// match when it is compared against Stream Footer with
342		// lzma_stream_flags_compare().
343		if (footer_flags.version != 0) {
344			goto error;
345		}
346
347		// Check that the size of the Index field looks sane.
348		lzma_vli index_size = footer_flags.backward_size;
349		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
350			goto error;
351		}
352
353		// Set pos to the beginning of the Index.
354		pos -= index_size;
355
356		// Decode the Index.
357		ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
358		if (ret != LZMA_OK) {
359			goto error;
360		}
361
362		do {
363			// Don't give the decoder more input than the
364			// Index size.
365			strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
366			if (io_pread(src_fd, &buf, strm.avail_in, pos))
367				goto error;
368
369			pos += strm.avail_in;
370			index_size -= strm.avail_in;
371
372			strm.next_in = buf.u8;
373			ret = lzma_code(&strm, LZMA_RUN);
374
375		} while (ret == LZMA_OK);
376
377		// If the decoding seems to be successful, check also that
378		// the Index decoder consumed as much input as indicated
379		// by the Backward Size field.
380		if (ret == LZMA_STREAM_END)
381			if (index_size != 0 || strm.avail_in != 0)
382				ret = LZMA_DATA_ERROR;
383
384		if (ret != LZMA_STREAM_END) {
385			// LZMA_BUFFER_ERROR means that the Index decoder
386			// would have liked more input than what the Index
387			// size should be according to Stream Footer.
388			// The message for LZMA_DATA_ERROR makes more
389			// sense in that case.
390			if (ret == LZMA_BUF_ERROR)
391				ret = LZMA_DATA_ERROR;
392
393			goto error;
394		}
395
396		// Decode the Stream Header and check that its Stream Flags
397		// match the Stream Footer.
398		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
399		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
400			goto error;
401		}
402
403		pos -= lzma_index_total_size(this_index);
404		if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
405			goto error;
406
407		ret = lzma_stream_header_decode(&header_flags, buf.u8);
408		if (ret != LZMA_OK) {
409			goto error;
410		}
411
412		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
413		if (ret != LZMA_OK) {
414			goto error;
415		}
416
417		// Store the decoded Stream Flags into this_index. This is
418		// needed so that we can print which Check is used in each
419		// Stream.
420		ret = lzma_index_stream_flags(this_index, &footer_flags);
421		if (ret != LZMA_OK)
422			goto error;
423
424		// Store also the size of the Stream Padding field. It is
425		// needed to show the offsets of the Streams correctly.
426		ret = lzma_index_stream_padding(this_index, stream_padding);
427		if (ret != LZMA_OK)
428			goto error;
429
430		if (combined_index != NULL) {
431			// Append the earlier decoded Indexes
432			// after this_index.
433			ret = lzma_index_cat(
434					this_index, combined_index, NULL);
435			if (ret != LZMA_OK) {
436				goto error;
437			}
438		}
439
440		combined_index = this_index;
441		this_index = NULL;
442
443		xfi->stream_padding += stream_padding;
444
445	} while (pos > 0);
446
447	lzma_end(&strm);
448
449	// All OK. Make combined_index available to the caller.
450	xfi->idx = combined_index;
451	return false;
452
453error:
454	// Something went wrong, free the allocated memory.
455	lzma_end(&strm);
456	lzma_index_end(combined_index, NULL);
457	lzma_index_end(this_index, NULL);
458	return true;
459}
460
/***************** end of copy from list.c *************************/
462
463/*
464 * Small wrapper to extract total length of a file
465 */
466off_t
467unxz_len(int fd)
468{
469	xz_file_info xfi = XZ_FILE_INFO_INIT;
470	if (!parse_indexes(&xfi, fd)) {
471		off_t res = lzma_index_uncompressed_size(xfi.idx);
472		lzma_index_end(xfi.idx, NULL);
473		return res;
474	}
475	return 0;
476}
477
478