Deleted Added
full compact
unxz.c (330449) unxz.c (343251)
1/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */
1/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
2
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5 *
6 * Copyright (c) 2011 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation

--- 16 unchanged lines hidden (view full) ---

26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33#include <sys/cdefs.h>
2
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5 *
6 * Copyright (c) 2011 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation

--- 16 unchanged lines hidden (view full) ---

26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/11/usr.bin/gzip/unxz.c 330449 2018-03-05 07:26:05Z eadler $");
34__FBSDID("$FreeBSD: stable/11/usr.bin/gzip/unxz.c 343251 2019-01-21 06:52:35Z delphij $");
35
36#include <stdarg.h>
37#include <errno.h>
38#include <stdio.h>
39#include <unistd.h>
40#include <lzma.h>
41
42static off_t

--- 108 unchanged lines hidden (view full) ---

151 maybe_errx("Unknown error (%d)", ret);
152 break;
153 }
154 maybe_errx("%s", msg);
155
156 }
157 }
158}
35
36#include <stdarg.h>
37#include <errno.h>
38#include <stdio.h>
39#include <unistd.h>
40#include <lzma.h>
41
42static off_t

--- 108 unchanged lines hidden (view full) ---

151 maybe_errx("Unknown error (%d)", ret);
152 break;
153 }
154 maybe_errx("%s", msg);
155
156 }
157 }
158}
159
160#include <stdbool.h>
161
162/*
163 * Copied various bits and pieces from xz support code or brute force
164 * replacements.
165 */
166
/// Yields the smaller of the two operands. This is a function-like macro:
/// the operand that ends up selected is evaluated twice, so callers must
/// pass expressions without side effects.
#define my_min(A, B) (((A) < (B)) ? (A) : (B))
168
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
// IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t)) so that
// every member of io_buf below covers the buffer exactly.
#if BUFSIZ <= 1024
#	define IO_BUFFER_SIZE 8192
#else
#	define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#endif

/// I/O buffer that can be viewed as bytes or as 32-/64-bit words.
/// parse_indexes() inspects the buffer through the u32 view, so a union is
/// used to make sure that the storage is properly aligned for word access.
typedef union {
	uint8_t u8[IO_BUFFER_SIZE];
	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
	uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;


/// \brief      Read exactly size bytes from absolute file offset pos
///
/// \param      fd      File descriptor to read from; it must be seekable
///                     because the offset is set with lseek().
/// \param      buf     Destination buffer.
/// \param      size    Number of bytes wanted; at most sizeof(io_buf).
/// \param      pos     Absolute offset of the first byte to read.
///
/// \return     On success (all size bytes were read), false is returned.
///             On error, true is returned: size exceeds the buffer, the
///             seek failed, read() failed, or end of file was reached
///             before size bytes were available.
static bool
io_pread(int fd, io_buf *buf, size_t size, off_t pos)
{
	// Refuse requests that would write past the end of the buffer.
	if (size > sizeof(*buf))
		return true;

	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
	if (lseek(fd, pos, SEEK_SET) != pos)
		return true;

	// read() may legitimately return fewer bytes than requested or be
	// interrupted by a signal; keep going until the request is
	// satisfied, a real error occurs, or the file ends.
	size_t done = 0;
	while (done < size) {
		const ssize_t amount = read(fd, buf->u8 + done, size - done);

		if (amount < 0) {
			if (errno == EINTR)
				continue;

			return true;
		}

		// Premature end of file.
		if (amount == 0)
			return true;

		done += (size_t)amount;
	}

	return false;
}
205
206/*
207 * Most of the following is copied (mostly verbatim) from the xz
208 * distribution, from file src/xz/list.c
209 */
210
211///////////////////////////////////////////////////////////////////////////////
212//
213/// \file list.c
214/// \brief Listing information about .xz files
215//
216// Author: Lasse Collin
217//
218// This file has been put into the public domain.
219// You can do whatever you want with this file.
220//
221///////////////////////////////////////////////////////////////////////////////
222
223
224/// Information about a .xz file
225typedef struct {
226 /// Combined Index of all Streams in the file
227 lzma_index *idx;
228
229 /// Total amount of Stream Padding
230 uint64_t stream_padding;
231
232 /// Highest memory usage so far
233 uint64_t memusage_max;
234
235 /// True if all Blocks so far have Compressed Size and
236 /// Uncompressed Size fields
237 bool all_have_sizes;
238
239 /// Oldest XZ Utils version that will decompress the file
240 uint32_t min_version;
241
242} xz_file_info;
243
244#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
245
246
247/// \brief Parse the Index(es) from the given .xz file
248///
249/// \param xfi Pointer to structure where the decoded information
250/// is stored.
251/// \param pair Input file
252///
253/// \return On success, false is returned. On error, true is returned.
254///
255// TODO: This function is pretty big. liblzma should have a function that
256// takes a callback function to parse the Index(es) from a .xz file to make
257// it easy for applications.
258static bool
259parse_indexes(xz_file_info *xfi, int src_fd)
260{
261 struct stat st;
262
263 fstat(src_fd, &st);
264 if (st.st_size <= 0) {
265 return true;
266 }
267
268 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
269 return true;
270 }
271
272 io_buf buf;
273 lzma_stream_flags header_flags;
274 lzma_stream_flags footer_flags;
275 lzma_ret ret;
276
277 // lzma_stream for the Index decoder
278 lzma_stream strm = LZMA_STREAM_INIT;
279
280 // All Indexes decoded so far
281 lzma_index *combined_index = NULL;
282
283 // The Index currently being decoded
284 lzma_index *this_index = NULL;
285
286 // Current position in the file. We parse the file backwards so
287 // initialize it to point to the end of the file.
288 off_t pos = st.st_size;
289
290 // Each loop iteration decodes one Index.
291 do {
292 // Check that there is enough data left to contain at least
293 // the Stream Header and Stream Footer. This check cannot
294 // fail in the first pass of this loop.
295 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
296 goto error;
297 }
298
299 pos -= LZMA_STREAM_HEADER_SIZE;
300 lzma_vli stream_padding = 0;
301
302 // Locate the Stream Footer. There may be Stream Padding which
303 // we must skip when reading backwards.
304 while (true) {
305 if (pos < LZMA_STREAM_HEADER_SIZE) {
306 goto error;
307 }
308
309 if (io_pread(src_fd, &buf,
310 LZMA_STREAM_HEADER_SIZE, pos))
311 goto error;
312
313 // Stream Padding is always a multiple of four bytes.
314 int i = 2;
315 if (buf.u32[i] != 0)
316 break;
317
318 // To avoid calling io_pread() for every four bytes
319 // of Stream Padding, take advantage that we read
320 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
321 // check them too before calling io_pread() again.
322 do {
323 stream_padding += 4;
324 pos -= 4;
325 --i;
326 } while (i >= 0 && buf.u32[i] == 0);
327 }
328
329 // Decode the Stream Footer.
330 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
331 if (ret != LZMA_OK) {
332 goto error;
333 }
334
335 // Check that the Stream Footer doesn't specify something
336 // that we don't support. This can only happen if the xz
337 // version is older than liblzma and liblzma supports
338 // something new.
339 //
340 // It is enough to check Stream Footer. Stream Header must
341 // match when it is compared against Stream Footer with
342 // lzma_stream_flags_compare().
343 if (footer_flags.version != 0) {
344 goto error;
345 }
346
347 // Check that the size of the Index field looks sane.
348 lzma_vli index_size = footer_flags.backward_size;
349 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
350 goto error;
351 }
352
353 // Set pos to the beginning of the Index.
354 pos -= index_size;
355
356 // Decode the Index.
357 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
358 if (ret != LZMA_OK) {
359 goto error;
360 }
361
362 do {
363 // Don't give the decoder more input than the
364 // Index size.
365 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
366 if (io_pread(src_fd, &buf, strm.avail_in, pos))
367 goto error;
368
369 pos += strm.avail_in;
370 index_size -= strm.avail_in;
371
372 strm.next_in = buf.u8;
373 ret = lzma_code(&strm, LZMA_RUN);
374
375 } while (ret == LZMA_OK);
376
377 // If the decoding seems to be successful, check also that
378 // the Index decoder consumed as much input as indicated
379 // by the Backward Size field.
380 if (ret == LZMA_STREAM_END)
381 if (index_size != 0 || strm.avail_in != 0)
382 ret = LZMA_DATA_ERROR;
383
384 if (ret != LZMA_STREAM_END) {
385 // LZMA_BUFFER_ERROR means that the Index decoder
386 // would have liked more input than what the Index
387 // size should be according to Stream Footer.
388 // The message for LZMA_DATA_ERROR makes more
389 // sense in that case.
390 if (ret == LZMA_BUF_ERROR)
391 ret = LZMA_DATA_ERROR;
392
393 goto error;
394 }
395
396 // Decode the Stream Header and check that its Stream Flags
397 // match the Stream Footer.
398 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
399 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
400 goto error;
401 }
402
403 pos -= lzma_index_total_size(this_index);
404 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
405 goto error;
406
407 ret = lzma_stream_header_decode(&header_flags, buf.u8);
408 if (ret != LZMA_OK) {
409 goto error;
410 }
411
412 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
413 if (ret != LZMA_OK) {
414 goto error;
415 }
416
417 // Store the decoded Stream Flags into this_index. This is
418 // needed so that we can print which Check is used in each
419 // Stream.
420 ret = lzma_index_stream_flags(this_index, &footer_flags);
421 if (ret != LZMA_OK)
422 goto error;
423
424 // Store also the size of the Stream Padding field. It is
425 // needed to show the offsets of the Streams correctly.
426 ret = lzma_index_stream_padding(this_index, stream_padding);
427 if (ret != LZMA_OK)
428 goto error;
429
430 if (combined_index != NULL) {
431 // Append the earlier decoded Indexes
432 // after this_index.
433 ret = lzma_index_cat(
434 this_index, combined_index, NULL);
435 if (ret != LZMA_OK) {
436 goto error;
437 }
438 }
439
440 combined_index = this_index;
441 this_index = NULL;
442
443 xfi->stream_padding += stream_padding;
444
445 } while (pos > 0);
446
447 lzma_end(&strm);
448
449 // All OK. Make combined_index available to the caller.
450 xfi->idx = combined_index;
451 return false;
452
453error:
454 // Something went wrong, free the allocated memory.
455 lzma_end(&strm);
456 lzma_index_end(combined_index, NULL);
457 lzma_index_end(this_index, NULL);
458 return true;
459}
460
461/***************** end of copy from list.c *************************/
462
463/*
464 * Small wrapper to extract total length of a file
465 */
466off_t
467unxz_len(int fd)
468{
469 xz_file_info xfi = XZ_FILE_INFO_INIT;
470 if (!parse_indexes(&xfi, fd)) {
471 off_t res = lzma_index_uncompressed_size(xfi.idx);
472 lzma_index_end(xfi.idx, NULL);
473 return res;
474 }
475 return 0;
476}
477