1343251Sdelphij/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ 2226184Sdelphij 3226184Sdelphij/*- 4330449Seadler * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5330449Seadler * 6226184Sdelphij * Copyright (c) 2011 The NetBSD Foundation, Inc. 7226184Sdelphij * All rights reserved. 8226184Sdelphij * 9226184Sdelphij * This code is derived from software contributed to The NetBSD Foundation 10226184Sdelphij * by Christos Zoulas. 11226184Sdelphij * 12226184Sdelphij * Redistribution and use in source and binary forms, with or without 13226184Sdelphij * modification, are permitted provided that the following conditions 14226184Sdelphij * are met: 15226184Sdelphij * 1. Redistributions of source code must retain the above copyright 16226184Sdelphij * notice, this list of conditions and the following disclaimer. 17226184Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 18226184Sdelphij * notice, this list of conditions and the following disclaimer in the 19226184Sdelphij * documentation and/or other materials provided with the distribution. 20226184Sdelphij * 21226184Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22226184Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23226184Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24226184Sdelphij * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25226184Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26226184Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27226184Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28226184Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29226184Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30226184Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31226184Sdelphij * POSSIBILITY OF SUCH DAMAGE. 32226184Sdelphij */ 33226184Sdelphij#include <sys/cdefs.h> 34226184Sdelphij__FBSDID("$FreeBSD: stable/11/usr.bin/gzip/unxz.c 343251 2019-01-21 06:52:35Z delphij $"); 35226184Sdelphij 36226184Sdelphij#include <stdarg.h> 37226184Sdelphij#include <errno.h> 38226184Sdelphij#include <stdio.h> 39226184Sdelphij#include <unistd.h> 40226184Sdelphij#include <lzma.h> 41226184Sdelphij 42226184Sdelphijstatic off_t 43226184Sdelphijunxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 44226184Sdelphij{ 45226184Sdelphij lzma_stream strm = LZMA_STREAM_INIT; 46226184Sdelphij static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 47226184Sdelphij lzma_ret ret; 48226184Sdelphij lzma_action action = LZMA_RUN; 49226184Sdelphij off_t bytes_out, bp; 50226184Sdelphij uint8_t ibuf[BUFSIZ]; 51226184Sdelphij uint8_t obuf[BUFSIZ]; 52226184Sdelphij 53226184Sdelphij if (bytes_in == NULL) 54226184Sdelphij bytes_in = &bp; 55226184Sdelphij 56226184Sdelphij strm.next_in = ibuf; 57226184Sdelphij memcpy(ibuf, pre, prelen); 58226184Sdelphij strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 59226184Sdelphij if (strm.avail_in == (size_t)-1) 60226184Sdelphij maybe_err("read failed"); 61326559Sdelphij infile_newdata(strm.avail_in); 62226184Sdelphij strm.avail_in += prelen; 63226184Sdelphij *bytes_in = strm.avail_in; 64226184Sdelphij 65226184Sdelphij if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 66226184Sdelphij maybe_errx("Can't initialize decoder (%d)", ret); 67226184Sdelphij 68226184Sdelphij strm.next_out = NULL; 69226184Sdelphij strm.avail_out = 0; 70226184Sdelphij if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 71226184Sdelphij maybe_errx("Can't read headers (%d)", ret); 72226184Sdelphij 73226184Sdelphij bytes_out = 0; 74226184Sdelphij strm.next_out = obuf; 75226184Sdelphij strm.avail_out = sizeof(obuf); 76226184Sdelphij 77226184Sdelphij for (;;) { 78326559Sdelphij check_siginfo(); 79226184Sdelphij if (strm.avail_in == 0) { 80226184Sdelphij strm.next_in = ibuf; 81226184Sdelphij strm.avail_in = read(i, ibuf, sizeof(ibuf)); 82226184Sdelphij switch (strm.avail_in) { 83226184Sdelphij case (size_t)-1: 84226184Sdelphij maybe_err("read failed"); 85226184Sdelphij /*NOTREACHED*/ 86226184Sdelphij case 0: 87226184Sdelphij action = LZMA_FINISH; 88226184Sdelphij break; 89226184Sdelphij default: 90326559Sdelphij infile_newdata(strm.avail_in); 91226184Sdelphij *bytes_in += strm.avail_in; 92226184Sdelphij break; 93226184Sdelphij } 94226184Sdelphij } 95226184Sdelphij 96226184Sdelphij ret = lzma_code(&strm, action); 97226184Sdelphij 98226184Sdelphij // Write and check write error before checking decoder error. 99226184Sdelphij // This way as much data as possible gets written to output 100226184Sdelphij // even if decoder detected an error. 101226184Sdelphij if (strm.avail_out == 0 || ret != LZMA_OK) { 102226184Sdelphij const size_t write_size = sizeof(obuf) - strm.avail_out; 103226184Sdelphij 104226184Sdelphij if (write(o, obuf, write_size) != (ssize_t)write_size) 105226184Sdelphij maybe_err("write failed"); 106226184Sdelphij 107226184Sdelphij strm.next_out = obuf; 108226184Sdelphij strm.avail_out = sizeof(obuf); 109226184Sdelphij bytes_out += write_size; 110226184Sdelphij } 111226184Sdelphij 112226184Sdelphij if (ret != LZMA_OK) { 113226184Sdelphij if (ret == LZMA_STREAM_END) { 114226184Sdelphij // Check that there's no trailing garbage. 115226184Sdelphij if (strm.avail_in != 0 || read(i, ibuf, 1)) 116226184Sdelphij ret = LZMA_DATA_ERROR; 117226184Sdelphij else { 118226184Sdelphij lzma_end(&strm); 119226184Sdelphij return bytes_out; 120226184Sdelphij } 121226184Sdelphij } 122226184Sdelphij 123226184Sdelphij const char *msg; 124226184Sdelphij switch (ret) { 125226184Sdelphij case LZMA_MEM_ERROR: 126226184Sdelphij msg = strerror(ENOMEM); 127226184Sdelphij break; 128226184Sdelphij 129226184Sdelphij case LZMA_FORMAT_ERROR: 130226184Sdelphij msg = "File format not recognized"; 131226184Sdelphij break; 132226184Sdelphij 133226184Sdelphij case LZMA_OPTIONS_ERROR: 134226184Sdelphij // FIXME: Better message? 135226184Sdelphij msg = "Unsupported compression options"; 136226184Sdelphij break; 137226184Sdelphij 138226184Sdelphij case LZMA_DATA_ERROR: 139226184Sdelphij msg = "File is corrupt"; 140226184Sdelphij break; 141226184Sdelphij 142226184Sdelphij case LZMA_BUF_ERROR: 143226184Sdelphij msg = "Unexpected end of input"; 144226184Sdelphij break; 145226184Sdelphij 146226184Sdelphij case LZMA_MEMLIMIT_ERROR: 147226184Sdelphij msg = "Reached memory limit"; 148226184Sdelphij break; 149226184Sdelphij 150226184Sdelphij default: 151226184Sdelphij maybe_errx("Unknown error (%d)", ret); 152226184Sdelphij break; 153226184Sdelphij } 154226184Sdelphij maybe_errx("%s", msg); 155226184Sdelphij 156226184Sdelphij } 157226184Sdelphij } 158226184Sdelphij} 159343251Sdelphij 160343251Sdelphij#include <stdbool.h> 161343251Sdelphij 162343251Sdelphij/* 163343251Sdelphij * Copied various bits and pieces from xz support code or brute force 164343251Sdelphij * replacements. 165343251Sdelphij */ 166343251Sdelphij 167343251Sdelphij#define my_min(A,B) ((A)<(B)?(A):(B)) 168343251Sdelphij 169343251Sdelphij// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 170343251Sdelphij// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 171343251Sdelphij#if BUFSIZ <= 1024 172343251Sdelphij# define IO_BUFFER_SIZE 8192 173343251Sdelphij#else 174343251Sdelphij# define IO_BUFFER_SIZE (BUFSIZ & ~7U) 175343251Sdelphij#endif 176343251Sdelphij 177343251Sdelphij/// is_sparse() accesses the buffer as uint64_t for maximum speed. 178343251Sdelphij/// Use an union to make sure that the buffer is properly aligned. 179343251Sdelphijtypedef union { 180343251Sdelphij uint8_t u8[IO_BUFFER_SIZE]; 181343251Sdelphij uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 182343251Sdelphij uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 183343251Sdelphij} io_buf; 184343251Sdelphij 185343251Sdelphij 186343251Sdelphijstatic bool 187343251Sdelphijio_pread(int fd, io_buf *buf, size_t size, off_t pos) 188343251Sdelphij{ 189343251Sdelphij // Using lseek() and read() is more portable than pread() and 190343251Sdelphij // for us it is as good as real pread(). 191343251Sdelphij if (lseek(fd, pos, SEEK_SET) != pos) { 192343251Sdelphij return true; 193343251Sdelphij } 194343251Sdelphij 195343251Sdelphij const size_t amount = read(fd, buf, size); 196343251Sdelphij if (amount == SIZE_MAX) 197343251Sdelphij return true; 198343251Sdelphij 199343251Sdelphij if (amount != size) { 200343251Sdelphij return true; 201343251Sdelphij } 202343251Sdelphij 203343251Sdelphij return false; 204343251Sdelphij} 205343251Sdelphij 206343251Sdelphij/* 207343251Sdelphij * Most of the following is copied (mostly verbatim) from the xz 208343251Sdelphij * distribution, from file src/xz/list.c 209343251Sdelphij */ 210343251Sdelphij 211343251Sdelphij/////////////////////////////////////////////////////////////////////////////// 212343251Sdelphij// 213343251Sdelphij/// \file list.c 214343251Sdelphij/// \brief Listing information about .xz files 215343251Sdelphij// 216343251Sdelphij// Author: Lasse Collin 217343251Sdelphij// 218343251Sdelphij// This file has been put into the public domain. 219343251Sdelphij// You can do whatever you want with this file. 220343251Sdelphij// 221343251Sdelphij/////////////////////////////////////////////////////////////////////////////// 222343251Sdelphij 223343251Sdelphij 224343251Sdelphij/// Information about a .xz file 225343251Sdelphijtypedef struct { 226343251Sdelphij /// Combined Index of all Streams in the file 227343251Sdelphij lzma_index *idx; 228343251Sdelphij 229343251Sdelphij /// Total amount of Stream Padding 230343251Sdelphij uint64_t stream_padding; 231343251Sdelphij 232343251Sdelphij /// Highest memory usage so far 233343251Sdelphij uint64_t memusage_max; 234343251Sdelphij 235343251Sdelphij /// True if all Blocks so far have Compressed Size and 236343251Sdelphij /// Uncompressed Size fields 237343251Sdelphij bool all_have_sizes; 238343251Sdelphij 239343251Sdelphij /// Oldest XZ Utils version that will decompress the file 240343251Sdelphij uint32_t min_version; 241343251Sdelphij 242343251Sdelphij} xz_file_info; 243343251Sdelphij 244343251Sdelphij#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 245343251Sdelphij 246343251Sdelphij 247343251Sdelphij/// \brief Parse the Index(es) from the given .xz file 248343251Sdelphij/// 249343251Sdelphij/// \param xfi Pointer to structure where the decoded information 250343251Sdelphij/// is stored. 251343251Sdelphij/// \param pair Input file 252343251Sdelphij/// 253343251Sdelphij/// \return On success, false is returned. On error, true is returned. 254343251Sdelphij/// 255343251Sdelphij// TODO: This function is pretty big. liblzma should have a function that 256343251Sdelphij// takes a callback function to parse the Index(es) from a .xz file to make 257343251Sdelphij// it easy for applications. 258343251Sdelphijstatic bool 259343251Sdelphijparse_indexes(xz_file_info *xfi, int src_fd) 260343251Sdelphij{ 261343251Sdelphij struct stat st; 262343251Sdelphij 263343251Sdelphij fstat(src_fd, &st); 264343251Sdelphij if (st.st_size <= 0) { 265343251Sdelphij return true; 266343251Sdelphij } 267343251Sdelphij 268343251Sdelphij if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 269343251Sdelphij return true; 270343251Sdelphij } 271343251Sdelphij 272343251Sdelphij io_buf buf; 273343251Sdelphij lzma_stream_flags header_flags; 274343251Sdelphij lzma_stream_flags footer_flags; 275343251Sdelphij lzma_ret ret; 276343251Sdelphij 277343251Sdelphij // lzma_stream for the Index decoder 278343251Sdelphij lzma_stream strm = LZMA_STREAM_INIT; 279343251Sdelphij 280343251Sdelphij // All Indexes decoded so far 281343251Sdelphij lzma_index *combined_index = NULL; 282343251Sdelphij 283343251Sdelphij // The Index currently being decoded 284343251Sdelphij lzma_index *this_index = NULL; 285343251Sdelphij 286343251Sdelphij // Current position in the file. We parse the file backwards so 287343251Sdelphij // initialize it to point to the end of the file. 288343251Sdelphij off_t pos = st.st_size; 289343251Sdelphij 290343251Sdelphij // Each loop iteration decodes one Index. 291343251Sdelphij do { 292343251Sdelphij // Check that there is enough data left to contain at least 293343251Sdelphij // the Stream Header and Stream Footer. This check cannot 294343251Sdelphij // fail in the first pass of this loop. 295343251Sdelphij if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 296343251Sdelphij goto error; 297343251Sdelphij } 298343251Sdelphij 299343251Sdelphij pos -= LZMA_STREAM_HEADER_SIZE; 300343251Sdelphij lzma_vli stream_padding = 0; 301343251Sdelphij 302343251Sdelphij // Locate the Stream Footer. There may be Stream Padding which 303343251Sdelphij // we must skip when reading backwards. 304343251Sdelphij while (true) { 305343251Sdelphij if (pos < LZMA_STREAM_HEADER_SIZE) { 306343251Sdelphij goto error; 307343251Sdelphij } 308343251Sdelphij 309343251Sdelphij if (io_pread(src_fd, &buf, 310343251Sdelphij LZMA_STREAM_HEADER_SIZE, pos)) 311343251Sdelphij goto error; 312343251Sdelphij 313343251Sdelphij // Stream Padding is always a multiple of four bytes. 314343251Sdelphij int i = 2; 315343251Sdelphij if (buf.u32[i] != 0) 316343251Sdelphij break; 317343251Sdelphij 318343251Sdelphij // To avoid calling io_pread() for every four bytes 319343251Sdelphij // of Stream Padding, take advantage that we read 320343251Sdelphij // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 321343251Sdelphij // check them too before calling io_pread() again. 322343251Sdelphij do { 323343251Sdelphij stream_padding += 4; 324343251Sdelphij pos -= 4; 325343251Sdelphij --i; 326343251Sdelphij } while (i >= 0 && buf.u32[i] == 0); 327343251Sdelphij } 328343251Sdelphij 329343251Sdelphij // Decode the Stream Footer. 330343251Sdelphij ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 331343251Sdelphij if (ret != LZMA_OK) { 332343251Sdelphij goto error; 333343251Sdelphij } 334343251Sdelphij 335343251Sdelphij // Check that the Stream Footer doesn't specify something 336343251Sdelphij // that we don't support. This can only happen if the xz 337343251Sdelphij // version is older than liblzma and liblzma supports 338343251Sdelphij // something new. 339343251Sdelphij // 340343251Sdelphij // It is enough to check Stream Footer. Stream Header must 341343251Sdelphij // match when it is compared against Stream Footer with 342343251Sdelphij // lzma_stream_flags_compare(). 343343251Sdelphij if (footer_flags.version != 0) { 344343251Sdelphij goto error; 345343251Sdelphij } 346343251Sdelphij 347343251Sdelphij // Check that the size of the Index field looks sane. 348343251Sdelphij lzma_vli index_size = footer_flags.backward_size; 349343251Sdelphij if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 350343251Sdelphij goto error; 351343251Sdelphij } 352343251Sdelphij 353343251Sdelphij // Set pos to the beginning of the Index. 354343251Sdelphij pos -= index_size; 355343251Sdelphij 356343251Sdelphij // Decode the Index. 357343251Sdelphij ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 358343251Sdelphij if (ret != LZMA_OK) { 359343251Sdelphij goto error; 360343251Sdelphij } 361343251Sdelphij 362343251Sdelphij do { 363343251Sdelphij // Don't give the decoder more input than the 364343251Sdelphij // Index size. 365343251Sdelphij strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 366343251Sdelphij if (io_pread(src_fd, &buf, strm.avail_in, pos)) 367343251Sdelphij goto error; 368343251Sdelphij 369343251Sdelphij pos += strm.avail_in; 370343251Sdelphij index_size -= strm.avail_in; 371343251Sdelphij 372343251Sdelphij strm.next_in = buf.u8; 373343251Sdelphij ret = lzma_code(&strm, LZMA_RUN); 374343251Sdelphij 375343251Sdelphij } while (ret == LZMA_OK); 376343251Sdelphij 377343251Sdelphij // If the decoding seems to be successful, check also that 378343251Sdelphij // the Index decoder consumed as much input as indicated 379343251Sdelphij // by the Backward Size field. 380343251Sdelphij if (ret == LZMA_STREAM_END) 381343251Sdelphij if (index_size != 0 || strm.avail_in != 0) 382343251Sdelphij ret = LZMA_DATA_ERROR; 383343251Sdelphij 384343251Sdelphij if (ret != LZMA_STREAM_END) { 385343251Sdelphij // LZMA_BUFFER_ERROR means that the Index decoder 386343251Sdelphij // would have liked more input than what the Index 387343251Sdelphij // size should be according to Stream Footer. 388343251Sdelphij // The message for LZMA_DATA_ERROR makes more 389343251Sdelphij // sense in that case. 390343251Sdelphij if (ret == LZMA_BUF_ERROR) 391343251Sdelphij ret = LZMA_DATA_ERROR; 392343251Sdelphij 393343251Sdelphij goto error; 394343251Sdelphij } 395343251Sdelphij 396343251Sdelphij // Decode the Stream Header and check that its Stream Flags 397343251Sdelphij // match the Stream Footer. 398343251Sdelphij pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 399343251Sdelphij if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 400343251Sdelphij goto error; 401343251Sdelphij } 402343251Sdelphij 403343251Sdelphij pos -= lzma_index_total_size(this_index); 404343251Sdelphij if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 405343251Sdelphij goto error; 406343251Sdelphij 407343251Sdelphij ret = lzma_stream_header_decode(&header_flags, buf.u8); 408343251Sdelphij if (ret != LZMA_OK) { 409343251Sdelphij goto error; 410343251Sdelphij } 411343251Sdelphij 412343251Sdelphij ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 413343251Sdelphij if (ret != LZMA_OK) { 414343251Sdelphij goto error; 415343251Sdelphij } 416343251Sdelphij 417343251Sdelphij // Store the decoded Stream Flags into this_index. This is 418343251Sdelphij // needed so that we can print which Check is used in each 419343251Sdelphij // Stream. 420343251Sdelphij ret = lzma_index_stream_flags(this_index, &footer_flags); 421343251Sdelphij if (ret != LZMA_OK) 422343251Sdelphij goto error; 423343251Sdelphij 424343251Sdelphij // Store also the size of the Stream Padding field. It is 425343251Sdelphij // needed to show the offsets of the Streams correctly. 426343251Sdelphij ret = lzma_index_stream_padding(this_index, stream_padding); 427343251Sdelphij if (ret != LZMA_OK) 428343251Sdelphij goto error; 429343251Sdelphij 430343251Sdelphij if (combined_index != NULL) { 431343251Sdelphij // Append the earlier decoded Indexes 432343251Sdelphij // after this_index. 433343251Sdelphij ret = lzma_index_cat( 434343251Sdelphij this_index, combined_index, NULL); 435343251Sdelphij if (ret != LZMA_OK) { 436343251Sdelphij goto error; 437343251Sdelphij } 438343251Sdelphij } 439343251Sdelphij 440343251Sdelphij combined_index = this_index; 441343251Sdelphij this_index = NULL; 442343251Sdelphij 443343251Sdelphij xfi->stream_padding += stream_padding; 444343251Sdelphij 445343251Sdelphij } while (pos > 0); 446343251Sdelphij 447343251Sdelphij lzma_end(&strm); 448343251Sdelphij 449343251Sdelphij // All OK. Make combined_index available to the caller. 450343251Sdelphij xfi->idx = combined_index; 451343251Sdelphij return false; 452343251Sdelphij 453343251Sdelphijerror: 454343251Sdelphij // Something went wrong, free the allocated memory. 455343251Sdelphij lzma_end(&strm); 456343251Sdelphij lzma_index_end(combined_index, NULL); 457343251Sdelphij lzma_index_end(this_index, NULL); 458343251Sdelphij return true; 459343251Sdelphij} 460343251Sdelphij 461343251Sdelphij/***************** end of copy form list.c *************************/ 462343251Sdelphij 463343251Sdelphij/* 464343251Sdelphij * Small wrapper to extract total length of a file 465343251Sdelphij */ 466343251Sdelphijoff_t 467343251Sdelphijunxz_len(int fd) 468343251Sdelphij{ 469343251Sdelphij xz_file_info xfi = XZ_FILE_INFO_INIT; 470343251Sdelphij if (!parse_indexes(&xfi, fd)) { 471343251Sdelphij off_t res = lzma_index_uncompressed_size(xfi.idx); 472343251Sdelphij lzma_index_end(xfi.idx, NULL); 473343251Sdelphij return res; 474343251Sdelphij } 475343251Sdelphij return 0; 476343251Sdelphij} 477343251Sdelphij 478