1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file coder.c 4207753Smm/// \brief Compresses or uncompresses a file 5207753Smm// 6207753Smm// Author: Lasse Collin 7207753Smm// 8207753Smm// This file has been put into the public domain. 9207753Smm// You can do whatever you want with this file. 10207753Smm// 11207753Smm/////////////////////////////////////////////////////////////////////////////// 12207753Smm 13207753Smm#include "private.h" 14207753Smm 15207753Smm 16207753Smm/// Return value type for coder_init(). 17207753Smmenum coder_init_ret { 18207753Smm CODER_INIT_NORMAL, 19207753Smm CODER_INIT_PASSTHRU, 20207753Smm CODER_INIT_ERROR, 21207753Smm}; 22207753Smm 23207753Smm 24207753Smmenum operation_mode opt_mode = MODE_COMPRESS; 25207753Smmenum format_type opt_format = FORMAT_AUTO; 26213700Smmbool opt_auto_adjust = true; 27292588Sdelphijbool opt_single_stream = false; 28292588Sdelphijuint64_t opt_block_size = 0; 29292588Sdelphijuint64_t *opt_block_list = NULL; 30207753Smm 31207753Smm 32207753Smm/// Stream used to communicate with liblzma 33207753Smmstatic lzma_stream strm = LZMA_STREAM_INIT; 34207753Smm 35207753Smm/// Filters needed for all encoding all formats, and also decoding in raw data 36207753Smmstatic lzma_filter filters[LZMA_FILTERS_MAX + 1]; 37207753Smm 38207753Smm/// Input and output buffers 39207753Smmstatic io_buf in_buf; 40207753Smmstatic io_buf out_buf; 41207753Smm 42207753Smm/// Number of filters. Zero indicates that we are using a preset. 43263285Sdelphijstatic uint32_t filters_count = 0; 44207753Smm 45207753Smm/// Number of the preset (0-9) 46263285Sdelphijstatic uint32_t preset_number = LZMA_PRESET_DEFAULT; 47207753Smm 48207753Smm/// Integrity check type 49207753Smmstatic lzma_check check; 50207753Smm 51207753Smm/// This becomes false if the --check=CHECK option is used. 52207753Smmstatic bool check_default = true; 53207753Smm 54312518Sdelphij#if defined(HAVE_ENCODERS) && defined(MYTHREAD_ENABLED) 55292588Sdelphijstatic lzma_mt mt_options = { 56292588Sdelphij .flags = 0, 57292588Sdelphij .timeout = 300, 58292588Sdelphij .filters = filters, 59292588Sdelphij}; 60292588Sdelphij#endif 61207753Smm 62292588Sdelphij 63207753Smmextern void 64207753Smmcoder_set_check(lzma_check new_check) 65207753Smm{ 66207753Smm check = new_check; 67207753Smm check_default = false; 68207753Smm return; 69207753Smm} 70207753Smm 71207753Smm 72263285Sdelphijstatic void 73263285Sdelphijforget_filter_chain(void) 74207753Smm{ 75213700Smm // Setting a preset makes us forget a possibly defined custom 76213700Smm // filter chain. 77213700Smm while (filters_count > 0) { 78213700Smm --filters_count; 79213700Smm free(filters[filters_count].options); 80213700Smm filters[filters_count].options = NULL; 81213700Smm } 82213700Smm 83207753Smm return; 84207753Smm} 85207753Smm 86207753Smm 87207753Smmextern void 88263285Sdelphijcoder_set_preset(uint32_t new_preset) 89263285Sdelphij{ 90263285Sdelphij preset_number &= ~LZMA_PRESET_LEVEL_MASK; 91263285Sdelphij preset_number |= new_preset; 92263285Sdelphij forget_filter_chain(); 93263285Sdelphij return; 94263285Sdelphij} 95263285Sdelphij 96263285Sdelphij 97263285Sdelphijextern void 98207753Smmcoder_set_extreme(void) 99207753Smm{ 100263285Sdelphij preset_number |= LZMA_PRESET_EXTREME; 101263285Sdelphij forget_filter_chain(); 102207753Smm return; 103207753Smm} 104207753Smm 105207753Smm 106207753Smmextern void 107207753Smmcoder_add_filter(lzma_vli id, void *options) 108207753Smm{ 109207753Smm if (filters_count == LZMA_FILTERS_MAX) 110207753Smm message_fatal(_("Maximum number of filters is four")); 111207753Smm 112207753Smm filters[filters_count].id = id; 113207753Smm filters[filters_count].options = options; 114207753Smm ++filters_count; 115207753Smm 116263285Sdelphij // Setting a custom filter chain makes us forget the preset options. 117263285Sdelphij // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" 118263285Sdelphij // where the custom filter chain resets the preset level back to 119263285Sdelphij // the default 6, making the example equivalent to "xz -6e". 120263285Sdelphij preset_number = LZMA_PRESET_DEFAULT; 121263285Sdelphij 122207753Smm return; 123207753Smm} 124207753Smm 125207753Smm 126223935Smmstatic void lzma_attribute((__noreturn__)) 127207753Smmmemlimit_too_small(uint64_t memory_usage) 128207753Smm{ 129207753Smm message(V_ERROR, _("Memory usage limit is too low for the given " 130207753Smm "filter setup.")); 131207753Smm message_mem_needed(V_ERROR, memory_usage); 132207753Smm tuklib_exit(E_ERROR, E_ERROR, false); 133207753Smm} 134207753Smm 135207753Smm 136207753Smmextern void 137207753Smmcoder_set_compression_settings(void) 138207753Smm{ 139292588Sdelphij // The default check type is CRC64, but fallback to CRC32 140292588Sdelphij // if CRC64 isn't supported by the copy of liblzma we are 141292588Sdelphij // using. CRC32 is always supported. 142292588Sdelphij if (check_default) { 143292588Sdelphij check = LZMA_CHECK_CRC64; 144292588Sdelphij if (!lzma_check_is_supported(check)) 145292588Sdelphij check = LZMA_CHECK_CRC32; 146292588Sdelphij } 147292588Sdelphij 148207753Smm // Options for LZMA1 or LZMA2 in case we are using a preset. 149207753Smm static lzma_options_lzma opt_lzma; 150207753Smm 151207753Smm if (filters_count == 0) { 152207753Smm // We are using a preset. This is not a good idea in raw mode 153207753Smm // except when playing around with things. Different versions 154207753Smm // of this software may use different options in presets, and 155207753Smm // thus make uncompressing the raw data difficult. 156207753Smm if (opt_format == FORMAT_RAW) { 157207753Smm // The message is shown only if warnings are allowed 158207753Smm // but the exit status isn't changed. 159207753Smm message(V_WARNING, _("Using a preset in raw mode " 160207753Smm "is discouraged.")); 161207753Smm message(V_WARNING, _("The exact options of the " 162207753Smm "presets may vary between software " 163207753Smm "versions.")); 164207753Smm } 165207753Smm 166207753Smm // Get the preset for LZMA1 or LZMA2. 167207753Smm if (lzma_lzma_preset(&opt_lzma, preset_number)) 168207753Smm message_bug(); 169207753Smm 170207753Smm // Use LZMA2 except with --format=lzma we use LZMA1. 171207753Smm filters[0].id = opt_format == FORMAT_LZMA 172207753Smm ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; 173207753Smm filters[0].options = &opt_lzma; 174207753Smm filters_count = 1; 175207753Smm } 176207753Smm 177207753Smm // Terminate the filter options array. 178207753Smm filters[filters_count].id = LZMA_VLI_UNKNOWN; 179207753Smm 180207753Smm // If we are using the .lzma format, allow exactly one filter 181207753Smm // which has to be LZMA1. 182207753Smm if (opt_format == FORMAT_LZMA && (filters_count != 1 183207753Smm || filters[0].id != LZMA_FILTER_LZMA1)) 184207753Smm message_fatal(_("The .lzma format supports only " 185207753Smm "the LZMA1 filter")); 186207753Smm 187207753Smm // If we are using the .xz format, make sure that there is no LZMA1 188207753Smm // filter to prevent LZMA_PROG_ERROR. 189207753Smm if (opt_format == FORMAT_XZ) 190207753Smm for (size_t i = 0; i < filters_count; ++i) 191207753Smm if (filters[i].id == LZMA_FILTER_LZMA1) 192207753Smm message_fatal(_("LZMA1 cannot be used " 193207753Smm "with the .xz format")); 194207753Smm 195207753Smm // Print the selected filter chain. 196213700Smm message_filters_show(V_DEBUG, filters); 197207753Smm 198292588Sdelphij // The --flush-timeout option requires LZMA_SYNC_FLUSH support 199292588Sdelphij // from the filter chain. Currently threaded encoder doesn't support 200292588Sdelphij // LZMA_SYNC_FLUSH so single-threaded mode must be used. 201292588Sdelphij if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { 202292588Sdelphij for (size_t i = 0; i < filters_count; ++i) { 203292588Sdelphij switch (filters[i].id) { 204292588Sdelphij case LZMA_FILTER_LZMA2: 205292588Sdelphij case LZMA_FILTER_DELTA: 206292588Sdelphij break; 207292588Sdelphij 208292588Sdelphij default: 209292588Sdelphij message_fatal(_("The filter chain is " 210292588Sdelphij "incompatible with --flush-timeout")); 211292588Sdelphij } 212292588Sdelphij } 213292588Sdelphij 214292588Sdelphij if (hardware_threads_get() > 1) { 215292588Sdelphij message(V_WARNING, _("Switching to single-threaded " 216292588Sdelphij "mode due to --flush-timeout")); 217292588Sdelphij hardware_threads_set(1); 218292588Sdelphij } 219292588Sdelphij } 220292588Sdelphij 221292588Sdelphij // Get the memory usage. Note that if --format=raw was used, 222292588Sdelphij // we can be decompressing. 223213700Smm const uint64_t memory_limit = hardware_memlimit_get(opt_mode); 224312518Sdelphij uint64_t memory_usage = UINT64_MAX; 225292588Sdelphij if (opt_mode == MODE_COMPRESS) { 226312518Sdelphij#ifdef HAVE_ENCODERS 227312518Sdelphij# ifdef MYTHREAD_ENABLED 228292588Sdelphij if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) { 229292588Sdelphij mt_options.threads = hardware_threads_get(); 230292588Sdelphij mt_options.block_size = opt_block_size; 231292588Sdelphij mt_options.check = check; 232292588Sdelphij memory_usage = lzma_stream_encoder_mt_memusage( 233292588Sdelphij &mt_options); 234292588Sdelphij if (memory_usage != UINT64_MAX) 235292588Sdelphij message(V_DEBUG, _("Using up to %" PRIu32 236292588Sdelphij " threads."), 237292588Sdelphij mt_options.threads); 238292588Sdelphij } else 239312518Sdelphij# endif 240292588Sdelphij { 241292588Sdelphij memory_usage = lzma_raw_encoder_memusage(filters); 242292588Sdelphij } 243312518Sdelphij#endif 244292588Sdelphij } else { 245312518Sdelphij#ifdef HAVE_DECODERS 246207753Smm memory_usage = lzma_raw_decoder_memusage(filters); 247312518Sdelphij#endif 248292588Sdelphij } 249207753Smm 250207753Smm if (memory_usage == UINT64_MAX) 251207753Smm message_fatal(_("Unsupported filter chain or filter options")); 252207753Smm 253207753Smm // Print memory usage info before possible dictionary 254207753Smm // size auto-adjusting. 255312518Sdelphij // 256312518Sdelphij // NOTE: If only encoder support was built, we cannot show the 257312518Sdelphij // what the decoder memory usage will be. 258207753Smm message_mem_needed(V_DEBUG, memory_usage); 259312518Sdelphij#ifdef HAVE_DECODERS 260213700Smm if (opt_mode == MODE_COMPRESS) { 261213700Smm const uint64_t decmem = lzma_raw_decoder_memusage(filters); 262213700Smm if (decmem != UINT64_MAX) 263213700Smm message(V_DEBUG, _("Decompression will need " 264213700Smm "%s MiB of memory."), uint64_to_str( 265213700Smm round_up_to_mib(decmem), 0)); 266213700Smm } 267312518Sdelphij#endif 268207753Smm 269292588Sdelphij if (memory_usage <= memory_limit) 270292588Sdelphij return; 271207753Smm 272292588Sdelphij // If --no-adjust was used or we didn't find LZMA1 or 273292588Sdelphij // LZMA2 as the last filter, give an error immediately. 274292588Sdelphij // --format=raw implies --no-adjust. 275292588Sdelphij if (!opt_auto_adjust || opt_format == FORMAT_RAW) 276292588Sdelphij memlimit_too_small(memory_usage); 277207753Smm 278292588Sdelphij assert(opt_mode == MODE_COMPRESS); 279207753Smm 280312518Sdelphij#ifdef HAVE_ENCODERS 281312518Sdelphij# ifdef MYTHREAD_ENABLED 282292588Sdelphij if (opt_format == FORMAT_XZ && mt_options.threads > 1) { 283292588Sdelphij // Try to reduce the number of threads before 284292588Sdelphij // adjusting the compression settings down. 285292588Sdelphij do { 286292588Sdelphij // FIXME? The real single-threaded mode has 287292588Sdelphij // lower memory usage, but it's not comparable 288292588Sdelphij // because it doesn't write the size info 289292588Sdelphij // into Block Headers. 290292588Sdelphij if (--mt_options.threads == 0) 291207753Smm memlimit_too_small(memory_usage); 292207753Smm 293292588Sdelphij memory_usage = lzma_stream_encoder_mt_memusage( 294292588Sdelphij &mt_options); 295207753Smm if (memory_usage == UINT64_MAX) 296207753Smm message_bug(); 297207753Smm 298292588Sdelphij } while (memory_usage > memory_limit); 299207753Smm 300292588Sdelphij message(V_WARNING, _("Adjusted the number of threads " 301292588Sdelphij "from %s to %s to not exceed " 302292588Sdelphij "the memory usage limit of %s MiB"), 303292588Sdelphij uint64_to_str(hardware_threads_get(), 0), 304292588Sdelphij uint64_to_str(mt_options.threads, 1), 305292588Sdelphij uint64_to_str(round_up_to_mib( 306292588Sdelphij memory_limit), 2)); 307292588Sdelphij } 308312518Sdelphij# endif 309207753Smm 310292588Sdelphij if (memory_usage <= memory_limit) 311292588Sdelphij return; 312292588Sdelphij 313292588Sdelphij // Look for the last filter if it is LZMA2 or LZMA1, so we can make 314292588Sdelphij // it use less RAM. With other filters we don't know what to do. 315292588Sdelphij size_t i = 0; 316292588Sdelphij while (filters[i].id != LZMA_FILTER_LZMA2 317292588Sdelphij && filters[i].id != LZMA_FILTER_LZMA1) { 318292588Sdelphij if (filters[i].id == LZMA_VLI_UNKNOWN) 319292588Sdelphij memlimit_too_small(memory_usage); 320292588Sdelphij 321292588Sdelphij ++i; 322207753Smm } 323207753Smm 324292588Sdelphij // Decrease the dictionary size until we meet the memory 325292588Sdelphij // usage limit. First round down to full mebibytes. 326292588Sdelphij lzma_options_lzma *opt = filters[i].options; 327292588Sdelphij const uint32_t orig_dict_size = opt->dict_size; 328292588Sdelphij opt->dict_size &= ~((UINT32_C(1) << 20) - 1); 329292588Sdelphij while (true) { 330292588Sdelphij // If it is below 1 MiB, auto-adjusting failed. We could be 331292588Sdelphij // more sophisticated and scale it down even more, but let's 332292588Sdelphij // see if many complain about this version. 333292588Sdelphij // 334292588Sdelphij // FIXME: Displays the scaled memory usage instead 335292588Sdelphij // of the original. 336292588Sdelphij if (opt->dict_size < (UINT32_C(1) << 20)) 337292588Sdelphij memlimit_too_small(memory_usage); 338207753Smm 339292588Sdelphij memory_usage = lzma_raw_encoder_memusage(filters); 340292588Sdelphij if (memory_usage == UINT64_MAX) 341292588Sdelphij message_bug(); 342207753Smm 343292588Sdelphij // Accept it if it is low enough. 344292588Sdelphij if (memory_usage <= memory_limit) 345292588Sdelphij break; 346292588Sdelphij 347292588Sdelphij // Otherwise 1 MiB down and try again. I hope this 348292588Sdelphij // isn't too slow method for cases where the original 349292588Sdelphij // dict_size is very big. 350292588Sdelphij opt->dict_size -= UINT32_C(1) << 20; 351207753Smm } 352207753Smm 353292588Sdelphij // Tell the user that we decreased the dictionary size. 354292588Sdelphij message(V_WARNING, _("Adjusted LZMA%c dictionary size " 355292588Sdelphij "from %s MiB to %s MiB to not exceed " 356292588Sdelphij "the memory usage limit of %s MiB"), 357292588Sdelphij filters[i].id == LZMA_FILTER_LZMA2 358292588Sdelphij ? '2' : '1', 359292588Sdelphij uint64_to_str(orig_dict_size >> 20, 0), 360292588Sdelphij uint64_to_str(opt->dict_size >> 20, 1), 361292588Sdelphij uint64_to_str(round_up_to_mib(memory_limit), 2)); 362312518Sdelphij#endif 363292588Sdelphij 364207753Smm return; 365207753Smm} 366207753Smm 367207753Smm 368312518Sdelphij#ifdef HAVE_DECODERS 369207753Smm/// Return true if the data in in_buf seems to be in the .xz format. 370207753Smmstatic bool 371207753Smmis_format_xz(void) 372207753Smm{ 373244601Smm // Specify the magic as hex to be compatible with EBCDIC systems. 374244601Smm static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; 375244601Smm return strm.avail_in >= sizeof(magic) 376244601Smm && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; 377207753Smm} 378207753Smm 379207753Smm 380207753Smm/// Return true if the data in in_buf seems to be in the .lzma format. 381207753Smmstatic bool 382207753Smmis_format_lzma(void) 383207753Smm{ 384207753Smm // The .lzma header is 13 bytes. 385207753Smm if (strm.avail_in < 13) 386207753Smm return false; 387207753Smm 388207753Smm // Decode the LZMA1 properties. 389207753Smm lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; 390207753Smm if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) 391207753Smm return false; 392207753Smm 393207753Smm // A hack to ditch tons of false positives: We allow only dictionary 394207753Smm // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone 395207753Smm // created only files with 2^n, but accepts any dictionary size. 396207753Smm // If someone complains, this will be reconsidered. 397207753Smm lzma_options_lzma *opt = filter.options; 398207753Smm const uint32_t dict_size = opt->dict_size; 399207753Smm free(opt); 400207753Smm 401207753Smm if (dict_size != UINT32_MAX) { 402207753Smm uint32_t d = dict_size - 1; 403207753Smm d |= d >> 2; 404207753Smm d |= d >> 3; 405207753Smm d |= d >> 4; 406207753Smm d |= d >> 8; 407207753Smm d |= d >> 16; 408207753Smm ++d; 409207753Smm if (d != dict_size || dict_size == 0) 410207753Smm return false; 411207753Smm } 412207753Smm 413207753Smm // Another hack to ditch false positives: Assume that if the 414207753Smm // uncompressed size is known, it must be less than 256 GiB. 415207753Smm // Again, if someone complains, this will be reconsidered. 416207753Smm uint64_t uncompressed_size = 0; 417207753Smm for (size_t i = 0; i < 8; ++i) 418207753Smm uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); 419207753Smm 420207753Smm if (uncompressed_size != UINT64_MAX 421207753Smm && uncompressed_size > (UINT64_C(1) << 38)) 422207753Smm return false; 423207753Smm 424207753Smm return true; 425207753Smm} 426312518Sdelphij#endif 427207753Smm 428207753Smm 429207753Smm/// Detect the input file type (for now, this done only when decompressing), 430207753Smm/// and initialize an appropriate coder. Return value indicates if a normal 431207753Smm/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru 432207753Smm/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred 433207753Smm/// (CODER_INIT_ERROR). 434207753Smmstatic enum coder_init_ret 435207753Smmcoder_init(file_pair *pair) 436207753Smm{ 437207753Smm lzma_ret ret = LZMA_PROG_ERROR; 438207753Smm 439207753Smm if (opt_mode == MODE_COMPRESS) { 440312518Sdelphij#ifdef HAVE_ENCODERS 441207753Smm switch (opt_format) { 442207753Smm case FORMAT_AUTO: 443207753Smm // args.c ensures this. 444207753Smm assert(0); 445207753Smm break; 446207753Smm 447207753Smm case FORMAT_XZ: 448312518Sdelphij# ifdef MYTHREAD_ENABLED 449292588Sdelphij if (hardware_threads_get() > 1) 450292588Sdelphij ret = lzma_stream_encoder_mt( 451292588Sdelphij &strm, &mt_options); 452292588Sdelphij else 453312518Sdelphij# endif 454292588Sdelphij ret = lzma_stream_encoder( 455292588Sdelphij &strm, filters, check); 456207753Smm break; 457207753Smm 458207753Smm case FORMAT_LZMA: 459207753Smm ret = lzma_alone_encoder(&strm, filters[0].options); 460207753Smm break; 461207753Smm 462207753Smm case FORMAT_RAW: 463207753Smm ret = lzma_raw_encoder(&strm, filters); 464207753Smm break; 465207753Smm } 466312518Sdelphij#endif 467207753Smm } else { 468312518Sdelphij#ifdef HAVE_DECODERS 469292588Sdelphij uint32_t flags = 0; 470207753Smm 471292588Sdelphij // It seems silly to warn about unsupported check if the 472292588Sdelphij // check won't be verified anyway due to --ignore-check. 473292588Sdelphij if (opt_ignore_check) 474292588Sdelphij flags |= LZMA_IGNORE_CHECK; 475292588Sdelphij else 476292588Sdelphij flags |= LZMA_TELL_UNSUPPORTED_CHECK; 477292588Sdelphij 478292588Sdelphij if (!opt_single_stream) 479292588Sdelphij flags |= LZMA_CONCATENATED; 480292588Sdelphij 481207753Smm // We abuse FORMAT_AUTO to indicate unknown file format, 482207753Smm // for which we may consider passthru mode. 483207753Smm enum format_type init_format = FORMAT_AUTO; 484207753Smm 485207753Smm switch (opt_format) { 486207753Smm case FORMAT_AUTO: 487207753Smm if (is_format_xz()) 488207753Smm init_format = FORMAT_XZ; 489207753Smm else if (is_format_lzma()) 490207753Smm init_format = FORMAT_LZMA; 491207753Smm break; 492207753Smm 493207753Smm case FORMAT_XZ: 494207753Smm if (is_format_xz()) 495207753Smm init_format = FORMAT_XZ; 496207753Smm break; 497207753Smm 498207753Smm case FORMAT_LZMA: 499207753Smm if (is_format_lzma()) 500207753Smm init_format = FORMAT_LZMA; 501207753Smm break; 502207753Smm 503207753Smm case FORMAT_RAW: 504207753Smm init_format = FORMAT_RAW; 505207753Smm break; 506207753Smm } 507207753Smm 508207753Smm switch (init_format) { 509207753Smm case FORMAT_AUTO: 510292588Sdelphij // Unknown file format. If --decompress --stdout 511207753Smm // --force have been given, then we copy the input 512207753Smm // as is to stdout. Checking for MODE_DECOMPRESS 513207753Smm // is needed, because we don't want to do use 514207753Smm // passthru mode with --test. 515207753Smm if (opt_mode == MODE_DECOMPRESS 516207753Smm && opt_stdout && opt_force) 517207753Smm return CODER_INIT_PASSTHRU; 518207753Smm 519207753Smm ret = LZMA_FORMAT_ERROR; 520207753Smm break; 521207753Smm 522207753Smm case FORMAT_XZ: 523207753Smm ret = lzma_stream_decoder(&strm, 524213700Smm hardware_memlimit_get( 525213700Smm MODE_DECOMPRESS), flags); 526207753Smm break; 527207753Smm 528207753Smm case FORMAT_LZMA: 529207753Smm ret = lzma_alone_decoder(&strm, 530213700Smm hardware_memlimit_get( 531213700Smm MODE_DECOMPRESS)); 532207753Smm break; 533207753Smm 534207753Smm case FORMAT_RAW: 535207753Smm // Memory usage has already been checked in 536207753Smm // coder_set_compression_settings(). 537207753Smm ret = lzma_raw_decoder(&strm, filters); 538207753Smm break; 539207753Smm } 540207753Smm 541207753Smm // Try to decode the headers. This will catch too low 542207753Smm // memory usage limit in case it happens in the first 543207753Smm // Block of the first Stream, which is where it very 544207753Smm // probably will happen if it is going to happen. 545207753Smm if (ret == LZMA_OK && init_format != FORMAT_RAW) { 546207753Smm strm.next_out = NULL; 547207753Smm strm.avail_out = 0; 548207753Smm ret = lzma_code(&strm, LZMA_RUN); 549207753Smm } 550312518Sdelphij#endif 551207753Smm } 552207753Smm 553207753Smm if (ret != LZMA_OK) { 554207753Smm message_error("%s: %s", pair->src_name, message_strm(ret)); 555207753Smm if (ret == LZMA_MEMLIMIT_ERROR) 556207753Smm message_mem_needed(V_ERROR, lzma_memusage(&strm)); 557207753Smm 558207753Smm return CODER_INIT_ERROR; 559207753Smm } 560207753Smm 561207753Smm return CODER_INIT_NORMAL; 562207753Smm} 563207753Smm 564207753Smm 565292588Sdelphij/// Resolve conflicts between opt_block_size and opt_block_list in single 566292588Sdelphij/// threaded mode. We want to default to opt_block_list, except when it is 567292588Sdelphij/// larger than opt_block_size. If this is the case for the current Block 568292588Sdelphij/// at *list_pos, then we break into smaller Blocks. Otherwise advance 569292588Sdelphij/// to the next Block in opt_block_list, and break apart if needed. 570292588Sdelphijstatic void 571292588Sdelphijsplit_block(uint64_t *block_remaining, 572292588Sdelphij uint64_t *next_block_remaining, 573292588Sdelphij size_t *list_pos) 574292588Sdelphij{ 575292588Sdelphij if (*next_block_remaining > 0) { 576292588Sdelphij // The Block at *list_pos has previously been split up. 577292588Sdelphij assert(hardware_threads_get() == 1); 578292588Sdelphij assert(opt_block_size > 0); 579292588Sdelphij assert(opt_block_list != NULL); 580292588Sdelphij 581292588Sdelphij if (*next_block_remaining > opt_block_size) { 582292588Sdelphij // We have to split the current Block at *list_pos 583292588Sdelphij // into another opt_block_size length Block. 584292588Sdelphij *block_remaining = opt_block_size; 585292588Sdelphij } else { 586292588Sdelphij // This is the last remaining split Block for the 587292588Sdelphij // Block at *list_pos. 588292588Sdelphij *block_remaining = *next_block_remaining; 589292588Sdelphij } 590292588Sdelphij 591292588Sdelphij *next_block_remaining -= *block_remaining; 592292588Sdelphij 593292588Sdelphij } else { 594292588Sdelphij // The Block at *list_pos has been finished. Go to the next 595292588Sdelphij // entry in the list. If the end of the list has been reached, 596292588Sdelphij // reuse the size of the last Block. 597292588Sdelphij if (opt_block_list[*list_pos + 1] != 0) 598292588Sdelphij ++*list_pos; 599292588Sdelphij 600292588Sdelphij *block_remaining = opt_block_list[*list_pos]; 601292588Sdelphij 602292588Sdelphij // If in single-threaded mode, split up the Block if needed. 603292588Sdelphij // This is not needed in multi-threaded mode because liblzma 604292588Sdelphij // will do this due to how threaded encoding works. 605292588Sdelphij if (hardware_threads_get() == 1 && opt_block_size > 0 606292588Sdelphij && *block_remaining > opt_block_size) { 607292588Sdelphij *next_block_remaining 608292588Sdelphij = *block_remaining - opt_block_size; 609292588Sdelphij *block_remaining = opt_block_size; 610292588Sdelphij } 611292588Sdelphij } 612292588Sdelphij} 613292588Sdelphij 614292588Sdelphij 615207753Smm/// Compress or decompress using liblzma. 616207753Smmstatic bool 617207753Smmcoder_normal(file_pair *pair) 618207753Smm{ 619207753Smm // Encoder needs to know when we have given all the input to it. 620207753Smm // The decoders need to know it too when we are using 621207753Smm // LZMA_CONCATENATED. We need to check for src_eof here, because 622292588Sdelphij // the first input chunk has been already read if decompressing, 623292588Sdelphij // and that may have been the only chunk we will read. 624207753Smm lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; 625207753Smm 626207753Smm lzma_ret ret; 627207753Smm 628207753Smm // Assume that something goes wrong. 629207753Smm bool success = false; 630207753Smm 631292588Sdelphij // block_remaining indicates how many input bytes to encode before 632292588Sdelphij // finishing the current .xz Block. The Block size is set with 633292588Sdelphij // --block-size=SIZE and --block-list. They have an effect only when 634292588Sdelphij // compressing to the .xz format. If block_remaining == UINT64_MAX, 635292588Sdelphij // only a single block is created. 636292588Sdelphij uint64_t block_remaining = UINT64_MAX; 637292588Sdelphij 638292588Sdelphij // next_block_remining for when we are in single-threaded mode and 639292588Sdelphij // the Block in --block-list is larger than the --block-size=SIZE. 640292588Sdelphij uint64_t next_block_remaining = 0; 641292588Sdelphij 642292588Sdelphij // Position in opt_block_list. Unused if --block-list wasn't used. 643292588Sdelphij size_t list_pos = 0; 644292588Sdelphij 645292588Sdelphij // Handle --block-size for single-threaded mode and the first step 646292588Sdelphij // of --block-list. 647292588Sdelphij if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { 648292588Sdelphij // --block-size doesn't do anything here in threaded mode, 649292588Sdelphij // because the threaded encoder will take care of splitting 650292588Sdelphij // to fixed-sized Blocks. 651292588Sdelphij if (hardware_threads_get() == 1 && opt_block_size > 0) 652292588Sdelphij block_remaining = opt_block_size; 653292588Sdelphij 654292588Sdelphij // If --block-list was used, start with the first size. 655292588Sdelphij // 656292588Sdelphij // For threaded case, --block-size specifies how big Blocks 657292588Sdelphij // the encoder needs to be prepared to create at maximum 658292588Sdelphij // and --block-list will simultaneously cause new Blocks 659292588Sdelphij // to be started at specified intervals. To keep things 660292588Sdelphij // logical, the same is done in single-threaded mode. The 661292588Sdelphij // output is still not identical because in single-threaded 662292588Sdelphij // mode the size info isn't written into Block Headers. 663292588Sdelphij if (opt_block_list != NULL) { 664292588Sdelphij if (block_remaining < opt_block_list[list_pos]) { 665292588Sdelphij assert(hardware_threads_get() == 1); 666292588Sdelphij next_block_remaining = opt_block_list[list_pos] 667292588Sdelphij - block_remaining; 668292588Sdelphij } else { 669292588Sdelphij block_remaining = opt_block_list[list_pos]; 670292588Sdelphij } 671292588Sdelphij } 672292588Sdelphij } 673292588Sdelphij 674207753Smm strm.next_out = out_buf.u8; 675207753Smm strm.avail_out = IO_BUFFER_SIZE; 676207753Smm 677207753Smm while (!user_abort) { 678292588Sdelphij // Fill the input buffer if it is empty and we aren't 679292588Sdelphij // flushing or finishing. 680292588Sdelphij if (strm.avail_in == 0 && action == LZMA_RUN) { 681207753Smm strm.next_in = in_buf.u8; 682292588Sdelphij strm.avail_in = io_read(pair, &in_buf, 683292588Sdelphij my_min(block_remaining, 684292588Sdelphij IO_BUFFER_SIZE)); 685207753Smm 686207753Smm if (strm.avail_in == SIZE_MAX) 687207753Smm break; 688207753Smm 689292588Sdelphij if (pair->src_eof) { 690207753Smm action = LZMA_FINISH; 691292588Sdelphij 692292588Sdelphij } else if (block_remaining != UINT64_MAX) { 693292588Sdelphij // Start a new Block after every 694292588Sdelphij // opt_block_size bytes of input. 695292588Sdelphij block_remaining -= strm.avail_in; 696292588Sdelphij if (block_remaining == 0) 697292588Sdelphij action = LZMA_FULL_BARRIER; 698292588Sdelphij } 699292588Sdelphij 700292588Sdelphij if (action == LZMA_RUN && flush_needed) 701292588Sdelphij action = LZMA_SYNC_FLUSH; 702207753Smm } 703207753Smm 704207753Smm // Let liblzma do the actual work. 705207753Smm ret = lzma_code(&strm, action); 706207753Smm 707207753Smm // Write out if the output buffer became full. 708207753Smm if (strm.avail_out == 0) { 709207753Smm if (opt_mode != MODE_TEST && io_write(pair, &out_buf, 710207753Smm IO_BUFFER_SIZE - strm.avail_out)) 711207753Smm break; 712207753Smm 713207753Smm strm.next_out = out_buf.u8; 714207753Smm strm.avail_out = IO_BUFFER_SIZE; 715207753Smm } 716207753Smm 717292588Sdelphij if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH 718292588Sdelphij || action == LZMA_FULL_BARRIER)) { 719292588Sdelphij if (action == LZMA_SYNC_FLUSH) { 720292588Sdelphij // Flushing completed. Write the pending data 721292588Sdelphij // out immediatelly so that the reading side 722292588Sdelphij // can decompress everything compressed so far. 723292588Sdelphij if (io_write(pair, &out_buf, IO_BUFFER_SIZE 724292588Sdelphij - strm.avail_out)) 725292588Sdelphij break; 726292588Sdelphij 727292588Sdelphij strm.next_out = out_buf.u8; 728292588Sdelphij strm.avail_out = IO_BUFFER_SIZE; 729292588Sdelphij 730292588Sdelphij // Set the time of the most recent flushing. 731292588Sdelphij mytime_set_flush_time(); 732292588Sdelphij } else { 733292588Sdelphij // Start a new Block after LZMA_FULL_BARRIER. 734292588Sdelphij if (opt_block_list == NULL) { 735292588Sdelphij assert(hardware_threads_get() == 1); 736292588Sdelphij assert(opt_block_size > 0); 737292588Sdelphij block_remaining = opt_block_size; 738292588Sdelphij } else { 739292588Sdelphij split_block(&block_remaining, 740292588Sdelphij &next_block_remaining, 741292588Sdelphij &list_pos); 742292588Sdelphij } 743292588Sdelphij } 744292588Sdelphij 745292588Sdelphij // Start a new Block after LZMA_FULL_FLUSH or continue 746292588Sdelphij // the same block after LZMA_SYNC_FLUSH. 747292588Sdelphij action = LZMA_RUN; 748292588Sdelphij 749292588Sdelphij } else if (ret != LZMA_OK) { 750207753Smm // Determine if the return value indicates that we 751207753Smm // won't continue coding. 752207753Smm const bool stop = ret != LZMA_NO_CHECK 753207753Smm && ret != LZMA_UNSUPPORTED_CHECK; 754207753Smm 755207753Smm if (stop) { 756207753Smm // Write the remaining bytes even if something 757207753Smm // went wrong, because that way the user gets 758207753Smm // as much data as possible, which can be good 759207753Smm // when trying to get at least some useful 760207753Smm // data out of damaged files. 761207753Smm if (opt_mode != MODE_TEST && io_write(pair, 762207753Smm &out_buf, IO_BUFFER_SIZE 763207753Smm - strm.avail_out)) 764207753Smm break; 765207753Smm } 766207753Smm 767207753Smm if (ret == LZMA_STREAM_END) { 768292588Sdelphij if (opt_single_stream) { 769292588Sdelphij io_fix_src_pos(pair, strm.avail_in); 770292588Sdelphij success = true; 771292588Sdelphij break; 772292588Sdelphij } 773292588Sdelphij 774207753Smm // Check that there is no trailing garbage. 775207753Smm // This is needed for LZMA_Alone and raw 776207753Smm // streams. 777207753Smm if (strm.avail_in == 0 && !pair->src_eof) { 778207753Smm // Try reading one more byte. 779207753Smm // Hopefully we don't get any more 780207753Smm // input, and thus pair->src_eof 781207753Smm // becomes true. 782207753Smm strm.avail_in = io_read( 783207753Smm pair, &in_buf, 1); 784207753Smm if (strm.avail_in == SIZE_MAX) 785207753Smm break; 786207753Smm 787207753Smm assert(strm.avail_in == 0 788207753Smm || strm.avail_in == 1); 789207753Smm } 790207753Smm 791207753Smm if (strm.avail_in == 0) { 792207753Smm assert(pair->src_eof); 793207753Smm success = true; 794207753Smm break; 795207753Smm } 796207753Smm 797207753Smm // We hadn't reached the end of the file. 798207753Smm ret = LZMA_DATA_ERROR; 799207753Smm assert(stop); 800207753Smm } 801207753Smm 802207753Smm // If we get here and stop is true, something went 803207753Smm // wrong and we print an error. Otherwise it's just 804207753Smm // a warning and coding can continue. 805207753Smm if (stop) { 806207753Smm message_error("%s: %s", pair->src_name, 807207753Smm message_strm(ret)); 808207753Smm } else { 809207753Smm message_warning("%s: %s", pair->src_name, 810207753Smm message_strm(ret)); 811207753Smm 812207753Smm // When compressing, all possible errors set 813207753Smm // stop to true. 814207753Smm assert(opt_mode != MODE_COMPRESS); 815207753Smm } 816207753Smm 817207753Smm if (ret == LZMA_MEMLIMIT_ERROR) { 818207753Smm // Display how much memory it would have 819207753Smm // actually needed. 820207753Smm message_mem_needed(V_ERROR, 821207753Smm lzma_memusage(&strm)); 822207753Smm } 823207753Smm 824207753Smm if (stop) 825207753Smm break; 826207753Smm } 827207753Smm 828207753Smm // Show progress information under certain conditions. 829207753Smm message_progress_update(); 830207753Smm } 831207753Smm 832207753Smm return success; 833207753Smm} 834207753Smm 835207753Smm 836207753Smm/// Copy from input file to output file without processing the data in any 837207753Smm/// way. This is used only when trying to decompress unrecognized files 838207753Smm/// with --decompress --stdout --force, so the output is always stdout. 839207753Smmstatic bool 840207753Smmcoder_passthru(file_pair *pair) 841207753Smm{ 842207753Smm while (strm.avail_in != 0) { 843207753Smm if (user_abort) 844207753Smm return false; 845207753Smm 846207753Smm if (io_write(pair, &in_buf, strm.avail_in)) 847207753Smm return false; 848207753Smm 849207753Smm strm.total_in += strm.avail_in; 850207753Smm strm.total_out = strm.total_in; 851207753Smm message_progress_update(); 852207753Smm 853207753Smm strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 854207753Smm if (strm.avail_in == SIZE_MAX) 855207753Smm return false; 856207753Smm } 857207753Smm 858207753Smm return true; 859207753Smm} 860207753Smm 861207753Smm 862207753Smmextern void 863207753Smmcoder_run(const char *filename) 864207753Smm{ 865207753Smm // Set and possibly print the filename for the progress message. 866207753Smm message_filename(filename); 867207753Smm 868207753Smm // Try to open the input file. 869207753Smm file_pair *pair = io_open_src(filename); 870207753Smm if (pair == NULL) 871207753Smm return; 872207753Smm 873207753Smm // Assume that something goes wrong. 874207753Smm bool success = false; 875207753Smm 876292588Sdelphij if (opt_mode == MODE_COMPRESS) { 877292588Sdelphij strm.next_in = NULL; 878292588Sdelphij strm.avail_in = 0; 879292588Sdelphij } else { 880292588Sdelphij // Read the first chunk of input data. This is needed 881292588Sdelphij // to detect the input file type. 882292588Sdelphij strm.next_in = in_buf.u8; 883292588Sdelphij strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 884292588Sdelphij } 885207753Smm 886207753Smm if (strm.avail_in != SIZE_MAX) { 887207753Smm // Initialize the coder. This will detect the file format 888207753Smm // and, in decompression or testing mode, check the memory 889207753Smm // usage of the first Block too. This way we don't try to 890207753Smm // open the destination file if we see that coding wouldn't 891207753Smm // work at all anyway. This also avoids deleting the old 892207753Smm // "target" file if --force was used. 893207753Smm const enum coder_init_ret init_ret = coder_init(pair); 894207753Smm 895207753Smm if (init_ret != CODER_INIT_ERROR && !user_abort) { 896207753Smm // Don't open the destination file when --test 897207753Smm // is used. 898207753Smm if (opt_mode == MODE_TEST || !io_open_dest(pair)) { 899292588Sdelphij // Remember the current time. It is needed 900292588Sdelphij // for progress indicator and for timed 901292588Sdelphij // flushing. 902292588Sdelphij mytime_set_start_time(); 903292588Sdelphij 904207753Smm // Initialize the progress indicator. 905207753Smm const uint64_t in_size 906207753Smm = pair->src_st.st_size <= 0 907207753Smm ? 0 : pair->src_st.st_size; 908207753Smm message_progress_start(&strm, in_size); 909207753Smm 910207753Smm // Do the actual coding or passthru. 911207753Smm if (init_ret == CODER_INIT_NORMAL) 912207753Smm success = coder_normal(pair); 913207753Smm else 914207753Smm success = coder_passthru(pair); 915207753Smm 916207753Smm message_progress_end(success); 917207753Smm } 918207753Smm } 919207753Smm } 920207753Smm 921207753Smm // Close the file pair. It needs to know if coding was successful to 922207753Smm // know if the source or target file should be unlinked. 923207753Smm io_close(pair, success); 924207753Smm 925207753Smm return; 926207753Smm} 927292588Sdelphij 928292588Sdelphij 929292588Sdelphij#ifndef NDEBUG 930292588Sdelphijextern void 931292588Sdelphijcoder_free(void) 932292588Sdelphij{ 933292588Sdelphij lzma_end(&strm); 934292588Sdelphij return; 935292588Sdelphij} 936292588Sdelphij#endif 937