1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file coder.c 4207753Smm/// \brief Compresses or uncompresses a file 5207753Smm// 6207753Smm// Author: Lasse Collin 7207753Smm// 8207753Smm// This file has been put into the public domain. 9207753Smm// You can do whatever you want with this file. 10207753Smm// 11207753Smm/////////////////////////////////////////////////////////////////////////////// 12207753Smm 13207753Smm#include "private.h" 14207753Smm 15207753Smm 16207753Smm/// Return value type for coder_init(). 17207753Smmenum coder_init_ret { 18207753Smm CODER_INIT_NORMAL, 19207753Smm CODER_INIT_PASSTHRU, 20207753Smm CODER_INIT_ERROR, 21207753Smm}; 22207753Smm 23207753Smm 24207753Smmenum operation_mode opt_mode = MODE_COMPRESS; 25207753Smmenum format_type opt_format = FORMAT_AUTO; 26213700Smmbool opt_auto_adjust = true; 27278433Srpaulobool opt_single_stream = false; 28278433Srpaulouint64_t opt_block_size = 0; 29278433Srpaulouint64_t *opt_block_list = NULL; 30207753Smm 31207753Smm 32207753Smm/// Stream used to communicate with liblzma 33207753Smmstatic lzma_stream strm = LZMA_STREAM_INIT; 34207753Smm 35207753Smm/// Filters needed for all encoding all formats, and also decoding in raw data 36207753Smmstatic lzma_filter filters[LZMA_FILTERS_MAX + 1]; 37207753Smm 38207753Smm/// Input and output buffers 39207753Smmstatic io_buf in_buf; 40207753Smmstatic io_buf out_buf; 41207753Smm 42207753Smm/// Number of filters. Zero indicates that we are using a preset. 43262754Sdelphijstatic uint32_t filters_count = 0; 44207753Smm 45207753Smm/// Number of the preset (0-9) 46262754Sdelphijstatic uint32_t preset_number = LZMA_PRESET_DEFAULT; 47207753Smm 48207753Smm/// Integrity check type 49207753Smmstatic lzma_check check; 50207753Smm 51207753Smm/// This becomes false if the --check=CHECK option is used. 52207753Smmstatic bool check_default = true; 53207753Smm 54312517Sdelphij#if defined(HAVE_ENCODERS) && defined(MYTHREAD_ENABLED) 55278433Srpaulostatic lzma_mt mt_options = { 56278433Srpaulo .flags = 0, 57278433Srpaulo .timeout = 300, 58278433Srpaulo .filters = filters, 59278433Srpaulo}; 60278433Srpaulo#endif 61207753Smm 62278433Srpaulo 63207753Smmextern void 64207753Smmcoder_set_check(lzma_check new_check) 65207753Smm{ 66207753Smm check = new_check; 67207753Smm check_default = false; 68207753Smm return; 69207753Smm} 70207753Smm 71207753Smm 72262754Sdelphijstatic void 73262754Sdelphijforget_filter_chain(void) 74207753Smm{ 75213700Smm // Setting a preset makes us forget a possibly defined custom 76213700Smm // filter chain. 77213700Smm while (filters_count > 0) { 78213700Smm --filters_count; 79213700Smm free(filters[filters_count].options); 80213700Smm filters[filters_count].options = NULL; 81213700Smm } 82213700Smm 83207753Smm return; 84207753Smm} 85207753Smm 86207753Smm 87207753Smmextern void 88262754Sdelphijcoder_set_preset(uint32_t new_preset) 89262754Sdelphij{ 90262754Sdelphij preset_number &= ~LZMA_PRESET_LEVEL_MASK; 91262754Sdelphij preset_number |= new_preset; 92262754Sdelphij forget_filter_chain(); 93262754Sdelphij return; 94262754Sdelphij} 95262754Sdelphij 96262754Sdelphij 97262754Sdelphijextern void 98207753Smmcoder_set_extreme(void) 99207753Smm{ 100262754Sdelphij preset_number |= LZMA_PRESET_EXTREME; 101262754Sdelphij forget_filter_chain(); 102207753Smm return; 103207753Smm} 104207753Smm 105207753Smm 106207753Smmextern void 107207753Smmcoder_add_filter(lzma_vli id, void *options) 108207753Smm{ 109207753Smm if (filters_count == LZMA_FILTERS_MAX) 110207753Smm message_fatal(_("Maximum number of filters is four")); 111207753Smm 112207753Smm filters[filters_count].id = id; 113207753Smm filters[filters_count].options = options; 114207753Smm ++filters_count; 115207753Smm 116262754Sdelphij // Setting a custom filter chain makes us forget the preset options. 117262754Sdelphij // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" 118262754Sdelphij // where the custom filter chain resets the preset level back to 119262754Sdelphij // the default 6, making the example equivalent to "xz -6e". 120262754Sdelphij preset_number = LZMA_PRESET_DEFAULT; 121262754Sdelphij 122207753Smm return; 123207753Smm} 124207753Smm 125207753Smm 126223935Smmstatic void lzma_attribute((__noreturn__)) 127207753Smmmemlimit_too_small(uint64_t memory_usage) 128207753Smm{ 129207753Smm message(V_ERROR, _("Memory usage limit is too low for the given " 130207753Smm "filter setup.")); 131207753Smm message_mem_needed(V_ERROR, memory_usage); 132207753Smm tuklib_exit(E_ERROR, E_ERROR, false); 133207753Smm} 134207753Smm 135207753Smm 136207753Smmextern void 137207753Smmcoder_set_compression_settings(void) 138207753Smm{ 139278433Srpaulo // The default check type is CRC64, but fallback to CRC32 140278433Srpaulo // if CRC64 isn't supported by the copy of liblzma we are 141278433Srpaulo // using. CRC32 is always supported. 142278433Srpaulo if (check_default) { 143278433Srpaulo check = LZMA_CHECK_CRC64; 144278433Srpaulo if (!lzma_check_is_supported(check)) 145278433Srpaulo check = LZMA_CHECK_CRC32; 146278433Srpaulo } 147278433Srpaulo 148207753Smm // Options for LZMA1 or LZMA2 in case we are using a preset. 149207753Smm static lzma_options_lzma opt_lzma; 150207753Smm 151207753Smm if (filters_count == 0) { 152207753Smm // We are using a preset. This is not a good idea in raw mode 153207753Smm // except when playing around with things. Different versions 154207753Smm // of this software may use different options in presets, and 155207753Smm // thus make uncompressing the raw data difficult. 156207753Smm if (opt_format == FORMAT_RAW) { 157207753Smm // The message is shown only if warnings are allowed 158207753Smm // but the exit status isn't changed. 159207753Smm message(V_WARNING, _("Using a preset in raw mode " 160207753Smm "is discouraged.")); 161207753Smm message(V_WARNING, _("The exact options of the " 162207753Smm "presets may vary between software " 163207753Smm "versions.")); 164207753Smm } 165207753Smm 166207753Smm // Get the preset for LZMA1 or LZMA2. 167207753Smm if (lzma_lzma_preset(&opt_lzma, preset_number)) 168207753Smm message_bug(); 169207753Smm 170207753Smm // Use LZMA2 except with --format=lzma we use LZMA1. 171207753Smm filters[0].id = opt_format == FORMAT_LZMA 172207753Smm ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; 173207753Smm filters[0].options = &opt_lzma; 174207753Smm filters_count = 1; 175207753Smm } 176207753Smm 177207753Smm // Terminate the filter options array. 178207753Smm filters[filters_count].id = LZMA_VLI_UNKNOWN; 179207753Smm 180207753Smm // If we are using the .lzma format, allow exactly one filter 181207753Smm // which has to be LZMA1. 182207753Smm if (opt_format == FORMAT_LZMA && (filters_count != 1 183207753Smm || filters[0].id != LZMA_FILTER_LZMA1)) 184207753Smm message_fatal(_("The .lzma format supports only " 185207753Smm "the LZMA1 filter")); 186207753Smm 187207753Smm // If we are using the .xz format, make sure that there is no LZMA1 188207753Smm // filter to prevent LZMA_PROG_ERROR. 189207753Smm if (opt_format == FORMAT_XZ) 190207753Smm for (size_t i = 0; i < filters_count; ++i) 191207753Smm if (filters[i].id == LZMA_FILTER_LZMA1) 192207753Smm message_fatal(_("LZMA1 cannot be used " 193207753Smm "with the .xz format")); 194207753Smm 195207753Smm // Print the selected filter chain. 196213700Smm message_filters_show(V_DEBUG, filters); 197207753Smm 198278433Srpaulo // The --flush-timeout option requires LZMA_SYNC_FLUSH support 199278433Srpaulo // from the filter chain. Currently threaded encoder doesn't support 200278433Srpaulo // LZMA_SYNC_FLUSH so single-threaded mode must be used. 201278433Srpaulo if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { 202278433Srpaulo for (size_t i = 0; i < filters_count; ++i) { 203278433Srpaulo switch (filters[i].id) { 204278433Srpaulo case LZMA_FILTER_LZMA2: 205278433Srpaulo case LZMA_FILTER_DELTA: 206278433Srpaulo break; 207278433Srpaulo 208278433Srpaulo default: 209278433Srpaulo message_fatal(_("The filter chain is " 210278433Srpaulo "incompatible with --flush-timeout")); 211278433Srpaulo } 212278433Srpaulo } 213278433Srpaulo 214278433Srpaulo if (hardware_threads_get() > 1) { 215278433Srpaulo message(V_WARNING, _("Switching to single-threaded " 216278433Srpaulo "mode due to --flush-timeout")); 217278433Srpaulo hardware_threads_set(1); 218278433Srpaulo } 219278433Srpaulo } 220278433Srpaulo 221278433Srpaulo // Get the memory usage. Note that if --format=raw was used, 222278433Srpaulo // we can be decompressing. 223213700Smm const uint64_t memory_limit = hardware_memlimit_get(opt_mode); 224312517Sdelphij uint64_t memory_usage = UINT64_MAX; 225278433Srpaulo if (opt_mode == MODE_COMPRESS) { 226312517Sdelphij#ifdef HAVE_ENCODERS 227312517Sdelphij# ifdef MYTHREAD_ENABLED 228278433Srpaulo if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) { 229278433Srpaulo mt_options.threads = hardware_threads_get(); 230278433Srpaulo mt_options.block_size = opt_block_size; 231278433Srpaulo mt_options.check = check; 232278433Srpaulo memory_usage = lzma_stream_encoder_mt_memusage( 233278433Srpaulo &mt_options); 234278433Srpaulo if (memory_usage != UINT64_MAX) 235278433Srpaulo message(V_DEBUG, _("Using up to %" PRIu32 236278433Srpaulo " threads."), 237278433Srpaulo mt_options.threads); 238278433Srpaulo } else 239312517Sdelphij# endif 240278433Srpaulo { 241278433Srpaulo memory_usage = lzma_raw_encoder_memusage(filters); 242278433Srpaulo } 243312517Sdelphij#endif 244278433Srpaulo } else { 245312517Sdelphij#ifdef HAVE_DECODERS 246207753Smm memory_usage = lzma_raw_decoder_memusage(filters); 247312517Sdelphij#endif 248278433Srpaulo } 249207753Smm 250207753Smm if (memory_usage == UINT64_MAX) 251207753Smm message_fatal(_("Unsupported filter chain or filter options")); 252207753Smm 253207753Smm // Print memory usage info before possible dictionary 254207753Smm // size auto-adjusting. 255312517Sdelphij // 256312517Sdelphij // NOTE: If only encoder support was built, we cannot show the 257312517Sdelphij // what the decoder memory usage will be. 258207753Smm message_mem_needed(V_DEBUG, memory_usage); 259312517Sdelphij#ifdef HAVE_DECODERS 260213700Smm if (opt_mode == MODE_COMPRESS) { 261213700Smm const uint64_t decmem = lzma_raw_decoder_memusage(filters); 262213700Smm if (decmem != UINT64_MAX) 263213700Smm message(V_DEBUG, _("Decompression will need " 264213700Smm "%s MiB of memory."), uint64_to_str( 265213700Smm round_up_to_mib(decmem), 0)); 266213700Smm } 267312517Sdelphij#endif 268207753Smm 269278433Srpaulo if (memory_usage <= memory_limit) 270278433Srpaulo return; 271207753Smm 272278433Srpaulo // If --no-adjust was used or we didn't find LZMA1 or 273278433Srpaulo // LZMA2 as the last filter, give an error immediately. 274278433Srpaulo // --format=raw implies --no-adjust. 275278433Srpaulo if (!opt_auto_adjust || opt_format == FORMAT_RAW) 276278433Srpaulo memlimit_too_small(memory_usage); 277207753Smm 278278433Srpaulo assert(opt_mode == MODE_COMPRESS); 279207753Smm 280312517Sdelphij#ifdef HAVE_ENCODERS 281312517Sdelphij# ifdef MYTHREAD_ENABLED 282278433Srpaulo if (opt_format == FORMAT_XZ && mt_options.threads > 1) { 283278433Srpaulo // Try to reduce the number of threads before 284278433Srpaulo // adjusting the compression settings down. 285278433Srpaulo do { 286278433Srpaulo // FIXME? The real single-threaded mode has 287278433Srpaulo // lower memory usage, but it's not comparable 288278433Srpaulo // because it doesn't write the size info 289278433Srpaulo // into Block Headers. 290278433Srpaulo if (--mt_options.threads == 0) 291207753Smm memlimit_too_small(memory_usage); 292207753Smm 293278433Srpaulo memory_usage = lzma_stream_encoder_mt_memusage( 294278433Srpaulo &mt_options); 295207753Smm if (memory_usage == UINT64_MAX) 296207753Smm message_bug(); 297207753Smm 298278433Srpaulo } while (memory_usage > memory_limit); 299207753Smm 300278433Srpaulo message(V_WARNING, _("Adjusted the number of threads " 301278433Srpaulo "from %s to %s to not exceed " 302278433Srpaulo "the memory usage limit of %s MiB"), 303278433Srpaulo uint64_to_str(hardware_threads_get(), 0), 304278433Srpaulo uint64_to_str(mt_options.threads, 1), 305278433Srpaulo uint64_to_str(round_up_to_mib( 306278433Srpaulo memory_limit), 2)); 307278433Srpaulo } 308312517Sdelphij# endif 309207753Smm 310278433Srpaulo if (memory_usage <= memory_limit) 311278433Srpaulo return; 312278433Srpaulo 313278433Srpaulo // Look for the last filter if it is LZMA2 or LZMA1, so we can make 314278433Srpaulo // it use less RAM. With other filters we don't know what to do. 315278433Srpaulo size_t i = 0; 316278433Srpaulo while (filters[i].id != LZMA_FILTER_LZMA2 317278433Srpaulo && filters[i].id != LZMA_FILTER_LZMA1) { 318278433Srpaulo if (filters[i].id == LZMA_VLI_UNKNOWN) 319278433Srpaulo memlimit_too_small(memory_usage); 320278433Srpaulo 321278433Srpaulo ++i; 322207753Smm } 323207753Smm 324278433Srpaulo // Decrease the dictionary size until we meet the memory 325278433Srpaulo // usage limit. First round down to full mebibytes. 326278433Srpaulo lzma_options_lzma *opt = filters[i].options; 327278433Srpaulo const uint32_t orig_dict_size = opt->dict_size; 328278433Srpaulo opt->dict_size &= ~((UINT32_C(1) << 20) - 1); 329278433Srpaulo while (true) { 330278433Srpaulo // If it is below 1 MiB, auto-adjusting failed. We could be 331278433Srpaulo // more sophisticated and scale it down even more, but let's 332278433Srpaulo // see if many complain about this version. 333278433Srpaulo // 334278433Srpaulo // FIXME: Displays the scaled memory usage instead 335278433Srpaulo // of the original. 336278433Srpaulo if (opt->dict_size < (UINT32_C(1) << 20)) 337278433Srpaulo memlimit_too_small(memory_usage); 338207753Smm 339278433Srpaulo memory_usage = lzma_raw_encoder_memusage(filters); 340278433Srpaulo if (memory_usage == UINT64_MAX) 341278433Srpaulo message_bug(); 342207753Smm 343278433Srpaulo // Accept it if it is low enough. 344278433Srpaulo if (memory_usage <= memory_limit) 345278433Srpaulo break; 346278433Srpaulo 347278433Srpaulo // Otherwise 1 MiB down and try again. I hope this 348278433Srpaulo // isn't too slow method for cases where the original 349278433Srpaulo // dict_size is very big. 350278433Srpaulo opt->dict_size -= UINT32_C(1) << 20; 351207753Smm } 352207753Smm 353278433Srpaulo // Tell the user that we decreased the dictionary size. 354278433Srpaulo message(V_WARNING, _("Adjusted LZMA%c dictionary size " 355278433Srpaulo "from %s MiB to %s MiB to not exceed " 356278433Srpaulo "the memory usage limit of %s MiB"), 357278433Srpaulo filters[i].id == LZMA_FILTER_LZMA2 358278433Srpaulo ? '2' : '1', 359278433Srpaulo uint64_to_str(orig_dict_size >> 20, 0), 360278433Srpaulo uint64_to_str(opt->dict_size >> 20, 1), 361278433Srpaulo uint64_to_str(round_up_to_mib(memory_limit), 2)); 362312517Sdelphij#endif 363278433Srpaulo 364207753Smm return; 365207753Smm} 366207753Smm 367207753Smm 368312517Sdelphij#ifdef HAVE_DECODERS 369207753Smm/// Return true if the data in in_buf seems to be in the .xz format. 370207753Smmstatic bool 371207753Smmis_format_xz(void) 372207753Smm{ 373244601Smm // Specify the magic as hex to be compatible with EBCDIC systems. 374244601Smm static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; 375244601Smm return strm.avail_in >= sizeof(magic) 376244601Smm && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; 377207753Smm} 378207753Smm 379207753Smm 380207753Smm/// Return true if the data in in_buf seems to be in the .lzma format. 381207753Smmstatic bool 382207753Smmis_format_lzma(void) 383207753Smm{ 384207753Smm // The .lzma header is 13 bytes. 385207753Smm if (strm.avail_in < 13) 386207753Smm return false; 387207753Smm 388207753Smm // Decode the LZMA1 properties. 389207753Smm lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; 390207753Smm if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) 391207753Smm return false; 392207753Smm 393207753Smm // A hack to ditch tons of false positives: We allow only dictionary 394207753Smm // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone 395207753Smm // created only files with 2^n, but accepts any dictionary size. 396207753Smm // If someone complains, this will be reconsidered. 397207753Smm lzma_options_lzma *opt = filter.options; 398207753Smm const uint32_t dict_size = opt->dict_size; 399207753Smm free(opt); 400207753Smm 401207753Smm if (dict_size != UINT32_MAX) { 402207753Smm uint32_t d = dict_size - 1; 403207753Smm d |= d >> 2; 404207753Smm d |= d >> 3; 405207753Smm d |= d >> 4; 406207753Smm d |= d >> 8; 407207753Smm d |= d >> 16; 408207753Smm ++d; 409207753Smm if (d != dict_size || dict_size == 0) 410207753Smm return false; 411207753Smm } 412207753Smm 413207753Smm // Another hack to ditch false positives: Assume that if the 414207753Smm // uncompressed size is known, it must be less than 256 GiB. 415207753Smm // Again, if someone complains, this will be reconsidered. 416207753Smm uint64_t uncompressed_size = 0; 417207753Smm for (size_t i = 0; i < 8; ++i) 418207753Smm uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); 419207753Smm 420207753Smm if (uncompressed_size != UINT64_MAX 421207753Smm && uncompressed_size > (UINT64_C(1) << 38)) 422207753Smm return false; 423207753Smm 424207753Smm return true; 425207753Smm} 426312517Sdelphij#endif 427207753Smm 428207753Smm 429207753Smm/// Detect the input file type (for now, this done only when decompressing), 430207753Smm/// and initialize an appropriate coder. Return value indicates if a normal 431207753Smm/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru 432207753Smm/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred 433207753Smm/// (CODER_INIT_ERROR). 434207753Smmstatic enum coder_init_ret 435207753Smmcoder_init(file_pair *pair) 436207753Smm{ 437207753Smm lzma_ret ret = LZMA_PROG_ERROR; 438207753Smm 439207753Smm if (opt_mode == MODE_COMPRESS) { 440312517Sdelphij#ifdef HAVE_ENCODERS 441207753Smm switch (opt_format) { 442207753Smm case FORMAT_AUTO: 443207753Smm // args.c ensures this. 444207753Smm assert(0); 445207753Smm break; 446207753Smm 447207753Smm case FORMAT_XZ: 448312517Sdelphij# ifdef MYTHREAD_ENABLED 449278433Srpaulo if (hardware_threads_get() > 1) 450278433Srpaulo ret = lzma_stream_encoder_mt( 451278433Srpaulo &strm, &mt_options); 452278433Srpaulo else 453312517Sdelphij# endif 454278433Srpaulo ret = lzma_stream_encoder( 455278433Srpaulo &strm, filters, check); 456207753Smm break; 457207753Smm 458207753Smm case FORMAT_LZMA: 459207753Smm ret = lzma_alone_encoder(&strm, filters[0].options); 460207753Smm break; 461207753Smm 462207753Smm case FORMAT_RAW: 463207753Smm ret = lzma_raw_encoder(&strm, filters); 464207753Smm break; 465207753Smm } 466312517Sdelphij#endif 467207753Smm } else { 468312517Sdelphij#ifdef HAVE_DECODERS 469278433Srpaulo uint32_t flags = 0; 470207753Smm 471278433Srpaulo // It seems silly to warn about unsupported check if the 472278433Srpaulo // check won't be verified anyway due to --ignore-check. 473278433Srpaulo if (opt_ignore_check) 474278433Srpaulo flags |= LZMA_IGNORE_CHECK; 475278433Srpaulo else 476278433Srpaulo flags |= LZMA_TELL_UNSUPPORTED_CHECK; 477278433Srpaulo 478278433Srpaulo if (!opt_single_stream) 479278433Srpaulo flags |= LZMA_CONCATENATED; 480278433Srpaulo 481207753Smm // We abuse FORMAT_AUTO to indicate unknown file format, 482207753Smm // for which we may consider passthru mode. 483207753Smm enum format_type init_format = FORMAT_AUTO; 484207753Smm 485207753Smm switch (opt_format) { 486207753Smm case FORMAT_AUTO: 487207753Smm if (is_format_xz()) 488207753Smm init_format = FORMAT_XZ; 489207753Smm else if (is_format_lzma()) 490207753Smm init_format = FORMAT_LZMA; 491207753Smm break; 492207753Smm 493207753Smm case FORMAT_XZ: 494207753Smm if (is_format_xz()) 495207753Smm init_format = FORMAT_XZ; 496207753Smm break; 497207753Smm 498207753Smm case FORMAT_LZMA: 499207753Smm if (is_format_lzma()) 500207753Smm init_format = FORMAT_LZMA; 501207753Smm break; 502207753Smm 503207753Smm case FORMAT_RAW: 504207753Smm init_format = FORMAT_RAW; 505207753Smm break; 506207753Smm } 507207753Smm 508207753Smm switch (init_format) { 509207753Smm case FORMAT_AUTO: 510278433Srpaulo // Unknown file format. If --decompress --stdout 511207753Smm // --force have been given, then we copy the input 512207753Smm // as is to stdout. Checking for MODE_DECOMPRESS 513207753Smm // is needed, because we don't want to do use 514207753Smm // passthru mode with --test. 515207753Smm if (opt_mode == MODE_DECOMPRESS 516207753Smm && opt_stdout && opt_force) 517207753Smm return CODER_INIT_PASSTHRU; 518207753Smm 519207753Smm ret = LZMA_FORMAT_ERROR; 520207753Smm break; 521207753Smm 522207753Smm case FORMAT_XZ: 523207753Smm ret = lzma_stream_decoder(&strm, 524213700Smm hardware_memlimit_get( 525213700Smm MODE_DECOMPRESS), flags); 526207753Smm break; 527207753Smm 528207753Smm case FORMAT_LZMA: 529207753Smm ret = lzma_alone_decoder(&strm, 530213700Smm hardware_memlimit_get( 531213700Smm MODE_DECOMPRESS)); 532207753Smm break; 533207753Smm 534207753Smm case FORMAT_RAW: 535207753Smm // Memory usage has already been checked in 536207753Smm // coder_set_compression_settings(). 537207753Smm ret = lzma_raw_decoder(&strm, filters); 538207753Smm break; 539207753Smm } 540207753Smm 541207753Smm // Try to decode the headers. This will catch too low 542207753Smm // memory usage limit in case it happens in the first 543207753Smm // Block of the first Stream, which is where it very 544207753Smm // probably will happen if it is going to happen. 545207753Smm if (ret == LZMA_OK && init_format != FORMAT_RAW) { 546207753Smm strm.next_out = NULL; 547207753Smm strm.avail_out = 0; 548207753Smm ret = lzma_code(&strm, LZMA_RUN); 549207753Smm } 550312517Sdelphij#endif 551207753Smm } 552207753Smm 553207753Smm if (ret != LZMA_OK) { 554207753Smm message_error("%s: %s", pair->src_name, message_strm(ret)); 555207753Smm if (ret == LZMA_MEMLIMIT_ERROR) 556207753Smm message_mem_needed(V_ERROR, lzma_memusage(&strm)); 557207753Smm 558207753Smm return CODER_INIT_ERROR; 559207753Smm } 560207753Smm 561207753Smm return CODER_INIT_NORMAL; 562207753Smm} 563207753Smm 564207753Smm 565278433Srpaulo/// Resolve conflicts between opt_block_size and opt_block_list in single 566278433Srpaulo/// threaded mode. We want to default to opt_block_list, except when it is 567278433Srpaulo/// larger than opt_block_size. If this is the case for the current Block 568278433Srpaulo/// at *list_pos, then we break into smaller Blocks. Otherwise advance 569278433Srpaulo/// to the next Block in opt_block_list, and break apart if needed. 570278433Srpaulostatic void 571278433Srpaulosplit_block(uint64_t *block_remaining, 572278433Srpaulo uint64_t *next_block_remaining, 573278433Srpaulo size_t *list_pos) 574278433Srpaulo{ 575278433Srpaulo if (*next_block_remaining > 0) { 576278433Srpaulo // The Block at *list_pos has previously been split up. 577278433Srpaulo assert(hardware_threads_get() == 1); 578278433Srpaulo assert(opt_block_size > 0); 579278433Srpaulo assert(opt_block_list != NULL); 580278433Srpaulo 581278433Srpaulo if (*next_block_remaining > opt_block_size) { 582278433Srpaulo // We have to split the current Block at *list_pos 583278433Srpaulo // into another opt_block_size length Block. 584278433Srpaulo *block_remaining = opt_block_size; 585278433Srpaulo } else { 586278433Srpaulo // This is the last remaining split Block for the 587278433Srpaulo // Block at *list_pos. 588278433Srpaulo *block_remaining = *next_block_remaining; 589278433Srpaulo } 590278433Srpaulo 591278433Srpaulo *next_block_remaining -= *block_remaining; 592278433Srpaulo 593278433Srpaulo } else { 594278433Srpaulo // The Block at *list_pos has been finished. Go to the next 595278433Srpaulo // entry in the list. If the end of the list has been reached, 596278433Srpaulo // reuse the size of the last Block. 597278433Srpaulo if (opt_block_list[*list_pos + 1] != 0) 598278433Srpaulo ++*list_pos; 599278433Srpaulo 600278433Srpaulo *block_remaining = opt_block_list[*list_pos]; 601278433Srpaulo 602278433Srpaulo // If in single-threaded mode, split up the Block if needed. 603278433Srpaulo // This is not needed in multi-threaded mode because liblzma 604278433Srpaulo // will do this due to how threaded encoding works. 605278433Srpaulo if (hardware_threads_get() == 1 && opt_block_size > 0 606278433Srpaulo && *block_remaining > opt_block_size) { 607278433Srpaulo *next_block_remaining 608278433Srpaulo = *block_remaining - opt_block_size; 609278433Srpaulo *block_remaining = opt_block_size; 610278433Srpaulo } 611278433Srpaulo } 612278433Srpaulo} 613278433Srpaulo 614278433Srpaulo 615360523Sdelphijstatic bool 616360523Sdelphijcoder_write_output(file_pair *pair) 617360523Sdelphij{ 618360523Sdelphij if (opt_mode != MODE_TEST) { 619360523Sdelphij if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) 620360523Sdelphij return true; 621360523Sdelphij } 622360523Sdelphij 623360523Sdelphij strm.next_out = out_buf.u8; 624360523Sdelphij strm.avail_out = IO_BUFFER_SIZE; 625360523Sdelphij return false; 626360523Sdelphij} 627360523Sdelphij 628360523Sdelphij 629207753Smm/// Compress or decompress using liblzma. 630207753Smmstatic bool 631207753Smmcoder_normal(file_pair *pair) 632207753Smm{ 633207753Smm // Encoder needs to know when we have given all the input to it. 634207753Smm // The decoders need to know it too when we are using 635207753Smm // LZMA_CONCATENATED. We need to check for src_eof here, because 636278433Srpaulo // the first input chunk has been already read if decompressing, 637278433Srpaulo // and that may have been the only chunk we will read. 638207753Smm lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; 639207753Smm 640207753Smm lzma_ret ret; 641207753Smm 642207753Smm // Assume that something goes wrong. 643207753Smm bool success = false; 644207753Smm 645278433Srpaulo // block_remaining indicates how many input bytes to encode before 646278433Srpaulo // finishing the current .xz Block. The Block size is set with 647278433Srpaulo // --block-size=SIZE and --block-list. They have an effect only when 648278433Srpaulo // compressing to the .xz format. If block_remaining == UINT64_MAX, 649278433Srpaulo // only a single block is created. 650278433Srpaulo uint64_t block_remaining = UINT64_MAX; 651278433Srpaulo 652360523Sdelphij // next_block_remaining for when we are in single-threaded mode and 653278433Srpaulo // the Block in --block-list is larger than the --block-size=SIZE. 654278433Srpaulo uint64_t next_block_remaining = 0; 655278433Srpaulo 656278433Srpaulo // Position in opt_block_list. Unused if --block-list wasn't used. 657278433Srpaulo size_t list_pos = 0; 658278433Srpaulo 659278433Srpaulo // Handle --block-size for single-threaded mode and the first step 660278433Srpaulo // of --block-list. 661278433Srpaulo if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { 662278433Srpaulo // --block-size doesn't do anything here in threaded mode, 663278433Srpaulo // because the threaded encoder will take care of splitting 664278433Srpaulo // to fixed-sized Blocks. 665278433Srpaulo if (hardware_threads_get() == 1 && opt_block_size > 0) 666278433Srpaulo block_remaining = opt_block_size; 667278433Srpaulo 668278433Srpaulo // If --block-list was used, start with the first size. 669278433Srpaulo // 670278433Srpaulo // For threaded case, --block-size specifies how big Blocks 671278433Srpaulo // the encoder needs to be prepared to create at maximum 672278433Srpaulo // and --block-list will simultaneously cause new Blocks 673278433Srpaulo // to be started at specified intervals. To keep things 674278433Srpaulo // logical, the same is done in single-threaded mode. The 675278433Srpaulo // output is still not identical because in single-threaded 676278433Srpaulo // mode the size info isn't written into Block Headers. 677278433Srpaulo if (opt_block_list != NULL) { 678278433Srpaulo if (block_remaining < opt_block_list[list_pos]) { 679278433Srpaulo assert(hardware_threads_get() == 1); 680278433Srpaulo next_block_remaining = opt_block_list[list_pos] 681278433Srpaulo - block_remaining; 682278433Srpaulo } else { 683278433Srpaulo block_remaining = opt_block_list[list_pos]; 684278433Srpaulo } 685278433Srpaulo } 686278433Srpaulo } 687278433Srpaulo 688207753Smm strm.next_out = out_buf.u8; 689207753Smm strm.avail_out = IO_BUFFER_SIZE; 690207753Smm 691207753Smm while (!user_abort) { 692278433Srpaulo // Fill the input buffer if it is empty and we aren't 693278433Srpaulo // flushing or finishing. 694278433Srpaulo if (strm.avail_in == 0 && action == LZMA_RUN) { 695207753Smm strm.next_in = in_buf.u8; 696278433Srpaulo strm.avail_in = io_read(pair, &in_buf, 697278433Srpaulo my_min(block_remaining, 698278433Srpaulo IO_BUFFER_SIZE)); 699207753Smm 700207753Smm if (strm.avail_in == SIZE_MAX) 701207753Smm break; 702207753Smm 703278433Srpaulo if (pair->src_eof) { 704207753Smm action = LZMA_FINISH; 705278433Srpaulo 706278433Srpaulo } else if (block_remaining != UINT64_MAX) { 707278433Srpaulo // Start a new Block after every 708278433Srpaulo // opt_block_size bytes of input. 709278433Srpaulo block_remaining -= strm.avail_in; 710278433Srpaulo if (block_remaining == 0) 711278433Srpaulo action = LZMA_FULL_BARRIER; 712278433Srpaulo } 713278433Srpaulo 714360523Sdelphij if (action == LZMA_RUN && pair->flush_needed) 715278433Srpaulo action = LZMA_SYNC_FLUSH; 716207753Smm } 717207753Smm 718207753Smm // Let liblzma do the actual work. 719207753Smm ret = lzma_code(&strm, action); 720207753Smm 721207753Smm // Write out if the output buffer became full. 722207753Smm if (strm.avail_out == 0) { 723360523Sdelphij if (coder_write_output(pair)) 724207753Smm break; 725207753Smm } 726207753Smm 727278433Srpaulo if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH 728278433Srpaulo || action == LZMA_FULL_BARRIER)) { 729278433Srpaulo if (action == LZMA_SYNC_FLUSH) { 730278433Srpaulo // Flushing completed. Write the pending data 731360523Sdelphij // out immediately so that the reading side 732278433Srpaulo // can decompress everything compressed so far. 733360523Sdelphij if (coder_write_output(pair)) 734278433Srpaulo break; 735278433Srpaulo 736360523Sdelphij // Mark that we haven't seen any new input 737360523Sdelphij // since the previous flush. 738360523Sdelphij pair->src_has_seen_input = false; 739360523Sdelphij pair->flush_needed = false; 740278433Srpaulo } else { 741278433Srpaulo // Start a new Block after LZMA_FULL_BARRIER. 742278433Srpaulo if (opt_block_list == NULL) { 743278433Srpaulo assert(hardware_threads_get() == 1); 744278433Srpaulo assert(opt_block_size > 0); 745278433Srpaulo block_remaining = opt_block_size; 746278433Srpaulo } else { 747278433Srpaulo split_block(&block_remaining, 748278433Srpaulo &next_block_remaining, 749278433Srpaulo &list_pos); 750278433Srpaulo } 751278433Srpaulo } 752278433Srpaulo 753278433Srpaulo // Start a new Block after LZMA_FULL_FLUSH or continue 754278433Srpaulo // the same block after LZMA_SYNC_FLUSH. 755278433Srpaulo action = LZMA_RUN; 756278433Srpaulo 757278433Srpaulo } else if (ret != LZMA_OK) { 758207753Smm // Determine if the return value indicates that we 759207753Smm // won't continue coding. 760207753Smm const bool stop = ret != LZMA_NO_CHECK 761207753Smm && ret != LZMA_UNSUPPORTED_CHECK; 762207753Smm 763207753Smm if (stop) { 764207753Smm // Write the remaining bytes even if something 765207753Smm // went wrong, because that way the user gets 766207753Smm // as much data as possible, which can be good 767207753Smm // when trying to get at least some useful 768207753Smm // data out of damaged files. 769360523Sdelphij if (coder_write_output(pair)) 770207753Smm break; 771207753Smm } 772207753Smm 773207753Smm if (ret == LZMA_STREAM_END) { 774278433Srpaulo if (opt_single_stream) { 775278433Srpaulo io_fix_src_pos(pair, strm.avail_in); 776278433Srpaulo success = true; 777278433Srpaulo break; 778278433Srpaulo } 779278433Srpaulo 780207753Smm // Check that there is no trailing garbage. 781207753Smm // This is needed for LZMA_Alone and raw 782207753Smm // streams. 783207753Smm if (strm.avail_in == 0 && !pair->src_eof) { 784207753Smm // Try reading one more byte. 785207753Smm // Hopefully we don't get any more 786207753Smm // input, and thus pair->src_eof 787207753Smm // becomes true. 788207753Smm strm.avail_in = io_read( 789207753Smm pair, &in_buf, 1); 790207753Smm if (strm.avail_in == SIZE_MAX) 791207753Smm break; 792207753Smm 793207753Smm assert(strm.avail_in == 0 794207753Smm || strm.avail_in == 1); 795207753Smm } 796207753Smm 797207753Smm if (strm.avail_in == 0) { 798207753Smm assert(pair->src_eof); 799207753Smm success = true; 800207753Smm break; 801207753Smm } 802207753Smm 803207753Smm // We hadn't reached the end of the file. 804207753Smm ret = LZMA_DATA_ERROR; 805207753Smm assert(stop); 806207753Smm } 807207753Smm 808207753Smm // If we get here and stop is true, something went 809207753Smm // wrong and we print an error. Otherwise it's just 810207753Smm // a warning and coding can continue. 811207753Smm if (stop) { 812207753Smm message_error("%s: %s", pair->src_name, 813207753Smm message_strm(ret)); 814207753Smm } else { 815207753Smm message_warning("%s: %s", pair->src_name, 816207753Smm message_strm(ret)); 817207753Smm 818207753Smm // When compressing, all possible errors set 819207753Smm // stop to true. 820207753Smm assert(opt_mode != MODE_COMPRESS); 821207753Smm } 822207753Smm 823207753Smm if (ret == LZMA_MEMLIMIT_ERROR) { 824207753Smm // Display how much memory it would have 825207753Smm // actually needed. 826207753Smm message_mem_needed(V_ERROR, 827207753Smm lzma_memusage(&strm)); 828207753Smm } 829207753Smm 830207753Smm if (stop) 831207753Smm break; 832207753Smm } 833207753Smm 834207753Smm // Show progress information under certain conditions. 835207753Smm message_progress_update(); 836207753Smm } 837207753Smm 838207753Smm return success; 839207753Smm} 840207753Smm 841207753Smm 842207753Smm/// Copy from input file to output file without processing the data in any 843207753Smm/// way. This is used only when trying to decompress unrecognized files 844207753Smm/// with --decompress --stdout --force, so the output is always stdout. 845207753Smmstatic bool 846207753Smmcoder_passthru(file_pair *pair) 847207753Smm{ 848207753Smm while (strm.avail_in != 0) { 849207753Smm if (user_abort) 850207753Smm return false; 851207753Smm 852207753Smm if (io_write(pair, &in_buf, strm.avail_in)) 853207753Smm return false; 854207753Smm 855207753Smm strm.total_in += strm.avail_in; 856207753Smm strm.total_out = strm.total_in; 857207753Smm message_progress_update(); 858207753Smm 859207753Smm strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 860207753Smm if (strm.avail_in == SIZE_MAX) 861207753Smm return false; 862207753Smm } 863207753Smm 864207753Smm return true; 865207753Smm} 866207753Smm 867207753Smm 868207753Smmextern void 869207753Smmcoder_run(const char *filename) 870207753Smm{ 871207753Smm // Set and possibly print the filename for the progress message. 872207753Smm message_filename(filename); 873207753Smm 874207753Smm // Try to open the input file. 875207753Smm file_pair *pair = io_open_src(filename); 876207753Smm if (pair == NULL) 877207753Smm return; 878207753Smm 879207753Smm // Assume that something goes wrong. 880207753Smm bool success = false; 881207753Smm 882278433Srpaulo if (opt_mode == MODE_COMPRESS) { 883278433Srpaulo strm.next_in = NULL; 884278433Srpaulo strm.avail_in = 0; 885278433Srpaulo } else { 886278433Srpaulo // Read the first chunk of input data. This is needed 887278433Srpaulo // to detect the input file type. 888278433Srpaulo strm.next_in = in_buf.u8; 889278433Srpaulo strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 890278433Srpaulo } 891207753Smm 892207753Smm if (strm.avail_in != SIZE_MAX) { 893207753Smm // Initialize the coder. This will detect the file format 894207753Smm // and, in decompression or testing mode, check the memory 895207753Smm // usage of the first Block too. This way we don't try to 896207753Smm // open the destination file if we see that coding wouldn't 897207753Smm // work at all anyway. This also avoids deleting the old 898207753Smm // "target" file if --force was used. 899207753Smm const enum coder_init_ret init_ret = coder_init(pair); 900207753Smm 901207753Smm if (init_ret != CODER_INIT_ERROR && !user_abort) { 902207753Smm // Don't open the destination file when --test 903207753Smm // is used. 904207753Smm if (opt_mode == MODE_TEST || !io_open_dest(pair)) { 905278433Srpaulo // Remember the current time. It is needed 906360523Sdelphij // for progress indicator. 907278433Srpaulo mytime_set_start_time(); 908278433Srpaulo 909207753Smm // Initialize the progress indicator. 910360523Sdelphij const bool is_passthru = init_ret 911360523Sdelphij == CODER_INIT_PASSTHRU; 912207753Smm const uint64_t in_size 913360523Sdelphij = pair->src_st.st_size <= 0 914360523Sdelphij ? 0 : (uint64_t)(pair->src_st.st_size); 915360523Sdelphij message_progress_start(&strm, 916360523Sdelphij is_passthru, in_size); 917207753Smm 918207753Smm // Do the actual coding or passthru. 919360523Sdelphij if (is_passthru) 920360523Sdelphij success = coder_passthru(pair); 921360523Sdelphij else 922207753Smm success = coder_normal(pair); 923207753Smm 924207753Smm message_progress_end(success); 925207753Smm } 926207753Smm } 927207753Smm } 928207753Smm 929207753Smm // Close the file pair. It needs to know if coding was successful to 930207753Smm // know if the source or target file should be unlinked. 931207753Smm io_close(pair, success); 932207753Smm 933207753Smm return; 934207753Smm} 935278433Srpaulo 936278433Srpaulo 937278433Srpaulo#ifndef NDEBUG 938278433Srpauloextern void 939278433Srpaulocoder_free(void) 940278433Srpaulo{ 941278433Srpaulo lzma_end(&strm); 942278433Srpaulo return; 943278433Srpaulo} 944278433Srpaulo#endif 945