1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file args.c 4207753Smm/// \brief Argument parsing 5207753Smm/// 6207753Smm/// \note Filter-specific options parsing is in options.c. 7207753Smm// 8207753Smm// Author: Lasse Collin 9207753Smm// 10207753Smm// This file has been put into the public domain. 11207753Smm// You can do whatever you want with this file. 12207753Smm// 13207753Smm/////////////////////////////////////////////////////////////////////////////// 14207753Smm 15207753Smm#include "private.h" 16207753Smm 17207753Smm#include "getopt.h" 18207753Smm#include <ctype.h> 19207753Smm 20207753Smm 21207753Smmbool opt_stdout = false; 22207753Smmbool opt_force = false; 23207753Smmbool opt_keep_original = false; 24207753Smmbool opt_robot = false; 25207753Smm 26207753Smm// We don't modify or free() this, but we need to assign it in some 27207753Smm// non-const pointers. 28213700Smmconst char stdin_filename[] = "(stdin)"; 29207753Smm 30207753Smm 31213700Smm/// Parse and set the memory usage limit for compression and/or decompression. 32207753Smmstatic void 33213700Smmparse_memlimit(const char *name, const char *name_percentage, char *str, 34213700Smm bool set_compress, bool set_decompress) 35213700Smm{ 36213700Smm bool is_percentage = false; 37213700Smm uint64_t value; 38213700Smm 39213700Smm const size_t len = strlen(str); 40213700Smm if (len > 0 && str[len - 1] == '%') { 41213700Smm str[len - 1] = '\0'; 42213700Smm is_percentage = true; 43213700Smm value = str_to_uint64(name_percentage, str, 1, 100); 44213700Smm } else { 45213700Smm // On 32-bit systems, SIZE_MAX would make more sense than 46213700Smm // UINT64_MAX. But use UINT64_MAX still so that scripts 47213700Smm // that assume > 4 GiB values don't break. 48213700Smm value = str_to_uint64(name, str, 0, UINT64_MAX); 49213700Smm } 50213700Smm 51213700Smm hardware_memlimit_set( 52213700Smm value, set_compress, set_decompress, is_percentage); 53213700Smm return; 54213700Smm} 55213700Smm 56213700Smm 57213700Smmstatic void 58207753Smmparse_real(args_info *args, int argc, char **argv) 59207753Smm{ 60207753Smm enum { 61213700Smm OPT_X86 = INT_MIN, 62207753Smm OPT_POWERPC, 63207753Smm OPT_IA64, 64207753Smm OPT_ARM, 65207753Smm OPT_ARMTHUMB, 66207753Smm OPT_SPARC, 67207753Smm OPT_DELTA, 68207753Smm OPT_LZMA1, 69207753Smm OPT_LZMA2, 70207753Smm 71207753Smm OPT_NO_SPARSE, 72207753Smm OPT_FILES, 73207753Smm OPT_FILES0, 74213700Smm OPT_MEM_COMPRESS, 75213700Smm OPT_MEM_DECOMPRESS, 76213700Smm OPT_NO_ADJUST, 77207753Smm OPT_INFO_MEMORY, 78207753Smm OPT_ROBOT, 79207753Smm }; 80207753Smm 81207753Smm static const char short_opts[] 82207753Smm = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 83207753Smm 84207753Smm static const struct option long_opts[] = { 85207753Smm // Operation mode 86207753Smm { "compress", no_argument, NULL, 'z' }, 87207753Smm { "decompress", no_argument, NULL, 'd' }, 88207753Smm { "uncompress", no_argument, NULL, 'd' }, 89207753Smm { "test", no_argument, NULL, 't' }, 90207753Smm { "list", no_argument, NULL, 'l' }, 91207753Smm 92207753Smm // Operation modifiers 93207753Smm { "keep", no_argument, NULL, 'k' }, 94207753Smm { "force", no_argument, NULL, 'f' }, 95207753Smm { "stdout", no_argument, NULL, 'c' }, 96207753Smm { "to-stdout", no_argument, NULL, 'c' }, 97207753Smm { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 98207753Smm { "suffix", required_argument, NULL, 'S' }, 99207753Smm // { "recursive", no_argument, NULL, 'r' }, // TODO 100207753Smm { "files", optional_argument, NULL, OPT_FILES }, 101207753Smm { "files0", optional_argument, NULL, OPT_FILES0 }, 102207753Smm 103207753Smm // Basic compression settings 104207753Smm { "format", required_argument, NULL, 'F' }, 105207753Smm { "check", required_argument, NULL, 'C' }, 106213700Smm { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, 107213700Smm { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, 108213700Smm { "memlimit", required_argument, NULL, 'M' }, 109213700Smm { "memory", required_argument, NULL, 'M' }, // Old alias 110213700Smm { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, 111207753Smm { "threads", required_argument, NULL, 'T' }, 112207753Smm 113207753Smm { "extreme", no_argument, NULL, 'e' }, 114207753Smm { "fast", no_argument, NULL, '0' }, 115207753Smm { "best", no_argument, NULL, '9' }, 116207753Smm 117207753Smm // Filters 118207753Smm { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 119207753Smm { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 120207753Smm { "x86", optional_argument, NULL, OPT_X86 }, 121207753Smm { "powerpc", optional_argument, NULL, OPT_POWERPC }, 122207753Smm { "ia64", optional_argument, NULL, OPT_IA64 }, 123207753Smm { "arm", optional_argument, NULL, OPT_ARM }, 124207753Smm { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 125207753Smm { "sparc", optional_argument, NULL, OPT_SPARC }, 126207753Smm { "delta", optional_argument, NULL, OPT_DELTA }, 127207753Smm 128207753Smm // Other options 129207753Smm { "quiet", no_argument, NULL, 'q' }, 130207753Smm { "verbose", no_argument, NULL, 'v' }, 131207753Smm { "no-warn", no_argument, NULL, 'Q' }, 132207753Smm { "robot", no_argument, NULL, OPT_ROBOT }, 133207753Smm { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 134207753Smm { "help", no_argument, NULL, 'h' }, 135207753Smm { "long-help", no_argument, NULL, 'H' }, 136207753Smm { "version", no_argument, NULL, 'V' }, 137207753Smm 138213700Smm { NULL, 0, NULL, 0 } 139207753Smm }; 140207753Smm 141207753Smm int c; 142207753Smm 143207753Smm while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 144207753Smm != -1) { 145207753Smm switch (c) { 146207753Smm // Compression preset (also for decompression if --format=raw) 147207753Smm case '0': case '1': case '2': case '3': case '4': 148207753Smm case '5': case '6': case '7': case '8': case '9': 149207753Smm coder_set_preset(c - '0'); 150207753Smm break; 151207753Smm 152213700Smm // --memlimit-compress 153213700Smm case OPT_MEM_COMPRESS: 154213700Smm parse_memlimit("memlimit-compress", 155213700Smm "memlimit-compress%", optarg, 156213700Smm true, false); 157213700Smm break; 158207753Smm 159213700Smm // --memlimit-decompress 160213700Smm case OPT_MEM_DECOMPRESS: 161213700Smm parse_memlimit("memlimit-decompress", 162213700Smm "memlimit-decompress%", optarg, 163213700Smm false, true); 164207753Smm break; 165207753Smm 166213700Smm // --memlimit 167213700Smm case 'M': 168213700Smm parse_memlimit("memlimit", "memlimit%", optarg, 169213700Smm true, true); 170213700Smm break; 171213700Smm 172207753Smm // --suffix 173207753Smm case 'S': 174207753Smm suffix_set(optarg); 175207753Smm break; 176207753Smm 177207753Smm case 'T': 178207753Smm hardware_threadlimit_set(str_to_uint64( 179207753Smm "threads", optarg, 0, UINT32_MAX)); 180207753Smm break; 181207753Smm 182207753Smm // --version 183207753Smm case 'V': 184207753Smm // This doesn't return. 185207753Smm message_version(); 186207753Smm 187207753Smm // --stdout 188207753Smm case 'c': 189207753Smm opt_stdout = true; 190207753Smm break; 191207753Smm 192207753Smm // --decompress 193207753Smm case 'd': 194207753Smm opt_mode = MODE_DECOMPRESS; 195207753Smm break; 196207753Smm 197207753Smm // --extreme 198207753Smm case 'e': 199207753Smm coder_set_extreme(); 200207753Smm break; 201207753Smm 202207753Smm // --force 203207753Smm case 'f': 204207753Smm opt_force = true; 205207753Smm break; 206207753Smm 207207753Smm // --info-memory 208207753Smm case OPT_INFO_MEMORY: 209207753Smm // This doesn't return. 210213700Smm hardware_memlimit_show(); 211207753Smm 212207753Smm // --help 213207753Smm case 'h': 214207753Smm // This doesn't return. 215207753Smm message_help(false); 216207753Smm 217207753Smm // --long-help 218207753Smm case 'H': 219207753Smm // This doesn't return. 220207753Smm message_help(true); 221207753Smm 222207753Smm // --list 223207753Smm case 'l': 224207753Smm opt_mode = MODE_LIST; 225207753Smm break; 226207753Smm 227207753Smm // --keep 228207753Smm case 'k': 229207753Smm opt_keep_original = true; 230207753Smm break; 231207753Smm 232207753Smm // --quiet 233207753Smm case 'q': 234207753Smm message_verbosity_decrease(); 235207753Smm break; 236207753Smm 237207753Smm case 'Q': 238207753Smm set_exit_no_warn(); 239207753Smm break; 240207753Smm 241207753Smm case 't': 242207753Smm opt_mode = MODE_TEST; 243207753Smm break; 244207753Smm 245207753Smm // --verbose 246207753Smm case 'v': 247207753Smm message_verbosity_increase(); 248207753Smm break; 249207753Smm 250207753Smm // --robot 251207753Smm case OPT_ROBOT: 252207753Smm opt_robot = true; 253207753Smm 254207753Smm // This is to make sure that floating point numbers 255207753Smm // always have a dot as decimal separator. 256207753Smm setlocale(LC_NUMERIC, "C"); 257207753Smm break; 258207753Smm 259207753Smm case 'z': 260207753Smm opt_mode = MODE_COMPRESS; 261207753Smm break; 262207753Smm 263207753Smm // Filter setup 264207753Smm 265207753Smm case OPT_X86: 266207753Smm coder_add_filter(LZMA_FILTER_X86, 267207753Smm options_bcj(optarg)); 268207753Smm break; 269207753Smm 270207753Smm case OPT_POWERPC: 271207753Smm coder_add_filter(LZMA_FILTER_POWERPC, 272207753Smm options_bcj(optarg)); 273207753Smm break; 274207753Smm 275207753Smm case OPT_IA64: 276207753Smm coder_add_filter(LZMA_FILTER_IA64, 277207753Smm options_bcj(optarg)); 278207753Smm break; 279207753Smm 280207753Smm case OPT_ARM: 281207753Smm coder_add_filter(LZMA_FILTER_ARM, 282207753Smm options_bcj(optarg)); 283207753Smm break; 284207753Smm 285207753Smm case OPT_ARMTHUMB: 286207753Smm coder_add_filter(LZMA_FILTER_ARMTHUMB, 287207753Smm options_bcj(optarg)); 288207753Smm break; 289207753Smm 290207753Smm case OPT_SPARC: 291207753Smm coder_add_filter(LZMA_FILTER_SPARC, 292207753Smm options_bcj(optarg)); 293207753Smm break; 294207753Smm 295207753Smm case OPT_DELTA: 296207753Smm coder_add_filter(LZMA_FILTER_DELTA, 297207753Smm options_delta(optarg)); 298207753Smm break; 299207753Smm 300207753Smm case OPT_LZMA1: 301207753Smm coder_add_filter(LZMA_FILTER_LZMA1, 302207753Smm options_lzma(optarg)); 303207753Smm break; 304207753Smm 305207753Smm case OPT_LZMA2: 306207753Smm coder_add_filter(LZMA_FILTER_LZMA2, 307207753Smm options_lzma(optarg)); 308207753Smm break; 309207753Smm 310207753Smm // Other 311207753Smm 312207753Smm // --format 313207753Smm case 'F': { 314207753Smm // Just in case, support both "lzma" and "alone" since 315207753Smm // the latter was used for forward compatibility in 316207753Smm // LZMA Utils 4.32.x. 317207753Smm static const struct { 318207753Smm char str[8]; 319207753Smm enum format_type format; 320207753Smm } types[] = { 321207753Smm { "auto", FORMAT_AUTO }, 322207753Smm { "xz", FORMAT_XZ }, 323207753Smm { "lzma", FORMAT_LZMA }, 324207753Smm { "alone", FORMAT_LZMA }, 325207753Smm // { "gzip", FORMAT_GZIP }, 326207753Smm // { "gz", FORMAT_GZIP }, 327207753Smm { "raw", FORMAT_RAW }, 328207753Smm }; 329207753Smm 330207753Smm size_t i = 0; 331207753Smm while (strcmp(types[i].str, optarg) != 0) 332207753Smm if (++i == ARRAY_SIZE(types)) 333207753Smm message_fatal(_("%s: Unknown file " 334207753Smm "format type"), 335207753Smm optarg); 336207753Smm 337207753Smm opt_format = types[i].format; 338207753Smm break; 339207753Smm } 340207753Smm 341207753Smm // --check 342207753Smm case 'C': { 343207753Smm static const struct { 344207753Smm char str[8]; 345207753Smm lzma_check check; 346207753Smm } types[] = { 347207753Smm { "none", LZMA_CHECK_NONE }, 348207753Smm { "crc32", LZMA_CHECK_CRC32 }, 349207753Smm { "crc64", LZMA_CHECK_CRC64 }, 350207753Smm { "sha256", LZMA_CHECK_SHA256 }, 351207753Smm }; 352207753Smm 353207753Smm size_t i = 0; 354207753Smm while (strcmp(types[i].str, optarg) != 0) { 355207753Smm if (++i == ARRAY_SIZE(types)) 356207753Smm message_fatal(_("%s: Unsupported " 357207753Smm "integrity " 358207753Smm "check type"), optarg); 359207753Smm } 360207753Smm 361207753Smm // Use a separate check in case we are using different 362207753Smm // liblzma than what was used to compile us. 363207753Smm if (!lzma_check_is_supported(types[i].check)) 364207753Smm message_fatal(_("%s: Unsupported integrity " 365207753Smm "check type"), optarg); 366207753Smm 367207753Smm coder_set_check(types[i].check); 368207753Smm break; 369207753Smm } 370207753Smm 371207753Smm case OPT_NO_SPARSE: 372207753Smm io_no_sparse(); 373207753Smm break; 374207753Smm 375207753Smm case OPT_FILES: 376207753Smm args->files_delim = '\n'; 377207753Smm 378207753Smm // Fall through 379207753Smm 380207753Smm case OPT_FILES0: 381207753Smm if (args->files_name != NULL) 382207753Smm message_fatal(_("Only one file can be " 383207753Smm "specified with `--files' " 384207753Smm "or `--files0'.")); 385207753Smm 386207753Smm if (optarg == NULL) { 387207753Smm args->files_name = (char *)stdin_filename; 388207753Smm args->files_file = stdin; 389207753Smm } else { 390207753Smm args->files_name = optarg; 391207753Smm args->files_file = fopen(optarg, 392207753Smm c == OPT_FILES ? "r" : "rb"); 393207753Smm if (args->files_file == NULL) 394207753Smm message_fatal("%s: %s", optarg, 395207753Smm strerror(errno)); 396207753Smm } 397207753Smm 398207753Smm break; 399207753Smm 400213700Smm case OPT_NO_ADJUST: 401213700Smm opt_auto_adjust = false; 402213700Smm break; 403213700Smm 404207753Smm default: 405207753Smm message_try_help(); 406207753Smm tuklib_exit(E_ERROR, E_ERROR, false); 407207753Smm } 408207753Smm } 409207753Smm 410207753Smm return; 411207753Smm} 412207753Smm 413207753Smm 414207753Smmstatic void 415213700Smmparse_environment(args_info *args, char *argv0, const char *varname) 416207753Smm{ 417213700Smm char *env = getenv(varname); 418207753Smm if (env == NULL) 419207753Smm return; 420207753Smm 421207753Smm // We modify the string, so make a copy of it. 422207753Smm env = xstrdup(env); 423207753Smm 424207753Smm // Calculate the number of arguments in env. argc stats at one 425207753Smm // to include space for the program name. 426207753Smm int argc = 1; 427207753Smm bool prev_was_space = true; 428207753Smm for (size_t i = 0; env[i] != '\0'; ++i) { 429207753Smm // NOTE: Cast to unsigned char is needed so that correct 430207753Smm // value gets passed to isspace(), which expects 431207753Smm // unsigned char cast to int. Casting to int is done 432207753Smm // automatically due to integer promotion, but we need to 433207753Smm // force char to unsigned char manually. Otherwise 8-bit 434207753Smm // characters would get promoted to wrong value if 435207753Smm // char is signed. 436207753Smm if (isspace((unsigned char)env[i])) { 437207753Smm prev_was_space = true; 438207753Smm } else if (prev_was_space) { 439207753Smm prev_was_space = false; 440207753Smm 441245128Smm // Keep argc small enough to fit into a signed int 442207753Smm // and to keep it usable for memory allocation. 443213700Smm if (++argc == my_min( 444213700Smm INT_MAX, SIZE_MAX / sizeof(char *))) 445207753Smm message_fatal(_("The environment variable " 446213700Smm "%s contains too many " 447213700Smm "arguments"), varname); 448207753Smm } 449207753Smm } 450207753Smm 451207753Smm // Allocate memory to hold pointers to the arguments. Add one to get 452207753Smm // space for the terminating NULL (if some systems happen to need it). 453207753Smm char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 454207753Smm argv[0] = argv0; 455207753Smm argv[argc] = NULL; 456207753Smm 457207753Smm // Go through the string again. Split the arguments using '\0' 458207753Smm // characters and add pointers to the resulting strings to argv. 459207753Smm argc = 1; 460207753Smm prev_was_space = true; 461207753Smm for (size_t i = 0; env[i] != '\0'; ++i) { 462207753Smm if (isspace((unsigned char)env[i])) { 463207753Smm prev_was_space = true; 464207753Smm env[i] = '\0'; 465207753Smm } else if (prev_was_space) { 466207753Smm prev_was_space = false; 467207753Smm argv[argc++] = env + i; 468207753Smm } 469207753Smm } 470207753Smm 471207753Smm // Parse the argument list we got from the environment. All non-option 472207753Smm // arguments i.e. filenames are ignored. 473207753Smm parse_real(args, argc, argv); 474207753Smm 475207753Smm // Reset the state of the getopt_long() so that we can parse the 476207753Smm // command line options too. There are two incompatible ways to 477207753Smm // do it. 478207753Smm#ifdef HAVE_OPTRESET 479207753Smm // BSD 480207753Smm optind = 1; 481207753Smm optreset = 1; 482207753Smm#else 483207753Smm // GNU, Solaris 484207753Smm optind = 0; 485207753Smm#endif 486207753Smm 487207753Smm // We don't need the argument list from environment anymore. 488207753Smm free(argv); 489207753Smm free(env); 490207753Smm 491207753Smm return; 492207753Smm} 493207753Smm 494207753Smm 495207753Smmextern void 496207753Smmargs_parse(args_info *args, int argc, char **argv) 497207753Smm{ 498207753Smm // Initialize those parts of *args that we need later. 499207753Smm args->files_name = NULL; 500207753Smm args->files_file = NULL; 501207753Smm args->files_delim = '\0'; 502207753Smm 503207753Smm // Check how we were called. 504207753Smm { 505207753Smm // Remove the leading path name, if any. 506207753Smm const char *name = strrchr(argv[0], '/'); 507207753Smm if (name == NULL) 508207753Smm name = argv[0]; 509207753Smm else 510207753Smm ++name; 511207753Smm 512207753Smm // NOTE: It's possible that name[0] is now '\0' if argv[0] 513207753Smm // is weird, but it doesn't matter here. 514207753Smm 515207753Smm // Look for full command names instead of substrings like 516207753Smm // "un", "cat", and "lz" to reduce possibility of false 517207753Smm // positives when the programs have been renamed. 518207753Smm if (strstr(name, "xzcat") != NULL) { 519207753Smm opt_mode = MODE_DECOMPRESS; 520207753Smm opt_stdout = true; 521207753Smm } else if (strstr(name, "unxz") != NULL) { 522207753Smm opt_mode = MODE_DECOMPRESS; 523207753Smm } else if (strstr(name, "lzcat") != NULL) { 524207753Smm opt_format = FORMAT_LZMA; 525207753Smm opt_mode = MODE_DECOMPRESS; 526207753Smm opt_stdout = true; 527207753Smm } else if (strstr(name, "unlzma") != NULL) { 528207753Smm opt_format = FORMAT_LZMA; 529207753Smm opt_mode = MODE_DECOMPRESS; 530207753Smm } else if (strstr(name, "lzma") != NULL) { 531207753Smm opt_format = FORMAT_LZMA; 532207753Smm } 533207753Smm } 534207753Smm 535213700Smm // First the flags from the environment 536213700Smm parse_environment(args, argv[0], "XZ_DEFAULTS"); 537213700Smm parse_environment(args, argv[0], "XZ_OPT"); 538207753Smm 539207753Smm // Then from the command line 540207753Smm parse_real(args, argc, argv); 541207753Smm 542207753Smm // Never remove the source file when the destination is not on disk. 543207753Smm // In test mode the data is written nowhere, but setting opt_stdout 544207753Smm // will make the rest of the code behave well. 545207753Smm if (opt_stdout || opt_mode == MODE_TEST) { 546207753Smm opt_keep_original = true; 547207753Smm opt_stdout = true; 548207753Smm } 549207753Smm 550207753Smm // When compressing, if no --format flag was used, or it 551207753Smm // was --format=auto, we compress to the .xz format. 552207753Smm if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 553207753Smm opt_format = FORMAT_XZ; 554207753Smm 555207753Smm // Compression settings need to be validated (options themselves and 556207753Smm // their memory usage) when compressing to any file format. It has to 557207753Smm // be done also when uncompressing raw data, since for raw decoding 558207753Smm // the options given on the command line are used to know what kind 559207753Smm // of raw data we are supposed to decode. 560207753Smm if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) 561207753Smm coder_set_compression_settings(); 562207753Smm 563207753Smm // If no filenames are given, use stdin. 564207753Smm if (argv[optind] == NULL && args->files_name == NULL) { 565207753Smm // We don't modify or free() the "-" constant. The caller 566207753Smm // modifies this so don't make the struct itself const. 567207753Smm static char *names_stdin[2] = { (char *)"-", NULL }; 568207753Smm args->arg_names = names_stdin; 569207753Smm args->arg_count = 1; 570207753Smm } else { 571207753Smm // We got at least one filename from the command line, or 572207753Smm // --files or --files0 was specified. 573207753Smm args->arg_names = argv + optind; 574207753Smm args->arg_count = argc - optind; 575207753Smm } 576207753Smm 577207753Smm return; 578207753Smm} 579