args.c revision 207753
1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file args.c 4207753Smm/// \brief Argument parsing 5207753Smm/// 6207753Smm/// \note Filter-specific options parsing is in options.c. 7207753Smm// 8207753Smm// Author: Lasse Collin 9207753Smm// 10207753Smm// This file has been put into the public domain. 11207753Smm// You can do whatever you want with this file. 12207753Smm// 13207753Smm/////////////////////////////////////////////////////////////////////////////// 14207753Smm 15207753Smm#include "private.h" 16207753Smm 17207753Smm#include "getopt.h" 18207753Smm#include <ctype.h> 19207753Smm 20207753Smm 21207753Smmbool opt_stdout = false; 22207753Smmbool opt_force = false; 23207753Smmbool opt_keep_original = false; 24207753Smmbool opt_robot = false; 25207753Smm 26207753Smm// We don't modify or free() this, but we need to assign it in some 27207753Smm// non-const pointers. 28207753Smmconst char *stdin_filename = "(stdin)"; 29207753Smm 30207753Smm 31207753Smmstatic void 32207753Smmparse_real(args_info *args, int argc, char **argv) 33207753Smm{ 34207753Smm enum { 35207753Smm OPT_SUBBLOCK = INT_MIN, 36207753Smm OPT_X86, 37207753Smm OPT_POWERPC, 38207753Smm OPT_IA64, 39207753Smm OPT_ARM, 40207753Smm OPT_ARMTHUMB, 41207753Smm OPT_SPARC, 42207753Smm OPT_DELTA, 43207753Smm OPT_LZMA1, 44207753Smm OPT_LZMA2, 45207753Smm 46207753Smm OPT_NO_SPARSE, 47207753Smm OPT_FILES, 48207753Smm OPT_FILES0, 49207753Smm OPT_INFO_MEMORY, 50207753Smm OPT_ROBOT, 51207753Smm }; 52207753Smm 53207753Smm static const char short_opts[] 54207753Smm = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 55207753Smm 56207753Smm static const struct option long_opts[] = { 57207753Smm // Operation mode 58207753Smm { "compress", no_argument, NULL, 'z' }, 59207753Smm { "decompress", no_argument, NULL, 'd' }, 60207753Smm { "uncompress", no_argument, NULL, 'd' }, 61207753Smm { "test", no_argument, NULL, 't' }, 62207753Smm { "list", no_argument, NULL, 'l' }, 63207753Smm 64207753Smm // Operation modifiers 65207753Smm { "keep", no_argument, NULL, 'k' }, 66207753Smm { "force", no_argument, NULL, 'f' }, 67207753Smm { "stdout", no_argument, NULL, 'c' }, 68207753Smm { "to-stdout", no_argument, NULL, 'c' }, 69207753Smm { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 70207753Smm { "suffix", required_argument, NULL, 'S' }, 71207753Smm // { "recursive", no_argument, NULL, 'r' }, // TODO 72207753Smm { "files", optional_argument, NULL, OPT_FILES }, 73207753Smm { "files0", optional_argument, NULL, OPT_FILES0 }, 74207753Smm 75207753Smm // Basic compression settings 76207753Smm { "format", required_argument, NULL, 'F' }, 77207753Smm { "check", required_argument, NULL, 'C' }, 78207753Smm { "memory", required_argument, NULL, 'M' }, 79207753Smm { "threads", required_argument, NULL, 'T' }, 80207753Smm 81207753Smm { "extreme", no_argument, NULL, 'e' }, 82207753Smm { "fast", no_argument, NULL, '0' }, 83207753Smm { "best", no_argument, NULL, '9' }, 84207753Smm 85207753Smm // Filters 86207753Smm { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 87207753Smm { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 88207753Smm { "x86", optional_argument, NULL, OPT_X86 }, 89207753Smm { "powerpc", optional_argument, NULL, OPT_POWERPC }, 90207753Smm { "ia64", optional_argument, NULL, OPT_IA64 }, 91207753Smm { "arm", optional_argument, NULL, OPT_ARM }, 92207753Smm { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 93207753Smm { "sparc", optional_argument, NULL, OPT_SPARC }, 94207753Smm { "delta", optional_argument, NULL, OPT_DELTA }, 95207753Smm { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, 96207753Smm 97207753Smm // Other options 98207753Smm { "quiet", no_argument, NULL, 'q' }, 99207753Smm { "verbose", no_argument, NULL, 'v' }, 100207753Smm { "no-warn", no_argument, NULL, 'Q' }, 101207753Smm { "robot", no_argument, NULL, OPT_ROBOT }, 102207753Smm { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 103207753Smm { "help", no_argument, NULL, 'h' }, 104207753Smm { "long-help", no_argument, NULL, 'H' }, 105207753Smm { "version", no_argument, NULL, 'V' }, 106207753Smm 107207753Smm { NULL, 0, NULL, 0 } 108207753Smm }; 109207753Smm 110207753Smm int c; 111207753Smm 112207753Smm while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 113207753Smm != -1) { 114207753Smm switch (c) { 115207753Smm // Compression preset (also for decompression if --format=raw) 116207753Smm case '0': case '1': case '2': case '3': case '4': 117207753Smm case '5': case '6': case '7': case '8': case '9': 118207753Smm coder_set_preset(c - '0'); 119207753Smm break; 120207753Smm 121207753Smm // --memory 122207753Smm case 'M': { 123207753Smm // Support specifying the limit as a percentage of 124207753Smm // installed physical RAM. 125207753Smm size_t len = strlen(optarg); 126207753Smm if (len > 0 && optarg[len - 1] == '%') { 127207753Smm optarg[len - 1] = '\0'; 128207753Smm hardware_memlimit_set_percentage( 129207753Smm str_to_uint64( 130207753Smm "memory%", optarg, 1, 100)); 131207753Smm } else { 132207753Smm // On 32-bit systems, SIZE_MAX would make more 133207753Smm // sense than UINT64_MAX. But use UINT64_MAX 134207753Smm // still so that scripts that assume > 4 GiB 135207753Smm // values don't break. 136207753Smm hardware_memlimit_set(str_to_uint64( 137207753Smm "memory", optarg, 138207753Smm 0, UINT64_MAX)); 139207753Smm } 140207753Smm 141207753Smm break; 142207753Smm } 143207753Smm 144207753Smm // --suffix 145207753Smm case 'S': 146207753Smm suffix_set(optarg); 147207753Smm break; 148207753Smm 149207753Smm case 'T': 150207753Smm hardware_threadlimit_set(str_to_uint64( 151207753Smm "threads", optarg, 0, UINT32_MAX)); 152207753Smm break; 153207753Smm 154207753Smm // --version 155207753Smm case 'V': 156207753Smm // This doesn't return. 157207753Smm message_version(); 158207753Smm 159207753Smm // --stdout 160207753Smm case 'c': 161207753Smm opt_stdout = true; 162207753Smm break; 163207753Smm 164207753Smm // --decompress 165207753Smm case 'd': 166207753Smm opt_mode = MODE_DECOMPRESS; 167207753Smm break; 168207753Smm 169207753Smm // --extreme 170207753Smm case 'e': 171207753Smm coder_set_extreme(); 172207753Smm break; 173207753Smm 174207753Smm // --force 175207753Smm case 'f': 176207753Smm opt_force = true; 177207753Smm break; 178207753Smm 179207753Smm // --info-memory 180207753Smm case OPT_INFO_MEMORY: 181207753Smm // This doesn't return. 182207753Smm message_memlimit(); 183207753Smm 184207753Smm // --help 185207753Smm case 'h': 186207753Smm // This doesn't return. 187207753Smm message_help(false); 188207753Smm 189207753Smm // --long-help 190207753Smm case 'H': 191207753Smm // This doesn't return. 192207753Smm message_help(true); 193207753Smm 194207753Smm // --list 195207753Smm case 'l': 196207753Smm opt_mode = MODE_LIST; 197207753Smm break; 198207753Smm 199207753Smm // --keep 200207753Smm case 'k': 201207753Smm opt_keep_original = true; 202207753Smm break; 203207753Smm 204207753Smm // --quiet 205207753Smm case 'q': 206207753Smm message_verbosity_decrease(); 207207753Smm break; 208207753Smm 209207753Smm case 'Q': 210207753Smm set_exit_no_warn(); 211207753Smm break; 212207753Smm 213207753Smm case 't': 214207753Smm opt_mode = MODE_TEST; 215207753Smm break; 216207753Smm 217207753Smm // --verbose 218207753Smm case 'v': 219207753Smm message_verbosity_increase(); 220207753Smm break; 221207753Smm 222207753Smm // --robot 223207753Smm case OPT_ROBOT: 224207753Smm opt_robot = true; 225207753Smm 226207753Smm // This is to make sure that floating point numbers 227207753Smm // always have a dot as decimal separator. 228207753Smm setlocale(LC_NUMERIC, "C"); 229207753Smm break; 230207753Smm 231207753Smm case 'z': 232207753Smm opt_mode = MODE_COMPRESS; 233207753Smm break; 234207753Smm 235207753Smm // Filter setup 236207753Smm 237207753Smm case OPT_SUBBLOCK: 238207753Smm coder_add_filter(LZMA_FILTER_SUBBLOCK, 239207753Smm options_subblock(optarg)); 240207753Smm break; 241207753Smm 242207753Smm case OPT_X86: 243207753Smm coder_add_filter(LZMA_FILTER_X86, 244207753Smm options_bcj(optarg)); 245207753Smm break; 246207753Smm 247207753Smm case OPT_POWERPC: 248207753Smm coder_add_filter(LZMA_FILTER_POWERPC, 249207753Smm options_bcj(optarg)); 250207753Smm break; 251207753Smm 252207753Smm case OPT_IA64: 253207753Smm coder_add_filter(LZMA_FILTER_IA64, 254207753Smm options_bcj(optarg)); 255207753Smm break; 256207753Smm 257207753Smm case OPT_ARM: 258207753Smm coder_add_filter(LZMA_FILTER_ARM, 259207753Smm options_bcj(optarg)); 260207753Smm break; 261207753Smm 262207753Smm case OPT_ARMTHUMB: 263207753Smm coder_add_filter(LZMA_FILTER_ARMTHUMB, 264207753Smm options_bcj(optarg)); 265207753Smm break; 266207753Smm 267207753Smm case OPT_SPARC: 268207753Smm coder_add_filter(LZMA_FILTER_SPARC, 269207753Smm options_bcj(optarg)); 270207753Smm break; 271207753Smm 272207753Smm case OPT_DELTA: 273207753Smm coder_add_filter(LZMA_FILTER_DELTA, 274207753Smm options_delta(optarg)); 275207753Smm break; 276207753Smm 277207753Smm case OPT_LZMA1: 278207753Smm coder_add_filter(LZMA_FILTER_LZMA1, 279207753Smm options_lzma(optarg)); 280207753Smm break; 281207753Smm 282207753Smm case OPT_LZMA2: 283207753Smm coder_add_filter(LZMA_FILTER_LZMA2, 284207753Smm options_lzma(optarg)); 285207753Smm break; 286207753Smm 287207753Smm // Other 288207753Smm 289207753Smm // --format 290207753Smm case 'F': { 291207753Smm // Just in case, support both "lzma" and "alone" since 292207753Smm // the latter was used for forward compatibility in 293207753Smm // LZMA Utils 4.32.x. 294207753Smm static const struct { 295207753Smm char str[8]; 296207753Smm enum format_type format; 297207753Smm } types[] = { 298207753Smm { "auto", FORMAT_AUTO }, 299207753Smm { "xz", FORMAT_XZ }, 300207753Smm { "lzma", FORMAT_LZMA }, 301207753Smm { "alone", FORMAT_LZMA }, 302207753Smm // { "gzip", FORMAT_GZIP }, 303207753Smm // { "gz", FORMAT_GZIP }, 304207753Smm { "raw", FORMAT_RAW }, 305207753Smm }; 306207753Smm 307207753Smm size_t i = 0; 308207753Smm while (strcmp(types[i].str, optarg) != 0) 309207753Smm if (++i == ARRAY_SIZE(types)) 310207753Smm message_fatal(_("%s: Unknown file " 311207753Smm "format type"), 312207753Smm optarg); 313207753Smm 314207753Smm opt_format = types[i].format; 315207753Smm break; 316207753Smm } 317207753Smm 318207753Smm // --check 319207753Smm case 'C': { 320207753Smm static const struct { 321207753Smm char str[8]; 322207753Smm lzma_check check; 323207753Smm } types[] = { 324207753Smm { "none", LZMA_CHECK_NONE }, 325207753Smm { "crc32", LZMA_CHECK_CRC32 }, 326207753Smm { "crc64", LZMA_CHECK_CRC64 }, 327207753Smm { "sha256", LZMA_CHECK_SHA256 }, 328207753Smm }; 329207753Smm 330207753Smm size_t i = 0; 331207753Smm while (strcmp(types[i].str, optarg) != 0) { 332207753Smm if (++i == ARRAY_SIZE(types)) 333207753Smm message_fatal(_("%s: Unsupported " 334207753Smm "integrity " 335207753Smm "check type"), optarg); 336207753Smm } 337207753Smm 338207753Smm // Use a separate check in case we are using different 339207753Smm // liblzma than what was used to compile us. 340207753Smm if (!lzma_check_is_supported(types[i].check)) 341207753Smm message_fatal(_("%s: Unsupported integrity " 342207753Smm "check type"), optarg); 343207753Smm 344207753Smm coder_set_check(types[i].check); 345207753Smm break; 346207753Smm } 347207753Smm 348207753Smm case OPT_NO_SPARSE: 349207753Smm io_no_sparse(); 350207753Smm break; 351207753Smm 352207753Smm case OPT_FILES: 353207753Smm args->files_delim = '\n'; 354207753Smm 355207753Smm // Fall through 356207753Smm 357207753Smm case OPT_FILES0: 358207753Smm if (args->files_name != NULL) 359207753Smm message_fatal(_("Only one file can be " 360207753Smm "specified with `--files' " 361207753Smm "or `--files0'.")); 362207753Smm 363207753Smm if (optarg == NULL) { 364207753Smm args->files_name = (char *)stdin_filename; 365207753Smm args->files_file = stdin; 366207753Smm } else { 367207753Smm args->files_name = optarg; 368207753Smm args->files_file = fopen(optarg, 369207753Smm c == OPT_FILES ? "r" : "rb"); 370207753Smm if (args->files_file == NULL) 371207753Smm message_fatal("%s: %s", optarg, 372207753Smm strerror(errno)); 373207753Smm } 374207753Smm 375207753Smm break; 376207753Smm 377207753Smm default: 378207753Smm message_try_help(); 379207753Smm tuklib_exit(E_ERROR, E_ERROR, false); 380207753Smm } 381207753Smm } 382207753Smm 383207753Smm return; 384207753Smm} 385207753Smm 386207753Smm 387207753Smmstatic void 388207753Smmparse_environment(args_info *args, char *argv0) 389207753Smm{ 390207753Smm char *env = getenv("XZ_OPT"); 391207753Smm if (env == NULL) 392207753Smm return; 393207753Smm 394207753Smm // We modify the string, so make a copy of it. 395207753Smm env = xstrdup(env); 396207753Smm 397207753Smm // Calculate the number of arguments in env. argc stats at one 398207753Smm // to include space for the program name. 399207753Smm int argc = 1; 400207753Smm bool prev_was_space = true; 401207753Smm for (size_t i = 0; env[i] != '\0'; ++i) { 402207753Smm // NOTE: Cast to unsigned char is needed so that correct 403207753Smm // value gets passed to isspace(), which expects 404207753Smm // unsigned char cast to int. Casting to int is done 405207753Smm // automatically due to integer promotion, but we need to 406207753Smm // force char to unsigned char manually. Otherwise 8-bit 407207753Smm // characters would get promoted to wrong value if 408207753Smm // char is signed. 409207753Smm if (isspace((unsigned char)env[i])) { 410207753Smm prev_was_space = true; 411207753Smm } else if (prev_was_space) { 412207753Smm prev_was_space = false; 413207753Smm 414207753Smm // Keep argc small enough to fit into a singed int 415207753Smm // and to keep it usable for memory allocation. 416207753Smm if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) 417207753Smm message_fatal(_("The environment variable " 418207753Smm "XZ_OPT contains too many " 419207753Smm "arguments")); 420207753Smm } 421207753Smm } 422207753Smm 423207753Smm // Allocate memory to hold pointers to the arguments. Add one to get 424207753Smm // space for the terminating NULL (if some systems happen to need it). 425207753Smm char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 426207753Smm argv[0] = argv0; 427207753Smm argv[argc] = NULL; 428207753Smm 429207753Smm // Go through the string again. Split the arguments using '\0' 430207753Smm // characters and add pointers to the resulting strings to argv. 431207753Smm argc = 1; 432207753Smm prev_was_space = true; 433207753Smm for (size_t i = 0; env[i] != '\0'; ++i) { 434207753Smm if (isspace((unsigned char)env[i])) { 435207753Smm prev_was_space = true; 436207753Smm env[i] = '\0'; 437207753Smm } else if (prev_was_space) { 438207753Smm prev_was_space = false; 439207753Smm argv[argc++] = env + i; 440207753Smm } 441207753Smm } 442207753Smm 443207753Smm // Parse the argument list we got from the environment. All non-option 444207753Smm // arguments i.e. filenames are ignored. 445207753Smm parse_real(args, argc, argv); 446207753Smm 447207753Smm // Reset the state of the getopt_long() so that we can parse the 448207753Smm // command line options too. There are two incompatible ways to 449207753Smm // do it. 450207753Smm#ifdef HAVE_OPTRESET 451207753Smm // BSD 452207753Smm optind = 1; 453207753Smm optreset = 1; 454207753Smm#else 455207753Smm // GNU, Solaris 456207753Smm optind = 0; 457207753Smm#endif 458207753Smm 459207753Smm // We don't need the argument list from environment anymore. 460207753Smm free(argv); 461207753Smm free(env); 462207753Smm 463207753Smm return; 464207753Smm} 465207753Smm 466207753Smm 467207753Smmextern void 468207753Smmargs_parse(args_info *args, int argc, char **argv) 469207753Smm{ 470207753Smm // Initialize those parts of *args that we need later. 471207753Smm args->files_name = NULL; 472207753Smm args->files_file = NULL; 473207753Smm args->files_delim = '\0'; 474207753Smm 475207753Smm // Check how we were called. 476207753Smm { 477207753Smm // Remove the leading path name, if any. 478207753Smm const char *name = strrchr(argv[0], '/'); 479207753Smm if (name == NULL) 480207753Smm name = argv[0]; 481207753Smm else 482207753Smm ++name; 483207753Smm 484207753Smm // NOTE: It's possible that name[0] is now '\0' if argv[0] 485207753Smm // is weird, but it doesn't matter here. 486207753Smm 487207753Smm // Look for full command names instead of substrings like 488207753Smm // "un", "cat", and "lz" to reduce possibility of false 489207753Smm // positives when the programs have been renamed. 490207753Smm if (strstr(name, "xzcat") != NULL) { 491207753Smm opt_mode = MODE_DECOMPRESS; 492207753Smm opt_stdout = true; 493207753Smm } else if (strstr(name, "unxz") != NULL) { 494207753Smm opt_mode = MODE_DECOMPRESS; 495207753Smm } else if (strstr(name, "lzcat") != NULL) { 496207753Smm opt_format = FORMAT_LZMA; 497207753Smm opt_mode = MODE_DECOMPRESS; 498207753Smm opt_stdout = true; 499207753Smm } else if (strstr(name, "unlzma") != NULL) { 500207753Smm opt_format = FORMAT_LZMA; 501207753Smm opt_mode = MODE_DECOMPRESS; 502207753Smm } else if (strstr(name, "lzma") != NULL) { 503207753Smm opt_format = FORMAT_LZMA; 504207753Smm } 505207753Smm } 506207753Smm 507207753Smm // First the flags from environment 508207753Smm parse_environment(args, argv[0]); 509207753Smm 510207753Smm // Then from the command line 511207753Smm parse_real(args, argc, argv); 512207753Smm 513207753Smm // Never remove the source file when the destination is not on disk. 514207753Smm // In test mode the data is written nowhere, but setting opt_stdout 515207753Smm // will make the rest of the code behave well. 516207753Smm if (opt_stdout || opt_mode == MODE_TEST) { 517207753Smm opt_keep_original = true; 518207753Smm opt_stdout = true; 519207753Smm } 520207753Smm 521207753Smm // When compressing, if no --format flag was used, or it 522207753Smm // was --format=auto, we compress to the .xz format. 523207753Smm if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 524207753Smm opt_format = FORMAT_XZ; 525207753Smm 526207753Smm // Compression settings need to be validated (options themselves and 527207753Smm // their memory usage) when compressing to any file format. It has to 528207753Smm // be done also when uncompressing raw data, since for raw decoding 529207753Smm // the options given on the command line are used to know what kind 530207753Smm // of raw data we are supposed to decode. 531207753Smm if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) 532207753Smm coder_set_compression_settings(); 533207753Smm 534207753Smm // If no filenames are given, use stdin. 535207753Smm if (argv[optind] == NULL && args->files_name == NULL) { 536207753Smm // We don't modify or free() the "-" constant. The caller 537207753Smm // modifies this so don't make the struct itself const. 538207753Smm static char *names_stdin[2] = { (char *)"-", NULL }; 539207753Smm args->arg_names = names_stdin; 540207753Smm args->arg_count = 1; 541207753Smm } else { 542207753Smm // We got at least one filename from the command line, or 543207753Smm // --files or --files0 was specified. 544207753Smm args->arg_names = argv + optind; 545207753Smm args->arg_count = argc - optind; 546207753Smm } 547207753Smm 548207753Smm return; 549207753Smm} 550