args.c revision 292588
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       args.c
4/// \brief      Argument parsing
5///
6/// \note       Filter-specific options parsing is in options.c.
7//
8//  Author:     Lasse Collin
9//
10//  This file has been put into the public domain.
11//  You can do whatever you want with this file.
12//
13///////////////////////////////////////////////////////////////////////////////
14
15#include "private.h"
16
17#include "getopt.h"
18#include <ctype.h>
19
20
// Global option flags. All of them start out false and are switched on
// by the option parsing code in this file.

// -c, --stdout. args_parse() also turns this on in test mode and when
// the program name contains "xzcat" or "lzcat".
21bool opt_stdout = false;
// -f, --force
22bool opt_force = false;
// -k, --keep. args_parse() also turns this on whenever opt_stdout is set.
23bool opt_keep_original = false;
// --robot
24bool opt_robot = false;
// --ignore-check
25bool opt_ignore_check = false;
26
// Placeholder name stored in args->files_name when --files or --files0
// is given without a filename, in which case the list is read from stdin.
27// We don't modify or free() this, but we need to assign it in some
28// non-const pointers.
29const char stdin_filename[] = "(stdin)";
30
31
/// Parse a memory usage limit and hand it to hardware_memlimit_set().
///
/// The limit is either a plain number or, when the string ends in '%',
/// a percentage in the range 1-100.
///
/// \param      name            First argument for str_to_uint64() when the
///                             value is a plain number (presumably the
///                             option name used in its error messages)
/// \param      name_percentage Same, but used when the value is a percentage
/// \param      str             Option argument; a trailing '%' is
///                             overwritten with '\0' in place
/// \param      set_compress    Passed through to hardware_memlimit_set()
/// \param      set_decompress  Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool is_percentage = length > 0 && str[length - 1] == '%';
	uint64_t limit;

	if (is_percentage) {
		// Cut off the percent sign so that only the number is parsed.
		str[length - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	} else {
		// SIZE_MAX would be a more natural upper bound on 32-bit
		// systems, but UINT64_MAX is accepted everywhere so that
		// scripts passing values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress,
			is_percentage);
}
56
57
58static void
59parse_block_list(char *str)
60{
61	// It must be non-empty and not begin with a comma.
62	if (str[0] == '\0' || str[0] == ',')
63		message_fatal(_("%s: Invalid argument to --block-list"), str);
64
65	// Count the number of comma-separated strings.
66	size_t count = 1;
67	for (size_t i = 0; str[i] != '\0'; ++i)
68		if (str[i] == ',')
69			++count;
70
71	// Prevent an unlikely integer overflow.
72	if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73		message_fatal(_("%s: Too many arguments to --block-list"),
74				str);
75
76	// Allocate memory to hold all the sizes specified.
77	// If --block-list was specified already, its value is forgotten.
78	free(opt_block_list);
79	opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80
81	for (size_t i = 0; i < count; ++i) {
82		// Locate the next comma and replace it with \0.
83		char *p = strchr(str, ',');
84		if (p != NULL)
85			*p = '\0';
86
87		if (str[0] == '\0') {
88			// There is no string, that is, a comma follows
89			// another comma. Use the previous value.
90			//
91			// NOTE: We checked earler that the first char
92			// of the whole list cannot be a comma.
93			assert(i > 0);
94			opt_block_list[i] = opt_block_list[i - 1];
95		} else {
96			opt_block_list[i] = str_to_uint64("block-list", str,
97					0, UINT64_MAX);
98
99			// Zero indicates no more new Blocks.
100			if (opt_block_list[i] == 0) {
101				if (i + 1 != count)
102					message_fatal(_("0 can only be used "
103							"as the last element "
104							"in --block-list"));
105
106				opt_block_list[i] = UINT64_MAX;
107			}
108		}
109
110		str = p + 1;
111	}
112
113	// Terminate the array.
114	opt_block_list[count] = 0;
115	return;
116}
117
118
/// Parse the options in argv with getopt_long() and apply each of them.
///
/// Most options update global settings, either directly or through the
/// setter functions called below; --files and --files0 are recorded in
/// *args instead. Non-option arguments are not examined here. The cases
/// commented with "This doesn't return." (--version, --help, --long-help,
/// --info-memory) rely on the called function not returning, and anything
/// that isn't handled ends in message_try_help() and tuklib_exit().
///
/// \param      args    Receives files_name, files_file, and files_delim
///                     when --files or --files0 is used
/// \param      argc    Number of elements in argv
/// \param      argv    Argument vector passed to getopt_long()
119static void
120parse_real(args_info *args, int argc, char **argv)
121{
	// Values returned by getopt_long() for the options that have no
	// single-character form.
	// NOTE(review): the numbering presumably starts at INT_MIN so that
	// these can never collide with the character codes of the short
	// options handled in the same switch -- confirm.
122	enum {
123		OPT_X86 = INT_MIN,
124		OPT_POWERPC,
125		OPT_IA64,
126		OPT_ARM,
127		OPT_ARMTHUMB,
128		OPT_SPARC,
129		OPT_DELTA,
130		OPT_LZMA1,
131		OPT_LZMA2,
132
133		OPT_SINGLE_STREAM,
134		OPT_NO_SPARSE,
135		OPT_FILES,
136		OPT_FILES0,
137		OPT_BLOCK_SIZE,
138		OPT_BLOCK_LIST,
139		OPT_MEM_COMPRESS,
140		OPT_MEM_DECOMPRESS,
141		OPT_NO_ADJUST,
142		OPT_INFO_MEMORY,
143		OPT_ROBOT,
144		OPT_FLUSH_TIMEOUT,
145		OPT_IGNORE_CHECK,
146	};
147
	// Short options in getopt() syntax; a colon marks an option that
	// takes an argument.
148	static const char short_opts[]
149			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
150
	// Long options. The last field is the value getopt_long() returns
	// for the option: the matching short option character where one
	// exists, otherwise one of the OPT_* values above.
151	static const struct option long_opts[] = {
152		// Operation mode
153		{ "compress",     no_argument,       NULL,  'z' },
154		{ "decompress",   no_argument,       NULL,  'd' },
155		{ "uncompress",   no_argument,       NULL,  'd' },
156		{ "test",         no_argument,       NULL,  't' },
157		{ "list",         no_argument,       NULL,  'l' },
158
159		// Operation modifiers
160		{ "keep",         no_argument,       NULL,  'k' },
161		{ "force",        no_argument,       NULL,  'f' },
162		{ "stdout",       no_argument,       NULL,  'c' },
163		{ "to-stdout",    no_argument,       NULL,  'c' },
164		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
165		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
166		{ "suffix",       required_argument, NULL,  'S' },
167		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
168		{ "files",        optional_argument, NULL,  OPT_FILES },
169		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
170
171		// Basic compression settings
172		{ "format",       required_argument, NULL,  'F' },
173		{ "check",        required_argument, NULL,  'C' },
174		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
175		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
176		{ "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
177		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
178		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
179		{ "memlimit",     required_argument, NULL,  'M' },
180		{ "memory",       required_argument, NULL,  'M' }, // Old alias
181		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
182		{ "threads",      required_argument, NULL,  'T' },
183		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
184
185		{ "extreme",      no_argument,       NULL,  'e' },
186		{ "fast",         no_argument,       NULL,  '0' },
187		{ "best",         no_argument,       NULL,  '9' },
188
189		// Filters
190		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
191		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
192		{ "x86",          optional_argument, NULL,  OPT_X86 },
193		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
194		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
195		{ "arm",          optional_argument, NULL,  OPT_ARM },
196		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
197		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
198		{ "delta",        optional_argument, NULL,  OPT_DELTA },
199
200		// Other options
201		{ "quiet",        no_argument,       NULL,  'q' },
202		{ "verbose",      no_argument,       NULL,  'v' },
203		{ "no-warn",      no_argument,       NULL,  'Q' },
204		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
205		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
206		{ "help",         no_argument,       NULL,  'h' },
207		{ "long-help",    no_argument,       NULL,  'H' },
208		{ "version",      no_argument,       NULL,  'V' },
209
210		{ NULL,           0,                 NULL,   0 }
211	};
212
213	int c;
214
215	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
216			!= -1) {
217		switch (c) {
218		// Compression preset (also for decompression if --format=raw)
219		case '0': case '1': case '2': case '3': case '4':
220		case '5': case '6': case '7': case '8': case '9':
221			coder_set_preset(c - '0');
222			break;
223
224		// --memlimit-compress
225		case OPT_MEM_COMPRESS:
226			parse_memlimit("memlimit-compress",
227					"memlimit-compress%", optarg,
228					true, false);
229			break;
230
231		// --memlimit-decompress
232		case OPT_MEM_DECOMPRESS:
233			parse_memlimit("memlimit-decompress",
234					"memlimit-decompress%", optarg,
235					false, true);
236			break;
237
238		// --memlimit
239		case 'M':
240			parse_memlimit("memlimit", "memlimit%", optarg,
241					true, true);
242			break;
243
244		// --suffix
245		case 'S':
246			suffix_set(optarg);
247			break;
248
		// --threads
249		case 'T':
250			// The max is from src/liblzma/common/common.h.
251			hardware_threads_set(str_to_uint64("threads",
252					optarg, 0, 16384));
253			break;
254
255		// --version
256		case 'V':
257			// This doesn't return.
			// That is why there is no break here or in the
			// other cases carrying the same comment: control
			// must never reach the case that follows.
258			message_version();
259
260		// --stdout
261		case 'c':
262			opt_stdout = true;
263			break;
264
265		// --decompress
266		case 'd':
267			opt_mode = MODE_DECOMPRESS;
268			break;
269
270		// --extreme
271		case 'e':
272			coder_set_extreme();
273			break;
274
275		// --force
276		case 'f':
277			opt_force = true;
278			break;
279
280		// --info-memory
281		case OPT_INFO_MEMORY:
282			// This doesn't return.
283			hardware_memlimit_show();
284
285		// --help
286		case 'h':
287			// This doesn't return.
288			message_help(false);
289
290		// --long-help
291		case 'H':
292			// This doesn't return.
293			message_help(true);
294
295		// --list
296		case 'l':
297			opt_mode = MODE_LIST;
298			break;
299
300		// --keep
301		case 'k':
302			opt_keep_original = true;
303			break;
304
305		// --quiet
306		case 'q':
307			message_verbosity_decrease();
308			break;
309
		// --no-warn
310		case 'Q':
311			set_exit_no_warn();
312			break;
313
		// --test
314		case 't':
315			opt_mode = MODE_TEST;
316			break;
317
318		// --verbose
319		case 'v':
320			message_verbosity_increase();
321			break;
322
323		// --robot
324		case OPT_ROBOT:
325			opt_robot = true;
326
327			// This is to make sure that floating point numbers
328			// always have a dot as decimal separator.
329			setlocale(LC_NUMERIC, "C");
330			break;
331
		// --compress
332		case 'z':
333			opt_mode = MODE_COMPRESS;
334			break;
335
336		// Filter setup
		//
		// Each filter option adds one filter with coder_add_filter().
		// The argument is optional for all of them, so optarg may be
		// NULL when it reaches the options_*() parser.
337
338		case OPT_X86:
339			coder_add_filter(LZMA_FILTER_X86,
340					options_bcj(optarg));
341			break;
342
343		case OPT_POWERPC:
344			coder_add_filter(LZMA_FILTER_POWERPC,
345					options_bcj(optarg));
346			break;
347
348		case OPT_IA64:
349			coder_add_filter(LZMA_FILTER_IA64,
350					options_bcj(optarg));
351			break;
352
353		case OPT_ARM:
354			coder_add_filter(LZMA_FILTER_ARM,
355					options_bcj(optarg));
356			break;
357
358		case OPT_ARMTHUMB:
359			coder_add_filter(LZMA_FILTER_ARMTHUMB,
360					options_bcj(optarg));
361			break;
362
363		case OPT_SPARC:
364			coder_add_filter(LZMA_FILTER_SPARC,
365					options_bcj(optarg));
366			break;
367
368		case OPT_DELTA:
369			coder_add_filter(LZMA_FILTER_DELTA,
370					options_delta(optarg));
371			break;
372
373		case OPT_LZMA1:
374			coder_add_filter(LZMA_FILTER_LZMA1,
375					options_lzma(optarg));
376			break;
377
378		case OPT_LZMA2:
379			coder_add_filter(LZMA_FILTER_LZMA2,
380					options_lzma(optarg));
381			break;
382
383		// Other
384
385		// --format
386		case 'F': {
387			// Just in case, support both "lzma" and "alone" since
388			// the latter was used for forward compatibility in
389			// LZMA Utils 4.32.x.
390			static const struct {
391				char str[8];
392				enum format_type format;
393			} types[] = {
394				{ "auto",   FORMAT_AUTO },
395				{ "xz",     FORMAT_XZ },
396				{ "lzma",   FORMAT_LZMA },
397				{ "alone",  FORMAT_LZMA },
398				// { "gzip",   FORMAT_GZIP },
399				// { "gz",     FORMAT_GZIP },
400				{ "raw",    FORMAT_RAW },
401			};
402
			// Linear search for an exact match. The loop relies
			// on message_fatal() not returning once i has run
			// past the end of the table.
403			size_t i = 0;
404			while (strcmp(types[i].str, optarg) != 0)
405				if (++i == ARRAY_SIZE(types))
406					message_fatal(_("%s: Unknown file "
407							"format type"),
408							optarg);
409
410			opt_format = types[i].format;
411			break;
412		}
413
414		// --check
415		case 'C': {
416			static const struct {
417				char str[8];
418				lzma_check check;
419			} types[] = {
420				{ "none",   LZMA_CHECK_NONE },
421				{ "crc32",  LZMA_CHECK_CRC32 },
422				{ "crc64",  LZMA_CHECK_CRC64 },
423				{ "sha256", LZMA_CHECK_SHA256 },
424			};
425
			// Same search as for --format above.
426			size_t i = 0;
427			while (strcmp(types[i].str, optarg) != 0) {
428				if (++i == ARRAY_SIZE(types))
429					message_fatal(_("%s: Unsupported "
430							"integrity "
431							"check type"), optarg);
432			}
433
434			// Use a separate check in case we are using different
435			// liblzma than what was used to compile us.
436			if (!lzma_check_is_supported(types[i].check))
437				message_fatal(_("%s: Unsupported integrity "
438						"check type"), optarg);
439
440			coder_set_check(types[i].check);
441			break;
442		}
443
		// --ignore-check
444		case OPT_IGNORE_CHECK:
445			opt_ignore_check = true;
446			break;
447
		// --block-size
448		case OPT_BLOCK_SIZE:
449			opt_block_size = str_to_uint64("block-size", optarg,
450					0, LZMA_VLI_MAX);
451			break;
452
		// --block-list
453		case OPT_BLOCK_LIST: {
454			parse_block_list(optarg);
455			break;
456		}
457
		// --single-stream
458		case OPT_SINGLE_STREAM:
459			opt_single_stream = true;
460			break;
461
		// --no-sparse
462		case OPT_NO_SPARSE:
463			io_no_sparse();
464			break;
465
		// --files: like --files0 below, but the delimiter is set to
		// a newline first.
466		case OPT_FILES:
467			args->files_delim = '\n';
468
469		// Fall through
470
		// --files0: args->files_delim is left as the caller set it.
471		case OPT_FILES0:
472			if (args->files_name != NULL)
473				message_fatal(_("Only one file can be "
474						"specified with `--files' "
475						"or `--files0'."));
476
			// The argument is optional; without one optarg is
			// NULL and the list is read from stdin.
477			if (optarg == NULL) {
478				args->files_name = (char *)stdin_filename;
479				args->files_file = stdin;
480			} else {
				// NOTE: files_name points at optarg itself,
				// not at a copy of it.
481				args->files_name = optarg;
482				args->files_file = fopen(optarg,
483						c == OPT_FILES ? "r" : "rb");
484				if (args->files_file == NULL)
485					message_fatal("%s: %s", optarg,
486							strerror(errno));
487			}
488
489			break;
490
		// --no-adjust
491		case OPT_NO_ADJUST:
492			opt_auto_adjust = false;
493			break;
494
		// --flush-timeout
495		case OPT_FLUSH_TIMEOUT:
496			opt_flush_timeout = str_to_uint64("flush-timeout",
497					optarg, 0, UINT64_MAX);
498			break;
499
		// Everything that has no case above. This includes -r,
		// which is listed in short_opts but is not handled.
500		default:
501			message_try_help();
502			tuklib_exit(E_ERROR, E_ERROR, false);
503		}
504	}
505
506	return;
507}
508
509
510static void
511parse_environment(args_info *args, char *argv0, const char *varname)
512{
513	char *env = getenv(varname);
514	if (env == NULL)
515		return;
516
517	// We modify the string, so make a copy of it.
518	env = xstrdup(env);
519
520	// Calculate the number of arguments in env. argc stats at one
521	// to include space for the program name.
522	int argc = 1;
523	bool prev_was_space = true;
524	for (size_t i = 0; env[i] != '\0'; ++i) {
525		// NOTE: Cast to unsigned char is needed so that correct
526		// value gets passed to isspace(), which expects
527		// unsigned char cast to int. Casting to int is done
528		// automatically due to integer promotion, but we need to
529		// force char to unsigned char manually. Otherwise 8-bit
530		// characters would get promoted to wrong value if
531		// char is signed.
532		if (isspace((unsigned char)env[i])) {
533			prev_was_space = true;
534		} else if (prev_was_space) {
535			prev_was_space = false;
536
537			// Keep argc small enough to fit into a signed int
538			// and to keep it usable for memory allocation.
539			if (++argc == my_min(
540					INT_MAX, SIZE_MAX / sizeof(char *)))
541				message_fatal(_("The environment variable "
542						"%s contains too many "
543						"arguments"), varname);
544		}
545	}
546
547	// Allocate memory to hold pointers to the arguments. Add one to get
548	// space for the terminating NULL (if some systems happen to need it).
549	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
550	argv[0] = argv0;
551	argv[argc] = NULL;
552
553	// Go through the string again. Split the arguments using '\0'
554	// characters and add pointers to the resulting strings to argv.
555	argc = 1;
556	prev_was_space = true;
557	for (size_t i = 0; env[i] != '\0'; ++i) {
558		if (isspace((unsigned char)env[i])) {
559			prev_was_space = true;
560			env[i] = '\0';
561		} else if (prev_was_space) {
562			prev_was_space = false;
563			argv[argc++] = env + i;
564		}
565	}
566
567	// Parse the argument list we got from the environment. All non-option
568	// arguments i.e. filenames are ignored.
569	parse_real(args, argc, argv);
570
571	// Reset the state of the getopt_long() so that we can parse the
572	// command line options too. There are two incompatible ways to
573	// do it.
574#ifdef HAVE_OPTRESET
575	// BSD
576	optind = 1;
577	optreset = 1;
578#else
579	// GNU, Solaris
580	optind = 0;
581#endif
582
583	// We don't need the argument list from environment anymore.
584	free(argv);
585	free(env);
586
587	return;
588}
589
590
591extern void
592args_parse(args_info *args, int argc, char **argv)
593{
594	// Initialize those parts of *args that we need later.
595	args->files_name = NULL;
596	args->files_file = NULL;
597	args->files_delim = '\0';
598
599	// Check how we were called.
600	{
601		// Remove the leading path name, if any.
602		const char *name = strrchr(argv[0], '/');
603		if (name == NULL)
604			name = argv[0];
605		else
606			++name;
607
608		// NOTE: It's possible that name[0] is now '\0' if argv[0]
609		// is weird, but it doesn't matter here.
610
611		// Look for full command names instead of substrings like
612		// "un", "cat", and "lz" to reduce possibility of false
613		// positives when the programs have been renamed.
614		if (strstr(name, "xzcat") != NULL) {
615			opt_mode = MODE_DECOMPRESS;
616			opt_stdout = true;
617		} else if (strstr(name, "unxz") != NULL) {
618			opt_mode = MODE_DECOMPRESS;
619		} else if (strstr(name, "lzcat") != NULL) {
620			opt_format = FORMAT_LZMA;
621			opt_mode = MODE_DECOMPRESS;
622			opt_stdout = true;
623		} else if (strstr(name, "unlzma") != NULL) {
624			opt_format = FORMAT_LZMA;
625			opt_mode = MODE_DECOMPRESS;
626		} else if (strstr(name, "lzma") != NULL) {
627			opt_format = FORMAT_LZMA;
628		}
629	}
630
631	// First the flags from the environment
632	parse_environment(args, argv[0], "XZ_DEFAULTS");
633	parse_environment(args, argv[0], "XZ_OPT");
634
635	// Then from the command line
636	parse_real(args, argc, argv);
637
638	// Never remove the source file when the destination is not on disk.
639	// In test mode the data is written nowhere, but setting opt_stdout
640	// will make the rest of the code behave well.
641	if (opt_stdout || opt_mode == MODE_TEST) {
642		opt_keep_original = true;
643		opt_stdout = true;
644	}
645
646	// When compressing, if no --format flag was used, or it
647	// was --format=auto, we compress to the .xz format.
648	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
649		opt_format = FORMAT_XZ;
650
651	// Compression settings need to be validated (options themselves and
652	// their memory usage) when compressing to any file format. It has to
653	// be done also when uncompressing raw data, since for raw decoding
654	// the options given on the command line are used to know what kind
655	// of raw data we are supposed to decode.
656	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
657		coder_set_compression_settings();
658
659	// If no filenames are given, use stdin.
660	if (argv[optind] == NULL && args->files_name == NULL) {
661		// We don't modify or free() the "-" constant. The caller
662		// modifies this so don't make the struct itself const.
663		static char *names_stdin[2] = { (char *)"-", NULL };
664		args->arg_names = names_stdin;
665		args->arg_count = 1;
666	} else {
667		// We got at least one filename from the command line, or
668		// --files or --files0 was specified.
669		args->arg_names = argv + optind;
670		args->arg_count = argc - optind;
671	}
672
673	return;
674}
675
676
677#ifndef NDEBUG
678extern void
679args_free(void)
680{
681	free(opt_block_list);
682	return;
683}
684#endif
685