1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       args.c
4/// \brief      Argument parsing
5///
6/// \note       Filter-specific options parsing is in options.c.
7//
8//  Author:     Lasse Collin
9//
10//  This file has been put into the public domain.
11//  You can do whatever you want with this file.
12//
13///////////////////////////////////////////////////////////////////////////////
14
15#include "private.h"
16
17#include "getopt.h"
18#include <ctype.h>
19
20
/// Set by -c/--stdout, when the program name contains "xzcat" or "lzcat",
/// and forced on in test mode by args_parse().
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Set by -k/--keep. args_parse() also forces this on whenever opt_stdout
/// is set or the operation mode is MODE_TEST.
bool opt_keep_original = false;

/// Set by --robot.
bool opt_robot = false;

// Stored in args->files_name when --files or --files0 is used without
// an argument, i.e. when the list of filenames is read from stdin.
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char stdin_filename[] = "(stdin)";
29
30
/// \brief      Parse a memory usage limit and pass it on
///
/// If the string ends with '%', the '%' is cut off (the string is modified
/// in place) and the rest is parsed with the bounds 1 and 100. Otherwise
/// the whole string is parsed with the bounds 0 and UINT64_MAX.
///
/// \param      name              Option name given to str_to_uint64() when
///                               parsing a plain value
/// \param      name_percentage   Option name given to str_to_uint64() when
///                               parsing a percentage
/// \param      str               Option argument; the trailing '%', if any,
///                               is overwritten with '\0'
/// \param      set_compress      Passed through to hardware_memlimit_set()
/// \param      set_decompress    Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool is_percentage = length > 0 && str[length - 1] == '%';

	// Drop the '%' so that only the number is left to be parsed.
	if (is_percentage)
		str[length - 1] = '\0';

	// For plain values SIZE_MAX would be a more logical upper bound on
	// 32-bit systems, but UINT64_MAX is used everywhere so that scripts
	// that assume > 4 GiB values don't break.
	const uint64_t limit = is_percentage
			? str_to_uint64(name_percentage, str, 1, 100)
			: str_to_uint64(name, str, 0, UINT64_MAX);

	hardware_memlimit_set(
			limit, set_compress, set_decompress, is_percentage);
}
55
56
57static void
58parse_real(args_info *args, int argc, char **argv)
59{
60	enum {
61		OPT_X86 = INT_MIN,
62		OPT_POWERPC,
63		OPT_IA64,
64		OPT_ARM,
65		OPT_ARMTHUMB,
66		OPT_SPARC,
67		OPT_DELTA,
68		OPT_LZMA1,
69		OPT_LZMA2,
70
71		OPT_NO_SPARSE,
72		OPT_FILES,
73		OPT_FILES0,
74		OPT_MEM_COMPRESS,
75		OPT_MEM_DECOMPRESS,
76		OPT_NO_ADJUST,
77		OPT_INFO_MEMORY,
78		OPT_ROBOT,
79	};
80
81	static const char short_opts[]
82			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
83
84	static const struct option long_opts[] = {
85		// Operation mode
86		{ "compress",     no_argument,       NULL,  'z' },
87		{ "decompress",   no_argument,       NULL,  'd' },
88		{ "uncompress",   no_argument,       NULL,  'd' },
89		{ "test",         no_argument,       NULL,  't' },
90		{ "list",         no_argument,       NULL,  'l' },
91
92		// Operation modifiers
93		{ "keep",         no_argument,       NULL,  'k' },
94		{ "force",        no_argument,       NULL,  'f' },
95		{ "stdout",       no_argument,       NULL,  'c' },
96		{ "to-stdout",    no_argument,       NULL,  'c' },
97		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
98		{ "suffix",       required_argument, NULL,  'S' },
99		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
100		{ "files",        optional_argument, NULL,  OPT_FILES },
101		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
102
103		// Basic compression settings
104		{ "format",       required_argument, NULL,  'F' },
105		{ "check",        required_argument, NULL,  'C' },
106		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
107		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
108		{ "memlimit",     required_argument, NULL,  'M' },
109		{ "memory",       required_argument, NULL,  'M' }, // Old alias
110		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
111		{ "threads",      required_argument, NULL,  'T' },
112
113		{ "extreme",      no_argument,       NULL,  'e' },
114		{ "fast",         no_argument,       NULL,  '0' },
115		{ "best",         no_argument,       NULL,  '9' },
116
117		// Filters
118		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
119		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
120		{ "x86",          optional_argument, NULL,  OPT_X86 },
121		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
122		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
123		{ "arm",          optional_argument, NULL,  OPT_ARM },
124		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
125		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
126		{ "delta",        optional_argument, NULL,  OPT_DELTA },
127
128		// Other options
129		{ "quiet",        no_argument,       NULL,  'q' },
130		{ "verbose",      no_argument,       NULL,  'v' },
131		{ "no-warn",      no_argument,       NULL,  'Q' },
132		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
133		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
134		{ "help",         no_argument,       NULL,  'h' },
135		{ "long-help",    no_argument,       NULL,  'H' },
136		{ "version",      no_argument,       NULL,  'V' },
137
138		{ NULL,           0,                 NULL,   0 }
139	};
140
141	int c;
142
143	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
144			!= -1) {
145		switch (c) {
146		// Compression preset (also for decompression if --format=raw)
147		case '0': case '1': case '2': case '3': case '4':
148		case '5': case '6': case '7': case '8': case '9':
149			coder_set_preset(c - '0');
150			break;
151
152		// --memlimit-compress
153		case OPT_MEM_COMPRESS:
154			parse_memlimit("memlimit-compress",
155					"memlimit-compress%", optarg,
156					true, false);
157			break;
158
159		// --memlimit-decompress
160		case OPT_MEM_DECOMPRESS:
161			parse_memlimit("memlimit-decompress",
162					"memlimit-decompress%", optarg,
163					false, true);
164			break;
165
166		// --memlimit
167		case 'M':
168			parse_memlimit("memlimit", "memlimit%", optarg,
169					true, true);
170			break;
171
172		// --suffix
173		case 'S':
174			suffix_set(optarg);
175			break;
176
177		case 'T':
178			hardware_threadlimit_set(str_to_uint64(
179					"threads", optarg, 0, UINT32_MAX));
180			break;
181
182		// --version
183		case 'V':
184			// This doesn't return.
185			message_version();
186
187		// --stdout
188		case 'c':
189			opt_stdout = true;
190			break;
191
192		// --decompress
193		case 'd':
194			opt_mode = MODE_DECOMPRESS;
195			break;
196
197		// --extreme
198		case 'e':
199			coder_set_extreme();
200			break;
201
202		// --force
203		case 'f':
204			opt_force = true;
205			break;
206
207		// --info-memory
208		case OPT_INFO_MEMORY:
209			// This doesn't return.
210			hardware_memlimit_show();
211
212		// --help
213		case 'h':
214			// This doesn't return.
215			message_help(false);
216
217		// --long-help
218		case 'H':
219			// This doesn't return.
220			message_help(true);
221
222		// --list
223		case 'l':
224			opt_mode = MODE_LIST;
225			break;
226
227		// --keep
228		case 'k':
229			opt_keep_original = true;
230			break;
231
232		// --quiet
233		case 'q':
234			message_verbosity_decrease();
235			break;
236
237		case 'Q':
238			set_exit_no_warn();
239			break;
240
241		case 't':
242			opt_mode = MODE_TEST;
243			break;
244
245		// --verbose
246		case 'v':
247			message_verbosity_increase();
248			break;
249
250		// --robot
251		case OPT_ROBOT:
252			opt_robot = true;
253
254			// This is to make sure that floating point numbers
255			// always have a dot as decimal separator.
256			setlocale(LC_NUMERIC, "C");
257			break;
258
259		case 'z':
260			opt_mode = MODE_COMPRESS;
261			break;
262
263		// Filter setup
264
265		case OPT_X86:
266			coder_add_filter(LZMA_FILTER_X86,
267					options_bcj(optarg));
268			break;
269
270		case OPT_POWERPC:
271			coder_add_filter(LZMA_FILTER_POWERPC,
272					options_bcj(optarg));
273			break;
274
275		case OPT_IA64:
276			coder_add_filter(LZMA_FILTER_IA64,
277					options_bcj(optarg));
278			break;
279
280		case OPT_ARM:
281			coder_add_filter(LZMA_FILTER_ARM,
282					options_bcj(optarg));
283			break;
284
285		case OPT_ARMTHUMB:
286			coder_add_filter(LZMA_FILTER_ARMTHUMB,
287					options_bcj(optarg));
288			break;
289
290		case OPT_SPARC:
291			coder_add_filter(LZMA_FILTER_SPARC,
292					options_bcj(optarg));
293			break;
294
295		case OPT_DELTA:
296			coder_add_filter(LZMA_FILTER_DELTA,
297					options_delta(optarg));
298			break;
299
300		case OPT_LZMA1:
301			coder_add_filter(LZMA_FILTER_LZMA1,
302					options_lzma(optarg));
303			break;
304
305		case OPT_LZMA2:
306			coder_add_filter(LZMA_FILTER_LZMA2,
307					options_lzma(optarg));
308			break;
309
310		// Other
311
312		// --format
313		case 'F': {
314			// Just in case, support both "lzma" and "alone" since
315			// the latter was used for forward compatibility in
316			// LZMA Utils 4.32.x.
317			static const struct {
318				char str[8];
319				enum format_type format;
320			} types[] = {
321				{ "auto",   FORMAT_AUTO },
322				{ "xz",     FORMAT_XZ },
323				{ "lzma",   FORMAT_LZMA },
324				{ "alone",  FORMAT_LZMA },
325				// { "gzip",   FORMAT_GZIP },
326				// { "gz",     FORMAT_GZIP },
327				{ "raw",    FORMAT_RAW },
328			};
329
330			size_t i = 0;
331			while (strcmp(types[i].str, optarg) != 0)
332				if (++i == ARRAY_SIZE(types))
333					message_fatal(_("%s: Unknown file "
334							"format type"),
335							optarg);
336
337			opt_format = types[i].format;
338			break;
339		}
340
341		// --check
342		case 'C': {
343			static const struct {
344				char str[8];
345				lzma_check check;
346			} types[] = {
347				{ "none",   LZMA_CHECK_NONE },
348				{ "crc32",  LZMA_CHECK_CRC32 },
349				{ "crc64",  LZMA_CHECK_CRC64 },
350				{ "sha256", LZMA_CHECK_SHA256 },
351			};
352
353			size_t i = 0;
354			while (strcmp(types[i].str, optarg) != 0) {
355				if (++i == ARRAY_SIZE(types))
356					message_fatal(_("%s: Unsupported "
357							"integrity "
358							"check type"), optarg);
359			}
360
361			// Use a separate check in case we are using different
362			// liblzma than what was used to compile us.
363			if (!lzma_check_is_supported(types[i].check))
364				message_fatal(_("%s: Unsupported integrity "
365						"check type"), optarg);
366
367			coder_set_check(types[i].check);
368			break;
369		}
370
371		case OPT_NO_SPARSE:
372			io_no_sparse();
373			break;
374
375		case OPT_FILES:
376			args->files_delim = '\n';
377
378		// Fall through
379
380		case OPT_FILES0:
381			if (args->files_name != NULL)
382				message_fatal(_("Only one file can be "
383						"specified with `--files' "
384						"or `--files0'."));
385
386			if (optarg == NULL) {
387				args->files_name = (char *)stdin_filename;
388				args->files_file = stdin;
389			} else {
390				args->files_name = optarg;
391				args->files_file = fopen(optarg,
392						c == OPT_FILES ? "r" : "rb");
393				if (args->files_file == NULL)
394					message_fatal("%s: %s", optarg,
395							strerror(errno));
396			}
397
398			break;
399
400		case OPT_NO_ADJUST:
401			opt_auto_adjust = false;
402			break;
403
404		default:
405			message_try_help();
406			tuklib_exit(E_ERROR, E_ERROR, false);
407		}
408	}
409
410	return;
411}
412
413
414static void
415parse_environment(args_info *args, char *argv0, const char *varname)
416{
417	char *env = getenv(varname);
418	if (env == NULL)
419		return;
420
421	// We modify the string, so make a copy of it.
422	env = xstrdup(env);
423
424	// Calculate the number of arguments in env. argc stats at one
425	// to include space for the program name.
426	int argc = 1;
427	bool prev_was_space = true;
428	for (size_t i = 0; env[i] != '\0'; ++i) {
429		// NOTE: Cast to unsigned char is needed so that correct
430		// value gets passed to isspace(), which expects
431		// unsigned char cast to int. Casting to int is done
432		// automatically due to integer promotion, but we need to
433		// force char to unsigned char manually. Otherwise 8-bit
434		// characters would get promoted to wrong value if
435		// char is signed.
436		if (isspace((unsigned char)env[i])) {
437			prev_was_space = true;
438		} else if (prev_was_space) {
439			prev_was_space = false;
440
441			// Keep argc small enough to fit into a signed int
442			// and to keep it usable for memory allocation.
443			if (++argc == my_min(
444					INT_MAX, SIZE_MAX / sizeof(char *)))
445				message_fatal(_("The environment variable "
446						"%s contains too many "
447						"arguments"), varname);
448		}
449	}
450
451	// Allocate memory to hold pointers to the arguments. Add one to get
452	// space for the terminating NULL (if some systems happen to need it).
453	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
454	argv[0] = argv0;
455	argv[argc] = NULL;
456
457	// Go through the string again. Split the arguments using '\0'
458	// characters and add pointers to the resulting strings to argv.
459	argc = 1;
460	prev_was_space = true;
461	for (size_t i = 0; env[i] != '\0'; ++i) {
462		if (isspace((unsigned char)env[i])) {
463			prev_was_space = true;
464			env[i] = '\0';
465		} else if (prev_was_space) {
466			prev_was_space = false;
467			argv[argc++] = env + i;
468		}
469	}
470
471	// Parse the argument list we got from the environment. All non-option
472	// arguments i.e. filenames are ignored.
473	parse_real(args, argc, argv);
474
475	// Reset the state of the getopt_long() so that we can parse the
476	// command line options too. There are two incompatible ways to
477	// do it.
478#ifdef HAVE_OPTRESET
479	// BSD
480	optind = 1;
481	optreset = 1;
482#else
483	// GNU, Solaris
484	optind = 0;
485#endif
486
487	// We don't need the argument list from environment anymore.
488	free(argv);
489	free(env);
490
491	return;
492}
493
494
495extern void
496args_parse(args_info *args, int argc, char **argv)
497{
498	// Initialize those parts of *args that we need later.
499	args->files_name = NULL;
500	args->files_file = NULL;
501	args->files_delim = '\0';
502
503	// Check how we were called.
504	{
505		// Remove the leading path name, if any.
506		const char *name = strrchr(argv[0], '/');
507		if (name == NULL)
508			name = argv[0];
509		else
510			++name;
511
512		// NOTE: It's possible that name[0] is now '\0' if argv[0]
513		// is weird, but it doesn't matter here.
514
515		// Look for full command names instead of substrings like
516		// "un", "cat", and "lz" to reduce possibility of false
517		// positives when the programs have been renamed.
518		if (strstr(name, "xzcat") != NULL) {
519			opt_mode = MODE_DECOMPRESS;
520			opt_stdout = true;
521		} else if (strstr(name, "unxz") != NULL) {
522			opt_mode = MODE_DECOMPRESS;
523		} else if (strstr(name, "lzcat") != NULL) {
524			opt_format = FORMAT_LZMA;
525			opt_mode = MODE_DECOMPRESS;
526			opt_stdout = true;
527		} else if (strstr(name, "unlzma") != NULL) {
528			opt_format = FORMAT_LZMA;
529			opt_mode = MODE_DECOMPRESS;
530		} else if (strstr(name, "lzma") != NULL) {
531			opt_format = FORMAT_LZMA;
532		}
533	}
534
535	// First the flags from the environment
536	parse_environment(args, argv[0], "XZ_DEFAULTS");
537	parse_environment(args, argv[0], "XZ_OPT");
538
539	// Then from the command line
540	parse_real(args, argc, argv);
541
542	// Never remove the source file when the destination is not on disk.
543	// In test mode the data is written nowhere, but setting opt_stdout
544	// will make the rest of the code behave well.
545	if (opt_stdout || opt_mode == MODE_TEST) {
546		opt_keep_original = true;
547		opt_stdout = true;
548	}
549
550	// When compressing, if no --format flag was used, or it
551	// was --format=auto, we compress to the .xz format.
552	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
553		opt_format = FORMAT_XZ;
554
555	// Compression settings need to be validated (options themselves and
556	// their memory usage) when compressing to any file format. It has to
557	// be done also when uncompressing raw data, since for raw decoding
558	// the options given on the command line are used to know what kind
559	// of raw data we are supposed to decode.
560	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
561		coder_set_compression_settings();
562
563	// If no filenames are given, use stdin.
564	if (argv[optind] == NULL && args->files_name == NULL) {
565		// We don't modify or free() the "-" constant. The caller
566		// modifies this so don't make the struct itself const.
567		static char *names_stdin[2] = { (char *)"-", NULL };
568		args->arg_names = names_stdin;
569		args->arg_count = 1;
570	} else {
571		// We got at least one filename from the command line, or
572		// --files or --files0 was specified.
573		args->arg_names = argv + optind;
574		args->arg_count = argc - optind;
575	}
576
577	return;
578}
579