args.c revision 207753
///////////////////////////////////////////////////////////////////////////////
//
/// \file       args.c
/// \brief      Argument parsing
///
/// \note       Filter-specific options parsing is in options.c.
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
14207753Smm
15207753Smm#include "private.h"
16207753Smm
17207753Smm#include "getopt.h"
18207753Smm#include <ctype.h>
19207753Smm
20207753Smm
/// True when output goes to standard output (-c, --stdout, --to-stdout).
/// args_parse() also turns this on for the xzcat/lzcat command names and
/// for test mode.
bool opt_stdout = false;

/// True when -f / --force was given.
bool opt_force = false;

/// True when -k / --keep was given. args_parse() also turns this on
/// whenever opt_stdout ends up being true.
bool opt_keep_original = false;

/// True when --robot was given.
bool opt_robot = false;

/// Name used for standard input. The string is never modified or
/// free()d, but it has to be assignable to some non-const pointers.
const char *stdin_filename = "(stdin)";
29207753Smm
30207753Smm
31207753Smmstatic void
32207753Smmparse_real(args_info *args, int argc, char **argv)
33207753Smm{
34207753Smm	enum {
35207753Smm		OPT_SUBBLOCK = INT_MIN,
36207753Smm		OPT_X86,
37207753Smm		OPT_POWERPC,
38207753Smm		OPT_IA64,
39207753Smm		OPT_ARM,
40207753Smm		OPT_ARMTHUMB,
41207753Smm		OPT_SPARC,
42207753Smm		OPT_DELTA,
43207753Smm		OPT_LZMA1,
44207753Smm		OPT_LZMA2,
45207753Smm
46207753Smm		OPT_NO_SPARSE,
47207753Smm		OPT_FILES,
48207753Smm		OPT_FILES0,
49207753Smm		OPT_INFO_MEMORY,
50207753Smm		OPT_ROBOT,
51207753Smm	};
52207753Smm
53207753Smm	static const char short_opts[]
54207753Smm			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
55207753Smm
56207753Smm	static const struct option long_opts[] = {
57207753Smm		// Operation mode
58207753Smm		{ "compress",     no_argument,       NULL,  'z' },
59207753Smm		{ "decompress",   no_argument,       NULL,  'd' },
60207753Smm		{ "uncompress",   no_argument,       NULL,  'd' },
61207753Smm		{ "test",         no_argument,       NULL,  't' },
62207753Smm		{ "list",         no_argument,       NULL,  'l' },
63207753Smm
64207753Smm		// Operation modifiers
65207753Smm		{ "keep",         no_argument,       NULL,  'k' },
66207753Smm		{ "force",        no_argument,       NULL,  'f' },
67207753Smm		{ "stdout",       no_argument,       NULL,  'c' },
68207753Smm		{ "to-stdout",    no_argument,       NULL,  'c' },
69207753Smm		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
70207753Smm		{ "suffix",       required_argument, NULL,  'S' },
71207753Smm		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
72207753Smm		{ "files",        optional_argument, NULL,  OPT_FILES },
73207753Smm		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
74207753Smm
75207753Smm		// Basic compression settings
76207753Smm		{ "format",       required_argument, NULL,  'F' },
77207753Smm		{ "check",        required_argument, NULL,  'C' },
78207753Smm		{ "memory",       required_argument, NULL,  'M' },
79207753Smm		{ "threads",      required_argument, NULL,  'T' },
80207753Smm
81207753Smm		{ "extreme",      no_argument,       NULL,  'e' },
82207753Smm		{ "fast",         no_argument,       NULL,  '0' },
83207753Smm		{ "best",         no_argument,       NULL,  '9' },
84207753Smm
85207753Smm		// Filters
86207753Smm		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
87207753Smm		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
88207753Smm		{ "x86",          optional_argument, NULL,  OPT_X86 },
89207753Smm		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
90207753Smm		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
91207753Smm		{ "arm",          optional_argument, NULL,  OPT_ARM },
92207753Smm		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
93207753Smm		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
94207753Smm		{ "delta",        optional_argument, NULL,  OPT_DELTA },
95207753Smm		{ "subblock",     optional_argument, NULL,  OPT_SUBBLOCK },
96207753Smm
97207753Smm		// Other options
98207753Smm		{ "quiet",        no_argument,       NULL,  'q' },
99207753Smm		{ "verbose",      no_argument,       NULL,  'v' },
100207753Smm		{ "no-warn",      no_argument,       NULL,  'Q' },
101207753Smm		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
102207753Smm		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
103207753Smm		{ "help",         no_argument,       NULL,  'h' },
104207753Smm		{ "long-help",    no_argument,       NULL,  'H' },
105207753Smm		{ "version",      no_argument,       NULL,  'V' },
106207753Smm
107207753Smm		{ NULL,                 0,                 NULL,   0 }
108207753Smm	};
109207753Smm
110207753Smm	int c;
111207753Smm
112207753Smm	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
113207753Smm			!= -1) {
114207753Smm		switch (c) {
115207753Smm		// Compression preset (also for decompression if --format=raw)
116207753Smm		case '0': case '1': case '2': case '3': case '4':
117207753Smm		case '5': case '6': case '7': case '8': case '9':
118207753Smm			coder_set_preset(c - '0');
119207753Smm			break;
120207753Smm
121207753Smm		// --memory
122207753Smm		case 'M': {
123207753Smm			// Support specifying the limit as a percentage of
124207753Smm			// installed physical RAM.
125207753Smm			size_t len = strlen(optarg);
126207753Smm			if (len > 0 && optarg[len - 1] == '%') {
127207753Smm				optarg[len - 1] = '\0';
128207753Smm				hardware_memlimit_set_percentage(
129207753Smm						str_to_uint64(
130207753Smm						"memory%", optarg, 1, 100));
131207753Smm			} else {
132207753Smm				// On 32-bit systems, SIZE_MAX would make more
133207753Smm				// sense than UINT64_MAX. But use UINT64_MAX
134207753Smm				// still so that scripts that assume > 4 GiB
135207753Smm				// values don't break.
136207753Smm				hardware_memlimit_set(str_to_uint64(
137207753Smm						"memory", optarg,
138207753Smm						0, UINT64_MAX));
139207753Smm			}
140207753Smm
141207753Smm			break;
142207753Smm		}
143207753Smm
144207753Smm		// --suffix
145207753Smm		case 'S':
146207753Smm			suffix_set(optarg);
147207753Smm			break;
148207753Smm
149207753Smm		case 'T':
150207753Smm			hardware_threadlimit_set(str_to_uint64(
151207753Smm					"threads", optarg, 0, UINT32_MAX));
152207753Smm			break;
153207753Smm
154207753Smm		// --version
155207753Smm		case 'V':
156207753Smm			// This doesn't return.
157207753Smm			message_version();
158207753Smm
159207753Smm		// --stdout
160207753Smm		case 'c':
161207753Smm			opt_stdout = true;
162207753Smm			break;
163207753Smm
164207753Smm		// --decompress
165207753Smm		case 'd':
166207753Smm			opt_mode = MODE_DECOMPRESS;
167207753Smm			break;
168207753Smm
169207753Smm		// --extreme
170207753Smm		case 'e':
171207753Smm			coder_set_extreme();
172207753Smm			break;
173207753Smm
174207753Smm		// --force
175207753Smm		case 'f':
176207753Smm			opt_force = true;
177207753Smm			break;
178207753Smm
179207753Smm		// --info-memory
180207753Smm		case OPT_INFO_MEMORY:
181207753Smm			// This doesn't return.
182207753Smm			message_memlimit();
183207753Smm
184207753Smm		// --help
185207753Smm		case 'h':
186207753Smm			// This doesn't return.
187207753Smm			message_help(false);
188207753Smm
189207753Smm		// --long-help
190207753Smm		case 'H':
191207753Smm			// This doesn't return.
192207753Smm			message_help(true);
193207753Smm
194207753Smm		// --list
195207753Smm		case 'l':
196207753Smm			opt_mode = MODE_LIST;
197207753Smm			break;
198207753Smm
199207753Smm		// --keep
200207753Smm		case 'k':
201207753Smm			opt_keep_original = true;
202207753Smm			break;
203207753Smm
204207753Smm		// --quiet
205207753Smm		case 'q':
206207753Smm			message_verbosity_decrease();
207207753Smm			break;
208207753Smm
209207753Smm		case 'Q':
210207753Smm			set_exit_no_warn();
211207753Smm			break;
212207753Smm
213207753Smm		case 't':
214207753Smm			opt_mode = MODE_TEST;
215207753Smm			break;
216207753Smm
217207753Smm		// --verbose
218207753Smm		case 'v':
219207753Smm			message_verbosity_increase();
220207753Smm			break;
221207753Smm
222207753Smm		// --robot
223207753Smm		case OPT_ROBOT:
224207753Smm			opt_robot = true;
225207753Smm
226207753Smm			// This is to make sure that floating point numbers
227207753Smm			// always have a dot as decimal separator.
228207753Smm			setlocale(LC_NUMERIC, "C");
229207753Smm			break;
230207753Smm
231207753Smm		case 'z':
232207753Smm			opt_mode = MODE_COMPRESS;
233207753Smm			break;
234207753Smm
235207753Smm		// Filter setup
236207753Smm
237207753Smm		case OPT_SUBBLOCK:
238207753Smm			coder_add_filter(LZMA_FILTER_SUBBLOCK,
239207753Smm					options_subblock(optarg));
240207753Smm			break;
241207753Smm
242207753Smm		case OPT_X86:
243207753Smm			coder_add_filter(LZMA_FILTER_X86,
244207753Smm					options_bcj(optarg));
245207753Smm			break;
246207753Smm
247207753Smm		case OPT_POWERPC:
248207753Smm			coder_add_filter(LZMA_FILTER_POWERPC,
249207753Smm					options_bcj(optarg));
250207753Smm			break;
251207753Smm
252207753Smm		case OPT_IA64:
253207753Smm			coder_add_filter(LZMA_FILTER_IA64,
254207753Smm					options_bcj(optarg));
255207753Smm			break;
256207753Smm
257207753Smm		case OPT_ARM:
258207753Smm			coder_add_filter(LZMA_FILTER_ARM,
259207753Smm					options_bcj(optarg));
260207753Smm			break;
261207753Smm
262207753Smm		case OPT_ARMTHUMB:
263207753Smm			coder_add_filter(LZMA_FILTER_ARMTHUMB,
264207753Smm					options_bcj(optarg));
265207753Smm			break;
266207753Smm
267207753Smm		case OPT_SPARC:
268207753Smm			coder_add_filter(LZMA_FILTER_SPARC,
269207753Smm					options_bcj(optarg));
270207753Smm			break;
271207753Smm
272207753Smm		case OPT_DELTA:
273207753Smm			coder_add_filter(LZMA_FILTER_DELTA,
274207753Smm					options_delta(optarg));
275207753Smm			break;
276207753Smm
277207753Smm		case OPT_LZMA1:
278207753Smm			coder_add_filter(LZMA_FILTER_LZMA1,
279207753Smm					options_lzma(optarg));
280207753Smm			break;
281207753Smm
282207753Smm		case OPT_LZMA2:
283207753Smm			coder_add_filter(LZMA_FILTER_LZMA2,
284207753Smm					options_lzma(optarg));
285207753Smm			break;
286207753Smm
287207753Smm		// Other
288207753Smm
289207753Smm		// --format
290207753Smm		case 'F': {
291207753Smm			// Just in case, support both "lzma" and "alone" since
292207753Smm			// the latter was used for forward compatibility in
293207753Smm			// LZMA Utils 4.32.x.
294207753Smm			static const struct {
295207753Smm				char str[8];
296207753Smm				enum format_type format;
297207753Smm			} types[] = {
298207753Smm				{ "auto",   FORMAT_AUTO },
299207753Smm				{ "xz",     FORMAT_XZ },
300207753Smm				{ "lzma",   FORMAT_LZMA },
301207753Smm				{ "alone",  FORMAT_LZMA },
302207753Smm				// { "gzip",   FORMAT_GZIP },
303207753Smm				// { "gz",     FORMAT_GZIP },
304207753Smm				{ "raw",    FORMAT_RAW },
305207753Smm			};
306207753Smm
307207753Smm			size_t i = 0;
308207753Smm			while (strcmp(types[i].str, optarg) != 0)
309207753Smm				if (++i == ARRAY_SIZE(types))
310207753Smm					message_fatal(_("%s: Unknown file "
311207753Smm							"format type"),
312207753Smm							optarg);
313207753Smm
314207753Smm			opt_format = types[i].format;
315207753Smm			break;
316207753Smm		}
317207753Smm
318207753Smm		// --check
319207753Smm		case 'C': {
320207753Smm			static const struct {
321207753Smm				char str[8];
322207753Smm				lzma_check check;
323207753Smm			} types[] = {
324207753Smm				{ "none",   LZMA_CHECK_NONE },
325207753Smm				{ "crc32",  LZMA_CHECK_CRC32 },
326207753Smm				{ "crc64",  LZMA_CHECK_CRC64 },
327207753Smm				{ "sha256", LZMA_CHECK_SHA256 },
328207753Smm			};
329207753Smm
330207753Smm			size_t i = 0;
331207753Smm			while (strcmp(types[i].str, optarg) != 0) {
332207753Smm				if (++i == ARRAY_SIZE(types))
333207753Smm					message_fatal(_("%s: Unsupported "
334207753Smm							"integrity "
335207753Smm							"check type"), optarg);
336207753Smm			}
337207753Smm
338207753Smm			// Use a separate check in case we are using different
339207753Smm			// liblzma than what was used to compile us.
340207753Smm			if (!lzma_check_is_supported(types[i].check))
341207753Smm				message_fatal(_("%s: Unsupported integrity "
342207753Smm						"check type"), optarg);
343207753Smm
344207753Smm			coder_set_check(types[i].check);
345207753Smm			break;
346207753Smm		}
347207753Smm
348207753Smm		case OPT_NO_SPARSE:
349207753Smm			io_no_sparse();
350207753Smm			break;
351207753Smm
352207753Smm		case OPT_FILES:
353207753Smm			args->files_delim = '\n';
354207753Smm
355207753Smm		// Fall through
356207753Smm
357207753Smm		case OPT_FILES0:
358207753Smm			if (args->files_name != NULL)
359207753Smm				message_fatal(_("Only one file can be "
360207753Smm						"specified with `--files' "
361207753Smm						"or `--files0'."));
362207753Smm
363207753Smm			if (optarg == NULL) {
364207753Smm				args->files_name = (char *)stdin_filename;
365207753Smm				args->files_file = stdin;
366207753Smm			} else {
367207753Smm				args->files_name = optarg;
368207753Smm				args->files_file = fopen(optarg,
369207753Smm						c == OPT_FILES ? "r" : "rb");
370207753Smm				if (args->files_file == NULL)
371207753Smm					message_fatal("%s: %s", optarg,
372207753Smm							strerror(errno));
373207753Smm			}
374207753Smm
375207753Smm			break;
376207753Smm
377207753Smm		default:
378207753Smm			message_try_help();
379207753Smm			tuklib_exit(E_ERROR, E_ERROR, false);
380207753Smm		}
381207753Smm	}
382207753Smm
383207753Smm	return;
384207753Smm}
385207753Smm
386207753Smm
387207753Smmstatic void
388207753Smmparse_environment(args_info *args, char *argv0)
389207753Smm{
390207753Smm	char *env = getenv("XZ_OPT");
391207753Smm	if (env == NULL)
392207753Smm		return;
393207753Smm
394207753Smm	// We modify the string, so make a copy of it.
395207753Smm	env = xstrdup(env);
396207753Smm
397207753Smm	// Calculate the number of arguments in env. argc stats at one
398207753Smm	// to include space for the program name.
399207753Smm	int argc = 1;
400207753Smm	bool prev_was_space = true;
401207753Smm	for (size_t i = 0; env[i] != '\0'; ++i) {
402207753Smm		// NOTE: Cast to unsigned char is needed so that correct
403207753Smm		// value gets passed to isspace(), which expects
404207753Smm		// unsigned char cast to int. Casting to int is done
405207753Smm		// automatically due to integer promotion, but we need to
406207753Smm		// force char to unsigned char manually. Otherwise 8-bit
407207753Smm		// characters would get promoted to wrong value if
408207753Smm		// char is signed.
409207753Smm		if (isspace((unsigned char)env[i])) {
410207753Smm			prev_was_space = true;
411207753Smm		} else if (prev_was_space) {
412207753Smm			prev_was_space = false;
413207753Smm
414207753Smm			// Keep argc small enough to fit into a singed int
415207753Smm			// and to keep it usable for memory allocation.
416207753Smm			if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
417207753Smm				message_fatal(_("The environment variable "
418207753Smm						"XZ_OPT contains too many "
419207753Smm						"arguments"));
420207753Smm		}
421207753Smm	}
422207753Smm
423207753Smm	// Allocate memory to hold pointers to the arguments. Add one to get
424207753Smm	// space for the terminating NULL (if some systems happen to need it).
425207753Smm	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
426207753Smm	argv[0] = argv0;
427207753Smm	argv[argc] = NULL;
428207753Smm
429207753Smm	// Go through the string again. Split the arguments using '\0'
430207753Smm	// characters and add pointers to the resulting strings to argv.
431207753Smm	argc = 1;
432207753Smm	prev_was_space = true;
433207753Smm	for (size_t i = 0; env[i] != '\0'; ++i) {
434207753Smm		if (isspace((unsigned char)env[i])) {
435207753Smm			prev_was_space = true;
436207753Smm			env[i] = '\0';
437207753Smm		} else if (prev_was_space) {
438207753Smm			prev_was_space = false;
439207753Smm			argv[argc++] = env + i;
440207753Smm		}
441207753Smm	}
442207753Smm
443207753Smm	// Parse the argument list we got from the environment. All non-option
444207753Smm	// arguments i.e. filenames are ignored.
445207753Smm	parse_real(args, argc, argv);
446207753Smm
447207753Smm	// Reset the state of the getopt_long() so that we can parse the
448207753Smm	// command line options too. There are two incompatible ways to
449207753Smm	// do it.
450207753Smm#ifdef HAVE_OPTRESET
451207753Smm	// BSD
452207753Smm	optind = 1;
453207753Smm	optreset = 1;
454207753Smm#else
455207753Smm	// GNU, Solaris
456207753Smm	optind = 0;
457207753Smm#endif
458207753Smm
459207753Smm	// We don't need the argument list from environment anymore.
460207753Smm	free(argv);
461207753Smm	free(env);
462207753Smm
463207753Smm	return;
464207753Smm}
465207753Smm
466207753Smm
467207753Smmextern void
468207753Smmargs_parse(args_info *args, int argc, char **argv)
469207753Smm{
470207753Smm	// Initialize those parts of *args that we need later.
471207753Smm	args->files_name = NULL;
472207753Smm	args->files_file = NULL;
473207753Smm	args->files_delim = '\0';
474207753Smm
475207753Smm	// Check how we were called.
476207753Smm	{
477207753Smm		// Remove the leading path name, if any.
478207753Smm		const char *name = strrchr(argv[0], '/');
479207753Smm		if (name == NULL)
480207753Smm			name = argv[0];
481207753Smm		else
482207753Smm			++name;
483207753Smm
484207753Smm		// NOTE: It's possible that name[0] is now '\0' if argv[0]
485207753Smm		// is weird, but it doesn't matter here.
486207753Smm
487207753Smm		// Look for full command names instead of substrings like
488207753Smm		// "un", "cat", and "lz" to reduce possibility of false
489207753Smm		// positives when the programs have been renamed.
490207753Smm		if (strstr(name, "xzcat") != NULL) {
491207753Smm			opt_mode = MODE_DECOMPRESS;
492207753Smm			opt_stdout = true;
493207753Smm		} else if (strstr(name, "unxz") != NULL) {
494207753Smm			opt_mode = MODE_DECOMPRESS;
495207753Smm		} else if (strstr(name, "lzcat") != NULL) {
496207753Smm			opt_format = FORMAT_LZMA;
497207753Smm			opt_mode = MODE_DECOMPRESS;
498207753Smm			opt_stdout = true;
499207753Smm		} else if (strstr(name, "unlzma") != NULL) {
500207753Smm			opt_format = FORMAT_LZMA;
501207753Smm			opt_mode = MODE_DECOMPRESS;
502207753Smm		} else if (strstr(name, "lzma") != NULL) {
503207753Smm			opt_format = FORMAT_LZMA;
504207753Smm		}
505207753Smm	}
506207753Smm
507207753Smm	// First the flags from environment
508207753Smm	parse_environment(args, argv[0]);
509207753Smm
510207753Smm	// Then from the command line
511207753Smm	parse_real(args, argc, argv);
512207753Smm
513207753Smm	// Never remove the source file when the destination is not on disk.
514207753Smm	// In test mode the data is written nowhere, but setting opt_stdout
515207753Smm	// will make the rest of the code behave well.
516207753Smm	if (opt_stdout || opt_mode == MODE_TEST) {
517207753Smm		opt_keep_original = true;
518207753Smm		opt_stdout = true;
519207753Smm	}
520207753Smm
521207753Smm	// When compressing, if no --format flag was used, or it
522207753Smm	// was --format=auto, we compress to the .xz format.
523207753Smm	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
524207753Smm		opt_format = FORMAT_XZ;
525207753Smm
526207753Smm	// Compression settings need to be validated (options themselves and
527207753Smm	// their memory usage) when compressing to any file format. It has to
528207753Smm	// be done also when uncompressing raw data, since for raw decoding
529207753Smm	// the options given on the command line are used to know what kind
530207753Smm	// of raw data we are supposed to decode.
531207753Smm	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
532207753Smm		coder_set_compression_settings();
533207753Smm
534207753Smm	// If no filenames are given, use stdin.
535207753Smm	if (argv[optind] == NULL && args->files_name == NULL) {
536207753Smm		// We don't modify or free() the "-" constant. The caller
537207753Smm		// modifies this so don't make the struct itself const.
538207753Smm		static char *names_stdin[2] = { (char *)"-", NULL };
539207753Smm		args->arg_names = names_stdin;
540207753Smm		args->arg_count = 1;
541207753Smm	} else {
542207753Smm		// We got at least one filename from the command line, or
543207753Smm		// --files or --files0 was specified.
544207753Smm		args->arg_names = argv + optind;
545207753Smm		args->arg_count = argc - optind;
546207753Smm	}
547207753Smm
548207753Smm	return;
549207753Smm}
550