1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       xzdec.c
4207753Smm/// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5207753Smm//
6207753Smm//  Author:     Lasse Collin
7207753Smm//
8207753Smm//  This file has been put into the public domain.
9207753Smm//  You can do whatever you want with this file.
10207753Smm//
11207753Smm///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "sysdefs.h"
14207753Smm#include "lzma.h"
15207753Smm
16207753Smm#include <stdarg.h>
17207753Smm#include <errno.h>
18207753Smm#include <stdio.h>
19207753Smm#include <unistd.h>
20207753Smm
21207753Smm#include "getopt.h"
22207753Smm#include "tuklib_progname.h"
23207753Smm#include "tuklib_exit.h"
24207753Smm
25207753Smm#ifdef TUKLIB_DOSLIKE
26207753Smm#	include <fcntl.h>
27207753Smm#	include <io.h>
28207753Smm#endif
29207753Smm
30207753Smm
// Name of the container format this build decodes. It is pasted into the
// --help and --version texts. Defining LZMADEC at compile time selects
// the "lzma" variant of the tool; otherwise the "xz" variant is built.
#if defined(LZMADEC)
#	define TOOL_FORMAT "lzma"
#else
#	define TOOL_FORMAT "xz"
#endif
36207753Smm
37207753Smm
/// Error verbosity counter. It starts at two and every --quiet on the
/// command line lowers it by one (never below zero). Once it reaches zero,
/// i.e. after --quiet has been given at least twice, error messages are
/// suppressed.
static unsigned display_errors = 2U;
41207753Smm
42207753Smm
43223935Smmstatic void lzma_attribute((__format__(__printf__, 1, 2)))
44207753Smmmy_errorf(const char *fmt, ...)
45207753Smm{
46207753Smm	va_list ap;
47207753Smm	va_start(ap, fmt);
48207753Smm
49207753Smm	if (display_errors) {
50207753Smm		fprintf(stderr, "%s: ", progname);
51207753Smm		vfprintf(stderr, fmt, ap);
52207753Smm		fprintf(stderr, "\n");
53207753Smm	}
54207753Smm
55207753Smm	va_end(ap);
56207753Smm	return;
57207753Smm}
58207753Smm
59207753Smm
60223935Smmstatic void lzma_attribute((__noreturn__))
61207753Smmhelp(void)
62207753Smm{
63207753Smm	printf(
64207753Smm"Usage: %s [OPTION]... [FILE]...\n"
65207753Smm"Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
66207753Smm"\n"
67207753Smm"  -c, --stdout       (ignored)\n"
68207753Smm"  -d, --decompress   (ignored)\n"
69207753Smm"  -k, --keep         (ignored)\n"
70207753Smm"  -q, --quiet        specify *twice* to suppress errors\n"
71207753Smm"  -Q, --no-warn      (ignored)\n"
72207753Smm"  -h, --help         display this help and exit\n"
73207753Smm"  -V, --version      display the version number and exit\n"
74207753Smm"\n"
75207753Smm"With no FILE, or when FILE is -, read standard input.\n"
76207753Smm"\n"
77207753Smm"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
78213700SmmPACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
79213700Smm
80207753Smm	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
81207753Smm}
82207753Smm
83207753Smm
84223935Smmstatic void lzma_attribute((__noreturn__))
85207753Smmversion(void)
86207753Smm{
87207753Smm	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
88207753Smm			"liblzma %s\n", lzma_version_string());
89207753Smm
90207753Smm	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
91207753Smm}
92207753Smm
93207753Smm
94207753Smm/// Parses command line options.
95207753Smmstatic void
96207753Smmparse_options(int argc, char **argv)
97207753Smm{
98207753Smm	static const char short_opts[] = "cdkM:hqQV";
99207753Smm	static const struct option long_opts[] = {
100207753Smm		{ "stdout",       no_argument,         NULL, 'c' },
101207753Smm		{ "to-stdout",    no_argument,         NULL, 'c' },
102207753Smm		{ "decompress",   no_argument,         NULL, 'd' },
103207753Smm		{ "uncompress",   no_argument,         NULL, 'd' },
104207753Smm		{ "keep",         no_argument,         NULL, 'k' },
105207753Smm		{ "quiet",        no_argument,         NULL, 'q' },
106207753Smm		{ "no-warn",      no_argument,         NULL, 'Q' },
107207753Smm		{ "help",         no_argument,         NULL, 'h' },
108207753Smm		{ "version",      no_argument,         NULL, 'V' },
109207753Smm		{ NULL,           0,                   NULL, 0   }
110207753Smm	};
111207753Smm
112207753Smm	int c;
113207753Smm
114207753Smm	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
115207753Smm			!= -1) {
116207753Smm		switch (c) {
117207753Smm		case 'c':
118207753Smm		case 'd':
119207753Smm		case 'k':
120207753Smm		case 'Q':
121207753Smm			break;
122207753Smm
123207753Smm		case 'q':
124207753Smm			if (display_errors > 0)
125207753Smm				--display_errors;
126207753Smm
127207753Smm			break;
128207753Smm
129207753Smm		case 'h':
130207753Smm			help();
131207753Smm
132207753Smm		case 'V':
133207753Smm			version();
134207753Smm
135207753Smm		default:
136207753Smm			exit(EXIT_FAILURE);
137207753Smm		}
138207753Smm	}
139207753Smm
140207753Smm	return;
141207753Smm}
142207753Smm
143207753Smm
144207753Smmstatic void
145207753Smmuncompress(lzma_stream *strm, FILE *file, const char *filename)
146207753Smm{
147207753Smm	lzma_ret ret;
148207753Smm
149207753Smm	// Initialize the decoder
150207753Smm#ifdef LZMADEC
151213700Smm	ret = lzma_alone_decoder(strm, UINT64_MAX);
152207753Smm#else
153213700Smm	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
154207753Smm#endif
155207753Smm
156207753Smm	// The only reasonable error here is LZMA_MEM_ERROR.
157207753Smm	if (ret != LZMA_OK) {
158207753Smm		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
159207753Smm				: "Internal error (bug)");
160207753Smm		exit(EXIT_FAILURE);
161207753Smm	}
162207753Smm
163207753Smm	// Input and output buffers
164207753Smm	uint8_t in_buf[BUFSIZ];
165207753Smm	uint8_t out_buf[BUFSIZ];
166207753Smm
167207753Smm	strm->avail_in = 0;
168207753Smm	strm->next_out = out_buf;
169207753Smm	strm->avail_out = BUFSIZ;
170207753Smm
171207753Smm	lzma_action action = LZMA_RUN;
172207753Smm
173207753Smm	while (true) {
174207753Smm		if (strm->avail_in == 0) {
175207753Smm			strm->next_in = in_buf;
176207753Smm			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
177207753Smm
178207753Smm			if (ferror(file)) {
179207753Smm				// POSIX says that fread() sets errno if
180207753Smm				// an error occurred. ferror() doesn't
181207753Smm				// touch errno.
182207753Smm				my_errorf("%s: Error reading input file: %s",
183207753Smm						filename, strerror(errno));
184207753Smm				exit(EXIT_FAILURE);
185207753Smm			}
186207753Smm
187207753Smm#ifndef LZMADEC
188207753Smm			// When using LZMA_CONCATENATED, we need to tell
189207753Smm			// liblzma when it has got all the input.
190207753Smm			if (feof(file))
191207753Smm				action = LZMA_FINISH;
192207753Smm#endif
193207753Smm		}
194207753Smm
195207753Smm		ret = lzma_code(strm, action);
196207753Smm
197207753Smm		// Write and check write error before checking decoder error.
198207753Smm		// This way as much data as possible gets written to output
199207753Smm		// even if decoder detected an error.
200207753Smm		if (strm->avail_out == 0 || ret != LZMA_OK) {
201207753Smm			const size_t write_size = BUFSIZ - strm->avail_out;
202207753Smm
203207753Smm			if (fwrite(out_buf, 1, write_size, stdout)
204207753Smm					!= write_size) {
205207753Smm				// Wouldn't be a surprise if writing to stderr
206207753Smm				// would fail too but at least try to show an
207207753Smm				// error message.
208207753Smm				my_errorf("Cannot write to standard output: "
209207753Smm						"%s", strerror(errno));
210207753Smm				exit(EXIT_FAILURE);
211207753Smm			}
212207753Smm
213207753Smm			strm->next_out = out_buf;
214207753Smm			strm->avail_out = BUFSIZ;
215207753Smm		}
216207753Smm
217207753Smm		if (ret != LZMA_OK) {
218207753Smm			if (ret == LZMA_STREAM_END) {
219207753Smm#ifdef LZMADEC
220207753Smm				// Check that there's no trailing garbage.
221207753Smm				if (strm->avail_in != 0
222207753Smm						|| fread(in_buf, 1, 1, file)
223207753Smm							!= 0
224207753Smm						|| !feof(file))
225207753Smm					ret = LZMA_DATA_ERROR;
226207753Smm				else
227207753Smm					return;
228207753Smm#else
229207753Smm				// lzma_stream_decoder() already guarantees
230207753Smm				// that there's no trailing garbage.
231207753Smm				assert(strm->avail_in == 0);
232207753Smm				assert(action == LZMA_FINISH);
233207753Smm				assert(feof(file));
234207753Smm				return;
235207753Smm#endif
236207753Smm			}
237207753Smm
238207753Smm			const char *msg;
239207753Smm			switch (ret) {
240207753Smm			case LZMA_MEM_ERROR:
241207753Smm				msg = strerror(ENOMEM);
242207753Smm				break;
243207753Smm
244207753Smm			case LZMA_FORMAT_ERROR:
245207753Smm				msg = "File format not recognized";
246207753Smm				break;
247207753Smm
248207753Smm			case LZMA_OPTIONS_ERROR:
249207753Smm				// FIXME: Better message?
250207753Smm				msg = "Unsupported compression options";
251207753Smm				break;
252207753Smm
253207753Smm			case LZMA_DATA_ERROR:
254207753Smm				msg = "File is corrupt";
255207753Smm				break;
256207753Smm
257207753Smm			case LZMA_BUF_ERROR:
258207753Smm				msg = "Unexpected end of input";
259207753Smm				break;
260207753Smm
261207753Smm			default:
262207753Smm				msg = "Internal error (bug)";
263207753Smm				break;
264207753Smm			}
265207753Smm
266207753Smm			my_errorf("%s: %s", filename, msg);
267207753Smm			exit(EXIT_FAILURE);
268207753Smm		}
269207753Smm	}
270207753Smm}
271207753Smm
272207753Smm
273207753Smmint
274207753Smmmain(int argc, char **argv)
275207753Smm{
276207753Smm	// Initialize progname which we will be used in error messages.
277207753Smm	tuklib_progname_init(argv);
278207753Smm
279207753Smm	// Parse the command line options.
280207753Smm	parse_options(argc, argv);
281207753Smm
282207753Smm	// The same lzma_stream is used for all files that we decode. This way
283207753Smm	// we don't need to reallocate memory for every file if they use same
284207753Smm	// compression settings.
285207753Smm	lzma_stream strm = LZMA_STREAM_INIT;
286207753Smm
287207753Smm	// Some systems require setting stdin and stdout to binary mode.
288207753Smm#ifdef TUKLIB_DOSLIKE
289207753Smm	setmode(fileno(stdin), O_BINARY);
290207753Smm	setmode(fileno(stdout), O_BINARY);
291207753Smm#endif
292207753Smm
293207753Smm	if (optind == argc) {
294207753Smm		// No filenames given, decode from stdin.
295207753Smm		uncompress(&strm, stdin, "(stdin)");
296207753Smm	} else {
297207753Smm		// Loop through the filenames given on the command line.
298207753Smm		do {
299207753Smm			// "-" indicates stdin.
300207753Smm			if (strcmp(argv[optind], "-") == 0) {
301207753Smm				uncompress(&strm, stdin, "(stdin)");
302207753Smm			} else {
303207753Smm				FILE *file = fopen(argv[optind], "rb");
304207753Smm				if (file == NULL) {
305207753Smm					my_errorf("%s: %s", argv[optind],
306207753Smm							strerror(errno));
307207753Smm					exit(EXIT_FAILURE);
308207753Smm				}
309207753Smm
310207753Smm				uncompress(&strm, file, argv[optind]);
311207753Smm				fclose(file);
312207753Smm			}
313207753Smm		} while (++optind < argc);
314207753Smm	}
315207753Smm
316207753Smm#ifndef NDEBUG
317207753Smm	// Free the memory only when debugging. Freeing wastes some time,
318207753Smm	// but allows detecting possible memory leaks with Valgrind.
319207753Smm	lzma_end(&strm);
320207753Smm#endif
321207753Smm
322207753Smm	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
323207753Smm}
324