list.c revision 334607
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       list.c
4/// \brief      Listing information about .xz files
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "private.h"
14#include "tuklib_integer.h"
15
16
/// \brief      Information about a .xz file
///
/// parse_indexes() stores the combined Index into idx and adds to
/// stream_padding. parse_block_header() updates memusage_max,
/// all_have_sizes, and min_version for each Block Header it examines.
typedef struct {
	/// Combined Index of all Streams in the file
	lzma_index *idx;

	/// Total amount of Stream Padding
	uint64_t stream_padding;

	/// Highest memory usage so far
	uint64_t memusage_max;

	/// True if all Blocks so far have Compressed Size and
	/// Uncompressed Size fields
	bool all_have_sizes;

	/// Oldest XZ Utils version that will decompress the file.
	/// The value uses the encoding that xz_ver_to_str() decodes.
	uint32_t min_version;

} xz_file_info;

/// Initial value for xz_file_info. The initializer is positional, so it
/// must be kept in sync with the order of the members above. The last
/// value, 50000002, is the version that xz_ver_to_str() formats as "5.0.0".
#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
38
39
/// \brief      Information about a .xz Block
///
/// The members are filled in by parse_block_header() and are printed only
/// in the most verbose listing modes.
typedef struct {
	/// Size of the Block Header
	uint32_t header_size;

	/// A few of the Block Flags as a string: the first character is
	/// 'c' if the Compressed Size field is present and the second is
	/// 'u' if the Uncompressed Size field is present; '-' is used for
	/// an absent field. The third byte is the terminating '\0'.
	char flags[3];

	/// Size of the Compressed Data field in the Block
	lzma_vli compressed_size;

	/// Decoder memory usage for this Block
	uint64_t memusage;

	/// The filter chain of this Block in human-readable form
	char filter_chain[FILTERS_STR_SIZE];

} block_header_info;
58
59
/// \brief      Check ID to string mapping
///
/// The array is indexed with the Check ID (0 to LZMA_CHECK_ID_MAX).
/// The strings are passed through _() where they are printed for humans;
/// print_info_robot() and the robot mode branch of get_check_names() use
/// them exactly as they are written here. Each entry has room for 11 bytes
/// plus the terminating '\0'.
static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
	// TRANSLATORS: Indicates that there is no integrity check.
	// This string is used in tables, so the width must not
	// exceed ten columns with a fixed-width font.
	N_("None"),
	"CRC32",
	// TRANSLATORS: Indicates that integrity check name is not known,
	// but the Check ID is known (here 2). This and other "Unknown-N"
	// strings are used in tables, so the width must not exceed ten
	// columns with a fixed-width font. It's OK to omit the dash if
	// you need space for one extra letter, but don't use spaces.
	N_("Unknown-2"),
	N_("Unknown-3"),
	"CRC64",
	N_("Unknown-5"),
	N_("Unknown-6"),
	N_("Unknown-7"),
	N_("Unknown-8"),
	N_("Unknown-9"),
	"SHA-256",
	N_("Unknown-11"),
	N_("Unknown-12"),
	N_("Unknown-13"),
	N_("Unknown-14"),
	N_("Unknown-15"),
};
87
/// Buffer size for get_check_names(). Every caller in this file declares
/// its buffer with this size. This may be a bit ridiculous, but at least
/// it's enough if some language needs many multibyte chars.
#define CHECKS_STR_SIZE 1024


/// Value of the Check field as hexadecimal string: two hex digits for each
/// byte of the biggest possible Check plus the terminating '\0'.
/// This is set by parse_check_value(), which stores "---" here if the
/// Stream has no integrity check.
static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1];
96
97
/// Totals that are displayed if there was more than one file.
/// The "files" counter is also used in print_info_adv() to show
/// the file number.
///
/// update_totals() folds the information of one file into this structure.
/// The initializer is positional: every counter starts from zero,
/// min_version starts from 50000002 (which xz_ver_to_str() formats as
/// "5.0.0"), and all_have_sizes starts as true.
static struct {
	uint64_t files;
	uint64_t streams;
	uint64_t blocks;
	uint64_t compressed_size;
	uint64_t uncompressed_size;
	uint64_t stream_padding;
	uint64_t memusage_max;
	// Bit mask of the Check IDs seen (same format that
	// get_check_names() takes)
	uint32_t checks;
	uint32_t min_version;
	bool all_have_sizes;
} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 50000002, true };
113
114
/// \brief      Format an encoded XZ Utils version number as text
///
/// The number is interpreted as
/// major * 10000000 + minor * 10000 + patch * 10 + stability
/// where stability 0 means alpha, 1 means beta, and any other value
/// adds no suffix.
///
/// \param      ver     Encoded version number
///
/// \return     Pointer to a static buffer holding "major.minor.patch"
///             followed by the optional suffix. The buffer is overwritten
///             by the next call.
static const char *
xz_ver_to_str(uint32_t ver)
{
	static char text[32];

	// Split the number into its decimal fields.
	const unsigned int stability_code = ver % 10U;
	const unsigned int patch = (ver / 10U) % 1000U;
	const unsigned int minor = (ver / 10000U) % 1000U;
	const unsigned int major = ver / 10000000U;

	const char *suffix;
	switch (stability_code) {
	case 0:
		suffix = "alpha";
		break;

	case 1:
		suffix = "beta";
		break;

	default:
		suffix = "";
		break;
	}

	snprintf(text, sizeof(text), "%u.%u.%u%s",
			major, minor, patch, suffix);
	return text;
}
136
137
/// \brief      Parse the Index(es) from the given .xz file
///
/// The file is processed backwards starting from its end, one Stream per
/// iteration of the outer loop: Stream Padding is skipped, the Stream Footer
/// and the Index are decoded, and finally the Stream Header is read and
/// compared against the Stream Footer. The earlier decoded Indexes are
/// appended after the newest one, so the combined Index is in file order.
///
/// \param      xfi     Pointer to structure where the decoded information
///                     is stored. xfi->idx is set only on success.
///                     xfi->stream_padding is increased after each
///                     successfully parsed Stream.
/// \param      pair    Input file
///
/// \return     On success, false is returned. On error, true is returned.
///             Errors detected in this function are reported with
///             message_error(); io_pread() failures are presumably
///             reported by io_pread() itself.
///
// TODO: This function is pretty big. liblzma should have a function that
// takes a callback function to parse the Index(es) from a .xz file to make
// it easy for applications.
static bool
parse_indexes(xz_file_info *xfi, file_pair *pair)
{
	if (pair->src_st.st_size <= 0) {
		message_error(_("%s: File is empty"), pair->src_name);
		return true;
	}

	// A Stream needs at least the Stream Header and the Stream Footer,
	// which are both LZMA_STREAM_HEADER_SIZE bytes.
	if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
		message_error(_("%s: Too small to be a valid .xz file"),
				pair->src_name);
		return true;
	}

	io_buf buf;
	lzma_stream_flags header_flags;
	lzma_stream_flags footer_flags;
	lzma_ret ret;

	// lzma_stream for the Index decoder
	lzma_stream strm = LZMA_STREAM_INIT;

	// All Indexes decoded so far
	lzma_index *combined_index = NULL;

	// The Index currently being decoded
	lzma_index *this_index = NULL;

	// Current position in the file. We parse the file backwards so
	// initialize it to point to the end of the file.
	off_t pos = pair->src_st.st_size;

	// Each loop iteration decodes one Index.
	do {
		// Check that there is enough data left to contain at least
		// the Stream Header and Stream Footer. This check cannot
		// fail in the first pass of this loop.
		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		pos -= LZMA_STREAM_HEADER_SIZE;
		lzma_vli stream_padding = 0;

		// Locate the Stream Footer. There may be Stream Padding which
		// we must skip when reading backwards.
		while (true) {
			if (pos < LZMA_STREAM_HEADER_SIZE) {
				message_error("%s: %s", pair->src_name,
						message_strm(
							LZMA_DATA_ERROR));
				goto error;
			}

			if (io_pread(pair, &buf,
					LZMA_STREAM_HEADER_SIZE, pos))
				goto error;

			// Stream Padding is always a multiple of four bytes.
			// buf.u32[2] is the last four of the 12 bytes that
			// were read; if it isn't zero, the buffer is taken
			// to hold the Stream Footer.
			int i = 2;
			if (buf.u32[i] != 0)
				break;

			// To avoid calling io_pread() for every four bytes
			// of Stream Padding, take advantage that we read
			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
			// check them too before calling io_pread() again.
			do {
				stream_padding += 4;
				pos -= 4;
				--i;
			} while (i >= 0 && buf.u32[i] == 0);
		}

		// Decode the Stream Footer.
		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		// Check that the Stream Footer doesn't specify something
		// that we don't support. This can only happen if the xz
		// version is older than liblzma and liblzma supports
		// something new.
		//
		// It is enough to check Stream Footer. Stream Header must
		// match when it is compared against Stream Footer with
		// lzma_stream_flags_compare().
		if (footer_flags.version != 0) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_OPTIONS_ERROR));
			goto error;
		}

		// Check that the size of the Index field looks sane.
		// At this point pos is the offset of the Stream Footer, so
		// the Index and the Stream Header have to fit before it.
		lzma_vli index_size = footer_flags.backward_size;
		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		// Set pos to the beginning of the Index.
		pos -= index_size;

		// See how much memory we can use for decoding this Index.
		// The memory used by the already decoded Indexes is
		// subtracted from the limit.
		uint64_t memlimit = hardware_memlimit_get(MODE_LIST);
		uint64_t memused = 0;
		if (combined_index != NULL) {
			memused = lzma_index_memused(combined_index);
			if (memused > memlimit)
				message_bug();

			memlimit -= memused;
		}

		// Decode the Index.
		ret = lzma_index_decoder(&strm, &this_index, memlimit);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		do {
			// Don't give the decoder more input than the
			// Index size.
			strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
			if (io_pread(pair, &buf, strm.avail_in, pos))
				goto error;

			// pos moves forward while the Index is being read;
			// index_size counts the bytes not yet given to
			// the decoder.
			pos += strm.avail_in;
			index_size -= strm.avail_in;

			strm.next_in = buf.u8;
			ret = lzma_code(&strm, LZMA_RUN);

		} while (ret == LZMA_OK);

		// If the decoding seems to be successful, check also that
		// the Index decoder consumed as much input as indicated
		// by the Backward Size field.
		if (ret == LZMA_STREAM_END)
			if (index_size != 0 || strm.avail_in != 0)
				ret = LZMA_DATA_ERROR;

		if (ret != LZMA_STREAM_END) {
			// LZMA_BUFFER_ERROR means that the Index decoder
			// would have liked more input than what the Index
			// size should be according to Stream Footer.
			// The message for LZMA_DATA_ERROR makes more
			// sense in that case.
			if (ret == LZMA_BUF_ERROR)
				ret = LZMA_DATA_ERROR;

			message_error("%s: %s", pair->src_name,
					message_strm(ret));

			// If the error was too low memory usage limit,
			// show also how much memory would have been needed.
			// The sum saturates to UINT64_MAX instead of
			// wrapping around.
			if (ret == LZMA_MEMLIMIT_ERROR) {
				uint64_t needed = lzma_memusage(&strm);
				if (UINT64_MAX - needed < memused)
					needed = UINT64_MAX;
				else
					needed += memused;

				message_mem_needed(V_ERROR, needed);
			}

			goto error;
		}

		// Decode the Stream Header and check that its Stream Flags
		// match the Stream Footer.
		//
		// After the successful decoding above, pos is at the end of
		// the Index. Go back over the Index and over the size of
		// the Stream Header; what remains before that has to be big
		// enough to hold the Blocks of this Stream.
		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		// Skip over the Blocks. Now pos is the offset of
		// the Stream Header.
		pos -= lzma_index_total_size(this_index);
		if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos))
			goto error;

		ret = lzma_stream_header_decode(&header_flags, buf.u8);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		// Store the decoded Stream Flags into this_index. This is
		// needed so that we can print which Check is used in each
		// Stream.
		ret = lzma_index_stream_flags(this_index, &footer_flags);
		if (ret != LZMA_OK)
			message_bug();

		// Store also the size of the Stream Padding field. It is
		// needed to show the offsets of the Streams correctly.
		ret = lzma_index_stream_padding(this_index, stream_padding);
		if (ret != LZMA_OK)
			message_bug();

		if (combined_index != NULL) {
			// Append the earlier decoded Indexes
			// after this_index.
			ret = lzma_index_cat(
					this_index, combined_index, NULL);
			if (ret != LZMA_OK) {
				message_error("%s: %s", pair->src_name,
						message_strm(ret));
				goto error;
			}
		}

		// this_index now holds everything decoded so far. Clear
		// this_index so that the error path doesn't free the same
		// Index twice.
		combined_index = this_index;
		this_index = NULL;

		xfi->stream_padding += stream_padding;

	// pos is the offset of the Stream Header that was just handled.
	// If it isn't at the beginning of the file, there is more to parse.
	} while (pos > 0);

	lzma_end(&strm);

	// All OK. Make combined_index available to the caller.
	xfi->idx = combined_index;
	return false;

error:
	// Something went wrong, free the allocated memory.
	lzma_end(&strm);
	lzma_index_end(combined_index, NULL);
	lzma_index_end(this_index, NULL);
	return true;
}
398
399
/// \brief      Parse the Block Header
///
/// The result is stored into *bhi. On success header_size, flags,
/// compressed_size, and memusage are assigned here and filter_chain is
/// filled in by message_filters_to_str(), so the callers in this file
/// pass the structure uninitialized.
///
/// xfi->all_have_sizes, xfi->memusage_max, and xfi->min_version are
/// updated to take this Block into account.
///
/// \param      pair    Input file
/// \param      iter    Iterator pointing to the Block to be examined
/// \param      bhi     Destination for the Block Header information
/// \param      xfi     Per-file information to be updated
///
/// \return     False on success, true on error.
static bool
parse_block_header(file_pair *pair, const lzma_index_iter *iter,
		block_header_info *bhi, xz_file_info *xfi)
{
#if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX
#	error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX
#endif

	// Get the whole Block Header with one read, but don't read past
	// the end of the Block (or even its Check field).
	const uint32_t size = my_min(iter->block.total_size
				- lzma_check_size(iter->stream.flags->check),
			LZMA_BLOCK_HEADER_SIZE_MAX);
	io_buf buf;
	if (io_pread(pair, &buf, size, iter->block.compressed_file_offset))
		return true;

	// Zero would mean Index Indicator and thus not a valid Block.
	if (buf.u8[0] == 0)
		goto data_error;

	// Initialize the block structure and decode Block Header Size.
	lzma_filter filters[LZMA_FILTERS_MAX + 1];
	lzma_block block;
	block.version = 0;
	block.check = iter->stream.flags->check;
	block.filters = filters;

	// The header must fit into what was read above.
	block.header_size = lzma_block_header_size_decode(buf.u8[0]);
	if (block.header_size > size)
		goto data_error;

	// Decode the Block Header.
	// NOTE(review): the error cases below don't free filters[]; this
	// relies on lzma_block_header_decode() not leaving allocated
	// options behind when it fails - confirm against liblzma.
	switch (lzma_block_header_decode(&block, NULL, buf.u8)) {
	case LZMA_OK:
		break;

	case LZMA_OPTIONS_ERROR:
		message_error("%s: %s", pair->src_name,
				message_strm(LZMA_OPTIONS_ERROR));
		return true;

	case LZMA_DATA_ERROR:
		goto data_error;

	default:
		message_bug();
	}

	// Check the Block Flags. These must be done before calling
	// lzma_block_compressed_size(), because it overwrites
	// block.compressed_size.
	bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN
			? 'c' : '-';
	bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN
			? 'u' : '-';
	bhi->flags[2] = '\0';

	// Collect information if all Blocks have both Compressed Size
	// and Uncompressed Size fields. They can be useful e.g. for
	// multi-threaded decompression so it can be useful to know it.
	xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN
			&& block.uncompressed_size != LZMA_VLI_UNKNOWN;

	// Validate or set block.compressed_size.
	switch (lzma_block_compressed_size(&block,
			iter->block.unpadded_size)) {
	case LZMA_OK:
		// Validate also block.uncompressed_size if it is present.
		// If it isn't present, there's no need to set it since
		// we aren't going to actually decompress the Block; if
		// we were decompressing, then we should set it so that
		// the Block decoder could validate the Uncompressed Size
		// that was stored in the Index.
		if (block.uncompressed_size == LZMA_VLI_UNKNOWN
				|| block.uncompressed_size
					== iter->block.uncompressed_size)
			break;

		// If the above fails, the file is corrupt so
		// LZMA_DATA_ERROR is a good error code.

	// Fall through

	case LZMA_DATA_ERROR:
		// Free the memory allocated by lzma_block_header_decode().
		for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
			free(filters[i].options);

		goto data_error;

	default:
		message_bug();
	}

	// Copy the known sizes.
	bhi->header_size = block.header_size;
	bhi->compressed_size = block.compressed_size;

	// Calculate the decoder memory usage and update the maximum
	// memory usage of this Block.
	bhi->memusage = lzma_raw_decoder_memusage(filters);
	if (xfi->memusage_max < bhi->memusage)
		xfi->memusage_max = bhi->memusage;

	// Determine the minimum XZ Utils version that supports this Block.
	//
	// Currently the only thing that 5.0.0 doesn't support is empty
	// LZMA2 Block. This decoder bug was fixed in 5.0.2.
	{
		// Find the last filter in the chain. The array is
		// terminated with LZMA_VLI_UNKNOWN as the filter ID.
		size_t i = 0;
		while (filters[i + 1].id != LZMA_VLI_UNKNOWN)
			++i;

		// 50000022 is the version number that xz_ver_to_str()
		// formats as "5.0.2".
		if (filters[i].id == LZMA_FILTER_LZMA2
				&& iter->block.uncompressed_size == 0
				&& xfi->min_version < 50000022U)
			xfi->min_version = 50000022U;
	}

	// Convert the filter chain to human readable form.
	message_filters_to_str(bhi->filter_chain, filters, false);

	// Free the memory allocated by lzma_block_header_decode().
	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
		free(filters[i].options);

	return false;

data_error:
	// Show the error message.
	message_error("%s: %s", pair->src_name,
			message_strm(LZMA_DATA_ERROR));
	return true;
}
540
541
542/// \brief      Parse the Check field and put it into check_value[]
543///
544/// \return     False on success, true on error.
545static bool
546parse_check_value(file_pair *pair, const lzma_index_iter *iter)
547{
548	// Don't read anything from the file if there is no integrity Check.
549	if (iter->stream.flags->check == LZMA_CHECK_NONE) {
550		snprintf(check_value, sizeof(check_value), "---");
551		return false;
552	}
553
554	// Locate and read the Check field.
555	const uint32_t size = lzma_check_size(iter->stream.flags->check);
556	const off_t offset = iter->block.compressed_file_offset
557			+ iter->block.total_size - size;
558	io_buf buf;
559	if (io_pread(pair, &buf, size, offset))
560		return true;
561
562	// CRC32 and CRC64 are in little endian. Guess that all the future
563	// 32-bit and 64-bit Check values are little endian too. It shouldn't
564	// be a too big problem if this guess is wrong.
565	if (size == 4)
566		snprintf(check_value, sizeof(check_value),
567				"%08" PRIx32, conv32le(buf.u32[0]));
568	else if (size == 8)
569		snprintf(check_value, sizeof(check_value),
570				"%016" PRIx64, conv64le(buf.u64[0]));
571	else
572		for (size_t i = 0; i < size; ++i)
573			snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]);
574
575	return false;
576}
577
578
579/// \brief      Parse detailed information about a Block
580///
581/// Since this requires seek(s), listing information about all Blocks can
582/// be slow.
583///
584/// \param      pair    Input file
585/// \param      iter    Location of the Block whose Check value should
586///                     be printed.
587/// \param      bhi     Pointer to structure where to store the information
588///                     about the Block Header field.
589///
590/// \return     False on success, true on error. If an error occurs,
591///             the error message is printed too so the caller doesn't
592///             need to worry about that.
593static bool
594parse_details(file_pair *pair, const lzma_index_iter *iter,
595		block_header_info *bhi, xz_file_info *xfi)
596{
597	if (parse_block_header(pair, iter, bhi, xfi))
598		return true;
599
600	if (parse_check_value(pair, iter))
601		return true;
602
603	return false;
604}
605
606
/// \brief      Get the compression ratio
///
/// This has slightly different format than that is used in message.c.
///
/// \param      compressed_size     Size of the compressed data
/// \param      uncompressed_size   Size of the uncompressed data
///
/// \return     The ratio compressed_size / uncompressed_size with three
///             decimals in a static buffer that is overwritten by the next
///             call, or "---" if uncompressed_size is zero or the ratio
///             is greater than 9.999.
static const char *
get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
{
	static char text[16];

	// Zero uncompressed size would mean division by zero.
	if (uncompressed_size != 0) {
		const double quotient = (double)(compressed_size)
				/ (double)(uncompressed_size);

		// Only ratios that fit in five columns are shown.
		if (quotient <= 9.999) {
			snprintf(text, sizeof(text), "%.3f", quotient);
			return text;
		}
	}

	return "---";
}
625
626
627/// \brief      Get a comma-separated list of Check names
628///
629/// The check names are translated with gettext except when in robot mode.
630///
631/// \param      buf     Buffer to hold the resulting string
632/// \param      checks  Bit mask of Checks to print
633/// \param      space_after_comma
634///                     It's better to not use spaces in table-like listings,
635///                     but in more verbose formats a space after a comma
636///                     is good for readability.
637static void
638get_check_names(char buf[CHECKS_STR_SIZE],
639		uint32_t checks, bool space_after_comma)
640{
641	// If we get called when there are no Checks to print, set checks
642	// to 1 so that we print "None". This can happen in the robot mode
643	// when printing the totals line if there are no valid input files.
644	if (checks == 0)
645		checks = 1;
646
647	char *pos = buf;
648	size_t left = CHECKS_STR_SIZE;
649
650	const char *sep = space_after_comma ? ", " : ",";
651	bool comma = false;
652
653	for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
654		if (checks & (UINT32_C(1) << i)) {
655			my_snprintf(&pos, &left, "%s%s",
656					comma ? sep : "",
657					opt_robot ? check_names[i]
658						: _(check_names[i]));
659			comma = true;
660		}
661	}
662
663	return;
664}
665
666
667static bool
668print_info_basic(const xz_file_info *xfi, file_pair *pair)
669{
670	static bool headings_displayed = false;
671	if (!headings_displayed) {
672		headings_displayed = true;
673		// TRANSLATORS: These are column headings. From Strms (Streams)
674		// to Ratio, the columns are right aligned. Check and Filename
675		// are left aligned. If you need longer words, it's OK to
676		// use two lines here. Test with "xz -l foo.xz".
677		puts(_("Strms  Blocks   Compressed Uncompressed  Ratio  "
678				"Check   Filename"));
679	}
680
681	char checks[CHECKS_STR_SIZE];
682	get_check_names(checks, lzma_index_checks(xfi->idx), false);
683
684	const char *cols[7] = {
685		uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
686		uint64_to_str(lzma_index_block_count(xfi->idx), 1),
687		uint64_to_nicestr(lzma_index_file_size(xfi->idx),
688			NICESTR_B, NICESTR_TIB, false, 2),
689		uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx),
690			NICESTR_B, NICESTR_TIB, false, 3),
691		get_ratio(lzma_index_file_size(xfi->idx),
692			lzma_index_uncompressed_size(xfi->idx)),
693		checks,
694		pair->src_name,
695	};
696	printf("%*s %*s  %*s  %*s  %*s  %-*s %s\n",
697			tuklib_mbstr_fw(cols[0], 5), cols[0],
698			tuklib_mbstr_fw(cols[1], 7), cols[1],
699			tuklib_mbstr_fw(cols[2], 11), cols[2],
700			tuklib_mbstr_fw(cols[3], 11), cols[3],
701			tuklib_mbstr_fw(cols[4], 5), cols[4],
702			tuklib_mbstr_fw(cols[5], 7), cols[5],
703			cols[6]);
704
705	return false;
706}
707
708
709static void
710print_adv_helper(uint64_t stream_count, uint64_t block_count,
711		uint64_t compressed_size, uint64_t uncompressed_size,
712		uint32_t checks, uint64_t stream_padding)
713{
714	char checks_str[CHECKS_STR_SIZE];
715	get_check_names(checks_str, checks, true);
716
717	printf(_("  Streams:            %s\n"),
718			uint64_to_str(stream_count, 0));
719	printf(_("  Blocks:             %s\n"),
720			uint64_to_str(block_count, 0));
721	printf(_("  Compressed size:    %s\n"),
722			uint64_to_nicestr(compressed_size,
723				NICESTR_B, NICESTR_TIB, true, 0));
724	printf(_("  Uncompressed size:  %s\n"),
725			uint64_to_nicestr(uncompressed_size,
726				NICESTR_B, NICESTR_TIB, true, 0));
727	printf(_("  Ratio:              %s\n"),
728			get_ratio(compressed_size, uncompressed_size));
729	printf(_("  Check:              %s\n"), checks_str);
730	printf(_("  Stream padding:     %s\n"),
731			uint64_to_nicestr(stream_padding,
732				NICESTR_B, NICESTR_TIB, true, 0));
733	return;
734}
735
736
/// \brief      Print the verbose human-readable listing of a file
///
/// The overall information is printed first, then a table of the Streams
/// and, if there is at least one Block, a table of the Blocks. When the
/// verbosity is at least V_DEBUG, the Block Headers and Check fields are
/// read from the file to show additional columns and a few summary lines.
///
/// \param      xfi     Information about the file; xfi->idx must be set.
///                     It is updated by parse_details() in the most
///                     verbose mode.
/// \param      pair    Input file
///
/// \return     False on success, true if parse_details() failed.
static bool
print_info_adv(xz_file_info *xfi, file_pair *pair)
{
	// Print the overall information.
	print_adv_helper(lzma_index_stream_count(xfi->idx),
			lzma_index_block_count(xfi->idx),
			lzma_index_file_size(xfi->idx),
			lzma_index_uncompressed_size(xfi->idx),
			lzma_index_checks(xfi->idx),
			xfi->stream_padding);

	// Size of the biggest Check. This is used to calculate the width
	// of the CheckVal field. The table would get insanely wide if
	// we always reserved space for 64-byte Check (128 chars as hex).
	uint32_t check_max = 0;

	// Print information about the Streams.
	//
	// TRANSLATORS: The second line is column headings. All except
	// Check are right aligned; Check is left aligned. Test with
	// "xz -lv foo.xz".
	puts(_("  Streams:\n    Stream    Blocks"
			"      CompOffset    UncompOffset"
			"        CompSize      UncompSize  Ratio"
			"  Check      Padding"));

	lzma_index_iter iter;
	lzma_index_iter_init(&iter, xfi->idx);

	while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
		// NOTE(review): the second argument of uint64_to_str()
		// presumably selects one of a few static buffers so that
		// several results can be used in one printf() - confirm
		// in util.c. tuklib_mbstr_fw() presumably gives the field
		// width adjusted for multibyte strings.
		const char *cols1[4] = {
			uint64_to_str(iter.stream.number, 0),
			uint64_to_str(iter.stream.block_count, 1),
			uint64_to_str(iter.stream.compressed_offset, 2),
			uint64_to_str(iter.stream.uncompressed_offset, 3),
		};
		printf("    %*s %*s %*s %*s ",
				tuklib_mbstr_fw(cols1[0], 6), cols1[0],
				tuklib_mbstr_fw(cols1[1], 9), cols1[1],
				tuklib_mbstr_fw(cols1[2], 15), cols1[2],
				tuklib_mbstr_fw(cols1[3], 15), cols1[3]);

		// The first half of the line has been printed already,
		// so the slot numbers can be used again.
		const char *cols2[5] = {
			uint64_to_str(iter.stream.compressed_size, 0),
			uint64_to_str(iter.stream.uncompressed_size, 1),
			get_ratio(iter.stream.compressed_size,
				iter.stream.uncompressed_size),
			_(check_names[iter.stream.flags->check]),
			uint64_to_str(iter.stream.padding, 2),
		};
		printf("%*s %*s  %*s  %-*s %*s\n",
				tuklib_mbstr_fw(cols2[0], 15), cols2[0],
				tuklib_mbstr_fw(cols2[1], 15), cols2[1],
				tuklib_mbstr_fw(cols2[2], 5), cols2[2],
				tuklib_mbstr_fw(cols2[3], 10), cols2[3],
				tuklib_mbstr_fw(cols2[4], 7), cols2[4]);

		// Update the maximum Check size.
		if (lzma_check_size(iter.stream.flags->check) > check_max)
			check_max = lzma_check_size(iter.stream.flags->check);
	}

	// Cache the verbosity level to a local variable.
	const bool detailed = message_verbosity_get() >= V_DEBUG;

	// Information collected from Block Headers. This is set by
	// parse_details() and is read only when "detailed" is true.
	block_header_info bhi;

	// Print information about the Blocks but only if there is
	// at least one Block.
	if (lzma_index_block_count(xfi->idx) > 0) {
		// Calculate the width of the CheckVal field: two hex digits
		// per byte, but at least as wide as the heading "CheckVal".
		const int checkval_width = my_max(8, 2 * check_max);

		// TRANSLATORS: The second line is column headings. All
		// except Check are right aligned; Check is left aligned.
		printf(_("  Blocks:\n    Stream     Block"
			"      CompOffset    UncompOffset"
			"       TotalSize      UncompSize  Ratio  Check"));

		if (detailed) {
			// TRANSLATORS: These are additional column headings
			// for the most verbose listing mode. CheckVal
			// (Check value), Flags, and Filters are left aligned.
			// Header (Block Header Size), CompSize, and MemUsage
			// are right aligned. %*s is replaced with 0-120
			// spaces to make the CheckVal column wide enough.
			// Test with "xz -lvv foo.xz".
			printf(_("      CheckVal %*s Header  Flags        "
					"CompSize    MemUsage  Filters"),
					checkval_width - 8, "");
		}

		putchar('\n');

		// Start again from the beginning of the Index.
		lzma_index_iter_init(&iter, xfi->idx);

		// Iterate over the Blocks.
		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
			// Read the details before printing anything about
			// this Block so that a failure doesn't leave
			// a partial line in the output.
			if (detailed && parse_details(pair, &iter, &bhi, xfi))
					return true;

			const char *cols1[4] = {
				uint64_to_str(iter.stream.number, 0),
				uint64_to_str(
					iter.block.number_in_stream, 1),
				uint64_to_str(
					iter.block.compressed_file_offset, 2),
				uint64_to_str(
					iter.block.uncompressed_file_offset, 3)
			};
			printf("    %*s %*s %*s %*s ",
				tuklib_mbstr_fw(cols1[0], 6), cols1[0],
				tuklib_mbstr_fw(cols1[1], 9), cols1[1],
				tuklib_mbstr_fw(cols1[2], 15), cols1[2],
				tuklib_mbstr_fw(cols1[3], 15), cols1[3]);

			const char *cols2[4] = {
				uint64_to_str(iter.block.total_size, 0),
				uint64_to_str(iter.block.uncompressed_size,
						1),
				get_ratio(iter.block.total_size,
					iter.block.uncompressed_size),
				_(check_names[iter.stream.flags->check])
			};
			// The Check name is padded only if more columns
			// follow it.
			printf("%*s %*s  %*s  %-*s",
				tuklib_mbstr_fw(cols2[0], 15), cols2[0],
				tuklib_mbstr_fw(cols2[1], 15), cols2[1],
				tuklib_mbstr_fw(cols2[2], 5), cols2[2],
				tuklib_mbstr_fw(cols2[3], detailed ? 11 : 1),
					cols2[3]);

			if (detailed) {
				// Unpadded Size covers the Block Header,
				// Compressed Data, and Check fields, so
				// subtracting the other two leaves the size
				// of the Compressed Data.
				const lzma_vli compressed_size
						= iter.block.unpadded_size
						- bhi.header_size
						- lzma_check_size(
						iter.stream.flags->check);

				const char *cols3[6] = {
					check_value,
					uint64_to_str(bhi.header_size, 0),
					bhi.flags,
					uint64_to_str(compressed_size, 1),
					uint64_to_str(
						round_up_to_mib(bhi.memusage),
						2),
					bhi.filter_chain
				};
				// Show MiB for memory usage, because it
				// is the only size which is not in bytes.
				printf("%-*s  %*s  %-5s %*s %*s MiB  %s",
					checkval_width, cols3[0],
					tuklib_mbstr_fw(cols3[1], 6), cols3[1],
					cols3[2],
					tuklib_mbstr_fw(cols3[3], 15),
						cols3[3],
					tuklib_mbstr_fw(cols3[4], 7), cols3[4],
					cols3[5]);
			}

			putchar('\n');
		}
	}

	// These were collected by parse_details() while the Blocks
	// were being listed.
	if (detailed) {
		printf(_("  Memory needed:      %s MiB\n"), uint64_to_str(
				round_up_to_mib(xfi->memusage_max), 0));
		printf(_("  Sizes in headers:   %s\n"),
				xfi->all_have_sizes ? _("Yes") : _("No"));
		printf(_("  Minimum XZ Utils version: %s\n"),
				xz_ver_to_str(xfi->min_version));
	}

	return false;
}
913
914
/// \brief      Print the machine-readable listing of a file
///
/// The output consists of lines of tab-separated fields. The first field
/// of each line tells the type of the line: "name", "file", "stream",
/// "block", or "summary". Nothing in this output is translated.
///
/// The "stream" and "block" lines are printed only if the verbosity is at
/// least V_VERBOSE. If the verbosity is at least V_DEBUG, the "block" lines
/// get additional fields from parse_details() and the "summary" line
/// is printed.
///
/// \param      xfi     Information about the file; xfi->idx must be set.
///                     It is updated by parse_details() in the most
///                     verbose mode.
/// \param      pair    Input file
///
/// \return     False on success, true if parse_details() failed.
static bool
print_info_robot(xz_file_info *xfi, file_pair *pair)
{
	char checks[CHECKS_STR_SIZE];
	get_check_names(checks, lzma_index_checks(xfi->idx), false);

	printf("name\t%s\n", pair->src_name);

	// Fields: Stream count, Block count, compressed size, uncompressed
	// size, ratio, Check names, and total Stream Padding
	printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
			"\t%s\t%s\t%" PRIu64 "\n",
			lzma_index_stream_count(xfi->idx),
			lzma_index_block_count(xfi->idx),
			lzma_index_file_size(xfi->idx),
			lzma_index_uncompressed_size(xfi->idx),
			get_ratio(lzma_index_file_size(xfi->idx),
				lzma_index_uncompressed_size(xfi->idx)),
			checks,
			xfi->stream_padding);

	if (message_verbosity_get() >= V_VERBOSE) {
		lzma_index_iter iter;
		lzma_index_iter_init(&iter, xfi->idx);

		// One line for each Stream
		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM))
			printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
				"\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
				"\t%s\t%s\t%" PRIu64 "\n",
				iter.stream.number,
				iter.stream.block_count,
				iter.stream.compressed_offset,
				iter.stream.uncompressed_offset,
				iter.stream.compressed_size,
				iter.stream.uncompressed_size,
				get_ratio(iter.stream.compressed_size,
					iter.stream.uncompressed_size),
				check_names[iter.stream.flags->check],
				iter.stream.padding);

		// Go through the Index again, this time Block by Block.
		lzma_index_iter_rewind(&iter);

		// This is set by parse_details() and is read only when
		// the verbosity is at least V_DEBUG.
		block_header_info bhi;

		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
			// Read the details before printing anything about
			// this Block so that a failure doesn't leave
			// a partial line in the output.
			if (message_verbosity_get() >= V_DEBUG
					&& parse_details(
						pair, &iter, &bhi, xfi))
				return true;

			printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
					"\t%" PRIu64 "\t%" PRIu64
					"\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s",
					iter.stream.number,
					iter.block.number_in_stream,
					iter.block.number_in_file,
					iter.block.compressed_file_offset,
					iter.block.uncompressed_file_offset,
					iter.block.total_size,
					iter.block.uncompressed_size,
					get_ratio(iter.block.total_size,
						iter.block.uncompressed_size),
					check_names[iter.stream.flags->check]);

			// Additional fields: Check value, Block Header size,
			// Block Flags, size of the Compressed Data, decoder
			// memory usage in bytes, and the filter chain
			if (message_verbosity_get() >= V_DEBUG)
				printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64
						"\t%" PRIu64 "\t%s",
						check_value,
						bhi.header_size,
						bhi.flags,
						bhi.compressed_size,
						bhi.memusage,
						bhi.filter_chain);

			putchar('\n');
		}
	}

	// Fields: maximum decoder memory usage in bytes, whether all Blocks
	// have the size fields in their headers, and the minimum XZ Utils
	// version as the raw number (not converted with xz_ver_to_str())
	if (message_verbosity_get() >= V_DEBUG)
		printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n",
				xfi->memusage_max,
				xfi->all_have_sizes ? "yes" : "no",
				xfi->min_version);

	return false;
}
998
999
1000static void
1001update_totals(const xz_file_info *xfi)
1002{
1003	// TODO: Integer overflow checks
1004	++totals.files;
1005	totals.streams += lzma_index_stream_count(xfi->idx);
1006	totals.blocks += lzma_index_block_count(xfi->idx);
1007	totals.compressed_size += lzma_index_file_size(xfi->idx);
1008	totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx);
1009	totals.stream_padding += xfi->stream_padding;
1010	totals.checks |= lzma_index_checks(xfi->idx);
1011
1012	if (totals.memusage_max < xfi->memusage_max)
1013		totals.memusage_max = xfi->memusage_max;
1014
1015	if (totals.min_version < xfi->min_version)
1016		totals.min_version = xfi->min_version;
1017
1018	totals.all_have_sizes &= xfi->all_have_sizes;
1019
1020	return;
1021}
1022
1023
1024static void
1025print_totals_basic(void)
1026{
1027	// Print a separator line.
1028	char line[80];
1029	memset(line, '-', sizeof(line));
1030	line[sizeof(line) - 1] = '\0';
1031	puts(line);
1032
1033	// Get the check names.
1034	char checks[CHECKS_STR_SIZE];
1035	get_check_names(checks, totals.checks, false);
1036
1037	// Print the totals except the file count, which needs
1038	// special handling.
1039	printf("%5s %7s  %11s  %11s  %5s  %-7s ",
1040			uint64_to_str(totals.streams, 0),
1041			uint64_to_str(totals.blocks, 1),
1042			uint64_to_nicestr(totals.compressed_size,
1043				NICESTR_B, NICESTR_TIB, false, 2),
1044			uint64_to_nicestr(totals.uncompressed_size,
1045				NICESTR_B, NICESTR_TIB, false, 3),
1046			get_ratio(totals.compressed_size,
1047				totals.uncompressed_size),
1048			checks);
1049
1050	// Since we print totals only when there are at least two files,
1051	// the English message will always use "%s files". But some other
1052	// languages need different forms for different plurals so we
1053	// have to translate this with ngettext().
1054	//
1055	// TRANSLATORS: %s is an integer. Only the plural form of this
1056	// message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz".
1057	printf(ngettext("%s file\n", "%s files\n",
1058			totals.files <= ULONG_MAX ? totals.files
1059				: (totals.files % 1000000) + 1000000),
1060			uint64_to_str(totals.files, 0));
1061
1062	return;
1063}
1064
1065
1066static void
1067print_totals_adv(void)
1068{
1069	putchar('\n');
1070	puts(_("Totals:"));
1071	printf(_("  Number of files:    %s\n"),
1072			uint64_to_str(totals.files, 0));
1073	print_adv_helper(totals.streams, totals.blocks,
1074			totals.compressed_size, totals.uncompressed_size,
1075			totals.checks, totals.stream_padding);
1076
1077	if (message_verbosity_get() >= V_DEBUG) {
1078		printf(_("  Memory needed:      %s MiB\n"), uint64_to_str(
1079				round_up_to_mib(totals.memusage_max), 0));
1080		printf(_("  Sizes in headers:   %s\n"),
1081				totals.all_have_sizes ? _("Yes") : _("No"));
1082		printf(_("  Minimum XZ Utils version: %s\n"),
1083				xz_ver_to_str(totals.min_version));
1084	}
1085
1086	return;
1087}
1088
1089
1090static void
1091print_totals_robot(void)
1092{
1093	char checks[CHECKS_STR_SIZE];
1094	get_check_names(checks, totals.checks, false);
1095
1096	printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
1097			"\t%s\t%s\t%" PRIu64 "\t%" PRIu64,
1098			totals.streams,
1099			totals.blocks,
1100			totals.compressed_size,
1101			totals.uncompressed_size,
1102			get_ratio(totals.compressed_size,
1103				totals.uncompressed_size),
1104			checks,
1105			totals.stream_padding,
1106			totals.files);
1107
1108	if (message_verbosity_get() >= V_DEBUG)
1109		printf("\t%" PRIu64 "\t%s\t%" PRIu32,
1110				totals.memusage_max,
1111				totals.all_have_sizes ? "yes" : "no",
1112				totals.min_version);
1113
1114	putchar('\n');
1115
1116	return;
1117}
1118
1119
1120extern void
1121list_totals(void)
1122{
1123	if (opt_robot) {
1124		// Always print totals in --robot mode. It can be convenient
1125		// in some cases and doesn't complicate usage of the
1126		// single-file case much.
1127		print_totals_robot();
1128
1129	} else if (totals.files > 1) {
1130		// For non-robot mode, totals are printed only if there
1131		// is more than one file.
1132		if (message_verbosity_get() <= V_WARNING)
1133			print_totals_basic();
1134		else
1135			print_totals_adv();
1136	}
1137
1138	return;
1139}
1140
1141
1142extern void
1143list_file(const char *filename)
1144{
1145	if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO)
1146		message_fatal(_("--list works only on .xz files "
1147				"(--format=xz or --format=auto)"));
1148
1149	message_filename(filename);
1150
1151	if (filename == stdin_filename) {
1152		message_error(_("--list does not support reading from "
1153				"standard input"));
1154		return;
1155	}
1156
1157	// Unset opt_stdout so that io_open_src() won't accept special files.
1158	// Set opt_force so that io_open_src() will follow symlinks.
1159	opt_stdout = false;
1160	opt_force = true;
1161	file_pair *pair = io_open_src(filename);
1162	if (pair == NULL)
1163		return;
1164
1165	xz_file_info xfi = XZ_FILE_INFO_INIT;
1166	if (!parse_indexes(&xfi, pair)) {
1167		bool fail;
1168
1169		// We have three main modes:
1170		//  - --robot, which has submodes if --verbose is specified
1171		//    once or twice
1172		//  - Normal --list without --verbose
1173		//  - --list with one or two --verbose
1174		if (opt_robot)
1175			fail = print_info_robot(&xfi, pair);
1176		else if (message_verbosity_get() <= V_WARNING)
1177			fail = print_info_basic(&xfi, pair);
1178		else
1179			fail = print_info_adv(&xfi, pair);
1180
1181		// Update the totals that are displayed after all
1182		// the individual files have been listed. Don't count
1183		// broken files.
1184		if (!fail)
1185			update_totals(&xfi);
1186
1187		lzma_index_end(xfi.idx, NULL);
1188	}
1189
1190	io_close(pair, false);
1191	return;
1192}
1193