stream_decoder.c revision 292588
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       stream_decoder.c
4/// \brief      Decodes .xz Streams
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "stream_decoder.h"
14#include "block_decoder.h"
15
16
17struct lzma_coder_s {
18	enum {
19		SEQ_STREAM_HEADER,
20		SEQ_BLOCK_HEADER,
21		SEQ_BLOCK,
22		SEQ_INDEX,
23		SEQ_STREAM_FOOTER,
24		SEQ_STREAM_PADDING,
25	} sequence;
26
27	/// Block or Metadata decoder. This takes little memory and the same
28	/// data structure can be used to decode every Block Header, so it's
29	/// a good idea to have a separate lzma_next_coder structure for it.
30	lzma_next_coder block_decoder;
31
32	/// Block options decoded by the Block Header decoder and used by
33	/// the Block decoder.
34	lzma_block block_options;
35
36	/// Stream Flags from Stream Header
37	lzma_stream_flags stream_flags;
38
39	/// Index is hashed so that it can be compared to the sizes of Blocks
40	/// with O(1) memory usage.
41	lzma_index_hash *index_hash;
42
43	/// Memory usage limit
44	uint64_t memlimit;
45
46	/// Amount of memory actually needed (only an estimate)
47	uint64_t memusage;
48
49	/// If true, LZMA_NO_CHECK is returned if the Stream has
50	/// no integrity check.
51	bool tell_no_check;
52
53	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54	/// an integrity check that isn't supported by this liblzma build.
55	bool tell_unsupported_check;
56
57	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
58	bool tell_any_check;
59
60	/// If true, we will tell the Block decoder to skip calculating
61	/// and verifying the integrity check.
62	bool ignore_check;
63
64	/// If true, we will decode concatenated Streams that possibly have
65	/// Stream Padding between or after them. LZMA_STREAM_END is returned
66	/// once the application isn't giving us any new input, and we aren't
67	/// in the middle of a Stream, and possible Stream Padding is a
68	/// multiple of four bytes.
69	bool concatenated;
70
71	/// When decoding concatenated Streams, this is true as long as we
72	/// are decoding the first Stream. This is needed to avoid misleading
73	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
74	/// bytes.
75	bool first_stream;
76
77	/// Write position in buffer[] and position in Stream Padding
78	size_t pos;
79
80	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
81	/// Block Header has biggest maximum size.
82	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
83};
84
85
86static lzma_ret
87stream_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator)
88{
89	// Initialize the Index hash used to verify the Index.
90	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
91	if (coder->index_hash == NULL)
92		return LZMA_MEM_ERROR;
93
94	// Reset the rest of the variables.
95	coder->sequence = SEQ_STREAM_HEADER;
96	coder->pos = 0;
97
98	return LZMA_OK;
99}
100
101
102static lzma_ret
103stream_decode(lzma_coder *coder, const lzma_allocator *allocator,
104		const uint8_t *restrict in, size_t *restrict in_pos,
105		size_t in_size, uint8_t *restrict out,
106		size_t *restrict out_pos, size_t out_size, lzma_action action)
107{
108	// When decoding the actual Block, it may be able to produce more
109	// output even if we don't give it any new input.
110	while (true)
111	switch (coder->sequence) {
112	case SEQ_STREAM_HEADER: {
113		// Copy the Stream Header to the internal buffer.
114		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
115				LZMA_STREAM_HEADER_SIZE);
116
117		// Return if we didn't get the whole Stream Header yet.
118		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
119			return LZMA_OK;
120
121		coder->pos = 0;
122
123		// Decode the Stream Header.
124		const lzma_ret ret = lzma_stream_header_decode(
125				&coder->stream_flags, coder->buffer);
126		if (ret != LZMA_OK)
127			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
128					? LZMA_DATA_ERROR : ret;
129
130		// If we are decoding concatenated Streams, and the later
131		// Streams have invalid Header Magic Bytes, we give
132		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
133		coder->first_stream = false;
134
135		// Copy the type of the Check so that Block Header and Block
136		// decoders see it.
137		coder->block_options.check = coder->stream_flags.check;
138
139		// Even if we return LZMA_*_CHECK below, we want
140		// to continue from Block Header decoding.
141		coder->sequence = SEQ_BLOCK_HEADER;
142
143		// Detect if there's no integrity check or if it is
144		// unsupported if those were requested by the application.
145		if (coder->tell_no_check && coder->stream_flags.check
146				== LZMA_CHECK_NONE)
147			return LZMA_NO_CHECK;
148
149		if (coder->tell_unsupported_check
150				&& !lzma_check_is_supported(
151					coder->stream_flags.check))
152			return LZMA_UNSUPPORTED_CHECK;
153
154		if (coder->tell_any_check)
155			return LZMA_GET_CHECK;
156	}
157
158	// Fall through
159
160	case SEQ_BLOCK_HEADER: {
161		if (*in_pos >= in_size)
162			return LZMA_OK;
163
164		if (coder->pos == 0) {
165			// Detect if it's Index.
166			if (in[*in_pos] == 0x00) {
167				coder->sequence = SEQ_INDEX;
168				break;
169			}
170
171			// Calculate the size of the Block Header. Note that
172			// Block Header decoder wants to see this byte too
173			// so don't advance *in_pos.
174			coder->block_options.header_size
175					= lzma_block_header_size_decode(
176						in[*in_pos]);
177		}
178
179		// Copy the Block Header to the internal buffer.
180		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
181				coder->block_options.header_size);
182
183		// Return if we didn't get the whole Block Header yet.
184		if (coder->pos < coder->block_options.header_size)
185			return LZMA_OK;
186
187		coder->pos = 0;
188
189		// Version 1 is needed to support the .ignore_check option.
190		coder->block_options.version = 1;
191
192		// Set up a buffer to hold the filter chain. Block Header
193		// decoder will initialize all members of this array so
194		// we don't need to do it here.
195		lzma_filter filters[LZMA_FILTERS_MAX + 1];
196		coder->block_options.filters = filters;
197
198		// Decode the Block Header.
199		return_if_error(lzma_block_header_decode(&coder->block_options,
200				allocator, coder->buffer));
201
202		// If LZMA_IGNORE_CHECK was used, this flag needs to be set.
203		// It has to be set after lzma_block_header_decode() because
204		// it always resets this to false.
205		coder->block_options.ignore_check = coder->ignore_check;
206
207		// Check the memory usage limit.
208		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
209		lzma_ret ret;
210
211		if (memusage == UINT64_MAX) {
212			// One or more unknown Filter IDs.
213			ret = LZMA_OPTIONS_ERROR;
214		} else {
215			// Now we can set coder->memusage since we know that
216			// the filter chain is valid. We don't want
217			// lzma_memusage() to return UINT64_MAX in case of
218			// invalid filter chain.
219			coder->memusage = memusage;
220
221			if (memusage > coder->memlimit) {
222				// The chain would need too much memory.
223				ret = LZMA_MEMLIMIT_ERROR;
224			} else {
225				// Memory usage is OK.
226				// Initialize the Block decoder.
227				ret = lzma_block_decoder_init(
228						&coder->block_decoder,
229						allocator,
230						&coder->block_options);
231			}
232		}
233
234		// Free the allocated filter options since they are needed
235		// only to initialize the Block decoder.
236		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
237			lzma_free(filters[i].options, allocator);
238
239		coder->block_options.filters = NULL;
240
241		// Check if memory usage calculation and Block enocoder
242		// initialization succeeded.
243		if (ret != LZMA_OK)
244			return ret;
245
246		coder->sequence = SEQ_BLOCK;
247	}
248
249	// Fall through
250
251	case SEQ_BLOCK: {
252		const lzma_ret ret = coder->block_decoder.code(
253				coder->block_decoder.coder, allocator,
254				in, in_pos, in_size, out, out_pos, out_size,
255				action);
256
257		if (ret != LZMA_STREAM_END)
258			return ret;
259
260		// Block decoded successfully. Add the new size pair to
261		// the Index hash.
262		return_if_error(lzma_index_hash_append(coder->index_hash,
263				lzma_block_unpadded_size(
264					&coder->block_options),
265				coder->block_options.uncompressed_size));
266
267		coder->sequence = SEQ_BLOCK_HEADER;
268		break;
269	}
270
271	case SEQ_INDEX: {
272		// If we don't have any input, don't call
273		// lzma_index_hash_decode() since it would return
274		// LZMA_BUF_ERROR, which we must not do here.
275		if (*in_pos >= in_size)
276			return LZMA_OK;
277
278		// Decode the Index and compare it to the hash calculated
279		// from the sizes of the Blocks (if any).
280		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
281				in, in_pos, in_size);
282		if (ret != LZMA_STREAM_END)
283			return ret;
284
285		coder->sequence = SEQ_STREAM_FOOTER;
286	}
287
288	// Fall through
289
290	case SEQ_STREAM_FOOTER: {
291		// Copy the Stream Footer to the internal buffer.
292		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
293				LZMA_STREAM_HEADER_SIZE);
294
295		// Return if we didn't get the whole Stream Footer yet.
296		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
297			return LZMA_OK;
298
299		coder->pos = 0;
300
301		// Decode the Stream Footer. The decoder gives
302		// LZMA_FORMAT_ERROR if the magic bytes don't match,
303		// so convert that return code to LZMA_DATA_ERROR.
304		lzma_stream_flags footer_flags;
305		const lzma_ret ret = lzma_stream_footer_decode(
306				&footer_flags, coder->buffer);
307		if (ret != LZMA_OK)
308			return ret == LZMA_FORMAT_ERROR
309					? LZMA_DATA_ERROR : ret;
310
311		// Check that Index Size stored in the Stream Footer matches
312		// the real size of the Index field.
313		if (lzma_index_hash_size(coder->index_hash)
314				!= footer_flags.backward_size)
315			return LZMA_DATA_ERROR;
316
317		// Compare that the Stream Flags fields are identical in
318		// both Stream Header and Stream Footer.
319		return_if_error(lzma_stream_flags_compare(
320				&coder->stream_flags, &footer_flags));
321
322		if (!coder->concatenated)
323			return LZMA_STREAM_END;
324
325		coder->sequence = SEQ_STREAM_PADDING;
326	}
327
328	// Fall through
329
330	case SEQ_STREAM_PADDING:
331		assert(coder->concatenated);
332
333		// Skip over possible Stream Padding.
334		while (true) {
335			if (*in_pos >= in_size) {
336				// Unless LZMA_FINISH was used, we cannot
337				// know if there's more input coming later.
338				if (action != LZMA_FINISH)
339					return LZMA_OK;
340
341				// Stream Padding must be a multiple of
342				// four bytes.
343				return coder->pos == 0
344						? LZMA_STREAM_END
345						: LZMA_DATA_ERROR;
346			}
347
348			// If the byte is not zero, it probably indicates
349			// beginning of a new Stream (or the file is corrupt).
350			if (in[*in_pos] != 0x00)
351				break;
352
353			++*in_pos;
354			coder->pos = (coder->pos + 1) & 3;
355		}
356
357		// Stream Padding must be a multiple of four bytes (empty
358		// Stream Padding is OK).
359		if (coder->pos != 0) {
360			++*in_pos;
361			return LZMA_DATA_ERROR;
362		}
363
364		// Prepare to decode the next Stream.
365		return_if_error(stream_decoder_reset(coder, allocator));
366		break;
367
368	default:
369		assert(0);
370		return LZMA_PROG_ERROR;
371	}
372
373	// Never reached
374}
375
376
377static void
378stream_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
379{
380	lzma_next_end(&coder->block_decoder, allocator);
381	lzma_index_hash_end(coder->index_hash, allocator);
382	lzma_free(coder, allocator);
383	return;
384}
385
386
387static lzma_check
388stream_decoder_get_check(const lzma_coder *coder)
389{
390	return coder->stream_flags.check;
391}
392
393
394static lzma_ret
395stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
396		uint64_t *old_memlimit, uint64_t new_memlimit)
397{
398	*memusage = coder->memusage;
399	*old_memlimit = coder->memlimit;
400
401	if (new_memlimit != 0) {
402		if (new_memlimit < coder->memusage)
403			return LZMA_MEMLIMIT_ERROR;
404
405		coder->memlimit = new_memlimit;
406	}
407
408	return LZMA_OK;
409}
410
411
412extern lzma_ret
413lzma_stream_decoder_init(
414		lzma_next_coder *next, const lzma_allocator *allocator,
415		uint64_t memlimit, uint32_t flags)
416{
417	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
418
419	if (memlimit == 0)
420		return LZMA_PROG_ERROR;
421
422	if (flags & ~LZMA_SUPPORTED_FLAGS)
423		return LZMA_OPTIONS_ERROR;
424
425	if (next->coder == NULL) {
426		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
427		if (next->coder == NULL)
428			return LZMA_MEM_ERROR;
429
430		next->code = &stream_decode;
431		next->end = &stream_decoder_end;
432		next->get_check = &stream_decoder_get_check;
433		next->memconfig = &stream_decoder_memconfig;
434
435		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
436		next->coder->index_hash = NULL;
437	}
438
439	next->coder->memlimit = memlimit;
440	next->coder->memusage = LZMA_MEMUSAGE_BASE;
441	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
442	next->coder->tell_unsupported_check
443			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
444	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
445	next->coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
446	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
447	next->coder->first_stream = true;
448
449	return stream_decoder_reset(next->coder, allocator);
450}
451
452
453extern LZMA_API(lzma_ret)
454lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
455{
456	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
457
458	strm->internal->supported_actions[LZMA_RUN] = true;
459	strm->internal->supported_actions[LZMA_FINISH] = true;
460
461	return LZMA_OK;
462}
463