1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       stream_decoder.c
4/// \brief      Decodes .xz Streams
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "stream_decoder.h"
14#include "block_decoder.h"
15
16
17typedef struct {
18	enum {
19		SEQ_STREAM_HEADER,
20		SEQ_BLOCK_HEADER,
21		SEQ_BLOCK,
22		SEQ_INDEX,
23		SEQ_STREAM_FOOTER,
24		SEQ_STREAM_PADDING,
25	} sequence;
26
27	/// Block or Metadata decoder. This takes little memory and the same
28	/// data structure can be used to decode every Block Header, so it's
29	/// a good idea to have a separate lzma_next_coder structure for it.
30	lzma_next_coder block_decoder;
31
32	/// Block options decoded by the Block Header decoder and used by
33	/// the Block decoder.
34	lzma_block block_options;
35
36	/// Stream Flags from Stream Header
37	lzma_stream_flags stream_flags;
38
39	/// Index is hashed so that it can be compared to the sizes of Blocks
40	/// with O(1) memory usage.
41	lzma_index_hash *index_hash;
42
43	/// Memory usage limit
44	uint64_t memlimit;
45
46	/// Amount of memory actually needed (only an estimate)
47	uint64_t memusage;
48
49	/// If true, LZMA_NO_CHECK is returned if the Stream has
50	/// no integrity check.
51	bool tell_no_check;
52
53	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54	/// an integrity check that isn't supported by this liblzma build.
55	bool tell_unsupported_check;
56
57	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
58	bool tell_any_check;
59
60	/// If true, we will tell the Block decoder to skip calculating
61	/// and verifying the integrity check.
62	bool ignore_check;
63
64	/// If true, we will decode concatenated Streams that possibly have
65	/// Stream Padding between or after them. LZMA_STREAM_END is returned
66	/// once the application isn't giving us any new input, and we aren't
67	/// in the middle of a Stream, and possible Stream Padding is a
68	/// multiple of four bytes.
69	bool concatenated;
70
71	/// When decoding concatenated Streams, this is true as long as we
72	/// are decoding the first Stream. This is needed to avoid misleading
73	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
74	/// bytes.
75	bool first_stream;
76
77	/// Write position in buffer[] and position in Stream Padding
78	size_t pos;
79
80	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
81	/// Block Header has biggest maximum size.
82	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
83} lzma_stream_coder;
84
85
86static lzma_ret
87stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator)
88{
89	// Initialize the Index hash used to verify the Index.
90	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
91	if (coder->index_hash == NULL)
92		return LZMA_MEM_ERROR;
93
94	// Reset the rest of the variables.
95	coder->sequence = SEQ_STREAM_HEADER;
96	coder->pos = 0;
97
98	return LZMA_OK;
99}
100
101
102static lzma_ret
103stream_decode(void *coder_ptr, const lzma_allocator *allocator,
104		const uint8_t *restrict in, size_t *restrict in_pos,
105		size_t in_size, uint8_t *restrict out,
106		size_t *restrict out_pos, size_t out_size, lzma_action action)
107{
108	lzma_stream_coder *coder = coder_ptr;
109
110	// When decoding the actual Block, it may be able to produce more
111	// output even if we don't give it any new input.
112	while (true)
113	switch (coder->sequence) {
114	case SEQ_STREAM_HEADER: {
115		// Copy the Stream Header to the internal buffer.
116		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
117				LZMA_STREAM_HEADER_SIZE);
118
119		// Return if we didn't get the whole Stream Header yet.
120		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
121			return LZMA_OK;
122
123		coder->pos = 0;
124
125		// Decode the Stream Header.
126		const lzma_ret ret = lzma_stream_header_decode(
127				&coder->stream_flags, coder->buffer);
128		if (ret != LZMA_OK)
129			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
130					? LZMA_DATA_ERROR : ret;
131
132		// If we are decoding concatenated Streams, and the later
133		// Streams have invalid Header Magic Bytes, we give
134		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
135		coder->first_stream = false;
136
137		// Copy the type of the Check so that Block Header and Block
138		// decoders see it.
139		coder->block_options.check = coder->stream_flags.check;
140
141		// Even if we return LZMA_*_CHECK below, we want
142		// to continue from Block Header decoding.
143		coder->sequence = SEQ_BLOCK_HEADER;
144
145		// Detect if there's no integrity check or if it is
146		// unsupported if those were requested by the application.
147		if (coder->tell_no_check && coder->stream_flags.check
148				== LZMA_CHECK_NONE)
149			return LZMA_NO_CHECK;
150
151		if (coder->tell_unsupported_check
152				&& !lzma_check_is_supported(
153					coder->stream_flags.check))
154			return LZMA_UNSUPPORTED_CHECK;
155
156		if (coder->tell_any_check)
157			return LZMA_GET_CHECK;
158	}
159
160	// Fall through
161
162	case SEQ_BLOCK_HEADER: {
163		if (*in_pos >= in_size)
164			return LZMA_OK;
165
166		if (coder->pos == 0) {
167			// Detect if it's Index.
168			if (in[*in_pos] == 0x00) {
169				coder->sequence = SEQ_INDEX;
170				break;
171			}
172
173			// Calculate the size of the Block Header. Note that
174			// Block Header decoder wants to see this byte too
175			// so don't advance *in_pos.
176			coder->block_options.header_size
177					= lzma_block_header_size_decode(
178						in[*in_pos]);
179		}
180
181		// Copy the Block Header to the internal buffer.
182		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
183				coder->block_options.header_size);
184
185		// Return if we didn't get the whole Block Header yet.
186		if (coder->pos < coder->block_options.header_size)
187			return LZMA_OK;
188
189		coder->pos = 0;
190
191		// Version 1 is needed to support the .ignore_check option.
192		coder->block_options.version = 1;
193
194		// Set up a buffer to hold the filter chain. Block Header
195		// decoder will initialize all members of this array so
196		// we don't need to do it here.
197		lzma_filter filters[LZMA_FILTERS_MAX + 1];
198		coder->block_options.filters = filters;
199
200		// Decode the Block Header.
201		return_if_error(lzma_block_header_decode(&coder->block_options,
202				allocator, coder->buffer));
203
204		// If LZMA_IGNORE_CHECK was used, this flag needs to be set.
205		// It has to be set after lzma_block_header_decode() because
206		// it always resets this to false.
207		coder->block_options.ignore_check = coder->ignore_check;
208
209		// Check the memory usage limit.
210		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
211		lzma_ret ret;
212
213		if (memusage == UINT64_MAX) {
214			// One or more unknown Filter IDs.
215			ret = LZMA_OPTIONS_ERROR;
216		} else {
217			// Now we can set coder->memusage since we know that
218			// the filter chain is valid. We don't want
219			// lzma_memusage() to return UINT64_MAX in case of
220			// invalid filter chain.
221			coder->memusage = memusage;
222
223			if (memusage > coder->memlimit) {
224				// The chain would need too much memory.
225				ret = LZMA_MEMLIMIT_ERROR;
226			} else {
227				// Memory usage is OK.
228				// Initialize the Block decoder.
229				ret = lzma_block_decoder_init(
230						&coder->block_decoder,
231						allocator,
232						&coder->block_options);
233			}
234		}
235
236		// Free the allocated filter options since they are needed
237		// only to initialize the Block decoder.
238		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
239			lzma_free(filters[i].options, allocator);
240
241		coder->block_options.filters = NULL;
242
243		// Check if memory usage calculation and Block enocoder
244		// initialization succeeded.
245		if (ret != LZMA_OK)
246			return ret;
247
248		coder->sequence = SEQ_BLOCK;
249	}
250
251	// Fall through
252
253	case SEQ_BLOCK: {
254		const lzma_ret ret = coder->block_decoder.code(
255				coder->block_decoder.coder, allocator,
256				in, in_pos, in_size, out, out_pos, out_size,
257				action);
258
259		if (ret != LZMA_STREAM_END)
260			return ret;
261
262		// Block decoded successfully. Add the new size pair to
263		// the Index hash.
264		return_if_error(lzma_index_hash_append(coder->index_hash,
265				lzma_block_unpadded_size(
266					&coder->block_options),
267				coder->block_options.uncompressed_size));
268
269		coder->sequence = SEQ_BLOCK_HEADER;
270		break;
271	}
272
273	case SEQ_INDEX: {
274		// If we don't have any input, don't call
275		// lzma_index_hash_decode() since it would return
276		// LZMA_BUF_ERROR, which we must not do here.
277		if (*in_pos >= in_size)
278			return LZMA_OK;
279
280		// Decode the Index and compare it to the hash calculated
281		// from the sizes of the Blocks (if any).
282		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
283				in, in_pos, in_size);
284		if (ret != LZMA_STREAM_END)
285			return ret;
286
287		coder->sequence = SEQ_STREAM_FOOTER;
288	}
289
290	// Fall through
291
292	case SEQ_STREAM_FOOTER: {
293		// Copy the Stream Footer to the internal buffer.
294		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
295				LZMA_STREAM_HEADER_SIZE);
296
297		// Return if we didn't get the whole Stream Footer yet.
298		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
299			return LZMA_OK;
300
301		coder->pos = 0;
302
303		// Decode the Stream Footer. The decoder gives
304		// LZMA_FORMAT_ERROR if the magic bytes don't match,
305		// so convert that return code to LZMA_DATA_ERROR.
306		lzma_stream_flags footer_flags;
307		const lzma_ret ret = lzma_stream_footer_decode(
308				&footer_flags, coder->buffer);
309		if (ret != LZMA_OK)
310			return ret == LZMA_FORMAT_ERROR
311					? LZMA_DATA_ERROR : ret;
312
313		// Check that Index Size stored in the Stream Footer matches
314		// the real size of the Index field.
315		if (lzma_index_hash_size(coder->index_hash)
316				!= footer_flags.backward_size)
317			return LZMA_DATA_ERROR;
318
319		// Compare that the Stream Flags fields are identical in
320		// both Stream Header and Stream Footer.
321		return_if_error(lzma_stream_flags_compare(
322				&coder->stream_flags, &footer_flags));
323
324		if (!coder->concatenated)
325			return LZMA_STREAM_END;
326
327		coder->sequence = SEQ_STREAM_PADDING;
328	}
329
330	// Fall through
331
332	case SEQ_STREAM_PADDING:
333		assert(coder->concatenated);
334
335		// Skip over possible Stream Padding.
336		while (true) {
337			if (*in_pos >= in_size) {
338				// Unless LZMA_FINISH was used, we cannot
339				// know if there's more input coming later.
340				if (action != LZMA_FINISH)
341					return LZMA_OK;
342
343				// Stream Padding must be a multiple of
344				// four bytes.
345				return coder->pos == 0
346						? LZMA_STREAM_END
347						: LZMA_DATA_ERROR;
348			}
349
350			// If the byte is not zero, it probably indicates
351			// beginning of a new Stream (or the file is corrupt).
352			if (in[*in_pos] != 0x00)
353				break;
354
355			++*in_pos;
356			coder->pos = (coder->pos + 1) & 3;
357		}
358
359		// Stream Padding must be a multiple of four bytes (empty
360		// Stream Padding is OK).
361		if (coder->pos != 0) {
362			++*in_pos;
363			return LZMA_DATA_ERROR;
364		}
365
366		// Prepare to decode the next Stream.
367		return_if_error(stream_decoder_reset(coder, allocator));
368		break;
369
370	default:
371		assert(0);
372		return LZMA_PROG_ERROR;
373	}
374
375	// Never reached
376}
377
378
379static void
380stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
381{
382	lzma_stream_coder *coder = coder_ptr;
383	lzma_next_end(&coder->block_decoder, allocator);
384	lzma_index_hash_end(coder->index_hash, allocator);
385	lzma_free(coder, allocator);
386	return;
387}
388
389
390static lzma_check
391stream_decoder_get_check(const void *coder_ptr)
392{
393	const lzma_stream_coder *coder = coder_ptr;
394	return coder->stream_flags.check;
395}
396
397
398static lzma_ret
399stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
400		uint64_t *old_memlimit, uint64_t new_memlimit)
401{
402	lzma_stream_coder *coder = coder_ptr;
403
404	*memusage = coder->memusage;
405	*old_memlimit = coder->memlimit;
406
407	if (new_memlimit != 0) {
408		if (new_memlimit < coder->memusage)
409			return LZMA_MEMLIMIT_ERROR;
410
411		coder->memlimit = new_memlimit;
412	}
413
414	return LZMA_OK;
415}
416
417
418extern lzma_ret
419lzma_stream_decoder_init(
420		lzma_next_coder *next, const lzma_allocator *allocator,
421		uint64_t memlimit, uint32_t flags)
422{
423	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
424
425	if (flags & ~LZMA_SUPPORTED_FLAGS)
426		return LZMA_OPTIONS_ERROR;
427
428	lzma_stream_coder *coder = next->coder;
429	if (coder == NULL) {
430		coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
431		if (coder == NULL)
432			return LZMA_MEM_ERROR;
433
434		next->coder = coder;
435		next->code = &stream_decode;
436		next->end = &stream_decoder_end;
437		next->get_check = &stream_decoder_get_check;
438		next->memconfig = &stream_decoder_memconfig;
439
440		coder->block_decoder = LZMA_NEXT_CODER_INIT;
441		coder->index_hash = NULL;
442	}
443
444	coder->memlimit = my_max(1, memlimit);
445	coder->memusage = LZMA_MEMUSAGE_BASE;
446	coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
447	coder->tell_unsupported_check
448			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
449	coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
450	coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
451	coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
452	coder->first_stream = true;
453
454	return stream_decoder_reset(coder, allocator);
455}
456
457
458extern LZMA_API(lzma_ret)
459lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
460{
461	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
462
463	strm->internal->supported_actions[LZMA_RUN] = true;
464	strm->internal->supported_actions[LZMA_FINISH] = true;
465
466	return LZMA_OK;
467}
468