1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       stream_decoder.c
4/// \brief      Decodes .xz Streams
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "stream_decoder.h"
14#include "block_decoder.h"
15
16
17struct lzma_coder_s {
18	enum {
19		SEQ_STREAM_HEADER,
20		SEQ_BLOCK_HEADER,
21		SEQ_BLOCK,
22		SEQ_INDEX,
23		SEQ_STREAM_FOOTER,
24		SEQ_STREAM_PADDING,
25	} sequence;
26
27	/// Block or Metadata decoder. This takes little memory and the same
28	/// data structure can be used to decode every Block Header, so it's
29	/// a good idea to have a separate lzma_next_coder structure for it.
30	lzma_next_coder block_decoder;
31
32	/// Block options decoded by the Block Header decoder and used by
33	/// the Block decoder.
34	lzma_block block_options;
35
36	/// Stream Flags from Stream Header
37	lzma_stream_flags stream_flags;
38
39	/// Index is hashed so that it can be compared to the sizes of Blocks
40	/// with O(1) memory usage.
41	lzma_index_hash *index_hash;
42
43	/// Memory usage limit
44	uint64_t memlimit;
45
46	/// Amount of memory actually needed (only an estimate)
47	uint64_t memusage;
48
49	/// If true, LZMA_NO_CHECK is returned if the Stream has
50	/// no integrity check.
51	bool tell_no_check;
52
53	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54	/// an integrity check that isn't supported by this liblzma build.
55	bool tell_unsupported_check;
56
57	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
58	bool tell_any_check;
59
60	/// If true, we will decode concatenated Streams that possibly have
61	/// Stream Padding between or after them. LZMA_STREAM_END is returned
62	/// once the application isn't giving us any new input, and we aren't
63	/// in the middle of a Stream, and possible Stream Padding is a
64	/// multiple of four bytes.
65	bool concatenated;
66
67	/// When decoding concatenated Streams, this is true as long as we
68	/// are decoding the first Stream. This is needed to avoid misleading
69	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
70	/// bytes.
71	bool first_stream;
72
73	/// Write position in buffer[] and position in Stream Padding
74	size_t pos;
75
76	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
77	/// Block Header has biggest maximum size.
78	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
79};
80
81
82static lzma_ret
83stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
84{
85	// Initialize the Index hash used to verify the Index.
86	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
87	if (coder->index_hash == NULL)
88		return LZMA_MEM_ERROR;
89
90	// Reset the rest of the variables.
91	coder->sequence = SEQ_STREAM_HEADER;
92	coder->pos = 0;
93
94	return LZMA_OK;
95}
96
97
98static lzma_ret
99stream_decode(lzma_coder *coder, lzma_allocator *allocator,
100		const uint8_t *restrict in, size_t *restrict in_pos,
101		size_t in_size, uint8_t *restrict out,
102		size_t *restrict out_pos, size_t out_size, lzma_action action)
103{
104	// When decoding the actual Block, it may be able to produce more
105	// output even if we don't give it any new input.
106	while (true)
107	switch (coder->sequence) {
108	case SEQ_STREAM_HEADER: {
109		// Copy the Stream Header to the internal buffer.
110		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
111				LZMA_STREAM_HEADER_SIZE);
112
113		// Return if we didn't get the whole Stream Header yet.
114		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
115			return LZMA_OK;
116
117		coder->pos = 0;
118
119		// Decode the Stream Header.
120		const lzma_ret ret = lzma_stream_header_decode(
121				&coder->stream_flags, coder->buffer);
122		if (ret != LZMA_OK)
123			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
124					? LZMA_DATA_ERROR : ret;
125
126		// If we are decoding concatenated Streams, and the later
127		// Streams have invalid Header Magic Bytes, we give
128		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
129		coder->first_stream = false;
130
131		// Copy the type of the Check so that Block Header and Block
132		// decoders see it.
133		coder->block_options.check = coder->stream_flags.check;
134
135		// Even if we return LZMA_*_CHECK below, we want
136		// to continue from Block Header decoding.
137		coder->sequence = SEQ_BLOCK_HEADER;
138
139		// Detect if there's no integrity check or if it is
140		// unsupported if those were requested by the application.
141		if (coder->tell_no_check && coder->stream_flags.check
142				== LZMA_CHECK_NONE)
143			return LZMA_NO_CHECK;
144
145		if (coder->tell_unsupported_check
146				&& !lzma_check_is_supported(
147					coder->stream_flags.check))
148			return LZMA_UNSUPPORTED_CHECK;
149
150		if (coder->tell_any_check)
151			return LZMA_GET_CHECK;
152	}
153
154	// Fall through
155
156	case SEQ_BLOCK_HEADER: {
157		if (*in_pos >= in_size)
158			return LZMA_OK;
159
160		if (coder->pos == 0) {
161			// Detect if it's Index.
162			if (in[*in_pos] == 0x00) {
163				coder->sequence = SEQ_INDEX;
164				break;
165			}
166
167			// Calculate the size of the Block Header. Note that
168			// Block Header decoder wants to see this byte too
169			// so don't advance *in_pos.
170			coder->block_options.header_size
171					= lzma_block_header_size_decode(
172						in[*in_pos]);
173		}
174
175		// Copy the Block Header to the internal buffer.
176		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
177				coder->block_options.header_size);
178
179		// Return if we didn't get the whole Block Header yet.
180		if (coder->pos < coder->block_options.header_size)
181			return LZMA_OK;
182
183		coder->pos = 0;
184
185		// Version 0 is currently the only possible version.
186		coder->block_options.version = 0;
187
188		// Set up a buffer to hold the filter chain. Block Header
189		// decoder will initialize all members of this array so
190		// we don't need to do it here.
191		lzma_filter filters[LZMA_FILTERS_MAX + 1];
192		coder->block_options.filters = filters;
193
194		// Decode the Block Header.
195		return_if_error(lzma_block_header_decode(&coder->block_options,
196				allocator, coder->buffer));
197
198		// Check the memory usage limit.
199		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
200		lzma_ret ret;
201
202		if (memusage == UINT64_MAX) {
203			// One or more unknown Filter IDs.
204			ret = LZMA_OPTIONS_ERROR;
205		} else {
206			// Now we can set coder->memusage since we know that
207			// the filter chain is valid. We don't want
208			// lzma_memusage() to return UINT64_MAX in case of
209			// invalid filter chain.
210			coder->memusage = memusage;
211
212			if (memusage > coder->memlimit) {
213				// The chain would need too much memory.
214				ret = LZMA_MEMLIMIT_ERROR;
215			} else {
216				// Memory usage is OK.
217				// Initialize the Block decoder.
218				ret = lzma_block_decoder_init(
219						&coder->block_decoder,
220						allocator,
221						&coder->block_options);
222			}
223		}
224
225		// Free the allocated filter options since they are needed
226		// only to initialize the Block decoder.
227		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
228			lzma_free(filters[i].options, allocator);
229
230		coder->block_options.filters = NULL;
231
232		// Check if memory usage calculation and Block enocoder
233		// initialization succeeded.
234		if (ret != LZMA_OK)
235			return ret;
236
237		coder->sequence = SEQ_BLOCK;
238	}
239
240	// Fall through
241
242	case SEQ_BLOCK: {
243		const lzma_ret ret = coder->block_decoder.code(
244				coder->block_decoder.coder, allocator,
245				in, in_pos, in_size, out, out_pos, out_size,
246				action);
247
248		if (ret != LZMA_STREAM_END)
249			return ret;
250
251		// Block decoded successfully. Add the new size pair to
252		// the Index hash.
253		return_if_error(lzma_index_hash_append(coder->index_hash,
254				lzma_block_unpadded_size(
255					&coder->block_options),
256				coder->block_options.uncompressed_size));
257
258		coder->sequence = SEQ_BLOCK_HEADER;
259		break;
260	}
261
262	case SEQ_INDEX: {
263		// If we don't have any input, don't call
264		// lzma_index_hash_decode() since it would return
265		// LZMA_BUF_ERROR, which we must not do here.
266		if (*in_pos >= in_size)
267			return LZMA_OK;
268
269		// Decode the Index and compare it to the hash calculated
270		// from the sizes of the Blocks (if any).
271		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
272				in, in_pos, in_size);
273		if (ret != LZMA_STREAM_END)
274			return ret;
275
276		coder->sequence = SEQ_STREAM_FOOTER;
277	}
278
279	// Fall through
280
281	case SEQ_STREAM_FOOTER: {
282		// Copy the Stream Footer to the internal buffer.
283		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
284				LZMA_STREAM_HEADER_SIZE);
285
286		// Return if we didn't get the whole Stream Footer yet.
287		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
288			return LZMA_OK;
289
290		coder->pos = 0;
291
292		// Decode the Stream Footer. The decoder gives
293		// LZMA_FORMAT_ERROR if the magic bytes don't match,
294		// so convert that return code to LZMA_DATA_ERROR.
295		lzma_stream_flags footer_flags;
296		const lzma_ret ret = lzma_stream_footer_decode(
297				&footer_flags, coder->buffer);
298		if (ret != LZMA_OK)
299			return ret == LZMA_FORMAT_ERROR
300					? LZMA_DATA_ERROR : ret;
301
302		// Check that Index Size stored in the Stream Footer matches
303		// the real size of the Index field.
304		if (lzma_index_hash_size(coder->index_hash)
305				!= footer_flags.backward_size)
306			return LZMA_DATA_ERROR;
307
308		// Compare that the Stream Flags fields are identical in
309		// both Stream Header and Stream Footer.
310		return_if_error(lzma_stream_flags_compare(
311				&coder->stream_flags, &footer_flags));
312
313		if (!coder->concatenated)
314			return LZMA_STREAM_END;
315
316		coder->sequence = SEQ_STREAM_PADDING;
317	}
318
319	// Fall through
320
321	case SEQ_STREAM_PADDING:
322		assert(coder->concatenated);
323
324		// Skip over possible Stream Padding.
325		while (true) {
326			if (*in_pos >= in_size) {
327				// Unless LZMA_FINISH was used, we cannot
328				// know if there's more input coming later.
329				if (action != LZMA_FINISH)
330					return LZMA_OK;
331
332				// Stream Padding must be a multiple of
333				// four bytes.
334				return coder->pos == 0
335						? LZMA_STREAM_END
336						: LZMA_DATA_ERROR;
337			}
338
339			// If the byte is not zero, it probably indicates
340			// beginning of a new Stream (or the file is corrupt).
341			if (in[*in_pos] != 0x00)
342				break;
343
344			++*in_pos;
345			coder->pos = (coder->pos + 1) & 3;
346		}
347
348		// Stream Padding must be a multiple of four bytes (empty
349		// Stream Padding is OK).
350		if (coder->pos != 0) {
351			++*in_pos;
352			return LZMA_DATA_ERROR;
353		}
354
355		// Prepare to decode the next Stream.
356		return_if_error(stream_decoder_reset(coder, allocator));
357		break;
358
359	default:
360		assert(0);
361		return LZMA_PROG_ERROR;
362	}
363
364	// Never reached
365}
366
367
368static void
369stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
370{
371	lzma_next_end(&coder->block_decoder, allocator);
372	lzma_index_hash_end(coder->index_hash, allocator);
373	lzma_free(coder, allocator);
374	return;
375}
376
377
378static lzma_check
379stream_decoder_get_check(const lzma_coder *coder)
380{
381	return coder->stream_flags.check;
382}
383
384
385static lzma_ret
386stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
387		uint64_t *old_memlimit, uint64_t new_memlimit)
388{
389	*memusage = coder->memusage;
390	*old_memlimit = coder->memlimit;
391
392	if (new_memlimit != 0) {
393		if (new_memlimit < coder->memusage)
394			return LZMA_MEMLIMIT_ERROR;
395
396		coder->memlimit = new_memlimit;
397	}
398
399	return LZMA_OK;
400}
401
402
403extern lzma_ret
404lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
405		uint64_t memlimit, uint32_t flags)
406{
407	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
408
409	if (memlimit == 0)
410		return LZMA_PROG_ERROR;
411
412	if (flags & ~LZMA_SUPPORTED_FLAGS)
413		return LZMA_OPTIONS_ERROR;
414
415	if (next->coder == NULL) {
416		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
417		if (next->coder == NULL)
418			return LZMA_MEM_ERROR;
419
420		next->code = &stream_decode;
421		next->end = &stream_decoder_end;
422		next->get_check = &stream_decoder_get_check;
423		next->memconfig = &stream_decoder_memconfig;
424
425		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
426		next->coder->index_hash = NULL;
427	}
428
429	next->coder->memlimit = memlimit;
430	next->coder->memusage = LZMA_MEMUSAGE_BASE;
431	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
432	next->coder->tell_unsupported_check
433			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
434	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
435	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
436	next->coder->first_stream = true;
437
438	return stream_decoder_reset(next->coder, allocator);
439}
440
441
442extern LZMA_API(lzma_ret)
443lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
444{
445	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
446
447	strm->internal->supported_actions[LZMA_RUN] = true;
448	strm->internal->supported_actions[LZMA_FINISH] = true;
449
450	return LZMA_OK;
451}
452