stream_decoder.c revision 207753
///////////////////////////////////////////////////////////////////////////////
//
/// \file       stream_decoder.c
/// \brief      Decodes .xz Streams
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
12283625Sdim
13283625Sdim#include "stream_decoder.h"
14283625Sdim#include "block_decoder.h"
15283625Sdim
16283625Sdim
17283625Sdimstruct lzma_coder_s {
18283625Sdim	enum {
19283625Sdim		SEQ_STREAM_HEADER,
20283625Sdim		SEQ_BLOCK_HEADER,
21296417Sdim		SEQ_BLOCK,
22283625Sdim		SEQ_INDEX,
23283625Sdim		SEQ_STREAM_FOOTER,
24283625Sdim		SEQ_STREAM_PADDING,
25283625Sdim	} sequence;
26283625Sdim
27283625Sdim	/// Block or Metadata decoder. This takes little memory and the same
28283625Sdim	/// data structure can be used to decode every Block Header, so it's
29283625Sdim	/// a good idea to have a separate lzma_next_coder structure for it.
30283625Sdim	lzma_next_coder block_decoder;
31283625Sdim
32283625Sdim	/// Block options decoded by the Block Header decoder and used by
33283625Sdim	/// the Block decoder.
34283625Sdim	lzma_block block_options;
35283625Sdim
36283625Sdim	/// Stream Flags from Stream Header
37283625Sdim	lzma_stream_flags stream_flags;
38283625Sdim
39283625Sdim	/// Index is hashed so that it can be compared to the sizes of Blocks
40283625Sdim	/// with O(1) memory usage.
41283625Sdim	lzma_index_hash *index_hash;
42283625Sdim
43283625Sdim	/// Memory usage limit
44283625Sdim	uint64_t memlimit;
45283625Sdim
46283625Sdim	/// Amount of memory actually needed (only an estimate)
47283625Sdim	uint64_t memusage;
48283625Sdim
49283625Sdim	/// If true, LZMA_NO_CHECK is returned if the Stream has
50283625Sdim	/// no integrity check.
51283625Sdim	bool tell_no_check;
52283625Sdim
53283625Sdim	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54283625Sdim	/// an integrity check that isn't supported by this liblzma build.
55283625Sdim	bool tell_unsupported_check;
56283625Sdim
57283625Sdim	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
58283625Sdim	bool tell_any_check;
59283625Sdim
60283625Sdim	/// If true, we will decode concatenated Streams that possibly have
61283625Sdim	/// Stream Padding between or after them. LZMA_STREAM_END is returned
62283625Sdim	/// once the application isn't giving us any new input, and we aren't
63283625Sdim	/// in the middle of a Stream, and possible Stream Padding is a
64285181Sdim	/// multiple of four bytes.
65283625Sdim	bool concatenated;
66283625Sdim
67283625Sdim	/// When decoding concatenated Streams, this is true as long as we
68283625Sdim	/// are decoding the first Stream. This is needed to avoid misleading
69283625Sdim	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
70283625Sdim	/// bytes.
71283625Sdim	bool first_stream;
72283625Sdim
73283625Sdim	/// Write position in buffer[] and position in Stream Padding
74283625Sdim	size_t pos;
75283625Sdim
76283625Sdim	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
77283625Sdim	/// Block Header has biggest maximum size.
78283625Sdim	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
79283625Sdim};
80283625Sdim
81283625Sdim
82283625Sdimstatic lzma_ret
83283625Sdimstream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
84283625Sdim{
85283625Sdim	// Initialize the Index hash used to verify the Index.
86283625Sdim	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
87283625Sdim	if (coder->index_hash == NULL)
88283625Sdim		return LZMA_MEM_ERROR;
89283625Sdim
90283625Sdim	// Reset the rest of the variables.
91283625Sdim	coder->sequence = SEQ_STREAM_HEADER;
92283625Sdim	coder->pos = 0;
93283625Sdim
94283625Sdim	return LZMA_OK;
95283625Sdim}
96283625Sdim
97283625Sdim
98283625Sdimstatic lzma_ret
99283625Sdimstream_decode(lzma_coder *coder, lzma_allocator *allocator,
100283625Sdim		const uint8_t *restrict in, size_t *restrict in_pos,
101283625Sdim		size_t in_size, uint8_t *restrict out,
102283625Sdim		size_t *restrict out_pos, size_t out_size, lzma_action action)
103283625Sdim{
104283625Sdim	// When decoding the actual Block, it may be able to produce more
105283625Sdim	// output even if we don't give it any new input.
106283625Sdim	while (true)
107283625Sdim	switch (coder->sequence) {
108283625Sdim	case SEQ_STREAM_HEADER: {
109283625Sdim		// Copy the Stream Header to the internal buffer.
110283625Sdim		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
111283625Sdim				LZMA_STREAM_HEADER_SIZE);
112283625Sdim
113296417Sdim		// Return if we didn't get the whole Stream Header yet.
114283625Sdim		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
115283625Sdim			return LZMA_OK;
116283625Sdim
117283625Sdim		coder->pos = 0;
118283625Sdim
119283625Sdim		// Decode the Stream Header.
120283625Sdim		const lzma_ret ret = lzma_stream_header_decode(
121283625Sdim				&coder->stream_flags, coder->buffer);
122283625Sdim		if (ret != LZMA_OK)
123283625Sdim			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
124283625Sdim					? LZMA_DATA_ERROR : ret;
125283625Sdim
126283625Sdim		// If we are decoding concatenated Streams, and the later
127283625Sdim		// Streams have invalid Header Magic Bytes, we give
128283625Sdim		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
129283625Sdim		coder->first_stream = false;
130283625Sdim
131283625Sdim		// Copy the type of the Check so that Block Header and Block
132283625Sdim		// decoders see it.
133283625Sdim		coder->block_options.check = coder->stream_flags.check;
134283625Sdim
135283625Sdim		// Even if we return LZMA_*_CHECK below, we want
136283625Sdim		// to continue from Block Header decoding.
137283625Sdim		coder->sequence = SEQ_BLOCK_HEADER;
138283625Sdim
139283625Sdim		// Detect if there's no integrity check or if it is
140283625Sdim		// unsupported if those were requested by the application.
141283625Sdim		if (coder->tell_no_check && coder->stream_flags.check
142283625Sdim				== LZMA_CHECK_NONE)
143283625Sdim			return LZMA_NO_CHECK;
144283625Sdim
145283625Sdim		if (coder->tell_unsupported_check
146283625Sdim				&& !lzma_check_is_supported(
147283625Sdim					coder->stream_flags.check))
148283625Sdim			return LZMA_UNSUPPORTED_CHECK;
149283625Sdim
150283625Sdim		if (coder->tell_any_check)
151283625Sdim			return LZMA_GET_CHECK;
152283625Sdim	}
153283625Sdim
154283625Sdim	// Fall through
155283625Sdim
156283625Sdim	case SEQ_BLOCK_HEADER: {
157283625Sdim		if (*in_pos >= in_size)
158283625Sdim			return LZMA_OK;
159283625Sdim
160283625Sdim		if (coder->pos == 0) {
161283625Sdim			// Detect if it's Index.
162283625Sdim			if (in[*in_pos] == 0x00) {
163283625Sdim				coder->sequence = SEQ_INDEX;
164283625Sdim				break;
165283625Sdim			}
166283625Sdim
167283625Sdim			// Calculate the size of the Block Header. Note that
168283625Sdim			// Block Header decoder wants to see this byte too
169283625Sdim			// so don't advance *in_pos.
170283625Sdim			coder->block_options.header_size
171283625Sdim					= lzma_block_header_size_decode(
172283625Sdim						in[*in_pos]);
173283625Sdim		}
174283625Sdim
175283625Sdim		// Copy the Block Header to the internal buffer.
176283625Sdim		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
177283625Sdim				coder->block_options.header_size);
178283625Sdim
179283625Sdim		// Return if we didn't get the whole Block Header yet.
180283625Sdim		if (coder->pos < coder->block_options.header_size)
181283625Sdim			return LZMA_OK;
182283625Sdim
183283625Sdim		coder->pos = 0;
184283625Sdim
185283625Sdim		// Version 0 is currently the only possible version.
186283625Sdim		coder->block_options.version = 0;
187283625Sdim
188283625Sdim		// Set up a buffer to hold the filter chain. Block Header
189283625Sdim		// decoder will initialize all members of this array so
190283625Sdim		// we don't need to do it here.
191283625Sdim		lzma_filter filters[LZMA_FILTERS_MAX + 1];
192283625Sdim		coder->block_options.filters = filters;
193283625Sdim
194283625Sdim		// Decode the Block Header.
195283625Sdim		return_if_error(lzma_block_header_decode(&coder->block_options,
196283625Sdim				allocator, coder->buffer));
197283625Sdim
198283625Sdim		// Check the memory usage limit.
199283625Sdim		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
200283625Sdim		lzma_ret ret;
201283625Sdim
202283625Sdim		if (memusage == UINT64_MAX) {
203283625Sdim			// One or more unknown Filter IDs.
204283625Sdim			ret = LZMA_OPTIONS_ERROR;
205283625Sdim		} else {
206283625Sdim			// Now we can set coder->memusage since we know that
207283625Sdim			// the filter chain is valid. We don't want
208283625Sdim			// lzma_memusage() to return UINT64_MAX in case of
209283625Sdim			// invalid filter chain.
210283625Sdim			coder->memusage = memusage;
211283625Sdim
212283625Sdim			if (memusage > coder->memlimit) {
213283625Sdim				// The chain would need too much memory.
214283625Sdim				ret = LZMA_MEMLIMIT_ERROR;
215283625Sdim			} else {
216283625Sdim				// Memory usage is OK.
217283625Sdim				// Initialize the Block decoder.
218283625Sdim				ret = lzma_block_decoder_init(
219283625Sdim						&coder->block_decoder,
220283625Sdim						allocator,
221283625Sdim						&coder->block_options);
222283625Sdim			}
223283625Sdim		}
224283625Sdim
225283625Sdim		// Free the allocated filter options since they are needed
226283625Sdim		// only to initialize the Block decoder.
227283625Sdim		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
228283625Sdim			lzma_free(filters[i].options, allocator);
229283625Sdim
230283625Sdim		coder->block_options.filters = NULL;
231283625Sdim
232283625Sdim		// Check if memory usage calculation and Block enocoder
233283625Sdim		// initialization succeeded.
234283625Sdim		if (ret != LZMA_OK)
235283625Sdim			return ret;
236283625Sdim
237283625Sdim		coder->sequence = SEQ_BLOCK;
238283625Sdim	}
239283625Sdim
240283625Sdim	// Fall through
241283625Sdim
242283625Sdim	case SEQ_BLOCK: {
243283625Sdim		const lzma_ret ret = coder->block_decoder.code(
244283625Sdim				coder->block_decoder.coder, allocator,
245283625Sdim				in, in_pos, in_size, out, out_pos, out_size,
246283625Sdim				action);
247283625Sdim
248283625Sdim		if (ret != LZMA_STREAM_END)
249283625Sdim			return ret;
250283625Sdim
251283625Sdim		// Block decoded successfully. Add the new size pair to
252283625Sdim		// the Index hash.
253283625Sdim		return_if_error(lzma_index_hash_append(coder->index_hash,
254283625Sdim				lzma_block_unpadded_size(
255283625Sdim					&coder->block_options),
256283625Sdim				coder->block_options.uncompressed_size));
257283625Sdim
258283625Sdim		coder->sequence = SEQ_BLOCK_HEADER;
259283625Sdim		break;
260283625Sdim	}
261283625Sdim
262283625Sdim	case SEQ_INDEX: {
263283625Sdim		// If we don't have any input, don't call
264296417Sdim		// lzma_index_hash_decode() since it would return
265296417Sdim		// LZMA_BUF_ERROR, which we must not do here.
266296417Sdim		if (*in_pos >= in_size)
267296417Sdim			return LZMA_OK;
268296417Sdim
269296417Sdim		// Decode the Index and compare it to the hash calculated
270283625Sdim		// from the sizes of the Blocks (if any).
271283625Sdim		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
272283625Sdim				in, in_pos, in_size);
273283625Sdim		if (ret != LZMA_STREAM_END)
274283625Sdim			return ret;
275283625Sdim
276283625Sdim		coder->sequence = SEQ_STREAM_FOOTER;
277283625Sdim	}
278283625Sdim
279283625Sdim	// Fall through
280283625Sdim
281283625Sdim	case SEQ_STREAM_FOOTER: {
282283625Sdim		// Copy the Stream Footer to the internal buffer.
283283625Sdim		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
284283625Sdim				LZMA_STREAM_HEADER_SIZE);
285283625Sdim
286283625Sdim		// Return if we didn't get the whole Stream Footer yet.
287283625Sdim		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
288283625Sdim			return LZMA_OK;
289283625Sdim
290283625Sdim		coder->pos = 0;
291283625Sdim
292283625Sdim		// Decode the Stream Footer. The decoder gives
293283625Sdim		// LZMA_FORMAT_ERROR if the magic bytes don't match,
294283625Sdim		// so convert that return code to LZMA_DATA_ERROR.
295283625Sdim		lzma_stream_flags footer_flags;
296283625Sdim		const lzma_ret ret = lzma_stream_footer_decode(
297283625Sdim				&footer_flags, coder->buffer);
298283625Sdim		if (ret != LZMA_OK)
299283625Sdim			return ret == LZMA_FORMAT_ERROR
300283625Sdim					? LZMA_DATA_ERROR : ret;
301283625Sdim
302283625Sdim		// Check that Index Size stored in the Stream Footer matches
303283625Sdim		// the real size of the Index field.
304296417Sdim		if (lzma_index_hash_size(coder->index_hash)
305283625Sdim				!= footer_flags.backward_size)
306283625Sdim			return LZMA_DATA_ERROR;
307296417Sdim
308283625Sdim		// Compare that the Stream Flags fields are identical in
309283625Sdim		// both Stream Header and Stream Footer.
310283625Sdim		return_if_error(lzma_stream_flags_compare(
311283625Sdim				&coder->stream_flags, &footer_flags));
312283625Sdim
313283625Sdim		if (!coder->concatenated)
314283625Sdim			return LZMA_STREAM_END;
315283625Sdim
316283625Sdim		coder->sequence = SEQ_STREAM_PADDING;
317283625Sdim	}
318283625Sdim
319283625Sdim	// Fall through
320283625Sdim
321283625Sdim	case SEQ_STREAM_PADDING:
322283625Sdim		assert(coder->concatenated);
323283625Sdim
324283625Sdim		// Skip over possible Stream Padding.
325283625Sdim		while (true) {
326283625Sdim			if (*in_pos >= in_size) {
327283625Sdim				// Unless LZMA_FINISH was used, we cannot
328283625Sdim				// know if there's more input coming later.
329283625Sdim				if (action != LZMA_FINISH)
330283625Sdim					return LZMA_OK;
331283625Sdim
332283625Sdim				// Stream Padding must be a multiple of
333283625Sdim				// four bytes.
334283625Sdim				return coder->pos == 0
335283625Sdim						? LZMA_STREAM_END
336283625Sdim						: LZMA_DATA_ERROR;
337283625Sdim			}
338283625Sdim
339283625Sdim			// If the byte is not zero, it probably indicates
340283625Sdim			// beginning of a new Stream (or the file is corrupt).
341283625Sdim			if (in[*in_pos] != 0x00)
342283625Sdim				break;
343283625Sdim
344283625Sdim			++*in_pos;
345283625Sdim			coder->pos = (coder->pos + 1) & 3;
346283625Sdim		}
347283625Sdim
348283625Sdim		// Stream Padding must be a multiple of four bytes (empty
349283625Sdim		// Stream Padding is OK).
350283625Sdim		if (coder->pos != 0) {
351283625Sdim			++*in_pos;
352283625Sdim			return LZMA_DATA_ERROR;
353283625Sdim		}
354283625Sdim
355283625Sdim		// Prepare to decode the next Stream.
356283625Sdim		return_if_error(stream_decoder_reset(coder, allocator));
357283625Sdim		break;
358283625Sdim
359283625Sdim	default:
360283625Sdim		assert(0);
361283625Sdim		return LZMA_PROG_ERROR;
362283625Sdim	}
363283625Sdim
364283625Sdim	// Never reached
365283625Sdim}
366283625Sdim
367283625Sdim
368283625Sdimstatic void
369283625Sdimstream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
370283625Sdim{
371283625Sdim	lzma_next_end(&coder->block_decoder, allocator);
372283625Sdim	lzma_index_hash_end(coder->index_hash, allocator);
373283625Sdim	lzma_free(coder, allocator);
374283625Sdim	return;
375283625Sdim}
376283625Sdim
377283625Sdim
378283625Sdimstatic lzma_check
379283625Sdimstream_decoder_get_check(const lzma_coder *coder)
380283625Sdim{
381283625Sdim	return coder->stream_flags.check;
382283625Sdim}
383283625Sdim
384283625Sdim
385283625Sdimstatic lzma_ret
386283625Sdimstream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
387283625Sdim		uint64_t *old_memlimit, uint64_t new_memlimit)
388283625Sdim{
389283625Sdim	*memusage = coder->memusage;
390283625Sdim	*old_memlimit = coder->memlimit;
391283625Sdim
392283625Sdim	if (new_memlimit != 0) {
393283625Sdim		if (new_memlimit < coder->memusage)
394283625Sdim			return LZMA_MEMLIMIT_ERROR;
395283625Sdim
396283625Sdim		coder->memlimit = new_memlimit;
397283625Sdim	}
398283625Sdim
399283625Sdim	return LZMA_OK;
400283625Sdim}
401283625Sdim
402283625Sdim
403283625Sdimextern lzma_ret
404283625Sdimlzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
405283625Sdim		uint64_t memlimit, uint32_t flags)
406283625Sdim{
407283625Sdim	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
408283625Sdim
409283625Sdim	if (memlimit == 0)
410283625Sdim		return LZMA_PROG_ERROR;
411283625Sdim
412283625Sdim	if (flags & ~LZMA_SUPPORTED_FLAGS)
413283625Sdim		return LZMA_OPTIONS_ERROR;
414283625Sdim
415283625Sdim	if (next->coder == NULL) {
416283625Sdim		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
417283625Sdim		if (next->coder == NULL)
418283625Sdim			return LZMA_MEM_ERROR;
419283625Sdim
420283625Sdim		next->code = &stream_decode;
421283625Sdim		next->end = &stream_decoder_end;
422283625Sdim		next->get_check = &stream_decoder_get_check;
423283625Sdim		next->memconfig = &stream_decoder_memconfig;
424283625Sdim
425283625Sdim		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
426283625Sdim		next->coder->index_hash = NULL;
427283625Sdim	}
428283625Sdim
429283625Sdim	next->coder->memlimit = memlimit;
430283625Sdim	next->coder->memusage = LZMA_MEMUSAGE_BASE;
431283625Sdim	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
432283625Sdim	next->coder->tell_unsupported_check
433283625Sdim			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
434283625Sdim	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
435283625Sdim	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
436283625Sdim	next->coder->first_stream = true;
437283625Sdim
438283625Sdim	return stream_decoder_reset(next->coder, allocator);
439283625Sdim}
440283625Sdim
441283625Sdim
442283625Sdimextern LZMA_API(lzma_ret)
443283625Sdimlzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
444283625Sdim{
445283625Sdim	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
446283625Sdim
447283625Sdim	strm->internal->supported_actions[LZMA_RUN] = true;
448283625Sdim	strm->internal->supported_actions[LZMA_FINISH] = true;
449283625Sdim
450283625Sdim	return LZMA_OK;
451283625Sdim}
452283625Sdim