///////////////////////////////////////////////////////////////////////////////
//
/// \file       stream_decoder.c
/// \brief      Decodes .xz Streams
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "stream_decoder.h"
14207753Smm#include "block_decoder.h"
15207753Smm
16207753Smm
17207753Smmstruct lzma_coder_s {
18207753Smm	enum {
19207753Smm		SEQ_STREAM_HEADER,
20207753Smm		SEQ_BLOCK_HEADER,
21207753Smm		SEQ_BLOCK,
22207753Smm		SEQ_INDEX,
23207753Smm		SEQ_STREAM_FOOTER,
24207753Smm		SEQ_STREAM_PADDING,
25207753Smm	} sequence;
26207753Smm
27207753Smm	/// Block or Metadata decoder. This takes little memory and the same
28207753Smm	/// data structure can be used to decode every Block Header, so it's
29207753Smm	/// a good idea to have a separate lzma_next_coder structure for it.
30207753Smm	lzma_next_coder block_decoder;
31207753Smm
32207753Smm	/// Block options decoded by the Block Header decoder and used by
33207753Smm	/// the Block decoder.
34207753Smm	lzma_block block_options;
35207753Smm
36207753Smm	/// Stream Flags from Stream Header
37207753Smm	lzma_stream_flags stream_flags;
38207753Smm
39207753Smm	/// Index is hashed so that it can be compared to the sizes of Blocks
40207753Smm	/// with O(1) memory usage.
41207753Smm	lzma_index_hash *index_hash;
42207753Smm
43207753Smm	/// Memory usage limit
44207753Smm	uint64_t memlimit;
45207753Smm
46207753Smm	/// Amount of memory actually needed (only an estimate)
47207753Smm	uint64_t memusage;
48207753Smm
49207753Smm	/// If true, LZMA_NO_CHECK is returned if the Stream has
50207753Smm	/// no integrity check.
51207753Smm	bool tell_no_check;
52207753Smm
53207753Smm	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54207753Smm	/// an integrity check that isn't supported by this liblzma build.
55207753Smm	bool tell_unsupported_check;
56207753Smm
57207753Smm	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
58207753Smm	bool tell_any_check;
59207753Smm
60207753Smm	/// If true, we will decode concatenated Streams that possibly have
61207753Smm	/// Stream Padding between or after them. LZMA_STREAM_END is returned
62207753Smm	/// once the application isn't giving us any new input, and we aren't
63207753Smm	/// in the middle of a Stream, and possible Stream Padding is a
64207753Smm	/// multiple of four bytes.
65207753Smm	bool concatenated;
66207753Smm
67207753Smm	/// When decoding concatenated Streams, this is true as long as we
68207753Smm	/// are decoding the first Stream. This is needed to avoid misleading
69207753Smm	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
70207753Smm	/// bytes.
71207753Smm	bool first_stream;
72207753Smm
73207753Smm	/// Write position in buffer[] and position in Stream Padding
74207753Smm	size_t pos;
75207753Smm
76207753Smm	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
77207753Smm	/// Block Header has biggest maximum size.
78207753Smm	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
79207753Smm};
80207753Smm
81207753Smm
82207753Smmstatic lzma_ret
83207753Smmstream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
84207753Smm{
85207753Smm	// Initialize the Index hash used to verify the Index.
86207753Smm	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
87207753Smm	if (coder->index_hash == NULL)
88207753Smm		return LZMA_MEM_ERROR;
89207753Smm
90207753Smm	// Reset the rest of the variables.
91207753Smm	coder->sequence = SEQ_STREAM_HEADER;
92207753Smm	coder->pos = 0;
93207753Smm
94207753Smm	return LZMA_OK;
95207753Smm}
96207753Smm
97207753Smm
98207753Smmstatic lzma_ret
99207753Smmstream_decode(lzma_coder *coder, lzma_allocator *allocator,
100207753Smm		const uint8_t *restrict in, size_t *restrict in_pos,
101207753Smm		size_t in_size, uint8_t *restrict out,
102207753Smm		size_t *restrict out_pos, size_t out_size, lzma_action action)
103207753Smm{
104207753Smm	// When decoding the actual Block, it may be able to produce more
105207753Smm	// output even if we don't give it any new input.
106207753Smm	while (true)
107207753Smm	switch (coder->sequence) {
108207753Smm	case SEQ_STREAM_HEADER: {
109207753Smm		// Copy the Stream Header to the internal buffer.
110207753Smm		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
111207753Smm				LZMA_STREAM_HEADER_SIZE);
112207753Smm
113207753Smm		// Return if we didn't get the whole Stream Header yet.
114207753Smm		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
115207753Smm			return LZMA_OK;
116207753Smm
117207753Smm		coder->pos = 0;
118207753Smm
119207753Smm		// Decode the Stream Header.
120207753Smm		const lzma_ret ret = lzma_stream_header_decode(
121207753Smm				&coder->stream_flags, coder->buffer);
122207753Smm		if (ret != LZMA_OK)
123207753Smm			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
124207753Smm					? LZMA_DATA_ERROR : ret;
125207753Smm
126207753Smm		// If we are decoding concatenated Streams, and the later
127207753Smm		// Streams have invalid Header Magic Bytes, we give
128207753Smm		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
129207753Smm		coder->first_stream = false;
130207753Smm
131207753Smm		// Copy the type of the Check so that Block Header and Block
132207753Smm		// decoders see it.
133207753Smm		coder->block_options.check = coder->stream_flags.check;
134207753Smm
135207753Smm		// Even if we return LZMA_*_CHECK below, we want
136207753Smm		// to continue from Block Header decoding.
137207753Smm		coder->sequence = SEQ_BLOCK_HEADER;
138207753Smm
139207753Smm		// Detect if there's no integrity check or if it is
140207753Smm		// unsupported if those were requested by the application.
141207753Smm		if (coder->tell_no_check && coder->stream_flags.check
142207753Smm				== LZMA_CHECK_NONE)
143207753Smm			return LZMA_NO_CHECK;
144207753Smm
145207753Smm		if (coder->tell_unsupported_check
146207753Smm				&& !lzma_check_is_supported(
147207753Smm					coder->stream_flags.check))
148207753Smm			return LZMA_UNSUPPORTED_CHECK;
149207753Smm
150207753Smm		if (coder->tell_any_check)
151207753Smm			return LZMA_GET_CHECK;
152207753Smm	}
153207753Smm
154207753Smm	// Fall through
155207753Smm
156207753Smm	case SEQ_BLOCK_HEADER: {
157207753Smm		if (*in_pos >= in_size)
158207753Smm			return LZMA_OK;
159207753Smm
160207753Smm		if (coder->pos == 0) {
161207753Smm			// Detect if it's Index.
162207753Smm			if (in[*in_pos] == 0x00) {
163207753Smm				coder->sequence = SEQ_INDEX;
164207753Smm				break;
165207753Smm			}
166207753Smm
167207753Smm			// Calculate the size of the Block Header. Note that
168207753Smm			// Block Header decoder wants to see this byte too
169207753Smm			// so don't advance *in_pos.
170207753Smm			coder->block_options.header_size
171207753Smm					= lzma_block_header_size_decode(
172207753Smm						in[*in_pos]);
173207753Smm		}
174207753Smm
175207753Smm		// Copy the Block Header to the internal buffer.
176207753Smm		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
177207753Smm				coder->block_options.header_size);
178207753Smm
179207753Smm		// Return if we didn't get the whole Block Header yet.
180207753Smm		if (coder->pos < coder->block_options.header_size)
181207753Smm			return LZMA_OK;
182207753Smm
183207753Smm		coder->pos = 0;
184207753Smm
185207753Smm		// Version 0 is currently the only possible version.
186207753Smm		coder->block_options.version = 0;
187207753Smm
188207753Smm		// Set up a buffer to hold the filter chain. Block Header
189207753Smm		// decoder will initialize all members of this array so
190207753Smm		// we don't need to do it here.
191207753Smm		lzma_filter filters[LZMA_FILTERS_MAX + 1];
192207753Smm		coder->block_options.filters = filters;
193207753Smm
194207753Smm		// Decode the Block Header.
195207753Smm		return_if_error(lzma_block_header_decode(&coder->block_options,
196207753Smm				allocator, coder->buffer));
197207753Smm
198207753Smm		// Check the memory usage limit.
199207753Smm		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
200207753Smm		lzma_ret ret;
201207753Smm
202207753Smm		if (memusage == UINT64_MAX) {
203207753Smm			// One or more unknown Filter IDs.
204207753Smm			ret = LZMA_OPTIONS_ERROR;
205207753Smm		} else {
206207753Smm			// Now we can set coder->memusage since we know that
207207753Smm			// the filter chain is valid. We don't want
208207753Smm			// lzma_memusage() to return UINT64_MAX in case of
209207753Smm			// invalid filter chain.
210207753Smm			coder->memusage = memusage;
211207753Smm
212207753Smm			if (memusage > coder->memlimit) {
213207753Smm				// The chain would need too much memory.
214207753Smm				ret = LZMA_MEMLIMIT_ERROR;
215207753Smm			} else {
216207753Smm				// Memory usage is OK.
217207753Smm				// Initialize the Block decoder.
218207753Smm				ret = lzma_block_decoder_init(
219207753Smm						&coder->block_decoder,
220207753Smm						allocator,
221207753Smm						&coder->block_options);
222207753Smm			}
223207753Smm		}
224207753Smm
225207753Smm		// Free the allocated filter options since they are needed
226207753Smm		// only to initialize the Block decoder.
227207753Smm		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
228207753Smm			lzma_free(filters[i].options, allocator);
229207753Smm
230207753Smm		coder->block_options.filters = NULL;
231207753Smm
232207753Smm		// Check if memory usage calculation and Block enocoder
233207753Smm		// initialization succeeded.
234207753Smm		if (ret != LZMA_OK)
235207753Smm			return ret;
236207753Smm
237207753Smm		coder->sequence = SEQ_BLOCK;
238207753Smm	}
239207753Smm
240207753Smm	// Fall through
241207753Smm
242207753Smm	case SEQ_BLOCK: {
243207753Smm		const lzma_ret ret = coder->block_decoder.code(
244207753Smm				coder->block_decoder.coder, allocator,
245207753Smm				in, in_pos, in_size, out, out_pos, out_size,
246207753Smm				action);
247207753Smm
248207753Smm		if (ret != LZMA_STREAM_END)
249207753Smm			return ret;
250207753Smm
251207753Smm		// Block decoded successfully. Add the new size pair to
252207753Smm		// the Index hash.
253207753Smm		return_if_error(lzma_index_hash_append(coder->index_hash,
254207753Smm				lzma_block_unpadded_size(
255207753Smm					&coder->block_options),
256207753Smm				coder->block_options.uncompressed_size));
257207753Smm
258207753Smm		coder->sequence = SEQ_BLOCK_HEADER;
259207753Smm		break;
260207753Smm	}
261207753Smm
262207753Smm	case SEQ_INDEX: {
263207753Smm		// If we don't have any input, don't call
264207753Smm		// lzma_index_hash_decode() since it would return
265207753Smm		// LZMA_BUF_ERROR, which we must not do here.
266207753Smm		if (*in_pos >= in_size)
267207753Smm			return LZMA_OK;
268207753Smm
269207753Smm		// Decode the Index and compare it to the hash calculated
270207753Smm		// from the sizes of the Blocks (if any).
271207753Smm		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
272207753Smm				in, in_pos, in_size);
273207753Smm		if (ret != LZMA_STREAM_END)
274207753Smm			return ret;
275207753Smm
276207753Smm		coder->sequence = SEQ_STREAM_FOOTER;
277207753Smm	}
278207753Smm
279207753Smm	// Fall through
280207753Smm
281207753Smm	case SEQ_STREAM_FOOTER: {
282207753Smm		// Copy the Stream Footer to the internal buffer.
283207753Smm		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
284207753Smm				LZMA_STREAM_HEADER_SIZE);
285207753Smm
286207753Smm		// Return if we didn't get the whole Stream Footer yet.
287207753Smm		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
288207753Smm			return LZMA_OK;
289207753Smm
290207753Smm		coder->pos = 0;
291207753Smm
292207753Smm		// Decode the Stream Footer. The decoder gives
293207753Smm		// LZMA_FORMAT_ERROR if the magic bytes don't match,
294207753Smm		// so convert that return code to LZMA_DATA_ERROR.
295207753Smm		lzma_stream_flags footer_flags;
296207753Smm		const lzma_ret ret = lzma_stream_footer_decode(
297207753Smm				&footer_flags, coder->buffer);
298207753Smm		if (ret != LZMA_OK)
299207753Smm			return ret == LZMA_FORMAT_ERROR
300207753Smm					? LZMA_DATA_ERROR : ret;
301207753Smm
302207753Smm		// Check that Index Size stored in the Stream Footer matches
303207753Smm		// the real size of the Index field.
304207753Smm		if (lzma_index_hash_size(coder->index_hash)
305207753Smm				!= footer_flags.backward_size)
306207753Smm			return LZMA_DATA_ERROR;
307207753Smm
308207753Smm		// Compare that the Stream Flags fields are identical in
309207753Smm		// both Stream Header and Stream Footer.
310207753Smm		return_if_error(lzma_stream_flags_compare(
311207753Smm				&coder->stream_flags, &footer_flags));
312207753Smm
313207753Smm		if (!coder->concatenated)
314207753Smm			return LZMA_STREAM_END;
315207753Smm
316207753Smm		coder->sequence = SEQ_STREAM_PADDING;
317207753Smm	}
318207753Smm
319207753Smm	// Fall through
320207753Smm
321207753Smm	case SEQ_STREAM_PADDING:
322207753Smm		assert(coder->concatenated);
323207753Smm
324207753Smm		// Skip over possible Stream Padding.
325207753Smm		while (true) {
326207753Smm			if (*in_pos >= in_size) {
327207753Smm				// Unless LZMA_FINISH was used, we cannot
328207753Smm				// know if there's more input coming later.
329207753Smm				if (action != LZMA_FINISH)
330207753Smm					return LZMA_OK;
331207753Smm
332207753Smm				// Stream Padding must be a multiple of
333207753Smm				// four bytes.
334207753Smm				return coder->pos == 0
335207753Smm						? LZMA_STREAM_END
336207753Smm						: LZMA_DATA_ERROR;
337207753Smm			}
338207753Smm
339207753Smm			// If the byte is not zero, it probably indicates
340207753Smm			// beginning of a new Stream (or the file is corrupt).
341207753Smm			if (in[*in_pos] != 0x00)
342207753Smm				break;
343207753Smm
344207753Smm			++*in_pos;
345207753Smm			coder->pos = (coder->pos + 1) & 3;
346207753Smm		}
347207753Smm
348207753Smm		// Stream Padding must be a multiple of four bytes (empty
349207753Smm		// Stream Padding is OK).
350207753Smm		if (coder->pos != 0) {
351207753Smm			++*in_pos;
352207753Smm			return LZMA_DATA_ERROR;
353207753Smm		}
354207753Smm
355207753Smm		// Prepare to decode the next Stream.
356207753Smm		return_if_error(stream_decoder_reset(coder, allocator));
357207753Smm		break;
358207753Smm
359207753Smm	default:
360207753Smm		assert(0);
361207753Smm		return LZMA_PROG_ERROR;
362207753Smm	}
363207753Smm
364207753Smm	// Never reached
365207753Smm}
366207753Smm
367207753Smm
368207753Smmstatic void
369207753Smmstream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
370207753Smm{
371207753Smm	lzma_next_end(&coder->block_decoder, allocator);
372207753Smm	lzma_index_hash_end(coder->index_hash, allocator);
373207753Smm	lzma_free(coder, allocator);
374207753Smm	return;
375207753Smm}
376207753Smm
377207753Smm
378207753Smmstatic lzma_check
379207753Smmstream_decoder_get_check(const lzma_coder *coder)
380207753Smm{
381207753Smm	return coder->stream_flags.check;
382207753Smm}
383207753Smm
384207753Smm
385207753Smmstatic lzma_ret
386207753Smmstream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
387207753Smm		uint64_t *old_memlimit, uint64_t new_memlimit)
388207753Smm{
389207753Smm	*memusage = coder->memusage;
390207753Smm	*old_memlimit = coder->memlimit;
391207753Smm
392207753Smm	if (new_memlimit != 0) {
393207753Smm		if (new_memlimit < coder->memusage)
394207753Smm			return LZMA_MEMLIMIT_ERROR;
395207753Smm
396207753Smm		coder->memlimit = new_memlimit;
397207753Smm	}
398207753Smm
399207753Smm	return LZMA_OK;
400207753Smm}
401207753Smm
402207753Smm
403207753Smmextern lzma_ret
404207753Smmlzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
405207753Smm		uint64_t memlimit, uint32_t flags)
406207753Smm{
407207753Smm	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
408207753Smm
409207753Smm	if (memlimit == 0)
410207753Smm		return LZMA_PROG_ERROR;
411207753Smm
412207753Smm	if (flags & ~LZMA_SUPPORTED_FLAGS)
413207753Smm		return LZMA_OPTIONS_ERROR;
414207753Smm
415207753Smm	if (next->coder == NULL) {
416207753Smm		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
417207753Smm		if (next->coder == NULL)
418207753Smm			return LZMA_MEM_ERROR;
419207753Smm
420207753Smm		next->code = &stream_decode;
421207753Smm		next->end = &stream_decoder_end;
422207753Smm		next->get_check = &stream_decoder_get_check;
423207753Smm		next->memconfig = &stream_decoder_memconfig;
424207753Smm
425207753Smm		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
426207753Smm		next->coder->index_hash = NULL;
427207753Smm	}
428207753Smm
429207753Smm	next->coder->memlimit = memlimit;
430207753Smm	next->coder->memusage = LZMA_MEMUSAGE_BASE;
431207753Smm	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
432207753Smm	next->coder->tell_unsupported_check
433207753Smm			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
434207753Smm	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
435207753Smm	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
436207753Smm	next->coder->first_stream = true;
437207753Smm
438207753Smm	return stream_decoder_reset(next->coder, allocator);
439207753Smm}
440207753Smm
441207753Smm
442207753Smmextern LZMA_API(lzma_ret)
443207753Smmlzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
444207753Smm{
445207753Smm	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
446207753Smm
447207753Smm	strm->internal->supported_actions[LZMA_RUN] = true;
448207753Smm	strm->internal->supported_actions[LZMA_FINISH] = true;
449207753Smm
450207753Smm	return LZMA_OK;
451207753Smm}
452