// stream_decoder.c revision 256281
///////////////////////////////////////////////////////////////////////////////
//
/// \file       stream_decoder.c
/// \brief      Decodes .xz Streams
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "stream_decoder.h"
#include "block_decoder.h"


1717658Sjulianstruct lzma_coder_s {
1817658Sjulian	enum {
1917658Sjulian		SEQ_STREAM_HEADER,
2017658Sjulian		SEQ_BLOCK_HEADER,
2117658Sjulian		SEQ_BLOCK,
2217658Sjulian		SEQ_INDEX,
2317658Sjulian		SEQ_STREAM_FOOTER,
2417658Sjulian		SEQ_STREAM_PADDING,
2517658Sjulian	} sequence;
2617658Sjulian
2717658Sjulian	/// Block or Metadata decoder. This takes little memory and the same
2817658Sjulian	/// data structure can be used to decode every Block Header, so it's
2917658Sjulian	/// a good idea to have a separate lzma_next_coder structure for it.
3017658Sjulian	lzma_next_coder block_decoder;
3117658Sjulian
3217658Sjulian	/// Block options decoded by the Block Header decoder and used by
3317658Sjulian	/// the Block decoder.
3417658Sjulian	lzma_block block_options;
3517658Sjulian
3617658Sjulian	/// Stream Flags from Stream Header
37116182Sobrien	lzma_stream_flags stream_flags;
38116182Sobrien
39116182Sobrien	/// Index is hashed so that it can be compared to the sizes of Blocks
40174921Srwatson	/// with O(1) memory usage.
41131927Smarcel	lzma_index_hash *index_hash;
4228976Sbde
43134649Sscottl	/// Memory usage limit
44221173Sattilio	uint64_t memlimit;
4517658Sjulian
4617658Sjulian	/// Amount of memory actually needed (only an estimate)
4717658Sjulian	uint64_t memusage;
4860041Sphk
4931275Sbde	/// If true, LZMA_NO_CHECK is returned if the Stream has
5078767Sjhb	/// no integrity check.
5178767Sjhb	bool tell_no_check;
5278767Sjhb
53193066Sjamie	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
54131927Smarcel	/// an integrity check that isn't supported by this liblzma build.
5517658Sjulian	bool tell_unsupported_check;
56183527Speter
5755539Sluoqi	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
5889601Ssobomax	bool tell_any_check;
5921776Sbde
60164033Srwatson	/// If true, we will decode concatenated Streams that possibly have
6178767Sjhb	/// Stream Padding between or after them. LZMA_STREAM_END is returned
6278767Sjhb	/// once the application isn't giving us any new input, and we aren't
6378767Sjhb	/// in the middle of a Stream, and possible Stream Padding is a
64137263Speter	/// multiple of four bytes.
65206878Sattilio	bool concatenated;
6617658Sjulian
6717658Sjulian	/// When decoding concatenated Streams, this is true as long as we
68225448Sattilio	/// are decoding the first Stream. This is needed to avoid misleading
69221173Sattilio	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
70221173Sattilio	/// bytes.
71221173Sattilio	bool first_stream;
7217658Sjulian
73174921Srwatson	/// Write position in buffer[] and position in Stream Padding
74174921Srwatson	size_t pos;
75118990Smarcel
7694169Sphk	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
7791778Sjake	/// Block Header has biggest maximum size.
7817658Sjulian	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
79163606Srwatson};
80163606Srwatson
81157628Spjd
82157628Spjdstatic lzma_ret
83157628Spjdstream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
84157628Spjd{
85157628Spjd	// Initialize the Index hash used to verify the Index.
86157628Spjd	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
8717658Sjulian	if (coder->index_hash == NULL)
8817658Sjulian		return LZMA_MEM_ERROR;
8917658Sjulian
9017658Sjulian	// Reset the rest of the variables.
9117658Sjulian	coder->sequence = SEQ_STREAM_HEADER;
9217658Sjulian	coder->pos = 0;
9317658Sjulian
9417658Sjulian	return LZMA_OK;
9517658Sjulian}
9617658Sjulian
9717658Sjulian
9817658Sjulianstatic lzma_ret
99131927Smarcelstream_decode(lzma_coder *coder, lzma_allocator *allocator,
100131927Smarcel		const uint8_t *restrict in, size_t *restrict in_pos,
10142135Smsmith		size_t in_size, uint8_t *restrict out,
10217658Sjulian		size_t *restrict out_pos, size_t out_size, lzma_action action)
10342135Smsmith{
10417658Sjulian	// When decoding the actual Block, it may be able to produce more
105213322Savg	// output even if we don't give it any new input.
10646381Sbillf	while (true)
107213322Savg	switch (coder->sequence) {
108103647Sjhb	case SEQ_STREAM_HEADER: {
109131927Smarcel		// Copy the Stream Header to the internal buffer.
110213322Savg		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
111103647Sjhb				LZMA_STREAM_HEADER_SIZE);
112213322Savg
11317658Sjulian		// Return if we didn't get the whole Stream Header yet.
114213322Savg		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
115103647Sjhb			return LZMA_OK;
116213322Savg
117131927Smarcel		coder->pos = 0;
11817658Sjulian
119213322Savg		// Decode the Stream Header.
120213322Savg		const lzma_ret ret = lzma_stream_header_decode(
12185202Speter				&coder->stream_flags, coder->buffer);
122213322Savg		if (ret != LZMA_OK)
12385202Speter			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
124228424Savg					? LZMA_DATA_ERROR : ret;
125228424Savg
126228424Savg		// If we are decoding concatenated Streams, and the later
127228424Savg		// Streams have invalid Header Magic Bytes, we give
128228424Savg		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
129227309Sed		coder->first_stream = false;
130227309Sed
13143436Smsmith		// Copy the type of the Check so that Block Header and Block
132225448Sattilio		// decoders see it.
133225448Sattilio		coder->block_options.check = coder->stream_flags.check;
134225448Sattilio
135225448Sattilio		// Even if we return LZMA_*_CHECK below, we want
136225448Sattilio		// to continue from Block Header decoding.
137225448Sattilio		coder->sequence = SEQ_BLOCK_HEADER;
138225448Sattilio
139225448Sattilio		// Detect if there's no integrity check or if it is
14017658Sjulian		// unsupported if those were requested by the application.
14117658Sjulian		if (coder->tell_no_check && coder->stream_flags.check
14217658Sjulian				== LZMA_CHECK_NONE)
14317658Sjulian			return LZMA_NO_CHECK;
14417658Sjulian
14517658Sjulian		if (coder->tell_unsupported_check
146228424Savg				&& !lzma_check_is_supported(
14793496Sphk					coder->stream_flags.check))
148155383Sjeff			return LZMA_UNSUPPORTED_CHECK;
14993496Sphk
15067093Sps		if (coder->tell_any_check)
151131927Smarcel			return LZMA_GET_CHECK;
152131927Smarcel	}
153131927Smarcel
154131927Smarcel	// Fall through
15565395Speter
15665395Speter	case SEQ_BLOCK_HEADER: {
15765395Speter		if (*in_pos >= in_size)
15865395Speter			return LZMA_OK;
15917658Sjulian
16050107Smsmith		if (coder->pos == 0) {
161110859Salfred			// Detect if it's Index.
16250107Smsmith			if (in[*in_pos] == 0x00) {
16350107Smsmith				coder->sequence = SEQ_INDEX;
164110859Salfred				break;
165110859Salfred			}
166214279Sbrucec
167110859Salfred			// Calculate the size of the Block Header. Note that
168110859Salfred			// Block Header decoder wants to see this byte too
169110859Salfred			// so don't advance *in_pos.
170110859Salfred			coder->block_options.header_size
171110859Salfred					= lzma_block_header_size_decode(
172110859Salfred						in[*in_pos]);
17350107Smsmith		}
17448868Sphk
175177253Srwatson		// Copy the Block Header to the internal buffer.
17650107Smsmith		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
17717658Sjulian				coder->block_options.header_size);
178167211Srwatson
17917658Sjulian		// Return if we didn't get the whole Block Header yet.
18082749Sdillon		if (coder->pos < coder->block_options.header_size)
18117658Sjulian			return LZMA_OK;
182225617Skmacy
18317658Sjulian		coder->pos = 0;
18417658Sjulian
18517658Sjulian		// Version 0 is currently the only possible version.
186106024Srwatson		coder->block_options.version = 0;
187106024Srwatson
188172930Srwatson		// Set up a buffer to hold the filter chain. Block Header
189106024Srwatson		// decoder will initialize all members of this array so
190106024Srwatson		// we don't need to do it here.
191164033Srwatson		lzma_filter filters[LZMA_FILTERS_MAX + 1];
192106024Srwatson		coder->block_options.filters = filters;
193106024Srwatson
194214004Smarcel		// Decode the Block Header.
195106024Srwatson		return_if_error(lzma_block_header_decode(&coder->block_options,
196106024Srwatson				allocator, coder->buffer));
19782749Sdillon
19817658Sjulian		// Check the memory usage limit.
19917658Sjulian		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
20017658Sjulian		lzma_ret ret;
20117658Sjulian
20217658Sjulian		if (memusage == UINT64_MAX) {
20365268Smsmith			// One or more unknown Filter IDs.
20465268Smsmith			ret = LZMA_OPTIONS_ERROR;
20517658Sjulian		} else {
20665268Smsmith			// Now we can set coder->memusage since we know that
20717658Sjulian			// the filter chain is valid. We don't want
208110859Salfred			// lzma_memusage() to return UINT64_MAX in case of
20965268Smsmith			// invalid filter chain.
210110859Salfred			coder->memusage = memusage;
21117658Sjulian
21217658Sjulian			if (memusage > coder->memlimit) {
21373913Sjhb				// The chain would need too much memory.
214225617Skmacy				ret = LZMA_MEMLIMIT_ERROR;
21573913Sjhb			} else {
21617658Sjulian				// Memory usage is OK.
21717658Sjulian				// Initialize the Block decoder.
218214004Smarcel				ret = lzma_block_decoder_init(
21917658Sjulian						&coder->block_decoder,
22017658Sjulian						allocator,
22117658Sjulian						&coder->block_options);
22217658Sjulian			}
22317658Sjulian		}
22454233Sphk
22565395Speter		// Free the allocated filter options since they are needed
22654233Sphk		// only to initialize the Block decoder.
22754233Sphk		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
22854233Sphk			lzma_free(filters[i].options, allocator);
22954233Sphk
23054233Sphk		coder->block_options.filters = NULL;
23154233Sphk
23254233Sphk		// Check if memory usage calculation and Block enocoder
23354233Sphk		// initialization succeeded.
23465764Sjhb		if (ret != LZMA_OK)
23554233Sphk			return ret;
23654233Sphk
23754233Sphk		coder->sequence = SEQ_BLOCK;
23854233Sphk	}
23965764Sjhb
24054233Sphk	// Fall through
24154233Sphk
24254233Sphk	case SEQ_BLOCK: {
24354233Sphk		const lzma_ret ret = coder->block_decoder.code(
24465764Sjhb				coder->block_decoder.coder, allocator,
24554233Sphk				in, in_pos, in_size, out, out_pos, out_size,
24654233Sphk				action);
24754233Sphk
24865764Sjhb		if (ret != LZMA_STREAM_END)
24954233Sphk			return ret;
25054233Sphk
251222801Smarcel		// Block decoded successfully. Add the new size pair to
252222801Smarcel		// the Index hash.
25394169Sphk		return_if_error(lzma_index_hash_append(coder->index_hash,
254222801Smarcel				lzma_block_unpadded_size(
255110859Salfred					&coder->block_options),
256222801Smarcel				coder->block_options.uncompressed_size));
257222801Smarcel
258222801Smarcel		coder->sequence = SEQ_BLOCK_HEADER;
259222801Smarcel		break;
260132412Sjulian	}
26194169Sphk
262131927Smarcel	case SEQ_INDEX: {
26394169Sphk		// If we don't have any input, don't call
264222801Smarcel		// lzma_index_hash_decode() since it would return
265222801Smarcel		// LZMA_BUF_ERROR, which we must not do here.
266174921Srwatson		if (*in_pos >= in_size)
267222801Smarcel			return LZMA_OK;
268222801Smarcel
269174921Srwatson		// Decode the Index and compare it to the hash calculated
270222801Smarcel		// from the sizes of the Blocks (if any).
271174921Srwatson		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
272222801Smarcel				in, in_pos, in_size);
273174921Srwatson		if (ret != LZMA_STREAM_END)
274222801Smarcel			return ret;
275176788Sru
276222801Smarcel		coder->sequence = SEQ_STREAM_FOOTER;
27794169Sphk	}
27894169Sphk
279149875Struckman	// Fall through
280149875Struckman
281149875Struckman	case SEQ_STREAM_FOOTER: {
282149875Struckman		// Copy the Stream Footer to the internal buffer.
283175486Sattilio		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
284149875Struckman				LZMA_STREAM_HEADER_SIZE);
285149875Struckman
286149875Struckman		// Return if we didn't get the whole Stream Footer yet.
287149875Struckman		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
288149875Struckman			return LZMA_OK;
28917658Sjulian
290137329Snjl		coder->pos = 0;
29117658Sjulian
292214004Smarcel		// Decode the Stream Footer. The decoder gives
293214004Smarcel		// LZMA_FORMAT_ERROR if the magic bytes don't match,
29417658Sjulian		// so convert that return code to LZMA_DATA_ERROR.
295133763Struckman		lzma_stream_flags footer_flags;
29617658Sjulian		const lzma_ret ret = lzma_stream_footer_decode(
297137375Smarcel				&footer_flags, coder->buffer);
298137329Snjl		if (ret != LZMA_OK)
299137329Snjl			return ret == LZMA_FORMAT_ERROR
300137329Snjl					? LZMA_DATA_ERROR : ret;
301137329Snjl
302137329Snjl		// Check that Index Size stored in the Stream Footer matches
303228424Savg		// the real size of the Index field.
304228424Savg		if (lzma_index_hash_size(coder->index_hash)
305228424Savg				!= footer_flags.backward_size)
306228424Savg			return LZMA_DATA_ERROR;
307228424Savg
308228424Savg		// Compare that the Stream Flags fields are identical in
309137263Speter		// both Stream Header and Stream Footer.
310155383Sjeff		return_if_error(lzma_stream_flags_compare(
311155383Sjeff				&coder->stream_flags, &footer_flags));
312137263Speter
31365268Smsmith		if (!coder->concatenated)
31465268Smsmith			return LZMA_STREAM_END;
31565268Smsmith
31682119Sjhb		coder->sequence = SEQ_STREAM_PADDING;
317131927Smarcel	}
31882119Sjhb
31927997Sjulian	// Fall through
32027997Sjulian
32127997Sjulian	case SEQ_STREAM_PADDING:
32250107Smsmith		assert(coder->concatenated);
32327997Sjulian
32427997Sjulian		// Skip over possible Stream Padding.
32527997Sjulian		while (true) {
32627997Sjulian			if (*in_pos >= in_size) {
32717658Sjulian				// Unless LZMA_FINISH was used, we cannot
32817658Sjulian				// know if there's more input coming later.
32965707Sjasone				if (action != LZMA_FINISH)
330131481Sjhb					return LZMA_OK;
33165707Sjasone
332131481Sjhb				// Stream Padding must be a multiple of
33317658Sjulian				// four bytes.
33417658Sjulian				return coder->pos == 0
33517658Sjulian						? LZMA_STREAM_END
336221173Sattilio						: LZMA_DATA_ERROR;
337221173Sattilio			}
338221173Sattilio
339225617Skmacy			// If the byte is not zero, it probably indicates
34017658Sjulian			// beginning of a new Stream (or the file is corrupt).
34134266Sjulian			if (in[*in_pos] != 0x00)
34234266Sjulian				break;
34334266Sjulian
34434266Sjulian			++*in_pos;
34534266Sjulian			coder->pos = (coder->pos + 1) & 3;
34665707Sjasone		}
34717658Sjulian
348149875Struckman		// Stream Padding must be a multiple of four bytes (empty
349149875Struckman		// Stream Padding is OK).
35017658Sjulian		if (coder->pos != 0) {
351133763Struckman			++*in_pos;
352133763Struckman			return LZMA_DATA_ERROR;
353136115Sphk		}
35417658Sjulian
355133763Struckman		// Prepare to decode the next Stream.
356133763Struckman		return_if_error(stream_decoder_reset(coder, allocator));
357133763Struckman		break;
358133763Struckman
359133763Struckman	default:
36017658Sjulian		assert(0);
36165707Sjasone		return LZMA_PROG_ERROR;
36265707Sjasone	}
36365707Sjasone
364221173Sattilio	// Never reached
365221173Sattilio}
366221173Sattilio
367225617Skmacy
368131481Sjhbstatic void
369131481Sjhbstream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
370131481Sjhb{
371131481Sjhb	lzma_next_end(&coder->block_decoder, allocator);
372131481Sjhb	lzma_index_hash_end(coder->index_hash, allocator);
373131481Sjhb	lzma_free(coder, allocator);
374131481Sjhb	return;
37534266Sjulian}
376131481Sjhb
377131481Sjhb
378131481Sjhbstatic lzma_check
379131481Sjhbstream_decoder_get_check(const lzma_coder *coder)
380131481Sjhb{
381131481Sjhb	return coder->stream_flags.check;
382131481Sjhb}
383131481Sjhb
384170307Sjeff
385131481Sjhbstatic lzma_ret
386170307Sjeffstream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
387131481Sjhb		uint64_t *old_memlimit, uint64_t new_memlimit)
388131481Sjhb{
389131481Sjhb	*memusage = coder->memusage;
390131481Sjhb	*old_memlimit = coder->memlimit;
39117658Sjulian
392133418Snjl	if (new_memlimit != 0) {
39341137Smsmith		if (new_memlimit < coder->memusage)
39441137Smsmith			return LZMA_MEMLIMIT_ERROR;
39541137Smsmith
39641137Smsmith		coder->memlimit = new_memlimit;
39741137Smsmith	}
39841137Smsmith
399149875Struckman	return LZMA_OK;
400137186Sphk}
401137186Sphk
402130640Sphk
40353452Sphkextern lzma_ret
40448225Smckusicklzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
40553023Sphk		uint64_t memlimit, uint32_t flags)
40653023Sphk{
407137186Sphk	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
40853023Sphk
409225448Sattilio	if (memlimit == 0)
410225448Sattilio		return LZMA_PROG_ERROR;
411225448Sattilio
412225448Sattilio	if (flags & ~LZMA_SUPPORTED_FLAGS)
413225448Sattilio		return LZMA_OPTIONS_ERROR;
414225448Sattilio
415225448Sattilio	if (next->coder == NULL) {
416225448Sattilio		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
417225448Sattilio		if (next->coder == NULL)
418225448Sattilio			return LZMA_MEM_ERROR;
419225448Sattilio
42046568Speter		next->code = &stream_decode;
42141137Smsmith		next->end = &stream_decoder_end;
42217658Sjulian		next->get_check = &stream_decoder_get_check;
42317658Sjulian		next->memconfig = &stream_decoder_memconfig;
42417658Sjulian
42517658Sjulian		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
42617658Sjulian		next->coder->index_hash = NULL;
427133763Struckman	}
42817658Sjulian
42917658Sjulian	next->coder->memlimit = memlimit;
430133763Struckman	next->coder->memusage = LZMA_MEMUSAGE_BASE;
431133763Struckman	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
43217658Sjulian	next->coder->tell_unsupported_check
43317658Sjulian			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
43417658Sjulian	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
43517658Sjulian	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
43617658Sjulian	next->coder->first_stream = true;
43717658Sjulian
438157628Spjd	return stream_decoder_reset(next->coder, allocator);
43939237Sgibbs}
44017658Sjulian
44127997Sjulian
44254233Sphkextern LZMA_API(lzma_ret)
44354233Sphklzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
44427997Sjulian{
44527997Sjulian	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
44627997Sjulian
44727997Sjulian	strm->internal->supported_actions[LZMA_RUN] = true;
44850107Smsmith	strm->internal->supported_actions[LZMA_FINISH] = true;
449137329Snjl
450132412Sjulian	return LZMA_OK;
451222801Smarcel}
45239237Sgibbs