1// SPDX-License-Identifier: 0BSD
2
3///////////////////////////////////////////////////////////////////////////////
4//
5/// \file       stream_decoder.c
6/// \brief      Decodes .xz Streams
7//
8//  Author:     Lasse Collin
9//
10///////////////////////////////////////////////////////////////////////////////
11
12#include "stream_decoder.h"
13#include "block_decoder.h"
14#include "index.h"
15
16
/// State of the .xz Stream decoder. A pointer to this structure is what
/// stream_decode() and the other callbacks in this file receive as their
/// opaque coder pointer.
typedef struct {
	/// Current step of the state machine in stream_decode(). The value
	/// persists between calls so that decoding can resume where it
	/// stopped.
	enum {
		/// Buffering and decoding the Stream Header
		SEQ_STREAM_HEADER,

		/// Looking at the first byte to tell Index from a Block,
		/// then buffering the Block Header
		SEQ_BLOCK_HEADER,

		/// Decoding the buffered Block Header, checking the memory
		/// usage limit, and initializing the Block decoder
		SEQ_BLOCK_INIT,

		/// Running the Block decoder
		SEQ_BLOCK_RUN,

		/// Decoding the Index and comparing it to index_hash
		SEQ_INDEX,

		/// Buffering and verifying the Stream Footer
		SEQ_STREAM_FOOTER,

		/// Skipping zero bytes after a Stream (only reached when
		/// concatenated is true)
		SEQ_STREAM_PADDING,
	} sequence;

	/// Block decoder
	lzma_next_coder block_decoder;

	/// Block options decoded by the Block Header decoder and used by
	/// the Block decoder.
	lzma_block block_options;

	/// Stream Flags from Stream Header. These are later compared
	/// against the Stream Flags found in the Stream Footer.
	lzma_stream_flags stream_flags;

	/// Index is hashed so that it can be compared to the sizes of Blocks
	/// with O(1) memory usage.
	lzma_index_hash *index_hash;

	/// Memory usage limit. The memory usage of each Block's filter
	/// chain is compared against this before the Block decoder is
	/// initialized.
	uint64_t memlimit;

	/// Amount of memory actually needed (only an estimate). Updated
	/// only when the filter chain of a Block is known to be valid.
	uint64_t memusage;

	/// If true, LZMA_NO_CHECK is returned if the Stream has
	/// no integrity check.
	bool tell_no_check;

	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
	/// an integrity check that isn't supported by this liblzma build.
	bool tell_unsupported_check;

	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
	bool tell_any_check;

	/// If true, we will tell the Block decoder to skip calculating
	/// and verifying the integrity check.
	bool ignore_check;

	/// If true, we will decode concatenated Streams that possibly have
	/// Stream Padding between or after them. LZMA_STREAM_END is returned
	/// once the application isn't giving us any new input (LZMA_FINISH),
	/// and we aren't in the middle of a Stream, and possible
	/// Stream Padding is a multiple of four bytes.
	bool concatenated;

	/// When decoding concatenated Streams, this is true as long as we
	/// are decoding the first Stream. This is needed to avoid misleading
	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
	/// bytes.
	bool first_stream;

	/// Write position in buffer[] and position in Stream Padding.
	/// While skipping Stream Padding only the two lowest bits are
	/// kept, i.e. it counts padding bytes modulo four.
	size_t pos;

	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
	/// Block Header has biggest maximum size.
	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
} lzma_stream_coder;
83
84
85static lzma_ret
86stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator)
87{
88	// Initialize the Index hash used to verify the Index.
89	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
90	if (coder->index_hash == NULL)
91		return LZMA_MEM_ERROR;
92
93	// Reset the rest of the variables.
94	coder->sequence = SEQ_STREAM_HEADER;
95	coder->pos = 0;
96
97	return LZMA_OK;
98}
99
100
/// \brief      Decode .xz Stream(s) as a resumable state machine
///
/// The current step is kept in coder->sequence. Whenever the input runs
/// out, or the Block decoder returns something other than LZMA_STREAM_END,
/// the function returns and continues from the same step on the next call.
///
/// \param      coder_ptr   Pointer to lzma_stream_coder
/// \param      allocator   Passed on to the Block Header decoder, the Block
///                         decoder, lzma_filters_free(), and (when starting
///                         a new concatenated Stream) stream_decoder_reset()
/// \param      in          Input buffer
/// \param      in_pos      Read position in in[]; advanced as input is used
/// \param      in_size     Size of in[]
/// \param      out         Output buffer; used only by the Block decoder
/// \param      out_pos     Write position in out[]
/// \param      out_size    Size of out[]
/// \param      action      Passed to the Block decoder. In Stream Padding,
///                         LZMA_FINISH tells that no more input will come.
///
/// \return     - LZMA_OK: More input is needed (or the Block decoder
///               returned LZMA_OK).
///             - LZMA_STREAM_END: Stream Footer was verified and
///               concatenated decoding isn't enabled, or input ended with
///               LZMA_FINISH after Stream Padding whose size is a multiple
///               of four bytes.
///             - LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, LZMA_GET_CHECK:
///               Returned right after the Stream Header if the matching
///               tell_* flag is set; decoding continues on the next call.
///             - LZMA_DATA_ERROR: Bad magic bytes in Stream Footer or in
///               the header of a non-first Stream, Index size mismatch, or
///               Stream Padding that isn't a multiple of four bytes.
///             - LZMA_OPTIONS_ERROR, LZMA_MEMLIMIT_ERROR: See SEQ_BLOCK_INIT.
///             - Any other value returned by the called decoders is passed
///               through unchanged.
static lzma_ret
stream_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	lzma_stream_coder *coder = coder_ptr;

	// When decoding the actual Block, it may be able to produce more
	// output even if we don't give it any new input.
	//
	// A "break" inside the switch only leaves the switch; this loop then
	// dispatches again on the updated coder->sequence.
	while (true)
	switch (coder->sequence) {
	case SEQ_STREAM_HEADER: {
		// Copy the Stream Header to the internal buffer.
		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
				LZMA_STREAM_HEADER_SIZE);

		// Return if we didn't get the whole Stream Header yet.
		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
			return LZMA_OK;

		// The buffer is complete; rewind so that the next user of
		// buffer[] starts from the beginning.
		coder->pos = 0;

		// Decode the Stream Header.
		const lzma_ret ret = lzma_stream_header_decode(
				&coder->stream_flags, coder->buffer);
		if (ret != LZMA_OK)
			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
					? LZMA_DATA_ERROR : ret;

		// If we are decoding concatenated Streams, and the later
		// Streams have invalid Header Magic Bytes, we give
		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
		coder->first_stream = false;

		// Copy the type of the Check so that Block Header and Block
		// decoders see it.
		coder->block_options.check = coder->stream_flags.check;

		// Even if we return LZMA_*_CHECK below, we want
		// to continue from Block Header decoding.
		coder->sequence = SEQ_BLOCK_HEADER;

		// Detect if there's no integrity check or if it is
		// unsupported if those were requested by the application.
		if (coder->tell_no_check && coder->stream_flags.check
				== LZMA_CHECK_NONE)
			return LZMA_NO_CHECK;

		if (coder->tell_unsupported_check
				&& !lzma_check_is_supported(
					coder->stream_flags.check))
			return LZMA_UNSUPPORTED_CHECK;

		if (coder->tell_any_check)
			return LZMA_GET_CHECK;
	}

	// Fall through

	case SEQ_BLOCK_HEADER: {
		// in[*in_pos] is read below, so at least one byte of input
		// is required.
		if (*in_pos >= in_size)
			return LZMA_OK;

		// pos == 0 means that nothing of this header has been
		// buffered yet, i.e. we are looking at its first byte.
		if (coder->pos == 0) {
			// Detect if it's Index.
			if (in[*in_pos] == INDEX_INDICATOR) {
				coder->sequence = SEQ_INDEX;
				break;
			}

			// Calculate the size of the Block Header. Note that
			// Block Header decoder wants to see this byte too
			// so don't advance *in_pos.
			coder->block_options.header_size
					= lzma_block_header_size_decode(
						in[*in_pos]);
		}

		// Copy the Block Header to the internal buffer.
		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
				coder->block_options.header_size);

		// Return if we didn't get the whole Block Header yet.
		if (coder->pos < coder->block_options.header_size)
			return LZMA_OK;

		coder->pos = 0;
		coder->sequence = SEQ_BLOCK_INIT;
	}

	// Fall through

	case SEQ_BLOCK_INIT: {
		// Checking memusage and doing the initialization needs
		// its own sequence point because we need to be able to
		// retry if we return LZMA_MEMLIMIT_ERROR.

		// Version 1 is needed to support the .ignore_check option.
		coder->block_options.version = 1;

		// Set up a buffer to hold the filter chain. Block Header
		// decoder will initialize all members of this array so
		// we don't need to do it here.
		//
		// The array lives on the stack, so block_options.filters
		// is set back to NULL before leaving this case normally.
		lzma_filter filters[LZMA_FILTERS_MAX + 1];
		coder->block_options.filters = filters;

		// Decode the Block Header.
		return_if_error(lzma_block_header_decode(&coder->block_options,
				allocator, coder->buffer));

		// If LZMA_IGNORE_CHECK was used, this flag needs to be set.
		// It has to be set after lzma_block_header_decode() because
		// it always resets this to false.
		coder->block_options.ignore_check = coder->ignore_check;

		// Check the memory usage limit.
		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
		lzma_ret ret;

		if (memusage == UINT64_MAX) {
			// One or more unknown Filter IDs.
			ret = LZMA_OPTIONS_ERROR;
		} else {
			// Now we can set coder->memusage since we know that
			// the filter chain is valid. We don't want
			// lzma_memusage() to return UINT64_MAX in case of
			// invalid filter chain.
			coder->memusage = memusage;

			if (memusage > coder->memlimit) {
				// The chain would need too much memory.
				ret = LZMA_MEMLIMIT_ERROR;
			} else {
				// Memory usage is OK.
				// Initialize the Block decoder.
				ret = lzma_block_decoder_init(
						&coder->block_decoder,
						allocator,
						&coder->block_options);
			}
		}

		// Free the allocated filter options since they are needed
		// only to initialize the Block decoder.
		//
		// This is done on the error paths above as well, which is
		// why they store the result in ret instead of returning.
		lzma_filters_free(filters, allocator);
		coder->block_options.filters = NULL;

		// Check if memory usage calculation and Block decoder
		// initialization succeeded.
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_BLOCK_RUN;
	}

	// Fall through

	case SEQ_BLOCK_RUN: {
		const lzma_ret ret = coder->block_decoder.code(
				coder->block_decoder.coder, allocator,
				in, in_pos, in_size, out, out_pos, out_size,
				action);

		// Everything except LZMA_STREAM_END, including LZMA_OK,
		// is returned to the caller as is. The sequence stays at
		// SEQ_BLOCK_RUN so the next call continues this Block.
		if (ret != LZMA_STREAM_END)
			return ret;

		// Block decoded successfully. Add the new size pair to
		// the Index hash.
		return_if_error(lzma_index_hash_append(coder->index_hash,
				lzma_block_unpadded_size(
					&coder->block_options),
				coder->block_options.uncompressed_size));

		// Next comes either another Block Header or the Index.
		coder->sequence = SEQ_BLOCK_HEADER;
		break;
	}

	case SEQ_INDEX: {
		// If we don't have any input, don't call
		// lzma_index_hash_decode() since it would return
		// LZMA_BUF_ERROR, which we must not do here.
		if (*in_pos >= in_size)
			return LZMA_OK;

		// Decode the Index and compare it to the hash calculated
		// from the sizes of the Blocks (if any).
		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
				in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			return ret;

		coder->sequence = SEQ_STREAM_FOOTER;
	}

	// Fall through

	case SEQ_STREAM_FOOTER: {
		// Copy the Stream Footer to the internal buffer.
		// The same size constant is used as for the Stream Header.
		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
				LZMA_STREAM_HEADER_SIZE);

		// Return if we didn't get the whole Stream Footer yet.
		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
			return LZMA_OK;

		coder->pos = 0;

		// Decode the Stream Footer. The decoder gives
		// LZMA_FORMAT_ERROR if the magic bytes don't match,
		// so convert that return code to LZMA_DATA_ERROR.
		lzma_stream_flags footer_flags;
		const lzma_ret ret = lzma_stream_footer_decode(
				&footer_flags, coder->buffer);
		if (ret != LZMA_OK)
			return ret == LZMA_FORMAT_ERROR
					? LZMA_DATA_ERROR : ret;

		// Check that Index Size stored in the Stream Footer matches
		// the real size of the Index field.
		if (lzma_index_hash_size(coder->index_hash)
				!= footer_flags.backward_size)
			return LZMA_DATA_ERROR;

		// Compare that the Stream Flags fields are identical in
		// both Stream Header and Stream Footer.
		return_if_error(lzma_stream_flags_compare(
				&coder->stream_flags, &footer_flags));

		// Without concatenated decoding, one Stream is all there is.
		if (!coder->concatenated)
			return LZMA_STREAM_END;

		// coder->pos is zero here, which is what the padding
		// counter in the next step starts from.
		coder->sequence = SEQ_STREAM_PADDING;
	}

	// Fall through

	case SEQ_STREAM_PADDING:
		assert(coder->concatenated);

		// Skip over possible Stream Padding.
		while (true) {
			if (*in_pos >= in_size) {
				// Unless LZMA_FINISH was used, we cannot
				// know if there's more input coming later.
				if (action != LZMA_FINISH)
					return LZMA_OK;

				// Stream Padding must be a multiple of
				// four bytes.
				return coder->pos == 0
						? LZMA_STREAM_END
						: LZMA_DATA_ERROR;
			}

			// If the byte is not zero, it probably indicates
			// beginning of a new Stream (or the file is corrupt).
			if (in[*in_pos] != 0x00)
				break;

			// Consume the zero byte and count it modulo four.
			++*in_pos;
			coder->pos = (coder->pos + 1) & 3;
		}

		// Stream Padding must be a multiple of four bytes (empty
		// Stream Padding is OK).
		if (coder->pos != 0) {
			// The non-zero byte that ended the padding is
			// consumed too before reporting the error.
			++*in_pos;
			return LZMA_DATA_ERROR;
		}

		// Prepare to decode the next Stream.
		// The reset sets the sequence back to SEQ_STREAM_HEADER.
		return_if_error(stream_decoder_reset(coder, allocator));
		break;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

	// Never reached
}
383
384
385static void
386stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
387{
388	lzma_stream_coder *coder = coder_ptr;
389	lzma_next_end(&coder->block_decoder, allocator);
390	lzma_index_hash_end(coder->index_hash, allocator);
391	lzma_free(coder, allocator);
392	return;
393}
394
395
396static lzma_check
397stream_decoder_get_check(const void *coder_ptr)
398{
399	const lzma_stream_coder *coder = coder_ptr;
400	return coder->stream_flags.check;
401}
402
403
404static lzma_ret
405stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
406		uint64_t *old_memlimit, uint64_t new_memlimit)
407{
408	lzma_stream_coder *coder = coder_ptr;
409
410	*memusage = coder->memusage;
411	*old_memlimit = coder->memlimit;
412
413	if (new_memlimit != 0) {
414		if (new_memlimit < coder->memusage)
415			return LZMA_MEMLIMIT_ERROR;
416
417		coder->memlimit = new_memlimit;
418	}
419
420	return LZMA_OK;
421}
422
423
424extern lzma_ret
425lzma_stream_decoder_init(
426		lzma_next_coder *next, const lzma_allocator *allocator,
427		uint64_t memlimit, uint32_t flags)
428{
429	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
430
431	if (flags & ~LZMA_SUPPORTED_FLAGS)
432		return LZMA_OPTIONS_ERROR;
433
434	lzma_stream_coder *coder = next->coder;
435	if (coder == NULL) {
436		coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
437		if (coder == NULL)
438			return LZMA_MEM_ERROR;
439
440		next->coder = coder;
441		next->code = &stream_decode;
442		next->end = &stream_decoder_end;
443		next->get_check = &stream_decoder_get_check;
444		next->memconfig = &stream_decoder_memconfig;
445
446		coder->block_decoder = LZMA_NEXT_CODER_INIT;
447		coder->index_hash = NULL;
448	}
449
450	coder->memlimit = my_max(1, memlimit);
451	coder->memusage = LZMA_MEMUSAGE_BASE;
452	coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
453	coder->tell_unsupported_check
454			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
455	coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
456	coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
457	coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
458	coder->first_stream = true;
459
460	return stream_decoder_reset(coder, allocator);
461}
462
463
464extern LZMA_API(lzma_ret)
465lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
466{
467	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
468
469	strm->internal->supported_actions[LZMA_RUN] = true;
470	strm->internal->supported_actions[LZMA_FINISH] = true;
471
472	return LZMA_OK;
473}
474