// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_decoder.c
/// \brief      Decodes the Index field
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
11
#include "index_decoder.h"
#include "check.h"
14
15
16typedef struct {
17	enum {
18		SEQ_INDICATOR,
19		SEQ_COUNT,
20		SEQ_MEMUSAGE,
21		SEQ_UNPADDED,
22		SEQ_UNCOMPRESSED,
23		SEQ_PADDING_INIT,
24		SEQ_PADDING,
25		SEQ_CRC32,
26	} sequence;
27
28	/// Memory usage limit
29	uint64_t memlimit;
30
31	/// Target Index
32	lzma_index *index;
33
34	/// Pointer give by the application, which is set after
35	/// successful decoding.
36	lzma_index **index_ptr;
37
38	/// Number of Records left to decode.
39	lzma_vli count;
40
41	/// The most recent Unpadded Size field
42	lzma_vli unpadded_size;
43
44	/// The most recent Uncompressed Size field
45	lzma_vli uncompressed_size;
46
47	/// Position in integers
48	size_t pos;
49
50	/// CRC32 of the List of Records field
51	uint32_t crc32;
52} lzma_index_coder;
53
54
55static lzma_ret
56index_decode(void *coder_ptr, const lzma_allocator *allocator,
57		const uint8_t *restrict in, size_t *restrict in_pos,
58		size_t in_size,
59		uint8_t *restrict out lzma_attribute((__unused__)),
60		size_t *restrict out_pos lzma_attribute((__unused__)),
61		size_t out_size lzma_attribute((__unused__)),
62		lzma_action action lzma_attribute((__unused__)))
63{
64	lzma_index_coder *coder = coder_ptr;
65
66	// Similar optimization as in index_encoder.c
67	const size_t in_start = *in_pos;
68	lzma_ret ret = LZMA_OK;
69
70	while (*in_pos < in_size)
71	switch (coder->sequence) {
72	case SEQ_INDICATOR:
73		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
74		// LZMA_FORMAT_ERROR, because a typical usage case for Index
75		// decoder is when parsing the Stream backwards. If seeking
76		// backward from the Stream Footer gives us something that
77		// doesn't begin with Index Indicator, the file is considered
78		// corrupt, not "programming error" or "unrecognized file
79		// format". One could argue that the application should
80		// verify the Index Indicator before trying to decode the
81		// Index, but well, I suppose it is simpler this way.
82		if (in[(*in_pos)++] != INDEX_INDICATOR)
83			return LZMA_DATA_ERROR;
84
85		coder->sequence = SEQ_COUNT;
86		break;
87
88	case SEQ_COUNT:
89		ret = lzma_vli_decode(&coder->count, &coder->pos,
90				in, in_pos, in_size);
91		if (ret != LZMA_STREAM_END)
92			goto out;
93
94		coder->pos = 0;
95		coder->sequence = SEQ_MEMUSAGE;
96
97	// Fall through
98
99	case SEQ_MEMUSAGE:
100		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
101			ret = LZMA_MEMLIMIT_ERROR;
102			goto out;
103		}
104
105		// Tell the Index handling code how many Records this
106		// Index has to allow it to allocate memory more efficiently.
107		lzma_index_prealloc(coder->index, coder->count);
108
109		ret = LZMA_OK;
110		coder->sequence = coder->count == 0
111				? SEQ_PADDING_INIT : SEQ_UNPADDED;
112		break;
113
114	case SEQ_UNPADDED:
115	case SEQ_UNCOMPRESSED: {
116		lzma_vli *size = coder->sequence == SEQ_UNPADDED
117				? &coder->unpadded_size
118				: &coder->uncompressed_size;
119
120		ret = lzma_vli_decode(size, &coder->pos,
121				in, in_pos, in_size);
122		if (ret != LZMA_STREAM_END)
123			goto out;
124
125		ret = LZMA_OK;
126		coder->pos = 0;
127
128		if (coder->sequence == SEQ_UNPADDED) {
129			// Validate that encoded Unpadded Size isn't too small
130			// or too big.
131			if (coder->unpadded_size < UNPADDED_SIZE_MIN
132					|| coder->unpadded_size
133						> UNPADDED_SIZE_MAX)
134				return LZMA_DATA_ERROR;
135
136			coder->sequence = SEQ_UNCOMPRESSED;
137		} else {
138			// Add the decoded Record to the Index.
139			return_if_error(lzma_index_append(
140					coder->index, allocator,
141					coder->unpadded_size,
142					coder->uncompressed_size));
143
144			// Check if this was the last Record.
145			coder->sequence = --coder->count == 0
146					? SEQ_PADDING_INIT
147					: SEQ_UNPADDED;
148		}
149
150		break;
151	}
152
153	case SEQ_PADDING_INIT:
154		coder->pos = lzma_index_padding_size(coder->index);
155		coder->sequence = SEQ_PADDING;
156
157	// Fall through
158
159	case SEQ_PADDING:
160		if (coder->pos > 0) {
161			--coder->pos;
162			if (in[(*in_pos)++] != 0x00)
163				return LZMA_DATA_ERROR;
164
165			break;
166		}
167
168		// Finish the CRC32 calculation.
169		coder->crc32 = lzma_crc32(in + in_start,
170				*in_pos - in_start, coder->crc32);
171
172		coder->sequence = SEQ_CRC32;
173
174	// Fall through
175
176	case SEQ_CRC32:
177		do {
178			if (*in_pos == in_size)
179				return LZMA_OK;
180
181			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
182					!= in[(*in_pos)++]) {
183#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
184				return LZMA_DATA_ERROR;
185#endif
186			}
187
188		} while (++coder->pos < 4);
189
190		// Decoding was successful, now we can let the application
191		// see the decoded Index.
192		*coder->index_ptr = coder->index;
193
194		// Make index NULL so we don't free it unintentionally.
195		coder->index = NULL;
196
197		return LZMA_STREAM_END;
198
199	default:
200		assert(0);
201		return LZMA_PROG_ERROR;
202	}
203
204out:
205	// Update the CRC32.
206	//
207	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
208	// In such a case we had no input and thus in_used == 0.
209	{
210		const size_t in_used = *in_pos - in_start;
211		if (in_used > 0)
212			coder->crc32 = lzma_crc32(in + in_start,
213					in_used, coder->crc32);
214	}
215
216	return ret;
217}
218
219
220static void
221index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
222{
223	lzma_index_coder *coder = coder_ptr;
224	lzma_index_end(coder->index, allocator);
225	lzma_free(coder, allocator);
226	return;
227}
228
229
230static lzma_ret
231index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
232		uint64_t *old_memlimit, uint64_t new_memlimit)
233{
234	lzma_index_coder *coder = coder_ptr;
235
236	*memusage = lzma_index_memusage(1, coder->count);
237	*old_memlimit = coder->memlimit;
238
239	if (new_memlimit != 0) {
240		if (new_memlimit < *memusage)
241			return LZMA_MEMLIMIT_ERROR;
242
243		coder->memlimit = new_memlimit;
244	}
245
246	return LZMA_OK;
247}
248
249
250static lzma_ret
251index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
252		lzma_index **i, uint64_t memlimit)
253{
254	// Remember the pointer given by the application. We will set it
255	// to point to the decoded Index only if decoding is successful.
256	// Before that, keep it NULL so that applications can always safely
257	// pass it to lzma_index_end() no matter did decoding succeed or not.
258	coder->index_ptr = i;
259	*i = NULL;
260
261	// We always allocate a new lzma_index.
262	coder->index = lzma_index_init(allocator);
263	if (coder->index == NULL)
264		return LZMA_MEM_ERROR;
265
266	// Initialize the rest.
267	coder->sequence = SEQ_INDICATOR;
268	coder->memlimit = my_max(1, memlimit);
269	coder->count = 0; // Needs to be initialized due to _memconfig().
270	coder->pos = 0;
271	coder->crc32 = 0;
272
273	return LZMA_OK;
274}
275
276
277extern lzma_ret
278lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
279		lzma_index **i, uint64_t memlimit)
280{
281	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
282
283	if (i == NULL)
284		return LZMA_PROG_ERROR;
285
286	lzma_index_coder *coder = next->coder;
287	if (coder == NULL) {
288		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
289		if (coder == NULL)
290			return LZMA_MEM_ERROR;
291
292		next->coder = coder;
293		next->code = &index_decode;
294		next->end = &index_decoder_end;
295		next->memconfig = &index_decoder_memconfig;
296		coder->index = NULL;
297	} else {
298		lzma_index_end(coder->index, allocator);
299	}
300
301	return index_decoder_reset(coder, allocator, i, memlimit);
302}
303
304
305extern LZMA_API(lzma_ret)
306lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
307{
308	// If i isn't NULL, *i must always be initialized due to
309	// the wording in the API docs. This way it is initialized
310	// if we return LZMA_PROG_ERROR due to strm == NULL.
311	if (i != NULL)
312		*i = NULL;
313
314	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
315
316	strm->internal->supported_actions[LZMA_RUN] = true;
317	strm->internal->supported_actions[LZMA_FINISH] = true;
318
319	return LZMA_OK;
320}
321
322
323extern LZMA_API(lzma_ret)
324lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
325		const lzma_allocator *allocator,
326		const uint8_t *in, size_t *in_pos, size_t in_size)
327{
328	// If i isn't NULL, *i must always be initialized due to
329	// the wording in the API docs.
330	if (i != NULL)
331		*i = NULL;
332
333	// Sanity checks
334	if (i == NULL || memlimit == NULL
335			|| in == NULL || in_pos == NULL || *in_pos > in_size)
336		return LZMA_PROG_ERROR;
337
338	// Initialize the decoder.
339	lzma_index_coder coder;
340	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
341
342	// Store the input start position so that we can restore it in case
343	// of an error.
344	const size_t in_start = *in_pos;
345
346	// Do the actual decoding.
347	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
348			NULL, NULL, 0, LZMA_RUN);
349
350	if (ret == LZMA_STREAM_END) {
351		ret = LZMA_OK;
352	} else {
353		// Something went wrong, free the Index structure and restore
354		// the input position.
355		lzma_index_end(coder.index, allocator);
356		*in_pos = in_start;
357
358		if (ret == LZMA_OK) {
359			// The input is truncated or otherwise corrupt.
360			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
361			// like lzma_vli_decode() does in single-call mode.
362			ret = LZMA_DATA_ERROR;
363
364		} else if (ret == LZMA_MEMLIMIT_ERROR) {
365			// Tell the caller how much memory would have
366			// been needed.
367			*memlimit = lzma_index_memusage(1, coder.count);
368		}
369	}
370
371	return ret;
372}
373