index_decoder.c revision 292588
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       index_decoder.c
4/// \brief      Decodes the Index field
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "index.h"
14#include "check.h"
15
16
17struct lzma_coder_s {
18	enum {
19		SEQ_INDICATOR,
20		SEQ_COUNT,
21		SEQ_MEMUSAGE,
22		SEQ_UNPADDED,
23		SEQ_UNCOMPRESSED,
24		SEQ_PADDING_INIT,
25		SEQ_PADDING,
26		SEQ_CRC32,
27	} sequence;
28
29	/// Memory usage limit
30	uint64_t memlimit;
31
32	/// Target Index
33	lzma_index *index;
34
35	/// Pointer give by the application, which is set after
36	/// successful decoding.
37	lzma_index **index_ptr;
38
39	/// Number of Records left to decode.
40	lzma_vli count;
41
42	/// The most recent Unpadded Size field
43	lzma_vli unpadded_size;
44
45	/// The most recent Uncompressed Size field
46	lzma_vli uncompressed_size;
47
48	/// Position in integers
49	size_t pos;
50
51	/// CRC32 of the List of Records field
52	uint32_t crc32;
53};
54
55
56static lzma_ret
57index_decode(lzma_coder *coder, const lzma_allocator *allocator,
58		const uint8_t *restrict in, size_t *restrict in_pos,
59		size_t in_size,
60		uint8_t *restrict out lzma_attribute((__unused__)),
61		size_t *restrict out_pos lzma_attribute((__unused__)),
62		size_t out_size lzma_attribute((__unused__)),
63		lzma_action action lzma_attribute((__unused__)))
64{
65	// Similar optimization as in index_encoder.c
66	const size_t in_start = *in_pos;
67	lzma_ret ret = LZMA_OK;
68
69	while (*in_pos < in_size)
70	switch (coder->sequence) {
71	case SEQ_INDICATOR:
72		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
73		// LZMA_FORMAT_ERROR, because a typical usage case for Index
74		// decoder is when parsing the Stream backwards. If seeking
75		// backward from the Stream Footer gives us something that
76		// doesn't begin with Index Indicator, the file is considered
77		// corrupt, not "programming error" or "unrecognized file
78		// format". One could argue that the application should
79		// verify the Index Indicator before trying to decode the
80		// Index, but well, I suppose it is simpler this way.
81		if (in[(*in_pos)++] != 0x00)
82			return LZMA_DATA_ERROR;
83
84		coder->sequence = SEQ_COUNT;
85		break;
86
87	case SEQ_COUNT:
88		ret = lzma_vli_decode(&coder->count, &coder->pos,
89				in, in_pos, in_size);
90		if (ret != LZMA_STREAM_END)
91			goto out;
92
93		coder->pos = 0;
94		coder->sequence = SEQ_MEMUSAGE;
95
96	// Fall through
97
98	case SEQ_MEMUSAGE:
99		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
100			ret = LZMA_MEMLIMIT_ERROR;
101			goto out;
102		}
103
104		// Tell the Index handling code how many Records this
105		// Index has to allow it to allocate memory more efficiently.
106		lzma_index_prealloc(coder->index, coder->count);
107
108		ret = LZMA_OK;
109		coder->sequence = coder->count == 0
110				? SEQ_PADDING_INIT : SEQ_UNPADDED;
111		break;
112
113	case SEQ_UNPADDED:
114	case SEQ_UNCOMPRESSED: {
115		lzma_vli *size = coder->sequence == SEQ_UNPADDED
116				? &coder->unpadded_size
117				: &coder->uncompressed_size;
118
119		ret = lzma_vli_decode(size, &coder->pos,
120				in, in_pos, in_size);
121		if (ret != LZMA_STREAM_END)
122			goto out;
123
124		ret = LZMA_OK;
125		coder->pos = 0;
126
127		if (coder->sequence == SEQ_UNPADDED) {
128			// Validate that encoded Unpadded Size isn't too small
129			// or too big.
130			if (coder->unpadded_size < UNPADDED_SIZE_MIN
131					|| coder->unpadded_size
132						> UNPADDED_SIZE_MAX)
133				return LZMA_DATA_ERROR;
134
135			coder->sequence = SEQ_UNCOMPRESSED;
136		} else {
137			// Add the decoded Record to the Index.
138			return_if_error(lzma_index_append(
139					coder->index, allocator,
140					coder->unpadded_size,
141					coder->uncompressed_size));
142
143			// Check if this was the last Record.
144			coder->sequence = --coder->count == 0
145					? SEQ_PADDING_INIT
146					: SEQ_UNPADDED;
147		}
148
149		break;
150	}
151
152	case SEQ_PADDING_INIT:
153		coder->pos = lzma_index_padding_size(coder->index);
154		coder->sequence = SEQ_PADDING;
155
156	// Fall through
157
158	case SEQ_PADDING:
159		if (coder->pos > 0) {
160			--coder->pos;
161			if (in[(*in_pos)++] != 0x00)
162				return LZMA_DATA_ERROR;
163
164			break;
165		}
166
167		// Finish the CRC32 calculation.
168		coder->crc32 = lzma_crc32(in + in_start,
169				*in_pos - in_start, coder->crc32);
170
171		coder->sequence = SEQ_CRC32;
172
173	// Fall through
174
175	case SEQ_CRC32:
176		do {
177			if (*in_pos == in_size)
178				return LZMA_OK;
179
180			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
181					!= in[(*in_pos)++])
182				return LZMA_DATA_ERROR;
183
184		} while (++coder->pos < 4);
185
186		// Decoding was successful, now we can let the application
187		// see the decoded Index.
188		*coder->index_ptr = coder->index;
189
190		// Make index NULL so we don't free it unintentionally.
191		coder->index = NULL;
192
193		return LZMA_STREAM_END;
194
195	default:
196		assert(0);
197		return LZMA_PROG_ERROR;
198	}
199
200out:
201	// Update the CRC32,
202	coder->crc32 = lzma_crc32(in + in_start,
203			*in_pos - in_start, coder->crc32);
204
205	return ret;
206}
207
208
209static void
210index_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
211{
212	lzma_index_end(coder->index, allocator);
213	lzma_free(coder, allocator);
214	return;
215}
216
217
218static lzma_ret
219index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
220		uint64_t *old_memlimit, uint64_t new_memlimit)
221{
222	*memusage = lzma_index_memusage(1, coder->count);
223	*old_memlimit = coder->memlimit;
224
225	if (new_memlimit != 0) {
226		if (new_memlimit < *memusage)
227			return LZMA_MEMLIMIT_ERROR;
228
229		coder->memlimit = new_memlimit;
230	}
231
232	return LZMA_OK;
233}
234
235
236static lzma_ret
237index_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator,
238		lzma_index **i, uint64_t memlimit)
239{
240	// Remember the pointer given by the application. We will set it
241	// to point to the decoded Index only if decoding is successful.
242	// Before that, keep it NULL so that applications can always safely
243	// pass it to lzma_index_end() no matter did decoding succeed or not.
244	coder->index_ptr = i;
245	*i = NULL;
246
247	// We always allocate a new lzma_index.
248	coder->index = lzma_index_init(allocator);
249	if (coder->index == NULL)
250		return LZMA_MEM_ERROR;
251
252	// Initialize the rest.
253	coder->sequence = SEQ_INDICATOR;
254	coder->memlimit = memlimit;
255	coder->count = 0; // Needs to be initialized due to _memconfig().
256	coder->pos = 0;
257	coder->crc32 = 0;
258
259	return LZMA_OK;
260}
261
262
263static lzma_ret
264index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
265		lzma_index **i, uint64_t memlimit)
266{
267	lzma_next_coder_init(&index_decoder_init, next, allocator);
268
269	if (i == NULL || memlimit == 0)
270		return LZMA_PROG_ERROR;
271
272	if (next->coder == NULL) {
273		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
274		if (next->coder == NULL)
275			return LZMA_MEM_ERROR;
276
277		next->code = &index_decode;
278		next->end = &index_decoder_end;
279		next->memconfig = &index_decoder_memconfig;
280		next->coder->index = NULL;
281	} else {
282		lzma_index_end(next->coder->index, allocator);
283	}
284
285	return index_decoder_reset(next->coder, allocator, i, memlimit);
286}
287
288
289extern LZMA_API(lzma_ret)
290lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
291{
292	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
293
294	strm->internal->supported_actions[LZMA_RUN] = true;
295	strm->internal->supported_actions[LZMA_FINISH] = true;
296
297	return LZMA_OK;
298}
299
300
301extern LZMA_API(lzma_ret)
302lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
303		const lzma_allocator *allocator,
304		const uint8_t *in, size_t *in_pos, size_t in_size)
305{
306	// Sanity checks
307	if (i == NULL || memlimit == NULL
308			|| in == NULL || in_pos == NULL || *in_pos > in_size)
309		return LZMA_PROG_ERROR;
310
311	// Initialize the decoder.
312	lzma_coder coder;
313	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
314
315	// Store the input start position so that we can restore it in case
316	// of an error.
317	const size_t in_start = *in_pos;
318
319	// Do the actual decoding.
320	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
321			NULL, NULL, 0, LZMA_RUN);
322
323	if (ret == LZMA_STREAM_END) {
324		ret = LZMA_OK;
325	} else {
326		// Something went wrong, free the Index structure and restore
327		// the input position.
328		lzma_index_end(coder.index, allocator);
329		*in_pos = in_start;
330
331		if (ret == LZMA_OK) {
332			// The input is truncated or otherwise corrupt.
333			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
334			// like lzma_vli_decode() does in single-call mode.
335			ret = LZMA_DATA_ERROR;
336
337		} else if (ret == LZMA_MEMLIMIT_ERROR) {
338			// Tell the caller how much memory would have
339			// been needed.
340			*memlimit = lzma_index_memusage(1, coder.count);
341		}
342	}
343
344	return ret;
345}
346