1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       index_decoder.c
4207753Smm/// \brief      Decodes the Index field
5207753Smm//
6207753Smm//  Author:     Lasse Collin
7207753Smm//
8207753Smm//  This file has been put into the public domain.
9207753Smm//  You can do whatever you want with this file.
10207753Smm//
11207753Smm///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "index.h"
14207753Smm#include "check.h"
15207753Smm
16207753Smm
17312517Sdelphijtypedef struct {
18207753Smm	enum {
19207753Smm		SEQ_INDICATOR,
20207753Smm		SEQ_COUNT,
21207753Smm		SEQ_MEMUSAGE,
22207753Smm		SEQ_UNPADDED,
23207753Smm		SEQ_UNCOMPRESSED,
24207753Smm		SEQ_PADDING_INIT,
25207753Smm		SEQ_PADDING,
26207753Smm		SEQ_CRC32,
27207753Smm	} sequence;
28207753Smm
29207753Smm	/// Memory usage limit
30207753Smm	uint64_t memlimit;
31207753Smm
32207753Smm	/// Target Index
33207753Smm	lzma_index *index;
34207753Smm
35207753Smm	/// Pointer give by the application, which is set after
36207753Smm	/// successful decoding.
37207753Smm	lzma_index **index_ptr;
38207753Smm
39207753Smm	/// Number of Records left to decode.
40207753Smm	lzma_vli count;
41207753Smm
42207753Smm	/// The most recent Unpadded Size field
43207753Smm	lzma_vli unpadded_size;
44207753Smm
45207753Smm	/// The most recent Uncompressed Size field
46207753Smm	lzma_vli uncompressed_size;
47207753Smm
48207753Smm	/// Position in integers
49207753Smm	size_t pos;
50207753Smm
51207753Smm	/// CRC32 of the List of Records field
52207753Smm	uint32_t crc32;
53312517Sdelphij} lzma_index_coder;
54207753Smm
55207753Smm
56207753Smmstatic lzma_ret
57312517Sdelphijindex_decode(void *coder_ptr, const lzma_allocator *allocator,
58207753Smm		const uint8_t *restrict in, size_t *restrict in_pos,
59223935Smm		size_t in_size,
60223935Smm		uint8_t *restrict out lzma_attribute((__unused__)),
61223935Smm		size_t *restrict out_pos lzma_attribute((__unused__)),
62223935Smm		size_t out_size lzma_attribute((__unused__)),
63223935Smm		lzma_action action lzma_attribute((__unused__)))
64207753Smm{
65312517Sdelphij	lzma_index_coder *coder = coder_ptr;
66312517Sdelphij
67207753Smm	// Similar optimization as in index_encoder.c
68207753Smm	const size_t in_start = *in_pos;
69207753Smm	lzma_ret ret = LZMA_OK;
70207753Smm
71207753Smm	while (*in_pos < in_size)
72207753Smm	switch (coder->sequence) {
73207753Smm	case SEQ_INDICATOR:
74207753Smm		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
75207753Smm		// LZMA_FORMAT_ERROR, because a typical usage case for Index
76207753Smm		// decoder is when parsing the Stream backwards. If seeking
77207753Smm		// backward from the Stream Footer gives us something that
78207753Smm		// doesn't begin with Index Indicator, the file is considered
79207753Smm		// corrupt, not "programming error" or "unrecognized file
80207753Smm		// format". One could argue that the application should
81207753Smm		// verify the Index Indicator before trying to decode the
82207753Smm		// Index, but well, I suppose it is simpler this way.
83207753Smm		if (in[(*in_pos)++] != 0x00)
84207753Smm			return LZMA_DATA_ERROR;
85207753Smm
86207753Smm		coder->sequence = SEQ_COUNT;
87207753Smm		break;
88207753Smm
89207753Smm	case SEQ_COUNT:
90207753Smm		ret = lzma_vli_decode(&coder->count, &coder->pos,
91207753Smm				in, in_pos, in_size);
92207753Smm		if (ret != LZMA_STREAM_END)
93207753Smm			goto out;
94207753Smm
95207753Smm		coder->pos = 0;
96207753Smm		coder->sequence = SEQ_MEMUSAGE;
97207753Smm
98207753Smm	// Fall through
99207753Smm
100207753Smm	case SEQ_MEMUSAGE:
101207753Smm		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
102207753Smm			ret = LZMA_MEMLIMIT_ERROR;
103207753Smm			goto out;
104207753Smm		}
105207753Smm
106207753Smm		// Tell the Index handling code how many Records this
107207753Smm		// Index has to allow it to allocate memory more efficiently.
108207753Smm		lzma_index_prealloc(coder->index, coder->count);
109207753Smm
110207753Smm		ret = LZMA_OK;
111207753Smm		coder->sequence = coder->count == 0
112207753Smm				? SEQ_PADDING_INIT : SEQ_UNPADDED;
113207753Smm		break;
114207753Smm
115207753Smm	case SEQ_UNPADDED:
116207753Smm	case SEQ_UNCOMPRESSED: {
117207753Smm		lzma_vli *size = coder->sequence == SEQ_UNPADDED
118207753Smm				? &coder->unpadded_size
119207753Smm				: &coder->uncompressed_size;
120207753Smm
121207753Smm		ret = lzma_vli_decode(size, &coder->pos,
122207753Smm				in, in_pos, in_size);
123207753Smm		if (ret != LZMA_STREAM_END)
124207753Smm			goto out;
125207753Smm
126207753Smm		ret = LZMA_OK;
127207753Smm		coder->pos = 0;
128207753Smm
129207753Smm		if (coder->sequence == SEQ_UNPADDED) {
130207753Smm			// Validate that encoded Unpadded Size isn't too small
131207753Smm			// or too big.
132207753Smm			if (coder->unpadded_size < UNPADDED_SIZE_MIN
133207753Smm					|| coder->unpadded_size
134207753Smm						> UNPADDED_SIZE_MAX)
135207753Smm				return LZMA_DATA_ERROR;
136207753Smm
137207753Smm			coder->sequence = SEQ_UNCOMPRESSED;
138207753Smm		} else {
139207753Smm			// Add the decoded Record to the Index.
140207753Smm			return_if_error(lzma_index_append(
141207753Smm					coder->index, allocator,
142207753Smm					coder->unpadded_size,
143207753Smm					coder->uncompressed_size));
144207753Smm
145207753Smm			// Check if this was the last Record.
146207753Smm			coder->sequence = --coder->count == 0
147207753Smm					? SEQ_PADDING_INIT
148207753Smm					: SEQ_UNPADDED;
149207753Smm		}
150207753Smm
151207753Smm		break;
152207753Smm	}
153207753Smm
154207753Smm	case SEQ_PADDING_INIT:
155207753Smm		coder->pos = lzma_index_padding_size(coder->index);
156207753Smm		coder->sequence = SEQ_PADDING;
157207753Smm
158207753Smm	// Fall through
159207753Smm
160207753Smm	case SEQ_PADDING:
161207753Smm		if (coder->pos > 0) {
162207753Smm			--coder->pos;
163207753Smm			if (in[(*in_pos)++] != 0x00)
164207753Smm				return LZMA_DATA_ERROR;
165207753Smm
166207753Smm			break;
167207753Smm		}
168207753Smm
169207753Smm		// Finish the CRC32 calculation.
170207753Smm		coder->crc32 = lzma_crc32(in + in_start,
171207753Smm				*in_pos - in_start, coder->crc32);
172207753Smm
173207753Smm		coder->sequence = SEQ_CRC32;
174207753Smm
175207753Smm	// Fall through
176207753Smm
177207753Smm	case SEQ_CRC32:
178207753Smm		do {
179207753Smm			if (*in_pos == in_size)
180207753Smm				return LZMA_OK;
181207753Smm
182207753Smm			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
183207753Smm					!= in[(*in_pos)++])
184207753Smm				return LZMA_DATA_ERROR;
185207753Smm
186207753Smm		} while (++coder->pos < 4);
187207753Smm
188207753Smm		// Decoding was successful, now we can let the application
189207753Smm		// see the decoded Index.
190207753Smm		*coder->index_ptr = coder->index;
191207753Smm
192207753Smm		// Make index NULL so we don't free it unintentionally.
193207753Smm		coder->index = NULL;
194207753Smm
195207753Smm		return LZMA_STREAM_END;
196207753Smm
197207753Smm	default:
198207753Smm		assert(0);
199207753Smm		return LZMA_PROG_ERROR;
200207753Smm	}
201207753Smm
202207753Smmout:
203207753Smm	// Update the CRC32,
204207753Smm	coder->crc32 = lzma_crc32(in + in_start,
205207753Smm			*in_pos - in_start, coder->crc32);
206207753Smm
207207753Smm	return ret;
208207753Smm}
209207753Smm
210207753Smm
211207753Smmstatic void
212312517Sdelphijindex_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
213207753Smm{
214312517Sdelphij	lzma_index_coder *coder = coder_ptr;
215207753Smm	lzma_index_end(coder->index, allocator);
216207753Smm	lzma_free(coder, allocator);
217207753Smm	return;
218207753Smm}
219207753Smm
220207753Smm
221207753Smmstatic lzma_ret
222312517Sdelphijindex_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
223207753Smm		uint64_t *old_memlimit, uint64_t new_memlimit)
224207753Smm{
225312517Sdelphij	lzma_index_coder *coder = coder_ptr;
226312517Sdelphij
227207753Smm	*memusage = lzma_index_memusage(1, coder->count);
228207753Smm	*old_memlimit = coder->memlimit;
229207753Smm
230207753Smm	if (new_memlimit != 0) {
231207753Smm		if (new_memlimit < *memusage)
232207753Smm			return LZMA_MEMLIMIT_ERROR;
233207753Smm
234207753Smm		coder->memlimit = new_memlimit;
235207753Smm	}
236207753Smm
237207753Smm	return LZMA_OK;
238207753Smm}
239207753Smm
240207753Smm
241207753Smmstatic lzma_ret
242312517Sdelphijindex_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
243207753Smm		lzma_index **i, uint64_t memlimit)
244207753Smm{
245207753Smm	// Remember the pointer given by the application. We will set it
246207753Smm	// to point to the decoded Index only if decoding is successful.
247207753Smm	// Before that, keep it NULL so that applications can always safely
248207753Smm	// pass it to lzma_index_end() no matter did decoding succeed or not.
249207753Smm	coder->index_ptr = i;
250207753Smm	*i = NULL;
251207753Smm
252207753Smm	// We always allocate a new lzma_index.
253207753Smm	coder->index = lzma_index_init(allocator);
254207753Smm	if (coder->index == NULL)
255207753Smm		return LZMA_MEM_ERROR;
256207753Smm
257207753Smm	// Initialize the rest.
258207753Smm	coder->sequence = SEQ_INDICATOR;
259334607Sdelphij	coder->memlimit = my_max(1, memlimit);
260207753Smm	coder->count = 0; // Needs to be initialized due to _memconfig().
261207753Smm	coder->pos = 0;
262207753Smm	coder->crc32 = 0;
263207753Smm
264207753Smm	return LZMA_OK;
265207753Smm}
266207753Smm
267207753Smm
268207753Smmstatic lzma_ret
269278433Srpauloindex_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
270207753Smm		lzma_index **i, uint64_t memlimit)
271207753Smm{
272207753Smm	lzma_next_coder_init(&index_decoder_init, next, allocator);
273207753Smm
274334607Sdelphij	if (i == NULL)
275207753Smm		return LZMA_PROG_ERROR;
276207753Smm
277312517Sdelphij	lzma_index_coder *coder = next->coder;
278312517Sdelphij	if (coder == NULL) {
279312517Sdelphij		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
280312517Sdelphij		if (coder == NULL)
281207753Smm			return LZMA_MEM_ERROR;
282207753Smm
283312517Sdelphij		next->coder = coder;
284207753Smm		next->code = &index_decode;
285207753Smm		next->end = &index_decoder_end;
286207753Smm		next->memconfig = &index_decoder_memconfig;
287312517Sdelphij		coder->index = NULL;
288207753Smm	} else {
289312517Sdelphij		lzma_index_end(coder->index, allocator);
290207753Smm	}
291207753Smm
292312517Sdelphij	return index_decoder_reset(coder, allocator, i, memlimit);
293207753Smm}
294207753Smm
295207753Smm
296207753Smmextern LZMA_API(lzma_ret)
297207753Smmlzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
298207753Smm{
299207753Smm	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
300207753Smm
301207753Smm	strm->internal->supported_actions[LZMA_RUN] = true;
302215187Smm	strm->internal->supported_actions[LZMA_FINISH] = true;
303207753Smm
304207753Smm	return LZMA_OK;
305207753Smm}
306207753Smm
307207753Smm
308207753Smmextern LZMA_API(lzma_ret)
309278433Srpaulolzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
310278433Srpaulo		const lzma_allocator *allocator,
311207753Smm		const uint8_t *in, size_t *in_pos, size_t in_size)
312207753Smm{
313207753Smm	// Sanity checks
314207753Smm	if (i == NULL || memlimit == NULL
315207753Smm			|| in == NULL || in_pos == NULL || *in_pos > in_size)
316207753Smm		return LZMA_PROG_ERROR;
317207753Smm
318207753Smm	// Initialize the decoder.
319312517Sdelphij	lzma_index_coder coder;
320207753Smm	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
321207753Smm
322207753Smm	// Store the input start position so that we can restore it in case
323207753Smm	// of an error.
324207753Smm	const size_t in_start = *in_pos;
325207753Smm
326207753Smm	// Do the actual decoding.
327207753Smm	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
328207753Smm			NULL, NULL, 0, LZMA_RUN);
329207753Smm
330207753Smm	if (ret == LZMA_STREAM_END) {
331207753Smm		ret = LZMA_OK;
332207753Smm	} else {
333207753Smm		// Something went wrong, free the Index structure and restore
334207753Smm		// the input position.
335207753Smm		lzma_index_end(coder.index, allocator);
336207753Smm		*in_pos = in_start;
337207753Smm
338207753Smm		if (ret == LZMA_OK) {
339207753Smm			// The input is truncated or otherwise corrupt.
340207753Smm			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
341207753Smm			// like lzma_vli_decode() does in single-call mode.
342207753Smm			ret = LZMA_DATA_ERROR;
343207753Smm
344207753Smm		} else if (ret == LZMA_MEMLIMIT_ERROR) {
345207753Smm			// Tell the caller how much memory would have
346207753Smm			// been needed.
347207753Smm			*memlimit = lzma_index_memusage(1, coder.count);
348207753Smm		}
349207753Smm	}
350207753Smm
351207753Smm	return ret;
352207753Smm}
353