1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       index_hash.c
4207753Smm/// \brief      Validates Index by using a hash function
5207753Smm//
6207753Smm//  Author:     Lasse Collin
7207753Smm//
8207753Smm//  This file has been put into the public domain.
9207753Smm//  You can do whatever you want with this file.
10207753Smm//
11207753Smm///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "common.h"
14207753Smm#include "index.h"
15207753Smm#include "check.h"
16207753Smm
17207753Smm
18207753Smmtypedef struct {
19207753Smm	/// Sum of the Block sizes (including Block Padding)
20207753Smm	lzma_vli blocks_size;
21207753Smm
22207753Smm	/// Sum of the Uncompressed Size fields
23207753Smm	lzma_vli uncompressed_size;
24207753Smm
25207753Smm	/// Number of Records
26207753Smm	lzma_vli count;
27207753Smm
28207753Smm	/// Size of the List of Index Records as bytes
29207753Smm	lzma_vli index_list_size;
30207753Smm
31207753Smm	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
32207753Smm	lzma_check_state check;
33207753Smm
34207753Smm} lzma_index_hash_info;
35207753Smm
36207753Smm
37207753Smmstruct lzma_index_hash_s {
38207753Smm	enum {
39207753Smm		SEQ_BLOCK,
40207753Smm		SEQ_COUNT,
41207753Smm		SEQ_UNPADDED,
42207753Smm		SEQ_UNCOMPRESSED,
43207753Smm		SEQ_PADDING_INIT,
44207753Smm		SEQ_PADDING,
45207753Smm		SEQ_CRC32,
46207753Smm	} sequence;
47207753Smm
48207753Smm	/// Information collected while decoding the actual Blocks.
49207753Smm	lzma_index_hash_info blocks;
50207753Smm
51207753Smm	/// Information collected from the Index field.
52207753Smm	lzma_index_hash_info records;
53207753Smm
54207753Smm	/// Number of Records not fully decoded
55207753Smm	lzma_vli remaining;
56207753Smm
57207753Smm	/// Unpadded Size currently being read from an Index Record.
58207753Smm	lzma_vli unpadded_size;
59207753Smm
60207753Smm	/// Uncompressed Size currently being read from an Index Record.
61207753Smm	lzma_vli uncompressed_size;
62207753Smm
63207753Smm	/// Position in variable-length integers when decoding them from
64207753Smm	/// the List of Records.
65207753Smm	size_t pos;
66207753Smm
67207753Smm	/// CRC32 of the Index
68207753Smm	uint32_t crc32;
69207753Smm};
70207753Smm
71207753Smm
72207753Smmextern LZMA_API(lzma_index_hash *)
73207753Smmlzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
74207753Smm{
75207753Smm	if (index_hash == NULL) {
76207753Smm		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
77207753Smm		if (index_hash == NULL)
78207753Smm			return NULL;
79207753Smm	}
80207753Smm
81207753Smm	index_hash->sequence = SEQ_BLOCK;
82207753Smm	index_hash->blocks.blocks_size = 0;
83207753Smm	index_hash->blocks.uncompressed_size = 0;
84207753Smm	index_hash->blocks.count = 0;
85207753Smm	index_hash->blocks.index_list_size = 0;
86207753Smm	index_hash->records.blocks_size = 0;
87207753Smm	index_hash->records.uncompressed_size = 0;
88207753Smm	index_hash->records.count = 0;
89207753Smm	index_hash->records.index_list_size = 0;
90207753Smm	index_hash->unpadded_size = 0;
91207753Smm	index_hash->uncompressed_size = 0;
92207753Smm	index_hash->pos = 0;
93207753Smm	index_hash->crc32 = 0;
94207753Smm
95207753Smm	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
96207753Smm	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
97207753Smm	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
98207753Smm
99207753Smm	return index_hash;
100207753Smm}
101207753Smm
102207753Smm
103207753Smmextern LZMA_API(void)
104207753Smmlzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
105207753Smm{
106207753Smm	lzma_free(index_hash, allocator);
107207753Smm	return;
108207753Smm}
109207753Smm
110207753Smm
111207753Smmextern LZMA_API(lzma_vli)
112207753Smmlzma_index_hash_size(const lzma_index_hash *index_hash)
113207753Smm{
114207753Smm	// Get the size of the Index from ->blocks instead of ->records for
115207753Smm	// cases where application wants to know the Index Size before
116207753Smm	// decoding the Index.
117207753Smm	return index_size(index_hash->blocks.count,
118207753Smm			index_hash->blocks.index_list_size);
119207753Smm}
120207753Smm
121207753Smm
122207753Smm/// Updates the sizes and the hash without any validation.
123207753Smmstatic lzma_ret
124207753Smmhash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
125207753Smm		lzma_vli uncompressed_size)
126207753Smm{
127207753Smm	info->blocks_size += vli_ceil4(unpadded_size);
128207753Smm	info->uncompressed_size += uncompressed_size;
129207753Smm	info->index_list_size += lzma_vli_size(unpadded_size)
130207753Smm			+ lzma_vli_size(uncompressed_size);
131207753Smm	++info->count;
132207753Smm
133207753Smm	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
134207753Smm	lzma_check_update(&info->check, LZMA_CHECK_BEST,
135207753Smm			(const uint8_t *)(sizes), sizeof(sizes));
136207753Smm
137207753Smm	return LZMA_OK;
138207753Smm}
139207753Smm
140207753Smm
141207753Smmextern LZMA_API(lzma_ret)
142207753Smmlzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
143207753Smm		lzma_vli uncompressed_size)
144207753Smm{
145207753Smm	// Validate the arguments.
146207753Smm	if (index_hash->sequence != SEQ_BLOCK
147207753Smm			|| unpadded_size < UNPADDED_SIZE_MIN
148207753Smm			|| unpadded_size > UNPADDED_SIZE_MAX
149207753Smm			|| uncompressed_size > LZMA_VLI_MAX)
150207753Smm		return LZMA_PROG_ERROR;
151207753Smm
152207753Smm	// Update the hash.
153207753Smm	return_if_error(hash_append(&index_hash->blocks,
154207753Smm			unpadded_size, uncompressed_size));
155207753Smm
156207753Smm	// Validate the properties of *info are still in allowed limits.
157207753Smm	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
158207753Smm			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
159207753Smm			|| index_size(index_hash->blocks.count,
160207753Smm					index_hash->blocks.index_list_size)
161207753Smm				> LZMA_BACKWARD_SIZE_MAX
162207753Smm			|| index_stream_size(index_hash->blocks.blocks_size,
163207753Smm					index_hash->blocks.count,
164207753Smm					index_hash->blocks.index_list_size)
165207753Smm				> LZMA_VLI_MAX)
166207753Smm		return LZMA_DATA_ERROR;
167207753Smm
168207753Smm	return LZMA_OK;
169207753Smm}
170207753Smm
171207753Smm
172207753Smmextern LZMA_API(lzma_ret)
173207753Smmlzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
174207753Smm		size_t *in_pos, size_t in_size)
175207753Smm{
176207753Smm	// Catch zero input buffer here, because in contrast to Index encoder
177207753Smm	// and decoder functions, applications call this function directly
178207753Smm	// instead of via lzma_code(), which does the buffer checking.
179207753Smm	if (*in_pos >= in_size)
180207753Smm		return LZMA_BUF_ERROR;
181207753Smm
182207753Smm	// NOTE: This function has many similarities to index_encode() and
183207753Smm	// index_decode() functions found from index_encoder.c and
184207753Smm	// index_decoder.c. See the comments especially in index_encoder.c.
185207753Smm	const size_t in_start = *in_pos;
186207753Smm	lzma_ret ret = LZMA_OK;
187207753Smm
188207753Smm	while (*in_pos < in_size)
189207753Smm	switch (index_hash->sequence) {
190207753Smm	case SEQ_BLOCK:
191207753Smm		// Check the Index Indicator is present.
192207753Smm		if (in[(*in_pos)++] != 0x00)
193207753Smm			return LZMA_DATA_ERROR;
194207753Smm
195207753Smm		index_hash->sequence = SEQ_COUNT;
196207753Smm		break;
197207753Smm
198207753Smm	case SEQ_COUNT: {
199207753Smm		ret = lzma_vli_decode(&index_hash->remaining,
200207753Smm				&index_hash->pos, in, in_pos, in_size);
201207753Smm		if (ret != LZMA_STREAM_END)
202207753Smm			goto out;
203207753Smm
204207753Smm		// The count must match the count of the Blocks decoded.
205207753Smm		if (index_hash->remaining != index_hash->blocks.count)
206207753Smm			return LZMA_DATA_ERROR;
207207753Smm
208207753Smm		ret = LZMA_OK;
209207753Smm		index_hash->pos = 0;
210207753Smm
211207753Smm		// Handle the special case when there are no Blocks.
212207753Smm		index_hash->sequence = index_hash->remaining == 0
213207753Smm				? SEQ_PADDING_INIT : SEQ_UNPADDED;
214207753Smm		break;
215207753Smm	}
216207753Smm
217207753Smm	case SEQ_UNPADDED:
218207753Smm	case SEQ_UNCOMPRESSED: {
219207753Smm		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
220207753Smm				? &index_hash->unpadded_size
221207753Smm				: &index_hash->uncompressed_size;
222207753Smm
223207753Smm		ret = lzma_vli_decode(size, &index_hash->pos,
224207753Smm				in, in_pos, in_size);
225207753Smm		if (ret != LZMA_STREAM_END)
226207753Smm			goto out;
227207753Smm
228207753Smm		ret = LZMA_OK;
229207753Smm		index_hash->pos = 0;
230207753Smm
231207753Smm		if (index_hash->sequence == SEQ_UNPADDED) {
232207753Smm			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
233207753Smm					|| index_hash->unpadded_size
234207753Smm						> UNPADDED_SIZE_MAX)
235207753Smm				return LZMA_DATA_ERROR;
236207753Smm
237207753Smm			index_hash->sequence = SEQ_UNCOMPRESSED;
238207753Smm		} else {
239207753Smm			// Update the hash.
240207753Smm			return_if_error(hash_append(&index_hash->records,
241207753Smm					index_hash->unpadded_size,
242207753Smm					index_hash->uncompressed_size));
243207753Smm
244207753Smm			// Verify that we don't go over the known sizes. Note
245207753Smm			// that this validation is simpler than the one used
246207753Smm			// in lzma_index_hash_append(), because here we know
247207753Smm			// that values in index_hash->blocks are already
248207753Smm			// validated and we are fine as long as we don't
249207753Smm			// exceed them in index_hash->records.
250207753Smm			if (index_hash->blocks.blocks_size
251207753Smm					< index_hash->records.blocks_size
252207753Smm					|| index_hash->blocks.uncompressed_size
253207753Smm					< index_hash->records.uncompressed_size
254207753Smm					|| index_hash->blocks.index_list_size
255207753Smm					< index_hash->records.index_list_size)
256207753Smm				return LZMA_DATA_ERROR;
257207753Smm
258207753Smm			// Check if this was the last Record.
259207753Smm			index_hash->sequence = --index_hash->remaining == 0
260207753Smm					? SEQ_PADDING_INIT : SEQ_UNPADDED;
261207753Smm		}
262207753Smm
263207753Smm		break;
264207753Smm	}
265207753Smm
266207753Smm	case SEQ_PADDING_INIT:
267207753Smm		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
268207753Smm				index_hash->records.count,
269207753Smm				index_hash->records.index_list_size)) & 3;
270207753Smm		index_hash->sequence = SEQ_PADDING;
271207753Smm
272207753Smm	// Fall through
273207753Smm
274207753Smm	case SEQ_PADDING:
275207753Smm		if (index_hash->pos > 0) {
276207753Smm			--index_hash->pos;
277207753Smm			if (in[(*in_pos)++] != 0x00)
278207753Smm				return LZMA_DATA_ERROR;
279207753Smm
280207753Smm			break;
281207753Smm		}
282207753Smm
283207753Smm		// Compare the sizes.
284207753Smm		if (index_hash->blocks.blocks_size
285207753Smm				!= index_hash->records.blocks_size
286207753Smm				|| index_hash->blocks.uncompressed_size
287207753Smm				!= index_hash->records.uncompressed_size
288207753Smm				|| index_hash->blocks.index_list_size
289207753Smm				!= index_hash->records.index_list_size)
290207753Smm			return LZMA_DATA_ERROR;
291207753Smm
292207753Smm		// Finish the hashes and compare them.
293207753Smm		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
294207753Smm		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
295207753Smm		if (memcmp(index_hash->blocks.check.buffer.u8,
296207753Smm				index_hash->records.check.buffer.u8,
297207753Smm				lzma_check_size(LZMA_CHECK_BEST)) != 0)
298207753Smm			return LZMA_DATA_ERROR;
299207753Smm
300207753Smm		// Finish the CRC32 calculation.
301207753Smm		index_hash->crc32 = lzma_crc32(in + in_start,
302207753Smm				*in_pos - in_start, index_hash->crc32);
303207753Smm
304207753Smm		index_hash->sequence = SEQ_CRC32;
305207753Smm
306207753Smm	// Fall through
307207753Smm
308207753Smm	case SEQ_CRC32:
309207753Smm		do {
310207753Smm			if (*in_pos == in_size)
311207753Smm				return LZMA_OK;
312207753Smm
313207753Smm			if (((index_hash->crc32 >> (index_hash->pos * 8))
314207753Smm					& 0xFF) != in[(*in_pos)++])
315207753Smm				return LZMA_DATA_ERROR;
316207753Smm
317207753Smm		} while (++index_hash->pos < 4);
318207753Smm
319207753Smm		return LZMA_STREAM_END;
320207753Smm
321207753Smm	default:
322207753Smm		assert(0);
323207753Smm		return LZMA_PROG_ERROR;
324207753Smm	}
325207753Smm
326207753Smmout:
327207753Smm	// Update the CRC32,
328207753Smm	index_hash->crc32 = lzma_crc32(in + in_start,
329207753Smm			*in_pos - in_start, index_hash->crc32);
330207753Smm
331207753Smm	return ret;
332207753Smm}
333