index_hash.c revision 213700
1219888Sed///////////////////////////////////////////////////////////////////////////////
2257547Sray//
3219888Sed/// \file       index_hash.c
4219888Sed/// \brief      Validates Index by using a hash function
5219888Sed//
6219888Sed//  Author:     Lasse Collin
7219888Sed//
8257547Sray//  This file has been put into the public domain.
9257547Sray//  You can do whatever you want with this file.
10257547Sray//
11219888Sed///////////////////////////////////////////////////////////////////////////////
12219888Sed
13219888Sed#include "common.h"
14219888Sed#include "index.h"
15219888Sed#include "check.h"
16219888Sed
17219888Sed
18219888Sedtypedef struct {
19219888Sed	/// Sum of the Block sizes (including Block Padding)
20219888Sed	lzma_vli blocks_size;
21219888Sed
22219888Sed	/// Sum of the Uncompressed Size fields
23219888Sed	lzma_vli uncompressed_size;
24219888Sed
25219888Sed	/// Number of Records
26219888Sed	lzma_vli count;
27219888Sed
28219888Sed	/// Size of the List of Index Records as bytes
29219888Sed	lzma_vli index_list_size;
30219888Sed
31219888Sed	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
32219888Sed	lzma_check_state check;
33219888Sed
34219888Sed} lzma_index_hash_info;
35219888Sed
36219888Sed
37219888Sedstruct lzma_index_hash_s {
38219888Sed	enum {
39219888Sed		SEQ_BLOCK,
40219888Sed		SEQ_COUNT,
41219888Sed		SEQ_UNPADDED,
42219888Sed		SEQ_UNCOMPRESSED,
43219888Sed		SEQ_PADDING_INIT,
44219888Sed		SEQ_PADDING,
45219888Sed		SEQ_CRC32,
46256145Sray	} sequence;
47219888Sed
48219888Sed	/// Information collected while decoding the actual Blocks.
49219888Sed	lzma_index_hash_info blocks;
50219888Sed
51219888Sed	/// Information collected from the Index field.
52219888Sed	lzma_index_hash_info records;
53219888Sed
54219888Sed	/// Number of Records not fully decoded
55219888Sed	lzma_vli remaining;
56219888Sed
57219888Sed	/// Unpadded Size currently being read from an Index Record.
58219888Sed	lzma_vli unpadded_size;
59219888Sed
60219888Sed	/// Uncompressed Size currently being read from an Index Record.
61219888Sed	lzma_vli uncompressed_size;
62219888Sed
63219888Sed	/// Position in variable-length integers when decoding them from
64219888Sed	/// the List of Records.
65219888Sed	size_t pos;
66219888Sed
67219888Sed	/// CRC32 of the Index
68219888Sed	uint32_t crc32;
69219888Sed};
70219888Sed
71219888Sed
72219888Sedextern LZMA_API(lzma_index_hash *)
73219888Sedlzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
74219888Sed{
75219888Sed	if (index_hash == NULL) {
76219888Sed		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
77219888Sed		if (index_hash == NULL)
78219888Sed			return NULL;
79219888Sed	}
80219888Sed
81219888Sed	index_hash->sequence = SEQ_BLOCK;
82219888Sed	index_hash->blocks.blocks_size = 0;
83219888Sed	index_hash->blocks.uncompressed_size = 0;
84219888Sed	index_hash->blocks.count = 0;
85257296Snwhitehorn	index_hash->blocks.index_list_size = 0;
86219888Sed	index_hash->records.blocks_size = 0;
87219888Sed	index_hash->records.uncompressed_size = 0;
88219888Sed	index_hash->records.count = 0;
89219888Sed	index_hash->records.index_list_size = 0;
90219888Sed	index_hash->unpadded_size = 0;
91219888Sed	index_hash->uncompressed_size = 0;
92257296Snwhitehorn	index_hash->pos = 0;
93219888Sed	index_hash->crc32 = 0;
94219888Sed
95219888Sed	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
96219888Sed	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
97219888Sed	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
98219888Sed
99219888Sed	return index_hash;
100219888Sed}
101219888Sed
102219888Sed
103219888Sedextern LZMA_API(void)
104256145Sraylzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
105256145Sray{
106256145Sray	lzma_free(index_hash, allocator);
107256145Sray	return;
108256145Sray}
109256145Sray
110256145Sray
111219888Sedextern LZMA_API(lzma_vli)
112219888Sedlzma_index_hash_size(const lzma_index_hash *index_hash)
113256145Sray{
114219888Sed	// Get the size of the Index from ->blocks instead of ->records for
115219888Sed	// cases where application wants to know the Index Size before
116219888Sed	// decoding the Index.
117219888Sed	return index_size(index_hash->blocks.count,
118219888Sed			index_hash->blocks.index_list_size);
119219888Sed}
120219888Sed
121219888Sed
122219888Sed/// Updates the sizes and the hash without any validation.
123219888Sedstatic lzma_ret
124256145Srayhash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
125256145Sray		lzma_vli uncompressed_size)
126256145Sray{
127256145Sray	info->blocks_size += vli_ceil4(unpadded_size);
128256145Sray	info->uncompressed_size += uncompressed_size;
129256145Sray	info->index_list_size += lzma_vli_size(unpadded_size)
130256145Sray			+ lzma_vli_size(uncompressed_size);
131256145Sray	++info->count;
132256145Sray
133219888Sed	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
134256145Sray	lzma_check_update(&info->check, LZMA_CHECK_BEST,
135256145Sray			(const uint8_t *)(sizes), sizeof(sizes));
136256145Sray
137256145Sray	return LZMA_OK;
138256145Sray}
139256145Sray
140256145Sray
141256145Srayextern LZMA_API(lzma_ret)
142256145Sraylzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
143256145Sray		lzma_vli uncompressed_size)
144256145Sray{
145256145Sray	// Validate the arguments.
146256145Sray	if (index_hash->sequence != SEQ_BLOCK
147256145Sray			|| unpadded_size < UNPADDED_SIZE_MIN
148256145Sray			|| unpadded_size > UNPADDED_SIZE_MAX
149256145Sray			|| uncompressed_size > LZMA_VLI_MAX)
150256145Sray		return LZMA_PROG_ERROR;
151256145Sray
152256145Sray	// Update the hash.
153256145Sray	return_if_error(hash_append(&index_hash->blocks,
154256145Sray			unpadded_size, uncompressed_size));
155256145Sray
156256145Sray	// Validate the properties of *info are still in allowed limits.
157256145Sray	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
158256145Sray			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
159256145Sray			|| index_size(index_hash->blocks.count,
160256145Sray					index_hash->blocks.index_list_size)
161256145Sray				> LZMA_BACKWARD_SIZE_MAX
162256145Sray			|| index_stream_size(index_hash->blocks.blocks_size,
163256145Sray					index_hash->blocks.count,
164256145Sray					index_hash->blocks.index_list_size)
165256145Sray				> LZMA_VLI_MAX)
166256145Sray		return LZMA_DATA_ERROR;
167256145Sray
168256145Sray	return LZMA_OK;
169256145Sray}
170256145Sray
171256145Sray
172256145Srayextern LZMA_API(lzma_ret)
173256145Sraylzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
174256145Sray		size_t *in_pos, size_t in_size)
175256145Sray{
176256145Sray	// Catch zero input buffer here, because in contrast to Index encoder
177256145Sray	// and decoder functions, applications call this function directly
178256145Sray	// instead of via lzma_code(), which does the buffer checking.
179256145Sray	if (*in_pos >= in_size)
180256145Sray		return LZMA_BUF_ERROR;
181256145Sray
182256145Sray	// NOTE: This function has many similarities to index_encode() and
183256145Sray	// index_decode() functions found from index_encoder.c and
184256145Sray	// index_decoder.c. See the comments especially in index_encoder.c.
185256145Sray	const size_t in_start = *in_pos;
186256145Sray	lzma_ret ret = LZMA_OK;
187256145Sray
188256145Sray	while (*in_pos < in_size)
189256145Sray	switch (index_hash->sequence) {
190256145Sray	case SEQ_BLOCK:
191256145Sray		// Check the Index Indicator is present.
192256145Sray		if (in[(*in_pos)++] != 0x00)
193256145Sray			return LZMA_DATA_ERROR;
194256145Sray
195256145Sray		index_hash->sequence = SEQ_COUNT;
196256145Sray		break;
197256145Sray
198256145Sray	case SEQ_COUNT: {
199256145Sray		ret = lzma_vli_decode(&index_hash->remaining,
200256145Sray				&index_hash->pos, in, in_pos, in_size);
201256145Sray		if (ret != LZMA_STREAM_END)
202256145Sray			goto out;
203256145Sray
204256145Sray		// The count must match the count of the Blocks decoded.
205256145Sray		if (index_hash->remaining != index_hash->blocks.count)
206256145Sray			return LZMA_DATA_ERROR;
207256145Sray
208256145Sray		ret = LZMA_OK;
209256145Sray		index_hash->pos = 0;
210256145Sray
211256145Sray		// Handle the special case when there are no Blocks.
212256145Sray		index_hash->sequence = index_hash->remaining == 0
213256145Sray				? SEQ_PADDING_INIT : SEQ_UNPADDED;
214256145Sray		break;
215256145Sray	}
216256145Sray
217256145Sray	case SEQ_UNPADDED:
218256145Sray	case SEQ_UNCOMPRESSED: {
219219888Sed		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
220219888Sed				? &index_hash->unpadded_size
221219888Sed				: &index_hash->uncompressed_size;
222256145Sray
223219888Sed		ret = lzma_vli_decode(size, &index_hash->pos,
224219888Sed				in, in_pos, in_size);
225219888Sed		if (ret != LZMA_STREAM_END)
226256145Sray			goto out;
227256145Sray
228219888Sed		ret = LZMA_OK;
229256145Sray		index_hash->pos = 0;
230219888Sed
231256145Sray		if (index_hash->sequence == SEQ_UNPADDED) {
232256145Sray			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
233256145Sray					|| index_hash->unpadded_size
234256145Sray						> UNPADDED_SIZE_MAX)
235256145Sray				return LZMA_DATA_ERROR;
236219888Sed
237219888Sed			index_hash->sequence = SEQ_UNCOMPRESSED;
238219888Sed		} else {
239219888Sed			// Update the hash.
240219888Sed			return_if_error(hash_append(&index_hash->records,
241256145Sray					index_hash->unpadded_size,
242256145Sray					index_hash->uncompressed_size));
243256145Sray
244219888Sed			// Verify that we don't go over the known sizes. Note
245219888Sed			// that this validation is simpler than the one used
246219888Sed			// in lzma_index_hash_append(), because here we know
247256145Sray			// that values in index_hash->blocks are already
248219888Sed			// validated and we are fine as long as we don't
249256145Sray			// exceed them in index_hash->records.
250219888Sed			if (index_hash->blocks.blocks_size
251256145Sray					< index_hash->records.blocks_size
252256145Sray					|| index_hash->blocks.uncompressed_size
253256145Sray					< index_hash->records.uncompressed_size
254219888Sed					|| index_hash->blocks.index_list_size
255219888Sed					< index_hash->records.index_list_size)
256219888Sed				return LZMA_DATA_ERROR;
257219888Sed
258219888Sed			// Check if this was the last Record.
259219888Sed			index_hash->sequence = --index_hash->remaining == 0
260219888Sed					? SEQ_PADDING_INIT : SEQ_UNPADDED;
261219888Sed		}
262219888Sed
263219888Sed		break;
264219888Sed	}
265219888Sed
266219888Sed	case SEQ_PADDING_INIT:
267219888Sed		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
268219888Sed				index_hash->records.count,
269219888Sed				index_hash->records.index_list_size)) & 3;
270219888Sed		index_hash->sequence = SEQ_PADDING;
271219888Sed
272219888Sed	// Fall through
273219888Sed
274219888Sed	case SEQ_PADDING:
275219888Sed		if (index_hash->pos > 0) {
276219888Sed			--index_hash->pos;
277219888Sed			if (in[(*in_pos)++] != 0x00)
278219888Sed				return LZMA_DATA_ERROR;
279219888Sed
280257076Sray			break;
281257076Sray		}
282257076Sray
283257076Sray		// Compare the sizes.
284257076Sray		if (index_hash->blocks.blocks_size
285257076Sray				!= index_hash->records.blocks_size
286257076Sray				|| index_hash->blocks.uncompressed_size
287257076Sray				!= index_hash->records.uncompressed_size
288257076Sray				|| index_hash->blocks.index_list_size
289257076Sray				!= index_hash->records.index_list_size)
290257076Sray			return LZMA_DATA_ERROR;
291257076Sray
292257076Sray		// Finish the hashes and compare them.
293257076Sray		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
294257076Sray		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
295257076Sray		if (memcmp(index_hash->blocks.check.buffer.u8,
296257076Sray				index_hash->records.check.buffer.u8,
297257076Sray				lzma_check_size(LZMA_CHECK_BEST)) != 0)
298257076Sray			return LZMA_DATA_ERROR;
299257076Sray
300257076Sray		// Finish the CRC32 calculation.
301257076Sray		index_hash->crc32 = lzma_crc32(in + in_start,
302257076Sray				*in_pos - in_start, index_hash->crc32);
303257076Sray
304257076Sray		index_hash->sequence = SEQ_CRC32;
305219888Sed
306257076Sray	// Fall through
307257076Sray
308257076Sray	case SEQ_CRC32:
309257076Sray		do {
310257076Sray			if (*in_pos == in_size)
311257076Sray				return LZMA_OK;
312257076Sray
313257076Sray			if (((index_hash->crc32 >> (index_hash->pos * 8))
314257076Sray					& 0xFF) != in[(*in_pos)++])
315257076Sray				return LZMA_DATA_ERROR;
316257076Sray
317257076Sray		} while (++index_hash->pos < 4);
318257076Sray
319257076Sray		return LZMA_STREAM_END;
320257076Sray
321257076Sray	default:
322257076Sray		assert(0);
323257076Sray		return LZMA_PROG_ERROR;
324257076Sray	}
325257076Sray
326257076Srayout:
327257076Sray	// Update the CRC32,
328257076Sray	index_hash->crc32 = lzma_crc32(in + in_start,
329257076Sray			*in_pos - in_start, index_hash->crc32);
330257076Sray
331257076Sray	return ret;
332257076Sray}
333257076Sray