index_hash.c revision 213700
1219888Sed/////////////////////////////////////////////////////////////////////////////// 2257547Sray// 3219888Sed/// \file index_hash.c 4219888Sed/// \brief Validates Index by using a hash function 5219888Sed// 6219888Sed// Author: Lasse Collin 7219888Sed// 8257547Sray// This file has been put into the public domain. 9257547Sray// You can do whatever you want with this file. 10257547Sray// 11219888Sed/////////////////////////////////////////////////////////////////////////////// 12219888Sed 13219888Sed#include "common.h" 14219888Sed#include "index.h" 15219888Sed#include "check.h" 16219888Sed 17219888Sed 18219888Sedtypedef struct { 19219888Sed /// Sum of the Block sizes (including Block Padding) 20219888Sed lzma_vli blocks_size; 21219888Sed 22219888Sed /// Sum of the Uncompressed Size fields 23219888Sed lzma_vli uncompressed_size; 24219888Sed 25219888Sed /// Number of Records 26219888Sed lzma_vli count; 27219888Sed 28219888Sed /// Size of the List of Index Records as bytes 29219888Sed lzma_vli index_list_size; 30219888Sed 31219888Sed /// Check calculated from Unpadded Sizes and Uncompressed Sizes. 32219888Sed lzma_check_state check; 33219888Sed 34219888Sed} lzma_index_hash_info; 35219888Sed 36219888Sed 37219888Sedstruct lzma_index_hash_s { 38219888Sed enum { 39219888Sed SEQ_BLOCK, 40219888Sed SEQ_COUNT, 41219888Sed SEQ_UNPADDED, 42219888Sed SEQ_UNCOMPRESSED, 43219888Sed SEQ_PADDING_INIT, 44219888Sed SEQ_PADDING, 45219888Sed SEQ_CRC32, 46256145Sray } sequence; 47219888Sed 48219888Sed /// Information collected while decoding the actual Blocks. 49219888Sed lzma_index_hash_info blocks; 50219888Sed 51219888Sed /// Information collected from the Index field. 52219888Sed lzma_index_hash_info records; 53219888Sed 54219888Sed /// Number of Records not fully decoded 55219888Sed lzma_vli remaining; 56219888Sed 57219888Sed /// Unpadded Size currently being read from an Index Record. 58219888Sed lzma_vli unpadded_size; 59219888Sed 60219888Sed /// Uncompressed Size currently being read from an Index Record. 61219888Sed lzma_vli uncompressed_size; 62219888Sed 63219888Sed /// Position in variable-length integers when decoding them from 64219888Sed /// the List of Records. 65219888Sed size_t pos; 66219888Sed 67219888Sed /// CRC32 of the Index 68219888Sed uint32_t crc32; 69219888Sed}; 70219888Sed 71219888Sed 72219888Sedextern LZMA_API(lzma_index_hash *) 73219888Sedlzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) 74219888Sed{ 75219888Sed if (index_hash == NULL) { 76219888Sed index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); 77219888Sed if (index_hash == NULL) 78219888Sed return NULL; 79219888Sed } 80219888Sed 81219888Sed index_hash->sequence = SEQ_BLOCK; 82219888Sed index_hash->blocks.blocks_size = 0; 83219888Sed index_hash->blocks.uncompressed_size = 0; 84219888Sed index_hash->blocks.count = 0; 85257296Snwhitehorn index_hash->blocks.index_list_size = 0; 86219888Sed index_hash->records.blocks_size = 0; 87219888Sed index_hash->records.uncompressed_size = 0; 88219888Sed index_hash->records.count = 0; 89219888Sed index_hash->records.index_list_size = 0; 90219888Sed index_hash->unpadded_size = 0; 91219888Sed index_hash->uncompressed_size = 0; 92257296Snwhitehorn index_hash->pos = 0; 93219888Sed index_hash->crc32 = 0; 94219888Sed 95219888Sed // These cannot fail because LZMA_CHECK_BEST is known to be supported. 96219888Sed (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); 97219888Sed (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); 98219888Sed 99219888Sed return index_hash; 100219888Sed} 101219888Sed 102219888Sed 103219888Sedextern LZMA_API(void) 104256145Sraylzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) 105256145Sray{ 106256145Sray lzma_free(index_hash, allocator); 107256145Sray return; 108256145Sray} 109256145Sray 110256145Sray 111219888Sedextern LZMA_API(lzma_vli) 112219888Sedlzma_index_hash_size(const lzma_index_hash *index_hash) 113256145Sray{ 114219888Sed // Get the size of the Index from ->blocks instead of ->records for 115219888Sed // cases where application wants to know the Index Size before 116219888Sed // decoding the Index. 117219888Sed return index_size(index_hash->blocks.count, 118219888Sed index_hash->blocks.index_list_size); 119219888Sed} 120219888Sed 121219888Sed 122219888Sed/// Updates the sizes and the hash without any validation. 123219888Sedstatic lzma_ret 124256145Srayhash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, 125256145Sray lzma_vli uncompressed_size) 126256145Sray{ 127256145Sray info->blocks_size += vli_ceil4(unpadded_size); 128256145Sray info->uncompressed_size += uncompressed_size; 129256145Sray info->index_list_size += lzma_vli_size(unpadded_size) 130256145Sray + lzma_vli_size(uncompressed_size); 131256145Sray ++info->count; 132256145Sray 133219888Sed const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; 134256145Sray lzma_check_update(&info->check, LZMA_CHECK_BEST, 135256145Sray (const uint8_t *)(sizes), sizeof(sizes)); 136256145Sray 137256145Sray return LZMA_OK; 138256145Sray} 139256145Sray 140256145Sray 141256145Srayextern LZMA_API(lzma_ret) 142256145Sraylzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, 143256145Sray lzma_vli uncompressed_size) 144256145Sray{ 145256145Sray // Validate the arguments. 146256145Sray if (index_hash->sequence != SEQ_BLOCK 147256145Sray || unpadded_size < UNPADDED_SIZE_MIN 148256145Sray || unpadded_size > UNPADDED_SIZE_MAX 149256145Sray || uncompressed_size > LZMA_VLI_MAX) 150256145Sray return LZMA_PROG_ERROR; 151256145Sray 152256145Sray // Update the hash. 153256145Sray return_if_error(hash_append(&index_hash->blocks, 154256145Sray unpadded_size, uncompressed_size)); 155256145Sray 156256145Sray // Validate the properties of *info are still in allowed limits. 157256145Sray if (index_hash->blocks.blocks_size > LZMA_VLI_MAX 158256145Sray || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX 159256145Sray || index_size(index_hash->blocks.count, 160256145Sray index_hash->blocks.index_list_size) 161256145Sray > LZMA_BACKWARD_SIZE_MAX 162256145Sray || index_stream_size(index_hash->blocks.blocks_size, 163256145Sray index_hash->blocks.count, 164256145Sray index_hash->blocks.index_list_size) 165256145Sray > LZMA_VLI_MAX) 166256145Sray return LZMA_DATA_ERROR; 167256145Sray 168256145Sray return LZMA_OK; 169256145Sray} 170256145Sray 171256145Sray 172256145Srayextern LZMA_API(lzma_ret) 173256145Sraylzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, 174256145Sray size_t *in_pos, size_t in_size) 175256145Sray{ 176256145Sray // Catch zero input buffer here, because in contrast to Index encoder 177256145Sray // and decoder functions, applications call this function directly 178256145Sray // instead of via lzma_code(), which does the buffer checking. 179256145Sray if (*in_pos >= in_size) 180256145Sray return LZMA_BUF_ERROR; 181256145Sray 182256145Sray // NOTE: This function has many similarities to index_encode() and 183256145Sray // index_decode() functions found from index_encoder.c and 184256145Sray // index_decoder.c. See the comments especially in index_encoder.c. 185256145Sray const size_t in_start = *in_pos; 186256145Sray lzma_ret ret = LZMA_OK; 187256145Sray 188256145Sray while (*in_pos < in_size) 189256145Sray switch (index_hash->sequence) { 190256145Sray case SEQ_BLOCK: 191256145Sray // Check the Index Indicator is present. 192256145Sray if (in[(*in_pos)++] != 0x00) 193256145Sray return LZMA_DATA_ERROR; 194256145Sray 195256145Sray index_hash->sequence = SEQ_COUNT; 196256145Sray break; 197256145Sray 198256145Sray case SEQ_COUNT: { 199256145Sray ret = lzma_vli_decode(&index_hash->remaining, 200256145Sray &index_hash->pos, in, in_pos, in_size); 201256145Sray if (ret != LZMA_STREAM_END) 202256145Sray goto out; 203256145Sray 204256145Sray // The count must match the count of the Blocks decoded. 205256145Sray if (index_hash->remaining != index_hash->blocks.count) 206256145Sray return LZMA_DATA_ERROR; 207256145Sray 208256145Sray ret = LZMA_OK; 209256145Sray index_hash->pos = 0; 210256145Sray 211256145Sray // Handle the special case when there are no Blocks. 212256145Sray index_hash->sequence = index_hash->remaining == 0 213256145Sray ? SEQ_PADDING_INIT : SEQ_UNPADDED; 214256145Sray break; 215256145Sray } 216256145Sray 217256145Sray case SEQ_UNPADDED: 218256145Sray case SEQ_UNCOMPRESSED: { 219219888Sed lzma_vli *size = index_hash->sequence == SEQ_UNPADDED 220219888Sed ? &index_hash->unpadded_size 221219888Sed : &index_hash->uncompressed_size; 222256145Sray 223219888Sed ret = lzma_vli_decode(size, &index_hash->pos, 224219888Sed in, in_pos, in_size); 225219888Sed if (ret != LZMA_STREAM_END) 226256145Sray goto out; 227256145Sray 228219888Sed ret = LZMA_OK; 229256145Sray index_hash->pos = 0; 230219888Sed 231256145Sray if (index_hash->sequence == SEQ_UNPADDED) { 232256145Sray if (index_hash->unpadded_size < UNPADDED_SIZE_MIN 233256145Sray || index_hash->unpadded_size 234256145Sray > UNPADDED_SIZE_MAX) 235256145Sray return LZMA_DATA_ERROR; 236219888Sed 237219888Sed index_hash->sequence = SEQ_UNCOMPRESSED; 238219888Sed } else { 239219888Sed // Update the hash. 240219888Sed return_if_error(hash_append(&index_hash->records, 241256145Sray index_hash->unpadded_size, 242256145Sray index_hash->uncompressed_size)); 243256145Sray 244219888Sed // Verify that we don't go over the known sizes. Note 245219888Sed // that this validation is simpler than the one used 246219888Sed // in lzma_index_hash_append(), because here we know 247256145Sray // that values in index_hash->blocks are already 248219888Sed // validated and we are fine as long as we don't 249256145Sray // exceed them in index_hash->records. 250219888Sed if (index_hash->blocks.blocks_size 251256145Sray < index_hash->records.blocks_size 252256145Sray || index_hash->blocks.uncompressed_size 253256145Sray < index_hash->records.uncompressed_size 254219888Sed || index_hash->blocks.index_list_size 255219888Sed < index_hash->records.index_list_size) 256219888Sed return LZMA_DATA_ERROR; 257219888Sed 258219888Sed // Check if this was the last Record. 259219888Sed index_hash->sequence = --index_hash->remaining == 0 260219888Sed ? SEQ_PADDING_INIT : SEQ_UNPADDED; 261219888Sed } 262219888Sed 263219888Sed break; 264219888Sed } 265219888Sed 266219888Sed case SEQ_PADDING_INIT: 267219888Sed index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( 268219888Sed index_hash->records.count, 269219888Sed index_hash->records.index_list_size)) & 3; 270219888Sed index_hash->sequence = SEQ_PADDING; 271219888Sed 272219888Sed // Fall through 273219888Sed 274219888Sed case SEQ_PADDING: 275219888Sed if (index_hash->pos > 0) { 276219888Sed --index_hash->pos; 277219888Sed if (in[(*in_pos)++] != 0x00) 278219888Sed return LZMA_DATA_ERROR; 279219888Sed 280257076Sray break; 281257076Sray } 282257076Sray 283257076Sray // Compare the sizes. 284257076Sray if (index_hash->blocks.blocks_size 285257076Sray != index_hash->records.blocks_size 286257076Sray || index_hash->blocks.uncompressed_size 287257076Sray != index_hash->records.uncompressed_size 288257076Sray || index_hash->blocks.index_list_size 289257076Sray != index_hash->records.index_list_size) 290257076Sray return LZMA_DATA_ERROR; 291257076Sray 292257076Sray // Finish the hashes and compare them. 293257076Sray lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); 294257076Sray lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); 295257076Sray if (memcmp(index_hash->blocks.check.buffer.u8, 296257076Sray index_hash->records.check.buffer.u8, 297257076Sray lzma_check_size(LZMA_CHECK_BEST)) != 0) 298257076Sray return LZMA_DATA_ERROR; 299257076Sray 300257076Sray // Finish the CRC32 calculation. 301257076Sray index_hash->crc32 = lzma_crc32(in + in_start, 302257076Sray *in_pos - in_start, index_hash->crc32); 303257076Sray 304257076Sray index_hash->sequence = SEQ_CRC32; 305219888Sed 306257076Sray // Fall through 307257076Sray 308257076Sray case SEQ_CRC32: 309257076Sray do { 310257076Sray if (*in_pos == in_size) 311257076Sray return LZMA_OK; 312257076Sray 313257076Sray if (((index_hash->crc32 >> (index_hash->pos * 8)) 314257076Sray & 0xFF) != in[(*in_pos)++]) 315257076Sray return LZMA_DATA_ERROR; 316257076Sray 317257076Sray } while (++index_hash->pos < 4); 318257076Sray 319257076Sray return LZMA_STREAM_END; 320257076Sray 321257076Sray default: 322257076Sray assert(0); 323257076Sray return LZMA_PROG_ERROR; 324257076Sray } 325257076Sray 326257076Srayout: 327257076Sray // Update the CRC32, 328257076Sray index_hash->crc32 = lzma_crc32(in + in_start, 329257076Sray *in_pos - in_start, index_hash->crc32); 330257076Sray 331257076Sray return ret; 332257076Sray} 333257076Sray