1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file index_hash.c 4207753Smm/// \brief Validates Index by using a hash function 5207753Smm// 6207753Smm// Author: Lasse Collin 7207753Smm// 8207753Smm// This file has been put into the public domain. 9207753Smm// You can do whatever you want with this file. 10207753Smm// 11207753Smm/////////////////////////////////////////////////////////////////////////////// 12207753Smm 13207753Smm#include "common.h" 14207753Smm#include "index.h" 15207753Smm#include "check.h" 16207753Smm 17207753Smm 18207753Smmtypedef struct { 19207753Smm /// Sum of the Block sizes (including Block Padding) 20207753Smm lzma_vli blocks_size; 21207753Smm 22207753Smm /// Sum of the Uncompressed Size fields 23207753Smm lzma_vli uncompressed_size; 24207753Smm 25207753Smm /// Number of Records 26207753Smm lzma_vli count; 27207753Smm 28207753Smm /// Size of the List of Index Records as bytes 29207753Smm lzma_vli index_list_size; 30207753Smm 31207753Smm /// Check calculated from Unpadded Sizes and Uncompressed Sizes. 32207753Smm lzma_check_state check; 33207753Smm 34207753Smm} lzma_index_hash_info; 35207753Smm 36207753Smm 37207753Smmstruct lzma_index_hash_s { 38207753Smm enum { 39207753Smm SEQ_BLOCK, 40207753Smm SEQ_COUNT, 41207753Smm SEQ_UNPADDED, 42207753Smm SEQ_UNCOMPRESSED, 43207753Smm SEQ_PADDING_INIT, 44207753Smm SEQ_PADDING, 45207753Smm SEQ_CRC32, 46207753Smm } sequence; 47207753Smm 48207753Smm /// Information collected while decoding the actual Blocks. 49207753Smm lzma_index_hash_info blocks; 50207753Smm 51207753Smm /// Information collected from the Index field. 52207753Smm lzma_index_hash_info records; 53207753Smm 54207753Smm /// Number of Records not fully decoded 55207753Smm lzma_vli remaining; 56207753Smm 57207753Smm /// Unpadded Size currently being read from an Index Record. 58207753Smm lzma_vli unpadded_size; 59207753Smm 60207753Smm /// Uncompressed Size currently being read from an Index Record. 61207753Smm lzma_vli uncompressed_size; 62207753Smm 63207753Smm /// Position in variable-length integers when decoding them from 64207753Smm /// the List of Records. 65207753Smm size_t pos; 66207753Smm 67207753Smm /// CRC32 of the Index 68207753Smm uint32_t crc32; 69207753Smm}; 70207753Smm 71207753Smm 72207753Smmextern LZMA_API(lzma_index_hash *) 73207753Smmlzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) 74207753Smm{ 75207753Smm if (index_hash == NULL) { 76207753Smm index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); 77207753Smm if (index_hash == NULL) 78207753Smm return NULL; 79207753Smm } 80207753Smm 81207753Smm index_hash->sequence = SEQ_BLOCK; 82207753Smm index_hash->blocks.blocks_size = 0; 83207753Smm index_hash->blocks.uncompressed_size = 0; 84207753Smm index_hash->blocks.count = 0; 85207753Smm index_hash->blocks.index_list_size = 0; 86207753Smm index_hash->records.blocks_size = 0; 87207753Smm index_hash->records.uncompressed_size = 0; 88207753Smm index_hash->records.count = 0; 89207753Smm index_hash->records.index_list_size = 0; 90207753Smm index_hash->unpadded_size = 0; 91207753Smm index_hash->uncompressed_size = 0; 92207753Smm index_hash->pos = 0; 93207753Smm index_hash->crc32 = 0; 94207753Smm 95207753Smm // These cannot fail because LZMA_CHECK_BEST is known to be supported. 96207753Smm (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); 97207753Smm (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); 98207753Smm 99207753Smm return index_hash; 100207753Smm} 101207753Smm 102207753Smm 103207753Smmextern LZMA_API(void) 104207753Smmlzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) 105207753Smm{ 106207753Smm lzma_free(index_hash, allocator); 107207753Smm return; 108207753Smm} 109207753Smm 110207753Smm 111207753Smmextern LZMA_API(lzma_vli) 112207753Smmlzma_index_hash_size(const lzma_index_hash *index_hash) 113207753Smm{ 114207753Smm // Get the size of the Index from ->blocks instead of ->records for 115207753Smm // cases where application wants to know the Index Size before 116207753Smm // decoding the Index. 117207753Smm return index_size(index_hash->blocks.count, 118207753Smm index_hash->blocks.index_list_size); 119207753Smm} 120207753Smm 121207753Smm 122207753Smm/// Updates the sizes and the hash without any validation. 123207753Smmstatic lzma_ret 124207753Smmhash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, 125207753Smm lzma_vli uncompressed_size) 126207753Smm{ 127207753Smm info->blocks_size += vli_ceil4(unpadded_size); 128207753Smm info->uncompressed_size += uncompressed_size; 129207753Smm info->index_list_size += lzma_vli_size(unpadded_size) 130207753Smm + lzma_vli_size(uncompressed_size); 131207753Smm ++info->count; 132207753Smm 133207753Smm const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; 134207753Smm lzma_check_update(&info->check, LZMA_CHECK_BEST, 135207753Smm (const uint8_t *)(sizes), sizeof(sizes)); 136207753Smm 137207753Smm return LZMA_OK; 138207753Smm} 139207753Smm 140207753Smm 141207753Smmextern LZMA_API(lzma_ret) 142207753Smmlzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, 143207753Smm lzma_vli uncompressed_size) 144207753Smm{ 145207753Smm // Validate the arguments. 146207753Smm if (index_hash->sequence != SEQ_BLOCK 147207753Smm || unpadded_size < UNPADDED_SIZE_MIN 148207753Smm || unpadded_size > UNPADDED_SIZE_MAX 149207753Smm || uncompressed_size > LZMA_VLI_MAX) 150207753Smm return LZMA_PROG_ERROR; 151207753Smm 152207753Smm // Update the hash. 153207753Smm return_if_error(hash_append(&index_hash->blocks, 154207753Smm unpadded_size, uncompressed_size)); 155207753Smm 156207753Smm // Validate the properties of *info are still in allowed limits. 157207753Smm if (index_hash->blocks.blocks_size > LZMA_VLI_MAX 158207753Smm || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX 159207753Smm || index_size(index_hash->blocks.count, 160207753Smm index_hash->blocks.index_list_size) 161207753Smm > LZMA_BACKWARD_SIZE_MAX 162207753Smm || index_stream_size(index_hash->blocks.blocks_size, 163207753Smm index_hash->blocks.count, 164207753Smm index_hash->blocks.index_list_size) 165207753Smm > LZMA_VLI_MAX) 166207753Smm return LZMA_DATA_ERROR; 167207753Smm 168207753Smm return LZMA_OK; 169207753Smm} 170207753Smm 171207753Smm 172207753Smmextern LZMA_API(lzma_ret) 173207753Smmlzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, 174207753Smm size_t *in_pos, size_t in_size) 175207753Smm{ 176207753Smm // Catch zero input buffer here, because in contrast to Index encoder 177207753Smm // and decoder functions, applications call this function directly 178207753Smm // instead of via lzma_code(), which does the buffer checking. 179207753Smm if (*in_pos >= in_size) 180207753Smm return LZMA_BUF_ERROR; 181207753Smm 182207753Smm // NOTE: This function has many similarities to index_encode() and 183207753Smm // index_decode() functions found from index_encoder.c and 184207753Smm // index_decoder.c. See the comments especially in index_encoder.c. 185207753Smm const size_t in_start = *in_pos; 186207753Smm lzma_ret ret = LZMA_OK; 187207753Smm 188207753Smm while (*in_pos < in_size) 189207753Smm switch (index_hash->sequence) { 190207753Smm case SEQ_BLOCK: 191207753Smm // Check the Index Indicator is present. 192207753Smm if (in[(*in_pos)++] != 0x00) 193207753Smm return LZMA_DATA_ERROR; 194207753Smm 195207753Smm index_hash->sequence = SEQ_COUNT; 196207753Smm break; 197207753Smm 198207753Smm case SEQ_COUNT: { 199207753Smm ret = lzma_vli_decode(&index_hash->remaining, 200207753Smm &index_hash->pos, in, in_pos, in_size); 201207753Smm if (ret != LZMA_STREAM_END) 202207753Smm goto out; 203207753Smm 204207753Smm // The count must match the count of the Blocks decoded. 205207753Smm if (index_hash->remaining != index_hash->blocks.count) 206207753Smm return LZMA_DATA_ERROR; 207207753Smm 208207753Smm ret = LZMA_OK; 209207753Smm index_hash->pos = 0; 210207753Smm 211207753Smm // Handle the special case when there are no Blocks. 212207753Smm index_hash->sequence = index_hash->remaining == 0 213207753Smm ? SEQ_PADDING_INIT : SEQ_UNPADDED; 214207753Smm break; 215207753Smm } 216207753Smm 217207753Smm case SEQ_UNPADDED: 218207753Smm case SEQ_UNCOMPRESSED: { 219207753Smm lzma_vli *size = index_hash->sequence == SEQ_UNPADDED 220207753Smm ? &index_hash->unpadded_size 221207753Smm : &index_hash->uncompressed_size; 222207753Smm 223207753Smm ret = lzma_vli_decode(size, &index_hash->pos, 224207753Smm in, in_pos, in_size); 225207753Smm if (ret != LZMA_STREAM_END) 226207753Smm goto out; 227207753Smm 228207753Smm ret = LZMA_OK; 229207753Smm index_hash->pos = 0; 230207753Smm 231207753Smm if (index_hash->sequence == SEQ_UNPADDED) { 232207753Smm if (index_hash->unpadded_size < UNPADDED_SIZE_MIN 233207753Smm || index_hash->unpadded_size 234207753Smm > UNPADDED_SIZE_MAX) 235207753Smm return LZMA_DATA_ERROR; 236207753Smm 237207753Smm index_hash->sequence = SEQ_UNCOMPRESSED; 238207753Smm } else { 239207753Smm // Update the hash. 240207753Smm return_if_error(hash_append(&index_hash->records, 241207753Smm index_hash->unpadded_size, 242207753Smm index_hash->uncompressed_size)); 243207753Smm 244207753Smm // Verify that we don't go over the known sizes. Note 245207753Smm // that this validation is simpler than the one used 246207753Smm // in lzma_index_hash_append(), because here we know 247207753Smm // that values in index_hash->blocks are already 248207753Smm // validated and we are fine as long as we don't 249207753Smm // exceed them in index_hash->records. 250207753Smm if (index_hash->blocks.blocks_size 251207753Smm < index_hash->records.blocks_size 252207753Smm || index_hash->blocks.uncompressed_size 253207753Smm < index_hash->records.uncompressed_size 254207753Smm || index_hash->blocks.index_list_size 255207753Smm < index_hash->records.index_list_size) 256207753Smm return LZMA_DATA_ERROR; 257207753Smm 258207753Smm // Check if this was the last Record. 259207753Smm index_hash->sequence = --index_hash->remaining == 0 260207753Smm ? SEQ_PADDING_INIT : SEQ_UNPADDED; 261207753Smm } 262207753Smm 263207753Smm break; 264207753Smm } 265207753Smm 266207753Smm case SEQ_PADDING_INIT: 267207753Smm index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( 268207753Smm index_hash->records.count, 269207753Smm index_hash->records.index_list_size)) & 3; 270207753Smm index_hash->sequence = SEQ_PADDING; 271207753Smm 272207753Smm // Fall through 273207753Smm 274207753Smm case SEQ_PADDING: 275207753Smm if (index_hash->pos > 0) { 276207753Smm --index_hash->pos; 277207753Smm if (in[(*in_pos)++] != 0x00) 278207753Smm return LZMA_DATA_ERROR; 279207753Smm 280207753Smm break; 281207753Smm } 282207753Smm 283207753Smm // Compare the sizes. 284207753Smm if (index_hash->blocks.blocks_size 285207753Smm != index_hash->records.blocks_size 286207753Smm || index_hash->blocks.uncompressed_size 287207753Smm != index_hash->records.uncompressed_size 288207753Smm || index_hash->blocks.index_list_size 289207753Smm != index_hash->records.index_list_size) 290207753Smm return LZMA_DATA_ERROR; 291207753Smm 292207753Smm // Finish the hashes and compare them. 293207753Smm lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); 294207753Smm lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); 295207753Smm if (memcmp(index_hash->blocks.check.buffer.u8, 296207753Smm index_hash->records.check.buffer.u8, 297207753Smm lzma_check_size(LZMA_CHECK_BEST)) != 0) 298207753Smm return LZMA_DATA_ERROR; 299207753Smm 300207753Smm // Finish the CRC32 calculation. 301207753Smm index_hash->crc32 = lzma_crc32(in + in_start, 302207753Smm *in_pos - in_start, index_hash->crc32); 303207753Smm 304207753Smm index_hash->sequence = SEQ_CRC32; 305207753Smm 306207753Smm // Fall through 307207753Smm 308207753Smm case SEQ_CRC32: 309207753Smm do { 310207753Smm if (*in_pos == in_size) 311207753Smm return LZMA_OK; 312207753Smm 313207753Smm if (((index_hash->crc32 >> (index_hash->pos * 8)) 314207753Smm & 0xFF) != in[(*in_pos)++]) 315207753Smm return LZMA_DATA_ERROR; 316207753Smm 317207753Smm } while (++index_hash->pos < 4); 318207753Smm 319207753Smm return LZMA_STREAM_END; 320207753Smm 321207753Smm default: 322207753Smm assert(0); 323207753Smm return LZMA_PROG_ERROR; 324207753Smm } 325207753Smm 326207753Smmout: 327207753Smm // Update the CRC32, 328207753Smm index_hash->crc32 = lzma_crc32(in + in_start, 329207753Smm *in_pos - in_start, index_hash->crc32); 330207753Smm 331207753Smm return ret; 332207753Smm} 333