// lzma2_decoder.c revision 223935
1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lzma2_decoder.c 4207753Smm/// \brief LZMA2 decoder 5207753Smm/// 6207753Smm// Authors: Igor Pavlov 7207753Smm// Lasse Collin 8207753Smm// 9207753Smm// This file has been put into the public domain. 10207753Smm// You can do whatever you want with this file. 11207753Smm// 12207753Smm/////////////////////////////////////////////////////////////////////////////// 13207753Smm 14207753Smm#include "lzma2_decoder.h" 15207753Smm#include "lz_decoder.h" 16207753Smm#include "lzma_decoder.h" 17207753Smm 18207753Smm 19207753Smmstruct lzma_coder_s { 20207753Smm enum sequence { 21207753Smm SEQ_CONTROL, 22207753Smm SEQ_UNCOMPRESSED_1, 23207753Smm SEQ_UNCOMPRESSED_2, 24207753Smm SEQ_COMPRESSED_0, 25207753Smm SEQ_COMPRESSED_1, 26207753Smm SEQ_PROPERTIES, 27207753Smm SEQ_LZMA, 28207753Smm SEQ_COPY, 29207753Smm } sequence; 30207753Smm 31207753Smm /// Sequence after the size fields have been decoded. 32207753Smm enum sequence next_sequence; 33207753Smm 34207753Smm /// LZMA decoder 35207753Smm lzma_lz_decoder lzma; 36207753Smm 37207753Smm /// Uncompressed size of LZMA chunk 38207753Smm size_t uncompressed_size; 39207753Smm 40207753Smm /// Compressed size of the chunk (naturally equals to uncompressed 41207753Smm /// size of uncompressed chunk) 42207753Smm size_t compressed_size; 43207753Smm 44207753Smm /// True if properties are needed. This is false before the 45207753Smm /// first LZMA chunk. 46207753Smm bool need_properties; 47207753Smm 48207753Smm /// True if dictionary reset is needed. This is false before the 49207753Smm /// first chunk (LZMA or uncompressed). 
50207753Smm bool need_dictionary_reset; 51207753Smm 52207753Smm lzma_options_lzma options; 53207753Smm}; 54207753Smm 55207753Smm 56207753Smmstatic lzma_ret 57207753Smmlzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, 58207753Smm const uint8_t *restrict in, size_t *restrict in_pos, 59207753Smm size_t in_size) 60207753Smm{ 61207753Smm // With SEQ_LZMA it is possible that no new input is needed to do 62207753Smm // some progress. The rest of the sequences assume that there is 63207753Smm // at least one byte of input. 64207753Smm while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 65207753Smm switch (coder->sequence) { 66207753Smm case SEQ_CONTROL: { 67207753Smm const uint32_t control = in[*in_pos]; 68207753Smm ++*in_pos; 69207753Smm 70223935Smm // End marker 71223935Smm if (control == 0x00) 72223935Smm return LZMA_STREAM_END; 73223935Smm 74207753Smm if (control >= 0xE0 || control == 1) { 75207753Smm // Dictionary reset implies that next LZMA chunk has 76207753Smm // to set new properties. 77207753Smm coder->need_properties = true; 78207753Smm coder->need_dictionary_reset = true; 79207753Smm } else if (coder->need_dictionary_reset) { 80207753Smm return LZMA_DATA_ERROR; 81207753Smm } 82207753Smm 83207753Smm if (control >= 0x80) { 84207753Smm // LZMA chunk. The highest five bits of the 85207753Smm // uncompressed size are taken from the control byte. 86207753Smm coder->uncompressed_size = (control & 0x1F) << 16; 87207753Smm coder->sequence = SEQ_UNCOMPRESSED_1; 88207753Smm 89207753Smm // See if there are new properties or if we need to 90207753Smm // reset the state. 91207753Smm if (control >= 0xC0) { 92207753Smm // When there are new properties, state reset 93207753Smm // is done at SEQ_PROPERTIES. 
94207753Smm coder->need_properties = false; 95207753Smm coder->next_sequence = SEQ_PROPERTIES; 96207753Smm 97207753Smm } else if (coder->need_properties) { 98207753Smm return LZMA_DATA_ERROR; 99207753Smm 100207753Smm } else { 101207753Smm coder->next_sequence = SEQ_LZMA; 102207753Smm 103207753Smm // If only state reset is wanted with old 104207753Smm // properties, do the resetting here for 105207753Smm // simplicity. 106207753Smm if (control >= 0xA0) 107207753Smm coder->lzma.reset(coder->lzma.coder, 108207753Smm &coder->options); 109207753Smm } 110207753Smm } else { 111207753Smm // Invalid control values 112207753Smm if (control > 2) 113207753Smm return LZMA_DATA_ERROR; 114207753Smm 115207753Smm // It's uncompressed chunk 116207753Smm coder->sequence = SEQ_COMPRESSED_0; 117207753Smm coder->next_sequence = SEQ_COPY; 118207753Smm } 119207753Smm 120207753Smm if (coder->need_dictionary_reset) { 121207753Smm // Finish the dictionary reset and let the caller 122207753Smm // flush the dictionary to the actual output buffer. 
123207753Smm coder->need_dictionary_reset = false; 124207753Smm dict_reset(dict); 125207753Smm return LZMA_OK; 126207753Smm } 127207753Smm 128207753Smm break; 129207753Smm } 130207753Smm 131207753Smm case SEQ_UNCOMPRESSED_1: 132207753Smm coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 133207753Smm coder->sequence = SEQ_UNCOMPRESSED_2; 134207753Smm break; 135207753Smm 136207753Smm case SEQ_UNCOMPRESSED_2: 137207753Smm coder->uncompressed_size += in[(*in_pos)++] + 1; 138207753Smm coder->sequence = SEQ_COMPRESSED_0; 139207753Smm coder->lzma.set_uncompressed(coder->lzma.coder, 140207753Smm coder->uncompressed_size); 141207753Smm break; 142207753Smm 143207753Smm case SEQ_COMPRESSED_0: 144207753Smm coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 145207753Smm coder->sequence = SEQ_COMPRESSED_1; 146207753Smm break; 147207753Smm 148207753Smm case SEQ_COMPRESSED_1: 149207753Smm coder->compressed_size += in[(*in_pos)++] + 1; 150207753Smm coder->sequence = coder->next_sequence; 151207753Smm break; 152207753Smm 153207753Smm case SEQ_PROPERTIES: 154207753Smm if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 155207753Smm return LZMA_DATA_ERROR; 156207753Smm 157207753Smm coder->lzma.reset(coder->lzma.coder, &coder->options); 158207753Smm 159207753Smm coder->sequence = SEQ_LZMA; 160207753Smm break; 161207753Smm 162207753Smm case SEQ_LZMA: { 163207753Smm // Store the start offset so that we can update 164207753Smm // coder->compressed_size later. 165207753Smm const size_t in_start = *in_pos; 166207753Smm 167207753Smm // Decode from in[] to *dict. 168207753Smm const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 169207753Smm dict, in, in_pos, in_size); 170207753Smm 171207753Smm // Validate and update coder->compressed_size. 
172207753Smm const size_t in_used = *in_pos - in_start; 173207753Smm if (in_used > coder->compressed_size) 174207753Smm return LZMA_DATA_ERROR; 175207753Smm 176207753Smm coder->compressed_size -= in_used; 177207753Smm 178207753Smm // Return if we didn't finish the chunk, or an error occurred. 179207753Smm if (ret != LZMA_STREAM_END) 180207753Smm return ret; 181207753Smm 182207753Smm // The LZMA decoder must have consumed the whole chunk now. 183207753Smm // We don't need to worry about uncompressed size since it 184207753Smm // is checked by the LZMA decoder. 185207753Smm if (coder->compressed_size != 0) 186207753Smm return LZMA_DATA_ERROR; 187207753Smm 188207753Smm coder->sequence = SEQ_CONTROL; 189207753Smm break; 190207753Smm } 191207753Smm 192207753Smm case SEQ_COPY: { 193207753Smm // Copy from input to the dictionary as is. 194207753Smm dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 195207753Smm if (coder->compressed_size != 0) 196207753Smm return LZMA_OK; 197207753Smm 198207753Smm coder->sequence = SEQ_CONTROL; 199207753Smm break; 200207753Smm } 201207753Smm 202207753Smm default: 203207753Smm assert(0); 204207753Smm return LZMA_PROG_ERROR; 205207753Smm } 206207753Smm 207207753Smm return LZMA_OK; 208207753Smm} 209207753Smm 210207753Smm 211207753Smmstatic void 212207753Smmlzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) 213207753Smm{ 214207753Smm assert(coder->lzma.end == NULL); 215207753Smm lzma_free(coder->lzma.coder, allocator); 216207753Smm 217207753Smm lzma_free(coder, allocator); 218207753Smm 219207753Smm return; 220207753Smm} 221207753Smm 222207753Smm 223207753Smmstatic lzma_ret 224207753Smmlzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, 225207753Smm const void *opt, lzma_lz_options *lz_options) 226207753Smm{ 227207753Smm if (lz->coder == NULL) { 228207753Smm lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); 229207753Smm if (lz->coder == NULL) 230207753Smm return LZMA_MEM_ERROR; 231207753Smm 
232207753Smm lz->code = &lzma2_decode; 233207753Smm lz->end = &lzma2_decoder_end; 234207753Smm 235207753Smm lz->coder->lzma = LZMA_LZ_DECODER_INIT; 236207753Smm } 237207753Smm 238207753Smm const lzma_options_lzma *options = opt; 239207753Smm 240207753Smm lz->coder->sequence = SEQ_CONTROL; 241207753Smm lz->coder->need_properties = true; 242207753Smm lz->coder->need_dictionary_reset = options->preset_dict == NULL 243207753Smm || options->preset_dict_size == 0; 244207753Smm 245207753Smm return lzma_lzma_decoder_create(&lz->coder->lzma, 246207753Smm allocator, options, lz_options); 247207753Smm} 248207753Smm 249207753Smm 250207753Smmextern lzma_ret 251207753Smmlzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, 252207753Smm const lzma_filter_info *filters) 253207753Smm{ 254207753Smm // LZMA2 can only be the last filter in the chain. This is enforced 255207753Smm // by the raw_decoder initialization. 256207753Smm assert(filters[1].init == NULL); 257207753Smm 258207753Smm return lzma_lz_decoder_init(next, allocator, filters, 259207753Smm &lzma2_decoder_init); 260207753Smm} 261207753Smm 262207753Smm 263207753Smmextern uint64_t 264207753Smmlzma_lzma2_decoder_memusage(const void *options) 265207753Smm{ 266207753Smm return sizeof(lzma_coder) 267207753Smm + lzma_lzma_decoder_memusage_nocheck(options); 268207753Smm} 269207753Smm 270207753Smm 271207753Smmextern lzma_ret 272207753Smmlzma_lzma2_props_decode(void **options, lzma_allocator *allocator, 273207753Smm const uint8_t *props, size_t props_size) 274207753Smm{ 275207753Smm if (props_size != 1) 276207753Smm return LZMA_OPTIONS_ERROR; 277207753Smm 278207753Smm // Check that reserved bits are unset. 279207753Smm if (props[0] & 0xC0) 280207753Smm return LZMA_OPTIONS_ERROR; 281207753Smm 282207753Smm // Decode the dictionary size. 
283207753Smm if (props[0] > 40) 284207753Smm return LZMA_OPTIONS_ERROR; 285207753Smm 286207753Smm lzma_options_lzma *opt = lzma_alloc( 287207753Smm sizeof(lzma_options_lzma), allocator); 288207753Smm if (opt == NULL) 289207753Smm return LZMA_MEM_ERROR; 290207753Smm 291207753Smm if (props[0] == 40) { 292207753Smm opt->dict_size = UINT32_MAX; 293207753Smm } else { 294207753Smm opt->dict_size = 2 | (props[0] & 1); 295207753Smm opt->dict_size <<= props[0] / 2 + 11; 296207753Smm } 297207753Smm 298207753Smm opt->preset_dict = NULL; 299207753Smm opt->preset_dict_size = 0; 300207753Smm 301207753Smm *options = opt; 302207753Smm 303207753Smm return LZMA_OK; 304207753Smm} 305