1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lzma2_decoder.c 4207753Smm/// \brief LZMA2 decoder 5207753Smm/// 6207753Smm// Authors: Igor Pavlov 7207753Smm// Lasse Collin 8207753Smm// 9207753Smm// This file has been put into the public domain. 10207753Smm// You can do whatever you want with this file. 11207753Smm// 12207753Smm/////////////////////////////////////////////////////////////////////////////// 13207753Smm 14207753Smm#include "lzma2_decoder.h" 15207753Smm#include "lz_decoder.h" 16207753Smm#include "lzma_decoder.h" 17207753Smm 18207753Smm 19312517Sdelphijtypedef struct { 20207753Smm enum sequence { 21207753Smm SEQ_CONTROL, 22207753Smm SEQ_UNCOMPRESSED_1, 23207753Smm SEQ_UNCOMPRESSED_2, 24207753Smm SEQ_COMPRESSED_0, 25207753Smm SEQ_COMPRESSED_1, 26207753Smm SEQ_PROPERTIES, 27207753Smm SEQ_LZMA, 28207753Smm SEQ_COPY, 29207753Smm } sequence; 30207753Smm 31207753Smm /// Sequence after the size fields have been decoded. 32207753Smm enum sequence next_sequence; 33207753Smm 34207753Smm /// LZMA decoder 35207753Smm lzma_lz_decoder lzma; 36207753Smm 37207753Smm /// Uncompressed size of LZMA chunk 38207753Smm size_t uncompressed_size; 39207753Smm 40207753Smm /// Compressed size of the chunk (naturally equals to uncompressed 41207753Smm /// size of uncompressed chunk) 42207753Smm size_t compressed_size; 43207753Smm 44207753Smm /// True if properties are needed. This is false before the 45207753Smm /// first LZMA chunk. 46207753Smm bool need_properties; 47207753Smm 48207753Smm /// True if dictionary reset is needed. This is false before the 49207753Smm /// first chunk (LZMA or uncompressed). 50207753Smm bool need_dictionary_reset; 51207753Smm 52207753Smm lzma_options_lzma options; 53312517Sdelphij} lzma_lzma2_coder; 54207753Smm 55207753Smm 56207753Smmstatic lzma_ret 57312517Sdelphijlzma2_decode(void *coder_ptr, lzma_dict *restrict dict, 58207753Smm const uint8_t *restrict in, size_t *restrict in_pos, 59207753Smm size_t in_size) 60207753Smm{ 61312517Sdelphij lzma_lzma2_coder *restrict coder = coder_ptr; 62312517Sdelphij 63207753Smm // With SEQ_LZMA it is possible that no new input is needed to do 64207753Smm // some progress. The rest of the sequences assume that there is 65207753Smm // at least one byte of input. 66207753Smm while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 67207753Smm switch (coder->sequence) { 68207753Smm case SEQ_CONTROL: { 69207753Smm const uint32_t control = in[*in_pos]; 70207753Smm ++*in_pos; 71207753Smm 72223935Smm // End marker 73223935Smm if (control == 0x00) 74223935Smm return LZMA_STREAM_END; 75223935Smm 76207753Smm if (control >= 0xE0 || control == 1) { 77207753Smm // Dictionary reset implies that next LZMA chunk has 78207753Smm // to set new properties. 79207753Smm coder->need_properties = true; 80207753Smm coder->need_dictionary_reset = true; 81207753Smm } else if (coder->need_dictionary_reset) { 82207753Smm return LZMA_DATA_ERROR; 83207753Smm } 84207753Smm 85207753Smm if (control >= 0x80) { 86207753Smm // LZMA chunk. The highest five bits of the 87207753Smm // uncompressed size are taken from the control byte. 88207753Smm coder->uncompressed_size = (control & 0x1F) << 16; 89207753Smm coder->sequence = SEQ_UNCOMPRESSED_1; 90207753Smm 91207753Smm // See if there are new properties or if we need to 92207753Smm // reset the state. 93207753Smm if (control >= 0xC0) { 94207753Smm // When there are new properties, state reset 95207753Smm // is done at SEQ_PROPERTIES. 96207753Smm coder->need_properties = false; 97207753Smm coder->next_sequence = SEQ_PROPERTIES; 98207753Smm 99207753Smm } else if (coder->need_properties) { 100207753Smm return LZMA_DATA_ERROR; 101207753Smm 102207753Smm } else { 103207753Smm coder->next_sequence = SEQ_LZMA; 104207753Smm 105207753Smm // If only state reset is wanted with old 106207753Smm // properties, do the resetting here for 107207753Smm // simplicity. 108207753Smm if (control >= 0xA0) 109207753Smm coder->lzma.reset(coder->lzma.coder, 110207753Smm &coder->options); 111207753Smm } 112207753Smm } else { 113207753Smm // Invalid control values 114207753Smm if (control > 2) 115207753Smm return LZMA_DATA_ERROR; 116207753Smm 117207753Smm // It's uncompressed chunk 118207753Smm coder->sequence = SEQ_COMPRESSED_0; 119207753Smm coder->next_sequence = SEQ_COPY; 120207753Smm } 121207753Smm 122207753Smm if (coder->need_dictionary_reset) { 123207753Smm // Finish the dictionary reset and let the caller 124207753Smm // flush the dictionary to the actual output buffer. 125207753Smm coder->need_dictionary_reset = false; 126207753Smm dict_reset(dict); 127207753Smm return LZMA_OK; 128207753Smm } 129207753Smm 130207753Smm break; 131207753Smm } 132207753Smm 133207753Smm case SEQ_UNCOMPRESSED_1: 134207753Smm coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 135207753Smm coder->sequence = SEQ_UNCOMPRESSED_2; 136207753Smm break; 137207753Smm 138207753Smm case SEQ_UNCOMPRESSED_2: 139360523Sdelphij coder->uncompressed_size += in[(*in_pos)++] + 1U; 140207753Smm coder->sequence = SEQ_COMPRESSED_0; 141207753Smm coder->lzma.set_uncompressed(coder->lzma.coder, 142207753Smm coder->uncompressed_size); 143207753Smm break; 144207753Smm 145207753Smm case SEQ_COMPRESSED_0: 146207753Smm coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 147207753Smm coder->sequence = SEQ_COMPRESSED_1; 148207753Smm break; 149207753Smm 150207753Smm case SEQ_COMPRESSED_1: 151360523Sdelphij coder->compressed_size += in[(*in_pos)++] + 1U; 152207753Smm coder->sequence = coder->next_sequence; 153207753Smm break; 154207753Smm 155207753Smm case SEQ_PROPERTIES: 156207753Smm if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 157207753Smm return LZMA_DATA_ERROR; 158207753Smm 159207753Smm coder->lzma.reset(coder->lzma.coder, &coder->options); 160207753Smm 161207753Smm coder->sequence = SEQ_LZMA; 162207753Smm break; 163207753Smm 164207753Smm case SEQ_LZMA: { 165207753Smm // Store the start offset so that we can update 166207753Smm // coder->compressed_size later. 167207753Smm const size_t in_start = *in_pos; 168207753Smm 169207753Smm // Decode from in[] to *dict. 170207753Smm const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 171207753Smm dict, in, in_pos, in_size); 172207753Smm 173207753Smm // Validate and update coder->compressed_size. 174207753Smm const size_t in_used = *in_pos - in_start; 175207753Smm if (in_used > coder->compressed_size) 176207753Smm return LZMA_DATA_ERROR; 177207753Smm 178207753Smm coder->compressed_size -= in_used; 179207753Smm 180207753Smm // Return if we didn't finish the chunk, or an error occurred. 181207753Smm if (ret != LZMA_STREAM_END) 182207753Smm return ret; 183207753Smm 184207753Smm // The LZMA decoder must have consumed the whole chunk now. 185207753Smm // We don't need to worry about uncompressed size since it 186207753Smm // is checked by the LZMA decoder. 187207753Smm if (coder->compressed_size != 0) 188207753Smm return LZMA_DATA_ERROR; 189207753Smm 190207753Smm coder->sequence = SEQ_CONTROL; 191207753Smm break; 192207753Smm } 193207753Smm 194207753Smm case SEQ_COPY: { 195207753Smm // Copy from input to the dictionary as is. 196207753Smm dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 197207753Smm if (coder->compressed_size != 0) 198207753Smm return LZMA_OK; 199207753Smm 200207753Smm coder->sequence = SEQ_CONTROL; 201207753Smm break; 202207753Smm } 203207753Smm 204207753Smm default: 205207753Smm assert(0); 206207753Smm return LZMA_PROG_ERROR; 207207753Smm } 208207753Smm 209207753Smm return LZMA_OK; 210207753Smm} 211207753Smm 212207753Smm 213207753Smmstatic void 214312517Sdelphijlzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 215207753Smm{ 216312517Sdelphij lzma_lzma2_coder *coder = coder_ptr; 217312517Sdelphij 218207753Smm assert(coder->lzma.end == NULL); 219207753Smm lzma_free(coder->lzma.coder, allocator); 220207753Smm 221207753Smm lzma_free(coder, allocator); 222207753Smm 223207753Smm return; 224207753Smm} 225207753Smm 226207753Smm 227207753Smmstatic lzma_ret 228278433Srpaulolzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, 229207753Smm const void *opt, lzma_lz_options *lz_options) 230207753Smm{ 231312517Sdelphij lzma_lzma2_coder *coder = lz->coder; 232312517Sdelphij if (coder == NULL) { 233312517Sdelphij coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); 234312517Sdelphij if (coder == NULL) 235207753Smm return LZMA_MEM_ERROR; 236207753Smm 237312517Sdelphij lz->coder = coder; 238207753Smm lz->code = &lzma2_decode; 239207753Smm lz->end = &lzma2_decoder_end; 240207753Smm 241312517Sdelphij coder->lzma = LZMA_LZ_DECODER_INIT; 242207753Smm } 243207753Smm 244207753Smm const lzma_options_lzma *options = opt; 245207753Smm 246312517Sdelphij coder->sequence = SEQ_CONTROL; 247312517Sdelphij coder->need_properties = true; 248312517Sdelphij coder->need_dictionary_reset = options->preset_dict == NULL 249207753Smm || options->preset_dict_size == 0; 250207753Smm 251312517Sdelphij return lzma_lzma_decoder_create(&coder->lzma, 252207753Smm allocator, options, lz_options); 253207753Smm} 254207753Smm 255207753Smm 256207753Smmextern lzma_ret 257278433Srpaulolzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 258207753Smm const lzma_filter_info *filters) 259207753Smm{ 260207753Smm // LZMA2 can only be the last filter in the chain. This is enforced 261207753Smm // by the raw_decoder initialization. 262207753Smm assert(filters[1].init == NULL); 263207753Smm 264207753Smm return lzma_lz_decoder_init(next, allocator, filters, 265207753Smm &lzma2_decoder_init); 266207753Smm} 267207753Smm 268207753Smm 269207753Smmextern uint64_t 270207753Smmlzma_lzma2_decoder_memusage(const void *options) 271207753Smm{ 272312517Sdelphij return sizeof(lzma_lzma2_coder) 273207753Smm + lzma_lzma_decoder_memusage_nocheck(options); 274207753Smm} 275207753Smm 276207753Smm 277207753Smmextern lzma_ret 278278433Srpaulolzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, 279207753Smm const uint8_t *props, size_t props_size) 280207753Smm{ 281207753Smm if (props_size != 1) 282207753Smm return LZMA_OPTIONS_ERROR; 283207753Smm 284207753Smm // Check that reserved bits are unset. 285207753Smm if (props[0] & 0xC0) 286207753Smm return LZMA_OPTIONS_ERROR; 287207753Smm 288207753Smm // Decode the dictionary size. 289207753Smm if (props[0] > 40) 290207753Smm return LZMA_OPTIONS_ERROR; 291207753Smm 292207753Smm lzma_options_lzma *opt = lzma_alloc( 293207753Smm sizeof(lzma_options_lzma), allocator); 294207753Smm if (opt == NULL) 295207753Smm return LZMA_MEM_ERROR; 296207753Smm 297207753Smm if (props[0] == 40) { 298207753Smm opt->dict_size = UINT32_MAX; 299207753Smm } else { 300360523Sdelphij opt->dict_size = 2 | (props[0] & 1U); 301360523Sdelphij opt->dict_size <<= props[0] / 2U + 11; 302207753Smm } 303207753Smm 304207753Smm opt->preset_dict = NULL; 305207753Smm opt->preset_dict_size = 0; 306207753Smm 307207753Smm *options = opt; 308207753Smm 309207753Smm return LZMA_OK; 310207753Smm} 311