lzma2_decoder.c revision 292588
1/////////////////////////////////////////////////////////////////////////////// 2// 3/// \file lzma2_decoder.c 4/// \brief LZMA2 decoder 5/// 6// Authors: Igor Pavlov 7// Lasse Collin 8// 9// This file has been put into the public domain. 10// You can do whatever you want with this file. 11// 12/////////////////////////////////////////////////////////////////////////////// 13 14#include "lzma2_decoder.h" 15#include "lz_decoder.h" 16#include "lzma_decoder.h" 17 18 19struct lzma_coder_s { 20 enum sequence { 21 SEQ_CONTROL, 22 SEQ_UNCOMPRESSED_1, 23 SEQ_UNCOMPRESSED_2, 24 SEQ_COMPRESSED_0, 25 SEQ_COMPRESSED_1, 26 SEQ_PROPERTIES, 27 SEQ_LZMA, 28 SEQ_COPY, 29 } sequence; 30 31 /// Sequence after the size fields have been decoded. 32 enum sequence next_sequence; 33 34 /// LZMA decoder 35 lzma_lz_decoder lzma; 36 37 /// Uncompressed size of LZMA chunk 38 size_t uncompressed_size; 39 40 /// Compressed size of the chunk (naturally equals to uncompressed 41 /// size of uncompressed chunk) 42 size_t compressed_size; 43 44 /// True if properties are needed. This is false before the 45 /// first LZMA chunk. 46 bool need_properties; 47 48 /// True if dictionary reset is needed. This is false before the 49 /// first chunk (LZMA or uncompressed). 50 bool need_dictionary_reset; 51 52 lzma_options_lzma options; 53}; 54 55 56static lzma_ret 57lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size) 60{ 61 // With SEQ_LZMA it is possible that no new input is needed to do 62 // some progress. The rest of the sequences assume that there is 63 // at least one byte of input. 64 while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 65 switch (coder->sequence) { 66 case SEQ_CONTROL: { 67 const uint32_t control = in[*in_pos]; 68 ++*in_pos; 69 70 // End marker 71 if (control == 0x00) 72 return LZMA_STREAM_END; 73 74 if (control >= 0xE0 || control == 1) { 75 // Dictionary reset implies that next LZMA chunk has 76 // to set new properties. 77 coder->need_properties = true; 78 coder->need_dictionary_reset = true; 79 } else if (coder->need_dictionary_reset) { 80 return LZMA_DATA_ERROR; 81 } 82 83 if (control >= 0x80) { 84 // LZMA chunk. The highest five bits of the 85 // uncompressed size are taken from the control byte. 86 coder->uncompressed_size = (control & 0x1F) << 16; 87 coder->sequence = SEQ_UNCOMPRESSED_1; 88 89 // See if there are new properties or if we need to 90 // reset the state. 91 if (control >= 0xC0) { 92 // When there are new properties, state reset 93 // is done at SEQ_PROPERTIES. 94 coder->need_properties = false; 95 coder->next_sequence = SEQ_PROPERTIES; 96 97 } else if (coder->need_properties) { 98 return LZMA_DATA_ERROR; 99 100 } else { 101 coder->next_sequence = SEQ_LZMA; 102 103 // If only state reset is wanted with old 104 // properties, do the resetting here for 105 // simplicity. 106 if (control >= 0xA0) 107 coder->lzma.reset(coder->lzma.coder, 108 &coder->options); 109 } 110 } else { 111 // Invalid control values 112 if (control > 2) 113 return LZMA_DATA_ERROR; 114 115 // It's uncompressed chunk 116 coder->sequence = SEQ_COMPRESSED_0; 117 coder->next_sequence = SEQ_COPY; 118 } 119 120 if (coder->need_dictionary_reset) { 121 // Finish the dictionary reset and let the caller 122 // flush the dictionary to the actual output buffer. 123 coder->need_dictionary_reset = false; 124 dict_reset(dict); 125 return LZMA_OK; 126 } 127 128 break; 129 } 130 131 case SEQ_UNCOMPRESSED_1: 132 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 133 coder->sequence = SEQ_UNCOMPRESSED_2; 134 break; 135 136 case SEQ_UNCOMPRESSED_2: 137 coder->uncompressed_size += in[(*in_pos)++] + 1; 138 coder->sequence = SEQ_COMPRESSED_0; 139 coder->lzma.set_uncompressed(coder->lzma.coder, 140 coder->uncompressed_size); 141 break; 142 143 case SEQ_COMPRESSED_0: 144 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 145 coder->sequence = SEQ_COMPRESSED_1; 146 break; 147 148 case SEQ_COMPRESSED_1: 149 coder->compressed_size += in[(*in_pos)++] + 1; 150 coder->sequence = coder->next_sequence; 151 break; 152 153 case SEQ_PROPERTIES: 154 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 155 return LZMA_DATA_ERROR; 156 157 coder->lzma.reset(coder->lzma.coder, &coder->options); 158 159 coder->sequence = SEQ_LZMA; 160 break; 161 162 case SEQ_LZMA: { 163 // Store the start offset so that we can update 164 // coder->compressed_size later. 165 const size_t in_start = *in_pos; 166 167 // Decode from in[] to *dict. 168 const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 169 dict, in, in_pos, in_size); 170 171 // Validate and update coder->compressed_size. 172 const size_t in_used = *in_pos - in_start; 173 if (in_used > coder->compressed_size) 174 return LZMA_DATA_ERROR; 175 176 coder->compressed_size -= in_used; 177 178 // Return if we didn't finish the chunk, or an error occurred. 179 if (ret != LZMA_STREAM_END) 180 return ret; 181 182 // The LZMA decoder must have consumed the whole chunk now. 183 // We don't need to worry about uncompressed size since it 184 // is checked by the LZMA decoder. 185 if (coder->compressed_size != 0) 186 return LZMA_DATA_ERROR; 187 188 coder->sequence = SEQ_CONTROL; 189 break; 190 } 191 192 case SEQ_COPY: { 193 // Copy from input to the dictionary as is. 194 dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 195 if (coder->compressed_size != 0) 196 return LZMA_OK; 197 198 coder->sequence = SEQ_CONTROL; 199 break; 200 } 201 202 default: 203 assert(0); 204 return LZMA_PROG_ERROR; 205 } 206 207 return LZMA_OK; 208} 209 210 211static void 212lzma2_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) 213{ 214 assert(coder->lzma.end == NULL); 215 lzma_free(coder->lzma.coder, allocator); 216 217 lzma_free(coder, allocator); 218 219 return; 220} 221 222 223static lzma_ret 224lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, 225 const void *opt, lzma_lz_options *lz_options) 226{ 227 if (lz->coder == NULL) { 228 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); 229 if (lz->coder == NULL) 230 return LZMA_MEM_ERROR; 231 232 lz->code = &lzma2_decode; 233 lz->end = &lzma2_decoder_end; 234 235 lz->coder->lzma = LZMA_LZ_DECODER_INIT; 236 } 237 238 const lzma_options_lzma *options = opt; 239 240 lz->coder->sequence = SEQ_CONTROL; 241 lz->coder->need_properties = true; 242 lz->coder->need_dictionary_reset = options->preset_dict == NULL 243 || options->preset_dict_size == 0; 244 245 return lzma_lzma_decoder_create(&lz->coder->lzma, 246 allocator, options, lz_options); 247} 248 249 250extern lzma_ret 251lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 252 const lzma_filter_info *filters) 253{ 254 // LZMA2 can only be the last filter in the chain. This is enforced 255 // by the raw_decoder initialization. 256 assert(filters[1].init == NULL); 257 258 return lzma_lz_decoder_init(next, allocator, filters, 259 &lzma2_decoder_init); 260} 261 262 263extern uint64_t 264lzma_lzma2_decoder_memusage(const void *options) 265{ 266 return sizeof(lzma_coder) 267 + lzma_lzma_decoder_memusage_nocheck(options); 268} 269 270 271extern lzma_ret 272lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, 273 const uint8_t *props, size_t props_size) 274{ 275 if (props_size != 1) 276 return LZMA_OPTIONS_ERROR; 277 278 // Check that reserved bits are unset. 279 if (props[0] & 0xC0) 280 return LZMA_OPTIONS_ERROR; 281 282 // Decode the dictionary size. 283 if (props[0] > 40) 284 return LZMA_OPTIONS_ERROR; 285 286 lzma_options_lzma *opt = lzma_alloc( 287 sizeof(lzma_options_lzma), allocator); 288 if (opt == NULL) 289 return LZMA_MEM_ERROR; 290 291 if (props[0] == 40) { 292 opt->dict_size = UINT32_MAX; 293 } else { 294 opt->dict_size = 2 | (props[0] & 1); 295 opt->dict_size <<= props[0] / 2 + 11; 296 } 297 298 opt->preset_dict = NULL; 299 opt->preset_dict_size = 0; 300 301 *options = opt; 302 303 return LZMA_OK; 304} 305