lzma2_decoder.c revision 207842
1/////////////////////////////////////////////////////////////////////////////// 2// 3/// \file lzma2_decoder.c 4/// \brief LZMA2 decoder 5/// 6// Authors: Igor Pavlov 7// Lasse Collin 8// 9// This file has been put into the public domain. 10// You can do whatever you want with this file. 11// 12/////////////////////////////////////////////////////////////////////////////// 13 14#include "lzma2_decoder.h" 15#include "lz_decoder.h" 16#include "lzma_decoder.h" 17 18 19struct lzma_coder_s { 20 enum sequence { 21 SEQ_CONTROL, 22 SEQ_UNCOMPRESSED_1, 23 SEQ_UNCOMPRESSED_2, 24 SEQ_COMPRESSED_0, 25 SEQ_COMPRESSED_1, 26 SEQ_PROPERTIES, 27 SEQ_LZMA, 28 SEQ_COPY, 29 } sequence; 30 31 /// Sequence after the size fields have been decoded. 32 enum sequence next_sequence; 33 34 /// LZMA decoder 35 lzma_lz_decoder lzma; 36 37 /// Uncompressed size of LZMA chunk 38 size_t uncompressed_size; 39 40 /// Compressed size of the chunk (naturally equals to uncompressed 41 /// size of uncompressed chunk) 42 size_t compressed_size; 43 44 /// True if properties are needed. This is false before the 45 /// first LZMA chunk. 46 bool need_properties; 47 48 /// True if dictionary reset is needed. This is false before the 49 /// first chunk (LZMA or uncompressed). 50 bool need_dictionary_reset; 51 52 lzma_options_lzma options; 53}; 54 55 56static lzma_ret 57lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size) 60{ 61 // With SEQ_LZMA it is possible that no new input is needed to do 62 // some progress. The rest of the sequences assume that there is 63 // at least one byte of input. 64 while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 65 switch (coder->sequence) { 66 case SEQ_CONTROL: { 67 const uint32_t control = in[*in_pos]; 68 ++*in_pos; 69 70 if (control >= 0xE0 || control == 1) { 71 // Dictionary reset implies that next LZMA chunk has 72 // to set new properties. 73 coder->need_properties = true; 74 coder->need_dictionary_reset = true; 75 } else if (coder->need_dictionary_reset) { 76 return LZMA_DATA_ERROR; 77 } 78 79 if (control >= 0x80) { 80 // LZMA chunk. The highest five bits of the 81 // uncompressed size are taken from the control byte. 82 coder->uncompressed_size = (control & 0x1F) << 16; 83 coder->sequence = SEQ_UNCOMPRESSED_1; 84 85 // See if there are new properties or if we need to 86 // reset the state. 87 if (control >= 0xC0) { 88 // When there are new properties, state reset 89 // is done at SEQ_PROPERTIES. 90 coder->need_properties = false; 91 coder->next_sequence = SEQ_PROPERTIES; 92 93 } else if (coder->need_properties) { 94 return LZMA_DATA_ERROR; 95 96 } else { 97 coder->next_sequence = SEQ_LZMA; 98 99 // If only state reset is wanted with old 100 // properties, do the resetting here for 101 // simplicity. 102 if (control >= 0xA0) 103 coder->lzma.reset(coder->lzma.coder, 104 &coder->options); 105 } 106 } else { 107 // End marker 108 if (control == 0x00) 109 return LZMA_STREAM_END; 110 111 // Invalid control values 112 if (control > 2) 113 return LZMA_DATA_ERROR; 114 115 // It's uncompressed chunk 116 coder->sequence = SEQ_COMPRESSED_0; 117 coder->next_sequence = SEQ_COPY; 118 } 119 120 if (coder->need_dictionary_reset) { 121 // Finish the dictionary reset and let the caller 122 // flush the dictionary to the actual output buffer. 123 coder->need_dictionary_reset = false; 124 dict_reset(dict); 125 return LZMA_OK; 126 } 127 128 break; 129 } 130 131 case SEQ_UNCOMPRESSED_1: 132 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 133 coder->sequence = SEQ_UNCOMPRESSED_2; 134 break; 135 136 case SEQ_UNCOMPRESSED_2: 137 coder->uncompressed_size += in[(*in_pos)++] + 1; 138 coder->sequence = SEQ_COMPRESSED_0; 139 coder->lzma.set_uncompressed(coder->lzma.coder, 140 coder->uncompressed_size); 141 break; 142 143 case SEQ_COMPRESSED_0: 144 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 145 coder->sequence = SEQ_COMPRESSED_1; 146 break; 147 148 case SEQ_COMPRESSED_1: 149 coder->compressed_size += in[(*in_pos)++] + 1; 150 coder->sequence = coder->next_sequence; 151 break; 152 153 case SEQ_PROPERTIES: 154 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 155 return LZMA_DATA_ERROR; 156 157 coder->lzma.reset(coder->lzma.coder, &coder->options); 158 159 coder->sequence = SEQ_LZMA; 160 break; 161 162 case SEQ_LZMA: { 163 // Store the start offset so that we can update 164 // coder->compressed_size later. 165 const size_t in_start = *in_pos; 166 167 // Decode from in[] to *dict. 168 const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 169 dict, in, in_pos, in_size); 170 171 // Validate and update coder->compressed_size. 172 const size_t in_used = *in_pos - in_start; 173 if (in_used > coder->compressed_size) 174 return LZMA_DATA_ERROR; 175 176 coder->compressed_size -= in_used; 177 178 // Return if we didn't finish the chunk, or an error occurred. 179 if (ret != LZMA_STREAM_END) 180 return ret; 181 182 // The LZMA decoder must have consumed the whole chunk now. 183 // We don't need to worry about uncompressed size since it 184 // is checked by the LZMA decoder. 185 if (coder->compressed_size != 0) 186 return LZMA_DATA_ERROR; 187 188 coder->sequence = SEQ_CONTROL; 189 break; 190 } 191 192 case SEQ_COPY: { 193 // Copy from input to the dictionary as is. 194 // FIXME Can copy too much? 195 dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 196 if (coder->compressed_size != 0) 197 return LZMA_OK; 198 199 coder->sequence = SEQ_CONTROL; 200 break; 201 } 202 203 default: 204 assert(0); 205 return LZMA_PROG_ERROR; 206 } 207 208 return LZMA_OK; 209} 210 211 212static void 213lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) 214{ 215 assert(coder->lzma.end == NULL); 216 lzma_free(coder->lzma.coder, allocator); 217 218 lzma_free(coder, allocator); 219 220 return; 221} 222 223 224static lzma_ret 225lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, 226 const void *opt, lzma_lz_options *lz_options) 227{ 228 if (lz->coder == NULL) { 229 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); 230 if (lz->coder == NULL) 231 return LZMA_MEM_ERROR; 232 233 lz->code = &lzma2_decode; 234 lz->end = &lzma2_decoder_end; 235 236 lz->coder->lzma = LZMA_LZ_DECODER_INIT; 237 } 238 239 const lzma_options_lzma *options = opt; 240 241 lz->coder->sequence = SEQ_CONTROL; 242 lz->coder->need_properties = true; 243 lz->coder->need_dictionary_reset = options->preset_dict == NULL 244 || options->preset_dict_size == 0; 245 246 return lzma_lzma_decoder_create(&lz->coder->lzma, 247 allocator, options, lz_options); 248} 249 250 251extern lzma_ret 252lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, 253 const lzma_filter_info *filters) 254{ 255 // LZMA2 can only be the last filter in the chain. This is enforced 256 // by the raw_decoder initialization. 257 assert(filters[1].init == NULL); 258 259 return lzma_lz_decoder_init(next, allocator, filters, 260 &lzma2_decoder_init); 261} 262 263 264extern uint64_t 265lzma_lzma2_decoder_memusage(const void *options) 266{ 267 return sizeof(lzma_coder) 268 + lzma_lzma_decoder_memusage_nocheck(options); 269} 270 271 272extern lzma_ret 273lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, 274 const uint8_t *props, size_t props_size) 275{ 276 if (props_size != 1) 277 return LZMA_OPTIONS_ERROR; 278 279 // Check that reserved bits are unset. 280 if (props[0] & 0xC0) 281 return LZMA_OPTIONS_ERROR; 282 283 // Decode the dictionary size. 284 if (props[0] > 40) 285 return LZMA_OPTIONS_ERROR; 286 287 lzma_options_lzma *opt = lzma_alloc( 288 sizeof(lzma_options_lzma), allocator); 289 if (opt == NULL) 290 return LZMA_MEM_ERROR; 291 292 if (props[0] == 40) { 293 opt->dict_size = UINT32_MAX; 294 } else { 295 opt->dict_size = 2 | (props[0] & 1); 296 opt->dict_size <<= props[0] / 2 + 11; 297 } 298 299 opt->preset_dict = NULL; 300 opt->preset_dict_size = 0; 301 302 *options = opt; 303 304 return LZMA_OK; 305} 306