lz_decoder.c revision 278433
1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lz_decoder.c 4207753Smm/// \brief LZ out window 5207753Smm/// 6207753Smm// Authors: Igor Pavlov 7207753Smm// Lasse Collin 8207753Smm// 9207753Smm// This file has been put into the public domain. 10207753Smm// You can do whatever you want with this file. 11207753Smm// 12207753Smm/////////////////////////////////////////////////////////////////////////////// 13207753Smm 14207753Smm// liblzma supports multiple LZ77-based filters. The LZ part is shared 15207753Smm// between these filters. The LZ code takes care of dictionary handling 16207753Smm// and passing the data between filters in the chain. The filter-specific 17207753Smm// part decodes from the input buffer to the dictionary. 18207753Smm 19207753Smm 20207753Smm#include "lz_decoder.h" 21207753Smm 22207753Smm 23207753Smmstruct lzma_coder_s { 24207753Smm /// Dictionary (history buffer) 25207753Smm lzma_dict dict; 26207753Smm 27207753Smm /// The actual LZ-based decoder e.g. LZMA 28207753Smm lzma_lz_decoder lz; 29207753Smm 30207753Smm /// Next filter in the chain, if any. Note that LZMA and LZMA2 are 31207753Smm /// only allowed as the last filter, but the long-range filter in 32207753Smm /// future can be in the middle of the chain. 33207753Smm lzma_next_coder next; 34207753Smm 35207753Smm /// True if the next filter in the chain has returned LZMA_STREAM_END. 36207753Smm bool next_finished; 37207753Smm 38207753Smm /// True if the LZ decoder (e.g. LZMA) has detected end of payload 39207753Smm /// marker. This may become true before next_finished becomes true. 40207753Smm bool this_finished; 41207753Smm 42207753Smm /// Temporary buffer needed when the LZ-based filter is not the last 43207753Smm /// filter in the chain. The output of the next filter is first 44207753Smm /// decoded into buffer[], which is then used as input for the actual 45207753Smm /// LZ-based decoder. 46207753Smm struct { 47207753Smm size_t pos; 48207753Smm size_t size; 49207753Smm uint8_t buffer[LZMA_BUFFER_SIZE]; 50207753Smm } temp; 51207753Smm}; 52207753Smm 53207753Smm 54207753Smmstatic void 55207753Smmlz_decoder_reset(lzma_coder *coder) 56207753Smm{ 57207753Smm coder->dict.pos = 0; 58207753Smm coder->dict.full = 0; 59207753Smm coder->dict.buf[coder->dict.size - 1] = '\0'; 60207753Smm coder->dict.need_reset = false; 61207753Smm return; 62207753Smm} 63207753Smm 64207753Smm 65207753Smmstatic lzma_ret 66207753Smmdecode_buffer(lzma_coder *coder, 67207753Smm const uint8_t *restrict in, size_t *restrict in_pos, 68207753Smm size_t in_size, uint8_t *restrict out, 69207753Smm size_t *restrict out_pos, size_t out_size) 70207753Smm{ 71207753Smm while (true) { 72207753Smm // Wrap the dictionary if needed. 73207753Smm if (coder->dict.pos == coder->dict.size) 74207753Smm coder->dict.pos = 0; 75207753Smm 76207753Smm // Store the current dictionary position. It is needed to know 77207753Smm // where to start copying to the out[] buffer. 78207753Smm const size_t dict_start = coder->dict.pos; 79207753Smm 80207753Smm // Calculate how much we allow coder->lz.code() to decode. 81207753Smm // It must not decode past the end of the dictionary 82207753Smm // buffer, and we don't want it to decode more than is 83207753Smm // actually needed to fill the out[] buffer. 84213700Smm coder->dict.limit = coder->dict.pos 85213700Smm + my_min(out_size - *out_pos, 86213700Smm coder->dict.size - coder->dict.pos); 87207753Smm 88207753Smm // Call the coder->lz.code() to do the actual decoding. 89207753Smm const lzma_ret ret = coder->lz.code( 90207753Smm coder->lz.coder, &coder->dict, 91207753Smm in, in_pos, in_size); 92207753Smm 93207753Smm // Copy the decoded data from the dictionary to the out[] 94207753Smm // buffer. 95207753Smm const size_t copy_size = coder->dict.pos - dict_start; 96207753Smm assert(copy_size <= out_size - *out_pos); 97207753Smm memcpy(out + *out_pos, coder->dict.buf + dict_start, 98207753Smm copy_size); 99207753Smm *out_pos += copy_size; 100207753Smm 101207753Smm // Reset the dictionary if so requested by coder->lz.code(). 102207753Smm if (coder->dict.need_reset) { 103207753Smm lz_decoder_reset(coder); 104207753Smm 105207753Smm // Since we reset dictionary, we don't check if 106207753Smm // dictionary became full. 107207753Smm if (ret != LZMA_OK || *out_pos == out_size) 108207753Smm return ret; 109207753Smm } else { 110207753Smm // Return if everything got decoded or an error 111207753Smm // occurred, or if there's no more data to decode. 112207753Smm // 113207753Smm // Note that detecting if there's something to decode 114207753Smm // is done by looking if dictionary become full 115207753Smm // instead of looking if *in_pos == in_size. This 116207753Smm // is because it is possible that all the input was 117207753Smm // consumed already but some data is pending to be 118207753Smm // written to the dictionary. 119207753Smm if (ret != LZMA_OK || *out_pos == out_size 120207753Smm || coder->dict.pos < coder->dict.size) 121207753Smm return ret; 122207753Smm } 123207753Smm } 124207753Smm} 125207753Smm 126207753Smm 127207753Smmstatic lzma_ret 128207753Smmlz_decode(lzma_coder *coder, 129278433Srpaulo const lzma_allocator *allocator lzma_attribute((__unused__)), 130207753Smm const uint8_t *restrict in, size_t *restrict in_pos, 131207753Smm size_t in_size, uint8_t *restrict out, 132207753Smm size_t *restrict out_pos, size_t out_size, 133207753Smm lzma_action action) 134207753Smm{ 135207753Smm if (coder->next.code == NULL) 136207753Smm return decode_buffer(coder, in, in_pos, in_size, 137207753Smm out, out_pos, out_size); 138207753Smm 139207753Smm // We aren't the last coder in the chain, we need to decode 140207753Smm // our input to a temporary buffer. 141207753Smm while (*out_pos < out_size) { 142207753Smm // Fill the temporary buffer if it is empty. 143207753Smm if (!coder->next_finished 144207753Smm && coder->temp.pos == coder->temp.size) { 145207753Smm coder->temp.pos = 0; 146207753Smm coder->temp.size = 0; 147207753Smm 148207753Smm const lzma_ret ret = coder->next.code( 149207753Smm coder->next.coder, 150207753Smm allocator, in, in_pos, in_size, 151207753Smm coder->temp.buffer, &coder->temp.size, 152207753Smm LZMA_BUFFER_SIZE, action); 153207753Smm 154207753Smm if (ret == LZMA_STREAM_END) 155207753Smm coder->next_finished = true; 156207753Smm else if (ret != LZMA_OK || coder->temp.size == 0) 157207753Smm return ret; 158207753Smm } 159207753Smm 160207753Smm if (coder->this_finished) { 161207753Smm if (coder->temp.size != 0) 162207753Smm return LZMA_DATA_ERROR; 163207753Smm 164207753Smm if (coder->next_finished) 165207753Smm return LZMA_STREAM_END; 166207753Smm 167207753Smm return LZMA_OK; 168207753Smm } 169207753Smm 170207753Smm const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, 171207753Smm &coder->temp.pos, coder->temp.size, 172207753Smm out, out_pos, out_size); 173207753Smm 174207753Smm if (ret == LZMA_STREAM_END) 175207753Smm coder->this_finished = true; 176207753Smm else if (ret != LZMA_OK) 177207753Smm return ret; 178207753Smm else if (coder->next_finished && *out_pos < out_size) 179207753Smm return LZMA_DATA_ERROR; 180207753Smm } 181207753Smm 182207753Smm return LZMA_OK; 183207753Smm} 184207753Smm 185207753Smm 186207753Smmstatic void 187278433Srpaulolz_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) 188207753Smm{ 189207753Smm lzma_next_end(&coder->next, allocator); 190207753Smm lzma_free(coder->dict.buf, allocator); 191207753Smm 192207753Smm if (coder->lz.end != NULL) 193207753Smm coder->lz.end(coder->lz.coder, allocator); 194207753Smm else 195207753Smm lzma_free(coder->lz.coder, allocator); 196207753Smm 197207753Smm lzma_free(coder, allocator); 198207753Smm return; 199207753Smm} 200207753Smm 201207753Smm 202207753Smmextern lzma_ret 203278433Srpaulolzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 204207753Smm const lzma_filter_info *filters, 205207753Smm lzma_ret (*lz_init)(lzma_lz_decoder *lz, 206278433Srpaulo const lzma_allocator *allocator, const void *options, 207207753Smm lzma_lz_options *lz_options)) 208207753Smm{ 209207753Smm // Allocate the base structure if it isn't already allocated. 210207753Smm if (next->coder == NULL) { 211207753Smm next->coder = lzma_alloc(sizeof(lzma_coder), allocator); 212207753Smm if (next->coder == NULL) 213207753Smm return LZMA_MEM_ERROR; 214207753Smm 215207753Smm next->code = &lz_decode; 216207753Smm next->end = &lz_decoder_end; 217207753Smm 218207753Smm next->coder->dict.buf = NULL; 219207753Smm next->coder->dict.size = 0; 220207753Smm next->coder->lz = LZMA_LZ_DECODER_INIT; 221207753Smm next->coder->next = LZMA_NEXT_CODER_INIT; 222207753Smm } 223207753Smm 224207753Smm // Allocate and initialize the LZ-based decoder. It will also give 225207753Smm // us the dictionary size. 226207753Smm lzma_lz_options lz_options; 227207753Smm return_if_error(lz_init(&next->coder->lz, allocator, 228207753Smm filters[0].options, &lz_options)); 229207753Smm 230207753Smm // If the dictionary size is very small, increase it to 4096 bytes. 231207753Smm // This is to prevent constant wrapping of the dictionary, which 232207753Smm // would slow things down. The downside is that since we don't check 233207753Smm // separately for the real dictionary size, we may happily accept 234207753Smm // corrupt files. 235207753Smm if (lz_options.dict_size < 4096) 236207753Smm lz_options.dict_size = 4096; 237207753Smm 238207753Smm // Make dictionary size a multipe of 16. Some LZ-based decoders like 239207753Smm // LZMA use the lowest bits lzma_dict.pos to know the alignment of the 240207753Smm // data. Aligned buffer is also good when memcpying from the 241207753Smm // dictionary to the output buffer, since applications are 242207753Smm // recommended to give aligned buffers to liblzma. 243207753Smm // 244207753Smm // Avoid integer overflow. 245207753Smm if (lz_options.dict_size > SIZE_MAX - 15) 246207753Smm return LZMA_MEM_ERROR; 247207753Smm 248207753Smm lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); 249207753Smm 250207753Smm // Allocate and initialize the dictionary. 251207753Smm if (next->coder->dict.size != lz_options.dict_size) { 252207753Smm lzma_free(next->coder->dict.buf, allocator); 253207753Smm next->coder->dict.buf 254207753Smm = lzma_alloc(lz_options.dict_size, allocator); 255207753Smm if (next->coder->dict.buf == NULL) 256207753Smm return LZMA_MEM_ERROR; 257207753Smm 258207753Smm next->coder->dict.size = lz_options.dict_size; 259207753Smm } 260207753Smm 261207753Smm lz_decoder_reset(next->coder); 262207753Smm 263207753Smm // Use the preset dictionary if it was given to us. 264207753Smm if (lz_options.preset_dict != NULL 265207753Smm && lz_options.preset_dict_size > 0) { 266207753Smm // If the preset dictionary is bigger than the actual 267207753Smm // dictionary, copy only the tail. 268213700Smm const size_t copy_size = my_min(lz_options.preset_dict_size, 269207753Smm lz_options.dict_size); 270207753Smm const size_t offset = lz_options.preset_dict_size - copy_size; 271207753Smm memcpy(next->coder->dict.buf, lz_options.preset_dict + offset, 272207753Smm copy_size); 273207753Smm next->coder->dict.pos = copy_size; 274207753Smm next->coder->dict.full = copy_size; 275207753Smm } 276207753Smm 277207753Smm // Miscellaneous initializations 278207753Smm next->coder->next_finished = false; 279207753Smm next->coder->this_finished = false; 280207753Smm next->coder->temp.pos = 0; 281207753Smm next->coder->temp.size = 0; 282207753Smm 283207753Smm // Initialize the next filter in the chain, if any. 284207753Smm return lzma_next_filter_init(&next->coder->next, allocator, 285207753Smm filters + 1); 286207753Smm} 287207753Smm 288207753Smm 289207753Smmextern uint64_t 290207753Smmlzma_lz_decoder_memusage(size_t dictionary_size) 291207753Smm{ 292207753Smm return sizeof(lzma_coder) + (uint64_t)(dictionary_size); 293207753Smm} 294207753Smm 295207753Smm 296207753Smmextern void 297207753Smmlzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) 298207753Smm{ 299207753Smm coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size); 300207753Smm} 301