lz_encoder.c revision 207842
1/////////////////////////////////////////////////////////////////////////////// 2// 3/// \file lz_encoder.c 4/// \brief LZ in window 5/// 6// Authors: Igor Pavlov 7// Lasse Collin 8// 9// This file has been put into the public domain. 10// You can do whatever you want with this file. 11// 12/////////////////////////////////////////////////////////////////////////////// 13 14#include "lz_encoder.h" 15#include "lz_encoder_hash.h" 16 17// See lz_encoder_hash.h. This is a bit hackish but avoids making 18// endianness a conditional in makefiles. 19#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) 20# include "lz_encoder_hash_table.h" 21#endif 22 23 24struct lzma_coder_s { 25 /// LZ-based encoder e.g. LZMA 26 lzma_lz_encoder lz; 27 28 /// History buffer and match finder 29 lzma_mf mf; 30 31 /// Next coder in the chain 32 lzma_next_coder next; 33}; 34 35 36/// \brief Moves the data in the input window to free space for new data 37/// 38/// mf->buffer is a sliding input window, which keeps mf->keep_size_before 39/// bytes of input history available all the time. Now and then we need to 40/// "slide" the buffer to make space for the new data to the end of the 41/// buffer. At the same time, data older than keep_size_before is dropped. 42/// 43static void 44move_window(lzma_mf *mf) 45{ 46 // Align the move to a multiple of 16 bytes. Some LZ-based encoders 47 // like LZMA use the lowest bits of mf->read_pos to know the 48 // alignment of the uncompressed data. We also get better speed 49 // for memmove() with aligned buffers. 50 assert(mf->read_pos > mf->keep_size_before); 51 const uint32_t move_offset 52 = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); 53 54 assert(mf->write_pos > move_offset); 55 const size_t move_size = mf->write_pos - move_offset; 56 57 assert(move_offset + move_size <= mf->size); 58 59 memmove(mf->buffer, mf->buffer + move_offset, move_size); 60 61 mf->offset += move_offset; 62 mf->read_pos -= move_offset; 63 mf->read_limit -= move_offset; 64 mf->write_pos -= move_offset; 65 66 return; 67} 68 69 70/// \brief Tries to fill the input window (mf->buffer) 71/// 72/// If we are the last encoder in the chain, our input data is in in[]. 73/// Otherwise we call the next filter in the chain to process in[] and 74/// write its output to mf->buffer. 75/// 76/// This function must not be called once it has returned LZMA_STREAM_END. 77/// 78static lzma_ret 79fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, 80 size_t *in_pos, size_t in_size, lzma_action action) 81{ 82 assert(coder->mf.read_pos <= coder->mf.write_pos); 83 84 // Move the sliding window if needed. 85 if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) 86 move_window(&coder->mf); 87 88 // Maybe this is ugly, but lzma_mf uses uint32_t for most things 89 // (which I find cleanest), but we need size_t here when filling 90 // the history window. 91 size_t write_pos = coder->mf.write_pos; 92 lzma_ret ret; 93 if (coder->next.code == NULL) { 94 // Not using a filter, simply memcpy() as much as possible. 95 lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, 96 &write_pos, coder->mf.size); 97 98 ret = action != LZMA_RUN && *in_pos == in_size 99 ? LZMA_STREAM_END : LZMA_OK; 100 101 } else { 102 ret = coder->next.code(coder->next.coder, allocator, 103 in, in_pos, in_size, 104 coder->mf.buffer, &write_pos, 105 coder->mf.size, action); 106 } 107 108 coder->mf.write_pos = write_pos; 109 110 // If end of stream has been reached or flushing completed, we allow 111 // the encoder to process all the input (that is, read_pos is allowed 112 // to reach write_pos). Otherwise we keep keep_size_after bytes 113 // available as prebuffer. 114 if (ret == LZMA_STREAM_END) { 115 assert(*in_pos == in_size); 116 ret = LZMA_OK; 117 coder->mf.action = action; 118 coder->mf.read_limit = coder->mf.write_pos; 119 120 } else if (coder->mf.write_pos > coder->mf.keep_size_after) { 121 // This needs to be done conditionally, because if we got 122 // only little new input, there may be too little input 123 // to do any encoding yet. 124 coder->mf.read_limit = coder->mf.write_pos 125 - coder->mf.keep_size_after; 126 } 127 128 // Restart the match finder after finished LZMA_SYNC_FLUSH. 129 if (coder->mf.pending > 0 130 && coder->mf.read_pos < coder->mf.read_limit) { 131 // Match finder may update coder->pending and expects it to 132 // start from zero, so use a temporary variable. 133 const size_t pending = coder->mf.pending; 134 coder->mf.pending = 0; 135 136 // Rewind read_pos so that the match finder can hash 137 // the pending bytes. 138 assert(coder->mf.read_pos >= pending); 139 coder->mf.read_pos -= pending; 140 141 // Call the skip function directly instead of using 142 // mf_skip(), since we don't want to touch mf->read_ahead. 143 coder->mf.skip(&coder->mf, pending); 144 } 145 146 return ret; 147} 148 149 150static lzma_ret 151lz_encode(lzma_coder *coder, lzma_allocator *allocator, 152 const uint8_t *restrict in, size_t *restrict in_pos, 153 size_t in_size, 154 uint8_t *restrict out, size_t *restrict out_pos, 155 size_t out_size, lzma_action action) 156{ 157 while (*out_pos < out_size 158 && (*in_pos < in_size || action != LZMA_RUN)) { 159 // Read more data to coder->mf.buffer if needed. 160 if (coder->mf.action == LZMA_RUN && coder->mf.read_pos 161 >= coder->mf.read_limit) 162 return_if_error(fill_window(coder, allocator, 163 in, in_pos, in_size, action)); 164 165 // Encode 166 const lzma_ret ret = coder->lz.code(coder->lz.coder, 167 &coder->mf, out, out_pos, out_size); 168 if (ret != LZMA_OK) { 169 // Setting this to LZMA_RUN for cases when we are 170 // flushing. It doesn't matter when finishing or if 171 // an error occurred. 172 coder->mf.action = LZMA_RUN; 173 return ret; 174 } 175 } 176 177 return LZMA_OK; 178} 179 180 181static bool 182lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, 183 const lzma_lz_options *lz_options) 184{ 185 // For now, the dictionary size is limited to 1.5 GiB. This may grow 186 // in the future if needed, but it needs a little more work than just 187 // changing this check. 188 if (lz_options->dict_size < LZMA_DICT_SIZE_MIN 189 || lz_options->dict_size 190 > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) 191 || lz_options->nice_len > lz_options->match_len_max) 192 return true; 193 194 mf->keep_size_before = lz_options->before_size + lz_options->dict_size; 195 196 mf->keep_size_after = lz_options->after_size 197 + lz_options->match_len_max; 198 199 // To avoid constant memmove()s, allocate some extra space. Since 200 // memmove()s become more expensive when the size of the buffer 201 // increases, we reserve more space when a large dictionary is 202 // used to make the memmove() calls rarer. 203 // 204 // This works with dictionaries up to about 3 GiB. If bigger 205 // dictionary is wanted, some extra work is needed: 206 // - Several variables in lzma_mf have to be changed from uint32_t 207 // to size_t. 208 // - Memory usage calculation needs something too, e.g. use uint64_t 209 // for mf->size. 210 uint32_t reserve = lz_options->dict_size / 2; 211 if (reserve > (UINT32_C(1) << 30)) 212 reserve /= 2; 213 214 reserve += (lz_options->before_size + lz_options->match_len_max 215 + lz_options->after_size) / 2 + (UINT32_C(1) << 19); 216 217 const uint32_t old_size = mf->size; 218 mf->size = mf->keep_size_before + reserve + mf->keep_size_after; 219 220 // Deallocate the old history buffer if it exists but has different 221 // size than what is needed now. 222 if (mf->buffer != NULL && old_size != mf->size) { 223 lzma_free(mf->buffer, allocator); 224 mf->buffer = NULL; 225 } 226 227 // Match finder options 228 mf->match_len_max = lz_options->match_len_max; 229 mf->nice_len = lz_options->nice_len; 230 231 // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't 232 // mean limiting dictionary size to less than 2 GiB. With a match 233 // finder that uses multibyte resolution (hashes start at e.g. every 234 // fourth byte), cyclic_size would stay below 2 Gi even when 235 // dictionary size is greater than 2 GiB. 236 // 237 // It would be possible to allow cyclic_size >= 2 Gi, but then we 238 // would need to be careful to use 64-bit types in various places 239 // (size_t could do since we would need bigger than 32-bit address 240 // space anyway). It would also require either zeroing a multigigabyte 241 // buffer at initialization (waste of time and RAM) or allow 242 // normalization in lz_encoder_mf.c to access uninitialized 243 // memory to keep the code simpler. The current way is simple and 244 // still allows pretty big dictionaries, so I don't expect these 245 // limits to change. 246 mf->cyclic_size = lz_options->dict_size + 1; 247 248 // Validate the match finder ID and setup the function pointers. 249 switch (lz_options->match_finder) { 250#ifdef HAVE_MF_HC3 251 case LZMA_MF_HC3: 252 mf->find = &lzma_mf_hc3_find; 253 mf->skip = &lzma_mf_hc3_skip; 254 break; 255#endif 256#ifdef HAVE_MF_HC4 257 case LZMA_MF_HC4: 258 mf->find = &lzma_mf_hc4_find; 259 mf->skip = &lzma_mf_hc4_skip; 260 break; 261#endif 262#ifdef HAVE_MF_BT2 263 case LZMA_MF_BT2: 264 mf->find = &lzma_mf_bt2_find; 265 mf->skip = &lzma_mf_bt2_skip; 266 break; 267#endif 268#ifdef HAVE_MF_BT3 269 case LZMA_MF_BT3: 270 mf->find = &lzma_mf_bt3_find; 271 mf->skip = &lzma_mf_bt3_skip; 272 break; 273#endif 274#ifdef HAVE_MF_BT4 275 case LZMA_MF_BT4: 276 mf->find = &lzma_mf_bt4_find; 277 mf->skip = &lzma_mf_bt4_skip; 278 break; 279#endif 280 281 default: 282 return true; 283 } 284 285 // Calculate the sizes of mf->hash and mf->son and check that 286 // nice_len is big enough for the selected match finder. 287 const uint32_t hash_bytes = lz_options->match_finder & 0x0F; 288 if (hash_bytes > mf->nice_len) 289 return true; 290 291 const bool is_bt = (lz_options->match_finder & 0x10) != 0; 292 uint32_t hs; 293 294 if (hash_bytes == 2) { 295 hs = 0xFFFF; 296 } else { 297 // Round dictionary size up to the next 2^n - 1 so it can 298 // be used as a hash mask. 299 hs = lz_options->dict_size - 1; 300 hs |= hs >> 1; 301 hs |= hs >> 2; 302 hs |= hs >> 4; 303 hs |= hs >> 8; 304 hs >>= 1; 305 hs |= 0xFFFF; 306 307 if (hs > (UINT32_C(1) << 24)) { 308 if (hash_bytes == 3) 309 hs = (UINT32_C(1) << 24) - 1; 310 else 311 hs >>= 1; 312 } 313 } 314 315 mf->hash_mask = hs; 316 317 ++hs; 318 if (hash_bytes > 2) 319 hs += HASH_2_SIZE; 320 if (hash_bytes > 3) 321 hs += HASH_3_SIZE; 322/* 323 No match finder uses this at the moment. 324 if (mf->hash_bytes > 4) 325 hs += HASH_4_SIZE; 326*/ 327 328 // If the above code calculating hs is modified, make sure that 329 // this assertion stays valid (UINT32_MAX / 5 is not strictly the 330 // exact limit). If it doesn't, you need to calculate that 331 // hash_size_sum + sons_count cannot overflow. 332 assert(hs < UINT32_MAX / 5); 333 334 const uint32_t old_count = mf->hash_size_sum + mf->sons_count; 335 mf->hash_size_sum = hs; 336 mf->sons_count = mf->cyclic_size; 337 if (is_bt) 338 mf->sons_count *= 2; 339 340 const uint32_t new_count = mf->hash_size_sum + mf->sons_count; 341 342 // Deallocate the old hash array if it exists and has different size 343 // than what is needed now. 344 if (mf->hash != NULL && old_count != new_count) { 345 lzma_free(mf->hash, allocator); 346 mf->hash = NULL; 347 } 348 349 // Maximum number of match finder cycles 350 mf->depth = lz_options->depth; 351 if (mf->depth == 0) { 352 mf->depth = 16 + (mf->nice_len / 2); 353 if (!is_bt) 354 mf->depth /= 2; 355 } 356 357 return false; 358} 359 360 361static bool 362lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, 363 const lzma_lz_options *lz_options) 364{ 365 // Allocate the history buffer. 366 if (mf->buffer == NULL) { 367 mf->buffer = lzma_alloc(mf->size, allocator); 368 if (mf->buffer == NULL) 369 return true; 370 } 371 372 // Use cyclic_size as initial mf->offset. This allows 373 // avoiding a few branches in the match finders. The downside is 374 // that match finder needs to be normalized more often, which may 375 // hurt performance with huge dictionaries. 376 mf->offset = mf->cyclic_size; 377 mf->read_pos = 0; 378 mf->read_ahead = 0; 379 mf->read_limit = 0; 380 mf->write_pos = 0; 381 mf->pending = 0; 382 383 // Allocate match finder's hash array. 384 const size_t alloc_count = mf->hash_size_sum + mf->sons_count; 385 386#if UINT32_MAX >= SIZE_MAX / 4 387 // Check for integer overflow. (Huge dictionaries are not 388 // possible on 32-bit CPU.) 389 if (alloc_count > SIZE_MAX / sizeof(uint32_t)) 390 return true; 391#endif 392 393 if (mf->hash == NULL) { 394 mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t), 395 allocator); 396 if (mf->hash == NULL) 397 return true; 398 } 399 400 mf->son = mf->hash + mf->hash_size_sum; 401 mf->cyclic_pos = 0; 402 403 // Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we 404 // can use memset(). 405/* 406 for (uint32_t i = 0; i < hash_size_sum; ++i) 407 mf->hash[i] = EMPTY_HASH_VALUE; 408*/ 409 memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t)); 410 411 // We don't need to initialize mf->son, but not doing that will 412 // make Valgrind complain in normalization (see normalize() in 413 // lz_encoder_mf.c). 414 // 415 // Skipping this initialization is *very* good when big dictionary is 416 // used but only small amount of data gets actually compressed: most 417 // of the mf->hash won't get actually allocated by the kernel, so 418 // we avoid wasting RAM and improve initialization speed a lot. 419 //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); 420 421 // Handle preset dictionary. 422 if (lz_options->preset_dict != NULL 423 && lz_options->preset_dict_size > 0) { 424 // If the preset dictionary is bigger than the actual 425 // dictionary, use only the tail. 426 mf->write_pos = MIN(lz_options->preset_dict_size, mf->size); 427 memcpy(mf->buffer, lz_options->preset_dict 428 + lz_options->preset_dict_size - mf->write_pos, 429 mf->write_pos); 430 mf->action = LZMA_SYNC_FLUSH; 431 mf->skip(mf, mf->write_pos); 432 } 433 434 mf->action = LZMA_RUN; 435 436 return false; 437} 438 439 440extern uint64_t 441lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) 442{ 443 // Old buffers must not exist when calling lz_encoder_prepare(). 444 lzma_mf mf = { 445 .buffer = NULL, 446 .hash = NULL, 447 }; 448 449 // Setup the size information into mf. 450 if (lz_encoder_prepare(&mf, NULL, lz_options)) 451 return UINT64_MAX; 452 453 // Calculate the memory usage. 454 return (uint64_t)(mf.hash_size_sum + mf.sons_count) 455 * sizeof(uint32_t) 456 + (uint64_t)(mf.size) + sizeof(lzma_coder); 457} 458 459 460static void 461lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) 462{ 463 lzma_next_end(&coder->next, allocator); 464 465 lzma_free(coder->mf.hash, allocator); 466 lzma_free(coder->mf.buffer, allocator); 467 468 if (coder->lz.end != NULL) 469 coder->lz.end(coder->lz.coder, allocator); 470 else 471 lzma_free(coder->lz.coder, allocator); 472 473 lzma_free(coder, allocator); 474 return; 475} 476 477 478static lzma_ret 479lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator, 480 const lzma_filter *filters_null lzma_attribute((unused)), 481 const lzma_filter *reversed_filters) 482{ 483 if (coder->lz.options_update == NULL) 484 return LZMA_PROG_ERROR; 485 486 return_if_error(coder->lz.options_update( 487 coder->lz.coder, reversed_filters)); 488 489 return lzma_next_filter_update( 490 &coder->next, allocator, reversed_filters + 1); 491} 492 493 494extern lzma_ret 495lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, 496 const lzma_filter_info *filters, 497 lzma_ret (*lz_init)(lzma_lz_encoder *lz, 498 lzma_allocator *allocator, const void *options, 499 lzma_lz_options *lz_options)) 500{ 501#ifdef HAVE_SMALL 502 // We need that the CRC32 table has been initialized. 503 lzma_crc32_init(); 504#endif 505 506 // Allocate and initialize the base data structure. 507 if (next->coder == NULL) { 508 next->coder = lzma_alloc(sizeof(lzma_coder), allocator); 509 if (next->coder == NULL) 510 return LZMA_MEM_ERROR; 511 512 next->code = &lz_encode; 513 next->end = &lz_encoder_end; 514 next->update = &lz_encoder_update; 515 516 next->coder->lz.coder = NULL; 517 next->coder->lz.code = NULL; 518 next->coder->lz.end = NULL; 519 520 next->coder->mf.buffer = NULL; 521 next->coder->mf.hash = NULL; 522 523 next->coder->next = LZMA_NEXT_CODER_INIT; 524 } 525 526 // Initialize the LZ-based encoder. 527 lzma_lz_options lz_options; 528 return_if_error(lz_init(&next->coder->lz, allocator, 529 filters[0].options, &lz_options)); 530 531 // Setup the size information into next->coder->mf and deallocate 532 // old buffers if they have wrong size. 533 if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options)) 534 return LZMA_OPTIONS_ERROR; 535 536 // Allocate new buffers if needed, and do the rest of 537 // the initialization. 538 if (lz_encoder_init(&next->coder->mf, allocator, &lz_options)) 539 return LZMA_MEM_ERROR; 540 541 // Initialize the next filter in the chain, if any. 542 return lzma_next_filter_init(&next->coder->next, allocator, 543 filters + 1); 544} 545 546 547extern LZMA_API(lzma_bool) 548lzma_mf_is_supported(lzma_match_finder mf) 549{ 550 bool ret = false; 551 552#ifdef HAVE_MF_HC3 553 if (mf == LZMA_MF_HC3) 554 ret = true; 555#endif 556 557#ifdef HAVE_MF_HC4 558 if (mf == LZMA_MF_HC4) 559 ret = true; 560#endif 561 562#ifdef HAVE_MF_BT2 563 if (mf == LZMA_MF_BT2) 564 ret = true; 565#endif 566 567#ifdef HAVE_MF_BT3 568 if (mf == LZMA_MF_BT3) 569 ret = true; 570#endif 571 572#ifdef HAVE_MF_BT4 573 if (mf == LZMA_MF_BT4) 574 ret = true; 575#endif 576 577 return ret; 578} 579