lzma_encoder.c revision 207753
1254885Sdumbbell/////////////////////////////////////////////////////////////////////////////// 2254885Sdumbbell// 3254885Sdumbbell/// \file lzma_encoder.c 4254885Sdumbbell/// \brief LZMA encoder 5254885Sdumbbell/// 6254885Sdumbbell// Authors: Igor Pavlov 7254885Sdumbbell// Lasse Collin 8254885Sdumbbell// 9254885Sdumbbell// This file has been put into the public domain. 10254885Sdumbbell// You can do whatever you want with this file. 11254885Sdumbbell// 12254885Sdumbbell/////////////////////////////////////////////////////////////////////////////// 13254885Sdumbbell 14254885Sdumbbell#include "lzma2_encoder.h" 15254885Sdumbbell#include "lzma_encoder_private.h" 16254885Sdumbbell#include "fastpos.h" 17254885Sdumbbell 18254885Sdumbbell 19254885Sdumbbell///////////// 20254885Sdumbbell// Literal // 21254885Sdumbbell///////////// 22254885Sdumbbell 23254885Sdumbbellstatic inline void 24254885Sdumbbellliteral_matched(lzma_range_encoder *rc, probability *subcoder, 25254885Sdumbbell uint32_t match_byte, uint32_t symbol) 26254885Sdumbbell{ 27254885Sdumbbell uint32_t offset = 0x100; 28254885Sdumbbell symbol += UINT32_C(1) << 8; 29254885Sdumbbell 30254885Sdumbbell do { 31254885Sdumbbell match_byte <<= 1; 32254885Sdumbbell const uint32_t match_bit = match_byte & offset; 33254885Sdumbbell const uint32_t subcoder_index 34254885Sdumbbell = offset + match_bit + (symbol >> 8); 35254885Sdumbbell const uint32_t bit = (symbol >> 7) & 1; 36254885Sdumbbell rc_bit(rc, &subcoder[subcoder_index], bit); 37254885Sdumbbell 38254885Sdumbbell symbol <<= 1; 39254885Sdumbbell offset &= ~(match_byte ^ symbol); 40254885Sdumbbell 41254885Sdumbbell } while (symbol < (UINT32_C(1) << 16)); 42254885Sdumbbell} 43254885Sdumbbell 44254885Sdumbbell 45254885Sdumbbellstatic inline void 46254885Sdumbbellliteral(lzma_coder *coder, lzma_mf *mf, uint32_t position) 47254885Sdumbbell{ 48254885Sdumbbell // Locate the literal byte to be encoded and the subcoder. 49254885Sdumbbell const uint8_t cur_byte = mf->buffer[ 50254885Sdumbbell mf->read_pos - mf->read_ahead]; 51254885Sdumbbell probability *subcoder = literal_subcoder(coder->literal, 52254885Sdumbbell coder->literal_context_bits, coder->literal_pos_mask, 53254885Sdumbbell position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); 54254885Sdumbbell 55254885Sdumbbell if (is_literal_state(coder->state)) { 56254885Sdumbbell // Previous LZMA-symbol was a literal. Encode a normal 57254885Sdumbbell // literal without a match byte. 58254885Sdumbbell rc_bittree(&coder->rc, subcoder, 8, cur_byte); 59254885Sdumbbell } else { 60254885Sdumbbell // Previous LZMA-symbol was a match. Use the last byte of 61254885Sdumbbell // the match as a "match byte". That is, compare the bits 62254885Sdumbbell // of the current literal and the match byte. 63254885Sdumbbell const uint8_t match_byte = mf->buffer[ 64254885Sdumbbell mf->read_pos - coder->reps[0] - 1 65254885Sdumbbell - mf->read_ahead]; 66254885Sdumbbell literal_matched(&coder->rc, subcoder, match_byte, cur_byte); 67254885Sdumbbell } 68254885Sdumbbell 69254885Sdumbbell update_literal(coder->state); 70254885Sdumbbell} 71254885Sdumbbell 72254885Sdumbbell 73254885Sdumbbell////////////////// 74254885Sdumbbell// Match length // 75254885Sdumbbell////////////////// 76254885Sdumbbell 77254885Sdumbbellstatic void 78254885Sdumbbelllength_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) 79254885Sdumbbell{ 80254885Sdumbbell const uint32_t table_size = lc->table_size; 81254885Sdumbbell lc->counters[pos_state] = table_size; 82254885Sdumbbell 83254885Sdumbbell const uint32_t a0 = rc_bit_0_price(lc->choice); 84254885Sdumbbell const uint32_t a1 = rc_bit_1_price(lc->choice); 85254885Sdumbbell const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); 86254885Sdumbbell const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); 87254885Sdumbbell uint32_t *const prices = lc->prices[pos_state]; 88254885Sdumbbell 89254885Sdumbbell uint32_t i; 90254885Sdumbbell for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) 91254885Sdumbbell prices[i] = a0 + rc_bittree_price(lc->low[pos_state], 92254885Sdumbbell LEN_LOW_BITS, i); 93254885Sdumbbell 94254885Sdumbbell for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) 95254885Sdumbbell prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], 96254885Sdumbbell LEN_MID_BITS, i - LEN_LOW_SYMBOLS); 97254885Sdumbbell 98254885Sdumbbell for (; i < table_size; ++i) 99254885Sdumbbell prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, 100254885Sdumbbell i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); 101254885Sdumbbell 102254885Sdumbbell return; 103254885Sdumbbell} 104254885Sdumbbell 105254885Sdumbbell 106254885Sdumbbellstatic inline void 107254885Sdumbbelllength(lzma_range_encoder *rc, lzma_length_encoder *lc, 108254885Sdumbbell const uint32_t pos_state, uint32_t len, const bool fast_mode) 109254885Sdumbbell{ 110254885Sdumbbell assert(len <= MATCH_LEN_MAX); 111254885Sdumbbell len -= MATCH_LEN_MIN; 112254885Sdumbbell 113254885Sdumbbell if (len < LEN_LOW_SYMBOLS) { 114254885Sdumbbell rc_bit(rc, &lc->choice, 0); 115254885Sdumbbell rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); 116254885Sdumbbell } else { 117254885Sdumbbell rc_bit(rc, &lc->choice, 1); 118254885Sdumbbell len -= LEN_LOW_SYMBOLS; 119254885Sdumbbell 120254885Sdumbbell if (len < LEN_MID_SYMBOLS) { 121254885Sdumbbell rc_bit(rc, &lc->choice2, 0); 122254885Sdumbbell rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); 123254885Sdumbbell } else { 124254885Sdumbbell rc_bit(rc, &lc->choice2, 1); 125254885Sdumbbell len -= LEN_MID_SYMBOLS; 126254885Sdumbbell rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); 127254885Sdumbbell } 128254885Sdumbbell } 129254885Sdumbbell 130254885Sdumbbell // Only getoptimum uses the prices so don't update the table when 131254885Sdumbbell // in fast mode. 132254885Sdumbbell if (!fast_mode) 133254885Sdumbbell if (--lc->counters[pos_state] == 0) 134254885Sdumbbell length_update_prices(lc, pos_state); 135254885Sdumbbell} 136254885Sdumbbell 137254885Sdumbbell 138254885Sdumbbell/////////// 139254885Sdumbbell// Match // 140254885Sdumbbell/////////// 141254885Sdumbbell 142254885Sdumbbellstatic inline void 143254885Sdumbbellmatch(lzma_coder *coder, const uint32_t pos_state, 144254885Sdumbbell const uint32_t distance, const uint32_t len) 145254885Sdumbbell{ 146254885Sdumbbell update_match(coder->state); 147254885Sdumbbell 148254885Sdumbbell length(&coder->rc, &coder->match_len_encoder, pos_state, len, 149254885Sdumbbell coder->fast_mode); 150254885Sdumbbell 151254885Sdumbbell const uint32_t pos_slot = get_pos_slot(distance); 152254885Sdumbbell const uint32_t len_to_pos_state = get_len_to_pos_state(len); 153254885Sdumbbell rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state], 154254885Sdumbbell POS_SLOT_BITS, pos_slot); 155254885Sdumbbell 156254885Sdumbbell if (pos_slot >= START_POS_MODEL_INDEX) { 157254885Sdumbbell const uint32_t footer_bits = (pos_slot >> 1) - 1; 158254885Sdumbbell const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; 159254885Sdumbbell const uint32_t pos_reduced = distance - base; 160254885Sdumbbell 161254885Sdumbbell if (pos_slot < END_POS_MODEL_INDEX) { 162254885Sdumbbell // Careful here: base - pos_slot - 1 can be -1, but 163254885Sdumbbell // rc_bittree_reverse starts at probs[1], not probs[0]. 164254885Sdumbbell rc_bittree_reverse(&coder->rc, 165254885Sdumbbell coder->pos_special + base - pos_slot - 1, 166254885Sdumbbell footer_bits, pos_reduced); 167254885Sdumbbell } else { 168254885Sdumbbell rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS, 169254885Sdumbbell footer_bits - ALIGN_BITS); 170254885Sdumbbell rc_bittree_reverse( 171254885Sdumbbell &coder->rc, coder->pos_align, 172254885Sdumbbell ALIGN_BITS, pos_reduced & ALIGN_MASK); 173254885Sdumbbell ++coder->align_price_count; 174254885Sdumbbell } 175254885Sdumbbell } 176254885Sdumbbell 177254885Sdumbbell coder->reps[3] = coder->reps[2]; 178254885Sdumbbell coder->reps[2] = coder->reps[1]; 179254885Sdumbbell coder->reps[1] = coder->reps[0]; 180254885Sdumbbell coder->reps[0] = distance; 181254885Sdumbbell ++coder->match_price_count; 182254885Sdumbbell} 183254885Sdumbbell 184254885Sdumbbell 185254885Sdumbbell//////////////////// 186254885Sdumbbell// Repeated match // 187254885Sdumbbell//////////////////// 188254885Sdumbbell 189254885Sdumbbellstatic inline void 190254885Sdumbbellrep_match(lzma_coder *coder, const uint32_t pos_state, 191254885Sdumbbell const uint32_t rep, const uint32_t len) 192254885Sdumbbell{ 193254885Sdumbbell if (rep == 0) { 194254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); 195254885Sdumbbell rc_bit(&coder->rc, 196254885Sdumbbell &coder->is_rep0_long[coder->state][pos_state], 197254885Sdumbbell len != 1); 198254885Sdumbbell } else { 199254885Sdumbbell const uint32_t distance = coder->reps[rep]; 200254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); 201254885Sdumbbell 202254885Sdumbbell if (rep == 1) { 203254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); 204254885Sdumbbell } else { 205254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); 206254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep2[coder->state], 207254885Sdumbbell rep - 2); 208254885Sdumbbell 209254885Sdumbbell if (rep == 3) 210254885Sdumbbell coder->reps[3] = coder->reps[2]; 211254885Sdumbbell 212254885Sdumbbell coder->reps[2] = coder->reps[1]; 213254885Sdumbbell } 214254885Sdumbbell 215254885Sdumbbell coder->reps[1] = coder->reps[0]; 216254885Sdumbbell coder->reps[0] = distance; 217254885Sdumbbell } 218254885Sdumbbell 219254885Sdumbbell if (len == 1) { 220254885Sdumbbell update_short_rep(coder->state); 221254885Sdumbbell } else { 222254885Sdumbbell length(&coder->rc, &coder->rep_len_encoder, pos_state, len, 223254885Sdumbbell coder->fast_mode); 224254885Sdumbbell update_long_rep(coder->state); 225254885Sdumbbell } 226254885Sdumbbell} 227254885Sdumbbell 228254885Sdumbbell 229254885Sdumbbell////////// 230254885Sdumbbell// Main // 231254885Sdumbbell////////// 232254885Sdumbbell 233254885Sdumbbellstatic void 234254885Sdumbbellencode_symbol(lzma_coder *coder, lzma_mf *mf, 235254885Sdumbbell uint32_t back, uint32_t len, uint32_t position) 236254885Sdumbbell{ 237254885Sdumbbell const uint32_t pos_state = position & coder->pos_mask; 238254885Sdumbbell 239254885Sdumbbell if (back == UINT32_MAX) { 240254885Sdumbbell // Literal i.e. eight-bit byte 241254885Sdumbbell assert(len == 1); 242254885Sdumbbell rc_bit(&coder->rc, 243254885Sdumbbell &coder->is_match[coder->state][pos_state], 0); 244254885Sdumbbell literal(coder, mf, position); 245254885Sdumbbell } else { 246254885Sdumbbell // Some type of match 247254885Sdumbbell rc_bit(&coder->rc, 248254885Sdumbbell &coder->is_match[coder->state][pos_state], 1); 249254885Sdumbbell 250254885Sdumbbell if (back < REP_DISTANCES) { 251254885Sdumbbell // It's a repeated match i.e. the same distance 252254885Sdumbbell // has been used earlier. 253254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); 254254885Sdumbbell rep_match(coder, pos_state, back, len); 255254885Sdumbbell } else { 256254885Sdumbbell // Normal match 257254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 258254885Sdumbbell match(coder, pos_state, back - REP_DISTANCES, len); 259254885Sdumbbell } 260254885Sdumbbell } 261254885Sdumbbell 262254885Sdumbbell assert(mf->read_ahead >= len); 263254885Sdumbbell mf->read_ahead -= len; 264254885Sdumbbell} 265254885Sdumbbell 266254885Sdumbbell 267254885Sdumbbellstatic bool 268254885Sdumbbellencode_init(lzma_coder *coder, lzma_mf *mf) 269254885Sdumbbell{ 270254885Sdumbbell assert(mf_position(mf) == 0); 271254885Sdumbbell 272254885Sdumbbell if (mf->read_pos == mf->read_limit) { 273254885Sdumbbell if (mf->action == LZMA_RUN) 274254885Sdumbbell return false; // We cannot do anything. 275254885Sdumbbell 276254885Sdumbbell // We are finishing (we cannot get here when flushing). 277254885Sdumbbell assert(mf->write_pos == mf->read_pos); 278254885Sdumbbell assert(mf->action == LZMA_FINISH); 279254885Sdumbbell } else { 280254885Sdumbbell // Do the actual initialization. The first LZMA symbol must 281254885Sdumbbell // always be a literal. 282254885Sdumbbell mf_skip(mf, 1); 283254885Sdumbbell mf->read_ahead = 0; 284254885Sdumbbell rc_bit(&coder->rc, &coder->is_match[0][0], 0); 285254885Sdumbbell rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); 286254885Sdumbbell } 287254885Sdumbbell 288254885Sdumbbell // Initialization is done (except if empty file). 289254885Sdumbbell coder->is_initialized = true; 290254885Sdumbbell 291254885Sdumbbell return true; 292254885Sdumbbell} 293254885Sdumbbell 294254885Sdumbbell 295254885Sdumbbellstatic void 296254885Sdumbbellencode_eopm(lzma_coder *coder, uint32_t position) 297254885Sdumbbell{ 298254885Sdumbbell const uint32_t pos_state = position & coder->pos_mask; 299254885Sdumbbell rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); 300254885Sdumbbell rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 301254885Sdumbbell match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); 302254885Sdumbbell} 303254885Sdumbbell 304254885Sdumbbell 305254885Sdumbbell/// Number of bytes that a single encoding loop in lzma_lzma_encode() can 306254885Sdumbbell/// consume from the dictionary. This limit comes from lzma_lzma_optimum() 307254885Sdumbbell/// and may need to be updated if that function is significantly modified. 308254885Sdumbbell#define LOOP_INPUT_MAX (OPTS + 1) 309254885Sdumbbell 310254885Sdumbbell 311254885Sdumbbellextern lzma_ret 312254885Sdumbbelllzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 313254885Sdumbbell uint8_t *restrict out, size_t *restrict out_pos, 314254885Sdumbbell size_t out_size, uint32_t limit) 315254885Sdumbbell{ 316254885Sdumbbell // Initialize the stream if no data has been encoded yet. 317254885Sdumbbell if (!coder->is_initialized && !encode_init(coder, mf)) 318254885Sdumbbell return LZMA_OK; 319254885Sdumbbell 320254885Sdumbbell // Get the lowest bits of the uncompressed offset from the LZ layer. 321254885Sdumbbell uint32_t position = mf_position(mf); 322254885Sdumbbell 323254885Sdumbbell while (true) { 324254885Sdumbbell // Encode pending bits, if any. Calling this before encoding 325254885Sdumbbell // the next symbol is needed only with plain LZMA, since 326254885Sdumbbell // LZMA2 always provides big enough buffer to flush 327254885Sdumbbell // everything out from the range encoder. For the same reason, 328254885Sdumbbell // rc_encode() never returns true when this function is used 329254885Sdumbbell // as part of LZMA2 encoder. 330254885Sdumbbell if (rc_encode(&coder->rc, out, out_pos, out_size)) { 331254885Sdumbbell assert(limit == UINT32_MAX); 332254885Sdumbbell return LZMA_OK; 333254885Sdumbbell } 334254885Sdumbbell 335254885Sdumbbell // With LZMA2 we need to take care that compressed size of 336254885Sdumbbell // a chunk doesn't get too big. 337254885Sdumbbell // TODO 338254885Sdumbbell if (limit != UINT32_MAX 339254885Sdumbbell && (mf->read_pos - mf->read_ahead >= limit 340254885Sdumbbell || *out_pos + rc_pending(&coder->rc) 341254885Sdumbbell >= LZMA2_CHUNK_MAX 342254885Sdumbbell - LOOP_INPUT_MAX)) 343254885Sdumbbell break; 344254885Sdumbbell 345254885Sdumbbell // Check that there is some input to process. 346254885Sdumbbell if (mf->read_pos >= mf->read_limit) { 347254885Sdumbbell if (mf->action == LZMA_RUN) 348254885Sdumbbell return LZMA_OK; 349254885Sdumbbell 350254885Sdumbbell if (mf->read_ahead == 0) 351254885Sdumbbell break; 352254885Sdumbbell } 353254885Sdumbbell 354254885Sdumbbell // Get optimal match (repeat position and length). 355254885Sdumbbell // Value ranges for pos: 356254885Sdumbbell // - [0, REP_DISTANCES): repeated match 357254885Sdumbbell // - [REP_DISTANCES, UINT32_MAX): 358254885Sdumbbell // match at (pos - REP_DISTANCES) 359254885Sdumbbell // - UINT32_MAX: not a match but a literal 360254885Sdumbbell // Value ranges for len: 361254885Sdumbbell // - [MATCH_LEN_MIN, MATCH_LEN_MAX] 362254885Sdumbbell uint32_t len; 363254885Sdumbbell uint32_t back; 364254885Sdumbbell 365254885Sdumbbell if (coder->fast_mode) 366254885Sdumbbell lzma_lzma_optimum_fast(coder, mf, &back, &len); 367254885Sdumbbell else 368254885Sdumbbell lzma_lzma_optimum_normal( 369254885Sdumbbell coder, mf, &back, &len, position); 370254885Sdumbbell 371254885Sdumbbell encode_symbol(coder, mf, back, len, position); 372254885Sdumbbell 373254885Sdumbbell position += len; 374254885Sdumbbell } 375254885Sdumbbell 376254885Sdumbbell if (!coder->is_flushed) { 377254885Sdumbbell coder->is_flushed = true; 378254885Sdumbbell 379254885Sdumbbell // We don't support encoding plain LZMA streams without EOPM, 380254885Sdumbbell // and LZMA2 doesn't use EOPM at LZMA level. 381254885Sdumbbell if (limit == UINT32_MAX) 382254885Sdumbbell encode_eopm(coder, position); 383254885Sdumbbell 384254885Sdumbbell // Flush the remaining bytes from the range encoder. 385254885Sdumbbell rc_flush(&coder->rc); 386254885Sdumbbell 387254885Sdumbbell // Copy the remaining bytes to the output buffer. If there 388254885Sdumbbell // isn't enough output space, we will copy out the remaining 389254885Sdumbbell // bytes on the next call to this function by using 390254885Sdumbbell // the rc_encode() call in the encoding loop above. 391254885Sdumbbell if (rc_encode(&coder->rc, out, out_pos, out_size)) { 392254885Sdumbbell assert(limit == UINT32_MAX); 393254885Sdumbbell return LZMA_OK; 394254885Sdumbbell } 395254885Sdumbbell } 396254885Sdumbbell 397254885Sdumbbell // Make it ready for the next LZMA2 chunk. 398254885Sdumbbell coder->is_flushed = false; 399254885Sdumbbell 400254885Sdumbbell return LZMA_STREAM_END; 401254885Sdumbbell} 402254885Sdumbbell 403254885Sdumbbell 404254885Sdumbbellstatic lzma_ret 405254885Sdumbbelllzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 406254885Sdumbbell uint8_t *restrict out, size_t *restrict out_pos, 407254885Sdumbbell size_t out_size) 408254885Sdumbbell{ 409254885Sdumbbell // Plain LZMA has no support for sync-flushing. 410254885Sdumbbell if (unlikely(mf->action == LZMA_SYNC_FLUSH)) 411254885Sdumbbell return LZMA_OPTIONS_ERROR; 412254885Sdumbbell 413254885Sdumbbell return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); 414254885Sdumbbell} 415254885Sdumbbell 416254885Sdumbbell 417254885Sdumbbell//////////////////// 418254885Sdumbbell// Initialization // 419254885Sdumbbell//////////////////// 420254885Sdumbbell 421254885Sdumbbellstatic bool 422254885Sdumbbellis_options_valid(const lzma_options_lzma *options) 423254885Sdumbbell{ 424254885Sdumbbell // Validate some of the options. LZ encoder validates nice_len too 425254885Sdumbbell // but we need a valid value here earlier. 426254885Sdumbbell return is_lclppb_valid(options) 427254885Sdumbbell && options->nice_len >= MATCH_LEN_MIN 428254885Sdumbbell && options->nice_len <= MATCH_LEN_MAX 429254885Sdumbbell && (options->mode == LZMA_MODE_FAST 430254885Sdumbbell || options->mode == LZMA_MODE_NORMAL); 431254885Sdumbbell} 432254885Sdumbbell 433254885Sdumbbell 434254885Sdumbbellstatic void 435254885Sdumbbellset_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) 436254885Sdumbbell{ 437254885Sdumbbell // LZ encoder initialization does the validation for these so we 438254885Sdumbbell // don't need to validate here. 439254885Sdumbbell lz_options->before_size = OPTS; 440254885Sdumbbell lz_options->dict_size = options->dict_size; 441254885Sdumbbell lz_options->after_size = LOOP_INPUT_MAX; 442254885Sdumbbell lz_options->match_len_max = MATCH_LEN_MAX; 443254885Sdumbbell lz_options->nice_len = options->nice_len; 444254885Sdumbbell lz_options->match_finder = options->mf; 445254885Sdumbbell lz_options->depth = options->depth; 446254885Sdumbbell lz_options->preset_dict = options->preset_dict; 447254885Sdumbbell lz_options->preset_dict_size = options->preset_dict_size; 448254885Sdumbbell return; 449254885Sdumbbell} 450254885Sdumbbell 451254885Sdumbbell 452254885Sdumbbellstatic void 453254885Sdumbbelllength_encoder_reset(lzma_length_encoder *lencoder, 454254885Sdumbbell const uint32_t num_pos_states, const bool fast_mode) 455254885Sdumbbell{ 456254885Sdumbbell bit_reset(lencoder->choice); 457254885Sdumbbell bit_reset(lencoder->choice2); 458254885Sdumbbell 459254885Sdumbbell for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { 460254885Sdumbbell bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); 461254885Sdumbbell bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); 462254885Sdumbbell } 463254885Sdumbbell 464254885Sdumbbell bittree_reset(lencoder->high, LEN_HIGH_BITS); 465254885Sdumbbell 466254885Sdumbbell if (!fast_mode) 467254885Sdumbbell for (size_t pos_state = 0; pos_state < num_pos_states; 468254885Sdumbbell ++pos_state) 469254885Sdumbbell length_update_prices(lencoder, pos_state); 470254885Sdumbbell 471254885Sdumbbell return; 472254885Sdumbbell} 473254885Sdumbbell 474254885Sdumbbell 475254885Sdumbbellextern lzma_ret 476254885Sdumbbelllzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) 477254885Sdumbbell{ 478254885Sdumbbell if (!is_options_valid(options)) 479254885Sdumbbell return LZMA_OPTIONS_ERROR; 480254885Sdumbbell 481254885Sdumbbell coder->pos_mask = (1U << options->pb) - 1; 482254885Sdumbbell coder->literal_context_bits = options->lc; 483254885Sdumbbell coder->literal_pos_mask = (1U << options->lp) - 1; 484254885Sdumbbell 485254885Sdumbbell // Range coder 486254885Sdumbbell rc_reset(&coder->rc); 487254885Sdumbbell 488254885Sdumbbell // State 489254885Sdumbbell coder->state = STATE_LIT_LIT; 490254885Sdumbbell for (size_t i = 0; i < REP_DISTANCES; ++i) 491254885Sdumbbell coder->reps[i] = 0; 492254885Sdumbbell 493254885Sdumbbell literal_init(coder->literal, options->lc, options->lp); 494254885Sdumbbell 495254885Sdumbbell // Bit encoders 496254885Sdumbbell for (size_t i = 0; i < STATES; ++i) { 497254885Sdumbbell for (size_t j = 0; j <= coder->pos_mask; ++j) { 498254885Sdumbbell bit_reset(coder->is_match[i][j]); 499254885Sdumbbell bit_reset(coder->is_rep0_long[i][j]); 500254885Sdumbbell } 501254885Sdumbbell 502254885Sdumbbell bit_reset(coder->is_rep[i]); 503254885Sdumbbell bit_reset(coder->is_rep0[i]); 504254885Sdumbbell bit_reset(coder->is_rep1[i]); 505254885Sdumbbell bit_reset(coder->is_rep2[i]); 506254885Sdumbbell } 507254885Sdumbbell 508254885Sdumbbell for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) 509254885Sdumbbell bit_reset(coder->pos_special[i]); 510254885Sdumbbell 511254885Sdumbbell // Bit tree encoders 512254885Sdumbbell for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) 513254885Sdumbbell bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); 514254885Sdumbbell 515254885Sdumbbell bittree_reset(coder->pos_align, ALIGN_BITS); 516254885Sdumbbell 517254885Sdumbbell // Length encoders 518254885Sdumbbell length_encoder_reset(&coder->match_len_encoder, 519254885Sdumbbell 1U << options->pb, coder->fast_mode); 520254885Sdumbbell 521254885Sdumbbell length_encoder_reset(&coder->rep_len_encoder, 522254885Sdumbbell 1U << options->pb, coder->fast_mode); 523254885Sdumbbell 524254885Sdumbbell // Price counts are incremented every time appropriate probabilities 525254885Sdumbbell // are changed. price counts are set to zero when the price tables 526254885Sdumbbell // are updated, which is done when the appropriate price counts have 527254885Sdumbbell // big enough value, and lzma_mf.read_ahead == 0 which happens at 528254885Sdumbbell // least every OPTS (a few thousand) possible price count increments. 529254885Sdumbbell // 530254885Sdumbbell // By resetting price counts to UINT32_MAX / 2, we make sure that the 531254885Sdumbbell // price tables will be initialized before they will be used (since 532254885Sdumbbell // the value is definitely big enough), and that it is OK to increment 533254885Sdumbbell // price counts without risk of integer overflow (since UINT32_MAX / 2 534254885Sdumbbell // is small enough). The current code doesn't increment price counts 535254885Sdumbbell // before initializing price tables, but it maybe done in future if 536254885Sdumbbell // we add support for saving the state between LZMA2 chunks. 537254885Sdumbbell coder->match_price_count = UINT32_MAX / 2; 538254885Sdumbbell coder->align_price_count = UINT32_MAX / 2; 539254885Sdumbbell 540254885Sdumbbell coder->opts_end_index = 0; 541254885Sdumbbell coder->opts_current_index = 0; 542254885Sdumbbell 543254885Sdumbbell return LZMA_OK; 544254885Sdumbbell} 545254885Sdumbbell 546254885Sdumbbell 547254885Sdumbbellextern lzma_ret 548254885Sdumbbelllzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, 549254885Sdumbbell const lzma_options_lzma *options, lzma_lz_options *lz_options) 550254885Sdumbbell{ 551254885Sdumbbell // Allocate lzma_coder if it wasn't already allocated. 552254885Sdumbbell if (*coder_ptr == NULL) { 553254885Sdumbbell *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); 554254885Sdumbbell if (*coder_ptr == NULL) 555254885Sdumbbell return LZMA_MEM_ERROR; 556254885Sdumbbell } 557254885Sdumbbell 558254885Sdumbbell lzma_coder *coder = *coder_ptr; 559254885Sdumbbell 560254885Sdumbbell // Set compression mode. We haven't validates the options yet, 561254885Sdumbbell // but it's OK here, since nothing bad happens with invalid 562254885Sdumbbell // options in the code below, and they will get rejected by 563254885Sdumbbell // lzma_lzma_encoder_reset() call at the end of this function. 564254885Sdumbbell switch (options->mode) { 565254885Sdumbbell case LZMA_MODE_FAST: 566254885Sdumbbell coder->fast_mode = true; 567254885Sdumbbell break; 568254885Sdumbbell 569254885Sdumbbell case LZMA_MODE_NORMAL: { 570254885Sdumbbell coder->fast_mode = false; 571254885Sdumbbell 572254885Sdumbbell // Set dist_table_size. 573254885Sdumbbell // Round the dictionary size up to next 2^n. 574254885Sdumbbell uint32_t log_size = 0; 575254885Sdumbbell while ((UINT32_C(1) << log_size) < options->dict_size) 576254885Sdumbbell ++log_size; 577254885Sdumbbell 578254885Sdumbbell coder->dist_table_size = log_size * 2; 579254885Sdumbbell 580254885Sdumbbell // Length encoders' price table size 581254885Sdumbbell coder->match_len_encoder.table_size 582254885Sdumbbell = options->nice_len + 1 - MATCH_LEN_MIN; 583254885Sdumbbell coder->rep_len_encoder.table_size 584254885Sdumbbell = options->nice_len + 1 - MATCH_LEN_MIN; 585254885Sdumbbell break; 586254885Sdumbbell } 587254885Sdumbbell 588254885Sdumbbell default: 589254885Sdumbbell return LZMA_OPTIONS_ERROR; 590254885Sdumbbell } 591254885Sdumbbell 592254885Sdumbbell // We don't need to write the first byte as literal if there is 593254885Sdumbbell // a non-empty preset dictionary. encode_init() wouldn't even work 594254885Sdumbbell // if there is a non-empty preset dictionary, because encode_init() 595254885Sdumbbell // assumes that position is zero and previous byte is also zero. 596254885Sdumbbell coder->is_initialized = options->preset_dict != NULL 597254885Sdumbbell && options->preset_dict_size > 0; 598254885Sdumbbell coder->is_flushed = false; 599254885Sdumbbell 600254885Sdumbbell set_lz_options(lz_options, options); 601254885Sdumbbell 602254885Sdumbbell return lzma_lzma_encoder_reset(coder, options); 603254885Sdumbbell} 604254885Sdumbbell 605254885Sdumbbell 606254885Sdumbbellstatic lzma_ret 607254885Sdumbbelllzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, 608254885Sdumbbell const void *options, lzma_lz_options *lz_options) 609254885Sdumbbell{ 610254885Sdumbbell lz->code = &lzma_encode; 611254885Sdumbbell return lzma_lzma_encoder_create( 612254885Sdumbbell &lz->coder, allocator, options, lz_options); 613254885Sdumbbell} 614254885Sdumbbell 615254885Sdumbbell 616254885Sdumbbellextern lzma_ret 617254885Sdumbbelllzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, 618254885Sdumbbell const lzma_filter_info *filters) 619254885Sdumbbell{ 620254885Sdumbbell return lzma_lz_encoder_init( 621254885Sdumbbell next, allocator, filters, &lzma_encoder_init); 622254885Sdumbbell} 623254885Sdumbbell 624254885Sdumbbell 625254885Sdumbbellextern uint64_t 626254885Sdumbbelllzma_lzma_encoder_memusage(const void *options) 627254885Sdumbbell{ 628254885Sdumbbell if (!is_options_valid(options)) 629254885Sdumbbell return UINT64_MAX; 630254885Sdumbbell 631254885Sdumbbell lzma_lz_options lz_options; 632254885Sdumbbell set_lz_options(&lz_options, options); 633254885Sdumbbell 634254885Sdumbbell const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); 635254885Sdumbbell if (lz_memusage == UINT64_MAX) 636254885Sdumbbell return UINT64_MAX; 637254885Sdumbbell 638254885Sdumbbell return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; 639254885Sdumbbell} 640254885Sdumbbell 641254885Sdumbbell 642254885Sdumbbellextern bool 643254885Sdumbbelllzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) 644254885Sdumbbell{ 645254885Sdumbbell if (!is_lclppb_valid(options)) 646254885Sdumbbell return true; 647254885Sdumbbell 648254885Sdumbbell *byte = (options->pb * 5 + options->lp) * 9 + options->lc; 649254885Sdumbbell assert(*byte <= (4 * 5 + 4) * 9 + 8); 650254885Sdumbbell 651254885Sdumbbell return false; 652254885Sdumbbell} 653254885Sdumbbell 654254885Sdumbbell 655254885Sdumbbell#ifdef HAVE_ENCODER_LZMA1 656254885Sdumbbellextern lzma_ret 657254885Sdumbbelllzma_lzma_props_encode(const void *options, uint8_t *out) 658254885Sdumbbell{ 659254885Sdumbbell const lzma_options_lzma *const opt = options; 660254885Sdumbbell 661254885Sdumbbell if (lzma_lzma_lclppb_encode(opt, out)) 662254885Sdumbbell return LZMA_PROG_ERROR; 663254885Sdumbbell 664254885Sdumbbell unaligned_write32le(out + 1, opt->dict_size); 665254885Sdumbbell 666254885Sdumbbell return LZMA_OK; 667254885Sdumbbell} 668254885Sdumbbell#endif 669254885Sdumbbell 670254885Sdumbbell 671254885Sdumbbellextern LZMA_API(lzma_bool) 672254885Sdumbbelllzma_mode_is_supported(lzma_mode mode) 673254885Sdumbbell{ 674254885Sdumbbell return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; 675254885Sdumbbell} 676254885Sdumbbell