1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lzma_encoder.c 4207753Smm/// \brief LZMA encoder 5207753Smm/// 6207753Smm// Authors: Igor Pavlov 7207753Smm// Lasse Collin 8207753Smm// 9207753Smm// This file has been put into the public domain. 10207753Smm// You can do whatever you want with this file. 11207753Smm// 12207753Smm/////////////////////////////////////////////////////////////////////////////// 13207753Smm 14207753Smm#include "lzma2_encoder.h" 15207753Smm#include "lzma_encoder_private.h" 16207753Smm#include "fastpos.h" 17207753Smm 18207753Smm 19207753Smm///////////// 20207753Smm// Literal // 21207753Smm///////////// 22207753Smm 23207753Smmstatic inline void 24207753Smmliteral_matched(lzma_range_encoder *rc, probability *subcoder, 25207753Smm uint32_t match_byte, uint32_t symbol) 26207753Smm{ 27207753Smm uint32_t offset = 0x100; 28207753Smm symbol += UINT32_C(1) << 8; 29207753Smm 30207753Smm do { 31207753Smm match_byte <<= 1; 32207753Smm const uint32_t match_bit = match_byte & offset; 33207753Smm const uint32_t subcoder_index 34207753Smm = offset + match_bit + (symbol >> 8); 35207753Smm const uint32_t bit = (symbol >> 7) & 1; 36207753Smm rc_bit(rc, &subcoder[subcoder_index], bit); 37207753Smm 38207753Smm symbol <<= 1; 39207753Smm offset &= ~(match_byte ^ symbol); 40207753Smm 41207753Smm } while (symbol < (UINT32_C(1) << 16)); 42207753Smm} 43207753Smm 44207753Smm 45207753Smmstatic inline void 46207753Smmliteral(lzma_coder *coder, lzma_mf *mf, uint32_t position) 47207753Smm{ 48207753Smm // Locate the literal byte to be encoded and the subcoder. 49207753Smm const uint8_t cur_byte = mf->buffer[ 50207753Smm mf->read_pos - mf->read_ahead]; 51207753Smm probability *subcoder = literal_subcoder(coder->literal, 52207753Smm coder->literal_context_bits, coder->literal_pos_mask, 53207753Smm position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); 54207753Smm 55207753Smm if (is_literal_state(coder->state)) { 56207753Smm // Previous LZMA-symbol was a literal. Encode a normal 57207753Smm // literal without a match byte. 58207753Smm rc_bittree(&coder->rc, subcoder, 8, cur_byte); 59207753Smm } else { 60207753Smm // Previous LZMA-symbol was a match. Use the last byte of 61207753Smm // the match as a "match byte". That is, compare the bits 62207753Smm // of the current literal and the match byte. 63207753Smm const uint8_t match_byte = mf->buffer[ 64207753Smm mf->read_pos - coder->reps[0] - 1 65207753Smm - mf->read_ahead]; 66207753Smm literal_matched(&coder->rc, subcoder, match_byte, cur_byte); 67207753Smm } 68207753Smm 69207753Smm update_literal(coder->state); 70207753Smm} 71207753Smm 72207753Smm 73207753Smm////////////////// 74207753Smm// Match length // 75207753Smm////////////////// 76207753Smm 77207753Smmstatic void 78207753Smmlength_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) 79207753Smm{ 80207753Smm const uint32_t table_size = lc->table_size; 81207753Smm lc->counters[pos_state] = table_size; 82207753Smm 83207753Smm const uint32_t a0 = rc_bit_0_price(lc->choice); 84207753Smm const uint32_t a1 = rc_bit_1_price(lc->choice); 85207753Smm const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); 86207753Smm const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); 87207753Smm uint32_t *const prices = lc->prices[pos_state]; 88207753Smm 89207753Smm uint32_t i; 90207753Smm for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) 91207753Smm prices[i] = a0 + rc_bittree_price(lc->low[pos_state], 92207753Smm LEN_LOW_BITS, i); 93207753Smm 94207753Smm for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) 95207753Smm prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], 96207753Smm LEN_MID_BITS, i - LEN_LOW_SYMBOLS); 97207753Smm 98207753Smm for (; i < table_size; ++i) 99207753Smm prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, 100207753Smm i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); 101207753Smm 102207753Smm return; 103207753Smm} 104207753Smm 105207753Smm 106207753Smmstatic inline void 107207753Smmlength(lzma_range_encoder *rc, lzma_length_encoder *lc, 108207753Smm const uint32_t pos_state, uint32_t len, const bool fast_mode) 109207753Smm{ 110207753Smm assert(len <= MATCH_LEN_MAX); 111207753Smm len -= MATCH_LEN_MIN; 112207753Smm 113207753Smm if (len < LEN_LOW_SYMBOLS) { 114207753Smm rc_bit(rc, &lc->choice, 0); 115207753Smm rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); 116207753Smm } else { 117207753Smm rc_bit(rc, &lc->choice, 1); 118207753Smm len -= LEN_LOW_SYMBOLS; 119207753Smm 120207753Smm if (len < LEN_MID_SYMBOLS) { 121207753Smm rc_bit(rc, &lc->choice2, 0); 122207753Smm rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); 123207753Smm } else { 124207753Smm rc_bit(rc, &lc->choice2, 1); 125207753Smm len -= LEN_MID_SYMBOLS; 126207753Smm rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); 127207753Smm } 128207753Smm } 129207753Smm 130207753Smm // Only getoptimum uses the prices so don't update the table when 131207753Smm // in fast mode. 132207753Smm if (!fast_mode) 133207753Smm if (--lc->counters[pos_state] == 0) 134207753Smm length_update_prices(lc, pos_state); 135207753Smm} 136207753Smm 137207753Smm 138207753Smm/////////// 139207753Smm// Match // 140207753Smm/////////// 141207753Smm 142207753Smmstatic inline void 143207753Smmmatch(lzma_coder *coder, const uint32_t pos_state, 144207753Smm const uint32_t distance, const uint32_t len) 145207753Smm{ 146207753Smm update_match(coder->state); 147207753Smm 148207753Smm length(&coder->rc, &coder->match_len_encoder, pos_state, len, 149207753Smm coder->fast_mode); 150207753Smm 151207753Smm const uint32_t pos_slot = get_pos_slot(distance); 152207753Smm const uint32_t len_to_pos_state = get_len_to_pos_state(len); 153207753Smm rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state], 154207753Smm POS_SLOT_BITS, pos_slot); 155207753Smm 156207753Smm if (pos_slot >= START_POS_MODEL_INDEX) { 157207753Smm const uint32_t footer_bits = (pos_slot >> 1) - 1; 158207753Smm const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; 159207753Smm const uint32_t pos_reduced = distance - base; 160207753Smm 161207753Smm if (pos_slot < END_POS_MODEL_INDEX) { 162207753Smm // Careful here: base - pos_slot - 1 can be -1, but 163207753Smm // rc_bittree_reverse starts at probs[1], not probs[0]. 164207753Smm rc_bittree_reverse(&coder->rc, 165207753Smm coder->pos_special + base - pos_slot - 1, 166207753Smm footer_bits, pos_reduced); 167207753Smm } else { 168207753Smm rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS, 169207753Smm footer_bits - ALIGN_BITS); 170207753Smm rc_bittree_reverse( 171207753Smm &coder->rc, coder->pos_align, 172207753Smm ALIGN_BITS, pos_reduced & ALIGN_MASK); 173207753Smm ++coder->align_price_count; 174207753Smm } 175207753Smm } 176207753Smm 177207753Smm coder->reps[3] = coder->reps[2]; 178207753Smm coder->reps[2] = coder->reps[1]; 179207753Smm coder->reps[1] = coder->reps[0]; 180207753Smm coder->reps[0] = distance; 181207753Smm ++coder->match_price_count; 182207753Smm} 183207753Smm 184207753Smm 185207753Smm//////////////////// 186207753Smm// Repeated match // 187207753Smm//////////////////// 188207753Smm 189207753Smmstatic inline void 190207753Smmrep_match(lzma_coder *coder, const uint32_t pos_state, 191207753Smm const uint32_t rep, const uint32_t len) 192207753Smm{ 193207753Smm if (rep == 0) { 194207753Smm rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); 195207753Smm rc_bit(&coder->rc, 196207753Smm &coder->is_rep0_long[coder->state][pos_state], 197207753Smm len != 1); 198207753Smm } else { 199207753Smm const uint32_t distance = coder->reps[rep]; 200207753Smm rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); 201207753Smm 202207753Smm if (rep == 1) { 203207753Smm rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); 204207753Smm } else { 205207753Smm rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); 206207753Smm rc_bit(&coder->rc, &coder->is_rep2[coder->state], 207207753Smm rep - 2); 208207753Smm 209207753Smm if (rep == 3) 210207753Smm coder->reps[3] = coder->reps[2]; 211207753Smm 212207753Smm coder->reps[2] = coder->reps[1]; 213207753Smm } 214207753Smm 215207753Smm coder->reps[1] = coder->reps[0]; 216207753Smm coder->reps[0] = distance; 217207753Smm } 218207753Smm 219207753Smm if (len == 1) { 220207753Smm update_short_rep(coder->state); 221207753Smm } else { 222207753Smm length(&coder->rc, &coder->rep_len_encoder, pos_state, len, 223207753Smm coder->fast_mode); 224207753Smm update_long_rep(coder->state); 225207753Smm } 226207753Smm} 227207753Smm 228207753Smm 229207753Smm////////// 230207753Smm// Main // 231207753Smm////////// 232207753Smm 233207753Smmstatic void 234207753Smmencode_symbol(lzma_coder *coder, lzma_mf *mf, 235207753Smm uint32_t back, uint32_t len, uint32_t position) 236207753Smm{ 237207753Smm const uint32_t pos_state = position & coder->pos_mask; 238207753Smm 239207753Smm if (back == UINT32_MAX) { 240207753Smm // Literal i.e. eight-bit byte 241207753Smm assert(len == 1); 242207753Smm rc_bit(&coder->rc, 243207753Smm &coder->is_match[coder->state][pos_state], 0); 244207753Smm literal(coder, mf, position); 245207753Smm } else { 246207753Smm // Some type of match 247207753Smm rc_bit(&coder->rc, 248207753Smm &coder->is_match[coder->state][pos_state], 1); 249207753Smm 250207753Smm if (back < REP_DISTANCES) { 251207753Smm // It's a repeated match i.e. the same distance 252207753Smm // has been used earlier. 253207753Smm rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); 254207753Smm rep_match(coder, pos_state, back, len); 255207753Smm } else { 256207753Smm // Normal match 257207753Smm rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 258207753Smm match(coder, pos_state, back - REP_DISTANCES, len); 259207753Smm } 260207753Smm } 261207753Smm 262207753Smm assert(mf->read_ahead >= len); 263207753Smm mf->read_ahead -= len; 264207753Smm} 265207753Smm 266207753Smm 267207753Smmstatic bool 268207753Smmencode_init(lzma_coder *coder, lzma_mf *mf) 269207753Smm{ 270207753Smm assert(mf_position(mf) == 0); 271207753Smm 272207753Smm if (mf->read_pos == mf->read_limit) { 273207753Smm if (mf->action == LZMA_RUN) 274207753Smm return false; // We cannot do anything. 275207753Smm 276207753Smm // We are finishing (we cannot get here when flushing). 277207753Smm assert(mf->write_pos == mf->read_pos); 278207753Smm assert(mf->action == LZMA_FINISH); 279207753Smm } else { 280207753Smm // Do the actual initialization. The first LZMA symbol must 281207753Smm // always be a literal. 282207753Smm mf_skip(mf, 1); 283207753Smm mf->read_ahead = 0; 284207753Smm rc_bit(&coder->rc, &coder->is_match[0][0], 0); 285207753Smm rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); 286207753Smm } 287207753Smm 288207753Smm // Initialization is done (except if empty file). 289207753Smm coder->is_initialized = true; 290207753Smm 291207753Smm return true; 292207753Smm} 293207753Smm 294207753Smm 295207753Smmstatic void 296207753Smmencode_eopm(lzma_coder *coder, uint32_t position) 297207753Smm{ 298207753Smm const uint32_t pos_state = position & coder->pos_mask; 299207753Smm rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); 300207753Smm rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 301207753Smm match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); 302207753Smm} 303207753Smm 304207753Smm 305207753Smm/// Number of bytes that a single encoding loop in lzma_lzma_encode() can 306207753Smm/// consume from the dictionary. This limit comes from lzma_lzma_optimum() 307207753Smm/// and may need to be updated if that function is significantly modified. 308207753Smm#define LOOP_INPUT_MAX (OPTS + 1) 309207753Smm 310207753Smm 311207753Smmextern lzma_ret 312207753Smmlzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 313207753Smm uint8_t *restrict out, size_t *restrict out_pos, 314207753Smm size_t out_size, uint32_t limit) 315207753Smm{ 316207753Smm // Initialize the stream if no data has been encoded yet. 317207753Smm if (!coder->is_initialized && !encode_init(coder, mf)) 318207753Smm return LZMA_OK; 319207753Smm 320207753Smm // Get the lowest bits of the uncompressed offset from the LZ layer. 321207753Smm uint32_t position = mf_position(mf); 322207753Smm 323207753Smm while (true) { 324207753Smm // Encode pending bits, if any. Calling this before encoding 325207753Smm // the next symbol is needed only with plain LZMA, since 326207753Smm // LZMA2 always provides big enough buffer to flush 327207753Smm // everything out from the range encoder. For the same reason, 328207753Smm // rc_encode() never returns true when this function is used 329207753Smm // as part of LZMA2 encoder. 330207753Smm if (rc_encode(&coder->rc, out, out_pos, out_size)) { 331207753Smm assert(limit == UINT32_MAX); 332207753Smm return LZMA_OK; 333207753Smm } 334207753Smm 335207753Smm // With LZMA2 we need to take care that compressed size of 336207753Smm // a chunk doesn't get too big. 337215187Smm // FIXME? Check if this could be improved. 338207753Smm if (limit != UINT32_MAX 339207753Smm && (mf->read_pos - mf->read_ahead >= limit 340207753Smm || *out_pos + rc_pending(&coder->rc) 341207753Smm >= LZMA2_CHUNK_MAX 342207753Smm - LOOP_INPUT_MAX)) 343207753Smm break; 344207753Smm 345207753Smm // Check that there is some input to process. 346207753Smm if (mf->read_pos >= mf->read_limit) { 347207753Smm if (mf->action == LZMA_RUN) 348207753Smm return LZMA_OK; 349207753Smm 350207753Smm if (mf->read_ahead == 0) 351207753Smm break; 352207753Smm } 353207753Smm 354207753Smm // Get optimal match (repeat position and length). 355207753Smm // Value ranges for pos: 356207753Smm // - [0, REP_DISTANCES): repeated match 357207753Smm // - [REP_DISTANCES, UINT32_MAX): 358207753Smm // match at (pos - REP_DISTANCES) 359207753Smm // - UINT32_MAX: not a match but a literal 360207753Smm // Value ranges for len: 361207753Smm // - [MATCH_LEN_MIN, MATCH_LEN_MAX] 362207753Smm uint32_t len; 363207753Smm uint32_t back; 364207753Smm 365207753Smm if (coder->fast_mode) 366207753Smm lzma_lzma_optimum_fast(coder, mf, &back, &len); 367207753Smm else 368207753Smm lzma_lzma_optimum_normal( 369207753Smm coder, mf, &back, &len, position); 370207753Smm 371207753Smm encode_symbol(coder, mf, back, len, position); 372207753Smm 373207753Smm position += len; 374207753Smm } 375207753Smm 376207753Smm if (!coder->is_flushed) { 377207753Smm coder->is_flushed = true; 378207753Smm 379207753Smm // We don't support encoding plain LZMA streams without EOPM, 380207753Smm // and LZMA2 doesn't use EOPM at LZMA level. 381207753Smm if (limit == UINT32_MAX) 382207753Smm encode_eopm(coder, position); 383207753Smm 384207753Smm // Flush the remaining bytes from the range encoder. 385207753Smm rc_flush(&coder->rc); 386207753Smm 387207753Smm // Copy the remaining bytes to the output buffer. If there 388207753Smm // isn't enough output space, we will copy out the remaining 389207753Smm // bytes on the next call to this function by using 390207753Smm // the rc_encode() call in the encoding loop above. 391207753Smm if (rc_encode(&coder->rc, out, out_pos, out_size)) { 392207753Smm assert(limit == UINT32_MAX); 393207753Smm return LZMA_OK; 394207753Smm } 395207753Smm } 396207753Smm 397207753Smm // Make it ready for the next LZMA2 chunk. 398207753Smm coder->is_flushed = false; 399207753Smm 400207753Smm return LZMA_STREAM_END; 401207753Smm} 402207753Smm 403207753Smm 404207753Smmstatic lzma_ret 405207753Smmlzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 406207753Smm uint8_t *restrict out, size_t *restrict out_pos, 407207753Smm size_t out_size) 408207753Smm{ 409207753Smm // Plain LZMA has no support for sync-flushing. 410207753Smm if (unlikely(mf->action == LZMA_SYNC_FLUSH)) 411207753Smm return LZMA_OPTIONS_ERROR; 412207753Smm 413207753Smm return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); 414207753Smm} 415207753Smm 416207753Smm 417207753Smm//////////////////// 418207753Smm// Initialization // 419207753Smm//////////////////// 420207753Smm 421207753Smmstatic bool 422207753Smmis_options_valid(const lzma_options_lzma *options) 423207753Smm{ 424207753Smm // Validate some of the options. LZ encoder validates nice_len too 425207753Smm // but we need a valid value here earlier. 426207753Smm return is_lclppb_valid(options) 427207753Smm && options->nice_len >= MATCH_LEN_MIN 428207753Smm && options->nice_len <= MATCH_LEN_MAX 429207753Smm && (options->mode == LZMA_MODE_FAST 430207753Smm || options->mode == LZMA_MODE_NORMAL); 431207753Smm} 432207753Smm 433207753Smm 434207753Smmstatic void 435207753Smmset_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) 436207753Smm{ 437207753Smm // LZ encoder initialization does the validation for these so we 438207753Smm // don't need to validate here. 439207753Smm lz_options->before_size = OPTS; 440207753Smm lz_options->dict_size = options->dict_size; 441207753Smm lz_options->after_size = LOOP_INPUT_MAX; 442207753Smm lz_options->match_len_max = MATCH_LEN_MAX; 443207753Smm lz_options->nice_len = options->nice_len; 444207753Smm lz_options->match_finder = options->mf; 445207753Smm lz_options->depth = options->depth; 446207753Smm lz_options->preset_dict = options->preset_dict; 447207753Smm lz_options->preset_dict_size = options->preset_dict_size; 448207753Smm return; 449207753Smm} 450207753Smm 451207753Smm 452207753Smmstatic void 453207753Smmlength_encoder_reset(lzma_length_encoder *lencoder, 454207753Smm const uint32_t num_pos_states, const bool fast_mode) 455207753Smm{ 456207753Smm bit_reset(lencoder->choice); 457207753Smm bit_reset(lencoder->choice2); 458207753Smm 459207753Smm for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { 460207753Smm bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); 461207753Smm bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); 462207753Smm } 463207753Smm 464207753Smm bittree_reset(lencoder->high, LEN_HIGH_BITS); 465207753Smm 466207753Smm if (!fast_mode) 467207753Smm for (size_t pos_state = 0; pos_state < num_pos_states; 468207753Smm ++pos_state) 469207753Smm length_update_prices(lencoder, pos_state); 470207753Smm 471207753Smm return; 472207753Smm} 473207753Smm 474207753Smm 475207753Smmextern lzma_ret 476207753Smmlzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) 477207753Smm{ 478207753Smm if (!is_options_valid(options)) 479207753Smm return LZMA_OPTIONS_ERROR; 480207753Smm 481207753Smm coder->pos_mask = (1U << options->pb) - 1; 482207753Smm coder->literal_context_bits = options->lc; 483207753Smm coder->literal_pos_mask = (1U << options->lp) - 1; 484207753Smm 485207753Smm // Range coder 486207753Smm rc_reset(&coder->rc); 487207753Smm 488207753Smm // State 489207753Smm coder->state = STATE_LIT_LIT; 490207753Smm for (size_t i = 0; i < REP_DISTANCES; ++i) 491207753Smm coder->reps[i] = 0; 492207753Smm 493207753Smm literal_init(coder->literal, options->lc, options->lp); 494207753Smm 495207753Smm // Bit encoders 496207753Smm for (size_t i = 0; i < STATES; ++i) { 497207753Smm for (size_t j = 0; j <= coder->pos_mask; ++j) { 498207753Smm bit_reset(coder->is_match[i][j]); 499207753Smm bit_reset(coder->is_rep0_long[i][j]); 500207753Smm } 501207753Smm 502207753Smm bit_reset(coder->is_rep[i]); 503207753Smm bit_reset(coder->is_rep0[i]); 504207753Smm bit_reset(coder->is_rep1[i]); 505207753Smm bit_reset(coder->is_rep2[i]); 506207753Smm } 507207753Smm 508207753Smm for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) 509207753Smm bit_reset(coder->pos_special[i]); 510207753Smm 511207753Smm // Bit tree encoders 512207753Smm for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) 513207753Smm bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); 514207753Smm 515207753Smm bittree_reset(coder->pos_align, ALIGN_BITS); 516207753Smm 517207753Smm // Length encoders 518207753Smm length_encoder_reset(&coder->match_len_encoder, 519207753Smm 1U << options->pb, coder->fast_mode); 520207753Smm 521207753Smm length_encoder_reset(&coder->rep_len_encoder, 522207753Smm 1U << options->pb, coder->fast_mode); 523207753Smm 524207753Smm // Price counts are incremented every time appropriate probabilities 525207753Smm // are changed. price counts are set to zero when the price tables 526207753Smm // are updated, which is done when the appropriate price counts have 527207753Smm // big enough value, and lzma_mf.read_ahead == 0 which happens at 528207753Smm // least every OPTS (a few thousand) possible price count increments. 529207753Smm // 530207753Smm // By resetting price counts to UINT32_MAX / 2, we make sure that the 531207753Smm // price tables will be initialized before they will be used (since 532207753Smm // the value is definitely big enough), and that it is OK to increment 533207753Smm // price counts without risk of integer overflow (since UINT32_MAX / 2 534207753Smm // is small enough). The current code doesn't increment price counts 535207753Smm // before initializing price tables, but it maybe done in future if 536207753Smm // we add support for saving the state between LZMA2 chunks. 537207753Smm coder->match_price_count = UINT32_MAX / 2; 538207753Smm coder->align_price_count = UINT32_MAX / 2; 539207753Smm 540207753Smm coder->opts_end_index = 0; 541207753Smm coder->opts_current_index = 0; 542207753Smm 543207753Smm return LZMA_OK; 544207753Smm} 545207753Smm 546207753Smm 547207753Smmextern lzma_ret 548207753Smmlzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, 549207753Smm const lzma_options_lzma *options, lzma_lz_options *lz_options) 550207753Smm{ 551207753Smm // Allocate lzma_coder if it wasn't already allocated. 552207753Smm if (*coder_ptr == NULL) { 553207753Smm *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); 554207753Smm if (*coder_ptr == NULL) 555207753Smm return LZMA_MEM_ERROR; 556207753Smm } 557207753Smm 558207753Smm lzma_coder *coder = *coder_ptr; 559207753Smm 560207753Smm // Set compression mode. We haven't validates the options yet, 561207753Smm // but it's OK here, since nothing bad happens with invalid 562207753Smm // options in the code below, and they will get rejected by 563207753Smm // lzma_lzma_encoder_reset() call at the end of this function. 564207753Smm switch (options->mode) { 565207753Smm case LZMA_MODE_FAST: 566207753Smm coder->fast_mode = true; 567207753Smm break; 568207753Smm 569207753Smm case LZMA_MODE_NORMAL: { 570207753Smm coder->fast_mode = false; 571207753Smm 572207753Smm // Set dist_table_size. 573207753Smm // Round the dictionary size up to next 2^n. 574207753Smm uint32_t log_size = 0; 575207753Smm while ((UINT32_C(1) << log_size) < options->dict_size) 576207753Smm ++log_size; 577207753Smm 578207753Smm coder->dist_table_size = log_size * 2; 579207753Smm 580207753Smm // Length encoders' price table size 581207753Smm coder->match_len_encoder.table_size 582207753Smm = options->nice_len + 1 - MATCH_LEN_MIN; 583207753Smm coder->rep_len_encoder.table_size 584207753Smm = options->nice_len + 1 - MATCH_LEN_MIN; 585207753Smm break; 586207753Smm } 587207753Smm 588207753Smm default: 589207753Smm return LZMA_OPTIONS_ERROR; 590207753Smm } 591207753Smm 592207753Smm // We don't need to write the first byte as literal if there is 593207753Smm // a non-empty preset dictionary. encode_init() wouldn't even work 594207753Smm // if there is a non-empty preset dictionary, because encode_init() 595207753Smm // assumes that position is zero and previous byte is also zero. 596207753Smm coder->is_initialized = options->preset_dict != NULL 597207753Smm && options->preset_dict_size > 0; 598207753Smm coder->is_flushed = false; 599207753Smm 600207753Smm set_lz_options(lz_options, options); 601207753Smm 602207753Smm return lzma_lzma_encoder_reset(coder, options); 603207753Smm} 604207753Smm 605207753Smm 606207753Smmstatic lzma_ret 607207753Smmlzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, 608207753Smm const void *options, lzma_lz_options *lz_options) 609207753Smm{ 610207753Smm lz->code = &lzma_encode; 611207753Smm return lzma_lzma_encoder_create( 612207753Smm &lz->coder, allocator, options, lz_options); 613207753Smm} 614207753Smm 615207753Smm 616207753Smmextern lzma_ret 617207753Smmlzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, 618207753Smm const lzma_filter_info *filters) 619207753Smm{ 620207753Smm return lzma_lz_encoder_init( 621207753Smm next, allocator, filters, &lzma_encoder_init); 622207753Smm} 623207753Smm 624207753Smm 625207753Smmextern uint64_t 626207753Smmlzma_lzma_encoder_memusage(const void *options) 627207753Smm{ 628207753Smm if (!is_options_valid(options)) 629207753Smm return UINT64_MAX; 630207753Smm 631207753Smm lzma_lz_options lz_options; 632207753Smm set_lz_options(&lz_options, options); 633207753Smm 634207753Smm const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); 635207753Smm if (lz_memusage == UINT64_MAX) 636207753Smm return UINT64_MAX; 637207753Smm 638207753Smm return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; 639207753Smm} 640207753Smm 641207753Smm 642207753Smmextern bool 643207753Smmlzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) 644207753Smm{ 645207753Smm if (!is_lclppb_valid(options)) 646207753Smm return true; 647207753Smm 648207753Smm *byte = (options->pb * 5 + options->lp) * 9 + options->lc; 649207753Smm assert(*byte <= (4 * 5 + 4) * 9 + 8); 650207753Smm 651207753Smm return false; 652207753Smm} 653207753Smm 654207753Smm 655207753Smm#ifdef HAVE_ENCODER_LZMA1 656207753Smmextern lzma_ret 657207753Smmlzma_lzma_props_encode(const void *options, uint8_t *out) 658207753Smm{ 659207753Smm const lzma_options_lzma *const opt = options; 660207753Smm 661207753Smm if (lzma_lzma_lclppb_encode(opt, out)) 662207753Smm return LZMA_PROG_ERROR; 663207753Smm 664207753Smm unaligned_write32le(out + 1, opt->dict_size); 665207753Smm 666207753Smm return LZMA_OK; 667207753Smm} 668207753Smm#endif 669207753Smm 670207753Smm 671207753Smmextern LZMA_API(lzma_bool) 672207753Smmlzma_mode_is_supported(lzma_mode mode) 673207753Smm{ 674207753Smm return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; 675207753Smm} 676