1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       lzma_encoder.c
4207753Smm/// \brief      LZMA encoder
5207753Smm///
6207753Smm//  Authors:    Igor Pavlov
7207753Smm//              Lasse Collin
8207753Smm//
9207753Smm//  This file has been put into the public domain.
10207753Smm//  You can do whatever you want with this file.
11207753Smm//
12207753Smm///////////////////////////////////////////////////////////////////////////////
13207753Smm
14207753Smm#include "lzma2_encoder.h"
15207753Smm#include "lzma_encoder_private.h"
16207753Smm#include "fastpos.h"
17207753Smm
18207753Smm
19207753Smm/////////////
20207753Smm// Literal //
21207753Smm/////////////
22207753Smm
23207753Smmstatic inline void
24207753Smmliteral_matched(lzma_range_encoder *rc, probability *subcoder,
25207753Smm		uint32_t match_byte, uint32_t symbol)
26207753Smm{
27207753Smm	uint32_t offset = 0x100;
28207753Smm	symbol += UINT32_C(1) << 8;
29207753Smm
30207753Smm	do {
31207753Smm		match_byte <<= 1;
32207753Smm		const uint32_t match_bit = match_byte & offset;
33207753Smm		const uint32_t subcoder_index
34207753Smm				= offset + match_bit + (symbol >> 8);
35207753Smm		const uint32_t bit = (symbol >> 7) & 1;
36207753Smm		rc_bit(rc, &subcoder[subcoder_index], bit);
37207753Smm
38207753Smm		symbol <<= 1;
39207753Smm		offset &= ~(match_byte ^ symbol);
40207753Smm
41207753Smm	} while (symbol < (UINT32_C(1) << 16));
42207753Smm}
43207753Smm
44207753Smm
45207753Smmstatic inline void
46207753Smmliteral(lzma_coder *coder, lzma_mf *mf, uint32_t position)
47207753Smm{
48207753Smm	// Locate the literal byte to be encoded and the subcoder.
49207753Smm	const uint8_t cur_byte = mf->buffer[
50207753Smm			mf->read_pos - mf->read_ahead];
51207753Smm	probability *subcoder = literal_subcoder(coder->literal,
52207753Smm			coder->literal_context_bits, coder->literal_pos_mask,
53207753Smm			position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);
54207753Smm
55207753Smm	if (is_literal_state(coder->state)) {
56207753Smm		// Previous LZMA-symbol was a literal. Encode a normal
57207753Smm		// literal without a match byte.
58207753Smm		rc_bittree(&coder->rc, subcoder, 8, cur_byte);
59207753Smm	} else {
60207753Smm		// Previous LZMA-symbol was a match. Use the last byte of
61207753Smm		// the match as a "match byte". That is, compare the bits
62207753Smm		// of the current literal and the match byte.
63207753Smm		const uint8_t match_byte = mf->buffer[
64207753Smm				mf->read_pos - coder->reps[0] - 1
65207753Smm				- mf->read_ahead];
66207753Smm		literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
67207753Smm	}
68207753Smm
69207753Smm	update_literal(coder->state);
70207753Smm}
71207753Smm
72207753Smm
73207753Smm//////////////////
74207753Smm// Match length //
75207753Smm//////////////////
76207753Smm
77207753Smmstatic void
78207753Smmlength_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
79207753Smm{
80207753Smm	const uint32_t table_size = lc->table_size;
81207753Smm	lc->counters[pos_state] = table_size;
82207753Smm
83207753Smm	const uint32_t a0 = rc_bit_0_price(lc->choice);
84207753Smm	const uint32_t a1 = rc_bit_1_price(lc->choice);
85207753Smm	const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
86207753Smm	const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
87207753Smm	uint32_t *const prices = lc->prices[pos_state];
88207753Smm
89207753Smm	uint32_t i;
90207753Smm	for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i)
91207753Smm		prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
92207753Smm				LEN_LOW_BITS, i);
93207753Smm
94207753Smm	for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
95207753Smm		prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
96207753Smm				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
97207753Smm
98207753Smm	for (; i < table_size; ++i)
99207753Smm		prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS,
100207753Smm				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
101207753Smm
102207753Smm	return;
103207753Smm}
104207753Smm
105207753Smm
106207753Smmstatic inline void
107207753Smmlength(lzma_range_encoder *rc, lzma_length_encoder *lc,
108207753Smm		const uint32_t pos_state, uint32_t len, const bool fast_mode)
109207753Smm{
110207753Smm	assert(len <= MATCH_LEN_MAX);
111207753Smm	len -= MATCH_LEN_MIN;
112207753Smm
113207753Smm	if (len < LEN_LOW_SYMBOLS) {
114207753Smm		rc_bit(rc, &lc->choice, 0);
115207753Smm		rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len);
116207753Smm	} else {
117207753Smm		rc_bit(rc, &lc->choice, 1);
118207753Smm		len -= LEN_LOW_SYMBOLS;
119207753Smm
120207753Smm		if (len < LEN_MID_SYMBOLS) {
121207753Smm			rc_bit(rc, &lc->choice2, 0);
122207753Smm			rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len);
123207753Smm		} else {
124207753Smm			rc_bit(rc, &lc->choice2, 1);
125207753Smm			len -= LEN_MID_SYMBOLS;
126207753Smm			rc_bittree(rc, lc->high, LEN_HIGH_BITS, len);
127207753Smm		}
128207753Smm	}
129207753Smm
130207753Smm	// Only getoptimum uses the prices so don't update the table when
131207753Smm	// in fast mode.
132207753Smm	if (!fast_mode)
133207753Smm		if (--lc->counters[pos_state] == 0)
134207753Smm			length_update_prices(lc, pos_state);
135207753Smm}
136207753Smm
137207753Smm
138207753Smm///////////
139207753Smm// Match //
140207753Smm///////////
141207753Smm
142207753Smmstatic inline void
143207753Smmmatch(lzma_coder *coder, const uint32_t pos_state,
144207753Smm		const uint32_t distance, const uint32_t len)
145207753Smm{
146207753Smm	update_match(coder->state);
147207753Smm
148207753Smm	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
149207753Smm			coder->fast_mode);
150207753Smm
151207753Smm	const uint32_t pos_slot = get_pos_slot(distance);
152207753Smm	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
153207753Smm	rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state],
154207753Smm			POS_SLOT_BITS, pos_slot);
155207753Smm
156207753Smm	if (pos_slot >= START_POS_MODEL_INDEX) {
157207753Smm		const uint32_t footer_bits = (pos_slot >> 1) - 1;
158207753Smm		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
159207753Smm		const uint32_t pos_reduced = distance - base;
160207753Smm
161207753Smm		if (pos_slot < END_POS_MODEL_INDEX) {
162207753Smm			// Careful here: base - pos_slot - 1 can be -1, but
163207753Smm			// rc_bittree_reverse starts at probs[1], not probs[0].
164207753Smm			rc_bittree_reverse(&coder->rc,
165207753Smm				coder->pos_special + base - pos_slot - 1,
166207753Smm				footer_bits, pos_reduced);
167207753Smm		} else {
168207753Smm			rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS,
169207753Smm					footer_bits - ALIGN_BITS);
170207753Smm			rc_bittree_reverse(
171207753Smm					&coder->rc, coder->pos_align,
172207753Smm					ALIGN_BITS, pos_reduced & ALIGN_MASK);
173207753Smm			++coder->align_price_count;
174207753Smm		}
175207753Smm	}
176207753Smm
177207753Smm	coder->reps[3] = coder->reps[2];
178207753Smm	coder->reps[2] = coder->reps[1];
179207753Smm	coder->reps[1] = coder->reps[0];
180207753Smm	coder->reps[0] = distance;
181207753Smm	++coder->match_price_count;
182207753Smm}
183207753Smm
184207753Smm
185207753Smm////////////////////
186207753Smm// Repeated match //
187207753Smm////////////////////
188207753Smm
189207753Smmstatic inline void
190207753Smmrep_match(lzma_coder *coder, const uint32_t pos_state,
191207753Smm		const uint32_t rep, const uint32_t len)
192207753Smm{
193207753Smm	if (rep == 0) {
194207753Smm		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
195207753Smm		rc_bit(&coder->rc,
196207753Smm				&coder->is_rep0_long[coder->state][pos_state],
197207753Smm				len != 1);
198207753Smm	} else {
199207753Smm		const uint32_t distance = coder->reps[rep];
200207753Smm		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);
201207753Smm
202207753Smm		if (rep == 1) {
203207753Smm			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
204207753Smm		} else {
205207753Smm			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
206207753Smm			rc_bit(&coder->rc, &coder->is_rep2[coder->state],
207207753Smm					rep - 2);
208207753Smm
209207753Smm			if (rep == 3)
210207753Smm				coder->reps[3] = coder->reps[2];
211207753Smm
212207753Smm			coder->reps[2] = coder->reps[1];
213207753Smm		}
214207753Smm
215207753Smm		coder->reps[1] = coder->reps[0];
216207753Smm		coder->reps[0] = distance;
217207753Smm	}
218207753Smm
219207753Smm	if (len == 1) {
220207753Smm		update_short_rep(coder->state);
221207753Smm	} else {
222207753Smm		length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
223207753Smm				coder->fast_mode);
224207753Smm		update_long_rep(coder->state);
225207753Smm	}
226207753Smm}
227207753Smm
228207753Smm
229207753Smm//////////
230207753Smm// Main //
231207753Smm//////////
232207753Smm
233207753Smmstatic void
234207753Smmencode_symbol(lzma_coder *coder, lzma_mf *mf,
235207753Smm		uint32_t back, uint32_t len, uint32_t position)
236207753Smm{
237207753Smm	const uint32_t pos_state = position & coder->pos_mask;
238207753Smm
239207753Smm	if (back == UINT32_MAX) {
240207753Smm		// Literal i.e. eight-bit byte
241207753Smm		assert(len == 1);
242207753Smm		rc_bit(&coder->rc,
243207753Smm				&coder->is_match[coder->state][pos_state], 0);
244207753Smm		literal(coder, mf, position);
245207753Smm	} else {
246207753Smm		// Some type of match
247207753Smm		rc_bit(&coder->rc,
248207753Smm			&coder->is_match[coder->state][pos_state], 1);
249207753Smm
250207753Smm		if (back < REP_DISTANCES) {
251207753Smm			// It's a repeated match i.e. the same distance
252207753Smm			// has been used earlier.
253207753Smm			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
254207753Smm			rep_match(coder, pos_state, back, len);
255207753Smm		} else {
256207753Smm			// Normal match
257207753Smm			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
258207753Smm			match(coder, pos_state, back - REP_DISTANCES, len);
259207753Smm		}
260207753Smm	}
261207753Smm
262207753Smm	assert(mf->read_ahead >= len);
263207753Smm	mf->read_ahead -= len;
264207753Smm}
265207753Smm
266207753Smm
267207753Smmstatic bool
268207753Smmencode_init(lzma_coder *coder, lzma_mf *mf)
269207753Smm{
270207753Smm	assert(mf_position(mf) == 0);
271207753Smm
272207753Smm	if (mf->read_pos == mf->read_limit) {
273207753Smm		if (mf->action == LZMA_RUN)
274207753Smm			return false; // We cannot do anything.
275207753Smm
276207753Smm		// We are finishing (we cannot get here when flushing).
277207753Smm		assert(mf->write_pos == mf->read_pos);
278207753Smm		assert(mf->action == LZMA_FINISH);
279207753Smm	} else {
280207753Smm		// Do the actual initialization. The first LZMA symbol must
281207753Smm		// always be a literal.
282207753Smm		mf_skip(mf, 1);
283207753Smm		mf->read_ahead = 0;
284207753Smm		rc_bit(&coder->rc, &coder->is_match[0][0], 0);
285207753Smm		rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
286207753Smm	}
287207753Smm
288207753Smm	// Initialization is done (except if empty file).
289207753Smm	coder->is_initialized = true;
290207753Smm
291207753Smm	return true;
292207753Smm}
293207753Smm
294207753Smm
295207753Smmstatic void
296207753Smmencode_eopm(lzma_coder *coder, uint32_t position)
297207753Smm{
298207753Smm	const uint32_t pos_state = position & coder->pos_mask;
299207753Smm	rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
300207753Smm	rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
301207753Smm	match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN);
302207753Smm}
303207753Smm
304207753Smm
305207753Smm/// Number of bytes that a single encoding loop in lzma_lzma_encode() can
306207753Smm/// consume from the dictionary. This limit comes from lzma_lzma_optimum()
307207753Smm/// and may need to be updated if that function is significantly modified.
308207753Smm#define LOOP_INPUT_MAX (OPTS + 1)
309207753Smm
310207753Smm
311207753Smmextern lzma_ret
312207753Smmlzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
313207753Smm		uint8_t *restrict out, size_t *restrict out_pos,
314207753Smm		size_t out_size, uint32_t limit)
315207753Smm{
316207753Smm	// Initialize the stream if no data has been encoded yet.
317207753Smm	if (!coder->is_initialized && !encode_init(coder, mf))
318207753Smm		return LZMA_OK;
319207753Smm
320207753Smm	// Get the lowest bits of the uncompressed offset from the LZ layer.
321207753Smm	uint32_t position = mf_position(mf);
322207753Smm
323207753Smm	while (true) {
324207753Smm		// Encode pending bits, if any. Calling this before encoding
325207753Smm		// the next symbol is needed only with plain LZMA, since
326207753Smm		// LZMA2 always provides big enough buffer to flush
327207753Smm		// everything out from the range encoder. For the same reason,
328207753Smm		// rc_encode() never returns true when this function is used
329207753Smm		// as part of LZMA2 encoder.
330207753Smm		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
331207753Smm			assert(limit == UINT32_MAX);
332207753Smm			return LZMA_OK;
333207753Smm		}
334207753Smm
335207753Smm		// With LZMA2 we need to take care that compressed size of
336207753Smm		// a chunk doesn't get too big.
337215187Smm		// FIXME? Check if this could be improved.
338207753Smm		if (limit != UINT32_MAX
339207753Smm				&& (mf->read_pos - mf->read_ahead >= limit
340207753Smm					|| *out_pos + rc_pending(&coder->rc)
341207753Smm						>= LZMA2_CHUNK_MAX
342207753Smm							- LOOP_INPUT_MAX))
343207753Smm			break;
344207753Smm
345207753Smm		// Check that there is some input to process.
346207753Smm		if (mf->read_pos >= mf->read_limit) {
347207753Smm			if (mf->action == LZMA_RUN)
348207753Smm				return LZMA_OK;
349207753Smm
350207753Smm			if (mf->read_ahead == 0)
351207753Smm				break;
352207753Smm		}
353207753Smm
354207753Smm		// Get optimal match (repeat position and length).
355207753Smm		// Value ranges for pos:
356207753Smm		//   - [0, REP_DISTANCES): repeated match
357207753Smm		//   - [REP_DISTANCES, UINT32_MAX):
358207753Smm		//     match at (pos - REP_DISTANCES)
359207753Smm		//   - UINT32_MAX: not a match but a literal
360207753Smm		// Value ranges for len:
361207753Smm		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
362207753Smm		uint32_t len;
363207753Smm		uint32_t back;
364207753Smm
365207753Smm		if (coder->fast_mode)
366207753Smm			lzma_lzma_optimum_fast(coder, mf, &back, &len);
367207753Smm		else
368207753Smm			lzma_lzma_optimum_normal(
369207753Smm					coder, mf, &back, &len, position);
370207753Smm
371207753Smm		encode_symbol(coder, mf, back, len, position);
372207753Smm
373207753Smm		position += len;
374207753Smm	}
375207753Smm
376207753Smm	if (!coder->is_flushed) {
377207753Smm		coder->is_flushed = true;
378207753Smm
379207753Smm		// We don't support encoding plain LZMA streams without EOPM,
380207753Smm		// and LZMA2 doesn't use EOPM at LZMA level.
381207753Smm		if (limit == UINT32_MAX)
382207753Smm			encode_eopm(coder, position);
383207753Smm
384207753Smm		// Flush the remaining bytes from the range encoder.
385207753Smm		rc_flush(&coder->rc);
386207753Smm
387207753Smm		// Copy the remaining bytes to the output buffer. If there
388207753Smm		// isn't enough output space, we will copy out the remaining
389207753Smm		// bytes on the next call to this function by using
390207753Smm		// the rc_encode() call in the encoding loop above.
391207753Smm		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
392207753Smm			assert(limit == UINT32_MAX);
393207753Smm			return LZMA_OK;
394207753Smm		}
395207753Smm	}
396207753Smm
397207753Smm	// Make it ready for the next LZMA2 chunk.
398207753Smm	coder->is_flushed = false;
399207753Smm
400207753Smm	return LZMA_STREAM_END;
401207753Smm}
402207753Smm
403207753Smm
404207753Smmstatic lzma_ret
405207753Smmlzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
406207753Smm		uint8_t *restrict out, size_t *restrict out_pos,
407207753Smm		size_t out_size)
408207753Smm{
409207753Smm	// Plain LZMA has no support for sync-flushing.
410207753Smm	if (unlikely(mf->action == LZMA_SYNC_FLUSH))
411207753Smm		return LZMA_OPTIONS_ERROR;
412207753Smm
413207753Smm	return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX);
414207753Smm}
415207753Smm
416207753Smm
417207753Smm////////////////////
418207753Smm// Initialization //
419207753Smm////////////////////
420207753Smm
421207753Smmstatic bool
422207753Smmis_options_valid(const lzma_options_lzma *options)
423207753Smm{
424207753Smm	// Validate some of the options. LZ encoder validates nice_len too
425207753Smm	// but we need a valid value here earlier.
426207753Smm	return is_lclppb_valid(options)
427207753Smm			&& options->nice_len >= MATCH_LEN_MIN
428207753Smm			&& options->nice_len <= MATCH_LEN_MAX
429207753Smm			&& (options->mode == LZMA_MODE_FAST
430207753Smm				|| options->mode == LZMA_MODE_NORMAL);
431207753Smm}
432207753Smm
433207753Smm
434207753Smmstatic void
435207753Smmset_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
436207753Smm{
437207753Smm	// LZ encoder initialization does the validation for these so we
438207753Smm	// don't need to validate here.
439207753Smm	lz_options->before_size = OPTS;
440207753Smm	lz_options->dict_size = options->dict_size;
441207753Smm	lz_options->after_size = LOOP_INPUT_MAX;
442207753Smm	lz_options->match_len_max = MATCH_LEN_MAX;
443207753Smm	lz_options->nice_len = options->nice_len;
444207753Smm	lz_options->match_finder = options->mf;
445207753Smm	lz_options->depth = options->depth;
446207753Smm	lz_options->preset_dict = options->preset_dict;
447207753Smm	lz_options->preset_dict_size = options->preset_dict_size;
448207753Smm	return;
449207753Smm}
450207753Smm
451207753Smm
452207753Smmstatic void
453207753Smmlength_encoder_reset(lzma_length_encoder *lencoder,
454207753Smm		const uint32_t num_pos_states, const bool fast_mode)
455207753Smm{
456207753Smm	bit_reset(lencoder->choice);
457207753Smm	bit_reset(lencoder->choice2);
458207753Smm
459207753Smm	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
460207753Smm		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
461207753Smm		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
462207753Smm	}
463207753Smm
464207753Smm	bittree_reset(lencoder->high, LEN_HIGH_BITS);
465207753Smm
466207753Smm	if (!fast_mode)
467207753Smm		for (size_t pos_state = 0; pos_state < num_pos_states;
468207753Smm				++pos_state)
469207753Smm			length_update_prices(lencoder, pos_state);
470207753Smm
471207753Smm	return;
472207753Smm}
473207753Smm
474207753Smm
475207753Smmextern lzma_ret
476207753Smmlzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
477207753Smm{
478207753Smm	if (!is_options_valid(options))
479207753Smm		return LZMA_OPTIONS_ERROR;
480207753Smm
481207753Smm	coder->pos_mask = (1U << options->pb) - 1;
482207753Smm	coder->literal_context_bits = options->lc;
483207753Smm	coder->literal_pos_mask = (1U << options->lp) - 1;
484207753Smm
485207753Smm	// Range coder
486207753Smm	rc_reset(&coder->rc);
487207753Smm
488207753Smm	// State
489207753Smm	coder->state = STATE_LIT_LIT;
490207753Smm	for (size_t i = 0; i < REP_DISTANCES; ++i)
491207753Smm		coder->reps[i] = 0;
492207753Smm
493207753Smm	literal_init(coder->literal, options->lc, options->lp);
494207753Smm
495207753Smm	// Bit encoders
496207753Smm	for (size_t i = 0; i < STATES; ++i) {
497207753Smm		for (size_t j = 0; j <= coder->pos_mask; ++j) {
498207753Smm			bit_reset(coder->is_match[i][j]);
499207753Smm			bit_reset(coder->is_rep0_long[i][j]);
500207753Smm		}
501207753Smm
502207753Smm		bit_reset(coder->is_rep[i]);
503207753Smm		bit_reset(coder->is_rep0[i]);
504207753Smm		bit_reset(coder->is_rep1[i]);
505207753Smm		bit_reset(coder->is_rep2[i]);
506207753Smm	}
507207753Smm
508207753Smm	for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
509207753Smm		bit_reset(coder->pos_special[i]);
510207753Smm
511207753Smm	// Bit tree encoders
512207753Smm	for (size_t i = 0; i < LEN_TO_POS_STATES; ++i)
513207753Smm		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
514207753Smm
515207753Smm	bittree_reset(coder->pos_align, ALIGN_BITS);
516207753Smm
517207753Smm	// Length encoders
518207753Smm	length_encoder_reset(&coder->match_len_encoder,
519207753Smm			1U << options->pb, coder->fast_mode);
520207753Smm
521207753Smm	length_encoder_reset(&coder->rep_len_encoder,
522207753Smm			1U << options->pb, coder->fast_mode);
523207753Smm
524207753Smm	// Price counts are incremented every time appropriate probabilities
525207753Smm	// are changed. price counts are set to zero when the price tables
526207753Smm	// are updated, which is done when the appropriate price counts have
527207753Smm	// big enough value, and lzma_mf.read_ahead == 0 which happens at
528207753Smm	// least every OPTS (a few thousand) possible price count increments.
529207753Smm	//
530207753Smm	// By resetting price counts to UINT32_MAX / 2, we make sure that the
531207753Smm	// price tables will be initialized before they will be used (since
532207753Smm	// the value is definitely big enough), and that it is OK to increment
533207753Smm	// price counts without risk of integer overflow (since UINT32_MAX / 2
534207753Smm	// is small enough). The current code doesn't increment price counts
535207753Smm	// before initializing price tables, but it maybe done in future if
536207753Smm	// we add support for saving the state between LZMA2 chunks.
537207753Smm	coder->match_price_count = UINT32_MAX / 2;
538207753Smm	coder->align_price_count = UINT32_MAX / 2;
539207753Smm
540207753Smm	coder->opts_end_index = 0;
541207753Smm	coder->opts_current_index = 0;
542207753Smm
543207753Smm	return LZMA_OK;
544207753Smm}
545207753Smm
546207753Smm
547207753Smmextern lzma_ret
548207753Smmlzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
549207753Smm		const lzma_options_lzma *options, lzma_lz_options *lz_options)
550207753Smm{
551207753Smm	// Allocate lzma_coder if it wasn't already allocated.
552207753Smm	if (*coder_ptr == NULL) {
553207753Smm		*coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator);
554207753Smm		if (*coder_ptr == NULL)
555207753Smm			return LZMA_MEM_ERROR;
556207753Smm	}
557207753Smm
558207753Smm	lzma_coder *coder = *coder_ptr;
559207753Smm
560207753Smm	// Set compression mode. We haven't validates the options yet,
561207753Smm	// but it's OK here, since nothing bad happens with invalid
562207753Smm	// options in the code below, and they will get rejected by
563207753Smm	// lzma_lzma_encoder_reset() call at the end of this function.
564207753Smm	switch (options->mode) {
565207753Smm		case LZMA_MODE_FAST:
566207753Smm			coder->fast_mode = true;
567207753Smm			break;
568207753Smm
569207753Smm		case LZMA_MODE_NORMAL: {
570207753Smm			coder->fast_mode = false;
571207753Smm
572207753Smm			// Set dist_table_size.
573207753Smm			// Round the dictionary size up to next 2^n.
574207753Smm			uint32_t log_size = 0;
575207753Smm			while ((UINT32_C(1) << log_size) < options->dict_size)
576207753Smm				++log_size;
577207753Smm
578207753Smm			coder->dist_table_size = log_size * 2;
579207753Smm
580207753Smm			// Length encoders' price table size
581207753Smm			coder->match_len_encoder.table_size
582207753Smm				= options->nice_len + 1 - MATCH_LEN_MIN;
583207753Smm			coder->rep_len_encoder.table_size
584207753Smm				= options->nice_len + 1 - MATCH_LEN_MIN;
585207753Smm			break;
586207753Smm		}
587207753Smm
588207753Smm		default:
589207753Smm			return LZMA_OPTIONS_ERROR;
590207753Smm	}
591207753Smm
592207753Smm	// We don't need to write the first byte as literal if there is
593207753Smm	// a non-empty preset dictionary. encode_init() wouldn't even work
594207753Smm	// if there is a non-empty preset dictionary, because encode_init()
595207753Smm	// assumes that position is zero and previous byte is also zero.
596207753Smm	coder->is_initialized = options->preset_dict != NULL
597207753Smm			&& options->preset_dict_size > 0;
598207753Smm	coder->is_flushed = false;
599207753Smm
600207753Smm	set_lz_options(lz_options, options);
601207753Smm
602207753Smm	return lzma_lzma_encoder_reset(coder, options);
603207753Smm}
604207753Smm
605207753Smm
606207753Smmstatic lzma_ret
607207753Smmlzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
608207753Smm		const void *options, lzma_lz_options *lz_options)
609207753Smm{
610207753Smm	lz->code = &lzma_encode;
611207753Smm	return lzma_lzma_encoder_create(
612207753Smm			&lz->coder, allocator, options, lz_options);
613207753Smm}
614207753Smm
615207753Smm
616207753Smmextern lzma_ret
617207753Smmlzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
618207753Smm		const lzma_filter_info *filters)
619207753Smm{
620207753Smm	return lzma_lz_encoder_init(
621207753Smm			next, allocator, filters, &lzma_encoder_init);
622207753Smm}
623207753Smm
624207753Smm
625207753Smmextern uint64_t
626207753Smmlzma_lzma_encoder_memusage(const void *options)
627207753Smm{
628207753Smm	if (!is_options_valid(options))
629207753Smm		return UINT64_MAX;
630207753Smm
631207753Smm	lzma_lz_options lz_options;
632207753Smm	set_lz_options(&lz_options, options);
633207753Smm
634207753Smm	const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
635207753Smm	if (lz_memusage == UINT64_MAX)
636207753Smm		return UINT64_MAX;
637207753Smm
638207753Smm	return (uint64_t)(sizeof(lzma_coder)) + lz_memusage;
639207753Smm}
640207753Smm
641207753Smm
642207753Smmextern bool
643207753Smmlzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
644207753Smm{
645207753Smm	if (!is_lclppb_valid(options))
646207753Smm		return true;
647207753Smm
648207753Smm	*byte = (options->pb * 5 + options->lp) * 9 + options->lc;
649207753Smm	assert(*byte <= (4 * 5 + 4) * 9 + 8);
650207753Smm
651207753Smm	return false;
652207753Smm}
653207753Smm
654207753Smm
655207753Smm#ifdef HAVE_ENCODER_LZMA1
656207753Smmextern lzma_ret
657207753Smmlzma_lzma_props_encode(const void *options, uint8_t *out)
658207753Smm{
659207753Smm	const lzma_options_lzma *const opt = options;
660207753Smm
661207753Smm	if (lzma_lzma_lclppb_encode(opt, out))
662207753Smm		return LZMA_PROG_ERROR;
663207753Smm
664207753Smm	unaligned_write32le(out + 1, opt->dict_size);
665207753Smm
666207753Smm	return LZMA_OK;
667207753Smm}
668207753Smm#endif
669207753Smm
670207753Smm
671207753Smmextern LZMA_API(lzma_bool)
672207753Smmlzma_mode_is_supported(lzma_mode mode)
673207753Smm{
674207753Smm	return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL;
675207753Smm}
676