1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       lzma_encoder_optimum_normal.c
4207753Smm//
5207753Smm//  Author:     Igor Pavlov
6207753Smm//
7207753Smm//  This file has been put into the public domain.
8207753Smm//  You can do whatever you want with this file.
9207753Smm//
10207753Smm///////////////////////////////////////////////////////////////////////////////
11207753Smm
12207753Smm#include "lzma_encoder_private.h"
13207753Smm#include "fastpos.h"
14207753Smm
15207753Smm
16207753Smm////////////
17207753Smm// Prices //
18207753Smm////////////
19207753Smm
20207753Smmstatic uint32_t
21207753Smmget_literal_price(const lzma_coder *const coder, const uint32_t pos,
22207753Smm		const uint32_t prev_byte, const bool match_mode,
23207753Smm		uint32_t match_byte, uint32_t symbol)
24207753Smm{
25207753Smm	const probability *const subcoder = literal_subcoder(coder->literal,
26207753Smm			coder->literal_context_bits, coder->literal_pos_mask,
27207753Smm			pos, prev_byte);
28207753Smm
29207753Smm	uint32_t price = 0;
30207753Smm
31207753Smm	if (!match_mode) {
32207753Smm		price = rc_bittree_price(subcoder, 8, symbol);
33207753Smm	} else {
34207753Smm		uint32_t offset = 0x100;
35207753Smm		symbol += UINT32_C(1) << 8;
36207753Smm
37207753Smm		do {
38207753Smm			match_byte <<= 1;
39207753Smm
40207753Smm			const uint32_t match_bit = match_byte & offset;
41207753Smm			const uint32_t subcoder_index
42207753Smm					= offset + match_bit + (symbol >> 8);
43207753Smm			const uint32_t bit = (symbol >> 7) & 1;
44207753Smm			price += rc_bit_price(subcoder[subcoder_index], bit);
45207753Smm
46207753Smm			symbol <<= 1;
47207753Smm			offset &= ~(match_byte ^ symbol);
48207753Smm
49207753Smm		} while (symbol < (UINT32_C(1) << 16));
50207753Smm	}
51207753Smm
52207753Smm	return price;
53207753Smm}
54207753Smm
55207753Smm
56207753Smmstatic inline uint32_t
57207753Smmget_len_price(const lzma_length_encoder *const lencoder,
58207753Smm		const uint32_t len, const uint32_t pos_state)
59207753Smm{
60207753Smm	// NOTE: Unlike the other price tables, length prices are updated
61207753Smm	// in lzma_encoder.c
62207753Smm	return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
63207753Smm}
64207753Smm
65207753Smm
66207753Smmstatic inline uint32_t
67207753Smmget_short_rep_price(const lzma_coder *const coder,
68207753Smm		const lzma_lzma_state state, const uint32_t pos_state)
69207753Smm{
70207753Smm	return rc_bit_0_price(coder->is_rep0[state])
71207753Smm		+ rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
72207753Smm}
73207753Smm
74207753Smm
75207753Smmstatic inline uint32_t
76207753Smmget_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
77207753Smm		const lzma_lzma_state state, uint32_t pos_state)
78207753Smm{
79207753Smm	uint32_t price;
80207753Smm
81207753Smm	if (rep_index == 0) {
82207753Smm		price = rc_bit_0_price(coder->is_rep0[state]);
83207753Smm		price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
84207753Smm	} else {
85207753Smm		price = rc_bit_1_price(coder->is_rep0[state]);
86207753Smm
87207753Smm		if (rep_index == 1) {
88207753Smm			price += rc_bit_0_price(coder->is_rep1[state]);
89207753Smm		} else {
90207753Smm			price += rc_bit_1_price(coder->is_rep1[state]);
91207753Smm			price += rc_bit_price(coder->is_rep2[state],
92207753Smm					rep_index - 2);
93207753Smm		}
94207753Smm	}
95207753Smm
96207753Smm	return price;
97207753Smm}
98207753Smm
99207753Smm
100207753Smmstatic inline uint32_t
101207753Smmget_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
102207753Smm		const uint32_t len, const lzma_lzma_state state,
103207753Smm		const uint32_t pos_state)
104207753Smm{
105207753Smm	return get_len_price(&coder->rep_len_encoder, len, pos_state)
106207753Smm		+ get_pure_rep_price(coder, rep_index, state, pos_state);
107207753Smm}
108207753Smm
109207753Smm
110207753Smmstatic inline uint32_t
111207753Smmget_pos_len_price(const lzma_coder *const coder, const uint32_t pos,
112207753Smm		const uint32_t len, const uint32_t pos_state)
113207753Smm{
114207753Smm	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
115207753Smm	uint32_t price;
116207753Smm
117207753Smm	if (pos < FULL_DISTANCES) {
118207753Smm		price = coder->distances_prices[len_to_pos_state][pos];
119207753Smm	} else {
120207753Smm		const uint32_t pos_slot = get_pos_slot_2(pos);
121207753Smm		price = coder->pos_slot_prices[len_to_pos_state][pos_slot]
122207753Smm				+ coder->align_prices[pos & ALIGN_MASK];
123207753Smm	}
124207753Smm
125207753Smm	price += get_len_price(&coder->match_len_encoder, len, pos_state);
126207753Smm
127207753Smm	return price;
128207753Smm}
129207753Smm
130207753Smm
131207753Smmstatic void
132207753Smmfill_distances_prices(lzma_coder *coder)
133207753Smm{
134207753Smm	for (uint32_t len_to_pos_state = 0;
135207753Smm			len_to_pos_state < LEN_TO_POS_STATES;
136207753Smm			++len_to_pos_state) {
137207753Smm
138207753Smm		uint32_t *const pos_slot_prices
139207753Smm				= coder->pos_slot_prices[len_to_pos_state];
140207753Smm
141207753Smm		// Price to encode the pos_slot.
142207753Smm		for (uint32_t pos_slot = 0;
143207753Smm				pos_slot < coder->dist_table_size; ++pos_slot)
144207753Smm			pos_slot_prices[pos_slot] = rc_bittree_price(
145207753Smm					coder->pos_slot[len_to_pos_state],
146207753Smm					POS_SLOT_BITS, pos_slot);
147207753Smm
148207753Smm		// For matches with distance >= FULL_DISTANCES, add the price
149207753Smm		// of the direct bits part of the match distance. (Align bits
150207753Smm		// are handled by fill_align_prices()).
151207753Smm		for (uint32_t pos_slot = END_POS_MODEL_INDEX;
152207753Smm				pos_slot < coder->dist_table_size; ++pos_slot)
153207753Smm			pos_slot_prices[pos_slot] += rc_direct_price(
154207753Smm					((pos_slot >> 1) - 1) - ALIGN_BITS);
155207753Smm
156207753Smm		// Distances in the range [0, 3] are fully encoded with
157207753Smm		// pos_slot, so they are used for coder->distances_prices
158207753Smm		// as is.
159207753Smm		for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i)
160207753Smm			coder->distances_prices[len_to_pos_state][i]
161207753Smm					= pos_slot_prices[i];
162207753Smm	}
163207753Smm
164207753Smm	// Distances in the range [4, 127] depend on pos_slot and pos_special.
165207753Smm	// We do this in a loop separate from the above loop to avoid
166207753Smm	// redundant calls to get_pos_slot().
167207753Smm	for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) {
168207753Smm		const uint32_t pos_slot = get_pos_slot(i);
169207753Smm		const uint32_t footer_bits = ((pos_slot >> 1) - 1);
170207753Smm		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
171207753Smm		const uint32_t price = rc_bittree_reverse_price(
172207753Smm				coder->pos_special + base - pos_slot - 1,
173207753Smm				footer_bits, i - base);
174207753Smm
175207753Smm		for (uint32_t len_to_pos_state = 0;
176207753Smm				len_to_pos_state < LEN_TO_POS_STATES;
177207753Smm				++len_to_pos_state)
178207753Smm			coder->distances_prices[len_to_pos_state][i]
179207753Smm					= price + coder->pos_slot_prices[
180207753Smm						len_to_pos_state][pos_slot];
181207753Smm	}
182207753Smm
183207753Smm	coder->match_price_count = 0;
184207753Smm	return;
185207753Smm}
186207753Smm
187207753Smm
188207753Smmstatic void
189207753Smmfill_align_prices(lzma_coder *coder)
190207753Smm{
191207753Smm	for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i)
192207753Smm		coder->align_prices[i] = rc_bittree_reverse_price(
193207753Smm				coder->pos_align, ALIGN_BITS, i);
194207753Smm
195207753Smm	coder->align_price_count = 0;
196207753Smm	return;
197207753Smm}
198207753Smm
199207753Smm
200207753Smm/////////////
201207753Smm// Optimal //
202207753Smm/////////////
203207753Smm
204207753Smmstatic inline void
205207753Smmmake_literal(lzma_optimal *optimal)
206207753Smm{
207207753Smm	optimal->back_prev = UINT32_MAX;
208207753Smm	optimal->prev_1_is_literal = false;
209207753Smm}
210207753Smm
211207753Smm
212207753Smmstatic inline void
213207753Smmmake_short_rep(lzma_optimal *optimal)
214207753Smm{
215207753Smm	optimal->back_prev = 0;
216207753Smm	optimal->prev_1_is_literal = false;
217207753Smm}
218207753Smm
219207753Smm
220207753Smm#define is_short_rep(optimal) \
221207753Smm	((optimal).back_prev == 0)
222207753Smm
223207753Smm
224207753Smmstatic void
225207753Smmbackward(lzma_coder *restrict coder, uint32_t *restrict len_res,
226207753Smm		uint32_t *restrict back_res, uint32_t cur)
227207753Smm{
228207753Smm	coder->opts_end_index = cur;
229207753Smm
230207753Smm	uint32_t pos_mem = coder->opts[cur].pos_prev;
231207753Smm	uint32_t back_mem = coder->opts[cur].back_prev;
232207753Smm
233207753Smm	do {
234207753Smm		if (coder->opts[cur].prev_1_is_literal) {
235207753Smm			make_literal(&coder->opts[pos_mem]);
236207753Smm			coder->opts[pos_mem].pos_prev = pos_mem - 1;
237207753Smm
238207753Smm			if (coder->opts[cur].prev_2) {
239207753Smm				coder->opts[pos_mem - 1].prev_1_is_literal
240207753Smm						= false;
241207753Smm				coder->opts[pos_mem - 1].pos_prev
242207753Smm						= coder->opts[cur].pos_prev_2;
243207753Smm				coder->opts[pos_mem - 1].back_prev
244207753Smm						= coder->opts[cur].back_prev_2;
245207753Smm			}
246207753Smm		}
247207753Smm
248207753Smm		const uint32_t pos_prev = pos_mem;
249207753Smm		const uint32_t back_cur = back_mem;
250207753Smm
251207753Smm		back_mem = coder->opts[pos_prev].back_prev;
252207753Smm		pos_mem = coder->opts[pos_prev].pos_prev;
253207753Smm
254207753Smm		coder->opts[pos_prev].back_prev = back_cur;
255207753Smm		coder->opts[pos_prev].pos_prev = cur;
256207753Smm		cur = pos_prev;
257207753Smm
258207753Smm	} while (cur != 0);
259207753Smm
260207753Smm	coder->opts_current_index = coder->opts[0].pos_prev;
261207753Smm	*len_res = coder->opts[0].pos_prev;
262207753Smm	*back_res = coder->opts[0].back_prev;
263207753Smm
264207753Smm	return;
265207753Smm}
266207753Smm
267207753Smm
268207753Smm//////////
269207753Smm// Main //
270207753Smm//////////
271207753Smm
272207753Smmstatic inline uint32_t
273207753Smmhelper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
274207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
275207753Smm		uint32_t position)
276207753Smm{
277207753Smm	const uint32_t nice_len = mf->nice_len;
278207753Smm
279207753Smm	uint32_t len_main;
280207753Smm	uint32_t matches_count;
281207753Smm
282207753Smm	if (mf->read_ahead == 0) {
283207753Smm		len_main = mf_find(mf, &matches_count, coder->matches);
284207753Smm	} else {
285207753Smm		assert(mf->read_ahead == 1);
286207753Smm		len_main = coder->longest_match_length;
287207753Smm		matches_count = coder->matches_count;
288207753Smm	}
289207753Smm
290213700Smm	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
291207753Smm	if (buf_avail < 2) {
292207753Smm		*back_res = UINT32_MAX;
293207753Smm		*len_res = 1;
294207753Smm		return UINT32_MAX;
295207753Smm	}
296207753Smm
297207753Smm	const uint8_t *const buf = mf_ptr(mf) - 1;
298207753Smm
299207753Smm	uint32_t rep_lens[REP_DISTANCES];
300207753Smm	uint32_t rep_max_index = 0;
301207753Smm
302207753Smm	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
303207753Smm		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
304207753Smm
305207753Smm		if (not_equal_16(buf, buf_back)) {
306207753Smm			rep_lens[i] = 0;
307207753Smm			continue;
308207753Smm		}
309207753Smm
310207753Smm		uint32_t len_test;
311207753Smm		for (len_test = 2; len_test < buf_avail
312207753Smm				&& buf[len_test] == buf_back[len_test];
313207753Smm				++len_test) ;
314207753Smm
315207753Smm		rep_lens[i] = len_test;
316207753Smm		if (len_test > rep_lens[rep_max_index])
317207753Smm			rep_max_index = i;
318207753Smm	}
319207753Smm
320207753Smm	if (rep_lens[rep_max_index] >= nice_len) {
321207753Smm		*back_res = rep_max_index;
322207753Smm		*len_res = rep_lens[rep_max_index];
323207753Smm		mf_skip(mf, *len_res - 1);
324207753Smm		return UINT32_MAX;
325207753Smm	}
326207753Smm
327207753Smm
328207753Smm	if (len_main >= nice_len) {
329207753Smm		*back_res = coder->matches[matches_count - 1].dist
330207753Smm				+ REP_DISTANCES;
331207753Smm		*len_res = len_main;
332207753Smm		mf_skip(mf, len_main - 1);
333207753Smm		return UINT32_MAX;
334207753Smm	}
335207753Smm
336207753Smm	const uint8_t current_byte = *buf;
337207753Smm	const uint8_t match_byte = *(buf - coder->reps[0] - 1);
338207753Smm
339207753Smm	if (len_main < 2 && current_byte != match_byte
340207753Smm			&& rep_lens[rep_max_index] < 2) {
341207753Smm		*back_res = UINT32_MAX;
342207753Smm		*len_res = 1;
343207753Smm		return UINT32_MAX;
344207753Smm	}
345207753Smm
346207753Smm	coder->opts[0].state = coder->state;
347207753Smm
348207753Smm	const uint32_t pos_state = position & coder->pos_mask;
349207753Smm
350207753Smm	coder->opts[1].price = rc_bit_0_price(
351207753Smm				coder->is_match[coder->state][pos_state])
352207753Smm			+ get_literal_price(coder, position, buf[-1],
353207753Smm				!is_literal_state(coder->state),
354207753Smm				match_byte, current_byte);
355207753Smm
356207753Smm	make_literal(&coder->opts[1]);
357207753Smm
358207753Smm	const uint32_t match_price = rc_bit_1_price(
359207753Smm			coder->is_match[coder->state][pos_state]);
360207753Smm	const uint32_t rep_match_price = match_price
361207753Smm			+ rc_bit_1_price(coder->is_rep[coder->state]);
362207753Smm
363207753Smm	if (match_byte == current_byte) {
364207753Smm		const uint32_t short_rep_price = rep_match_price
365207753Smm				+ get_short_rep_price(
366207753Smm					coder, coder->state, pos_state);
367207753Smm
368207753Smm		if (short_rep_price < coder->opts[1].price) {
369207753Smm			coder->opts[1].price = short_rep_price;
370207753Smm			make_short_rep(&coder->opts[1]);
371207753Smm		}
372207753Smm	}
373207753Smm
374213700Smm	const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
375207753Smm
376207753Smm	if (len_end < 2) {
377207753Smm		*back_res = coder->opts[1].back_prev;
378207753Smm		*len_res = 1;
379207753Smm		return UINT32_MAX;
380207753Smm	}
381207753Smm
382207753Smm	coder->opts[1].pos_prev = 0;
383207753Smm
384207753Smm	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
385207753Smm		coder->opts[0].backs[i] = coder->reps[i];
386207753Smm
387207753Smm	uint32_t len = len_end;
388207753Smm	do {
389207753Smm		coder->opts[len].price = RC_INFINITY_PRICE;
390207753Smm	} while (--len >= 2);
391207753Smm
392207753Smm
393207753Smm	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
394207753Smm		uint32_t rep_len = rep_lens[i];
395207753Smm		if (rep_len < 2)
396207753Smm			continue;
397207753Smm
398207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
399207753Smm				coder, i, coder->state, pos_state);
400207753Smm
401207753Smm		do {
402207753Smm			const uint32_t cur_and_len_price = price
403207753Smm					+ get_len_price(
404207753Smm						&coder->rep_len_encoder,
405207753Smm						rep_len, pos_state);
406207753Smm
407207753Smm			if (cur_and_len_price < coder->opts[rep_len].price) {
408207753Smm				coder->opts[rep_len].price = cur_and_len_price;
409207753Smm				coder->opts[rep_len].pos_prev = 0;
410207753Smm				coder->opts[rep_len].back_prev = i;
411207753Smm				coder->opts[rep_len].prev_1_is_literal = false;
412207753Smm			}
413207753Smm		} while (--rep_len >= 2);
414207753Smm	}
415207753Smm
416207753Smm
417207753Smm	const uint32_t normal_match_price = match_price
418207753Smm			+ rc_bit_0_price(coder->is_rep[coder->state]);
419207753Smm
420207753Smm	len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
421207753Smm	if (len <= len_main) {
422207753Smm		uint32_t i = 0;
423207753Smm		while (len > coder->matches[i].len)
424207753Smm			++i;
425207753Smm
426207753Smm		for(; ; ++len) {
427207753Smm			const uint32_t dist = coder->matches[i].dist;
428207753Smm			const uint32_t cur_and_len_price = normal_match_price
429207753Smm					+ get_pos_len_price(coder,
430207753Smm						dist, len, pos_state);
431207753Smm
432207753Smm			if (cur_and_len_price < coder->opts[len].price) {
433207753Smm				coder->opts[len].price = cur_and_len_price;
434207753Smm				coder->opts[len].pos_prev = 0;
435207753Smm				coder->opts[len].back_prev
436207753Smm						= dist + REP_DISTANCES;
437207753Smm				coder->opts[len].prev_1_is_literal = false;
438207753Smm			}
439207753Smm
440207753Smm			if (len == coder->matches[i].len)
441207753Smm				if (++i == matches_count)
442207753Smm					break;
443207753Smm		}
444207753Smm	}
445207753Smm
446207753Smm	return len_end;
447207753Smm}
448207753Smm
449207753Smm
450207753Smmstatic inline uint32_t
451207753Smmhelper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
452207753Smm		uint32_t len_end, uint32_t position, const uint32_t cur,
453207753Smm		const uint32_t nice_len, const uint32_t buf_avail_full)
454207753Smm{
455207753Smm	uint32_t matches_count = coder->matches_count;
456207753Smm	uint32_t new_len = coder->longest_match_length;
457207753Smm	uint32_t pos_prev = coder->opts[cur].pos_prev;
458207753Smm	lzma_lzma_state state;
459207753Smm
460207753Smm	if (coder->opts[cur].prev_1_is_literal) {
461207753Smm		--pos_prev;
462207753Smm
463207753Smm		if (coder->opts[cur].prev_2) {
464207753Smm			state = coder->opts[coder->opts[cur].pos_prev_2].state;
465207753Smm
466207753Smm			if (coder->opts[cur].back_prev_2 < REP_DISTANCES)
467207753Smm				update_long_rep(state);
468207753Smm			else
469207753Smm				update_match(state);
470207753Smm
471207753Smm		} else {
472207753Smm			state = coder->opts[pos_prev].state;
473207753Smm		}
474207753Smm
475207753Smm		update_literal(state);
476207753Smm
477207753Smm	} else {
478207753Smm		state = coder->opts[pos_prev].state;
479207753Smm	}
480207753Smm
481207753Smm	if (pos_prev == cur - 1) {
482207753Smm		if (is_short_rep(coder->opts[cur]))
483207753Smm			update_short_rep(state);
484207753Smm		else
485207753Smm			update_literal(state);
486207753Smm	} else {
487207753Smm		uint32_t pos;
488207753Smm		if (coder->opts[cur].prev_1_is_literal
489207753Smm				&& coder->opts[cur].prev_2) {
490207753Smm			pos_prev = coder->opts[cur].pos_prev_2;
491207753Smm			pos = coder->opts[cur].back_prev_2;
492207753Smm			update_long_rep(state);
493207753Smm		} else {
494207753Smm			pos = coder->opts[cur].back_prev;
495207753Smm			if (pos < REP_DISTANCES)
496207753Smm				update_long_rep(state);
497207753Smm			else
498207753Smm				update_match(state);
499207753Smm		}
500207753Smm
501207753Smm		if (pos < REP_DISTANCES) {
502207753Smm			reps[0] = coder->opts[pos_prev].backs[pos];
503207753Smm
504207753Smm			uint32_t i;
505207753Smm			for (i = 1; i <= pos; ++i)
506207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
507207753Smm
508207753Smm			for (; i < REP_DISTANCES; ++i)
509207753Smm				reps[i] = coder->opts[pos_prev].backs[i];
510207753Smm
511207753Smm		} else {
512207753Smm			reps[0] = pos - REP_DISTANCES;
513207753Smm
514207753Smm			for (uint32_t i = 1; i < REP_DISTANCES; ++i)
515207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
516207753Smm		}
517207753Smm	}
518207753Smm
519207753Smm	coder->opts[cur].state = state;
520207753Smm
521207753Smm	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
522207753Smm		coder->opts[cur].backs[i] = reps[i];
523207753Smm
524207753Smm	const uint32_t cur_price = coder->opts[cur].price;
525207753Smm
526207753Smm	const uint8_t current_byte = *buf;
527207753Smm	const uint8_t match_byte = *(buf - reps[0] - 1);
528207753Smm
529207753Smm	const uint32_t pos_state = position & coder->pos_mask;
530207753Smm
531207753Smm	const uint32_t cur_and_1_price = cur_price
532207753Smm			+ rc_bit_0_price(coder->is_match[state][pos_state])
533207753Smm			+ get_literal_price(coder, position, buf[-1],
534207753Smm			!is_literal_state(state), match_byte, current_byte);
535207753Smm
536207753Smm	bool next_is_literal = false;
537207753Smm
538207753Smm	if (cur_and_1_price < coder->opts[cur + 1].price) {
539207753Smm		coder->opts[cur + 1].price = cur_and_1_price;
540207753Smm		coder->opts[cur + 1].pos_prev = cur;
541207753Smm		make_literal(&coder->opts[cur + 1]);
542207753Smm		next_is_literal = true;
543207753Smm	}
544207753Smm
545207753Smm	const uint32_t match_price = cur_price
546207753Smm			+ rc_bit_1_price(coder->is_match[state][pos_state]);
547207753Smm	const uint32_t rep_match_price = match_price
548207753Smm			+ rc_bit_1_price(coder->is_rep[state]);
549207753Smm
550207753Smm	if (match_byte == current_byte
551207753Smm			&& !(coder->opts[cur + 1].pos_prev < cur
552207753Smm				&& coder->opts[cur + 1].back_prev == 0)) {
553207753Smm
554207753Smm		const uint32_t short_rep_price = rep_match_price
555207753Smm				+ get_short_rep_price(coder, state, pos_state);
556207753Smm
557207753Smm		if (short_rep_price <= coder->opts[cur + 1].price) {
558207753Smm			coder->opts[cur + 1].price = short_rep_price;
559207753Smm			coder->opts[cur + 1].pos_prev = cur;
560207753Smm			make_short_rep(&coder->opts[cur + 1]);
561207753Smm			next_is_literal = true;
562207753Smm		}
563207753Smm	}
564207753Smm
565207753Smm	if (buf_avail_full < 2)
566207753Smm		return len_end;
567207753Smm
568213700Smm	const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
569207753Smm
570207753Smm	if (!next_is_literal && match_byte != current_byte) { // speed optimization
571207753Smm		// try literal + rep0
572207753Smm		const uint8_t *const buf_back = buf - reps[0] - 1;
573213700Smm		const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
574207753Smm
575207753Smm		uint32_t len_test = 1;
576207753Smm		while (len_test < limit && buf[len_test] == buf_back[len_test])
577207753Smm			++len_test;
578207753Smm
579207753Smm		--len_test;
580207753Smm
581207753Smm		if (len_test >= 2) {
582207753Smm			lzma_lzma_state state_2 = state;
583207753Smm			update_literal(state_2);
584207753Smm
585207753Smm			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
586207753Smm			const uint32_t next_rep_match_price = cur_and_1_price
587207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
588207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
589207753Smm
590207753Smm			//for (; len_test >= 2; --len_test) {
591207753Smm			const uint32_t offset = cur + 1 + len_test;
592207753Smm
593207753Smm			while (len_end < offset)
594207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
595207753Smm
596207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
597207753Smm					+ get_rep_price(coder, 0, len_test,
598207753Smm						state_2, pos_state_next);
599207753Smm
600207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
601207753Smm				coder->opts[offset].price = cur_and_len_price;
602207753Smm				coder->opts[offset].pos_prev = cur + 1;
603207753Smm				coder->opts[offset].back_prev = 0;
604207753Smm				coder->opts[offset].prev_1_is_literal = true;
605207753Smm				coder->opts[offset].prev_2 = false;
606207753Smm			}
607207753Smm			//}
608207753Smm		}
609207753Smm	}
610207753Smm
611207753Smm
612207753Smm	uint32_t start_len = 2; // speed optimization
613207753Smm
614207753Smm	for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) {
615207753Smm		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
616207753Smm		if (not_equal_16(buf, buf_back))
617207753Smm			continue;
618207753Smm
619207753Smm		uint32_t len_test;
620207753Smm		for (len_test = 2; len_test < buf_avail
621207753Smm				&& buf[len_test] == buf_back[len_test];
622207753Smm				++len_test) ;
623207753Smm
624207753Smm		while (len_end < cur + len_test)
625207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
626207753Smm
627207753Smm		const uint32_t len_test_temp = len_test;
628207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
629207753Smm				coder, rep_index, state, pos_state);
630207753Smm
631207753Smm		do {
632207753Smm			const uint32_t cur_and_len_price = price
633207753Smm					+ get_len_price(&coder->rep_len_encoder,
634207753Smm							len_test, pos_state);
635207753Smm
636207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
637207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
638207753Smm				coder->opts[cur + len_test].pos_prev = cur;
639207753Smm				coder->opts[cur + len_test].back_prev = rep_index;
640207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
641207753Smm			}
642207753Smm		} while (--len_test >= 2);
643207753Smm
644207753Smm		len_test = len_test_temp;
645207753Smm
646207753Smm		if (rep_index == 0)
647207753Smm			start_len = len_test + 1;
648207753Smm
649207753Smm
650207753Smm		uint32_t len_test_2 = len_test + 1;
651213700Smm		const uint32_t limit = my_min(buf_avail_full,
652207753Smm				len_test_2 + nice_len);
653207753Smm		for (; len_test_2 < limit
654207753Smm				&& buf[len_test_2] == buf_back[len_test_2];
655207753Smm				++len_test_2) ;
656207753Smm
657207753Smm		len_test_2 -= len_test + 1;
658207753Smm
659207753Smm		if (len_test_2 >= 2) {
660207753Smm			lzma_lzma_state state_2 = state;
661207753Smm			update_long_rep(state_2);
662207753Smm
663207753Smm			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
664207753Smm
665207753Smm			const uint32_t cur_and_len_literal_price = price
666207753Smm					+ get_len_price(&coder->rep_len_encoder,
667207753Smm						len_test, pos_state)
668207753Smm					+ rc_bit_0_price(coder->is_match[state_2][pos_state_next])
669207753Smm					+ get_literal_price(coder, position + len_test,
670207753Smm						buf[len_test - 1], true,
671207753Smm						buf_back[len_test], buf[len_test]);
672207753Smm
673207753Smm			update_literal(state_2);
674207753Smm
675207753Smm			pos_state_next = (position + len_test + 1) & coder->pos_mask;
676207753Smm
677207753Smm			const uint32_t next_rep_match_price = cur_and_len_literal_price
678207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
679207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
680207753Smm
681207753Smm			//for(; len_test_2 >= 2; len_test_2--) {
682207753Smm			const uint32_t offset = cur + len_test + 1 + len_test_2;
683207753Smm
684207753Smm			while (len_end < offset)
685207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
686207753Smm
687207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
688207753Smm					+ get_rep_price(coder, 0, len_test_2,
689207753Smm						state_2, pos_state_next);
690207753Smm
691207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
692207753Smm				coder->opts[offset].price = cur_and_len_price;
693207753Smm				coder->opts[offset].pos_prev = cur + len_test + 1;
694207753Smm				coder->opts[offset].back_prev = 0;
695207753Smm				coder->opts[offset].prev_1_is_literal = true;
696207753Smm				coder->opts[offset].prev_2 = true;
697207753Smm				coder->opts[offset].pos_prev_2 = cur;
698207753Smm				coder->opts[offset].back_prev_2 = rep_index;
699207753Smm			}
700207753Smm			//}
701207753Smm		}
702207753Smm	}
703207753Smm
704207753Smm
705207753Smm	//for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
706207753Smm	if (new_len > buf_avail) {
707207753Smm		new_len = buf_avail;
708207753Smm
709207753Smm		matches_count = 0;
710207753Smm		while (new_len > coder->matches[matches_count].len)
711207753Smm			++matches_count;
712207753Smm
713207753Smm		coder->matches[matches_count++].len = new_len;
714207753Smm	}
715207753Smm
716207753Smm
717207753Smm	if (new_len >= start_len) {
718207753Smm		const uint32_t normal_match_price = match_price
719207753Smm				+ rc_bit_0_price(coder->is_rep[state]);
720207753Smm
721207753Smm		while (len_end < cur + new_len)
722207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
723207753Smm
724207753Smm		uint32_t i = 0;
725207753Smm		while (start_len > coder->matches[i].len)
726207753Smm			++i;
727207753Smm
728207753Smm		for (uint32_t len_test = start_len; ; ++len_test) {
729207753Smm			const uint32_t cur_back = coder->matches[i].dist;
730207753Smm			uint32_t cur_and_len_price = normal_match_price
731207753Smm					+ get_pos_len_price(coder,
732207753Smm						cur_back, len_test, pos_state);
733207753Smm
734207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
735207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
736207753Smm				coder->opts[cur + len_test].pos_prev = cur;
737207753Smm				coder->opts[cur + len_test].back_prev
738207753Smm						= cur_back + REP_DISTANCES;
739207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
740207753Smm			}
741207753Smm
742207753Smm			if (len_test == coder->matches[i].len) {
743207753Smm				// Try Match + Literal + Rep0
744207753Smm				const uint8_t *const buf_back = buf - cur_back - 1;
745207753Smm				uint32_t len_test_2 = len_test + 1;
746213700Smm				const uint32_t limit = my_min(buf_avail_full,
747207753Smm						len_test_2 + nice_len);
748207753Smm
749207753Smm				for (; len_test_2 < limit &&
750207753Smm						buf[len_test_2] == buf_back[len_test_2];
751207753Smm						++len_test_2) ;
752207753Smm
753207753Smm				len_test_2 -= len_test + 1;
754207753Smm
755207753Smm				if (len_test_2 >= 2) {
756207753Smm					lzma_lzma_state state_2 = state;
757207753Smm					update_match(state_2);
758207753Smm					uint32_t pos_state_next
759207753Smm							= (position + len_test) & coder->pos_mask;
760207753Smm
761207753Smm					const uint32_t cur_and_len_literal_price = cur_and_len_price
762207753Smm							+ rc_bit_0_price(
763207753Smm								coder->is_match[state_2][pos_state_next])
764207753Smm							+ get_literal_price(coder,
765207753Smm								position + len_test,
766207753Smm								buf[len_test - 1],
767207753Smm								true,
768207753Smm								buf_back[len_test],
769207753Smm								buf[len_test]);
770207753Smm
771207753Smm					update_literal(state_2);
772207753Smm					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
773207753Smm
774207753Smm					const uint32_t next_rep_match_price
775207753Smm							= cur_and_len_literal_price
776207753Smm							+ rc_bit_1_price(
777207753Smm								coder->is_match[state_2][pos_state_next])
778207753Smm							+ rc_bit_1_price(coder->is_rep[state_2]);
779207753Smm
780207753Smm					// for(; len_test_2 >= 2; --len_test_2) {
781207753Smm					const uint32_t offset = cur + len_test + 1 + len_test_2;
782207753Smm
783207753Smm					while (len_end < offset)
784207753Smm						coder->opts[++len_end].price = RC_INFINITY_PRICE;
785207753Smm
786207753Smm					cur_and_len_price = next_rep_match_price
787207753Smm							+ get_rep_price(coder, 0, len_test_2,
788207753Smm								state_2, pos_state_next);
789207753Smm
790207753Smm					if (cur_and_len_price < coder->opts[offset].price) {
791207753Smm						coder->opts[offset].price = cur_and_len_price;
792207753Smm						coder->opts[offset].pos_prev = cur + len_test + 1;
793207753Smm						coder->opts[offset].back_prev = 0;
794207753Smm						coder->opts[offset].prev_1_is_literal = true;
795207753Smm						coder->opts[offset].prev_2 = true;
796207753Smm						coder->opts[offset].pos_prev_2 = cur;
797207753Smm						coder->opts[offset].back_prev_2
798207753Smm								= cur_back + REP_DISTANCES;
799207753Smm					}
800207753Smm					//}
801207753Smm				}
802207753Smm
803207753Smm				if (++i == matches_count)
804207753Smm					break;
805207753Smm			}
806207753Smm		}
807207753Smm	}
808207753Smm
809207753Smm	return len_end;
810207753Smm}
811207753Smm
812207753Smm
813207753Smmextern void
814207753Smmlzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
815207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
816207753Smm		uint32_t position)
817207753Smm{
818207753Smm	// If we have symbols pending, return the next pending symbol.
819207753Smm	if (coder->opts_end_index != coder->opts_current_index) {
820207753Smm		assert(mf->read_ahead > 0);
821207753Smm		*len_res = coder->opts[coder->opts_current_index].pos_prev
822207753Smm				- coder->opts_current_index;
823207753Smm		*back_res = coder->opts[coder->opts_current_index].back_prev;
824207753Smm		coder->opts_current_index = coder->opts[
825207753Smm				coder->opts_current_index].pos_prev;
826207753Smm		return;
827207753Smm	}
828207753Smm
829207753Smm	// Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
830207753Smm	// this was done in both initialization function and in the main loop.
831207753Smm	// In liblzma they were moved into this single place.
832207753Smm	if (mf->read_ahead == 0) {
833207753Smm		if (coder->match_price_count >= (1 << 7))
834207753Smm			fill_distances_prices(coder);
835207753Smm
836207753Smm		if (coder->align_price_count >= ALIGN_TABLE_SIZE)
837207753Smm			fill_align_prices(coder);
838207753Smm	}
839207753Smm
840207753Smm	// TODO: This needs quite a bit of cleaning still. But splitting
841207753Smm	// the original function into two pieces makes it at least a little
842207753Smm	// more readable, since those two parts don't share many variables.
843207753Smm
844207753Smm	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
845207753Smm	if (len_end == UINT32_MAX)
846207753Smm		return;
847207753Smm
848207753Smm	uint32_t reps[REP_DISTANCES];
849207753Smm	memcpy(reps, coder->reps, sizeof(reps));
850207753Smm
851207753Smm	uint32_t cur;
852207753Smm	for (cur = 1; cur < len_end; ++cur) {
853207753Smm		assert(cur < OPTS);
854207753Smm
855207753Smm		coder->longest_match_length = mf_find(
856207753Smm				mf, &coder->matches_count, coder->matches);
857207753Smm
858207753Smm		if (coder->longest_match_length >= mf->nice_len)
859207753Smm			break;
860207753Smm
861207753Smm		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
862207753Smm				position + cur, cur, mf->nice_len,
863213700Smm				my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
864207753Smm	}
865207753Smm
866207753Smm	backward(coder, len_res, back_res, cur);
867207753Smm	return;
868207753Smm}
869