1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       lzma_encoder_optimum_normal.c
4207753Smm//
5207753Smm//  Author:     Igor Pavlov
6207753Smm//
7207753Smm//  This file has been put into the public domain.
8207753Smm//  You can do whatever you want with this file.
9207753Smm//
10207753Smm///////////////////////////////////////////////////////////////////////////////
11207753Smm
12207753Smm#include "lzma_encoder_private.h"
13207753Smm#include "fastpos.h"
14292588Sdelphij#include "memcmplen.h"
15207753Smm
16207753Smm
17207753Smm////////////
18207753Smm// Prices //
19207753Smm////////////
20207753Smm
21207753Smmstatic uint32_t
22312518Sdelphijget_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos,
23207753Smm		const uint32_t prev_byte, const bool match_mode,
24207753Smm		uint32_t match_byte, uint32_t symbol)
25207753Smm{
26207753Smm	const probability *const subcoder = literal_subcoder(coder->literal,
27207753Smm			coder->literal_context_bits, coder->literal_pos_mask,
28207753Smm			pos, prev_byte);
29207753Smm
30207753Smm	uint32_t price = 0;
31207753Smm
32207753Smm	if (!match_mode) {
33207753Smm		price = rc_bittree_price(subcoder, 8, symbol);
34207753Smm	} else {
35207753Smm		uint32_t offset = 0x100;
36207753Smm		symbol += UINT32_C(1) << 8;
37207753Smm
38207753Smm		do {
39207753Smm			match_byte <<= 1;
40207753Smm
41207753Smm			const uint32_t match_bit = match_byte & offset;
42207753Smm			const uint32_t subcoder_index
43207753Smm					= offset + match_bit + (symbol >> 8);
44207753Smm			const uint32_t bit = (symbol >> 7) & 1;
45207753Smm			price += rc_bit_price(subcoder[subcoder_index], bit);
46207753Smm
47207753Smm			symbol <<= 1;
48207753Smm			offset &= ~(match_byte ^ symbol);
49207753Smm
50207753Smm		} while (symbol < (UINT32_C(1) << 16));
51207753Smm	}
52207753Smm
53207753Smm	return price;
54207753Smm}
55207753Smm
56207753Smm
57207753Smmstatic inline uint32_t
58207753Smmget_len_price(const lzma_length_encoder *const lencoder,
59207753Smm		const uint32_t len, const uint32_t pos_state)
60207753Smm{
61207753Smm	// NOTE: Unlike the other price tables, length prices are updated
62207753Smm	// in lzma_encoder.c
63207753Smm	return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
64207753Smm}
65207753Smm
66207753Smm
67207753Smmstatic inline uint32_t
68312518Sdelphijget_short_rep_price(const lzma_lzma1_encoder *const coder,
69207753Smm		const lzma_lzma_state state, const uint32_t pos_state)
70207753Smm{
71207753Smm	return rc_bit_0_price(coder->is_rep0[state])
72207753Smm		+ rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
73207753Smm}
74207753Smm
75207753Smm
76207753Smmstatic inline uint32_t
77312518Sdelphijget_pure_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
78207753Smm		const lzma_lzma_state state, uint32_t pos_state)
79207753Smm{
80207753Smm	uint32_t price;
81207753Smm
82207753Smm	if (rep_index == 0) {
83207753Smm		price = rc_bit_0_price(coder->is_rep0[state]);
84207753Smm		price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
85207753Smm	} else {
86207753Smm		price = rc_bit_1_price(coder->is_rep0[state]);
87207753Smm
88207753Smm		if (rep_index == 1) {
89207753Smm			price += rc_bit_0_price(coder->is_rep1[state]);
90207753Smm		} else {
91207753Smm			price += rc_bit_1_price(coder->is_rep1[state]);
92207753Smm			price += rc_bit_price(coder->is_rep2[state],
93207753Smm					rep_index - 2);
94207753Smm		}
95207753Smm	}
96207753Smm
97207753Smm	return price;
98207753Smm}
99207753Smm
100207753Smm
101207753Smmstatic inline uint32_t
102312518Sdelphijget_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
103207753Smm		const uint32_t len, const lzma_lzma_state state,
104207753Smm		const uint32_t pos_state)
105207753Smm{
106207753Smm	return get_len_price(&coder->rep_len_encoder, len, pos_state)
107207753Smm		+ get_pure_rep_price(coder, rep_index, state, pos_state);
108207753Smm}
109207753Smm
110207753Smm
111207753Smmstatic inline uint32_t
112312518Sdelphijget_dist_len_price(const lzma_lzma1_encoder *const coder, const uint32_t dist,
113207753Smm		const uint32_t len, const uint32_t pos_state)
114207753Smm{
115292588Sdelphij	const uint32_t dist_state = get_dist_state(len);
116207753Smm	uint32_t price;
117207753Smm
118292588Sdelphij	if (dist < FULL_DISTANCES) {
119292588Sdelphij		price = coder->dist_prices[dist_state][dist];
120207753Smm	} else {
121292588Sdelphij		const uint32_t dist_slot = get_dist_slot_2(dist);
122292588Sdelphij		price = coder->dist_slot_prices[dist_state][dist_slot]
123292588Sdelphij				+ coder->align_prices[dist & ALIGN_MASK];
124207753Smm	}
125207753Smm
126207753Smm	price += get_len_price(&coder->match_len_encoder, len, pos_state);
127207753Smm
128207753Smm	return price;
129207753Smm}
130207753Smm
131207753Smm
132207753Smmstatic void
133312518Sdelphijfill_dist_prices(lzma_lzma1_encoder *coder)
134207753Smm{
135292588Sdelphij	for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) {
136207753Smm
137292588Sdelphij		uint32_t *const dist_slot_prices
138292588Sdelphij				= coder->dist_slot_prices[dist_state];
139207753Smm
140292588Sdelphij		// Price to encode the dist_slot.
141292588Sdelphij		for (uint32_t dist_slot = 0;
142292588Sdelphij				dist_slot < coder->dist_table_size; ++dist_slot)
143292588Sdelphij			dist_slot_prices[dist_slot] = rc_bittree_price(
144292588Sdelphij					coder->dist_slot[dist_state],
145292588Sdelphij					DIST_SLOT_BITS, dist_slot);
146207753Smm
147207753Smm		// For matches with distance >= FULL_DISTANCES, add the price
148207753Smm		// of the direct bits part of the match distance. (Align bits
149207753Smm		// are handled by fill_align_prices()).
150292588Sdelphij		for (uint32_t dist_slot = DIST_MODEL_END;
151292588Sdelphij				dist_slot < coder->dist_table_size;
152292588Sdelphij				++dist_slot)
153292588Sdelphij			dist_slot_prices[dist_slot] += rc_direct_price(
154292588Sdelphij					((dist_slot >> 1) - 1) - ALIGN_BITS);
155207753Smm
156207753Smm		// Distances in the range [0, 3] are fully encoded with
157292588Sdelphij		// dist_slot, so they are used for coder->dist_prices
158207753Smm		// as is.
159292588Sdelphij		for (uint32_t i = 0; i < DIST_MODEL_START; ++i)
160292588Sdelphij			coder->dist_prices[dist_state][i]
161292588Sdelphij					= dist_slot_prices[i];
162207753Smm	}
163207753Smm
164292588Sdelphij	// Distances in the range [4, 127] depend on dist_slot and
165292588Sdelphij	// dist_special. We do this in a loop separate from the above
166292588Sdelphij	// loop to avoid redundant calls to get_dist_slot().
167292588Sdelphij	for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) {
168292588Sdelphij		const uint32_t dist_slot = get_dist_slot(i);
169292588Sdelphij		const uint32_t footer_bits = ((dist_slot >> 1) - 1);
170292588Sdelphij		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
171207753Smm		const uint32_t price = rc_bittree_reverse_price(
172292588Sdelphij				coder->dist_special + base - dist_slot - 1,
173207753Smm				footer_bits, i - base);
174207753Smm
175292588Sdelphij		for (uint32_t dist_state = 0; dist_state < DIST_STATES;
176292588Sdelphij				++dist_state)
177292588Sdelphij			coder->dist_prices[dist_state][i]
178292588Sdelphij					= price + coder->dist_slot_prices[
179292588Sdelphij						dist_state][dist_slot];
180207753Smm	}
181207753Smm
182207753Smm	coder->match_price_count = 0;
183207753Smm	return;
184207753Smm}
185207753Smm
186207753Smm
187207753Smmstatic void
188312518Sdelphijfill_align_prices(lzma_lzma1_encoder *coder)
189207753Smm{
190292588Sdelphij	for (uint32_t i = 0; i < ALIGN_SIZE; ++i)
191207753Smm		coder->align_prices[i] = rc_bittree_reverse_price(
192292588Sdelphij				coder->dist_align, ALIGN_BITS, i);
193207753Smm
194207753Smm	coder->align_price_count = 0;
195207753Smm	return;
196207753Smm}
197207753Smm
198207753Smm
199207753Smm/////////////
200207753Smm// Optimal //
201207753Smm/////////////
202207753Smm
203207753Smmstatic inline void
204207753Smmmake_literal(lzma_optimal *optimal)
205207753Smm{
206207753Smm	optimal->back_prev = UINT32_MAX;
207207753Smm	optimal->prev_1_is_literal = false;
208207753Smm}
209207753Smm
210207753Smm
211207753Smmstatic inline void
212207753Smmmake_short_rep(lzma_optimal *optimal)
213207753Smm{
214207753Smm	optimal->back_prev = 0;
215207753Smm	optimal->prev_1_is_literal = false;
216207753Smm}
217207753Smm
218207753Smm
219207753Smm#define is_short_rep(optimal) \
220207753Smm	((optimal).back_prev == 0)
221207753Smm
222207753Smm
223207753Smmstatic void
224312518Sdelphijbackward(lzma_lzma1_encoder *restrict coder, uint32_t *restrict len_res,
225207753Smm		uint32_t *restrict back_res, uint32_t cur)
226207753Smm{
227207753Smm	coder->opts_end_index = cur;
228207753Smm
229207753Smm	uint32_t pos_mem = coder->opts[cur].pos_prev;
230207753Smm	uint32_t back_mem = coder->opts[cur].back_prev;
231207753Smm
232207753Smm	do {
233207753Smm		if (coder->opts[cur].prev_1_is_literal) {
234207753Smm			make_literal(&coder->opts[pos_mem]);
235207753Smm			coder->opts[pos_mem].pos_prev = pos_mem - 1;
236207753Smm
237207753Smm			if (coder->opts[cur].prev_2) {
238207753Smm				coder->opts[pos_mem - 1].prev_1_is_literal
239207753Smm						= false;
240207753Smm				coder->opts[pos_mem - 1].pos_prev
241207753Smm						= coder->opts[cur].pos_prev_2;
242207753Smm				coder->opts[pos_mem - 1].back_prev
243207753Smm						= coder->opts[cur].back_prev_2;
244207753Smm			}
245207753Smm		}
246207753Smm
247207753Smm		const uint32_t pos_prev = pos_mem;
248207753Smm		const uint32_t back_cur = back_mem;
249207753Smm
250207753Smm		back_mem = coder->opts[pos_prev].back_prev;
251207753Smm		pos_mem = coder->opts[pos_prev].pos_prev;
252207753Smm
253207753Smm		coder->opts[pos_prev].back_prev = back_cur;
254207753Smm		coder->opts[pos_prev].pos_prev = cur;
255207753Smm		cur = pos_prev;
256207753Smm
257207753Smm	} while (cur != 0);
258207753Smm
259207753Smm	coder->opts_current_index = coder->opts[0].pos_prev;
260207753Smm	*len_res = coder->opts[0].pos_prev;
261207753Smm	*back_res = coder->opts[0].back_prev;
262207753Smm
263207753Smm	return;
264207753Smm}
265207753Smm
266207753Smm
267207753Smm//////////
268207753Smm// Main //
269207753Smm//////////
270207753Smm
271207753Smmstatic inline uint32_t
272312518Sdelphijhelper1(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
273207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
274207753Smm		uint32_t position)
275207753Smm{
276207753Smm	const uint32_t nice_len = mf->nice_len;
277207753Smm
278207753Smm	uint32_t len_main;
279207753Smm	uint32_t matches_count;
280207753Smm
281207753Smm	if (mf->read_ahead == 0) {
282207753Smm		len_main = mf_find(mf, &matches_count, coder->matches);
283207753Smm	} else {
284207753Smm		assert(mf->read_ahead == 1);
285207753Smm		len_main = coder->longest_match_length;
286207753Smm		matches_count = coder->matches_count;
287207753Smm	}
288207753Smm
289213700Smm	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
290207753Smm	if (buf_avail < 2) {
291207753Smm		*back_res = UINT32_MAX;
292207753Smm		*len_res = 1;
293207753Smm		return UINT32_MAX;
294207753Smm	}
295207753Smm
296207753Smm	const uint8_t *const buf = mf_ptr(mf) - 1;
297207753Smm
298292588Sdelphij	uint32_t rep_lens[REPS];
299207753Smm	uint32_t rep_max_index = 0;
300207753Smm
301292588Sdelphij	for (uint32_t i = 0; i < REPS; ++i) {
302207753Smm		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
303207753Smm
304207753Smm		if (not_equal_16(buf, buf_back)) {
305207753Smm			rep_lens[i] = 0;
306207753Smm			continue;
307207753Smm		}
308207753Smm
309292588Sdelphij		rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail);
310207753Smm
311292588Sdelphij		if (rep_lens[i] > rep_lens[rep_max_index])
312207753Smm			rep_max_index = i;
313207753Smm	}
314207753Smm
315207753Smm	if (rep_lens[rep_max_index] >= nice_len) {
316207753Smm		*back_res = rep_max_index;
317207753Smm		*len_res = rep_lens[rep_max_index];
318207753Smm		mf_skip(mf, *len_res - 1);
319207753Smm		return UINT32_MAX;
320207753Smm	}
321207753Smm
322207753Smm
323207753Smm	if (len_main >= nice_len) {
324292588Sdelphij		*back_res = coder->matches[matches_count - 1].dist + REPS;
325207753Smm		*len_res = len_main;
326207753Smm		mf_skip(mf, len_main - 1);
327207753Smm		return UINT32_MAX;
328207753Smm	}
329207753Smm
330207753Smm	const uint8_t current_byte = *buf;
331207753Smm	const uint8_t match_byte = *(buf - coder->reps[0] - 1);
332207753Smm
333207753Smm	if (len_main < 2 && current_byte != match_byte
334207753Smm			&& rep_lens[rep_max_index] < 2) {
335207753Smm		*back_res = UINT32_MAX;
336207753Smm		*len_res = 1;
337207753Smm		return UINT32_MAX;
338207753Smm	}
339207753Smm
340207753Smm	coder->opts[0].state = coder->state;
341207753Smm
342207753Smm	const uint32_t pos_state = position & coder->pos_mask;
343207753Smm
344207753Smm	coder->opts[1].price = rc_bit_0_price(
345207753Smm				coder->is_match[coder->state][pos_state])
346207753Smm			+ get_literal_price(coder, position, buf[-1],
347207753Smm				!is_literal_state(coder->state),
348207753Smm				match_byte, current_byte);
349207753Smm
350207753Smm	make_literal(&coder->opts[1]);
351207753Smm
352207753Smm	const uint32_t match_price = rc_bit_1_price(
353207753Smm			coder->is_match[coder->state][pos_state]);
354207753Smm	const uint32_t rep_match_price = match_price
355207753Smm			+ rc_bit_1_price(coder->is_rep[coder->state]);
356207753Smm
357207753Smm	if (match_byte == current_byte) {
358207753Smm		const uint32_t short_rep_price = rep_match_price
359207753Smm				+ get_short_rep_price(
360207753Smm					coder, coder->state, pos_state);
361207753Smm
362207753Smm		if (short_rep_price < coder->opts[1].price) {
363207753Smm			coder->opts[1].price = short_rep_price;
364207753Smm			make_short_rep(&coder->opts[1]);
365207753Smm		}
366207753Smm	}
367207753Smm
368213700Smm	const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
369207753Smm
370207753Smm	if (len_end < 2) {
371207753Smm		*back_res = coder->opts[1].back_prev;
372207753Smm		*len_res = 1;
373207753Smm		return UINT32_MAX;
374207753Smm	}
375207753Smm
376207753Smm	coder->opts[1].pos_prev = 0;
377207753Smm
378292588Sdelphij	for (uint32_t i = 0; i < REPS; ++i)
379207753Smm		coder->opts[0].backs[i] = coder->reps[i];
380207753Smm
381207753Smm	uint32_t len = len_end;
382207753Smm	do {
383207753Smm		coder->opts[len].price = RC_INFINITY_PRICE;
384207753Smm	} while (--len >= 2);
385207753Smm
386207753Smm
387292588Sdelphij	for (uint32_t i = 0; i < REPS; ++i) {
388207753Smm		uint32_t rep_len = rep_lens[i];
389207753Smm		if (rep_len < 2)
390207753Smm			continue;
391207753Smm
392207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
393207753Smm				coder, i, coder->state, pos_state);
394207753Smm
395207753Smm		do {
396207753Smm			const uint32_t cur_and_len_price = price
397207753Smm					+ get_len_price(
398207753Smm						&coder->rep_len_encoder,
399207753Smm						rep_len, pos_state);
400207753Smm
401207753Smm			if (cur_and_len_price < coder->opts[rep_len].price) {
402207753Smm				coder->opts[rep_len].price = cur_and_len_price;
403207753Smm				coder->opts[rep_len].pos_prev = 0;
404207753Smm				coder->opts[rep_len].back_prev = i;
405207753Smm				coder->opts[rep_len].prev_1_is_literal = false;
406207753Smm			}
407207753Smm		} while (--rep_len >= 2);
408207753Smm	}
409207753Smm
410207753Smm
411207753Smm	const uint32_t normal_match_price = match_price
412207753Smm			+ rc_bit_0_price(coder->is_rep[coder->state]);
413207753Smm
414207753Smm	len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
415207753Smm	if (len <= len_main) {
416207753Smm		uint32_t i = 0;
417207753Smm		while (len > coder->matches[i].len)
418207753Smm			++i;
419207753Smm
420207753Smm		for(; ; ++len) {
421207753Smm			const uint32_t dist = coder->matches[i].dist;
422207753Smm			const uint32_t cur_and_len_price = normal_match_price
423292588Sdelphij					+ get_dist_len_price(coder,
424207753Smm						dist, len, pos_state);
425207753Smm
426207753Smm			if (cur_and_len_price < coder->opts[len].price) {
427207753Smm				coder->opts[len].price = cur_and_len_price;
428207753Smm				coder->opts[len].pos_prev = 0;
429292588Sdelphij				coder->opts[len].back_prev = dist + REPS;
430207753Smm				coder->opts[len].prev_1_is_literal = false;
431207753Smm			}
432207753Smm
433207753Smm			if (len == coder->matches[i].len)
434207753Smm				if (++i == matches_count)
435207753Smm					break;
436207753Smm		}
437207753Smm	}
438207753Smm
439207753Smm	return len_end;
440207753Smm}
441207753Smm
442207753Smm
443207753Smmstatic inline uint32_t
444312518Sdelphijhelper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf,
445207753Smm		uint32_t len_end, uint32_t position, const uint32_t cur,
446207753Smm		const uint32_t nice_len, const uint32_t buf_avail_full)
447207753Smm{
448207753Smm	uint32_t matches_count = coder->matches_count;
449207753Smm	uint32_t new_len = coder->longest_match_length;
450207753Smm	uint32_t pos_prev = coder->opts[cur].pos_prev;
451207753Smm	lzma_lzma_state state;
452207753Smm
453207753Smm	if (coder->opts[cur].prev_1_is_literal) {
454207753Smm		--pos_prev;
455207753Smm
456207753Smm		if (coder->opts[cur].prev_2) {
457207753Smm			state = coder->opts[coder->opts[cur].pos_prev_2].state;
458207753Smm
459292588Sdelphij			if (coder->opts[cur].back_prev_2 < REPS)
460207753Smm				update_long_rep(state);
461207753Smm			else
462207753Smm				update_match(state);
463207753Smm
464207753Smm		} else {
465207753Smm			state = coder->opts[pos_prev].state;
466207753Smm		}
467207753Smm
468207753Smm		update_literal(state);
469207753Smm
470207753Smm	} else {
471207753Smm		state = coder->opts[pos_prev].state;
472207753Smm	}
473207753Smm
474207753Smm	if (pos_prev == cur - 1) {
475207753Smm		if (is_short_rep(coder->opts[cur]))
476207753Smm			update_short_rep(state);
477207753Smm		else
478207753Smm			update_literal(state);
479207753Smm	} else {
480207753Smm		uint32_t pos;
481207753Smm		if (coder->opts[cur].prev_1_is_literal
482207753Smm				&& coder->opts[cur].prev_2) {
483207753Smm			pos_prev = coder->opts[cur].pos_prev_2;
484207753Smm			pos = coder->opts[cur].back_prev_2;
485207753Smm			update_long_rep(state);
486207753Smm		} else {
487207753Smm			pos = coder->opts[cur].back_prev;
488292588Sdelphij			if (pos < REPS)
489207753Smm				update_long_rep(state);
490207753Smm			else
491207753Smm				update_match(state);
492207753Smm		}
493207753Smm
494292588Sdelphij		if (pos < REPS) {
495207753Smm			reps[0] = coder->opts[pos_prev].backs[pos];
496207753Smm
497207753Smm			uint32_t i;
498207753Smm			for (i = 1; i <= pos; ++i)
499207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
500207753Smm
501292588Sdelphij			for (; i < REPS; ++i)
502207753Smm				reps[i] = coder->opts[pos_prev].backs[i];
503207753Smm
504207753Smm		} else {
505292588Sdelphij			reps[0] = pos - REPS;
506207753Smm
507292588Sdelphij			for (uint32_t i = 1; i < REPS; ++i)
508207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
509207753Smm		}
510207753Smm	}
511207753Smm
512207753Smm	coder->opts[cur].state = state;
513207753Smm
514292588Sdelphij	for (uint32_t i = 0; i < REPS; ++i)
515207753Smm		coder->opts[cur].backs[i] = reps[i];
516207753Smm
517207753Smm	const uint32_t cur_price = coder->opts[cur].price;
518207753Smm
519207753Smm	const uint8_t current_byte = *buf;
520207753Smm	const uint8_t match_byte = *(buf - reps[0] - 1);
521207753Smm
522207753Smm	const uint32_t pos_state = position & coder->pos_mask;
523207753Smm
524207753Smm	const uint32_t cur_and_1_price = cur_price
525207753Smm			+ rc_bit_0_price(coder->is_match[state][pos_state])
526207753Smm			+ get_literal_price(coder, position, buf[-1],
527207753Smm			!is_literal_state(state), match_byte, current_byte);
528207753Smm
529207753Smm	bool next_is_literal = false;
530207753Smm
531207753Smm	if (cur_and_1_price < coder->opts[cur + 1].price) {
532207753Smm		coder->opts[cur + 1].price = cur_and_1_price;
533207753Smm		coder->opts[cur + 1].pos_prev = cur;
534207753Smm		make_literal(&coder->opts[cur + 1]);
535207753Smm		next_is_literal = true;
536207753Smm	}
537207753Smm
538207753Smm	const uint32_t match_price = cur_price
539207753Smm			+ rc_bit_1_price(coder->is_match[state][pos_state]);
540207753Smm	const uint32_t rep_match_price = match_price
541207753Smm			+ rc_bit_1_price(coder->is_rep[state]);
542207753Smm
543207753Smm	if (match_byte == current_byte
544207753Smm			&& !(coder->opts[cur + 1].pos_prev < cur
545207753Smm				&& coder->opts[cur + 1].back_prev == 0)) {
546207753Smm
547207753Smm		const uint32_t short_rep_price = rep_match_price
548207753Smm				+ get_short_rep_price(coder, state, pos_state);
549207753Smm
550207753Smm		if (short_rep_price <= coder->opts[cur + 1].price) {
551207753Smm			coder->opts[cur + 1].price = short_rep_price;
552207753Smm			coder->opts[cur + 1].pos_prev = cur;
553207753Smm			make_short_rep(&coder->opts[cur + 1]);
554207753Smm			next_is_literal = true;
555207753Smm		}
556207753Smm	}
557207753Smm
558207753Smm	if (buf_avail_full < 2)
559207753Smm		return len_end;
560207753Smm
561213700Smm	const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
562207753Smm
563207753Smm	if (!next_is_literal && match_byte != current_byte) { // speed optimization
564207753Smm		// try literal + rep0
565207753Smm		const uint8_t *const buf_back = buf - reps[0] - 1;
566213700Smm		const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
567207753Smm
568292588Sdelphij		const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1;
569207753Smm
570207753Smm		if (len_test >= 2) {
571207753Smm			lzma_lzma_state state_2 = state;
572207753Smm			update_literal(state_2);
573207753Smm
574207753Smm			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
575207753Smm			const uint32_t next_rep_match_price = cur_and_1_price
576207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
577207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
578207753Smm
579207753Smm			//for (; len_test >= 2; --len_test) {
580207753Smm			const uint32_t offset = cur + 1 + len_test;
581207753Smm
582207753Smm			while (len_end < offset)
583207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
584207753Smm
585207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
586207753Smm					+ get_rep_price(coder, 0, len_test,
587207753Smm						state_2, pos_state_next);
588207753Smm
589207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
590207753Smm				coder->opts[offset].price = cur_and_len_price;
591207753Smm				coder->opts[offset].pos_prev = cur + 1;
592207753Smm				coder->opts[offset].back_prev = 0;
593207753Smm				coder->opts[offset].prev_1_is_literal = true;
594207753Smm				coder->opts[offset].prev_2 = false;
595207753Smm			}
596207753Smm			//}
597207753Smm		}
598207753Smm	}
599207753Smm
600207753Smm
601207753Smm	uint32_t start_len = 2; // speed optimization
602207753Smm
603292588Sdelphij	for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) {
604207753Smm		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
605207753Smm		if (not_equal_16(buf, buf_back))
606207753Smm			continue;
607207753Smm
608292588Sdelphij		uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail);
609207753Smm
610207753Smm		while (len_end < cur + len_test)
611207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
612207753Smm
613207753Smm		const uint32_t len_test_temp = len_test;
614207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
615207753Smm				coder, rep_index, state, pos_state);
616207753Smm
617207753Smm		do {
618207753Smm			const uint32_t cur_and_len_price = price
619207753Smm					+ get_len_price(&coder->rep_len_encoder,
620207753Smm							len_test, pos_state);
621207753Smm
622207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
623207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
624207753Smm				coder->opts[cur + len_test].pos_prev = cur;
625207753Smm				coder->opts[cur + len_test].back_prev = rep_index;
626207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
627207753Smm			}
628207753Smm		} while (--len_test >= 2);
629207753Smm
630207753Smm		len_test = len_test_temp;
631207753Smm
632207753Smm		if (rep_index == 0)
633207753Smm			start_len = len_test + 1;
634207753Smm
635207753Smm
636207753Smm		uint32_t len_test_2 = len_test + 1;
637213700Smm		const uint32_t limit = my_min(buf_avail_full,
638207753Smm				len_test_2 + nice_len);
639207753Smm		for (; len_test_2 < limit
640207753Smm				&& buf[len_test_2] == buf_back[len_test_2];
641207753Smm				++len_test_2) ;
642207753Smm
643207753Smm		len_test_2 -= len_test + 1;
644207753Smm
645207753Smm		if (len_test_2 >= 2) {
646207753Smm			lzma_lzma_state state_2 = state;
647207753Smm			update_long_rep(state_2);
648207753Smm
649207753Smm			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
650207753Smm
651207753Smm			const uint32_t cur_and_len_literal_price = price
652207753Smm					+ get_len_price(&coder->rep_len_encoder,
653207753Smm						len_test, pos_state)
654207753Smm					+ rc_bit_0_price(coder->is_match[state_2][pos_state_next])
655207753Smm					+ get_literal_price(coder, position + len_test,
656207753Smm						buf[len_test - 1], true,
657207753Smm						buf_back[len_test], buf[len_test]);
658207753Smm
659207753Smm			update_literal(state_2);
660207753Smm
661207753Smm			pos_state_next = (position + len_test + 1) & coder->pos_mask;
662207753Smm
663207753Smm			const uint32_t next_rep_match_price = cur_and_len_literal_price
664207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
665207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
666207753Smm
667207753Smm			//for(; len_test_2 >= 2; len_test_2--) {
668207753Smm			const uint32_t offset = cur + len_test + 1 + len_test_2;
669207753Smm
670207753Smm			while (len_end < offset)
671207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
672207753Smm
673207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
674207753Smm					+ get_rep_price(coder, 0, len_test_2,
675207753Smm						state_2, pos_state_next);
676207753Smm
677207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
678207753Smm				coder->opts[offset].price = cur_and_len_price;
679207753Smm				coder->opts[offset].pos_prev = cur + len_test + 1;
680207753Smm				coder->opts[offset].back_prev = 0;
681207753Smm				coder->opts[offset].prev_1_is_literal = true;
682207753Smm				coder->opts[offset].prev_2 = true;
683207753Smm				coder->opts[offset].pos_prev_2 = cur;
684207753Smm				coder->opts[offset].back_prev_2 = rep_index;
685207753Smm			}
686207753Smm			//}
687207753Smm		}
688207753Smm	}
689207753Smm
690207753Smm
691207753Smm	//for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
692207753Smm	if (new_len > buf_avail) {
693207753Smm		new_len = buf_avail;
694207753Smm
695207753Smm		matches_count = 0;
696207753Smm		while (new_len > coder->matches[matches_count].len)
697207753Smm			++matches_count;
698207753Smm
699207753Smm		coder->matches[matches_count++].len = new_len;
700207753Smm	}
701207753Smm
702207753Smm
703207753Smm	if (new_len >= start_len) {
704207753Smm		const uint32_t normal_match_price = match_price
705207753Smm				+ rc_bit_0_price(coder->is_rep[state]);
706207753Smm
707207753Smm		while (len_end < cur + new_len)
708207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
709207753Smm
710207753Smm		uint32_t i = 0;
711207753Smm		while (start_len > coder->matches[i].len)
712207753Smm			++i;
713207753Smm
714207753Smm		for (uint32_t len_test = start_len; ; ++len_test) {
715207753Smm			const uint32_t cur_back = coder->matches[i].dist;
716207753Smm			uint32_t cur_and_len_price = normal_match_price
717292588Sdelphij					+ get_dist_len_price(coder,
718207753Smm						cur_back, len_test, pos_state);
719207753Smm
720207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
721207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
722207753Smm				coder->opts[cur + len_test].pos_prev = cur;
723207753Smm				coder->opts[cur + len_test].back_prev
724292588Sdelphij						= cur_back + REPS;
725207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
726207753Smm			}
727207753Smm
728207753Smm			if (len_test == coder->matches[i].len) {
729207753Smm				// Try Match + Literal + Rep0
730207753Smm				const uint8_t *const buf_back = buf - cur_back - 1;
731207753Smm				uint32_t len_test_2 = len_test + 1;
732213700Smm				const uint32_t limit = my_min(buf_avail_full,
733207753Smm						len_test_2 + nice_len);
734207753Smm
735207753Smm				for (; len_test_2 < limit &&
736207753Smm						buf[len_test_2] == buf_back[len_test_2];
737207753Smm						++len_test_2) ;
738207753Smm
739207753Smm				len_test_2 -= len_test + 1;
740207753Smm
741207753Smm				if (len_test_2 >= 2) {
742207753Smm					lzma_lzma_state state_2 = state;
743207753Smm					update_match(state_2);
744207753Smm					uint32_t pos_state_next
745207753Smm							= (position + len_test) & coder->pos_mask;
746207753Smm
747207753Smm					const uint32_t cur_and_len_literal_price = cur_and_len_price
748207753Smm							+ rc_bit_0_price(
749207753Smm								coder->is_match[state_2][pos_state_next])
750207753Smm							+ get_literal_price(coder,
751207753Smm								position + len_test,
752207753Smm								buf[len_test - 1],
753207753Smm								true,
754207753Smm								buf_back[len_test],
755207753Smm								buf[len_test]);
756207753Smm
757207753Smm					update_literal(state_2);
758207753Smm					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
759207753Smm
760207753Smm					const uint32_t next_rep_match_price
761207753Smm							= cur_and_len_literal_price
762207753Smm							+ rc_bit_1_price(
763207753Smm								coder->is_match[state_2][pos_state_next])
764207753Smm							+ rc_bit_1_price(coder->is_rep[state_2]);
765207753Smm
766207753Smm					// for(; len_test_2 >= 2; --len_test_2) {
767207753Smm					const uint32_t offset = cur + len_test + 1 + len_test_2;
768207753Smm
769207753Smm					while (len_end < offset)
770207753Smm						coder->opts[++len_end].price = RC_INFINITY_PRICE;
771207753Smm
772207753Smm					cur_and_len_price = next_rep_match_price
773207753Smm							+ get_rep_price(coder, 0, len_test_2,
774207753Smm								state_2, pos_state_next);
775207753Smm
776207753Smm					if (cur_and_len_price < coder->opts[offset].price) {
777207753Smm						coder->opts[offset].price = cur_and_len_price;
778207753Smm						coder->opts[offset].pos_prev = cur + len_test + 1;
779207753Smm						coder->opts[offset].back_prev = 0;
780207753Smm						coder->opts[offset].prev_1_is_literal = true;
781207753Smm						coder->opts[offset].prev_2 = true;
782207753Smm						coder->opts[offset].pos_prev_2 = cur;
783207753Smm						coder->opts[offset].back_prev_2
784292588Sdelphij								= cur_back + REPS;
785207753Smm					}
786207753Smm					//}
787207753Smm				}
788207753Smm
789207753Smm				if (++i == matches_count)
790207753Smm					break;
791207753Smm			}
792207753Smm		}
793207753Smm	}
794207753Smm
795207753Smm	return len_end;
796207753Smm}
797207753Smm
798207753Smm
799207753Smmextern void
800312518Sdelphijlzma_lzma_optimum_normal(lzma_lzma1_encoder *restrict coder,
801312518Sdelphij		lzma_mf *restrict mf,
802207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
803207753Smm		uint32_t position)
804207753Smm{
805207753Smm	// If we have symbols pending, return the next pending symbol.
806207753Smm	if (coder->opts_end_index != coder->opts_current_index) {
807207753Smm		assert(mf->read_ahead > 0);
808207753Smm		*len_res = coder->opts[coder->opts_current_index].pos_prev
809207753Smm				- coder->opts_current_index;
810207753Smm		*back_res = coder->opts[coder->opts_current_index].back_prev;
811207753Smm		coder->opts_current_index = coder->opts[
812207753Smm				coder->opts_current_index].pos_prev;
813207753Smm		return;
814207753Smm	}
815207753Smm
816207753Smm	// Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
817207753Smm	// this was done in both initialization function and in the main loop.
818207753Smm	// In liblzma they were moved into this single place.
819207753Smm	if (mf->read_ahead == 0) {
820207753Smm		if (coder->match_price_count >= (1 << 7))
821292588Sdelphij			fill_dist_prices(coder);
822207753Smm
823292588Sdelphij		if (coder->align_price_count >= ALIGN_SIZE)
824207753Smm			fill_align_prices(coder);
825207753Smm	}
826207753Smm
827207753Smm	// TODO: This needs quite a bit of cleaning still. But splitting
828207753Smm	// the original function into two pieces makes it at least a little
829207753Smm	// more readable, since those two parts don't share many variables.
830207753Smm
831207753Smm	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
832207753Smm	if (len_end == UINT32_MAX)
833207753Smm		return;
834207753Smm
835292588Sdelphij	uint32_t reps[REPS];
836207753Smm	memcpy(reps, coder->reps, sizeof(reps));
837207753Smm
838207753Smm	uint32_t cur;
839207753Smm	for (cur = 1; cur < len_end; ++cur) {
840207753Smm		assert(cur < OPTS);
841207753Smm
842207753Smm		coder->longest_match_length = mf_find(
843207753Smm				mf, &coder->matches_count, coder->matches);
844207753Smm
845207753Smm		if (coder->longest_match_length >= mf->nice_len)
846207753Smm			break;
847207753Smm
848207753Smm		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
849207753Smm				position + cur, cur, mf->nice_len,
850213700Smm				my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
851207753Smm	}
852207753Smm
853207753Smm	backward(coder, len_res, back_res, cur);
854207753Smm	return;
855207753Smm}
856