1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       lzma_encoder_optimum_normal.c
4207753Smm//
5207753Smm//  Author:     Igor Pavlov
6207753Smm//
7207753Smm//  This file has been put into the public domain.
8207753Smm//  You can do whatever you want with this file.
9207753Smm//
10207753Smm///////////////////////////////////////////////////////////////////////////////
11207753Smm
12207753Smm#include "lzma_encoder_private.h"
13207753Smm#include "fastpos.h"
14278433Srpaulo#include "memcmplen.h"
15207753Smm
16207753Smm
17207753Smm////////////
18207753Smm// Prices //
19207753Smm////////////
20207753Smm
21207753Smmstatic uint32_t
22312517Sdelphijget_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos,
23207753Smm		const uint32_t prev_byte, const bool match_mode,
24207753Smm		uint32_t match_byte, uint32_t symbol)
25207753Smm{
26207753Smm	const probability *const subcoder = literal_subcoder(coder->literal,
27207753Smm			coder->literal_context_bits, coder->literal_pos_mask,
28207753Smm			pos, prev_byte);
29207753Smm
30207753Smm	uint32_t price = 0;
31207753Smm
32207753Smm	if (!match_mode) {
33207753Smm		price = rc_bittree_price(subcoder, 8, symbol);
34207753Smm	} else {
35207753Smm		uint32_t offset = 0x100;
36207753Smm		symbol += UINT32_C(1) << 8;
37207753Smm
38207753Smm		do {
39207753Smm			match_byte <<= 1;
40207753Smm
41207753Smm			const uint32_t match_bit = match_byte & offset;
42207753Smm			const uint32_t subcoder_index
43207753Smm					= offset + match_bit + (symbol >> 8);
44207753Smm			const uint32_t bit = (symbol >> 7) & 1;
45207753Smm			price += rc_bit_price(subcoder[subcoder_index], bit);
46207753Smm
47207753Smm			symbol <<= 1;
48207753Smm			offset &= ~(match_byte ^ symbol);
49207753Smm
50207753Smm		} while (symbol < (UINT32_C(1) << 16));
51207753Smm	}
52207753Smm
53207753Smm	return price;
54207753Smm}
55207753Smm
56207753Smm
57207753Smmstatic inline uint32_t
58207753Smmget_len_price(const lzma_length_encoder *const lencoder,
59207753Smm		const uint32_t len, const uint32_t pos_state)
60207753Smm{
61207753Smm	// NOTE: Unlike the other price tables, length prices are updated
62207753Smm	// in lzma_encoder.c
63207753Smm	return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
64207753Smm}
65207753Smm
66207753Smm
67207753Smmstatic inline uint32_t
68312517Sdelphijget_short_rep_price(const lzma_lzma1_encoder *const coder,
69207753Smm		const lzma_lzma_state state, const uint32_t pos_state)
70207753Smm{
71207753Smm	return rc_bit_0_price(coder->is_rep0[state])
72207753Smm		+ rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
73207753Smm}
74207753Smm
75207753Smm
76207753Smmstatic inline uint32_t
77312517Sdelphijget_pure_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
78207753Smm		const lzma_lzma_state state, uint32_t pos_state)
79207753Smm{
80207753Smm	uint32_t price;
81207753Smm
82207753Smm	if (rep_index == 0) {
83207753Smm		price = rc_bit_0_price(coder->is_rep0[state]);
84207753Smm		price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
85207753Smm	} else {
86207753Smm		price = rc_bit_1_price(coder->is_rep0[state]);
87207753Smm
88207753Smm		if (rep_index == 1) {
89207753Smm			price += rc_bit_0_price(coder->is_rep1[state]);
90207753Smm		} else {
91207753Smm			price += rc_bit_1_price(coder->is_rep1[state]);
92207753Smm			price += rc_bit_price(coder->is_rep2[state],
93207753Smm					rep_index - 2);
94207753Smm		}
95207753Smm	}
96207753Smm
97207753Smm	return price;
98207753Smm}
99207753Smm
100207753Smm
101207753Smmstatic inline uint32_t
102312517Sdelphijget_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
103207753Smm		const uint32_t len, const lzma_lzma_state state,
104207753Smm		const uint32_t pos_state)
105207753Smm{
106207753Smm	return get_len_price(&coder->rep_len_encoder, len, pos_state)
107207753Smm		+ get_pure_rep_price(coder, rep_index, state, pos_state);
108207753Smm}
109207753Smm
110207753Smm
111207753Smmstatic inline uint32_t
112312517Sdelphijget_dist_len_price(const lzma_lzma1_encoder *const coder, const uint32_t dist,
113207753Smm		const uint32_t len, const uint32_t pos_state)
114207753Smm{
115278433Srpaulo	const uint32_t dist_state = get_dist_state(len);
116207753Smm	uint32_t price;
117207753Smm
118278433Srpaulo	if (dist < FULL_DISTANCES) {
119278433Srpaulo		price = coder->dist_prices[dist_state][dist];
120207753Smm	} else {
121278433Srpaulo		const uint32_t dist_slot = get_dist_slot_2(dist);
122278433Srpaulo		price = coder->dist_slot_prices[dist_state][dist_slot]
123278433Srpaulo				+ coder->align_prices[dist & ALIGN_MASK];
124207753Smm	}
125207753Smm
126207753Smm	price += get_len_price(&coder->match_len_encoder, len, pos_state);
127207753Smm
128207753Smm	return price;
129207753Smm}
130207753Smm
131207753Smm
132207753Smmstatic void
133312517Sdelphijfill_dist_prices(lzma_lzma1_encoder *coder)
134207753Smm{
135278433Srpaulo	for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) {
136207753Smm
137278433Srpaulo		uint32_t *const dist_slot_prices
138278433Srpaulo				= coder->dist_slot_prices[dist_state];
139207753Smm
140278433Srpaulo		// Price to encode the dist_slot.
141278433Srpaulo		for (uint32_t dist_slot = 0;
142278433Srpaulo				dist_slot < coder->dist_table_size; ++dist_slot)
143278433Srpaulo			dist_slot_prices[dist_slot] = rc_bittree_price(
144278433Srpaulo					coder->dist_slot[dist_state],
145278433Srpaulo					DIST_SLOT_BITS, dist_slot);
146207753Smm
147207753Smm		// For matches with distance >= FULL_DISTANCES, add the price
148207753Smm		// of the direct bits part of the match distance. (Align bits
149207753Smm		// are handled by fill_align_prices()).
150278433Srpaulo		for (uint32_t dist_slot = DIST_MODEL_END;
151278433Srpaulo				dist_slot < coder->dist_table_size;
152278433Srpaulo				++dist_slot)
153278433Srpaulo			dist_slot_prices[dist_slot] += rc_direct_price(
154278433Srpaulo					((dist_slot >> 1) - 1) - ALIGN_BITS);
155207753Smm
156207753Smm		// Distances in the range [0, 3] are fully encoded with
157278433Srpaulo		// dist_slot, so they are used for coder->dist_prices
158207753Smm		// as is.
159278433Srpaulo		for (uint32_t i = 0; i < DIST_MODEL_START; ++i)
160278433Srpaulo			coder->dist_prices[dist_state][i]
161278433Srpaulo					= dist_slot_prices[i];
162207753Smm	}
163207753Smm
164278433Srpaulo	// Distances in the range [4, 127] depend on dist_slot and
165278433Srpaulo	// dist_special. We do this in a loop separate from the above
166278433Srpaulo	// loop to avoid redundant calls to get_dist_slot().
167278433Srpaulo	for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) {
168278433Srpaulo		const uint32_t dist_slot = get_dist_slot(i);
169278433Srpaulo		const uint32_t footer_bits = ((dist_slot >> 1) - 1);
170278433Srpaulo		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
171207753Smm		const uint32_t price = rc_bittree_reverse_price(
172278433Srpaulo				coder->dist_special + base - dist_slot - 1,
173207753Smm				footer_bits, i - base);
174207753Smm
175278433Srpaulo		for (uint32_t dist_state = 0; dist_state < DIST_STATES;
176278433Srpaulo				++dist_state)
177278433Srpaulo			coder->dist_prices[dist_state][i]
178278433Srpaulo					= price + coder->dist_slot_prices[
179278433Srpaulo						dist_state][dist_slot];
180207753Smm	}
181207753Smm
182207753Smm	coder->match_price_count = 0;
183207753Smm	return;
184207753Smm}
185207753Smm
186207753Smm
187207753Smmstatic void
188312517Sdelphijfill_align_prices(lzma_lzma1_encoder *coder)
189207753Smm{
190278433Srpaulo	for (uint32_t i = 0; i < ALIGN_SIZE; ++i)
191207753Smm		coder->align_prices[i] = rc_bittree_reverse_price(
192278433Srpaulo				coder->dist_align, ALIGN_BITS, i);
193207753Smm
194207753Smm	coder->align_price_count = 0;
195207753Smm	return;
196207753Smm}
197207753Smm
198207753Smm
199207753Smm/////////////
200207753Smm// Optimal //
201207753Smm/////////////
202207753Smm
203207753Smmstatic inline void
204207753Smmmake_literal(lzma_optimal *optimal)
205207753Smm{
206207753Smm	optimal->back_prev = UINT32_MAX;
207207753Smm	optimal->prev_1_is_literal = false;
208207753Smm}
209207753Smm
210207753Smm
211207753Smmstatic inline void
212207753Smmmake_short_rep(lzma_optimal *optimal)
213207753Smm{
214207753Smm	optimal->back_prev = 0;
215207753Smm	optimal->prev_1_is_literal = false;
216207753Smm}
217207753Smm
218207753Smm
219207753Smm#define is_short_rep(optimal) \
220207753Smm	((optimal).back_prev == 0)
221207753Smm
222207753Smm
223207753Smmstatic void
224312517Sdelphijbackward(lzma_lzma1_encoder *restrict coder, uint32_t *restrict len_res,
225207753Smm		uint32_t *restrict back_res, uint32_t cur)
226207753Smm{
227207753Smm	coder->opts_end_index = cur;
228207753Smm
229207753Smm	uint32_t pos_mem = coder->opts[cur].pos_prev;
230207753Smm	uint32_t back_mem = coder->opts[cur].back_prev;
231207753Smm
232207753Smm	do {
233207753Smm		if (coder->opts[cur].prev_1_is_literal) {
234207753Smm			make_literal(&coder->opts[pos_mem]);
235207753Smm			coder->opts[pos_mem].pos_prev = pos_mem - 1;
236207753Smm
237207753Smm			if (coder->opts[cur].prev_2) {
238207753Smm				coder->opts[pos_mem - 1].prev_1_is_literal
239207753Smm						= false;
240207753Smm				coder->opts[pos_mem - 1].pos_prev
241207753Smm						= coder->opts[cur].pos_prev_2;
242207753Smm				coder->opts[pos_mem - 1].back_prev
243207753Smm						= coder->opts[cur].back_prev_2;
244207753Smm			}
245207753Smm		}
246207753Smm
247207753Smm		const uint32_t pos_prev = pos_mem;
248207753Smm		const uint32_t back_cur = back_mem;
249207753Smm
250207753Smm		back_mem = coder->opts[pos_prev].back_prev;
251207753Smm		pos_mem = coder->opts[pos_prev].pos_prev;
252207753Smm
253207753Smm		coder->opts[pos_prev].back_prev = back_cur;
254207753Smm		coder->opts[pos_prev].pos_prev = cur;
255207753Smm		cur = pos_prev;
256207753Smm
257207753Smm	} while (cur != 0);
258207753Smm
259207753Smm	coder->opts_current_index = coder->opts[0].pos_prev;
260207753Smm	*len_res = coder->opts[0].pos_prev;
261207753Smm	*back_res = coder->opts[0].back_prev;
262207753Smm
263207753Smm	return;
264207753Smm}
265207753Smm
266207753Smm
267207753Smm//////////
268207753Smm// Main //
269207753Smm//////////
270207753Smm
271207753Smmstatic inline uint32_t
272312517Sdelphijhelper1(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
273207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
274207753Smm		uint32_t position)
275207753Smm{
276207753Smm	const uint32_t nice_len = mf->nice_len;
277207753Smm
278207753Smm	uint32_t len_main;
279207753Smm	uint32_t matches_count;
280207753Smm
281207753Smm	if (mf->read_ahead == 0) {
282207753Smm		len_main = mf_find(mf, &matches_count, coder->matches);
283207753Smm	} else {
284207753Smm		assert(mf->read_ahead == 1);
285207753Smm		len_main = coder->longest_match_length;
286207753Smm		matches_count = coder->matches_count;
287207753Smm	}
288207753Smm
289213700Smm	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
290207753Smm	if (buf_avail < 2) {
291207753Smm		*back_res = UINT32_MAX;
292207753Smm		*len_res = 1;
293207753Smm		return UINT32_MAX;
294207753Smm	}
295207753Smm
296207753Smm	const uint8_t *const buf = mf_ptr(mf) - 1;
297207753Smm
298278433Srpaulo	uint32_t rep_lens[REPS];
299207753Smm	uint32_t rep_max_index = 0;
300207753Smm
301278433Srpaulo	for (uint32_t i = 0; i < REPS; ++i) {
302207753Smm		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
303207753Smm
304207753Smm		if (not_equal_16(buf, buf_back)) {
305207753Smm			rep_lens[i] = 0;
306207753Smm			continue;
307207753Smm		}
308207753Smm
309278433Srpaulo		rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail);
310207753Smm
311278433Srpaulo		if (rep_lens[i] > rep_lens[rep_max_index])
312207753Smm			rep_max_index = i;
313207753Smm	}
314207753Smm
315207753Smm	if (rep_lens[rep_max_index] >= nice_len) {
316207753Smm		*back_res = rep_max_index;
317207753Smm		*len_res = rep_lens[rep_max_index];
318207753Smm		mf_skip(mf, *len_res - 1);
319207753Smm		return UINT32_MAX;
320207753Smm	}
321207753Smm
322207753Smm
323207753Smm	if (len_main >= nice_len) {
324278433Srpaulo		*back_res = coder->matches[matches_count - 1].dist + REPS;
325207753Smm		*len_res = len_main;
326207753Smm		mf_skip(mf, len_main - 1);
327207753Smm		return UINT32_MAX;
328207753Smm	}
329207753Smm
330207753Smm	const uint8_t current_byte = *buf;
331207753Smm	const uint8_t match_byte = *(buf - coder->reps[0] - 1);
332207753Smm
333207753Smm	if (len_main < 2 && current_byte != match_byte
334207753Smm			&& rep_lens[rep_max_index] < 2) {
335207753Smm		*back_res = UINT32_MAX;
336207753Smm		*len_res = 1;
337207753Smm		return UINT32_MAX;
338207753Smm	}
339207753Smm
340207753Smm	coder->opts[0].state = coder->state;
341207753Smm
342207753Smm	const uint32_t pos_state = position & coder->pos_mask;
343207753Smm
344207753Smm	coder->opts[1].price = rc_bit_0_price(
345207753Smm				coder->is_match[coder->state][pos_state])
346207753Smm			+ get_literal_price(coder, position, buf[-1],
347207753Smm				!is_literal_state(coder->state),
348207753Smm				match_byte, current_byte);
349207753Smm
350207753Smm	make_literal(&coder->opts[1]);
351207753Smm
352207753Smm	const uint32_t match_price = rc_bit_1_price(
353207753Smm			coder->is_match[coder->state][pos_state]);
354207753Smm	const uint32_t rep_match_price = match_price
355207753Smm			+ rc_bit_1_price(coder->is_rep[coder->state]);
356207753Smm
357207753Smm	if (match_byte == current_byte) {
358207753Smm		const uint32_t short_rep_price = rep_match_price
359207753Smm				+ get_short_rep_price(
360207753Smm					coder, coder->state, pos_state);
361207753Smm
362207753Smm		if (short_rep_price < coder->opts[1].price) {
363207753Smm			coder->opts[1].price = short_rep_price;
364207753Smm			make_short_rep(&coder->opts[1]);
365207753Smm		}
366207753Smm	}
367207753Smm
368213700Smm	const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
369207753Smm
370207753Smm	if (len_end < 2) {
371207753Smm		*back_res = coder->opts[1].back_prev;
372207753Smm		*len_res = 1;
373207753Smm		return UINT32_MAX;
374207753Smm	}
375207753Smm
376207753Smm	coder->opts[1].pos_prev = 0;
377207753Smm
378278433Srpaulo	for (uint32_t i = 0; i < REPS; ++i)
379207753Smm		coder->opts[0].backs[i] = coder->reps[i];
380207753Smm
381207753Smm	uint32_t len = len_end;
382207753Smm	do {
383207753Smm		coder->opts[len].price = RC_INFINITY_PRICE;
384207753Smm	} while (--len >= 2);
385207753Smm
386207753Smm
387278433Srpaulo	for (uint32_t i = 0; i < REPS; ++i) {
388207753Smm		uint32_t rep_len = rep_lens[i];
389207753Smm		if (rep_len < 2)
390207753Smm			continue;
391207753Smm
392207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
393207753Smm				coder, i, coder->state, pos_state);
394207753Smm
395207753Smm		do {
396207753Smm			const uint32_t cur_and_len_price = price
397207753Smm					+ get_len_price(
398207753Smm						&coder->rep_len_encoder,
399207753Smm						rep_len, pos_state);
400207753Smm
401207753Smm			if (cur_and_len_price < coder->opts[rep_len].price) {
402207753Smm				coder->opts[rep_len].price = cur_and_len_price;
403207753Smm				coder->opts[rep_len].pos_prev = 0;
404207753Smm				coder->opts[rep_len].back_prev = i;
405207753Smm				coder->opts[rep_len].prev_1_is_literal = false;
406207753Smm			}
407207753Smm		} while (--rep_len >= 2);
408207753Smm	}
409207753Smm
410207753Smm
411207753Smm	const uint32_t normal_match_price = match_price
412207753Smm			+ rc_bit_0_price(coder->is_rep[coder->state]);
413207753Smm
414207753Smm	len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
415207753Smm	if (len <= len_main) {
416207753Smm		uint32_t i = 0;
417207753Smm		while (len > coder->matches[i].len)
418207753Smm			++i;
419207753Smm
420207753Smm		for(; ; ++len) {
421207753Smm			const uint32_t dist = coder->matches[i].dist;
422207753Smm			const uint32_t cur_and_len_price = normal_match_price
423278433Srpaulo					+ get_dist_len_price(coder,
424207753Smm						dist, len, pos_state);
425207753Smm
426207753Smm			if (cur_and_len_price < coder->opts[len].price) {
427207753Smm				coder->opts[len].price = cur_and_len_price;
428207753Smm				coder->opts[len].pos_prev = 0;
429278433Srpaulo				coder->opts[len].back_prev = dist + REPS;
430207753Smm				coder->opts[len].prev_1_is_literal = false;
431207753Smm			}
432207753Smm
433207753Smm			if (len == coder->matches[i].len)
434207753Smm				if (++i == matches_count)
435207753Smm					break;
436207753Smm		}
437207753Smm	}
438207753Smm
439207753Smm	return len_end;
440207753Smm}
441207753Smm
442207753Smm
443207753Smmstatic inline uint32_t
444312517Sdelphijhelper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf,
445207753Smm		uint32_t len_end, uint32_t position, const uint32_t cur,
446207753Smm		const uint32_t nice_len, const uint32_t buf_avail_full)
447207753Smm{
448207753Smm	uint32_t matches_count = coder->matches_count;
449207753Smm	uint32_t new_len = coder->longest_match_length;
450207753Smm	uint32_t pos_prev = coder->opts[cur].pos_prev;
451207753Smm	lzma_lzma_state state;
452207753Smm
453207753Smm	if (coder->opts[cur].prev_1_is_literal) {
454207753Smm		--pos_prev;
455207753Smm
456207753Smm		if (coder->opts[cur].prev_2) {
457207753Smm			state = coder->opts[coder->opts[cur].pos_prev_2].state;
458207753Smm
459278433Srpaulo			if (coder->opts[cur].back_prev_2 < REPS)
460207753Smm				update_long_rep(state);
461207753Smm			else
462207753Smm				update_match(state);
463207753Smm
464207753Smm		} else {
465207753Smm			state = coder->opts[pos_prev].state;
466207753Smm		}
467207753Smm
468207753Smm		update_literal(state);
469207753Smm
470207753Smm	} else {
471207753Smm		state = coder->opts[pos_prev].state;
472207753Smm	}
473207753Smm
474207753Smm	if (pos_prev == cur - 1) {
475207753Smm		if (is_short_rep(coder->opts[cur]))
476207753Smm			update_short_rep(state);
477207753Smm		else
478207753Smm			update_literal(state);
479207753Smm	} else {
480207753Smm		uint32_t pos;
481207753Smm		if (coder->opts[cur].prev_1_is_literal
482207753Smm				&& coder->opts[cur].prev_2) {
483207753Smm			pos_prev = coder->opts[cur].pos_prev_2;
484207753Smm			pos = coder->opts[cur].back_prev_2;
485207753Smm			update_long_rep(state);
486207753Smm		} else {
487207753Smm			pos = coder->opts[cur].back_prev;
488278433Srpaulo			if (pos < REPS)
489207753Smm				update_long_rep(state);
490207753Smm			else
491207753Smm				update_match(state);
492207753Smm		}
493207753Smm
494278433Srpaulo		if (pos < REPS) {
495207753Smm			reps[0] = coder->opts[pos_prev].backs[pos];
496207753Smm
497207753Smm			uint32_t i;
498207753Smm			for (i = 1; i <= pos; ++i)
499207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
500207753Smm
501278433Srpaulo			for (; i < REPS; ++i)
502207753Smm				reps[i] = coder->opts[pos_prev].backs[i];
503207753Smm
504207753Smm		} else {
505278433Srpaulo			reps[0] = pos - REPS;
506207753Smm
507278433Srpaulo			for (uint32_t i = 1; i < REPS; ++i)
508207753Smm				reps[i] = coder->opts[pos_prev].backs[i - 1];
509207753Smm		}
510207753Smm	}
511207753Smm
512207753Smm	coder->opts[cur].state = state;
513207753Smm
514278433Srpaulo	for (uint32_t i = 0; i < REPS; ++i)
515207753Smm		coder->opts[cur].backs[i] = reps[i];
516207753Smm
517207753Smm	const uint32_t cur_price = coder->opts[cur].price;
518207753Smm
519207753Smm	const uint8_t current_byte = *buf;
520207753Smm	const uint8_t match_byte = *(buf - reps[0] - 1);
521207753Smm
522207753Smm	const uint32_t pos_state = position & coder->pos_mask;
523207753Smm
524207753Smm	const uint32_t cur_and_1_price = cur_price
525207753Smm			+ rc_bit_0_price(coder->is_match[state][pos_state])
526207753Smm			+ get_literal_price(coder, position, buf[-1],
527207753Smm			!is_literal_state(state), match_byte, current_byte);
528207753Smm
529207753Smm	bool next_is_literal = false;
530207753Smm
531207753Smm	if (cur_and_1_price < coder->opts[cur + 1].price) {
532207753Smm		coder->opts[cur + 1].price = cur_and_1_price;
533207753Smm		coder->opts[cur + 1].pos_prev = cur;
534207753Smm		make_literal(&coder->opts[cur + 1]);
535207753Smm		next_is_literal = true;
536207753Smm	}
537207753Smm
538207753Smm	const uint32_t match_price = cur_price
539207753Smm			+ rc_bit_1_price(coder->is_match[state][pos_state]);
540207753Smm	const uint32_t rep_match_price = match_price
541207753Smm			+ rc_bit_1_price(coder->is_rep[state]);
542207753Smm
543207753Smm	if (match_byte == current_byte
544207753Smm			&& !(coder->opts[cur + 1].pos_prev < cur
545207753Smm				&& coder->opts[cur + 1].back_prev == 0)) {
546207753Smm
547207753Smm		const uint32_t short_rep_price = rep_match_price
548207753Smm				+ get_short_rep_price(coder, state, pos_state);
549207753Smm
550207753Smm		if (short_rep_price <= coder->opts[cur + 1].price) {
551207753Smm			coder->opts[cur + 1].price = short_rep_price;
552207753Smm			coder->opts[cur + 1].pos_prev = cur;
553207753Smm			make_short_rep(&coder->opts[cur + 1]);
554207753Smm			next_is_literal = true;
555207753Smm		}
556207753Smm	}
557207753Smm
558207753Smm	if (buf_avail_full < 2)
559207753Smm		return len_end;
560207753Smm
561213700Smm	const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
562207753Smm
563207753Smm	if (!next_is_literal && match_byte != current_byte) { // speed optimization
564207753Smm		// try literal + rep0
565207753Smm		const uint8_t *const buf_back = buf - reps[0] - 1;
566213700Smm		const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
567207753Smm
568278433Srpaulo		const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1;
569207753Smm
570207753Smm		if (len_test >= 2) {
571207753Smm			lzma_lzma_state state_2 = state;
572207753Smm			update_literal(state_2);
573207753Smm
574207753Smm			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
575207753Smm			const uint32_t next_rep_match_price = cur_and_1_price
576207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
577207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
578207753Smm
579207753Smm			//for (; len_test >= 2; --len_test) {
580207753Smm			const uint32_t offset = cur + 1 + len_test;
581207753Smm
582207753Smm			while (len_end < offset)
583207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
584207753Smm
585207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
586207753Smm					+ get_rep_price(coder, 0, len_test,
587207753Smm						state_2, pos_state_next);
588207753Smm
589207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
590207753Smm				coder->opts[offset].price = cur_and_len_price;
591207753Smm				coder->opts[offset].pos_prev = cur + 1;
592207753Smm				coder->opts[offset].back_prev = 0;
593207753Smm				coder->opts[offset].prev_1_is_literal = true;
594207753Smm				coder->opts[offset].prev_2 = false;
595207753Smm			}
596207753Smm			//}
597207753Smm		}
598207753Smm	}
599207753Smm
600207753Smm
601207753Smm	uint32_t start_len = 2; // speed optimization
602207753Smm
603278433Srpaulo	for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) {
604207753Smm		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
605207753Smm		if (not_equal_16(buf, buf_back))
606207753Smm			continue;
607207753Smm
608278433Srpaulo		uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail);
609207753Smm
610207753Smm		while (len_end < cur + len_test)
611207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
612207753Smm
613207753Smm		const uint32_t len_test_temp = len_test;
614207753Smm		const uint32_t price = rep_match_price + get_pure_rep_price(
615207753Smm				coder, rep_index, state, pos_state);
616207753Smm
617207753Smm		do {
618207753Smm			const uint32_t cur_and_len_price = price
619207753Smm					+ get_len_price(&coder->rep_len_encoder,
620207753Smm							len_test, pos_state);
621207753Smm
622207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
623207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
624207753Smm				coder->opts[cur + len_test].pos_prev = cur;
625207753Smm				coder->opts[cur + len_test].back_prev = rep_index;
626207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
627207753Smm			}
628207753Smm		} while (--len_test >= 2);
629207753Smm
630207753Smm		len_test = len_test_temp;
631207753Smm
632207753Smm		if (rep_index == 0)
633207753Smm			start_len = len_test + 1;
634207753Smm
635207753Smm
636207753Smm		uint32_t len_test_2 = len_test + 1;
637213700Smm		const uint32_t limit = my_min(buf_avail_full,
638207753Smm				len_test_2 + nice_len);
639360523Sdelphij		// NOTE: len_test_2 may be greater than limit so the call to
640360523Sdelphij		// lzma_memcmplen() must be done conditionally.
641360523Sdelphij		if (len_test_2 < limit)
642360523Sdelphij			len_test_2 = lzma_memcmplen(buf, buf_back, len_test_2, limit);
643207753Smm
644207753Smm		len_test_2 -= len_test + 1;
645207753Smm
646207753Smm		if (len_test_2 >= 2) {
647207753Smm			lzma_lzma_state state_2 = state;
648207753Smm			update_long_rep(state_2);
649207753Smm
650207753Smm			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
651207753Smm
652207753Smm			const uint32_t cur_and_len_literal_price = price
653207753Smm					+ get_len_price(&coder->rep_len_encoder,
654207753Smm						len_test, pos_state)
655207753Smm					+ rc_bit_0_price(coder->is_match[state_2][pos_state_next])
656207753Smm					+ get_literal_price(coder, position + len_test,
657207753Smm						buf[len_test - 1], true,
658207753Smm						buf_back[len_test], buf[len_test]);
659207753Smm
660207753Smm			update_literal(state_2);
661207753Smm
662207753Smm			pos_state_next = (position + len_test + 1) & coder->pos_mask;
663207753Smm
664207753Smm			const uint32_t next_rep_match_price = cur_and_len_literal_price
665207753Smm					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
666207753Smm					+ rc_bit_1_price(coder->is_rep[state_2]);
667207753Smm
668207753Smm			//for(; len_test_2 >= 2; len_test_2--) {
669207753Smm			const uint32_t offset = cur + len_test + 1 + len_test_2;
670207753Smm
671207753Smm			while (len_end < offset)
672207753Smm				coder->opts[++len_end].price = RC_INFINITY_PRICE;
673207753Smm
674207753Smm			const uint32_t cur_and_len_price = next_rep_match_price
675207753Smm					+ get_rep_price(coder, 0, len_test_2,
676207753Smm						state_2, pos_state_next);
677207753Smm
678207753Smm			if (cur_and_len_price < coder->opts[offset].price) {
679207753Smm				coder->opts[offset].price = cur_and_len_price;
680207753Smm				coder->opts[offset].pos_prev = cur + len_test + 1;
681207753Smm				coder->opts[offset].back_prev = 0;
682207753Smm				coder->opts[offset].prev_1_is_literal = true;
683207753Smm				coder->opts[offset].prev_2 = true;
684207753Smm				coder->opts[offset].pos_prev_2 = cur;
685207753Smm				coder->opts[offset].back_prev_2 = rep_index;
686207753Smm			}
687207753Smm			//}
688207753Smm		}
689207753Smm	}
690207753Smm
691207753Smm
692207753Smm	//for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
693207753Smm	if (new_len > buf_avail) {
694207753Smm		new_len = buf_avail;
695207753Smm
696207753Smm		matches_count = 0;
697207753Smm		while (new_len > coder->matches[matches_count].len)
698207753Smm			++matches_count;
699207753Smm
700207753Smm		coder->matches[matches_count++].len = new_len;
701207753Smm	}
702207753Smm
703207753Smm
704207753Smm	if (new_len >= start_len) {
705207753Smm		const uint32_t normal_match_price = match_price
706207753Smm				+ rc_bit_0_price(coder->is_rep[state]);
707207753Smm
708207753Smm		while (len_end < cur + new_len)
709207753Smm			coder->opts[++len_end].price = RC_INFINITY_PRICE;
710207753Smm
711207753Smm		uint32_t i = 0;
712207753Smm		while (start_len > coder->matches[i].len)
713207753Smm			++i;
714207753Smm
715207753Smm		for (uint32_t len_test = start_len; ; ++len_test) {
716207753Smm			const uint32_t cur_back = coder->matches[i].dist;
717207753Smm			uint32_t cur_and_len_price = normal_match_price
718278433Srpaulo					+ get_dist_len_price(coder,
719207753Smm						cur_back, len_test, pos_state);
720207753Smm
721207753Smm			if (cur_and_len_price < coder->opts[cur + len_test].price) {
722207753Smm				coder->opts[cur + len_test].price = cur_and_len_price;
723207753Smm				coder->opts[cur + len_test].pos_prev = cur;
724207753Smm				coder->opts[cur + len_test].back_prev
725278433Srpaulo						= cur_back + REPS;
726207753Smm				coder->opts[cur + len_test].prev_1_is_literal = false;
727207753Smm			}
728207753Smm
729207753Smm			if (len_test == coder->matches[i].len) {
730207753Smm				// Try Match + Literal + Rep0
731207753Smm				const uint8_t *const buf_back = buf - cur_back - 1;
732207753Smm				uint32_t len_test_2 = len_test + 1;
733213700Smm				const uint32_t limit = my_min(buf_avail_full,
734207753Smm						len_test_2 + nice_len);
735207753Smm
736360523Sdelphij				// NOTE: len_test_2 may be greater than limit
737360523Sdelphij				// so the call to lzma_memcmplen() must be
738360523Sdelphij				// done conditionally.
739360523Sdelphij				if (len_test_2 < limit)
740360523Sdelphij					len_test_2 = lzma_memcmplen(buf, buf_back,
741360523Sdelphij							len_test_2, limit);
742207753Smm
743207753Smm				len_test_2 -= len_test + 1;
744207753Smm
745207753Smm				if (len_test_2 >= 2) {
746207753Smm					lzma_lzma_state state_2 = state;
747207753Smm					update_match(state_2);
748207753Smm					uint32_t pos_state_next
749207753Smm							= (position + len_test) & coder->pos_mask;
750207753Smm
751207753Smm					const uint32_t cur_and_len_literal_price = cur_and_len_price
752207753Smm							+ rc_bit_0_price(
753207753Smm								coder->is_match[state_2][pos_state_next])
754207753Smm							+ get_literal_price(coder,
755207753Smm								position + len_test,
756207753Smm								buf[len_test - 1],
757207753Smm								true,
758207753Smm								buf_back[len_test],
759207753Smm								buf[len_test]);
760207753Smm
761207753Smm					update_literal(state_2);
762207753Smm					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
763207753Smm
764207753Smm					const uint32_t next_rep_match_price
765207753Smm							= cur_and_len_literal_price
766207753Smm							+ rc_bit_1_price(
767207753Smm								coder->is_match[state_2][pos_state_next])
768207753Smm							+ rc_bit_1_price(coder->is_rep[state_2]);
769207753Smm
770207753Smm					// for(; len_test_2 >= 2; --len_test_2) {
771207753Smm					const uint32_t offset = cur + len_test + 1 + len_test_2;
772207753Smm
773207753Smm					while (len_end < offset)
774207753Smm						coder->opts[++len_end].price = RC_INFINITY_PRICE;
775207753Smm
776207753Smm					cur_and_len_price = next_rep_match_price
777207753Smm							+ get_rep_price(coder, 0, len_test_2,
778207753Smm								state_2, pos_state_next);
779207753Smm
780207753Smm					if (cur_and_len_price < coder->opts[offset].price) {
781207753Smm						coder->opts[offset].price = cur_and_len_price;
782207753Smm						coder->opts[offset].pos_prev = cur + len_test + 1;
783207753Smm						coder->opts[offset].back_prev = 0;
784207753Smm						coder->opts[offset].prev_1_is_literal = true;
785207753Smm						coder->opts[offset].prev_2 = true;
786207753Smm						coder->opts[offset].pos_prev_2 = cur;
787207753Smm						coder->opts[offset].back_prev_2
788278433Srpaulo								= cur_back + REPS;
789207753Smm					}
790207753Smm					//}
791207753Smm				}
792207753Smm
793207753Smm				if (++i == matches_count)
794207753Smm					break;
795207753Smm			}
796207753Smm		}
797207753Smm	}
798207753Smm
799207753Smm	return len_end;
800207753Smm}
801207753Smm
802207753Smm
803207753Smmextern void
804312517Sdelphijlzma_lzma_optimum_normal(lzma_lzma1_encoder *restrict coder,
805312517Sdelphij		lzma_mf *restrict mf,
806207753Smm		uint32_t *restrict back_res, uint32_t *restrict len_res,
807207753Smm		uint32_t position)
808207753Smm{
809207753Smm	// If we have symbols pending, return the next pending symbol.
810207753Smm	if (coder->opts_end_index != coder->opts_current_index) {
811207753Smm		assert(mf->read_ahead > 0);
812207753Smm		*len_res = coder->opts[coder->opts_current_index].pos_prev
813207753Smm				- coder->opts_current_index;
814207753Smm		*back_res = coder->opts[coder->opts_current_index].back_prev;
815207753Smm		coder->opts_current_index = coder->opts[
816207753Smm				coder->opts_current_index].pos_prev;
817207753Smm		return;
818207753Smm	}
819207753Smm
820207753Smm	// Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
821207753Smm	// this was done in both initialization function and in the main loop.
822207753Smm	// In liblzma they were moved into this single place.
823207753Smm	if (mf->read_ahead == 0) {
824207753Smm		if (coder->match_price_count >= (1 << 7))
825278433Srpaulo			fill_dist_prices(coder);
826207753Smm
827278433Srpaulo		if (coder->align_price_count >= ALIGN_SIZE)
828207753Smm			fill_align_prices(coder);
829207753Smm	}
830207753Smm
831207753Smm	// TODO: This needs quite a bit of cleaning still. But splitting
832207753Smm	// the original function into two pieces makes it at least a little
833207753Smm	// more readable, since those two parts don't share many variables.
834207753Smm
835207753Smm	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
836207753Smm	if (len_end == UINT32_MAX)
837207753Smm		return;
838207753Smm
839278433Srpaulo	uint32_t reps[REPS];
840207753Smm	memcpy(reps, coder->reps, sizeof(reps));
841207753Smm
842207753Smm	uint32_t cur;
843207753Smm	for (cur = 1; cur < len_end; ++cur) {
844207753Smm		assert(cur < OPTS);
845207753Smm
846207753Smm		coder->longest_match_length = mf_find(
847207753Smm				mf, &coder->matches_count, coder->matches);
848207753Smm
849207753Smm		if (coder->longest_match_length >= mf->nice_len)
850207753Smm			break;
851207753Smm
852207753Smm		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
853207753Smm				position + cur, cur, mf->nice_len,
854213700Smm				my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
855207753Smm	}
856207753Smm
857207753Smm	backward(coder, len_res, back_res, cur);
858207753Smm	return;
859207753Smm}
860