1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       lzma_encoder_optimum_fast.c
4//
5//  Author:     Igor Pavlov
6//
7//  This file has been put into the public domain.
8//  You can do whatever you want with this file.
9//
10///////////////////////////////////////////////////////////////////////////////
11
12#include "lzma_encoder_private.h"
13
14
15#define change_pair(small_dist, big_dist) \
16	(((big_dist) >> 7) > (small_dist))
17
18
19extern void
20lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
21		uint32_t *restrict back_res, uint32_t *restrict len_res)
22{
23	const uint32_t nice_len = mf->nice_len;
24
25	uint32_t len_main;
26	uint32_t matches_count;
27	if (mf->read_ahead == 0) {
28		len_main = mf_find(mf, &matches_count, coder->matches);
29	} else {
30		assert(mf->read_ahead == 1);
31		len_main = coder->longest_match_length;
32		matches_count = coder->matches_count;
33	}
34
35	const uint8_t *buf = mf_ptr(mf) - 1;
36	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
37
38	if (buf_avail < 2) {
39		// There's not enough input left to encode a match.
40		*back_res = UINT32_MAX;
41		*len_res = 1;
42		return;
43	}
44
45	// Look for repeated matches; scan the previous four match distances
46	uint32_t rep_len = 0;
47	uint32_t rep_index = 0;
48
49	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
50		// Pointer to the beginning of the match candidate
51		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
52
53		// If the first two bytes (2 == MATCH_LEN_MIN) do not match,
54		// this rep is not useful.
55		if (not_equal_16(buf, buf_back))
56			continue;
57
58		// The first two bytes matched.
59		// Calculate the length of the match.
60		uint32_t len;
61		for (len = 2; len < buf_avail
62				&& buf[len] == buf_back[len]; ++len) ;
63
64		// If we have found a repeated match that is at least
65		// nice_len long, return it immediately.
66		if (len >= nice_len) {
67			*back_res = i;
68			*len_res = len;
69			mf_skip(mf, len - 1);
70			return;
71		}
72
73		if (len > rep_len) {
74			rep_index = i;
75			rep_len = len;
76		}
77	}
78
79	// We didn't find a long enough repeated match. Encode it as a normal
80	// match if the match length is at least nice_len.
81	if (len_main >= nice_len) {
82		*back_res = coder->matches[matches_count - 1].dist
83				+ REP_DISTANCES;
84		*len_res = len_main;
85		mf_skip(mf, len_main - 1);
86		return;
87	}
88
89	uint32_t back_main = 0;
90	if (len_main >= 2) {
91		back_main = coder->matches[matches_count - 1].dist;
92
93		while (matches_count > 1 && len_main ==
94				coder->matches[matches_count - 2].len + 1) {
95			if (!change_pair(coder->matches[
96						matches_count - 2].dist,
97					back_main))
98				break;
99
100			--matches_count;
101			len_main = coder->matches[matches_count - 1].len;
102			back_main = coder->matches[matches_count - 1].dist;
103		}
104
105		if (len_main == 2 && back_main >= 0x80)
106			len_main = 1;
107	}
108
109	if (rep_len >= 2) {
110		if (rep_len + 1 >= len_main
111				|| (rep_len + 2 >= len_main
112					&& back_main > (UINT32_C(1) << 9))
113				|| (rep_len + 3 >= len_main
114					&& back_main > (UINT32_C(1) << 15))) {
115			*back_res = rep_index;
116			*len_res = rep_len;
117			mf_skip(mf, rep_len - 1);
118			return;
119		}
120	}
121
122	if (len_main < 2 || buf_avail <= 2) {
123		*back_res = UINT32_MAX;
124		*len_res = 1;
125		return;
126	}
127
128	// Get the matches for the next byte. If we find a better match,
129	// the current byte is encoded as a literal.
130	coder->longest_match_length = mf_find(mf,
131			&coder->matches_count, coder->matches);
132
133	if (coder->longest_match_length >= 2) {
134		const uint32_t new_dist = coder->matches[
135				coder->matches_count - 1].dist;
136
137		if ((coder->longest_match_length >= len_main
138					&& new_dist < back_main)
139				|| (coder->longest_match_length == len_main + 1
140					&& !change_pair(back_main, new_dist))
141				|| (coder->longest_match_length > len_main + 1)
142				|| (coder->longest_match_length + 1 >= len_main
143					&& len_main >= 3
144					&& change_pair(new_dist, back_main))) {
145			*back_res = UINT32_MAX;
146			*len_res = 1;
147			return;
148		}
149	}
150
151	// In contrast to LZMA SDK, dictionary could not have been moved
152	// between mf_find() calls, thus it is safe to just increment
153	// the old buf pointer instead of recalculating it with mf_ptr().
154	++buf;
155
156	const uint32_t limit = len_main - 1;
157
158	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
159		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
160
161		if (not_equal_16(buf, buf_back))
162			continue;
163
164		uint32_t len;
165		for (len = 2; len < limit
166				&& buf[len] == buf_back[len]; ++len) ;
167
168		if (len >= limit) {
169			*back_res = UINT32_MAX;
170			*len_res = 1;
171			return;
172		}
173	}
174
175	*back_res = back_main + REP_DISTANCES;
176	*len_res = len_main;
177	mf_skip(mf, len_main - 2);
178	return;
179}
180