1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lzma_encoder_optimum_fast.c 4207753Smm// 5207753Smm// Author: Igor Pavlov 6207753Smm// 7207753Smm// This file has been put into the public domain. 8207753Smm// You can do whatever you want with this file. 9207753Smm// 10207753Smm/////////////////////////////////////////////////////////////////////////////// 11207753Smm 12207753Smm#include "lzma_encoder_private.h" 13292588Sdelphij#include "memcmplen.h" 14207753Smm 15207753Smm 16207753Smm#define change_pair(small_dist, big_dist) \ 17207753Smm (((big_dist) >> 7) > (small_dist)) 18207753Smm 19207753Smm 20207753Smmextern void 21312518Sdelphijlzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder, 22312518Sdelphij lzma_mf *restrict mf, 23207753Smm uint32_t *restrict back_res, uint32_t *restrict len_res) 24207753Smm{ 25207753Smm const uint32_t nice_len = mf->nice_len; 26207753Smm 27207753Smm uint32_t len_main; 28207753Smm uint32_t matches_count; 29207753Smm if (mf->read_ahead == 0) { 30207753Smm len_main = mf_find(mf, &matches_count, coder->matches); 31207753Smm } else { 32207753Smm assert(mf->read_ahead == 1); 33207753Smm len_main = coder->longest_match_length; 34207753Smm matches_count = coder->matches_count; 35207753Smm } 36207753Smm 37207753Smm const uint8_t *buf = mf_ptr(mf) - 1; 38213700Smm const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX); 39207753Smm 40207753Smm if (buf_avail < 2) { 41207753Smm // There's not enough input left to encode a match. 42207753Smm *back_res = UINT32_MAX; 43207753Smm *len_res = 1; 44207753Smm return; 45207753Smm } 46207753Smm 47207753Smm // Look for repeated matches; scan the previous four match distances 48207753Smm uint32_t rep_len = 0; 49207753Smm uint32_t rep_index = 0; 50207753Smm 51292588Sdelphij for (uint32_t i = 0; i < REPS; ++i) { 52207753Smm // Pointer to the beginning of the match candidate 53207753Smm const uint8_t *const buf_back = buf - coder->reps[i] - 1; 54207753Smm 55207753Smm // If the first two bytes (2 == MATCH_LEN_MIN) do not match, 56207753Smm // this rep is not useful. 57207753Smm if (not_equal_16(buf, buf_back)) 58207753Smm continue; 59207753Smm 60207753Smm // The first two bytes matched. 61207753Smm // Calculate the length of the match. 62292588Sdelphij const uint32_t len = lzma_memcmplen( 63292588Sdelphij buf, buf_back, 2, buf_avail); 64207753Smm 65207753Smm // If we have found a repeated match that is at least 66207753Smm // nice_len long, return it immediately. 67207753Smm if (len >= nice_len) { 68207753Smm *back_res = i; 69207753Smm *len_res = len; 70207753Smm mf_skip(mf, len - 1); 71207753Smm return; 72207753Smm } 73207753Smm 74207753Smm if (len > rep_len) { 75207753Smm rep_index = i; 76207753Smm rep_len = len; 77207753Smm } 78207753Smm } 79207753Smm 80207753Smm // We didn't find a long enough repeated match. Encode it as a normal 81207753Smm // match if the match length is at least nice_len. 82207753Smm if (len_main >= nice_len) { 83292588Sdelphij *back_res = coder->matches[matches_count - 1].dist + REPS; 84207753Smm *len_res = len_main; 85207753Smm mf_skip(mf, len_main - 1); 86207753Smm return; 87207753Smm } 88207753Smm 89207753Smm uint32_t back_main = 0; 90207753Smm if (len_main >= 2) { 91207753Smm back_main = coder->matches[matches_count - 1].dist; 92207753Smm 93207753Smm while (matches_count > 1 && len_main == 94207753Smm coder->matches[matches_count - 2].len + 1) { 95207753Smm if (!change_pair(coder->matches[ 96207753Smm matches_count - 2].dist, 97207753Smm back_main)) 98207753Smm break; 99207753Smm 100207753Smm --matches_count; 101207753Smm len_main = coder->matches[matches_count - 1].len; 102207753Smm back_main = coder->matches[matches_count - 1].dist; 103207753Smm } 104207753Smm 105207753Smm if (len_main == 2 && back_main >= 0x80) 106207753Smm len_main = 1; 107207753Smm } 108207753Smm 109207753Smm if (rep_len >= 2) { 110207753Smm if (rep_len + 1 >= len_main 111207753Smm || (rep_len + 2 >= len_main 112207753Smm && back_main > (UINT32_C(1) << 9)) 113207753Smm || (rep_len + 3 >= len_main 114207753Smm && back_main > (UINT32_C(1) << 15))) { 115207753Smm *back_res = rep_index; 116207753Smm *len_res = rep_len; 117207753Smm mf_skip(mf, rep_len - 1); 118207753Smm return; 119207753Smm } 120207753Smm } 121207753Smm 122207753Smm if (len_main < 2 || buf_avail <= 2) { 123207753Smm *back_res = UINT32_MAX; 124207753Smm *len_res = 1; 125207753Smm return; 126207753Smm } 127207753Smm 128207753Smm // Get the matches for the next byte. If we find a better match, 129207753Smm // the current byte is encoded as a literal. 130207753Smm coder->longest_match_length = mf_find(mf, 131207753Smm &coder->matches_count, coder->matches); 132207753Smm 133207753Smm if (coder->longest_match_length >= 2) { 134207753Smm const uint32_t new_dist = coder->matches[ 135207753Smm coder->matches_count - 1].dist; 136207753Smm 137207753Smm if ((coder->longest_match_length >= len_main 138207753Smm && new_dist < back_main) 139207753Smm || (coder->longest_match_length == len_main + 1 140207753Smm && !change_pair(back_main, new_dist)) 141207753Smm || (coder->longest_match_length > len_main + 1) 142207753Smm || (coder->longest_match_length + 1 >= len_main 143207753Smm && len_main >= 3 144207753Smm && change_pair(new_dist, back_main))) { 145207753Smm *back_res = UINT32_MAX; 146207753Smm *len_res = 1; 147207753Smm return; 148207753Smm } 149207753Smm } 150207753Smm 151207753Smm // In contrast to LZMA SDK, dictionary could not have been moved 152207753Smm // between mf_find() calls, thus it is safe to just increment 153207753Smm // the old buf pointer instead of recalculating it with mf_ptr(). 154207753Smm ++buf; 155207753Smm 156292588Sdelphij const uint32_t limit = my_max(2, len_main - 1); 157207753Smm 158292588Sdelphij for (uint32_t i = 0; i < REPS; ++i) { 159292588Sdelphij if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) { 160207753Smm *back_res = UINT32_MAX; 161207753Smm *len_res = 1; 162207753Smm return; 163207753Smm } 164207753Smm } 165207753Smm 166292588Sdelphij *back_res = back_main + REPS; 167207753Smm *len_res = len_main; 168207753Smm mf_skip(mf, len_main - 2); 169207753Smm return; 170207753Smm} 171