lzma_encoder_optimum_fast.c revision 213700
1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file lzma_encoder_optimum_fast.c 4207753Smm// 5207753Smm// Author: Igor Pavlov 6207753Smm// 7207753Smm// This file has been put into the public domain. 8207753Smm// You can do whatever you want with this file. 9207753Smm// 10207753Smm/////////////////////////////////////////////////////////////////////////////// 11207753Smm 12207753Smm#include "lzma_encoder_private.h" 13207753Smm 14207753Smm 15207753Smm#define change_pair(small_dist, big_dist) \ 16207753Smm (((big_dist) >> 7) > (small_dist)) 17207753Smm 18207753Smm 19207753Smmextern void 20207753Smmlzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf, 21207753Smm uint32_t *restrict back_res, uint32_t *restrict len_res) 22207753Smm{ 23207753Smm const uint32_t nice_len = mf->nice_len; 24207753Smm 25207753Smm uint32_t len_main; 26207753Smm uint32_t matches_count; 27207753Smm if (mf->read_ahead == 0) { 28207753Smm len_main = mf_find(mf, &matches_count, coder->matches); 29207753Smm } else { 30207753Smm assert(mf->read_ahead == 1); 31207753Smm len_main = coder->longest_match_length; 32207753Smm matches_count = coder->matches_count; 33207753Smm } 34207753Smm 35207753Smm const uint8_t *buf = mf_ptr(mf) - 1; 36213700Smm const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX); 37207753Smm 38207753Smm if (buf_avail < 2) { 39207753Smm // There's not enough input left to encode a match. 40207753Smm *back_res = UINT32_MAX; 41207753Smm *len_res = 1; 42207753Smm return; 43207753Smm } 44207753Smm 45207753Smm // Look for repeated matches; scan the previous four match distances 46207753Smm uint32_t rep_len = 0; 47207753Smm uint32_t rep_index = 0; 48207753Smm 49207753Smm for (uint32_t i = 0; i < REP_DISTANCES; ++i) { 50207753Smm // Pointer to the beginning of the match candidate 51207753Smm const uint8_t *const buf_back = buf - coder->reps[i] - 1; 52207753Smm 53207753Smm // If the first two bytes (2 == MATCH_LEN_MIN) do not match, 54207753Smm // this rep is not useful. 55207753Smm if (not_equal_16(buf, buf_back)) 56207753Smm continue; 57207753Smm 58207753Smm // The first two bytes matched. 59207753Smm // Calculate the length of the match. 60207753Smm uint32_t len; 61207753Smm for (len = 2; len < buf_avail 62207753Smm && buf[len] == buf_back[len]; ++len) ; 63207753Smm 64207753Smm // If we have found a repeated match that is at least 65207753Smm // nice_len long, return it immediately. 66207753Smm if (len >= nice_len) { 67207753Smm *back_res = i; 68207753Smm *len_res = len; 69207753Smm mf_skip(mf, len - 1); 70207753Smm return; 71207753Smm } 72207753Smm 73207753Smm if (len > rep_len) { 74207753Smm rep_index = i; 75207753Smm rep_len = len; 76207753Smm } 77207753Smm } 78207753Smm 79207753Smm // We didn't find a long enough repeated match. Encode it as a normal 80207753Smm // match if the match length is at least nice_len. 81207753Smm if (len_main >= nice_len) { 82207753Smm *back_res = coder->matches[matches_count - 1].dist 83207753Smm + REP_DISTANCES; 84207753Smm *len_res = len_main; 85207753Smm mf_skip(mf, len_main - 1); 86207753Smm return; 87207753Smm } 88207753Smm 89207753Smm uint32_t back_main = 0; 90207753Smm if (len_main >= 2) { 91207753Smm back_main = coder->matches[matches_count - 1].dist; 92207753Smm 93207753Smm while (matches_count > 1 && len_main == 94207753Smm coder->matches[matches_count - 2].len + 1) { 95207753Smm if (!change_pair(coder->matches[ 96207753Smm matches_count - 2].dist, 97207753Smm back_main)) 98207753Smm break; 99207753Smm 100207753Smm --matches_count; 101207753Smm len_main = coder->matches[matches_count - 1].len; 102207753Smm back_main = coder->matches[matches_count - 1].dist; 103207753Smm } 104207753Smm 105207753Smm if (len_main == 2 && back_main >= 0x80) 106207753Smm len_main = 1; 107207753Smm } 108207753Smm 109207753Smm if (rep_len >= 2) { 110207753Smm if (rep_len + 1 >= len_main 111207753Smm || (rep_len + 2 >= len_main 112207753Smm && back_main > (UINT32_C(1) << 9)) 113207753Smm || (rep_len + 3 >= len_main 114207753Smm && back_main > (UINT32_C(1) << 15))) { 115207753Smm *back_res = rep_index; 116207753Smm *len_res = rep_len; 117207753Smm mf_skip(mf, rep_len - 1); 118207753Smm return; 119207753Smm } 120207753Smm } 121207753Smm 122207753Smm if (len_main < 2 || buf_avail <= 2) { 123207753Smm *back_res = UINT32_MAX; 124207753Smm *len_res = 1; 125207753Smm return; 126207753Smm } 127207753Smm 128207753Smm // Get the matches for the next byte. If we find a better match, 129207753Smm // the current byte is encoded as a literal. 130207753Smm coder->longest_match_length = mf_find(mf, 131207753Smm &coder->matches_count, coder->matches); 132207753Smm 133207753Smm if (coder->longest_match_length >= 2) { 134207753Smm const uint32_t new_dist = coder->matches[ 135207753Smm coder->matches_count - 1].dist; 136207753Smm 137207753Smm if ((coder->longest_match_length >= len_main 138207753Smm && new_dist < back_main) 139207753Smm || (coder->longest_match_length == len_main + 1 140207753Smm && !change_pair(back_main, new_dist)) 141207753Smm || (coder->longest_match_length > len_main + 1) 142207753Smm || (coder->longest_match_length + 1 >= len_main 143207753Smm && len_main >= 3 144207753Smm && change_pair(new_dist, back_main))) { 145207753Smm *back_res = UINT32_MAX; 146207753Smm *len_res = 1; 147207753Smm return; 148207753Smm } 149207753Smm } 150207753Smm 151207753Smm // In contrast to LZMA SDK, dictionary could not have been moved 152207753Smm // between mf_find() calls, thus it is safe to just increment 153207753Smm // the old buf pointer instead of recalculating it with mf_ptr(). 154207753Smm ++buf; 155207753Smm 156207753Smm const uint32_t limit = len_main - 1; 157207753Smm 158207753Smm for (uint32_t i = 0; i < REP_DISTANCES; ++i) { 159207753Smm const uint8_t *const buf_back = buf - coder->reps[i] - 1; 160207753Smm 161207753Smm if (not_equal_16(buf, buf_back)) 162207753Smm continue; 163207753Smm 164207753Smm uint32_t len; 165207753Smm for (len = 2; len < limit 166207753Smm && buf[len] == buf_back[len]; ++len) ; 167207753Smm 168207753Smm if (len >= limit) { 169207753Smm *back_res = UINT32_MAX; 170207753Smm *len_res = 1; 171207753Smm return; 172207753Smm } 173207753Smm } 174207753Smm 175207753Smm *back_res = back_main + REP_DISTANCES; 176207753Smm *len_res = len_main; 177207753Smm mf_skip(mf, len_main - 2); 178207753Smm return; 179207753Smm} 180