1/* 2 * xdelta.c: xdelta generator. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <assert.h> 26 27#include <apr_general.h> /* for APR_INLINE */ 28#include <apr_hash.h> 29 30#include "svn_hash.h" 31#include "svn_delta.h" 32#include "delta.h" 33 34/* This is pseudo-adler32. It is adler32 without the prime modulus. 35 The idea is borrowed from monotone, and is a translation of the C++ 36 code. Graydon Hoare, the author of the original code, gave his 37 explicit permission to use it under these terms at 8:02pm on 38 Friday, February 11th, 2005. */ 39 40/* Size of the blocks we compute checksums for. This was chosen out of 41 thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. 42 However, later optimizations assume it to be 256 or less. 43 */ 44#define MATCH_BLOCKSIZE 64 45 46/* "no" / "invalid" / "unused" value for positions within the delta windows 47 */ 48#define NO_POSITION ((apr_uint32_t)-1) 49 50/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. 51 * This function may (and will) only be called for characters that are 52 * MATCH_BLOCKSIZE positions apart. 53 * 54 * Please note that the lower 16 bits cannot overflow in neither direction. 55 * Therefore, we don't need to split the value into separate values for 56 * sum(char) and sum(sum(char)). 57 */ 58static APR_INLINE apr_uint32_t 59adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in) 60{ 61 adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out)); 62 63 adler32 -= (unsigned char)c_out; 64 adler32 += (unsigned char)c_in; 65 66 return adler32 + adler32 * 0x10000; 67} 68 69/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting 70 at DATA. Return the checksum value. */ 71 72static APR_INLINE apr_uint32_t 73init_adler32(const char *data) 74{ 75 const unsigned char *input = (const unsigned char *)data; 76 const unsigned char *last = input + MATCH_BLOCKSIZE; 77 78 apr_uint32_t s1 = 0; 79 apr_uint32_t s2 = 0; 80 81 for (; input < last; input += 8) 82 { 83 s1 += input[0]; s2 += s1; 84 s1 += input[1]; s2 += s1; 85 s1 += input[2]; s2 += s1; 86 s1 += input[3]; s2 += s1; 87 s1 += input[4]; s2 += s1; 88 s1 += input[5]; s2 += s1; 89 s1 += input[6]; s2 += s1; 90 s1 += input[7]; s2 += s1; 91 } 92 93 return s2 * 0x10000 + s1; 94} 95 96/* Information for a block of the delta source. The length of the 97 block is the smaller of MATCH_BLOCKSIZE and the difference between 98 the size of the source data and the position of this block. */ 99struct block 100{ 101 apr_uint32_t adlersum; 102 103/* Even in 64 bit systems, store only 32 bit offsets in our hash table 104 (our delta window size much much smaller then 4GB). 105 That reduces the hash table size by 50% from 32to 16KB 106 and makes it easier to fit into the CPU's L1 cache. */ 107 apr_uint32_t pos; /* NO_POSITION -> block is not used */ 108}; 109 110/* A hash table, using open addressing, of the blocks of the source. */ 111struct blocks 112{ 113 /* The largest valid index of slots. 114 This value has an upper bound proportionate to the text delta 115 window size, so unless we dramatically increase the window size, 116 it's safe to make this a 32-bit value. In any case, it has to be 117 hte same width as the block position index, (struct 118 block).pos. */ 119 apr_uint32_t max; 120 /* Source buffer that the positions in SLOTS refer to. */ 121 const char* data; 122 /* The vector of blocks. A pos value of NO_POSITION represents an unused 123 slot. */ 124 struct block *slots; 125}; 126 127 128/* Return a hash value calculated from the adler32 SUM, suitable for use with 129 our hash table. */ 130static apr_uint32_t hash_func(apr_uint32_t sum) 131{ 132 /* Since the adl32 checksum have a bad distribution for the 11th to 16th 133 bits when used for our small block size, we add some bits from the 134 other half of the checksum. */ 135 return sum ^ (sum >> 12); 136} 137 138/* Insert a block with the checksum ADLERSUM at position POS in the source 139 data into the table BLOCKS. Ignore true duplicates, i.e. blocks with 140 actually the same content. */ 141static void 142add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) 143{ 144 apr_uint32_t h = hash_func(adlersum) & blocks->max; 145 146 /* This will terminate, since we know that we will not fill the table. */ 147 for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) 148 if (blocks->slots[h].adlersum == adlersum) 149 if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos, 150 MATCH_BLOCKSIZE) == 0) 151 return; 152 153 blocks->slots[h].adlersum = adlersum; 154 blocks->slots[h].pos = pos; 155} 156 157/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content 158 at DATA, returning its position in the source data. If there is no such 159 block, return NO_POSITION. */ 160static apr_uint32_t 161find_block(const struct blocks *blocks, 162 apr_uint32_t adlersum, 163 const char* data) 164{ 165 apr_uint32_t h = hash_func(adlersum) & blocks->max; 166 167 for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) 168 if (blocks->slots[h].adlersum == adlersum) 169 if (memcmp(blocks->data + blocks->slots[h].pos, data, 170 MATCH_BLOCKSIZE) == 0) 171 return blocks->slots[h].pos; 172 173 return NO_POSITION; 174} 175 176/* Initialize the matches table from DATA of size DATALEN. This goes 177 through every block of MATCH_BLOCKSIZE bytes in the source and 178 checksums it, inserting the result into the BLOCKS table. */ 179static void 180init_blocks_table(const char *data, 181 apr_size_t datalen, 182 struct blocks *blocks, 183 apr_pool_t *pool) 184{ 185 apr_size_t nblocks; 186 apr_size_t wnslots = 1; 187 apr_uint32_t nslots; 188 apr_uint32_t i; 189 190 /* Be pessimistic about the block count. */ 191 nblocks = datalen / MATCH_BLOCKSIZE + 1; 192 /* Find nearest larger power of two. */ 193 while (wnslots <= nblocks) 194 wnslots *= 2; 195 /* Double the number of slots to avoid a too high load. */ 196 wnslots *= 2; 197 /* Narrow the number of slots to 32 bits, which is the size of the 198 block position index in the hash table. 199 Sanity check: On 64-bit platforms, apr_size_t is likely to be 200 larger than apr_uint32_t. Make sure that the number of slots 201 actually fits into blocks->max. It's safe to use a hard assert 202 here, because the largest possible value for nslots is 203 proportional to the text delta window size and is therefore much 204 smaller than the range of an apr_uint32_t. If we ever happen to 205 increase the window size too much, this assertion will get 206 triggered by the test suite. */ 207 nslots = (apr_uint32_t) wnslots; 208 SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots); 209 blocks->max = nslots - 1; 210 blocks->data = data; 211 blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots))); 212 for (i = 0; i < nslots; ++i) 213 { 214 /* Avoid using an indeterminate value in the lookup. */ 215 blocks->slots[i].adlersum = 0; 216 blocks->slots[i].pos = NO_POSITION; 217 } 218 219 /* If there is an odd block at the end of the buffer, we will 220 not use that shorter block for deltification (only indirectly 221 as an extension of some previous block). */ 222 for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE) 223 add_block(blocks, init_adler32(data + i), i); 224} 225 226/* Return the lowest position at which A and B differ. If no difference 227 * can be found in the first MAX_LEN characters, MAX_LEN will be returned. 228 */ 229static apr_size_t 230match_length(const char *a, const char *b, apr_size_t max_len) 231{ 232 apr_size_t pos = 0; 233 234#if SVN_UNALIGNED_ACCESS_IS_OK 235 236 /* Chunky processing is so much faster ... 237 * 238 * We can't make this work on architectures that require aligned access 239 * because A and B will probably have different alignment. So, skipping 240 * the first few chars until alignment is reached is not an option. 241 */ 242 for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) 243 if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) 244 break; 245 246#endif 247 248 for (; pos < max_len; ++pos) 249 if (a[pos] != b[pos]) 250 break; 251 252 return pos; 253} 254 255/* Return the number of bytes before A and B that don't differ. If no 256 * difference can be found in the first MAX_LEN characters, MAX_LEN will 257 * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be 258 * valid addresses. 259 */ 260static apr_size_t 261reverse_match_length(const char *a, const char *b, apr_size_t max_len) 262{ 263 apr_size_t pos = 0; 264 265#if SVN_UNALIGNED_ACCESS_IS_OK 266 267 /* Chunky processing is so much faster ... 268 * 269 * We can't make this work on architectures that require aligned access 270 * because A and B will probably have different alignment. So, skipping 271 * the first few chars until alignment is reached is not an option. 272 */ 273 for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) 274 if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) 275 break; 276 277 pos -= sizeof(apr_size_t); 278 279#endif 280 281 /* If we find a mismatch at -pos, pos-1 characters matched. 282 */ 283 while (++pos <= max_len) 284 if (a[0-pos] != b[0-pos]) 285 return pos - 1; 286 287 /* No mismatch found -> at least MAX_LEN matching chars. 288 */ 289 return max_len; 290} 291 292 293/* Try to find a match for the target data B in BLOCKS, and then 294 extend the match as long as data in A and B at the match position 295 continues to match. We set the position in A we ended up in (in 296 case we extended it backwards) in APOSP and update the corresponding 297 position within B given in BPOSP. PENDING_INSERT_START sets the 298 lower limit to BPOSP. 299 Return number of matching bytes starting at ASOP. Return 0 if 300 no match has been found. 301 */ 302static apr_size_t 303find_match(const struct blocks *blocks, 304 const apr_uint32_t rolling, 305 const char *a, 306 apr_size_t asize, 307 const char *b, 308 apr_size_t bsize, 309 apr_size_t *bposp, 310 apr_size_t *aposp, 311 apr_size_t pending_insert_start) 312{ 313 apr_size_t apos, bpos = *bposp; 314 apr_size_t delta, max_delta; 315 316 apos = find_block(blocks, rolling, b + bpos); 317 318 /* See if we have a match. */ 319 if (apos == NO_POSITION) 320 return 0; 321 322 /* Extend the match forward as far as possible */ 323 max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE 324 ? asize - apos - MATCH_BLOCKSIZE 325 : bsize - bpos - MATCH_BLOCKSIZE; 326 delta = match_length(a + apos + MATCH_BLOCKSIZE, 327 b + bpos + MATCH_BLOCKSIZE, 328 max_delta); 329 330 /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's 331 content has been sampled only every MATCH_BLOCKSIZE positions). */ 332 while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1]) 333 { 334 --apos; 335 --bpos; 336 ++delta; 337 } 338 339 *aposp = apos; 340 *bposp = bpos; 341 342 return MATCH_BLOCKSIZE + delta; 343} 344 345/* Utility for compute_delta() that compares the range B[START,BSIZE) with 346 * the range of similar size before A[ASIZE]. Create corresponding copy and 347 * insert operations. 348 * 349 * BUILD_BATON and POOL will be passed through from compute_delta(). 350 */ 351static void 352store_delta_trailer(svn_txdelta__ops_baton_t *build_baton, 353 const char *a, 354 apr_size_t asize, 355 const char *b, 356 apr_size_t bsize, 357 apr_size_t start, 358 apr_pool_t *pool) 359{ 360 apr_size_t end_match; 361 apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize; 362 if (max_len == 0) 363 return; 364 365 end_match = reverse_match_length(a + asize, b + bsize, max_len); 366 if (end_match <= 4) 367 end_match = 0; 368 369 if (bsize - start > end_match) 370 svn_txdelta__insert_op(build_baton, svn_txdelta_new, 371 start, bsize - start - end_match, b + start, pool); 372 if (end_match) 373 svn_txdelta__insert_op(build_baton, svn_txdelta_source, 374 asize - end_match, end_match, NULL, pool); 375} 376 377 378/* Compute a delta from A to B using xdelta. 379 380 The basic xdelta algorithm is as follows: 381 382 1. Go through the source data, checksumming every MATCH_BLOCKSIZE 383 block of bytes using adler32, and inserting the checksum into a 384 match table with the position of the match. 385 2. Go through the target byte by byte, seeing if that byte starts a 386 match that we have in the match table. 387 2a. If so, try to extend the match as far as possible both 388 forwards and backwards, and then insert a source copy 389 operation into the delta ops builder for the match. 390 2b. If not, insert the byte as new data using an insert delta op. 391 392 Our implementation doesn't immediately insert "insert" operations, 393 it waits until we have another copy, or we are done. The reasoning 394 is twofold: 395 396 1. Otherwise, we would just be building a ton of 1 byte insert 397 operations 398 2. So that we can extend a source match backwards into a pending 399 insert operation, and possibly remove the need for the insert 400 entirely. This can happen due to stream alignment. 401*/ 402static void 403compute_delta(svn_txdelta__ops_baton_t *build_baton, 404 const char *a, 405 apr_size_t asize, 406 const char *b, 407 apr_size_t bsize, 408 apr_pool_t *pool) 409{ 410 struct blocks blocks; 411 apr_uint32_t rolling; 412 apr_size_t lo = 0, pending_insert_start = 0; 413 414 /* Optimization: directly compare window starts. If more than 4 415 * bytes match, we can immediately create a matching windows. 416 * Shorter sequences result in a net data increase. */ 417 lo = match_length(a, b, asize > bsize ? bsize : asize); 418 if ((lo > 4) || (lo == bsize)) 419 { 420 svn_txdelta__insert_op(build_baton, svn_txdelta_source, 421 0, lo, NULL, pool); 422 pending_insert_start = lo; 423 } 424 else 425 lo = 0; 426 427 /* If the size of the target is smaller than the match blocksize, just 428 insert the entire target. */ 429 if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE)) 430 { 431 store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool); 432 return; 433 } 434 435 /* Initialize the matches table. */ 436 init_blocks_table(a, asize, &blocks, pool); 437 438 /* Initialize our rolling checksum. */ 439 rolling = init_adler32(b + lo); 440 while (lo < bsize) 441 { 442 apr_size_t matchlen = 0; 443 apr_size_t apos; 444 445 if (lo + MATCH_BLOCKSIZE <= bsize) 446 matchlen = find_match(&blocks, rolling, a, asize, b, bsize, 447 &lo, &apos, pending_insert_start); 448 449 /* If we didn't find a real match, insert the byte at the target 450 position into the pending insert. */ 451 if (matchlen == 0) 452 { 453 /* move block one position forward. Short blocks at the end of 454 the buffer cannot be used as the beginning of a new match */ 455 if (lo + MATCH_BLOCKSIZE < bsize) 456 rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]); 457 458 lo++; 459 } 460 else 461 { 462 /* store the sequence of B that is between the matches */ 463 if (lo - pending_insert_start > 0) 464 svn_txdelta__insert_op(build_baton, svn_txdelta_new, 465 0, lo - pending_insert_start, 466 b + pending_insert_start, pool); 467 else 468 { 469 /* the match borders on the previous op. Maybe, we found a 470 * match that is better than / overlapping the previous one. */ 471 apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); 472 if (len > 0) 473 { 474 len = svn_txdelta__remove_copy(build_baton, len); 475 apos -= len; 476 matchlen += len; 477 lo -= len; 478 } 479 } 480 481 /* Reset the pending insert start to immediately after the 482 match. */ 483 lo += matchlen; 484 pending_insert_start = lo; 485 svn_txdelta__insert_op(build_baton, svn_txdelta_source, 486 apos, matchlen, NULL, pool); 487 488 /* Calculate the Adler32 sum for the first block behind the match. 489 * Ignore short buffers at the end of B. 490 */ 491 if (lo + MATCH_BLOCKSIZE <= bsize) 492 rolling = init_adler32(b + lo); 493 } 494 } 495 496 /* If we still have an insert pending at the end, throw it in. */ 497 store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool); 498} 499 500void 501svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton, 502 const char *data, 503 apr_size_t source_len, 504 apr_size_t target_len, 505 apr_pool_t *pool) 506{ 507 /* We should never be asked to compute something when the source_len is 0; 508 we just use a single insert op there (and rely on zlib for 509 compression). */ 510 assert(source_len != 0); 511 compute_delta(build_baton, data, source_len, 512 data + source_len, target_len, 513 pool); 514} 515