1/*
2 * xdelta.c:  xdelta generator.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <assert.h>
26
27#include <apr_general.h>        /* for APR_INLINE */
28#include <apr_hash.h>
29
30#include "svn_hash.h"
31#include "svn_delta.h"
32#include "delta.h"
33
34/* This is pseudo-adler32. It is adler32 without the prime modulus.
35   The idea is borrowed from monotone, and is a translation of the C++
36   code.  Graydon Hoare, the author of the original code, gave his
37   explicit permission to use it under these terms at 8:02pm on
38   Friday, February 11th, 2005.  */
39
40/* Size of the blocks we compute checksums for. This was chosen out of
41   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.
42   However, later optimizations assume it to be 256 or less.
43 */
44#define MATCH_BLOCKSIZE 64
45
46/* "no" / "invalid" / "unused" value for positions within the delta windows
47 */
48#define NO_POSITION ((apr_uint32_t)-1)
49
/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
 * This function may (and will) only be called for characters that are
 * MATCH_BLOCKSIZE positions apart.
 *
 * Please note that the lower 16 bits cannot overflow in either direction.
 * Therefore, we don't need to split the value into separate values for
 * sum(char) and sum(sum(char)).
 */
static APR_INLINE apr_uint32_t
adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
{
  /* The upper 16 bits hold s2 = sum(sum(char)).  Sliding the window one
     position forward removes MATCH_BLOCKSIZE contributions of C_OUT
     from s2. */
  adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));

  /* The lower 16 bits hold s1 = sum(char); swap C_OUT for C_IN there. */
  adler32 -= (unsigned char)c_out;
  adler32 += (unsigned char)c_in;

  /* s2 must also grow by the updated s1.  Adding ADLER32 shifted left by
     16 achieves exactly that: the s1 half lands on the s2 half, while the
     old s2 half overflows out of the 32 bit value and vanishes. */
  return adler32 + adler32 * 0x10000;
}
68
69/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
70   at DATA.  Return the checksum value.  */
71
72static APR_INLINE apr_uint32_t
73init_adler32(const char *data)
74{
75  const unsigned char *input = (const unsigned char *)data;
76  const unsigned char *last = input + MATCH_BLOCKSIZE;
77
78  apr_uint32_t s1 = 0;
79  apr_uint32_t s2 = 0;
80
81  for (; input < last; input += 8)
82    {
83      s1 += input[0]; s2 += s1;
84      s1 += input[1]; s2 += s1;
85      s1 += input[2]; s2 += s1;
86      s1 += input[3]; s2 += s1;
87      s1 += input[4]; s2 += s1;
88      s1 += input[5]; s2 += s1;
89      s1 += input[6]; s2 += s1;
90      s1 += input[7]; s2 += s1;
91    }
92
93  return s2 * 0x10000 + s1;
94}
95
/* Information for a block of the delta source.  The length of the
   block is the smaller of MATCH_BLOCKSIZE and the difference between
   the size of the source data and the position of this block. */
struct block
{
  /* Pseudo-adler32 checksum of this block's content. */
  apr_uint32_t adlersum;

/* Even in 64 bit systems, store only 32 bit offsets in our hash table
   (our delta window size is much, much smaller than 4GB).
   That reduces the hash table size by 50% from 32 to 16KB
   and makes it easier to fit into the CPU's L1 cache. */
  apr_uint32_t pos;			/* NO_POSITION -> block is not used */
};
109
/* A hash table, using open addressing, of the blocks of the source. */
struct blocks
{
  /* The largest valid index of slots.
     This value has an upper bound proportionate to the text delta
     window size, so unless we dramatically increase the window size,
     it's safe to make this a 32-bit value.  In any case, it has to be
     the same width as the block position index, (struct
     block).pos. */
  apr_uint32_t max;
  /* Source buffer that the positions in SLOTS refer to. */
  const char* data;
  /* The vector of blocks.  A pos value of NO_POSITION represents an unused
     slot. */
  struct block *slots;
};
126
127
128/* Return a hash value calculated from the adler32 SUM, suitable for use with
129   our hash table. */
130static apr_uint32_t hash_func(apr_uint32_t sum)
131{
132  /* Since the adl32 checksum have a bad distribution for the 11th to 16th
133     bits when used for our small block size, we add some bits from the
134     other half of the checksum. */
135  return sum ^ (sum >> 12);
136}
137
138/* Insert a block with the checksum ADLERSUM at position POS in the source
139   data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
140   actually the same content. */
141static void
142add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
143{
144  apr_uint32_t h = hash_func(adlersum) & blocks->max;
145
146  /* This will terminate, since we know that we will not fill the table. */
147  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
148    if (blocks->slots[h].adlersum == adlersum)
149      if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
150                 MATCH_BLOCKSIZE) == 0)
151        return;
152
153  blocks->slots[h].adlersum = adlersum;
154  blocks->slots[h].pos = pos;
155}
156
157/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
158   at DATA, returning its position in the source data.  If there is no such
159   block, return NO_POSITION. */
160static apr_uint32_t
161find_block(const struct blocks *blocks,
162           apr_uint32_t adlersum,
163           const char* data)
164{
165  apr_uint32_t h = hash_func(adlersum) & blocks->max;
166
167  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
168    if (blocks->slots[h].adlersum == adlersum)
169      if (memcmp(blocks->data + blocks->slots[h].pos, data,
170                 MATCH_BLOCKSIZE) == 0)
171        return blocks->slots[h].pos;
172
173  return NO_POSITION;
174}
175
176/* Initialize the matches table from DATA of size DATALEN.  This goes
177   through every block of MATCH_BLOCKSIZE bytes in the source and
178   checksums it, inserting the result into the BLOCKS table.  */
179static void
180init_blocks_table(const char *data,
181                  apr_size_t datalen,
182                  struct blocks *blocks,
183                  apr_pool_t *pool)
184{
185  apr_size_t nblocks;
186  apr_size_t wnslots = 1;
187  apr_uint32_t nslots;
188  apr_uint32_t i;
189
190  /* Be pessimistic about the block count. */
191  nblocks = datalen / MATCH_BLOCKSIZE + 1;
192  /* Find nearest larger power of two. */
193  while (wnslots <= nblocks)
194    wnslots *= 2;
195  /* Double the number of slots to avoid a too high load. */
196  wnslots *= 2;
197  /* Narrow the number of slots to 32 bits, which is the size of the
198     block position index in the hash table.
199     Sanity check: On 64-bit platforms, apr_size_t is likely to be
200     larger than apr_uint32_t. Make sure that the number of slots
201     actually fits into blocks->max.  It's safe to use a hard assert
202     here, because the largest possible value for nslots is
203     proportional to the text delta window size and is therefore much
204     smaller than the range of an apr_uint32_t.  If we ever happen to
205     increase the window size too much, this assertion will get
206     triggered by the test suite. */
207  nslots = (apr_uint32_t) wnslots;
208  SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);
209  blocks->max = nslots - 1;
210  blocks->data = data;
211  blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
212  for (i = 0; i < nslots; ++i)
213    {
214      /* Avoid using an indeterminate value in the lookup. */
215      blocks->slots[i].adlersum = 0;
216      blocks->slots[i].pos = NO_POSITION;
217    }
218
219  /* If there is an odd block at the end of the buffer, we will
220     not use that shorter block for deltification (only indirectly
221     as an extension of some previous block). */
222  for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
223    add_block(blocks, init_adler32(data + i), i);
224}
225
226/* Return the lowest position at which A and B differ. If no difference
227 * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
228 */
229static apr_size_t
230match_length(const char *a, const char *b, apr_size_t max_len)
231{
232  apr_size_t pos = 0;
233
234#if SVN_UNALIGNED_ACCESS_IS_OK
235
236  /* Chunky processing is so much faster ...
237   *
238   * We can't make this work on architectures that require aligned access
239   * because A and B will probably have different alignment. So, skipping
240   * the first few chars until alignment is reached is not an option.
241   */
242  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
243    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
244      break;
245
246#endif
247
248  for (; pos < max_len; ++pos)
249    if (a[pos] != b[pos])
250      break;
251
252  return pos;
253}
254
/* Return the number of bytes before A and B that don't differ.  If no
 * difference can be found in the first MAX_LEN characters,  MAX_LEN will
 * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
 * valid addresses.
 */
static apr_size_t
reverse_match_length(const char *a, const char *b, apr_size_t max_len)
{
  apr_size_t pos = 0;

#if SVN_UNALIGNED_ACCESS_IS_OK

  /* Chunky processing is so much faster ...
   *
   * We can't make this work on architectures that require aligned access
   * because A and B will probably have different alignment. So, skipping
   * the first few chars until alignment is reached is not an option.
   */
  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
      break;

  /* The loop stopped either on a mismatching word or because POS would
     overshoot MAX_LEN.  Step back one word; the byte loop below rescans
     that word to find the exact mismatch position. */
  pos -= sizeof(apr_size_t);

#endif

  /* If we find a mismatch at -pos, pos-1 characters matched.
   */
  while (++pos <= max_len)
    if (a[0-pos] != b[0-pos])
      return pos - 1;

  /* No mismatch found -> at least MAX_LEN matching chars.
   */
  return max_len;
}
291
292
/* Try to find a match for the target data B in BLOCKS, and then
   extend the match as long as data in A and B at the match position
   continues to match.  We set the position in A we ended up in (in
   case we extended it backwards) in APOSP and update the corresponding
   position within B given in BPOSP. PENDING_INSERT_START sets the
   lower limit to BPOSP.
   Return number of matching bytes starting at *APOSP.  Return 0 if
   no match has been found.
 */
static apr_size_t
find_match(const struct blocks *blocks,
           const apr_uint32_t rolling,
           const char *a,
           apr_size_t asize,
           const char *b,
           apr_size_t bsize,
           apr_size_t *bposp,
           apr_size_t *aposp,
           apr_size_t pending_insert_start)
{
  apr_size_t apos, bpos = *bposp;
  apr_size_t delta, max_delta;

  /* Look up the block whose checksum equals the current rolling sum. */
  apos = find_block(blocks, rolling, b + bpos);

  /* See if we have a match.  */
  if (apos == NO_POSITION)
    return 0;

  /* Extend the match forward as far as possible.  MAX_DELTA is the
     number of bytes remaining after the matched block in whichever of
     A and B runs out first. */
  max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
            ? asize - apos - MATCH_BLOCKSIZE
            : bsize - bpos - MATCH_BLOCKSIZE;
  delta = match_length(a + apos + MATCH_BLOCKSIZE,
                       b + bpos + MATCH_BLOCKSIZE,
                       max_delta);

  /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
     content has been sampled only every MATCH_BLOCKSIZE positions).  Never
     move BPOS below PENDING_INSERT_START, i.e. into data that is already
     covered by a previously emitted op. */
  while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
    {
      --apos;
      --bpos;
      ++delta;
    }

  *aposp = apos;
  *bposp = bpos;

  return MATCH_BLOCKSIZE + delta;
}
344
/* Utility for compute_delta() that compares the range B[START,BSIZE) with
 * the range of similar size before A[ASIZE]. Create corresponding copy and
 * insert operations.
 *
 * BUILD_BATON and POOL will be passed through from compute_delta().
 */
static void
store_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
                    const char *a,
                    apr_size_t asize,
                    const char *b,
                    apr_size_t bsize,
                    apr_size_t start,
                    apr_pool_t *pool)
{
  apr_size_t end_match;
  /* Never compare back further than the shorter of A and B's remainder. */
  apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
  if (max_len == 0)
    return;

  /* Count how many trailing bytes of A and B are identical. */
  end_match = reverse_match_length(a + asize, b + bsize, max_len);
  /* Very short copies are not worth emitting; 4 bytes or fewer would
     result in a net data increase (compare compute_delta's window-start
     optimization). */
  if (end_match <= 4)
    end_match = 0;

  /* Emit everything before the matching trailer as new data ... */
  if (bsize - start > end_match)
    svn_txdelta__insert_op(build_baton, svn_txdelta_new,
                           start, bsize - start - end_match, b + start, pool);
  /* ... and a source copy for the trailer itself. */
  if (end_match)
    svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                           asize - end_match, end_match, NULL, pool);
}
376
377
/* Compute a delta from A to B using xdelta.

   The basic xdelta algorithm is as follows:

   1. Go through the source data, checksumming every MATCH_BLOCKSIZE
      block of bytes using adler32, and inserting the checksum into a
      match table with the position of the match.
   2. Go through the target byte by byte, seeing if that byte starts a
      match that we have in the match table.
      2a. If so, try to extend the match as far as possible both
          forwards and backwards, and then insert a source copy
          operation into the delta ops builder for the match.
      2b. If not, insert the byte as new data using an insert delta op.

   Our implementation doesn't immediately insert "insert" operations,
   it waits until we have another copy, or we are done.  The reasoning
   is twofold:

   1. Otherwise, we would just be building a ton of 1 byte insert
      operations
   2. So that we can extend a source match backwards into a pending
     insert operation, and possibly remove the need for the insert
     entirely.  This can happen due to stream alignment.
*/
static void
compute_delta(svn_txdelta__ops_baton_t *build_baton,
              const char *a,
              apr_size_t asize,
              const char *b,
              apr_size_t bsize,
              apr_pool_t *pool)
{
  struct blocks blocks;
  apr_uint32_t rolling;
  apr_size_t lo = 0, pending_insert_start = 0;

  /* Optimization: directly compare window starts. If more than 4
   * bytes match, we can immediately create a matching window.
   * Shorter sequences result in a net data increase. */
  lo = match_length(a, b, asize > bsize ? bsize : asize);
  if ((lo > 4) || (lo == bsize))
    {
      svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                             0, lo, NULL, pool);
      pending_insert_start = lo;
    }
  else
    lo = 0;

  /* If the size of the target is smaller than the match blocksize, just
     insert the entire target.  */
  if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
    {
      store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
      return;
    }

  /* Initialize the matches table.  */
  init_blocks_table(a, asize, &blocks, pool);

  /* Initialize our rolling checksum.  */
  rolling = init_adler32(b + lo);
  while (lo < bsize)
    {
      apr_size_t matchlen = 0;
      apr_size_t apos;

      /* Only look for a match while a full block still fits behind LO. */
      if (lo + MATCH_BLOCKSIZE <= bsize)
        matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
                              &lo, &apos, pending_insert_start);

      /* If we didn't find a real match, insert the byte at the target
         position into the pending insert.  */
      if (matchlen == 0)
        {
          /* move block one position forward. Short blocks at the end of
             the buffer cannot be used as the beginning of a new match */
          if (lo + MATCH_BLOCKSIZE < bsize)
            rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);

          lo++;
        }
      else
        {
          /* store the sequence of B that is between the matches */
          if (lo - pending_insert_start > 0)
            svn_txdelta__insert_op(build_baton, svn_txdelta_new,
                                   0, lo - pending_insert_start,
                                   b + pending_insert_start, pool);
          else
            {
              /* the match borders on the previous op. Maybe, we found a
               * match that is better than / overlapping the previous one. */
              apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo);
              if (len > 0)
                {
                  /* Shrink the previous copy op by LEN bytes and grow this
                     match by the amount actually removed. */
                  len = svn_txdelta__remove_copy(build_baton, len);
                  apos -= len;
                  matchlen += len;
                  lo -= len;
                }
            }

          /* Reset the pending insert start to immediately after the
             match. */
          lo += matchlen;
          pending_insert_start = lo;
          svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                                 apos, matchlen, NULL, pool);

          /* Calculate the Adler32 sum for the first block behind the match.
           * Ignore short buffers at the end of B.
           */
          if (lo + MATCH_BLOCKSIZE <= bsize)
            rolling = init_adler32(b + lo);
        }
    }

  /* If we still have an insert pending at the end, throw it in.  */
  store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool);
}
499
500void
501svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
502                    const char *data,
503                    apr_size_t source_len,
504                    apr_size_t target_len,
505                    apr_pool_t *pool)
506{
507  /*  We should never be asked to compute something when the source_len is 0;
508      we just use a single insert op there (and rely on zlib for
509      compression). */
510  assert(source_len != 0);
511  compute_delta(build_baton, data, source_len,
512                data + source_len, target_len,
513                pool);
514}
515