1/*
2 * xdelta.c:  xdelta generator.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
#include <assert.h>
#include <string.h>             /* for memcmp(), memset() */

#include <apr_general.h>        /* for APR_INLINE */
#include <apr_hash.h>

#include "svn_hash.h"
#include "svn_delta.h"
#include "private/svn_string_private.h"
#include "delta.h"
34
35/* This is pseudo-adler32. It is adler32 without the prime modulus.
36   The idea is borrowed from monotone, and is a translation of the C++
37   code.  Graydon Hoare, the author of the original code, gave his
38   explicit permission to use it under these terms at 8:02pm on
39   Friday, February 11th, 2005.  */
40
41/* Size of the blocks we compute checksums for. This was chosen out of
42   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.
43   However, later optimizations assume it to be 256 or less.
44 */
45#define MATCH_BLOCKSIZE 64
46
47/* Size of the checksum presence FLAGS array in BLOCKS_T.  With standard
48   MATCH_BLOCKSIZE and SVN_DELTA_WINDOW_SIZE, 32k entries is about 20x
49   the number of checksums that actually occur, i.e. we expect a >95%
50   probability that non-matching checksums get already detected by checking
51   against the FLAGS array.
52   Must be a power of 2.
53 */
54#define FLAGS_COUNT (32 * 1024)
55
56/* "no" / "invalid" / "unused" value for positions within the delta windows
57 */
58#define NO_POSITION ((apr_uint32_t)-1)
59
60/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
61 * This function may (and will) only be called for characters that are
62 * MATCH_BLOCKSIZE positions apart.
63 *
64 * Please note that the lower 16 bits cannot overflow in neither direction.
65 * Therefore, we don't need to split the value into separate values for
66 * sum(char) and sum(sum(char)).
67 */
68static APR_INLINE apr_uint32_t
69adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
70{
71  adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));
72
73  adler32 -= (unsigned char)c_out;
74  adler32 += (unsigned char)c_in;
75
76  return adler32 + adler32 * 0x10000;
77}
78
79/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
80   at DATA.  Return the checksum value.  */
81
82static APR_INLINE apr_uint32_t
83init_adler32(const char *data)
84{
85  const unsigned char *input = (const unsigned char *)data;
86  const unsigned char *last = input + MATCH_BLOCKSIZE;
87
88  apr_uint32_t s1 = 0;
89  apr_uint32_t s2 = 0;
90
91  for (; input < last; input += 8)
92    {
93      s1 += input[0]; s2 += s1;
94      s1 += input[1]; s2 += s1;
95      s1 += input[2]; s2 += s1;
96      s1 += input[3]; s2 += s1;
97      s1 += input[4]; s2 += s1;
98      s1 += input[5]; s2 += s1;
99      s1 += input[6]; s2 += s1;
100      s1 += input[7]; s2 += s1;
101    }
102
103  return s2 * 0x10000 + s1;
104}
105
/* Information for a block of the delta source.  The length of the
   block is the smaller of MATCH_BLOCKSIZE and the difference between
   the size of the source data and the position of this block. */
struct block
{
  /* Pseudo-adler32 checksum of this block's content. */
  apr_uint32_t adlersum;

/* Even in 64 bit systems, store only 32 bit offsets in our hash table
   (our delta window size is much, much smaller than 4GB).
   That reduces the hash table size by 50% from 32 to 16KB
   and makes it easier to fit into the CPU's L1 cache. */
  apr_uint32_t pos;    /* NO_POSITION -> block is not used */
};
119
/* A hash table, using open addressing, of the blocks of the source. */
struct blocks
{
  /* The largest valid index of slots.
     This value has an upper bound proportionate to the text delta
     window size, so unless we dramatically increase the window size,
     it's safe to make this a 32-bit value.  In any case, it has to be
     the same width as the block position index, (struct
     block).pos. */
  apr_uint32_t max;

  /* Source buffer that the positions in SLOTS refer to. */
  const char* data;

  /* Bit array indicating whether there may be a matching slot for a given
     adler32 checksum.  Since FLAGS has many more entries than SLOTS, this
     will indicate most cases of non-matching checksums with a "0" bit, i.e.
     as "known not to have a match".
     The mapping of adler32 checksum bits is [0..2][16..27] (LSB -> MSB),
     i.e. address the byte by the multiplicative part of adler32 and address
     the bits in that byte by the additive part of adler32. */
  char flags[FLAGS_COUNT / 8];

  /* The vector of blocks.  A pos value of NO_POSITION represents an unused
     slot. */
  struct block *slots;
};
147
148
149/* Return a hash value calculated from the adler32 SUM, suitable for use with
150   our hash table. */
151static apr_uint32_t hash_func(apr_uint32_t sum)
152{
153  /* Since the adl32 checksum have a bad distribution for the 11th to 16th
154     bits when used for our small block size, we add some bits from the
155     other half of the checksum. */
156  return sum ^ (sum >> 12);
157}
158
159/* Return the offset in BLOCKS.FLAGS for the adler32 SUM. */
160static apr_uint32_t hash_flags(apr_uint32_t sum)
161{
162  /* The upper half of SUM has a wider value range than the lower 16 bit.
163     Also, we want to a different folding than HASH_FUNC to minimize
164     correlation between different hash levels. */
165  return (sum >> 16) & ((FLAGS_COUNT / 8) - 1);
166}
167
168/* Insert a block with the checksum ADLERSUM at position POS in the source
169   data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
170   actually the same content. */
171static void
172add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
173{
174  apr_uint32_t h = hash_func(adlersum) & blocks->max;
175
176  /* This will terminate, since we know that we will not fill the table. */
177  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
178    if (blocks->slots[h].adlersum == adlersum)
179      if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
180                 MATCH_BLOCKSIZE) == 0)
181        return;
182
183  blocks->slots[h].adlersum = adlersum;
184  blocks->slots[h].pos = pos;
185  blocks->flags[hash_flags(adlersum)] |= 1 << (adlersum & 7);
186}
187
188/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
189   at DATA, returning its position in the source data.  If there is no such
190   block, return NO_POSITION. */
191static apr_uint32_t
192find_block(const struct blocks *blocks,
193           apr_uint32_t adlersum,
194           const char* data)
195{
196  apr_uint32_t h = hash_func(adlersum) & blocks->max;
197
198  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
199    if (blocks->slots[h].adlersum == adlersum)
200      if (memcmp(blocks->data + blocks->slots[h].pos, data,
201                 MATCH_BLOCKSIZE) == 0)
202        return blocks->slots[h].pos;
203
204  return NO_POSITION;
205}
206
207/* Initialize the matches table from DATA of size DATALEN.  This goes
208   through every block of MATCH_BLOCKSIZE bytes in the source and
209   checksums it, inserting the result into the BLOCKS table.  */
210static void
211init_blocks_table(const char *data,
212                  apr_size_t datalen,
213                  struct blocks *blocks,
214                  apr_pool_t *pool)
215{
216  apr_size_t nblocks;
217  apr_size_t wnslots = 1;
218  apr_uint32_t nslots;
219  apr_uint32_t i;
220
221  /* Be pessimistic about the block count. */
222  nblocks = datalen / MATCH_BLOCKSIZE + 1;
223  /* Find nearest larger power of two. */
224  while (wnslots <= nblocks)
225    wnslots *= 2;
226  /* Double the number of slots to avoid a too high load. */
227  wnslots *= 2;
228  /* Narrow the number of slots to 32 bits, which is the size of the
229     block position index in the hash table.
230     Sanity check: On 64-bit platforms, apr_size_t is likely to be
231     larger than apr_uint32_t. Make sure that the number of slots
232     actually fits into blocks->max.  It's safe to use a hard assert
233     here, because the largest possible value for nslots is
234     proportional to the text delta window size and is therefore much
235     smaller than the range of an apr_uint32_t.  If we ever happen to
236     increase the window size too much, this assertion will get
237     triggered by the test suite. */
238  nslots = (apr_uint32_t) wnslots;
239  SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);
240  blocks->max = nslots - 1;
241  blocks->data = data;
242  blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
243  for (i = 0; i < nslots; ++i)
244    {
245      /* Avoid using an indeterminate value in the lookup. */
246      blocks->slots[i].adlersum = 0;
247      blocks->slots[i].pos = NO_POSITION;
248    }
249
250  /* No checksum entries in SLOTS, yet => reset all checksum flags. */
251  memset(blocks->flags, 0, sizeof(blocks->flags));
252
253  /* If there is an odd block at the end of the buffer, we will
254     not use that shorter block for deltification (only indirectly
255     as an extension of some previous block). */
256  for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
257    add_block(blocks, init_adler32(data + i), i);
258}
259
260/* Try to find a match for the target data B in BLOCKS, and then
261   extend the match as long as data in A and B at the match position
262   continues to match.  We set the position in A we ended up in (in
263   case we extended it backwards) in APOSP and update the corresponding
264   position within B given in BPOSP. PENDING_INSERT_START sets the
265   lower limit to BPOSP.
266   Return number of matching bytes starting at ASOP.  Return 0 if
267   no match has been found.
268 */
269static apr_size_t
270find_match(const struct blocks *blocks,
271           const apr_uint32_t rolling,
272           const char *a,
273           apr_size_t asize,
274           const char *b,
275           apr_size_t bsize,
276           apr_size_t *bposp,
277           apr_size_t *aposp,
278           apr_size_t pending_insert_start)
279{
280  apr_size_t apos, bpos = *bposp;
281  apr_size_t delta, max_delta;
282
283  apos = find_block(blocks, rolling, b + bpos);
284
285  /* See if we have a match.  */
286  if (apos == NO_POSITION)
287    return 0;
288
289  /* Extend the match forward as far as possible */
290  max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
291            ? asize - apos - MATCH_BLOCKSIZE
292            : bsize - bpos - MATCH_BLOCKSIZE;
293  delta = svn_cstring__match_length(a + apos + MATCH_BLOCKSIZE,
294                                    b + bpos + MATCH_BLOCKSIZE,
295                                    max_delta);
296
297  /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
298     content has been sampled only every MATCH_BLOCKSIZE positions).  */
299  while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
300    {
301      --apos;
302      --bpos;
303      ++delta;
304    }
305
306  *aposp = apos;
307  *bposp = bpos;
308
309  return MATCH_BLOCKSIZE + delta;
310}
311
312/* Utility for compute_delta() that compares the range B[START,BSIZE) with
313 * the range of similar size before A[ASIZE]. Create corresponding copy and
314 * insert operations.
315 *
316 * BUILD_BATON and POOL will be passed through from compute_delta().
317 */
318static void
319store_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
320                    const char *a,
321                    apr_size_t asize,
322                    const char *b,
323                    apr_size_t bsize,
324                    apr_size_t start,
325                    apr_pool_t *pool)
326{
327  apr_size_t end_match;
328  apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
329  if (max_len == 0)
330    return;
331
332  end_match = svn_cstring__reverse_match_length(a + asize, b + bsize,
333                                                max_len);
334  if (end_match <= 4)
335    end_match = 0;
336
337  if (bsize - start > end_match)
338    svn_txdelta__insert_op(build_baton, svn_txdelta_new,
339                           start, bsize - start - end_match, b + start, pool);
340  if (end_match)
341    svn_txdelta__insert_op(build_baton, svn_txdelta_source,
342                           asize - end_match, end_match, NULL, pool);
343}
344
345
/* Compute a delta from A to B using xdelta.

   The basic xdelta algorithm is as follows:

   1. Go through the source data, checksumming every MATCH_BLOCKSIZE
      block of bytes using adler32, and inserting the checksum into a
      match table with the position of the match.
   2. Go through the target byte by byte, seeing if that byte starts a
      match that we have in the match table.
      2a. If so, try to extend the match as far as possible both
          forwards and backwards, and then insert a source copy
          operation into the delta ops builder for the match.
      2b. If not, insert the byte as new data using an insert delta op.

   Our implementation doesn't immediately insert "insert" operations,
   it waits until we have another copy, or we are done.  The reasoning
   is twofold:

   1. Otherwise, we would just be building a ton of 1 byte insert
      operations
   2. So that we can extend a source match backwards into a pending
     insert operation, and possibly remove the need for the insert
     entirely.  This can happen due to stream alignment.
*/
static void
compute_delta(svn_txdelta__ops_baton_t *build_baton,
              const char *a,
              apr_size_t asize,
              const char *b,
              apr_size_t bsize,
              apr_pool_t *pool)
{
  struct blocks blocks;
  apr_uint32_t rolling;  /* pseudo-adler32 of B[LO .. LO+MATCH_BLOCKSIZE) */
  apr_size_t lo = 0, pending_insert_start = 0, upper;

  /* Optimization: directly compare window starts. If more than 4
   * bytes match, we can immediately create a matching window.
   * Shorter sequences result in a net data increase. */
  lo = svn_cstring__match_length(a, b, asize > bsize ? bsize : asize);
  if ((lo > 4) || (lo == bsize))
    {
      svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                             0, lo, NULL, pool);
      pending_insert_start = lo;
    }
  else
    lo = 0;

  /* If the size of the target is smaller than the match blocksize, just
     insert the entire target.  */
  if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
    {
      store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
      return;
    }

  upper = bsize - MATCH_BLOCKSIZE; /* this is now known to be >= LO */

  /* Initialize the matches table.  */
  init_blocks_table(a, asize, &blocks, pool);

  /* Initialize our rolling checksum.  */
  rolling = init_adler32(b + lo);
  while (lo < upper)
    {
      apr_size_t matchlen;
      apr_size_t apos;

      /* Quickly skip positions whose respective ROLLING checksums
         definitely do not match any SLOT in BLOCKS. */
      while (!(blocks.flags[hash_flags(rolling)] & (1 << (rolling & 7)))
             && lo < upper)
        {
          rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
          lo++;
        }

      /* LO is still <= UPPER, i.e. the following lookup is legal:
         Closely check whether we've got a match for the current location.
         Due to the above pre-filter, chances are that we find one. */
      matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
                            &lo, &apos, pending_insert_start);

      /* If we didn't find a real match, insert the byte at the target
         position into the pending insert.  */
      if (matchlen == 0)
        {
          /* move block one position forward. Short blocks at the end of
             the buffer cannot be used as the beginning of a new match */
          if (lo + MATCH_BLOCKSIZE < bsize)
            rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);

          lo++;
        }
      else
        {
          /* store the sequence of B that is between the matches */
          if (lo - pending_insert_start > 0)
            svn_txdelta__insert_op(build_baton, svn_txdelta_new,
                                   0, lo - pending_insert_start,
                                   b + pending_insert_start, pool);
          else
            {
              /* the match borders on the previous op. Maybe, we found a
               * match that is better than / overlapping the previous one.
               * Shrink the previous copy op by the overlap and extend
               * this match backwards by the same amount. */
              apr_size_t len = svn_cstring__reverse_match_length
                                 (a + apos, b + lo, apos < lo ? apos : lo);
              if (len > 0)
                {
                  len = svn_txdelta__remove_copy(build_baton, len);
                  apos -= len;
                  matchlen += len;
                  lo -= len;
                }
            }

          /* Reset the pending insert start to immediately after the
             match. */
          lo += matchlen;
          pending_insert_start = lo;
          svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                                 apos, matchlen, NULL, pool);

          /* Calculate the Adler32 sum for the first block behind the match.
           * Ignore short buffers at the end of B.
           */
          if (lo + MATCH_BLOCKSIZE <= bsize)
            rolling = init_adler32(b + lo);
        }
    }

  /* If we still have an insert pending at the end, throw it in.  */
  store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool);
}
481
482void
483svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
484                    const char *data,
485                    apr_size_t source_len,
486                    apr_size_t target_len,
487                    apr_pool_t *pool)
488{
489  /*  We should never be asked to compute something when the source_len is 0;
490      we just use a single insert op there (and rely on zlib for
491      compression). */
492  assert(source_len != 0);
493  compute_delta(build_baton, data, source_len,
494                data + source_len, target_len,
495                pool);
496}
497