text_delta.c revision 251881
1251881Speter/*
2251881Speter * text-delta.c -- Internal text delta representation
3251881Speter *
4251881Speter * ====================================================================
5251881Speter *    Licensed to the Apache Software Foundation (ASF) under one
6251881Speter *    or more contributor license agreements.  See the NOTICE file
7251881Speter *    distributed with this work for additional information
8251881Speter *    regarding copyright ownership.  The ASF licenses this file
9251881Speter *    to you under the Apache License, Version 2.0 (the
10251881Speter *    "License"); you may not use this file except in compliance
11251881Speter *    with the License.  You may obtain a copy of the License at
12251881Speter *
13251881Speter *      http://www.apache.org/licenses/LICENSE-2.0
14251881Speter *
15251881Speter *    Unless required by applicable law or agreed to in writing,
16251881Speter *    software distributed under the License is distributed on an
17251881Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18251881Speter *    KIND, either express or implied.  See the License for the
19251881Speter *    specific language governing permissions and limitations
20251881Speter *    under the License.
21251881Speter * ====================================================================
22251881Speter */
23251881Speter
24251881Speter
25251881Speter#include <assert.h>
26251881Speter#include <string.h>
27251881Speter
28251881Speter#include <apr_general.h>        /* for APR_INLINE */
29251881Speter#include <apr_md5.h>            /* for, um...MD5 stuff */
30251881Speter
31251881Speter#include "svn_delta.h"
32251881Speter#include "svn_io.h"
33251881Speter#include "svn_pools.h"
34251881Speter#include "svn_checksum.h"
35251881Speter
36251881Speter#include "delta.h"
37251881Speter
38251881Speter
39251881Speter/* Text delta stream descriptor. */
40251881Speter
41251881Speterstruct svn_txdelta_stream_t {
42251881Speter  /* Copied from parameters to svn_txdelta_stream_create. */
43251881Speter  void *baton;
44251881Speter  svn_txdelta_next_window_fn_t next_window;
45251881Speter  svn_txdelta_md5_digest_fn_t md5_digest;
46251881Speter};
47251881Speter
48251881Speter/* Delta stream baton. */
49251881Speterstruct txdelta_baton {
50251881Speter  /* These are copied from parameters passed to svn_txdelta. */
51251881Speter  svn_stream_t *source;
52251881Speter  svn_stream_t *target;
53251881Speter
54251881Speter  /* Private data */
55251881Speter  svn_boolean_t more_source;    /* FALSE if source stream hit EOF. */
56251881Speter  svn_boolean_t more;           /* TRUE if there are more data in the pool. */
57251881Speter  svn_filesize_t pos;           /* Offset of next read in source file. */
58251881Speter  char *buf;                    /* Buffer for input data. */
59251881Speter
60251881Speter  svn_checksum_ctx_t *context;  /* If not NULL, the context for computing
61251881Speter                                   the checksum. */
62251881Speter  svn_checksum_t *checksum;     /* If non-NULL, the checksum of TARGET. */
63251881Speter
64251881Speter  apr_pool_t *result_pool;      /* For results (e.g. checksum) */
65251881Speter};
66251881Speter
67251881Speter
68251881Speter/* Target-push stream descriptor. */
69251881Speter
70251881Speterstruct tpush_baton {
71251881Speter  /* These are copied from parameters passed to svn_txdelta_target_push. */
72251881Speter  svn_stream_t *source;
73251881Speter  svn_txdelta_window_handler_t wh;
74251881Speter  void *whb;
75251881Speter  apr_pool_t *pool;
76251881Speter
77251881Speter  /* Private data */
78251881Speter  char *buf;
79251881Speter  svn_filesize_t source_offset;
80251881Speter  apr_size_t source_len;
81251881Speter  svn_boolean_t source_done;
82251881Speter  apr_size_t target_len;
83251881Speter};
84251881Speter
85251881Speter
86251881Speter/* Text delta applicator.  */
87251881Speter
88251881Speterstruct apply_baton {
89251881Speter  /* These are copied from parameters passed to svn_txdelta_apply.  */
90251881Speter  svn_stream_t *source;
91251881Speter  svn_stream_t *target;
92251881Speter
93251881Speter  /* Private data.  Between calls, SBUF contains the data from the
94251881Speter   * last window's source view, as specified by SBUF_OFFSET and
95251881Speter   * SBUF_LEN.  The contents of TBUF are not interesting between
96251881Speter   * calls.  */
97251881Speter  apr_pool_t *pool;             /* Pool to allocate data from */
98251881Speter  char *sbuf;                   /* Source buffer */
99251881Speter  apr_size_t sbuf_size;         /* Allocated source buffer space */
100251881Speter  svn_filesize_t sbuf_offset;   /* Offset of SBUF data in source stream */
101251881Speter  apr_size_t sbuf_len;          /* Length of SBUF data */
102251881Speter  char *tbuf;                   /* Target buffer */
103251881Speter  apr_size_t tbuf_size;         /* Allocated target buffer space */
104251881Speter
105251881Speter  apr_md5_ctx_t md5_context;    /* Leads to result_digest below. */
106251881Speter  unsigned char *result_digest; /* MD5 digest of resultant fulltext;
107251881Speter                                   must point to at least APR_MD5_DIGESTSIZE
108251881Speter                                   bytes of storage. */
109251881Speter
110251881Speter  const char *error_info;       /* Optional extra info for error returns. */
111251881Speter};
112251881Speter
113251881Speter
114251881Speter
115251881Spetersvn_txdelta_window_t *
116251881Spetersvn_txdelta__make_window(const svn_txdelta__ops_baton_t *build_baton,
117251881Speter                         apr_pool_t *pool)
118251881Speter{
119251881Speter  svn_txdelta_window_t *window;
120251881Speter  svn_string_t *new_data = apr_palloc(pool, sizeof(*new_data));
121251881Speter
122251881Speter  window = apr_palloc(pool, sizeof(*window));
123251881Speter  window->sview_offset = 0;
124251881Speter  window->sview_len = 0;
125251881Speter  window->tview_len = 0;
126251881Speter
127251881Speter  window->num_ops = build_baton->num_ops;
128251881Speter  window->src_ops = build_baton->src_ops;
129251881Speter  window->ops = build_baton->ops;
130251881Speter
131251881Speter  /* just copy the fields over, rather than alloc/copying into a whole new
132251881Speter     svn_string_t structure. */
133251881Speter  /* ### would be much nicer if window->new_data were not a ptr... */
134251881Speter  new_data->data = build_baton->new_data->data;
135251881Speter  new_data->len = build_baton->new_data->len;
136251881Speter  window->new_data = new_data;
137251881Speter
138251881Speter  return window;
139251881Speter}
140251881Speter
141251881Speter
142251881Speter/* Compute and return a delta window using the xdelta algorithm on
143251881Speter   DATA, which contains SOURCE_LEN bytes of source data and TARGET_LEN
144251881Speter   bytes of target data.  SOURCE_OFFSET gives the offset of the source
145251881Speter   data, and is simply copied into the window's sview_offset field. */
146251881Speterstatic svn_txdelta_window_t *
147251881Spetercompute_window(const char *data, apr_size_t source_len, apr_size_t target_len,
148251881Speter               svn_filesize_t source_offset, apr_pool_t *pool)
149251881Speter{
150251881Speter  svn_txdelta__ops_baton_t build_baton = { 0 };
151251881Speter  svn_txdelta_window_t *window;
152251881Speter
153251881Speter  /* Compute the delta operations. */
154251881Speter  build_baton.new_data = svn_stringbuf_create_empty(pool);
155251881Speter
156251881Speter  if (source_len == 0)
157251881Speter    svn_txdelta__insert_op(&build_baton, svn_txdelta_new, 0, target_len, data,
158251881Speter                           pool);
159251881Speter  else
160251881Speter    svn_txdelta__xdelta(&build_baton, data, source_len, target_len, pool);
161251881Speter
162251881Speter  /* Create and return the delta window. */
163251881Speter  window = svn_txdelta__make_window(&build_baton, pool);
164251881Speter  window->sview_offset = source_offset;
165251881Speter  window->sview_len = source_len;
166251881Speter  window->tview_len = target_len;
167251881Speter  return window;
168251881Speter}
169251881Speter
170251881Speter
171251881Speter
172251881Spetersvn_txdelta_window_t *
173251881Spetersvn_txdelta_window_dup(const svn_txdelta_window_t *window,
174251881Speter                       apr_pool_t *pool)
175251881Speter{
176251881Speter  svn_txdelta__ops_baton_t build_baton = { 0 };
177251881Speter  svn_txdelta_window_t *new_window;
178251881Speter  const apr_size_t ops_size = (window->num_ops * sizeof(*build_baton.ops));
179251881Speter
180251881Speter  build_baton.num_ops = window->num_ops;
181251881Speter  build_baton.src_ops = window->src_ops;
182251881Speter  build_baton.ops_size = window->num_ops;
183251881Speter  build_baton.ops = apr_palloc(pool, ops_size);
184251881Speter  memcpy(build_baton.ops, window->ops, ops_size);
185251881Speter  build_baton.new_data =
186251881Speter    svn_stringbuf_create_from_string(window->new_data, pool);
187251881Speter
188251881Speter  new_window = svn_txdelta__make_window(&build_baton, pool);
189251881Speter  new_window->sview_offset = window->sview_offset;
190251881Speter  new_window->sview_len = window->sview_len;
191251881Speter  new_window->tview_len = window->tview_len;
192251881Speter  return new_window;
193251881Speter}
194251881Speter
195251881Speter/* This is a private interlibrary compatibility wrapper. */
196251881Spetersvn_txdelta_window_t *
197251881Spetersvn_txdelta__copy_window(const svn_txdelta_window_t *window,
198251881Speter                         apr_pool_t *pool);
199251881Spetersvn_txdelta_window_t *
200251881Spetersvn_txdelta__copy_window(const svn_txdelta_window_t *window,
201251881Speter                         apr_pool_t *pool)
202251881Speter{
203251881Speter  return svn_txdelta_window_dup(window, pool);
204251881Speter}
205251881Speter
206251881Speter
207251881Speter/* Insert a delta op into a delta window. */
208251881Speter
209251881Spetervoid
210251881Spetersvn_txdelta__insert_op(svn_txdelta__ops_baton_t *build_baton,
211251881Speter                       enum svn_delta_action opcode,
212251881Speter                       apr_size_t offset,
213251881Speter                       apr_size_t length,
214251881Speter                       const char *new_data,
215251881Speter                       apr_pool_t *pool)
216251881Speter{
217251881Speter  svn_txdelta_op_t *op;
218251881Speter
219251881Speter  /* Check if this op can be merged with the previous op. The delta
220251881Speter     combiner sometimes generates such ops, and this is the obvious
221251881Speter     place to make the check. */
222251881Speter  if (build_baton->num_ops > 0)
223251881Speter    {
224251881Speter      op = &build_baton->ops[build_baton->num_ops - 1];
225251881Speter      if (op->action_code == opcode
226251881Speter          && (opcode == svn_txdelta_new
227251881Speter              || op->offset + op->length == offset))
228251881Speter        {
229251881Speter          op->length += length;
230251881Speter          if (opcode == svn_txdelta_new)
231251881Speter            svn_stringbuf_appendbytes(build_baton->new_data,
232251881Speter                                      new_data, length);
233251881Speter          return;
234251881Speter        }
235251881Speter    }
236251881Speter
237251881Speter  /* Create space for the new op. */
238251881Speter  if (build_baton->num_ops == build_baton->ops_size)
239251881Speter    {
240251881Speter      svn_txdelta_op_t *const old_ops = build_baton->ops;
241251881Speter      int const new_ops_size = (build_baton->ops_size == 0
242251881Speter                                ? 16 : 2 * build_baton->ops_size);
243251881Speter      build_baton->ops =
244251881Speter        apr_palloc(pool, new_ops_size * sizeof(*build_baton->ops));
245251881Speter
246251881Speter      /* Copy any existing ops into the new array */
247251881Speter      if (old_ops)
248251881Speter        memcpy(build_baton->ops, old_ops,
249251881Speter               build_baton->ops_size * sizeof(*build_baton->ops));
250251881Speter      build_baton->ops_size = new_ops_size;
251251881Speter    }
252251881Speter
253251881Speter  /* Insert the op. svn_delta_source and svn_delta_target are
254251881Speter     just inserted. For svn_delta_new, the new data must be
255251881Speter     copied into the window. */
256251881Speter  op = &build_baton->ops[build_baton->num_ops];
257251881Speter  switch (opcode)
258251881Speter    {
259251881Speter    case svn_txdelta_source:
260251881Speter      ++build_baton->src_ops;
261251881Speter      /*** FALLTHRU ***/
262251881Speter    case svn_txdelta_target:
263251881Speter      op->action_code = opcode;
264251881Speter      op->offset = offset;
265251881Speter      op->length = length;
266251881Speter      break;
267251881Speter    case svn_txdelta_new:
268251881Speter      op->action_code = opcode;
269251881Speter      op->offset = build_baton->new_data->len;
270251881Speter      op->length = length;
271251881Speter      svn_stringbuf_appendbytes(build_baton->new_data, new_data, length);
272251881Speter      break;
273251881Speter    default:
274251881Speter      assert(!"unknown delta op.");
275251881Speter    }
276251881Speter
277251881Speter  ++build_baton->num_ops;
278251881Speter}
279251881Speter
280251881Speterapr_size_t
281251881Spetersvn_txdelta__remove_copy(svn_txdelta__ops_baton_t *build_baton,
282251881Speter                         apr_size_t max_len)
283251881Speter{
284251881Speter  svn_txdelta_op_t *op;
285251881Speter  apr_size_t len = 0;
286251881Speter
287251881Speter  /* remove ops back to front */
288251881Speter  while (build_baton->num_ops > 0)
289251881Speter    {
290251881Speter      op = &build_baton->ops[build_baton->num_ops-1];
291251881Speter
292251881Speter      /*  we can't modify svn_txdelta_target ops -> stop there */
293251881Speter      if (op->action_code == svn_txdelta_target)
294251881Speter        break;
295251881Speter
296251881Speter      /*  handle the case that we cannot remove the op entirely */
297251881Speter      if (op->length + len > max_len)
298251881Speter        {
299251881Speter          /* truncate only insertions. Copies don't benefit
300251881Speter             from being truncated. */
301251881Speter          if (op->action_code == svn_txdelta_new)
302251881Speter            {
303251881Speter               build_baton->new_data->len -= max_len - len;
304251881Speter               op->length -= max_len - len;
305251881Speter               len = max_len;
306251881Speter            }
307251881Speter
308251881Speter          break;
309251881Speter        }
310251881Speter
311251881Speter      /* drop the op entirely */
312251881Speter      if (op->action_code == svn_txdelta_new)
313251881Speter        build_baton->new_data->len -= op->length;
314251881Speter
315251881Speter      len += op->length;
316251881Speter      --build_baton->num_ops;
317251881Speter    }
318251881Speter
319251881Speter  return len;
320251881Speter}
321251881Speter
322251881Speter
323251881Speter
324251881Speter/* Generic delta stream functions. */
325251881Speter
326251881Spetersvn_txdelta_stream_t *
327251881Spetersvn_txdelta_stream_create(void *baton,
328251881Speter                          svn_txdelta_next_window_fn_t next_window,
329251881Speter                          svn_txdelta_md5_digest_fn_t md5_digest,
330251881Speter                          apr_pool_t *pool)
331251881Speter{
332251881Speter  svn_txdelta_stream_t *stream = apr_palloc(pool, sizeof(*stream));
333251881Speter
334251881Speter  stream->baton = baton;
335251881Speter  stream->next_window = next_window;
336251881Speter  stream->md5_digest = md5_digest;
337251881Speter
338251881Speter  return stream;
339251881Speter}
340251881Speter
341251881Spetersvn_error_t *
342251881Spetersvn_txdelta_next_window(svn_txdelta_window_t **window,
343251881Speter                        svn_txdelta_stream_t *stream,
344251881Speter                        apr_pool_t *pool)
345251881Speter{
346251881Speter  return stream->next_window(window, stream->baton, pool);
347251881Speter}
348251881Speter
349251881Speterconst unsigned char *
350251881Spetersvn_txdelta_md5_digest(svn_txdelta_stream_t *stream)
351251881Speter{
352251881Speter  return stream->md5_digest(stream->baton);
353251881Speter}
354251881Speter
355251881Speter
356251881Speter
357251881Speterstatic svn_error_t *
358251881Spetertxdelta_next_window(svn_txdelta_window_t **window,
359251881Speter                    void *baton,
360251881Speter                    apr_pool_t *pool)
361251881Speter{
362251881Speter  struct txdelta_baton *b = baton;
363251881Speter  apr_size_t source_len = SVN_DELTA_WINDOW_SIZE;
364251881Speter  apr_size_t target_len = SVN_DELTA_WINDOW_SIZE;
365251881Speter
366251881Speter  /* Read the source stream. */
367251881Speter  if (b->more_source)
368251881Speter    {
369251881Speter      SVN_ERR(svn_stream_read(b->source, b->buf, &source_len));
370251881Speter      b->more_source = (source_len == SVN_DELTA_WINDOW_SIZE);
371251881Speter    }
372251881Speter  else
373251881Speter    source_len = 0;
374251881Speter
375251881Speter  /* Read the target stream. */
376251881Speter  SVN_ERR(svn_stream_read(b->target, b->buf + source_len, &target_len));
377251881Speter  b->pos += source_len;
378251881Speter
379251881Speter  if (target_len == 0)
380251881Speter    {
381251881Speter      /* No target data?  We're done; return the final window. */
382251881Speter      if (b->context != NULL)
383251881Speter        SVN_ERR(svn_checksum_final(&b->checksum, b->context, b->result_pool));
384251881Speter
385251881Speter      *window = NULL;
386251881Speter      b->more = FALSE;
387251881Speter      return SVN_NO_ERROR;
388251881Speter    }
389251881Speter  else if (b->context != NULL)
390251881Speter    SVN_ERR(svn_checksum_update(b->context, b->buf + source_len, target_len));
391251881Speter
392251881Speter  *window = compute_window(b->buf, source_len, target_len,
393251881Speter                           b->pos - source_len, pool);
394251881Speter
395251881Speter  /* That's it. */
396251881Speter  return SVN_NO_ERROR;
397251881Speter}
398251881Speter
399251881Speter
400251881Speterstatic const unsigned char *
401251881Spetertxdelta_md5_digest(void *baton)
402251881Speter{
403251881Speter  struct txdelta_baton *b = baton;
404251881Speter  /* If there are more windows for this stream, the digest has not yet
405251881Speter     been calculated.  */
406251881Speter  if (b->more)
407251881Speter    return NULL;
408251881Speter
409251881Speter  /* If checksumming has not been activated, there will be no digest. */
410251881Speter  if (b->context == NULL)
411251881Speter    return NULL;
412251881Speter
413251881Speter  /* The checksum should be there. */
414251881Speter  return b->checksum->digest;
415251881Speter}
416251881Speter
417251881Speter
418251881Spetersvn_error_t *
419251881Spetersvn_txdelta_run(svn_stream_t *source,
420251881Speter                svn_stream_t *target,
421251881Speter                svn_txdelta_window_handler_t handler,
422251881Speter                void *handler_baton,
423251881Speter                svn_checksum_kind_t checksum_kind,
424251881Speter                svn_checksum_t **checksum,
425251881Speter                svn_cancel_func_t cancel_func,
426251881Speter                void *cancel_baton,
427251881Speter                apr_pool_t *result_pool,
428251881Speter                apr_pool_t *scratch_pool)
429251881Speter{
430251881Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
431251881Speter  struct txdelta_baton tb = { 0 };
432251881Speter  svn_txdelta_window_t *window;
433251881Speter
434251881Speter  tb.source = source;
435251881Speter  tb.target = target;
436251881Speter  tb.more_source = TRUE;
437251881Speter  tb.more = TRUE;
438251881Speter  tb.pos = 0;
439251881Speter  tb.buf = apr_palloc(scratch_pool, 2 * SVN_DELTA_WINDOW_SIZE);
440251881Speter  tb.result_pool = result_pool;
441251881Speter
442251881Speter  if (checksum != NULL)
443251881Speter    tb.context = svn_checksum_ctx_create(checksum_kind, scratch_pool);
444251881Speter
445251881Speter  do
446251881Speter    {
447251881Speter      /* free the window (if any) */
448251881Speter      svn_pool_clear(iterpool);
449251881Speter
450251881Speter      /* read in a single delta window */
451251881Speter      SVN_ERR(txdelta_next_window(&window, &tb, iterpool));
452251881Speter
453251881Speter      /* shove it at the handler */
454251881Speter      SVN_ERR((*handler)(window, handler_baton));
455251881Speter
456251881Speter      if (cancel_func)
457251881Speter        SVN_ERR(cancel_func(cancel_baton));
458251881Speter    }
459251881Speter  while (window != NULL);
460251881Speter
461251881Speter  svn_pool_destroy(iterpool);
462251881Speter
463251881Speter  if (checksum != NULL)
464251881Speter    *checksum = tb.checksum;  /* should be there! */
465251881Speter
466251881Speter  return SVN_NO_ERROR;
467251881Speter}
468251881Speter
469251881Speter
470251881Spetervoid
471251881Spetersvn_txdelta2(svn_txdelta_stream_t **stream,
472251881Speter             svn_stream_t *source,
473251881Speter             svn_stream_t *target,
474251881Speter             svn_boolean_t calculate_checksum,
475251881Speter             apr_pool_t *pool)
476251881Speter{
477251881Speter  struct txdelta_baton *b = apr_pcalloc(pool, sizeof(*b));
478251881Speter
479251881Speter  b->source = source;
480251881Speter  b->target = target;
481251881Speter  b->more_source = TRUE;
482251881Speter  b->more = TRUE;
483251881Speter  b->buf = apr_palloc(pool, 2 * SVN_DELTA_WINDOW_SIZE);
484251881Speter  b->context = calculate_checksum
485251881Speter             ? svn_checksum_ctx_create(svn_checksum_md5, pool)
486251881Speter             : NULL;
487251881Speter  b->result_pool = pool;
488251881Speter
489251881Speter  *stream = svn_txdelta_stream_create(b, txdelta_next_window,
490251881Speter                                      txdelta_md5_digest, pool);
491251881Speter}
492251881Speter
493251881Spetervoid
494251881Spetersvn_txdelta(svn_txdelta_stream_t **stream,
495251881Speter            svn_stream_t *source,
496251881Speter            svn_stream_t *target,
497251881Speter            apr_pool_t *pool)
498251881Speter{
499251881Speter  svn_txdelta2(stream, source, target, TRUE, pool);
500251881Speter}
501251881Speter
502251881Speter
503251881Speter
504251881Speter/* Functions for implementing a "target push" delta. */
505251881Speter
506251881Speter/* This is the write handler for a target-push delta stream.  It reads
507251881Speter * source data, buffers target data, and fires off delta windows when
508251881Speter * the target data buffer is full. */
509251881Speterstatic svn_error_t *
510251881Spetertpush_write_handler(void *baton, const char *data, apr_size_t *len)
511251881Speter{
512251881Speter  struct tpush_baton *tb = baton;
513251881Speter  apr_size_t chunk_len, data_len = *len;
514251881Speter  apr_pool_t *pool = svn_pool_create(tb->pool);
515251881Speter  svn_txdelta_window_t *window;
516251881Speter
517251881Speter  while (data_len > 0)
518251881Speter    {
519251881Speter      svn_pool_clear(pool);
520251881Speter
521251881Speter      /* Make sure we're all full up on source data, if possible. */
522251881Speter      if (tb->source_len == 0 && !tb->source_done)
523251881Speter        {
524251881Speter          tb->source_len = SVN_DELTA_WINDOW_SIZE;
525251881Speter          SVN_ERR(svn_stream_read(tb->source, tb->buf, &tb->source_len));
526251881Speter          if (tb->source_len < SVN_DELTA_WINDOW_SIZE)
527251881Speter            tb->source_done = TRUE;
528251881Speter        }
529251881Speter
530251881Speter      /* Copy in the target data, up to SVN_DELTA_WINDOW_SIZE. */
531251881Speter      chunk_len = SVN_DELTA_WINDOW_SIZE - tb->target_len;
532251881Speter      if (chunk_len > data_len)
533251881Speter        chunk_len = data_len;
534251881Speter      memcpy(tb->buf + tb->source_len + tb->target_len, data, chunk_len);
535251881Speter      data += chunk_len;
536251881Speter      data_len -= chunk_len;
537251881Speter      tb->target_len += chunk_len;
538251881Speter
539251881Speter      /* If we're full of target data, compute and fire off a window. */
540251881Speter      if (tb->target_len == SVN_DELTA_WINDOW_SIZE)
541251881Speter        {
542251881Speter          window = compute_window(tb->buf, tb->source_len, tb->target_len,
543251881Speter                                  tb->source_offset, pool);
544251881Speter          SVN_ERR(tb->wh(window, tb->whb));
545251881Speter          tb->source_offset += tb->source_len;
546251881Speter          tb->source_len = 0;
547251881Speter          tb->target_len = 0;
548251881Speter        }
549251881Speter    }
550251881Speter
551251881Speter  svn_pool_destroy(pool);
552251881Speter  return SVN_NO_ERROR;
553251881Speter}
554251881Speter
555251881Speter
556251881Speter/* This is the close handler for a target-push delta stream.  It sends
557251881Speter * a final window if there is any buffered target data, and then sends
558251881Speter * a NULL window signifying the end of the window stream. */
559251881Speterstatic svn_error_t *
560251881Spetertpush_close_handler(void *baton)
561251881Speter{
562251881Speter  struct tpush_baton *tb = baton;
563251881Speter  svn_txdelta_window_t *window;
564251881Speter
565251881Speter  /* Send a final window if we have any residual target data. */
566251881Speter  if (tb->target_len > 0)
567251881Speter    {
568251881Speter      window = compute_window(tb->buf, tb->source_len, tb->target_len,
569251881Speter                              tb->source_offset, tb->pool);
570251881Speter      SVN_ERR(tb->wh(window, tb->whb));
571251881Speter    }
572251881Speter
573251881Speter  /* Send a final NULL window signifying the end. */
574251881Speter  return tb->wh(NULL, tb->whb);
575251881Speter}
576251881Speter
577251881Speter
578251881Spetersvn_stream_t *
579251881Spetersvn_txdelta_target_push(svn_txdelta_window_handler_t handler,
580251881Speter                        void *handler_baton, svn_stream_t *source,
581251881Speter                        apr_pool_t *pool)
582251881Speter{
583251881Speter  struct tpush_baton *tb;
584251881Speter  svn_stream_t *stream;
585251881Speter
586251881Speter  /* Initialize baton. */
587251881Speter  tb = apr_palloc(pool, sizeof(*tb));
588251881Speter  tb->source = source;
589251881Speter  tb->wh = handler;
590251881Speter  tb->whb = handler_baton;
591251881Speter  tb->pool = pool;
592251881Speter  tb->buf = apr_palloc(pool, 2 * SVN_DELTA_WINDOW_SIZE);
593251881Speter  tb->source_offset = 0;
594251881Speter  tb->source_len = 0;
595251881Speter  tb->source_done = FALSE;
596251881Speter  tb->target_len = 0;
597251881Speter
598251881Speter  /* Create and return writable stream. */
599251881Speter  stream = svn_stream_create(tb, pool);
600251881Speter  svn_stream_set_write(stream, tpush_write_handler);
601251881Speter  svn_stream_set_close(stream, tpush_close_handler);
602251881Speter  return stream;
603251881Speter}
604251881Speter
605251881Speter
606251881Speter
607251881Speter/* Functions for applying deltas.  */
608251881Speter
609251881Speter/* Ensure that BUF has enough space for VIEW_LEN bytes.  */
610251881Speterstatic APR_INLINE svn_error_t *
611251881Spetersize_buffer(char **buf, apr_size_t *buf_size,
612251881Speter            apr_size_t view_len, apr_pool_t *pool)
613251881Speter{
614251881Speter  if (view_len > *buf_size)
615251881Speter    {
616251881Speter      *buf_size *= 2;
617251881Speter      if (*buf_size < view_len)
618251881Speter        *buf_size = view_len;
619251881Speter      SVN_ERR_ASSERT(APR_ALIGN_DEFAULT(*buf_size) >= *buf_size);
620251881Speter      *buf = apr_palloc(pool, *buf_size);
621251881Speter    }
622251881Speter
623251881Speter  return SVN_NO_ERROR;
624251881Speter}
625251881Speter
/* Copy LEN bytes from SOURCE to TARGET and return a pointer to the byte
 * just behind the copied range in TARGET (unlike standard memcpy(),
 * which returns TARGET itself), so that callers may chain copies.
 *
 * The function is tuned for very short copies.  memcpy() is hard to make
 * fast across all buffer lengths; implementations tend to favor
 * throughput on large buffers and reasonable latency on mid-sized ones
 * (tens of bytes), leaving a noticeable overhead for buffers of less
 * than about 10 bytes.  Just passing the parameters may already take as
 * long as copying 3 bytes.
 *
 * Short copy sequences appear to be common, at least in "format 2" FSFS
 * repositories, so they are copied by a plain loop.  The extra 'if'
 * clause does not hurt larger copies measurably.  */
static APR_INLINE char *
fast_memcpy(char *target, const char *source, apr_size_t len)
{
  apr_size_t i;

  /* Anything longer than 7 bytes is left to the library routine. */
  if (len > 7)
    {
      memcpy(target, source, len);
      return target + len;
    }

  /* Short blocks are common and memcpy() is not at its best with them,
   * so copy these byte by byte. */
  for (i = 0; i < len; i++)
    target[i] = source[i];

  return target + len;
}
659251881Speter
/* Copy LEN bytes from SOURCE to TARGET in strictly ascending order.  In
 * contrast to memmove() and memcpy(), overlapping source and target
 * ranges produce a repeating pattern: bytes written earlier in this call
 * are read again as source data.  Returns a pointer to the byte just
 * behind the copied range in TARGET.  */
static APR_INLINE char *
patterning_copy(char *target, const char *source, apr_size_t len)
{
  const char *const stop = source + len;

  /* Many machines allow copying in larger chunks. */

#if SVN_UNALIGNED_ACCESS_IS_OK

  if (stop + sizeof(apr_uint32_t) <= target)
    {
      /* The target starts at least 4 bytes behind the end of the source
       * range, so copying 4 bytes at a time cannot read a byte that has
       * yet to be written.  */
      while (source + sizeof(apr_uint32_t) <= stop)
        {
          *(apr_uint32_t *)(target) = *(apr_uint32_t *)(source);
          source += sizeof(apr_uint32_t);
          target += sizeof(apr_uint32_t);
        }
    }

#endif

  /* Byte-wise copy: either the tail left over by the chunked loop or,
   * if that loop did not run, the whole range. */
  while (source != stop)
    *target++ = *source++;

  return target;
}
691251881Speter
692251881Spetervoid
693251881Spetersvn_txdelta_apply_instructions(svn_txdelta_window_t *window,
694251881Speter                               const char *sbuf, char *tbuf,
695251881Speter                               apr_size_t *tlen)
696251881Speter{
697251881Speter  const svn_txdelta_op_t *op;
698251881Speter  apr_size_t tpos = 0;
699251881Speter
700251881Speter  for (op = window->ops; op < window->ops + window->num_ops; op++)
701251881Speter    {
702251881Speter      const apr_size_t buf_len = (op->length < *tlen - tpos
703251881Speter                                  ? op->length : *tlen - tpos);
704251881Speter
705251881Speter      /* Check some invariants common to all instructions.  */
706251881Speter      assert(tpos + op->length <= window->tview_len);
707251881Speter
708251881Speter      switch (op->action_code)
709251881Speter        {
710251881Speter        case svn_txdelta_source:
711251881Speter          /* Copy from source area.  */
712251881Speter          assert(sbuf);
713251881Speter          assert(op->offset + op->length <= window->sview_len);
714251881Speter          fast_memcpy(tbuf + tpos, sbuf + op->offset, buf_len);
715251881Speter          break;
716251881Speter
717251881Speter        case svn_txdelta_target:
718251881Speter          /* Copy from target area.  We can't use memcpy() or the like
719251881Speter           * since we need a specific semantics for overlapping copies:
720251881Speter           * they must result in repeating patterns.
721251881Speter           * Note that most copies won't have overlapping source and
722251881Speter           * target ranges (they are just a result of self-compressed
723251881Speter           * data) but a small percentage will.  */
724251881Speter          assert(op->offset < tpos);
725251881Speter          patterning_copy(tbuf + tpos, tbuf + op->offset, buf_len);
726251881Speter          break;
727251881Speter
728251881Speter        case svn_txdelta_new:
729251881Speter          /* Copy from window new area.  */
730251881Speter          assert(op->offset + op->length <= window->new_data->len);
731251881Speter          fast_memcpy(tbuf + tpos,
732251881Speter                      window->new_data->data + op->offset,
733251881Speter                      buf_len);
734251881Speter          break;
735251881Speter
736251881Speter        default:
737251881Speter          assert(!"Invalid delta instruction code");
738251881Speter        }
739251881Speter
740251881Speter      tpos += op->length;
741251881Speter      if (tpos >= *tlen)
742251881Speter        return;                 /* The buffer is full. */
743251881Speter    }
744251881Speter
745251881Speter  /* Check that we produced the right amount of data.  */
746251881Speter  assert(tpos == window->tview_len);
747251881Speter  *tlen = tpos;
748251881Speter}
749251881Speter
750251881Speter/* This is a private interlibrary compatibility wrapper. */
751251881Spetervoid
752251881Spetersvn_txdelta__apply_instructions(svn_txdelta_window_t *window,
753251881Speter                                const char *sbuf, char *tbuf,
754251881Speter                                apr_size_t *tlen);
755251881Spetervoid
756251881Spetersvn_txdelta__apply_instructions(svn_txdelta_window_t *window,
757251881Speter                                const char *sbuf, char *tbuf,
758251881Speter                                apr_size_t *tlen)
759251881Speter{
760251881Speter  svn_txdelta_apply_instructions(window, sbuf, tbuf, tlen);
761251881Speter}
762251881Speter
763251881Speter
764251881Speter/* Apply WINDOW to the streams given by APPL.  */
765251881Speterstatic svn_error_t *
766251881Speterapply_window(svn_txdelta_window_t *window, void *baton)
767251881Speter{
768251881Speter  struct apply_baton *ab = (struct apply_baton *) baton;
769251881Speter  apr_size_t len;
770251881Speter  svn_error_t *err;
771251881Speter
772251881Speter  if (window == NULL)
773251881Speter    {
774251881Speter      /* We're done; just clean up.  */
775251881Speter      if (ab->result_digest)
776251881Speter        apr_md5_final(ab->result_digest, &(ab->md5_context));
777251881Speter
778251881Speter      err = svn_stream_close(ab->target);
779251881Speter      svn_pool_destroy(ab->pool);
780251881Speter
781251881Speter      return err;
782251881Speter    }
783251881Speter
784251881Speter  /* Make sure the source view didn't slide backwards.  */
785251881Speter  SVN_ERR_ASSERT(window->sview_len == 0
786251881Speter                 || (window->sview_offset >= ab->sbuf_offset
787251881Speter                     && (window->sview_offset + window->sview_len
788251881Speter                         >= ab->sbuf_offset + ab->sbuf_len)));
789251881Speter
790251881Speter  /* Make sure there's enough room in the target buffer.  */
791251881Speter  SVN_ERR(size_buffer(&ab->tbuf, &ab->tbuf_size, window->tview_len, ab->pool));
792251881Speter
793251881Speter  /* Prepare the source buffer for reading from the input stream.  */
794251881Speter  if (window->sview_offset != ab->sbuf_offset
795251881Speter      || window->sview_len > ab->sbuf_size)
796251881Speter    {
797251881Speter      char *old_sbuf = ab->sbuf;
798251881Speter
799251881Speter      /* Make sure there's enough room.  */
800251881Speter      SVN_ERR(size_buffer(&ab->sbuf, &ab->sbuf_size, window->sview_len,
801251881Speter              ab->pool));
802251881Speter
803251881Speter      /* If the existing view overlaps with the new view, copy the
804251881Speter       * overlap to the beginning of the new buffer.  */
805251881Speter      if (  (apr_size_t)ab->sbuf_offset + ab->sbuf_len
806251881Speter          > (apr_size_t)window->sview_offset)
807251881Speter        {
808251881Speter          apr_size_t start =
809251881Speter            (apr_size_t)(window->sview_offset - ab->sbuf_offset);
810251881Speter          memmove(ab->sbuf, old_sbuf + start, ab->sbuf_len - start);
811251881Speter          ab->sbuf_len -= start;
812251881Speter        }
813251881Speter      else
814251881Speter        ab->sbuf_len = 0;
815251881Speter      ab->sbuf_offset = window->sview_offset;
816251881Speter    }
817251881Speter
818251881Speter  /* Read the remainder of the source view into the buffer.  */
819251881Speter  if (ab->sbuf_len < window->sview_len)
820251881Speter    {
821251881Speter      len = window->sview_len - ab->sbuf_len;
822251881Speter      err = svn_stream_read(ab->source, ab->sbuf + ab->sbuf_len, &len);
823251881Speter      if (err == SVN_NO_ERROR && len != window->sview_len - ab->sbuf_len)
824251881Speter        err = svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
825251881Speter                               "Delta source ended unexpectedly");
826251881Speter      if (err != SVN_NO_ERROR)
827251881Speter        return err;
828251881Speter      ab->sbuf_len = window->sview_len;
829251881Speter    }
830251881Speter
831251881Speter  /* Apply the window instructions to the source view to generate
832251881Speter     the target view.  */
833251881Speter  len = window->tview_len;
834251881Speter  svn_txdelta_apply_instructions(window, ab->sbuf, ab->tbuf, &len);
835251881Speter  SVN_ERR_ASSERT(len == window->tview_len);
836251881Speter
837251881Speter  /* Write out the output. */
838251881Speter
839251881Speter  /* ### We've also considered just adding two (optionally null)
840251881Speter     arguments to svn_stream_create(): read_checksum and
841251881Speter     write_checksum.  Then instead of every caller updating an md5
842251881Speter     context when it calls svn_stream_write() or svn_stream_read(),
843251881Speter     streams would do it automatically, and verify the checksum in
844251881Speter     svn_stream_closed().  But this might be overkill for issue #689;
845251881Speter     so for now we just update the context here. */
846251881Speter  if (ab->result_digest)
847251881Speter    apr_md5_update(&(ab->md5_context), ab->tbuf, len);
848251881Speter
849251881Speter  return svn_stream_write(ab->target, ab->tbuf, &len);
850251881Speter}
851251881Speter
852251881Speter
853251881Spetervoid
854251881Spetersvn_txdelta_apply(svn_stream_t *source,
855251881Speter                  svn_stream_t *target,
856251881Speter                  unsigned char *result_digest,
857251881Speter                  const char *error_info,
858251881Speter                  apr_pool_t *pool,
859251881Speter                  svn_txdelta_window_handler_t *handler,
860251881Speter                  void **handler_baton)
861251881Speter{
862251881Speter  apr_pool_t *subpool = svn_pool_create(pool);
863251881Speter  struct apply_baton *ab;
864251881Speter
865251881Speter  ab = apr_palloc(subpool, sizeof(*ab));
866251881Speter  ab->source = source;
867251881Speter  ab->target = target;
868251881Speter  ab->pool = subpool;
869251881Speter  ab->sbuf = NULL;
870251881Speter  ab->sbuf_size = 0;
871251881Speter  ab->sbuf_offset = 0;
872251881Speter  ab->sbuf_len = 0;
873251881Speter  ab->tbuf = NULL;
874251881Speter  ab->tbuf_size = 0;
875251881Speter  ab->result_digest = result_digest;
876251881Speter
877251881Speter  if (result_digest)
878251881Speter    apr_md5_init(&(ab->md5_context));
879251881Speter
880251881Speter  if (error_info)
881251881Speter    ab->error_info = apr_pstrdup(subpool, error_info);
882251881Speter  else
883251881Speter    ab->error_info = NULL;
884251881Speter
885251881Speter  *handler = apply_window;
886251881Speter  *handler_baton = ab;
887251881Speter}
888251881Speter
889251881Speter
890251881Speter
891251881Speter/* Convenience routines */
892251881Speter
893251881Spetersvn_error_t *
894251881Spetersvn_txdelta_send_string(const svn_string_t *string,
895251881Speter                        svn_txdelta_window_handler_t handler,
896251881Speter                        void *handler_baton,
897251881Speter                        apr_pool_t *pool)
898251881Speter{
899251881Speter  svn_txdelta_window_t window = { 0 };
900251881Speter  svn_txdelta_op_t op;
901251881Speter
902251881Speter  /* Build a single `new' op */
903251881Speter  op.action_code = svn_txdelta_new;
904251881Speter  op.offset = 0;
905251881Speter  op.length = string->len;
906251881Speter
907251881Speter  /* Build a single window containing a ptr to the string. */
908251881Speter  window.tview_len = string->len;
909251881Speter  window.num_ops = 1;
910251881Speter  window.ops = &op;
911251881Speter  window.new_data = string;
912251881Speter
913251881Speter  /* Push the one window at the handler. */
914251881Speter  SVN_ERR((*handler)(&window, handler_baton));
915251881Speter
916251881Speter  /* Push a NULL at the handler, because we're done. */
917251881Speter  return (*handler)(NULL, handler_baton);
918251881Speter}
919251881Speter
920251881Spetersvn_error_t *svn_txdelta_send_stream(svn_stream_t *stream,
921251881Speter                                     svn_txdelta_window_handler_t handler,
922251881Speter                                     void *handler_baton,
923251881Speter                                     unsigned char *digest,
924251881Speter                                     apr_pool_t *pool)
925251881Speter{
926251881Speter  svn_txdelta_window_t delta_window = { 0 };
927251881Speter  svn_txdelta_op_t delta_op;
928251881Speter  svn_string_t window_data;
929251881Speter  char read_buf[SVN__STREAM_CHUNK_SIZE + 1];
930251881Speter  svn_checksum_ctx_t *md5_checksum_ctx;
931251881Speter
932251881Speter  if (digest)
933251881Speter    md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
934251881Speter
935251881Speter  while (1)
936251881Speter    {
937251881Speter      apr_size_t read_len = SVN__STREAM_CHUNK_SIZE;
938251881Speter
939251881Speter      SVN_ERR(svn_stream_read(stream, read_buf, &read_len));
940251881Speter      if (read_len == 0)
941251881Speter        break;
942251881Speter
943251881Speter      window_data.data = read_buf;
944251881Speter      window_data.len = read_len;
945251881Speter
946251881Speter      delta_op.action_code = svn_txdelta_new;
947251881Speter      delta_op.offset = 0;
948251881Speter      delta_op.length = read_len;
949251881Speter
950251881Speter      delta_window.tview_len = read_len;
951251881Speter      delta_window.num_ops = 1;
952251881Speter      delta_window.ops = &delta_op;
953251881Speter      delta_window.new_data = &window_data;
954251881Speter
955251881Speter      SVN_ERR(handler(&delta_window, handler_baton));
956251881Speter
957251881Speter      if (digest)
958251881Speter        SVN_ERR(svn_checksum_update(md5_checksum_ctx, read_buf, read_len));
959251881Speter
960251881Speter      if (read_len < SVN__STREAM_CHUNK_SIZE)
961251881Speter        break;
962251881Speter    }
963251881Speter  SVN_ERR(handler(NULL, handler_baton));
964251881Speter
965251881Speter  if (digest)
966251881Speter    {
967251881Speter      svn_checksum_t *md5_checksum;
968251881Speter
969251881Speter      SVN_ERR(svn_checksum_final(&md5_checksum, md5_checksum_ctx, pool));
970251881Speter      memcpy(digest, md5_checksum->digest, APR_MD5_DIGESTSIZE);
971251881Speter    }
972251881Speter
973251881Speter  return SVN_NO_ERROR;
974251881Speter}
975251881Speter
976251881Spetersvn_error_t *svn_txdelta_send_txstream(svn_txdelta_stream_t *txstream,
977251881Speter                                       svn_txdelta_window_handler_t handler,
978251881Speter                                       void *handler_baton,
979251881Speter                                       apr_pool_t *pool)
980251881Speter{
981251881Speter  svn_txdelta_window_t *window;
982251881Speter
983251881Speter  /* create a pool just for the windows */
984251881Speter  apr_pool_t *wpool = svn_pool_create(pool);
985251881Speter
986251881Speter  do
987251881Speter    {
988251881Speter      /* free the window (if any) */
989251881Speter      svn_pool_clear(wpool);
990251881Speter
991251881Speter      /* read in a single delta window */
992251881Speter      SVN_ERR(svn_txdelta_next_window(&window, txstream, wpool));
993251881Speter
994251881Speter      /* shove it at the handler */
995251881Speter      SVN_ERR((*handler)(window, handler_baton));
996251881Speter    }
997251881Speter  while (window != NULL);
998251881Speter
999251881Speter  svn_pool_destroy(wpool);
1000251881Speter
1001251881Speter  return SVN_NO_ERROR;
1002251881Speter}
1003251881Speter
1004251881Spetersvn_error_t *
1005251881Spetersvn_txdelta_send_contents(const unsigned char *contents,
1006251881Speter                          apr_size_t len,
1007251881Speter                          svn_txdelta_window_handler_t handler,
1008251881Speter                          void *handler_baton,
1009251881Speter                          apr_pool_t *pool)
1010251881Speter{
1011251881Speter  svn_string_t new_data;
1012251881Speter  svn_txdelta_op_t op = { svn_txdelta_new, 0, 0 };
1013251881Speter  svn_txdelta_window_t window = { 0, 0, 0, 1, 0 };
1014251881Speter  window.ops = &op;
1015251881Speter  window.new_data = &new_data;
1016251881Speter
1017251881Speter  /* send CONTENT as a series of max-sized windows */
1018251881Speter  while (len > 0)
1019251881Speter    {
1020251881Speter      /* stuff next chunk into the window */
1021251881Speter      window.tview_len = len < SVN_DELTA_WINDOW_SIZE
1022251881Speter                       ? len
1023251881Speter                       : SVN_DELTA_WINDOW_SIZE;
1024251881Speter      op.length = window.tview_len;
1025251881Speter      new_data.len = window.tview_len;
1026251881Speter      new_data.data = (const char*)contents;
1027251881Speter
1028251881Speter      /* update remaining */
1029251881Speter      contents += window.tview_len;
1030251881Speter      len -= window.tview_len;
1031251881Speter
1032251881Speter      /* shove it at the handler */
1033251881Speter      SVN_ERR((*handler)(&window, handler_baton));
1034251881Speter    }
1035251881Speter
1036251881Speter  /* indicate end of stream */
1037251881Speter  SVN_ERR((*handler)(NULL, handler_baton));
1038251881Speter
1039251881Speter  return SVN_NO_ERROR;
1040251881Speter}
1041251881Speter
1042