reps-strings.c revision 299742
/* reps-strings.c : interpreting representations with respect to strings
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include <assert.h>
24
25#include "svn_fs.h"
26#include "svn_pools.h"
27
28#include "fs.h"
29#include "err.h"
30#include "trail.h"
31#include "reps-strings.h"
32
33#include "bdb/reps-table.h"
34#include "bdb/strings-table.h"
35
36#include "../libsvn_fs/fs-loader.h"
37#define SVN_WANT_BDB
38#include "svn_private_config.h"
39
40
41/*** Helper Functions ***/
42
43
44/* Return non-zero iff REP is mutable under transaction TXN_ID. */
45static svn_boolean_t rep_is_mutable(representation_t *rep,
46                                    const char *txn_id)
47{
48  if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0))
49    return FALSE;
50  return TRUE;
51}
52
/* Expand to a newly created SVN_ERR_FS_CORRUPT error (with no child
   error) whose message reports that the representation named by the
   string X has an unknown node kind.  The macro only builds the
   error; returning it is up to the caller. */
#define UNKNOWN_NODE_KIND(x)                                            \
  svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,                           \
                    _("Unknown node kind for representation '%s'"), x)
59
60/* Return a `fulltext' representation, allocated in POOL, which
61 * references the string STR_KEY.
62 *
63 * If TXN_ID is non-zero and non-NULL, make the representation mutable
64 * under that TXN_ID.
65 *
66 * If STR_KEY is non-null, copy it into an allocation from POOL.
67 *
68 * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new
69 * rep; else initialize the rep with an all-zero (i.e., always
70 * successful) MD5 checksum.
71 *
72 * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new
73 * rep; else initialize the rep with an all-zero (i.e., always
74 * successful) SHA1 checksum.
75 */
76static representation_t *
77make_fulltext_rep(const char *str_key,
78                  const char *txn_id,
79                  svn_checksum_t *md5_checksum,
80                  svn_checksum_t *sha1_checksum,
81                  apr_pool_t *pool)
82
83{
84  representation_t *rep = apr_pcalloc(pool, sizeof(*rep));
85  if (txn_id && *txn_id)
86    rep->txn_id = apr_pstrdup(pool, txn_id);
87  rep->kind = rep_kind_fulltext;
88  rep->md5_checksum = svn_checksum_dup(md5_checksum, pool);
89  rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool);
90  rep->contents.fulltext.string_key
91    = str_key ? apr_pstrdup(pool, str_key) : NULL;
92  return rep;
93}
94
95
96/* Set *KEYS to an array of string keys gleaned from `delta'
97   representation REP.  Allocate *KEYS in POOL. */
98static svn_error_t *
99delta_string_keys(apr_array_header_t **keys,
100                  const representation_t *rep,
101                  apr_pool_t *pool)
102{
103  const char *key;
104  int i;
105  apr_array_header_t *chunks;
106
107  if (rep->kind != rep_kind_delta)
108    return svn_error_create
109      (SVN_ERR_FS_GENERAL, NULL,
110       _("Representation is not of type 'delta'"));
111
112  /* Set up a convenience variable. */
113  chunks = rep->contents.delta.chunks;
114
115  /* Initialize *KEYS to an empty array. */
116  *keys = apr_array_make(pool, chunks->nelts, sizeof(key));
117  if (! chunks->nelts)
118    return SVN_NO_ERROR;
119
120  /* Now, push the string keys for each window into *KEYS */
121  for (i = 0; i < chunks->nelts; i++)
122    {
123      rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *);
124
125      key = apr_pstrdup(pool, chunk->string_key);
126      APR_ARRAY_PUSH(*keys, const char *) = key;
127    }
128
129  return SVN_NO_ERROR;
130}
131
132
133/* Delete the strings associated with array KEYS in FS as part of TRAIL.  */
134static svn_error_t *
135delete_strings(const apr_array_header_t *keys,
136               svn_fs_t *fs,
137               trail_t *trail,
138               apr_pool_t *pool)
139{
140  int i;
141  const char *str_key;
142  apr_pool_t *subpool = svn_pool_create(pool);
143
144  for (i = 0; i < keys->nelts; i++)
145    {
146      svn_pool_clear(subpool);
147      str_key = APR_ARRAY_IDX(keys, i, const char *);
148      SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool));
149    }
150  svn_pool_destroy(subpool);
151  return SVN_NO_ERROR;
152}
153
154
155
156/*** Reading the contents from a representation. ***/
157
158struct compose_handler_baton
159{
160  /* The combined window, and the pool it's allocated from. */
161  svn_txdelta_window_t *window;
162  apr_pool_t *window_pool;
163
164  /* If the incoming window was self-compressed, and the combined WINDOW
165     exists from previous iterations, SOURCE_BUF will point to the
166     expanded self-compressed window. */
167  char *source_buf;
168
169  /* The trail for this operation. WINDOW_POOL will be a child of
170     TRAIL->pool. No allocations will be made from TRAIL->pool itself. */
171  trail_t *trail;
172
173  /* TRUE when no more windows have to be read/combined. */
174  svn_boolean_t done;
175
176  /* TRUE if we've just started reading a new window. We need this
177     because the svndiff handler will push a NULL window at the end of
178     the stream, and we have to ignore that; but we must also know
179     when it's appropriate to push a NULL window at the combiner. */
180  svn_boolean_t init;
181};
182
183
184/* Handle one window. If BATON is emtpy, copy the WINDOW into it;
185   otherwise, combine WINDOW with the one in BATON, unless WINDOW
186   is self-compressed (i.e., does not copy from the source view),
187   in which case expand. */
188
189static svn_error_t *
190compose_handler(svn_txdelta_window_t *window, void *baton)
191{
192  struct compose_handler_baton *cb = baton;
193  SVN_ERR_ASSERT(!cb->done || window == NULL);
194  SVN_ERR_ASSERT(cb->trail && cb->trail->pool);
195
196  if (!cb->init && !window)
197    return SVN_NO_ERROR;
198
199  /* We should never get here if we've already expanded a
200     self-compressed window. */
201  SVN_ERR_ASSERT(!cb->source_buf);
202
203  if (cb->window)
204    {
205      if (window && (window->sview_len == 0 || window->src_ops == 0))
206        {
207          /* This is a self-compressed window. Don't combine it with
208             the others, because the combiner may go quadratic. Instead,
209             expand it here and signal that the combination has
210             ended. */
211          apr_size_t source_len = window->tview_len;
212          SVN_ERR_ASSERT(cb->window->sview_len == source_len);
213          cb->source_buf = apr_palloc(cb->window_pool, source_len);
214          svn_txdelta_apply_instructions(window, NULL,
215                                         cb->source_buf, &source_len);
216          cb->done = TRUE;
217        }
218      else
219        {
220          /* Combine the incoming window with whatever's in the baton. */
221          apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool);
222          svn_txdelta_window_t *composite;
223
224          composite = svn_txdelta_compose_windows(window, cb->window,
225                                                  composite_pool);
226          svn_pool_destroy(cb->window_pool);
227          cb->window = composite;
228          cb->window_pool = composite_pool;
229          cb->done = (composite->sview_len == 0 || composite->src_ops == 0);
230        }
231    }
232  else if (window)
233    {
234      /* Copy the (first) window into the baton. */
235      apr_pool_t *window_pool = svn_pool_create(cb->trail->pool);
236      SVN_ERR_ASSERT(cb->window_pool == NULL);
237      cb->window = svn_txdelta_window_dup(window, window_pool);
238      cb->window_pool = window_pool;
239      cb->done = (window->sview_len == 0 || window->src_ops == 0);
240    }
241  else
242    cb->done = TRUE;
243
244  cb->init = FALSE;
245  return SVN_NO_ERROR;
246}
247
248
249
250/* Read one delta window from REP[CUR_CHUNK] and push it at the
251   composition handler. */
252
253static svn_error_t *
254get_one_window(struct compose_handler_baton *cb,
255               svn_fs_t *fs,
256               representation_t *rep,
257               int cur_chunk)
258{
259  svn_stream_t *wstream;
260  char diffdata[4096];   /* hunk of svndiff data */
261  svn_filesize_t off;    /* offset into svndiff data */
262  apr_size_t amt;        /* how much svndiff data to/was read */
263  const char *str_key;
264
265  apr_array_header_t *chunks = rep->contents.delta.chunks;
266  rep_delta_chunk_t *this_chunk, *first_chunk;
267
268  cb->init = TRUE;
269  if (chunks->nelts <= cur_chunk)
270    return compose_handler(NULL, cb);
271
272  /* Set up a window handling stream for the svndiff data. */
273  wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE,
274                                      cb->trail->pool);
275
276  /* First things first:  send the "SVN"{version} header through the
277     stream.  ### For now, we will just use the version specified
278     in the first chunk, and then verify that no chunks have a
279     different version number than the one used.  In the future,
280     we might simply convert chunks that use a different version
281     of the diff format -- or, heck, a different format
282     altogether -- to the format/version of the first chunk.  */
283  first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*);
284  diffdata[0] = 'S';
285  diffdata[1] = 'V';
286  diffdata[2] = 'N';
287  diffdata[3] = (char) (first_chunk->version);
288  amt = 4;
289  SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
290  /* FIXME: The stream write handler is borked; assert (amt == 4); */
291
292  /* Get this string key which holds this window's data.
293     ### todo: make sure this is an `svndiff' DIFF skel here. */
294  this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
295  str_key = this_chunk->string_key;
296
297  /* Run through the svndiff data, at least as far as necessary. */
298  off = 0;
299  do
300    {
301      amt = sizeof(diffdata);
302      SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata,
303                                      off, &amt, cb->trail,
304                                      cb->trail->pool));
305      off += amt;
306      SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
307    }
308  while (amt != 0);
309  SVN_ERR(svn_stream_close(wstream));
310
311  SVN_ERR_ASSERT(!cb->init);
312  SVN_ERR_ASSERT(cb->window != NULL);
313  SVN_ERR_ASSERT(cb->window_pool != NULL);
314  return SVN_NO_ERROR;
315}
316
317
318/* Undeltify a range of data. DELTAS is the set of delta windows to
319   combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
320   delta chunk we're starting from. OFFSET is the relative offset of
321   the requested data within the chunk; BUF and LEN are what we're
322   undeltifying to. */
323
324static svn_error_t *
325rep_undeltify_range(svn_fs_t *fs,
326                    const apr_array_header_t *deltas,
327                    representation_t *fulltext,
328                    int cur_chunk,
329                    char *buf,
330                    apr_size_t offset,
331                    apr_size_t *len,
332                    trail_t *trail,
333                    apr_pool_t *pool)
334{
335  apr_size_t len_read = 0;
336
337  do
338    {
339      struct compose_handler_baton cb = { 0 };
340      char *source_buf, *target_buf;
341      apr_size_t target_len;
342      int cur_rep;
343
344      cb.trail = trail;
345      cb.done = FALSE;
346      for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep)
347        {
348          representation_t *const rep =
349            APR_ARRAY_IDX(deltas, cur_rep, representation_t*);
350          SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk));
351        }
352
353      if (!cb.window)
354          /* That's it, no more source data is available. */
355          break;
356
357      /* The source view length should not be 0 if there are source
358         copy ops in the window. */
359      SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0);
360
361      /* cb.window is the combined delta window. Read the source text
362         into a buffer. */
363      if (cb.source_buf)
364        {
365          /* The combiner already created the source text from a
366             self-compressed window. */
367          source_buf = cb.source_buf;
368        }
369      else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0)
370        {
371          apr_size_t source_len = cb.window->sview_len;
372          source_buf = apr_palloc(cb.window_pool, source_len);
373          SVN_ERR(svn_fs_bdb__string_read
374                  (fs, fulltext->contents.fulltext.string_key,
375                   source_buf, cb.window->sview_offset, &source_len,
376                   trail, pool));
377          if (source_len != cb.window->sview_len)
378            return svn_error_create
379                (SVN_ERR_FS_CORRUPT, NULL,
380                 _("Svndiff source length inconsistency"));
381        }
382      else
383        {
384          source_buf = NULL;    /* Won't read anything from here. */
385        }
386
387      if (offset > 0)
388        {
389          target_len = *len - len_read + offset;
390          target_buf = apr_palloc(cb.window_pool, target_len);
391        }
392      else
393        {
394          target_len = *len - len_read;
395          target_buf = buf;
396        }
397
398      svn_txdelta_apply_instructions(cb.window, source_buf,
399                                     target_buf, &target_len);
400      if (offset > 0)
401        {
402          SVN_ERR_ASSERT(target_len > offset);
403          target_len -= offset;
404          memcpy(buf, target_buf + offset, target_len);
405          offset = 0; /* Read from the beginning of the next chunk. */
406        }
407      /* Don't need this window any more. */
408      svn_pool_destroy(cb.window_pool);
409
410      len_read += target_len;
411      buf += target_len;
412      ++cur_chunk;
413    }
414  while (len_read < *len);
415
416  *len = len_read;
417  return SVN_NO_ERROR;
418}
419
420
421
422/* Calculate the index of the chunk in REP that contains REP_OFFSET,
423   and find the relative CHUNK_OFFSET within the chunk.
424   Return -1 if offset is beyond the end of the represented data.
425   ### The basic assumption is that all delta windows are the same size
426   and aligned at the same offset, so this number is the same in all
427   dependent deltas.  Oh, and the chunks in REP must be ordered. */
428
429static int
430get_chunk_offset(representation_t *rep,
431                 svn_filesize_t rep_offset,
432                 apr_size_t *chunk_offset)
433{
434  const apr_array_header_t *chunks = rep->contents.delta.chunks;
435  int cur_chunk;
436  assert(chunks->nelts);
437
438  /* ### Yes, this is a linear search.  I'll change this to bisection
439     the very second we notice it's slowing us down. */
440  for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk)
441  {
442    const rep_delta_chunk_t *const this_chunk
443      = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
444
445    if ((this_chunk->offset + this_chunk->size) > rep_offset)
446      {
447        assert(this_chunk->offset <= rep_offset);
448        assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE);
449        *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset);
450        return cur_chunk;
451      }
452  }
453
454  return -1;
455}
456
457/* Copy into BUF *LEN bytes starting at OFFSET from the string
458   represented via REP_KEY in FS, as part of TRAIL.
459   The number of bytes actually copied is stored in *LEN.  */
460static svn_error_t *
461rep_read_range(svn_fs_t *fs,
462               const char *rep_key,
463               svn_filesize_t offset,
464               char *buf,
465               apr_size_t *len,
466               trail_t *trail,
467               apr_pool_t *pool)
468{
469  representation_t *rep;
470  apr_size_t chunk_offset;
471
472  /* Read in our REP. */
473  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
474  if (rep->kind == rep_kind_fulltext)
475    {
476      SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key,
477                                      buf, offset, len, trail, pool));
478    }
479  else if (rep->kind == rep_kind_delta)
480    {
481      const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset);
482      if (cur_chunk < 0)
483        *len = 0;
484      else
485        {
486          svn_error_t *err;
487          /* Preserve for potential use in error message. */
488          const char *first_rep_key = rep_key;
489          /* Make a list of all the rep's we need to undeltify this range.
490             We'll have to read them within this trail anyway, so we might
491             as well do it once and up front. */
492          apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep));
493          do
494            {
495              const rep_delta_chunk_t *const first_chunk
496                = APR_ARRAY_IDX(rep->contents.delta.chunks,
497                                0, rep_delta_chunk_t*);
498              const rep_delta_chunk_t *const chunk
499                = APR_ARRAY_IDX(rep->contents.delta.chunks,
500                                cur_chunk, rep_delta_chunk_t*);
501
502              /* Verify that this chunk is of the same version as the first. */
503              if (first_chunk->version != chunk->version)
504                return svn_error_createf
505                  (SVN_ERR_FS_CORRUPT, NULL,
506                   _("Diff version inconsistencies in representation '%s'"),
507                   rep_key);
508
509              rep_key = chunk->rep_key;
510              APR_ARRAY_PUSH(reps, representation_t *) = rep;
511              SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key,
512                                           trail, pool));
513            }
514          while (rep->kind == rep_kind_delta
515                 && rep->contents.delta.chunks->nelts > cur_chunk);
516
517          /* Right. We've either just read the fulltext rep, or a rep that's
518             too short, in which case we'll undeltify without source data.*/
519          if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext)
520            return UNKNOWN_NODE_KIND(rep_key);
521
522          if (rep->kind == rep_kind_delta)
523            rep = NULL;         /* Don't use source data */
524
525          err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf,
526                                    chunk_offset, len, trail, pool);
527          if (err)
528            {
529              if (err->apr_err == SVN_ERR_FS_CORRUPT)
530                return svn_error_createf
531                  (SVN_ERR_FS_CORRUPT, err,
532                   _("Corruption detected whilst reading delta chain from "
533                     "representation '%s' to '%s'"), first_rep_key, rep_key);
534              else
535                return svn_error_trace(err);
536            }
537        }
538    }
539  else /* unknown kind */
540    return UNKNOWN_NODE_KIND(rep_key);
541
542  return SVN_NO_ERROR;
543}
544
545
546svn_error_t *
547svn_fs_base__get_mutable_rep(const char **new_rep_key,
548                             const char *rep_key,
549                             svn_fs_t *fs,
550                             const char *txn_id,
551                             trail_t *trail,
552                             apr_pool_t *pool)
553{
554  representation_t *rep = NULL;
555  const char *new_str = NULL;
556
557  /* We were passed an existing REP_KEY, so examine it.  If it is
558     mutable already, then just return REP_KEY as the mutable result
559     key.  */
560  if (rep_key && (rep_key[0] != '\0'))
561    {
562      SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
563      if (rep_is_mutable(rep, txn_id))
564        {
565          *new_rep_key = rep_key;
566          return SVN_NO_ERROR;
567        }
568    }
569
570  /* Either we weren't provided a base key to examine, or the base key
571     we were provided was not mutable.  So, let's make a new
572     representation and return its key to the caller. */
573  SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool));
574  rep = make_fulltext_rep(new_str, txn_id,
575                          svn_checksum_empty_checksum(svn_checksum_md5,
576                                                      pool),
577                          svn_checksum_empty_checksum(svn_checksum_sha1,
578                                                      pool),
579                          pool);
580  return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool);
581}
582
583
584svn_error_t *
585svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs,
586                                   const char *rep_key,
587                                   const char *txn_id,
588                                   trail_t *trail,
589                                   apr_pool_t *pool)
590{
591  representation_t *rep;
592
593  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
594  if (! rep_is_mutable(rep, txn_id))
595    return SVN_NO_ERROR;
596
597  if (rep->kind == rep_kind_fulltext)
598    {
599      SVN_ERR(svn_fs_bdb__string_delete(fs,
600                                        rep->contents.fulltext.string_key,
601                                        trail, pool));
602    }
603  else if (rep->kind == rep_kind_delta)
604    {
605      apr_array_header_t *keys;
606      SVN_ERR(delta_string_keys(&keys, rep, pool));
607      SVN_ERR(delete_strings(keys, fs, trail, pool));
608    }
609  else /* unknown kind */
610    return UNKNOWN_NODE_KIND(rep_key);
611
612  return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool);
613}
614
615
616
617/*** Reading and writing data via representations. ***/
618
619/** Reading. **/
620
621struct rep_read_baton
622{
623  /* The FS from which we're reading. */
624  svn_fs_t *fs;
625
626  /* The representation skel whose contents we want to read.  If this
627     is NULL, the rep has never had any contents, so all reads fetch 0
628     bytes.
629
630     Formerly, we cached the entire rep skel here, not just the key.
631     That way we didn't have to fetch the rep from the db every time
632     we want to read a little bit more of the file.  Unfortunately,
633     this has a problem: if, say, a file's representation changes
634     while we're reading (changes from fulltext to delta, for
635     example), we'll never know it.  So for correctness, we now
636     refetch the representation skel every time we want to read
637     another chunk.  */
638  const char *rep_key;
639
640  /* How many bytes have been read already. */
641  svn_filesize_t offset;
642
643  /* If present, the read will be done as part of this trail, and the
644     trail's pool will be used.  Otherwise, see `pool' below.  */
645  trail_t *trail;
646
647  /* MD5 checksum context.  Initialized when the baton is created, updated as
648     we read data, and finalized when the stream is closed. */
649  svn_checksum_ctx_t *md5_checksum_ctx;
650
651  /* Final resting place of the checksum created by md5_checksum_cxt. */
652  svn_checksum_t *md5_checksum;
653
654  /* SHA1 checksum context.  Initialized when the baton is created, updated as
655     we read data, and finalized when the stream is closed. */
656  svn_checksum_ctx_t *sha1_checksum_ctx;
657
658  /* Final resting place of the checksum created by sha1_checksum_cxt. */
659  svn_checksum_t *sha1_checksum;
660
661  /* The length of the rep's contents (as fulltext, that is,
662     independent of how the rep actually stores the data.)  This is
663     retrieved when the baton is created, and used to determine when
664     we have read the last byte, at which point we compare checksums.
665
666     Getting this at baton creation time makes interleaved reads and
667     writes on the same rep in the same trail impossible.  But we're
668     not doing that, and probably no one ever should.  And anyway if
669     they do, they should see problems immediately. */
670  svn_filesize_t size;
671
672  /* Set to FALSE when the baton is created, TRUE when the checksum_ctx
673     is digestified. */
674  svn_boolean_t checksum_finalized;
675
676  /* Used for temporary allocations.  This pool is cleared at the
677     start of each invocation of the relevant stream read function --
678     see rep_read_contents().  */
679  apr_pool_t *scratch_pool;
680
681};
682
683
684static svn_error_t *
685rep_read_get_baton(struct rep_read_baton **rb_p,
686                   svn_fs_t *fs,
687                   const char *rep_key,
688                   svn_boolean_t use_trail_for_reads,
689                   trail_t *trail,
690                   apr_pool_t *pool)
691{
692  struct rep_read_baton *b;
693
694  b = apr_pcalloc(pool, sizeof(*b));
695  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
696  b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
697
698  if (rep_key)
699    SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key,
700                                           trail, pool));
701  else
702    b->size = 0;
703
704  b->checksum_finalized = FALSE;
705  b->fs = fs;
706  b->trail = use_trail_for_reads ? trail : NULL;
707  b->scratch_pool = svn_pool_create(pool);
708  b->rep_key = rep_key;
709  b->offset = 0;
710
711  *rb_p = b;
712
713  return SVN_NO_ERROR;
714}
715
716
717
718/*** Retrieving data. ***/
719
720svn_error_t *
721svn_fs_base__rep_contents_size(svn_filesize_t *size_p,
722                               svn_fs_t *fs,
723                               const char *rep_key,
724                               trail_t *trail,
725                               apr_pool_t *pool)
726{
727  representation_t *rep;
728
729  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
730
731  if (rep->kind == rep_kind_fulltext)
732    {
733      /* Get the size by asking Berkeley for the string's length. */
734      SVN_ERR(svn_fs_bdb__string_size(size_p, fs,
735                                      rep->contents.fulltext.string_key,
736                                      trail, pool));
737    }
738  else if (rep->kind == rep_kind_delta)
739    {
740      /* Get the size by finding the last window pkg in the delta and
741         adding its offset to its size.  This way, we won't even be
742         messed up by overlapping windows, as long as the window pkgs
743         are still ordered. */
744      apr_array_header_t *chunks = rep->contents.delta.chunks;
745      rep_delta_chunk_t *last_chunk;
746
747      SVN_ERR_ASSERT(chunks->nelts);
748
749      last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1,
750                                 rep_delta_chunk_t *);
751      *size_p = last_chunk->offset + last_chunk->size;
752    }
753  else /* unknown kind */
754    return UNKNOWN_NODE_KIND(rep_key);
755
756  return SVN_NO_ERROR;
757}
758
759
760svn_error_t *
761svn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum,
762                                    svn_checksum_t **sha1_checksum,
763                                    svn_fs_t *fs,
764                                    const char *rep_key,
765                                    trail_t *trail,
766                                    apr_pool_t *pool)
767{
768  representation_t *rep;
769
770  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
771  if (md5_checksum)
772    *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
773  if (sha1_checksum)
774    *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool);
775
776  return SVN_NO_ERROR;
777}
778
779
780svn_error_t *
781svn_fs_base__rep_contents(svn_string_t *str,
782                          svn_fs_t *fs,
783                          const char *rep_key,
784                          trail_t *trail,
785                          apr_pool_t *pool)
786{
787  svn_filesize_t contents_size;
788  apr_size_t len;
789  char *data;
790
791  SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key,
792                                         trail, pool));
793
794  /* What if the contents are larger than we can handle? */
795  if (contents_size > SVN_MAX_OBJECT_SIZE)
796    return svn_error_createf
797      (SVN_ERR_FS_GENERAL, NULL,
798       _("Rep contents are too large: "
799         "got %s, limit is %s"),
800       apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size),
801       apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE));
802  else
803    str->len = (apr_size_t) contents_size;
804
805  data = apr_palloc(pool, str->len);
806  str->data = data;
807  len = str->len;
808  SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool));
809
810  /* Paranoia. */
811  if (len != str->len)
812    return svn_error_createf
813      (SVN_ERR_FS_CORRUPT, NULL,
814       _("Failure reading representation '%s'"), rep_key);
815
816  /* Just the standard paranoia. */
817  {
818    representation_t *rep;
819    svn_checksum_t *checksum, *rep_checksum;
820
821    SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
822    rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum;
823    SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len,
824                         pool));
825
826    if (! svn_checksum_match(checksum, rep_checksum))
827      return svn_error_create(SVN_ERR_FS_CORRUPT,
828                svn_checksum_mismatch_err(rep_checksum, checksum, pool,
829                            _("Checksum mismatch on representation '%s'"),
830                            rep_key),
831                NULL);
832  }
833
834  return SVN_NO_ERROR;
835}
836
837
838struct read_rep_args
839{
840  struct rep_read_baton *rb;   /* The data source.             */
841  char *buf;                   /* Where to put what we read.   */
842  apr_size_t *len;             /* How much to read / was read. */
843};
844
845
/* BATON is of type `read_rep_args':

   Read into BATON->buf the *(BATON->len) bytes starting at
   BATON->rb->offset from the data represented at BATON->rb->rep_key
   in BATON->rb->fs, as part of TRAIL.

   Afterwards, *(BATON->len) is the number of bytes actually read, and
   BATON->rb->offset is incremented by that amount.

   If BATON->rb->rep_key is null, this is assumed to mean the file's
   contents have no representation, i.e., the file has no contents.
   In that case, if BATON->rb->offset > 0, return the error
   SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to
   zero and return.  */
860static svn_error_t *
861txn_body_read_rep(void *baton, trail_t *trail)
862{
863  struct read_rep_args *args = baton;
864
865  if (args->rb->rep_key)
866    {
867      SVN_ERR(rep_read_range(args->rb->fs,
868                             args->rb->rep_key,
869                             args->rb->offset,
870                             args->buf,
871                             args->len,
872                             trail,
873                             args->rb->scratch_pool));
874
875      args->rb->offset += *(args->len);
876
877      /* We calculate the checksum just once, the moment we see the
878       * last byte of data.  But we can't assume there was a short
879       * read.  The caller may have known the length of the data and
880       * requested exactly that amount, so there would never be a
881       * short read.  (That's why the read baton has to know the
882       * length of the data in advance.)
883       *
884       * On the other hand, some callers invoke the stream reader in a
885       * loop whose termination condition is that the read returned
886       * zero bytes of data -- which usually results in the read
887       * function being called one more time *after* the call that got
888       * a short read (indicating end-of-stream).
889       *
890       * The conditions below ensure that we compare checksums even
891       * when there is no short read associated with the last byte of
892       * data, while also ensuring that it's harmless to repeatedly
893       * read 0 bytes from the stream.
894       */
895      if (! args->rb->checksum_finalized)
896        {
897          SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf,
898                                      *(args->len)));
899          SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf,
900                                      *(args->len)));
901
902          if (args->rb->offset == args->rb->size)
903            {
904              representation_t *rep;
905
906              SVN_ERR(svn_checksum_final(&args->rb->md5_checksum,
907                                         args->rb->md5_checksum_ctx,
908                                         trail->pool));
909              SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum,
910                                         args->rb->sha1_checksum_ctx,
911                                         trail->pool));
912              args->rb->checksum_finalized = TRUE;
913
914              SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs,
915                                           args->rb->rep_key,
916                                           trail, trail->pool));
917
918              if (rep->md5_checksum
919                  && (! svn_checksum_match(rep->md5_checksum,
920                                           args->rb->md5_checksum)))
921                return svn_error_create(SVN_ERR_FS_CORRUPT,
922                        svn_checksum_mismatch_err(rep->md5_checksum,
923                             args->rb->md5_checksum, trail->pool,
924                             _("MD5 checksum mismatch on representation '%s'"),
925                             args->rb->rep_key),
926                        NULL);
927
928              if (rep->sha1_checksum
929                  && (! svn_checksum_match(rep->sha1_checksum,
930                                           args->rb->sha1_checksum)))
931                return svn_error_createf(SVN_ERR_FS_CORRUPT,
932                        svn_checksum_mismatch_err(rep->sha1_checksum,
933                            args->rb->sha1_checksum, trail->pool,
934                            _("SHA1 checksum mismatch on representation '%s'"),
935                            args->rb->rep_key),
936                        NULL);
937            }
938        }
939    }
940  else if (args->rb->offset > 0)
941    {
942      return
943        svn_error_create
944        (SVN_ERR_FS_REP_CHANGED, NULL,
945         _("Null rep, but offset past zero already"));
946    }
947  else
948    *(args->len) = 0;
949
950  return SVN_NO_ERROR;
951}
952
953
954static svn_error_t *
955rep_read_contents(void *baton, char *buf, apr_size_t *len)
956{
957  struct rep_read_baton *rb = baton;
958  struct read_rep_args args;
959
960  /* Clear the scratch pool of the results of previous invocations. */
961  svn_pool_clear(rb->scratch_pool);
962
963  args.rb = rb;
964  args.buf = buf;
965  args.len = len;
966
967  /* If we got a trail, use it; else make one. */
968  if (rb->trail)
969    SVN_ERR(txn_body_read_rep(&args, rb->trail));
970  else
971    {
972      /* In the case of reading from the db, any returned data should
973         live in our pre-allocated buffer, so the whole operation can
974         happen within a single malloc/free cycle.  This prevents us
975         from creating millions of unnecessary trail subpools when
976         reading a big file.  */
977      SVN_ERR(svn_fs_base__retry_txn(rb->fs,
978                                     txn_body_read_rep,
979                                     &args,
980                                     TRUE,
981                                     rb->scratch_pool));
982    }
983  return SVN_NO_ERROR;
984}
985
986
987/** Writing. **/
988
989
990struct rep_write_baton
991{
992  /* The FS in which we're writing. */
993  svn_fs_t *fs;
994
995  /* The representation skel whose contents we want to write. */
996  const char *rep_key;
997
998  /* The transaction id under which this write action will take
999     place. */
1000  const char *txn_id;
1001
1002  /* If present, do the write as part of this trail, and use trail's
1003     pool.  Otherwise, see `pool' below.  */
1004  trail_t *trail;
1005
1006  /* SHA1 and MD5 checksums.  Initialized when the baton is created,
1007     updated as we write data, and finalized and stored when the
1008     stream is closed. */
1009  svn_checksum_ctx_t *md5_checksum_ctx;
1010  svn_checksum_t *md5_checksum;
1011  svn_checksum_ctx_t *sha1_checksum_ctx;
1012  svn_checksum_t *sha1_checksum;
1013  svn_boolean_t finalized;
1014
1015  /* Used for temporary allocations, iff `trail' (above) is null.  */
1016  apr_pool_t *pool;
1017
1018};
1019
1020
1021static struct rep_write_baton *
1022rep_write_get_baton(svn_fs_t *fs,
1023                    const char *rep_key,
1024                    const char *txn_id,
1025                    trail_t *trail,
1026                    apr_pool_t *pool)
1027{
1028  struct rep_write_baton *b;
1029
1030  b = apr_pcalloc(pool, sizeof(*b));
1031  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1032  b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
1033  b->fs = fs;
1034  b->trail = trail;
1035  b->pool = pool;
1036  b->rep_key = rep_key;
1037  b->txn_id = txn_id;
1038  return b;
1039}
1040
1041
1042
1043/* Write LEN bytes from BUF into the end of the string represented via
1044   REP_KEY in FS, as part of TRAIL.  If the representation is not
1045   mutable, return the error SVN_FS_REP_NOT_MUTABLE. */
1046static svn_error_t *
1047rep_write(svn_fs_t *fs,
1048          const char *rep_key,
1049          const char *buf,
1050          apr_size_t len,
1051          const char *txn_id,
1052          trail_t *trail,
1053          apr_pool_t *pool)
1054{
1055  representation_t *rep;
1056
1057  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1058
1059  if (! rep_is_mutable(rep, txn_id))
1060    return svn_error_createf
1061      (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1062       _("Rep '%s' is not mutable"), rep_key);
1063
1064  if (rep->kind == rep_kind_fulltext)
1065    {
1066      SVN_ERR(svn_fs_bdb__string_append
1067              (fs, &(rep->contents.fulltext.string_key), len, buf,
1068               trail, pool));
1069    }
1070  else if (rep->kind == rep_kind_delta)
1071    {
1072      /* There should never be a case when we have a mutable
1073         non-fulltext rep.  The only code that creates mutable reps is
1074         in this file, and it creates them fulltext. */
1075      return svn_error_createf
1076        (SVN_ERR_FS_CORRUPT, NULL,
1077         _("Rep '%s' both mutable and non-fulltext"), rep_key);
1078    }
1079  else /* unknown kind */
1080    return UNKNOWN_NODE_KIND(rep_key);
1081
1082  return SVN_NO_ERROR;
1083}
1084
1085
1086struct write_rep_args
1087{
1088  struct rep_write_baton *wb;   /* Destination.       */
1089  const char *buf;              /* Data.              */
1090  apr_size_t len;               /* How much to write. */
1091};
1092
1093
1094/* BATON is of type `write_rep_args':
1095   Append onto BATON->wb->rep_key's contents BATON->len bytes of
1096   data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL.
1097
1098   If the representation is not mutable, return the error
1099   SVN_FS_REP_NOT_MUTABLE.  */
1100static svn_error_t *
1101txn_body_write_rep(void *baton, trail_t *trail)
1102{
1103  struct write_rep_args *args = baton;
1104
1105  SVN_ERR(rep_write(args->wb->fs,
1106                    args->wb->rep_key,
1107                    args->buf,
1108                    args->len,
1109                    args->wb->txn_id,
1110                    trail,
1111                    trail->pool));
1112  SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx,
1113                              args->buf, args->len));
1114  SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx,
1115                              args->buf, args->len));
1116  return SVN_NO_ERROR;
1117}
1118
1119
1120static svn_error_t *
1121rep_write_contents(void *baton,
1122                   const char *buf,
1123                   apr_size_t *len)
1124{
1125  struct rep_write_baton *wb = baton;
1126  struct write_rep_args args;
1127
1128  /* We toss LEN's indirectness because if not all the bytes are
1129     written, it's an error, so we wouldn't be reporting anything back
1130     through *LEN anyway. */
1131  args.wb = wb;
1132  args.buf = buf;
1133  args.len = *len;
1134
1135  /* If we got a trail, use it; else make one. */
1136  if (wb->trail)
1137    SVN_ERR(txn_body_write_rep(&args, wb->trail));
1138  else
1139    {
1140      /* In the case of simply writing the rep to the db, we're
1141         *certain* that there's no data coming back to us that needs
1142         to be preserved... so the whole operation can happen within a
1143         single malloc/free cycle.  This prevents us from creating
1144         millions of unnecessary trail subpools when writing a big
1145         file. */
1146      SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1147                                     txn_body_write_rep,
1148                                     &args,
1149                                     TRUE,
1150                                     wb->pool));
1151    }
1152
1153  return SVN_NO_ERROR;
1154}
1155
1156
1157/* Helper for rep_write_close_contents(); see that doc string for
1158   more.  BATON is of type `struct rep_write_baton'. */
1159static svn_error_t *
1160txn_body_write_close_rep(void *baton, trail_t *trail)
1161{
1162  struct rep_write_baton *wb = baton;
1163  representation_t *rep;
1164
1165  SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key,
1166                               trail, trail->pool));
1167  rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool);
1168  rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool);
1169  return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep,
1170                               trail, trail->pool);
1171}
1172
1173
1174/* BATON is of type `struct rep_write_baton'.
1175 *
1176 * Finalize BATON->md5_context and store the resulting digest under
1177 * BATON->rep_key.
1178 */
1179static svn_error_t *
1180rep_write_close_contents(void *baton)
1181{
1182  struct rep_write_baton *wb = baton;
1183
1184  /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1185     digestification, then we wouldn't need a stream close function at
1186     all -- instead, we could update the stored checksum each time a
1187     write occurred, which would have the added advantage of making
1188     interleaving reads and writes work.  Currently, they'd fail with
1189     a checksum mismatch, it just happens that our code never tries to
1190     do that anyway. */
1191
1192  if (! wb->finalized)
1193    {
1194      SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx,
1195                                 wb->pool));
1196      SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx,
1197                                 wb->pool));
1198      wb->finalized = TRUE;
1199    }
1200
1201  /* If we got a trail, use it; else make one. */
1202  if (wb->trail)
1203    return txn_body_write_close_rep(wb, wb->trail);
1204  else
1205    /* We need to keep our trail pool around this time so the
1206       checksums we've calculated survive. */
1207    return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep,
1208                                  wb, FALSE, wb->pool);
1209}
1210
1211
1212/** Public read and write stream constructors. **/
1213
1214svn_error_t *
1215svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p,
1216                                      svn_fs_t *fs,
1217                                      const char *rep_key,
1218                                      svn_boolean_t use_trail_for_reads,
1219                                      trail_t *trail,
1220                                      apr_pool_t *pool)
1221{
1222  struct rep_read_baton *rb;
1223
1224  SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads,
1225                             trail, pool));
1226  *rs_p = svn_stream_create(rb, pool);
1227  svn_stream_set_read2(*rs_p, NULL /* only full read support */,
1228                       rep_read_contents);
1229
1230  return SVN_NO_ERROR;
1231}
1232
1233
1234/* Clear the contents of REP_KEY, so that it represents the empty
1235   string, as part of TRAIL.  TXN_ID is the id of the Subversion
1236   transaction under which this occurs.  If REP_KEY is not mutable,
1237   return the error SVN_ERR_FS_REP_NOT_MUTABLE.  */
1238static svn_error_t *
1239rep_contents_clear(svn_fs_t *fs,
1240                   const char *rep_key,
1241                   const char *txn_id,
1242                   trail_t *trail,
1243                   apr_pool_t *pool)
1244{
1245  representation_t *rep;
1246  const char *str_key;
1247
1248  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1249
1250  /* Make sure it's mutable. */
1251  if (! rep_is_mutable(rep, txn_id))
1252    return svn_error_createf
1253      (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1254       _("Rep '%s' is not mutable"), rep_key);
1255
1256  SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext);
1257
1258  /* If rep has no string, just return success.  Else, clear the
1259     underlying string.  */
1260  str_key = rep->contents.fulltext.string_key;
1261  if (str_key && *str_key)
1262    {
1263      SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool));
1264      rep->md5_checksum = NULL;
1265      rep->sha1_checksum = NULL;
1266      SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool));
1267    }
1268  return SVN_NO_ERROR;
1269}
1270
1271
1272svn_error_t *
1273svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p,
1274                                       svn_fs_t *fs,
1275                                       const char *rep_key,
1276                                       const char *txn_id,
1277                                       svn_boolean_t use_trail_for_writes,
1278                                       trail_t *trail,
1279                                       apr_pool_t *pool)
1280{
1281  struct rep_write_baton *wb;
1282
1283  /* Clear the current rep contents (free mutability check!). */
1284  SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool));
1285
1286  /* Now, generate the write baton and stream. */
1287  wb = rep_write_get_baton(fs, rep_key, txn_id,
1288                           use_trail_for_writes ? trail : NULL, pool);
1289  *ws_p = svn_stream_create(wb, pool);
1290  svn_stream_set_write(*ws_p, rep_write_contents);
1291  svn_stream_set_close(*ws_p, rep_write_close_contents);
1292
1293  return SVN_NO_ERROR;
1294}
1295
1296
1297
1298/*** Deltified storage. ***/
1299
1300/* Baton for svn_write_fn_t write_string_set(). */
1301struct write_svndiff_strings_baton
1302{
1303  /* The fs where lives the string we're writing. */
1304  svn_fs_t *fs;
1305
1306  /* The key of the string we're writing to.  Typically this is
1307     initialized to NULL, so svn_fs_base__string_append() can fill in a
1308     value. */
1309  const char *key;
1310
1311  /* The amount of txdelta data written to the current
1312     string-in-progress. */
1313  apr_size_t size;
1314
1315  /* The amount of svndiff header information we've written thus far
1316     to the strings table. */
1317  apr_size_t header_read;
1318
1319  /* The version number of the svndiff data written.  ### You'd better
1320     not count on this being populated after the first chunk is sent
1321     through the interface, since it lives at the 4th byte of the
1322     stream. */
1323  apr_byte_t version;
1324
1325  /* The trail we're writing in. */
1326  trail_t *trail;
1327
1328};
1329
1330
1331/* Function of type `svn_write_fn_t', for writing to a collection of
1332   strings; BATON is `struct write_svndiff_strings_baton *'.
1333
1334   On the first call, BATON->key is null.  A new string key in
1335   BATON->fs is chosen and stored in BATON->key; each call appends
1336   *LEN bytes from DATA onto the string.  *LEN is never changed; if
1337   the write fails to write all *LEN bytes, an error is returned.
1338   BATON->size is used to track the total amount of data written via
1339   this handler, and must be reset by the caller to 0 when appropriate.  */
1340static svn_error_t *
1341write_svndiff_strings(void *baton, const char *data, apr_size_t *len)
1342{
1343  struct write_svndiff_strings_baton *wb = baton;
1344  const char *buf = data;
1345  apr_size_t nheader = 0;
1346
1347  /* If we haven't stripped all the header information from this
1348     stream yet, keep stripping.  If someone sends a first window
1349     through here that's shorter than 4 bytes long, this will probably
1350     cause a nuclear reactor meltdown somewhere in the American
1351     midwest.  */
1352  if (wb->header_read < 4)
1353    {
1354      nheader = 4 - wb->header_read;
1355      *len -= nheader;
1356      buf += nheader;
1357      wb->header_read += nheader;
1358
1359      /* If we have *now* read the full 4-byte header, check that
1360         least byte for the version number of the svndiff format. */
1361      if (wb->header_read == 4)
1362        wb->version = *(buf - 1);
1363    }
1364
1365  /* Append to the current string we're writing (or create a new one
1366     if WB->key is NULL). */
1367  SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len,
1368                                    buf, wb->trail, wb->trail->pool));
1369
1370  /* Make sure we (still) have a key. */
1371  if (wb->key == NULL)
1372    return svn_error_create(SVN_ERR_FS_GENERAL, NULL,
1373                            _("Failed to get new string key"));
1374
1375  /* Restore *LEN to the value it *would* have been were it not for
1376     header stripping. */
1377  *len += nheader;
1378
1379  /* Increment our running total of bytes written to this string. */
1380  wb->size += *len;
1381
1382  return SVN_NO_ERROR;
1383}
1384
1385
1386typedef struct window_write_t
1387{
1388  const char *key; /* string key for this window */
1389  apr_size_t svndiff_len; /* amount of svndiff data written to the string */
1390  svn_filesize_t text_off; /* offset of fulltext represented by this window */
1391  apr_size_t text_len; /* amount of fulltext data represented by this window */
1392
1393} window_write_t;
1394
1395
1396svn_error_t *
1397svn_fs_base__rep_deltify(svn_fs_t *fs,
1398                         const char *target,
1399                         const char *source,
1400                         trail_t *trail,
1401                         apr_pool_t *pool)
1402{
1403  base_fs_data_t *bfd = fs->fsap_data;
1404  svn_stream_t *source_stream; /* stream to read the source */
1405  svn_stream_t *target_stream; /* stream to read the target */
1406  svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows  */
1407
1408  /* window-y things, and an array to track them */
1409  window_write_t *ww;
1410  apr_array_header_t *windows;
1411
1412  /* stream to write new (deltified) target data and its baton */
1413  svn_stream_t *new_target_stream;
1414  struct write_svndiff_strings_baton new_target_baton;
1415
1416  /* window handler/baton for writing to above stream */
1417  svn_txdelta_window_handler_t new_target_handler;
1418  void *new_target_handler_baton;
1419
1420  /* yes, we do windows */
1421  svn_txdelta_window_t *window;
1422
1423  /* The current offset into the fulltext that our window is about to
1424     write.  This doubles, after all windows are written, as the
1425     total size of the svndiff data for the deltification process. */
1426  svn_filesize_t tview_off = 0;
1427
1428  /* The total amount of diff data written while deltifying. */
1429  svn_filesize_t diffsize = 0;
1430
1431  /* TARGET's original string keys */
1432  apr_array_header_t *orig_str_keys;
1433
1434  /* The checksums for the representation's fulltext contents. */
1435  svn_checksum_t *rep_md5_checksum;
1436  svn_checksum_t *rep_sha1_checksum;
1437
1438  /* MD5 digest */
1439  const unsigned char *digest;
1440
1441  /* pool for holding the windows */
1442  apr_pool_t *wpool;
1443
1444  /* Paranoia: never allow a rep to be deltified against itself,
1445     because then there would be no fulltext reachable in the delta
1446     chain, and badness would ensue.  */
1447  if (strcmp(target, source) == 0)
1448    return svn_error_createf
1449      (SVN_ERR_FS_CORRUPT, NULL,
1450       _("Attempt to deltify '%s' against itself"),
1451       target);
1452
1453  /* Set up a handler for the svndiff data, which will write each
1454     window to its own string in the `strings' table. */
1455  new_target_baton.fs = fs;
1456  new_target_baton.trail = trail;
1457  new_target_baton.header_read = FALSE;
1458  new_target_stream = svn_stream_create(&new_target_baton, pool);
1459  svn_stream_set_write(new_target_stream, write_svndiff_strings);
1460
1461  /* Get streams to our source and target text data. */
1462  SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source,
1463                                                TRUE, trail, pool));
1464  SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target,
1465                                                TRUE, trail, pool));
1466
1467  /* Setup a stream to convert the textdelta data into svndiff windows. */
1468  svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool);
1469
1470  if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT)
1471    svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1472                            new_target_stream, 1,
1473                            SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1474  else
1475    svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1476                            new_target_stream, 0,
1477                            SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1478
1479  /* subpool for the windows */
1480  wpool = svn_pool_create(pool);
1481
1482  /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1483  windows = apr_array_make(pool, 1, sizeof(ww));
1484  do
1485    {
1486      /* Reset some baton variables. */
1487      new_target_baton.size = 0;
1488      new_target_baton.key = NULL;
1489
1490      /* Free the window. */
1491      svn_pool_clear(wpool);
1492
1493      /* Fetch the next window of txdelta data. */
1494      SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool));
1495
1496      /* Send off this package to be written as svndiff data. */
1497      SVN_ERR(new_target_handler(window, new_target_handler_baton));
1498      if (window)
1499        {
1500          /* Add a new window description to our array. */
1501          ww = apr_pcalloc(pool, sizeof(*ww));
1502          ww->key = new_target_baton.key;
1503          ww->svndiff_len = new_target_baton.size;
1504          ww->text_off = tview_off;
1505          ww->text_len = window->tview_len;
1506          APR_ARRAY_PUSH(windows, window_write_t *) = ww;
1507
1508          /* Update our recordkeeping variables. */
1509          tview_off += window->tview_len;
1510          diffsize += ww->svndiff_len;
1511        }
1512
1513    } while (window);
1514
1515  svn_pool_destroy(wpool);
1516
1517  /* Having processed all the windows, we can query the MD5 digest
1518     from the stream.  */
1519  digest = svn_txdelta_md5_digest(txdelta_stream);
1520  if (! digest)
1521    return svn_error_createf
1522      (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL,
1523       _("Failed to calculate MD5 digest for '%s'"),
1524       source);
1525
1526  /* Construct a list of the strings used by the old representation so
1527     that we can delete them later.  While we are here, if the old
1528     representation was a fulltext, check to make sure the delta we're
1529     replacing it with is actually smaller.  (Don't perform this check
1530     if we're replacing a delta; in that case, we're going for a time
1531     optimization, not a space optimization.)  */
1532  {
1533    representation_t *old_rep;
1534    const char *str_key;
1535
1536    SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool));
1537    if (old_rep->kind == rep_kind_fulltext)
1538      {
1539        svn_filesize_t old_size = 0;
1540
1541        str_key = old_rep->contents.fulltext.string_key;
1542        SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key,
1543                                        trail, pool));
1544        orig_str_keys = apr_array_make(pool, 1, sizeof(str_key));
1545        APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key;
1546
1547        /* If the new data is NOT an space optimization, destroy the
1548           string(s) we created, and get outta here. */
1549        if (diffsize >= old_size)
1550          {
1551            int i;
1552            for (i = 0; i < windows->nelts; i++)
1553              {
1554                ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1555                SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool));
1556              }
1557            return SVN_NO_ERROR;
1558          }
1559      }
1560    else if (old_rep->kind == rep_kind_delta)
1561      SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool));
1562    else /* unknown kind */
1563      return UNKNOWN_NODE_KIND(target);
1564
1565    /* Save the checksums, since the new rep needs them. */
1566    rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool);
1567    rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool);
1568  }
1569
1570  /* Hook the new strings we wrote into the rest of the filesystem by
1571     building a new representation to replace our old one. */
1572  {
1573    representation_t new_rep;
1574    rep_delta_chunk_t *chunk;
1575    apr_array_header_t *chunks;
1576    int i;
1577
1578    new_rep.kind = rep_kind_delta;
1579    new_rep.txn_id = NULL;
1580
1581    /* Migrate the old rep's checksums to the new rep. */
1582    new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool);
1583    new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool);
1584
1585    chunks = apr_array_make(pool, windows->nelts, sizeof(chunk));
1586
1587    /* Loop through the windows we wrote, creating and adding new
1588       chunks to the representation. */
1589    for (i = 0; i < windows->nelts; i++)
1590      {
1591        ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1592
1593        /* Allocate a chunk and its window */
1594        chunk = apr_palloc(pool, sizeof(*chunk));
1595        chunk->offset = ww->text_off;
1596
1597        /* Populate the window */
1598        chunk->version = new_target_baton.version;
1599        chunk->string_key = ww->key;
1600        chunk->size = ww->text_len;
1601        chunk->rep_key = source;
1602
1603        /* Add this chunk to the array. */
1604        APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk;
1605      }
1606
1607    /* Put the chunks array into the representation. */
1608    new_rep.contents.delta.chunks = chunks;
1609
1610    /* Write out the new representation. */
1611    SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool));
1612
1613    /* Delete the original pre-deltified strings. */
1614    SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool));
1615  }
1616
1617  return SVN_NO_ERROR;
1618}
1619