/* reps-strings.c : interpreting representations with respect to strings
2251881Speter *
3251881Speter * ====================================================================
4251881Speter *    Licensed to the Apache Software Foundation (ASF) under one
5251881Speter *    or more contributor license agreements.  See the NOTICE file
6251881Speter *    distributed with this work for additional information
7251881Speter *    regarding copyright ownership.  The ASF licenses this file
8251881Speter *    to you under the Apache License, Version 2.0 (the
9251881Speter *    "License"); you may not use this file except in compliance
10251881Speter *    with the License.  You may obtain a copy of the License at
11251881Speter *
12251881Speter *      http://www.apache.org/licenses/LICENSE-2.0
13251881Speter *
14251881Speter *    Unless required by applicable law or agreed to in writing,
15251881Speter *    software distributed under the License is distributed on an
16251881Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17251881Speter *    KIND, either express or implied.  See the License for the
18251881Speter *    specific language governing permissions and limitations
19251881Speter *    under the License.
20251881Speter * ====================================================================
21251881Speter */
22251881Speter
23251881Speter#include <assert.h>
24251881Speter
25251881Speter#include "svn_fs.h"
26251881Speter#include "svn_pools.h"
27251881Speter
28251881Speter#include "fs.h"
29251881Speter#include "err.h"
30251881Speter#include "trail.h"
31251881Speter#include "reps-strings.h"
32251881Speter
33251881Speter#include "bdb/reps-table.h"
34251881Speter#include "bdb/strings-table.h"
35251881Speter
36251881Speter#include "../libsvn_fs/fs-loader.h"
37251881Speter#define SVN_WANT_BDB
38251881Speter#include "svn_private_config.h"
39251881Speter
40251881Speter
41251881Speter/*** Helper Functions ***/
42251881Speter
43251881Speter
44251881Speter/* Return non-zero iff REP is mutable under transaction TXN_ID. */
45251881Speterstatic svn_boolean_t rep_is_mutable(representation_t *rep,
46251881Speter                                    const char *txn_id)
47251881Speter{
48251881Speter  if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0))
49251881Speter    return FALSE;
50251881Speter  return TRUE;
51251881Speter}
52251881Speter
/* Expand to an svn_error_createf() call that builds an
   SVN_ERR_FS_CORRUPT error reporting that the representation whose
   key is KEY has an unknown node kind.  The expansion is a single
   expression, so it can be used directly as the operand of `return'. */
#define UNKNOWN_NODE_KIND(key)                                       \
  svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,                        \
                    _("Unknown node kind for representation '%s'"),  \
                    (key))
59251881Speter
60251881Speter/* Return a `fulltext' representation, allocated in POOL, which
61251881Speter * references the string STR_KEY.
62251881Speter *
63251881Speter * If TXN_ID is non-zero and non-NULL, make the representation mutable
64251881Speter * under that TXN_ID.
65251881Speter *
66251881Speter * If STR_KEY is non-null, copy it into an allocation from POOL.
67251881Speter *
68251881Speter * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new
69251881Speter * rep; else initialize the rep with an all-zero (i.e., always
70251881Speter * successful) MD5 checksum.
71251881Speter *
72251881Speter * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new
73251881Speter * rep; else initialize the rep with an all-zero (i.e., always
74251881Speter * successful) SHA1 checksum.
75251881Speter */
76251881Speterstatic representation_t *
77251881Spetermake_fulltext_rep(const char *str_key,
78251881Speter                  const char *txn_id,
79251881Speter                  svn_checksum_t *md5_checksum,
80251881Speter                  svn_checksum_t *sha1_checksum,
81251881Speter                  apr_pool_t *pool)
82251881Speter
83251881Speter{
84251881Speter  representation_t *rep = apr_pcalloc(pool, sizeof(*rep));
85251881Speter  if (txn_id && *txn_id)
86251881Speter    rep->txn_id = apr_pstrdup(pool, txn_id);
87251881Speter  rep->kind = rep_kind_fulltext;
88251881Speter  rep->md5_checksum = svn_checksum_dup(md5_checksum, pool);
89251881Speter  rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool);
90251881Speter  rep->contents.fulltext.string_key
91251881Speter    = str_key ? apr_pstrdup(pool, str_key) : NULL;
92251881Speter  return rep;
93251881Speter}
94251881Speter
95251881Speter
96251881Speter/* Set *KEYS to an array of string keys gleaned from `delta'
97251881Speter   representation REP.  Allocate *KEYS in POOL. */
98251881Speterstatic svn_error_t *
99251881Speterdelta_string_keys(apr_array_header_t **keys,
100251881Speter                  const representation_t *rep,
101251881Speter                  apr_pool_t *pool)
102251881Speter{
103251881Speter  const char *key;
104251881Speter  int i;
105251881Speter  apr_array_header_t *chunks;
106251881Speter
107251881Speter  if (rep->kind != rep_kind_delta)
108251881Speter    return svn_error_create
109251881Speter      (SVN_ERR_FS_GENERAL, NULL,
110251881Speter       _("Representation is not of type 'delta'"));
111251881Speter
112251881Speter  /* Set up a convenience variable. */
113251881Speter  chunks = rep->contents.delta.chunks;
114251881Speter
115251881Speter  /* Initialize *KEYS to an empty array. */
116251881Speter  *keys = apr_array_make(pool, chunks->nelts, sizeof(key));
117251881Speter  if (! chunks->nelts)
118251881Speter    return SVN_NO_ERROR;
119251881Speter
120251881Speter  /* Now, push the string keys for each window into *KEYS */
121251881Speter  for (i = 0; i < chunks->nelts; i++)
122251881Speter    {
123251881Speter      rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *);
124251881Speter
125251881Speter      key = apr_pstrdup(pool, chunk->string_key);
126251881Speter      APR_ARRAY_PUSH(*keys, const char *) = key;
127251881Speter    }
128251881Speter
129251881Speter  return SVN_NO_ERROR;
130251881Speter}
131251881Speter
132251881Speter
133251881Speter/* Delete the strings associated with array KEYS in FS as part of TRAIL.  */
134251881Speterstatic svn_error_t *
135251881Speterdelete_strings(const apr_array_header_t *keys,
136251881Speter               svn_fs_t *fs,
137251881Speter               trail_t *trail,
138251881Speter               apr_pool_t *pool)
139251881Speter{
140251881Speter  int i;
141251881Speter  const char *str_key;
142251881Speter  apr_pool_t *subpool = svn_pool_create(pool);
143251881Speter
144251881Speter  for (i = 0; i < keys->nelts; i++)
145251881Speter    {
146251881Speter      svn_pool_clear(subpool);
147251881Speter      str_key = APR_ARRAY_IDX(keys, i, const char *);
148251881Speter      SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool));
149251881Speter    }
150251881Speter  svn_pool_destroy(subpool);
151251881Speter  return SVN_NO_ERROR;
152251881Speter}
153251881Speter
154251881Speter
155251881Speter
156251881Speter/*** Reading the contents from a representation. ***/
157251881Speter
158251881Speterstruct compose_handler_baton
159251881Speter{
160251881Speter  /* The combined window, and the pool it's allocated from. */
161251881Speter  svn_txdelta_window_t *window;
162251881Speter  apr_pool_t *window_pool;
163251881Speter
164251881Speter  /* If the incoming window was self-compressed, and the combined WINDOW
165251881Speter     exists from previous iterations, SOURCE_BUF will point to the
166251881Speter     expanded self-compressed window. */
167251881Speter  char *source_buf;
168251881Speter
169251881Speter  /* The trail for this operation. WINDOW_POOL will be a child of
170251881Speter     TRAIL->pool. No allocations will be made from TRAIL->pool itself. */
171251881Speter  trail_t *trail;
172251881Speter
173251881Speter  /* TRUE when no more windows have to be read/combined. */
174251881Speter  svn_boolean_t done;
175251881Speter
176251881Speter  /* TRUE if we've just started reading a new window. We need this
177251881Speter     because the svndiff handler will push a NULL window at the end of
178251881Speter     the stream, and we have to ignore that; but we must also know
179251881Speter     when it's appropriate to push a NULL window at the combiner. */
180251881Speter  svn_boolean_t init;
181251881Speter};
182251881Speter
183251881Speter
184251881Speter/* Handle one window. If BATON is emtpy, copy the WINDOW into it;
185251881Speter   otherwise, combine WINDOW with the one in BATON, unless WINDOW
186251881Speter   is self-compressed (i.e., does not copy from the source view),
187251881Speter   in which case expand. */
188251881Speter
189251881Speterstatic svn_error_t *
190251881Spetercompose_handler(svn_txdelta_window_t *window, void *baton)
191251881Speter{
192251881Speter  struct compose_handler_baton *cb = baton;
193251881Speter  SVN_ERR_ASSERT(!cb->done || window == NULL);
194251881Speter  SVN_ERR_ASSERT(cb->trail && cb->trail->pool);
195251881Speter
196251881Speter  if (!cb->init && !window)
197251881Speter    return SVN_NO_ERROR;
198251881Speter
199251881Speter  /* We should never get here if we've already expanded a
200251881Speter     self-compressed window. */
201251881Speter  SVN_ERR_ASSERT(!cb->source_buf);
202251881Speter
203251881Speter  if (cb->window)
204251881Speter    {
205251881Speter      if (window && (window->sview_len == 0 || window->src_ops == 0))
206251881Speter        {
207251881Speter          /* This is a self-compressed window. Don't combine it with
208251881Speter             the others, because the combiner may go quadratic. Instead,
209251881Speter             expand it here and signal that the combination has
210251881Speter             ended. */
211251881Speter          apr_size_t source_len = window->tview_len;
212251881Speter          SVN_ERR_ASSERT(cb->window->sview_len == source_len);
213251881Speter          cb->source_buf = apr_palloc(cb->window_pool, source_len);
214251881Speter          svn_txdelta_apply_instructions(window, NULL,
215251881Speter                                         cb->source_buf, &source_len);
216251881Speter          cb->done = TRUE;
217251881Speter        }
218251881Speter      else
219251881Speter        {
220251881Speter          /* Combine the incoming window with whatever's in the baton. */
221251881Speter          apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool);
222251881Speter          svn_txdelta_window_t *composite;
223251881Speter
224251881Speter          composite = svn_txdelta_compose_windows(window, cb->window,
225251881Speter                                                  composite_pool);
226251881Speter          svn_pool_destroy(cb->window_pool);
227251881Speter          cb->window = composite;
228251881Speter          cb->window_pool = composite_pool;
229251881Speter          cb->done = (composite->sview_len == 0 || composite->src_ops == 0);
230251881Speter        }
231251881Speter    }
232251881Speter  else if (window)
233251881Speter    {
234251881Speter      /* Copy the (first) window into the baton. */
235251881Speter      apr_pool_t *window_pool = svn_pool_create(cb->trail->pool);
236251881Speter      SVN_ERR_ASSERT(cb->window_pool == NULL);
237251881Speter      cb->window = svn_txdelta_window_dup(window, window_pool);
238251881Speter      cb->window_pool = window_pool;
239251881Speter      cb->done = (window->sview_len == 0 || window->src_ops == 0);
240251881Speter    }
241251881Speter  else
242251881Speter    cb->done = TRUE;
243251881Speter
244251881Speter  cb->init = FALSE;
245251881Speter  return SVN_NO_ERROR;
246251881Speter}
247251881Speter
248251881Speter
249251881Speter
250251881Speter/* Read one delta window from REP[CUR_CHUNK] and push it at the
251251881Speter   composition handler. */
252251881Speter
253251881Speterstatic svn_error_t *
254251881Speterget_one_window(struct compose_handler_baton *cb,
255251881Speter               svn_fs_t *fs,
256251881Speter               representation_t *rep,
257251881Speter               int cur_chunk)
258251881Speter{
259251881Speter  svn_stream_t *wstream;
260251881Speter  char diffdata[4096];   /* hunk of svndiff data */
261251881Speter  svn_filesize_t off;    /* offset into svndiff data */
262251881Speter  apr_size_t amt;        /* how much svndiff data to/was read */
263251881Speter  const char *str_key;
264251881Speter
265251881Speter  apr_array_header_t *chunks = rep->contents.delta.chunks;
266251881Speter  rep_delta_chunk_t *this_chunk, *first_chunk;
267251881Speter
268251881Speter  cb->init = TRUE;
269251881Speter  if (chunks->nelts <= cur_chunk)
270251881Speter    return compose_handler(NULL, cb);
271251881Speter
272251881Speter  /* Set up a window handling stream for the svndiff data. */
273251881Speter  wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE,
274251881Speter                                      cb->trail->pool);
275251881Speter
276251881Speter  /* First things first:  send the "SVN"{version} header through the
277251881Speter     stream.  ### For now, we will just use the version specified
278251881Speter     in the first chunk, and then verify that no chunks have a
279251881Speter     different version number than the one used.  In the future,
280251881Speter     we might simply convert chunks that use a different version
281251881Speter     of the diff format -- or, heck, a different format
282251881Speter     altogether -- to the format/version of the first chunk.  */
283251881Speter  first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*);
284251881Speter  diffdata[0] = 'S';
285251881Speter  diffdata[1] = 'V';
286251881Speter  diffdata[2] = 'N';
287251881Speter  diffdata[3] = (char) (first_chunk->version);
288251881Speter  amt = 4;
289251881Speter  SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
290251881Speter  /* FIXME: The stream write handler is borked; assert (amt == 4); */
291251881Speter
292251881Speter  /* Get this string key which holds this window's data.
293251881Speter     ### todo: make sure this is an `svndiff' DIFF skel here. */
294251881Speter  this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
295251881Speter  str_key = this_chunk->string_key;
296251881Speter
297251881Speter  /* Run through the svndiff data, at least as far as necessary. */
298251881Speter  off = 0;
299251881Speter  do
300251881Speter    {
301251881Speter      amt = sizeof(diffdata);
302251881Speter      SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata,
303251881Speter                                      off, &amt, cb->trail,
304251881Speter                                      cb->trail->pool));
305251881Speter      off += amt;
306251881Speter      SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
307251881Speter    }
308251881Speter  while (amt != 0);
309251881Speter  SVN_ERR(svn_stream_close(wstream));
310251881Speter
311251881Speter  SVN_ERR_ASSERT(!cb->init);
312251881Speter  SVN_ERR_ASSERT(cb->window != NULL);
313251881Speter  SVN_ERR_ASSERT(cb->window_pool != NULL);
314251881Speter  return SVN_NO_ERROR;
315251881Speter}
316251881Speter
317251881Speter
318251881Speter/* Undeltify a range of data. DELTAS is the set of delta windows to
319251881Speter   combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
320251881Speter   delta chunk we're starting from. OFFSET is the relative offset of
321251881Speter   the requested data within the chunk; BUF and LEN are what we're
322251881Speter   undeltifying to. */
323251881Speter
324251881Speterstatic svn_error_t *
325251881Speterrep_undeltify_range(svn_fs_t *fs,
326251881Speter                    const apr_array_header_t *deltas,
327251881Speter                    representation_t *fulltext,
328251881Speter                    int cur_chunk,
329251881Speter                    char *buf,
330251881Speter                    apr_size_t offset,
331251881Speter                    apr_size_t *len,
332251881Speter                    trail_t *trail,
333251881Speter                    apr_pool_t *pool)
334251881Speter{
335251881Speter  apr_size_t len_read = 0;
336251881Speter
337251881Speter  do
338251881Speter    {
339251881Speter      struct compose_handler_baton cb = { 0 };
340251881Speter      char *source_buf, *target_buf;
341251881Speter      apr_size_t target_len;
342251881Speter      int cur_rep;
343251881Speter
344251881Speter      cb.trail = trail;
345251881Speter      cb.done = FALSE;
346251881Speter      for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep)
347251881Speter        {
348251881Speter          representation_t *const rep =
349251881Speter            APR_ARRAY_IDX(deltas, cur_rep, representation_t*);
350251881Speter          SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk));
351251881Speter        }
352251881Speter
353251881Speter      if (!cb.window)
354251881Speter          /* That's it, no more source data is available. */
355251881Speter          break;
356251881Speter
357251881Speter      /* The source view length should not be 0 if there are source
358251881Speter         copy ops in the window. */
359251881Speter      SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0);
360251881Speter
361251881Speter      /* cb.window is the combined delta window. Read the source text
362251881Speter         into a buffer. */
363251881Speter      if (cb.source_buf)
364251881Speter        {
365251881Speter          /* The combiner already created the source text from a
366251881Speter             self-compressed window. */
367251881Speter          source_buf = cb.source_buf;
368251881Speter        }
369251881Speter      else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0)
370251881Speter        {
371251881Speter          apr_size_t source_len = cb.window->sview_len;
372251881Speter          source_buf = apr_palloc(cb.window_pool, source_len);
373251881Speter          SVN_ERR(svn_fs_bdb__string_read
374251881Speter                  (fs, fulltext->contents.fulltext.string_key,
375251881Speter                   source_buf, cb.window->sview_offset, &source_len,
376251881Speter                   trail, pool));
377251881Speter          if (source_len != cb.window->sview_len)
378251881Speter            return svn_error_create
379251881Speter                (SVN_ERR_FS_CORRUPT, NULL,
380251881Speter                 _("Svndiff source length inconsistency"));
381251881Speter        }
382251881Speter      else
383251881Speter        {
384251881Speter          source_buf = NULL;    /* Won't read anything from here. */
385251881Speter        }
386251881Speter
387251881Speter      if (offset > 0)
388251881Speter        {
389251881Speter          target_len = *len - len_read + offset;
390251881Speter          target_buf = apr_palloc(cb.window_pool, target_len);
391251881Speter        }
392251881Speter      else
393251881Speter        {
394251881Speter          target_len = *len - len_read;
395251881Speter          target_buf = buf;
396251881Speter        }
397251881Speter
398251881Speter      svn_txdelta_apply_instructions(cb.window, source_buf,
399251881Speter                                     target_buf, &target_len);
400251881Speter      if (offset > 0)
401251881Speter        {
402251881Speter          SVN_ERR_ASSERT(target_len > offset);
403251881Speter          target_len -= offset;
404251881Speter          memcpy(buf, target_buf + offset, target_len);
405251881Speter          offset = 0; /* Read from the beginning of the next chunk. */
406251881Speter        }
407251881Speter      /* Don't need this window any more. */
408251881Speter      svn_pool_destroy(cb.window_pool);
409251881Speter
410251881Speter      len_read += target_len;
411251881Speter      buf += target_len;
412251881Speter      ++cur_chunk;
413251881Speter    }
414251881Speter  while (len_read < *len);
415251881Speter
416251881Speter  *len = len_read;
417251881Speter  return SVN_NO_ERROR;
418251881Speter}
419251881Speter
420251881Speter
421251881Speter
422251881Speter/* Calculate the index of the chunk in REP that contains REP_OFFSET,
423251881Speter   and find the relative CHUNK_OFFSET within the chunk.
424251881Speter   Return -1 if offset is beyond the end of the represented data.
425251881Speter   ### The basic assumption is that all delta windows are the same size
426251881Speter   and aligned at the same offset, so this number is the same in all
427251881Speter   dependent deltas.  Oh, and the chunks in REP must be ordered. */
428251881Speter
429251881Speterstatic int
430251881Speterget_chunk_offset(representation_t *rep,
431251881Speter                 svn_filesize_t rep_offset,
432251881Speter                 apr_size_t *chunk_offset)
433251881Speter{
434251881Speter  const apr_array_header_t *chunks = rep->contents.delta.chunks;
435251881Speter  int cur_chunk;
436251881Speter  assert(chunks->nelts);
437251881Speter
438251881Speter  /* ### Yes, this is a linear search.  I'll change this to bisection
439251881Speter     the very second we notice it's slowing us down. */
440251881Speter  for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk)
441251881Speter  {
442251881Speter    const rep_delta_chunk_t *const this_chunk
443251881Speter      = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
444251881Speter
445251881Speter    if ((this_chunk->offset + this_chunk->size) > rep_offset)
446251881Speter      {
447251881Speter        assert(this_chunk->offset <= rep_offset);
448251881Speter        assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE);
449251881Speter        *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset);
450251881Speter        return cur_chunk;
451251881Speter      }
452251881Speter  }
453251881Speter
454251881Speter  return -1;
455251881Speter}
456251881Speter
457251881Speter/* Copy into BUF *LEN bytes starting at OFFSET from the string
458251881Speter   represented via REP_KEY in FS, as part of TRAIL.
459251881Speter   The number of bytes actually copied is stored in *LEN.  */
460251881Speterstatic svn_error_t *
461251881Speterrep_read_range(svn_fs_t *fs,
462251881Speter               const char *rep_key,
463251881Speter               svn_filesize_t offset,
464251881Speter               char *buf,
465251881Speter               apr_size_t *len,
466251881Speter               trail_t *trail,
467251881Speter               apr_pool_t *pool)
468251881Speter{
469251881Speter  representation_t *rep;
470251881Speter  apr_size_t chunk_offset;
471251881Speter
472251881Speter  /* Read in our REP. */
473251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
474251881Speter  if (rep->kind == rep_kind_fulltext)
475251881Speter    {
476251881Speter      SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key,
477251881Speter                                      buf, offset, len, trail, pool));
478251881Speter    }
479251881Speter  else if (rep->kind == rep_kind_delta)
480251881Speter    {
481251881Speter      const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset);
482251881Speter      if (cur_chunk < 0)
483251881Speter        *len = 0;
484251881Speter      else
485251881Speter        {
486251881Speter          svn_error_t *err;
487251881Speter          /* Preserve for potential use in error message. */
488251881Speter          const char *first_rep_key = rep_key;
489251881Speter          /* Make a list of all the rep's we need to undeltify this range.
490251881Speter             We'll have to read them within this trail anyway, so we might
491251881Speter             as well do it once and up front. */
492251881Speter          apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep));
493251881Speter          do
494251881Speter            {
495251881Speter              const rep_delta_chunk_t *const first_chunk
496251881Speter                = APR_ARRAY_IDX(rep->contents.delta.chunks,
497251881Speter                                0, rep_delta_chunk_t*);
498251881Speter              const rep_delta_chunk_t *const chunk
499251881Speter                = APR_ARRAY_IDX(rep->contents.delta.chunks,
500251881Speter                                cur_chunk, rep_delta_chunk_t*);
501251881Speter
502251881Speter              /* Verify that this chunk is of the same version as the first. */
503251881Speter              if (first_chunk->version != chunk->version)
504251881Speter                return svn_error_createf
505251881Speter                  (SVN_ERR_FS_CORRUPT, NULL,
506251881Speter                   _("Diff version inconsistencies in representation '%s'"),
507251881Speter                   rep_key);
508251881Speter
509251881Speter              rep_key = chunk->rep_key;
510251881Speter              APR_ARRAY_PUSH(reps, representation_t *) = rep;
511251881Speter              SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key,
512251881Speter                                           trail, pool));
513251881Speter            }
514251881Speter          while (rep->kind == rep_kind_delta
515251881Speter                 && rep->contents.delta.chunks->nelts > cur_chunk);
516251881Speter
517251881Speter          /* Right. We've either just read the fulltext rep, or a rep that's
518251881Speter             too short, in which case we'll undeltify without source data.*/
519251881Speter          if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext)
520251881Speter            return UNKNOWN_NODE_KIND(rep_key);
521251881Speter
522251881Speter          if (rep->kind == rep_kind_delta)
523251881Speter            rep = NULL;         /* Don't use source data */
524251881Speter
525251881Speter          err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf,
526251881Speter                                    chunk_offset, len, trail, pool);
527251881Speter          if (err)
528251881Speter            {
529251881Speter              if (err->apr_err == SVN_ERR_FS_CORRUPT)
530251881Speter                return svn_error_createf
531251881Speter                  (SVN_ERR_FS_CORRUPT, err,
532251881Speter                   _("Corruption detected whilst reading delta chain from "
533251881Speter                     "representation '%s' to '%s'"), first_rep_key, rep_key);
534251881Speter              else
535251881Speter                return svn_error_trace(err);
536251881Speter            }
537251881Speter        }
538251881Speter    }
539251881Speter  else /* unknown kind */
540251881Speter    return UNKNOWN_NODE_KIND(rep_key);
541251881Speter
542251881Speter  return SVN_NO_ERROR;
543251881Speter}
544251881Speter
545251881Speter
546251881Spetersvn_error_t *
547251881Spetersvn_fs_base__get_mutable_rep(const char **new_rep_key,
548251881Speter                             const char *rep_key,
549251881Speter                             svn_fs_t *fs,
550251881Speter                             const char *txn_id,
551251881Speter                             trail_t *trail,
552251881Speter                             apr_pool_t *pool)
553251881Speter{
554251881Speter  representation_t *rep = NULL;
555251881Speter  const char *new_str = NULL;
556251881Speter
557251881Speter  /* We were passed an existing REP_KEY, so examine it.  If it is
558251881Speter     mutable already, then just return REP_KEY as the mutable result
559251881Speter     key.  */
560251881Speter  if (rep_key && (rep_key[0] != '\0'))
561251881Speter    {
562251881Speter      SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
563251881Speter      if (rep_is_mutable(rep, txn_id))
564251881Speter        {
565251881Speter          *new_rep_key = rep_key;
566251881Speter          return SVN_NO_ERROR;
567251881Speter        }
568251881Speter    }
569251881Speter
570251881Speter  /* Either we weren't provided a base key to examine, or the base key
571251881Speter     we were provided was not mutable.  So, let's make a new
572251881Speter     representation and return its key to the caller. */
573251881Speter  SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool));
574251881Speter  rep = make_fulltext_rep(new_str, txn_id,
575251881Speter                          svn_checksum_empty_checksum(svn_checksum_md5,
576251881Speter                                                      pool),
577251881Speter                          svn_checksum_empty_checksum(svn_checksum_sha1,
578251881Speter                                                      pool),
579251881Speter                          pool);
580251881Speter  return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool);
581251881Speter}
582251881Speter
583251881Speter
584251881Spetersvn_error_t *
585251881Spetersvn_fs_base__delete_rep_if_mutable(svn_fs_t *fs,
586251881Speter                                   const char *rep_key,
587251881Speter                                   const char *txn_id,
588251881Speter                                   trail_t *trail,
589251881Speter                                   apr_pool_t *pool)
590251881Speter{
591251881Speter  representation_t *rep;
592251881Speter
593251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
594251881Speter  if (! rep_is_mutable(rep, txn_id))
595251881Speter    return SVN_NO_ERROR;
596251881Speter
597251881Speter  if (rep->kind == rep_kind_fulltext)
598251881Speter    {
599251881Speter      SVN_ERR(svn_fs_bdb__string_delete(fs,
600251881Speter                                        rep->contents.fulltext.string_key,
601251881Speter                                        trail, pool));
602251881Speter    }
603251881Speter  else if (rep->kind == rep_kind_delta)
604251881Speter    {
605251881Speter      apr_array_header_t *keys;
606251881Speter      SVN_ERR(delta_string_keys(&keys, rep, pool));
607251881Speter      SVN_ERR(delete_strings(keys, fs, trail, pool));
608251881Speter    }
609251881Speter  else /* unknown kind */
610251881Speter    return UNKNOWN_NODE_KIND(rep_key);
611251881Speter
612251881Speter  return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool);
613251881Speter}
614251881Speter
615251881Speter
616251881Speter
617251881Speter/*** Reading and writing data via representations. ***/
618251881Speter
619251881Speter/** Reading. **/
620251881Speter
621251881Speterstruct rep_read_baton
622251881Speter{
623251881Speter  /* The FS from which we're reading. */
624251881Speter  svn_fs_t *fs;
625251881Speter
626251881Speter  /* The representation skel whose contents we want to read.  If this
627251881Speter     is NULL, the rep has never had any contents, so all reads fetch 0
628251881Speter     bytes.
629251881Speter
630251881Speter     Formerly, we cached the entire rep skel here, not just the key.
631251881Speter     That way we didn't have to fetch the rep from the db every time
632251881Speter     we want to read a little bit more of the file.  Unfortunately,
633251881Speter     this has a problem: if, say, a file's representation changes
634251881Speter     while we're reading (changes from fulltext to delta, for
635251881Speter     example), we'll never know it.  So for correctness, we now
636251881Speter     refetch the representation skel every time we want to read
637251881Speter     another chunk.  */
638251881Speter  const char *rep_key;
639251881Speter
640251881Speter  /* How many bytes have been read already. */
641251881Speter  svn_filesize_t offset;
642251881Speter
643251881Speter  /* If present, the read will be done as part of this trail, and the
644251881Speter     trail's pool will be used.  Otherwise, see `pool' below.  */
645251881Speter  trail_t *trail;
646251881Speter
647251881Speter  /* MD5 checksum context.  Initialized when the baton is created, updated as
648251881Speter     we read data, and finalized when the stream is closed. */
649251881Speter  svn_checksum_ctx_t *md5_checksum_ctx;
650251881Speter
651251881Speter  /* Final resting place of the checksum created by md5_checksum_cxt. */
652251881Speter  svn_checksum_t *md5_checksum;
653251881Speter
654251881Speter  /* SHA1 checksum context.  Initialized when the baton is created, updated as
655251881Speter     we read data, and finalized when the stream is closed. */
656251881Speter  svn_checksum_ctx_t *sha1_checksum_ctx;
657251881Speter
658251881Speter  /* Final resting place of the checksum created by sha1_checksum_cxt. */
659251881Speter  svn_checksum_t *sha1_checksum;
660251881Speter
661251881Speter  /* The length of the rep's contents (as fulltext, that is,
662251881Speter     independent of how the rep actually stores the data.)  This is
663251881Speter     retrieved when the baton is created, and used to determine when
664251881Speter     we have read the last byte, at which point we compare checksums.
665251881Speter
666251881Speter     Getting this at baton creation time makes interleaved reads and
667251881Speter     writes on the same rep in the same trail impossible.  But we're
668251881Speter     not doing that, and probably no one ever should.  And anyway if
669251881Speter     they do, they should see problems immediately. */
670251881Speter  svn_filesize_t size;
671251881Speter
672251881Speter  /* Set to FALSE when the baton is created, TRUE when the checksum_ctx
673251881Speter     is digestified. */
674251881Speter  svn_boolean_t checksum_finalized;
675251881Speter
676251881Speter  /* Used for temporary allocations.  This pool is cleared at the
677251881Speter     start of each invocation of the relevant stream read function --
678251881Speter     see rep_read_contents().  */
679251881Speter  apr_pool_t *scratch_pool;
680251881Speter
681251881Speter};
682251881Speter
683251881Speter
684251881Speterstatic svn_error_t *
685251881Speterrep_read_get_baton(struct rep_read_baton **rb_p,
686251881Speter                   svn_fs_t *fs,
687251881Speter                   const char *rep_key,
688251881Speter                   svn_boolean_t use_trail_for_reads,
689251881Speter                   trail_t *trail,
690251881Speter                   apr_pool_t *pool)
691251881Speter{
692251881Speter  struct rep_read_baton *b;
693251881Speter
694251881Speter  b = apr_pcalloc(pool, sizeof(*b));
695251881Speter  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
696251881Speter  b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
697251881Speter
698251881Speter  if (rep_key)
699251881Speter    SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key,
700251881Speter                                           trail, pool));
701251881Speter  else
702251881Speter    b->size = 0;
703251881Speter
704251881Speter  b->checksum_finalized = FALSE;
705251881Speter  b->fs = fs;
706251881Speter  b->trail = use_trail_for_reads ? trail : NULL;
707251881Speter  b->scratch_pool = svn_pool_create(pool);
708251881Speter  b->rep_key = rep_key;
709251881Speter  b->offset = 0;
710251881Speter
711251881Speter  *rb_p = b;
712251881Speter
713251881Speter  return SVN_NO_ERROR;
714251881Speter}
715251881Speter
716251881Speter
717251881Speter
718251881Speter/*** Retrieving data. ***/
719251881Speter
720251881Spetersvn_error_t *
721251881Spetersvn_fs_base__rep_contents_size(svn_filesize_t *size_p,
722251881Speter                               svn_fs_t *fs,
723251881Speter                               const char *rep_key,
724251881Speter                               trail_t *trail,
725251881Speter                               apr_pool_t *pool)
726251881Speter{
727251881Speter  representation_t *rep;
728251881Speter
729251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
730251881Speter
731251881Speter  if (rep->kind == rep_kind_fulltext)
732251881Speter    {
733251881Speter      /* Get the size by asking Berkeley for the string's length. */
734251881Speter      SVN_ERR(svn_fs_bdb__string_size(size_p, fs,
735251881Speter                                      rep->contents.fulltext.string_key,
736251881Speter                                      trail, pool));
737251881Speter    }
738251881Speter  else if (rep->kind == rep_kind_delta)
739251881Speter    {
740251881Speter      /* Get the size by finding the last window pkg in the delta and
741251881Speter         adding its offset to its size.  This way, we won't even be
742251881Speter         messed up by overlapping windows, as long as the window pkgs
743251881Speter         are still ordered. */
744251881Speter      apr_array_header_t *chunks = rep->contents.delta.chunks;
745251881Speter      rep_delta_chunk_t *last_chunk;
746251881Speter
747251881Speter      SVN_ERR_ASSERT(chunks->nelts);
748251881Speter
749251881Speter      last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1,
750251881Speter                                 rep_delta_chunk_t *);
751251881Speter      *size_p = last_chunk->offset + last_chunk->size;
752251881Speter    }
753251881Speter  else /* unknown kind */
754251881Speter    return UNKNOWN_NODE_KIND(rep_key);
755251881Speter
756251881Speter  return SVN_NO_ERROR;
757251881Speter}
758251881Speter
759251881Speter
760251881Spetersvn_error_t *
761251881Spetersvn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum,
762251881Speter                                    svn_checksum_t **sha1_checksum,
763251881Speter                                    svn_fs_t *fs,
764251881Speter                                    const char *rep_key,
765251881Speter                                    trail_t *trail,
766251881Speter                                    apr_pool_t *pool)
767251881Speter{
768251881Speter  representation_t *rep;
769251881Speter
770251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
771251881Speter  if (md5_checksum)
772251881Speter    *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
773251881Speter  if (sha1_checksum)
774251881Speter    *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool);
775251881Speter
776251881Speter  return SVN_NO_ERROR;
777251881Speter}
778251881Speter
779251881Speter
780251881Spetersvn_error_t *
781251881Spetersvn_fs_base__rep_contents(svn_string_t *str,
782251881Speter                          svn_fs_t *fs,
783251881Speter                          const char *rep_key,
784251881Speter                          trail_t *trail,
785251881Speter                          apr_pool_t *pool)
786251881Speter{
787251881Speter  svn_filesize_t contents_size;
788251881Speter  apr_size_t len;
789251881Speter  char *data;
790251881Speter
791251881Speter  SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key,
792251881Speter                                         trail, pool));
793251881Speter
794251881Speter  /* What if the contents are larger than we can handle? */
795251881Speter  if (contents_size > SVN_MAX_OBJECT_SIZE)
796251881Speter    return svn_error_createf
797251881Speter      (SVN_ERR_FS_GENERAL, NULL,
798251881Speter       _("Rep contents are too large: "
799251881Speter         "got %s, limit is %s"),
800251881Speter       apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size),
801251881Speter       apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE));
802251881Speter  else
803251881Speter    str->len = (apr_size_t) contents_size;
804251881Speter
805251881Speter  data = apr_palloc(pool, str->len);
806251881Speter  str->data = data;
807251881Speter  len = str->len;
808251881Speter  SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool));
809251881Speter
810251881Speter  /* Paranoia. */
811251881Speter  if (len != str->len)
812251881Speter    return svn_error_createf
813251881Speter      (SVN_ERR_FS_CORRUPT, NULL,
814251881Speter       _("Failure reading representation '%s'"), rep_key);
815251881Speter
816251881Speter  /* Just the standard paranoia. */
817251881Speter  {
818251881Speter    representation_t *rep;
819251881Speter    svn_checksum_t *checksum, *rep_checksum;
820251881Speter
821251881Speter    SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
822251881Speter    rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum;
823251881Speter    SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len,
824251881Speter                         pool));
825251881Speter
826251881Speter    if (! svn_checksum_match(checksum, rep_checksum))
827251881Speter      return svn_error_create(SVN_ERR_FS_CORRUPT,
828251881Speter                svn_checksum_mismatch_err(rep_checksum, checksum, pool,
829251881Speter                            _("Checksum mismatch on representation '%s'"),
830251881Speter                            rep_key),
831251881Speter                NULL);
832251881Speter  }
833251881Speter
834251881Speter  return SVN_NO_ERROR;
835251881Speter}
836251881Speter
837251881Speter
838251881Speterstruct read_rep_args
839251881Speter{
840251881Speter  struct rep_read_baton *rb;   /* The data source.             */
841251881Speter  char *buf;                   /* Where to put what we read.   */
842251881Speter  apr_size_t *len;             /* How much to read / was read. */
843251881Speter};
844251881Speter
845251881Speter
846251881Speter/* BATON is of type `read_rep_args':
847251881Speter
848251881Speter   Read into BATON->rb->buf the *(BATON->len) bytes starting at
849251881Speter   BATON->rb->offset from the data represented at BATON->rb->rep_key
850251881Speter   in BATON->rb->fs, as part of TRAIL.
851251881Speter
852251881Speter   Afterwards, *(BATON->len) is the number of bytes actually read, and
853251881Speter   BATON->rb->offset is incremented by that amount.
854251881Speter
855251881Speter   If BATON->rb->rep_key is null, this is assumed to mean the file's
856251881Speter   contents have no representation, i.e., the file has no contents.
857251881Speter   In that case, if BATON->rb->offset > 0, return the error
858251881Speter   SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to
859251881Speter   zero and return.  */
860251881Speterstatic svn_error_t *
861251881Spetertxn_body_read_rep(void *baton, trail_t *trail)
862251881Speter{
863251881Speter  struct read_rep_args *args = baton;
864251881Speter
865251881Speter  if (args->rb->rep_key)
866251881Speter    {
867251881Speter      SVN_ERR(rep_read_range(args->rb->fs,
868251881Speter                             args->rb->rep_key,
869251881Speter                             args->rb->offset,
870251881Speter                             args->buf,
871251881Speter                             args->len,
872251881Speter                             trail,
873251881Speter                             args->rb->scratch_pool));
874251881Speter
875251881Speter      args->rb->offset += *(args->len);
876251881Speter
877251881Speter      /* We calculate the checksum just once, the moment we see the
878251881Speter       * last byte of data.  But we can't assume there was a short
879251881Speter       * read.  The caller may have known the length of the data and
880251881Speter       * requested exactly that amount, so there would never be a
881251881Speter       * short read.  (That's why the read baton has to know the
882251881Speter       * length of the data in advance.)
883251881Speter       *
884251881Speter       * On the other hand, some callers invoke the stream reader in a
885251881Speter       * loop whose termination condition is that the read returned
886251881Speter       * zero bytes of data -- which usually results in the read
887251881Speter       * function being called one more time *after* the call that got
888251881Speter       * a short read (indicating end-of-stream).
889251881Speter       *
890251881Speter       * The conditions below ensure that we compare checksums even
891251881Speter       * when there is no short read associated with the last byte of
892251881Speter       * data, while also ensuring that it's harmless to repeatedly
893251881Speter       * read 0 bytes from the stream.
894251881Speter       */
895251881Speter      if (! args->rb->checksum_finalized)
896251881Speter        {
897251881Speter          SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf,
898251881Speter                                      *(args->len)));
899251881Speter          SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf,
900251881Speter                                      *(args->len)));
901251881Speter
902251881Speter          if (args->rb->offset == args->rb->size)
903251881Speter            {
904251881Speter              representation_t *rep;
905251881Speter
906251881Speter              SVN_ERR(svn_checksum_final(&args->rb->md5_checksum,
907251881Speter                                         args->rb->md5_checksum_ctx,
908251881Speter                                         trail->pool));
909251881Speter              SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum,
910251881Speter                                         args->rb->sha1_checksum_ctx,
911251881Speter                                         trail->pool));
912251881Speter              args->rb->checksum_finalized = TRUE;
913251881Speter
914251881Speter              SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs,
915251881Speter                                           args->rb->rep_key,
916251881Speter                                           trail, trail->pool));
917251881Speter
918251881Speter              if (rep->md5_checksum
919251881Speter                  && (! svn_checksum_match(rep->md5_checksum,
920251881Speter                                           args->rb->md5_checksum)))
921251881Speter                return svn_error_create(SVN_ERR_FS_CORRUPT,
922251881Speter                        svn_checksum_mismatch_err(rep->md5_checksum,
923299742Sdim                             args->rb->md5_checksum, trail->pool,
924251881Speter                             _("MD5 checksum mismatch on representation '%s'"),
925251881Speter                             args->rb->rep_key),
926251881Speter                        NULL);
927251881Speter
928251881Speter              if (rep->sha1_checksum
929251881Speter                  && (! svn_checksum_match(rep->sha1_checksum,
930251881Speter                                           args->rb->sha1_checksum)))
931251881Speter                return svn_error_createf(SVN_ERR_FS_CORRUPT,
932251881Speter                        svn_checksum_mismatch_err(rep->sha1_checksum,
933251881Speter                            args->rb->sha1_checksum, trail->pool,
934251881Speter                            _("SHA1 checksum mismatch on representation '%s'"),
935251881Speter                            args->rb->rep_key),
936251881Speter                        NULL);
937251881Speter            }
938251881Speter        }
939251881Speter    }
940251881Speter  else if (args->rb->offset > 0)
941251881Speter    {
942251881Speter      return
943251881Speter        svn_error_create
944251881Speter        (SVN_ERR_FS_REP_CHANGED, NULL,
945251881Speter         _("Null rep, but offset past zero already"));
946251881Speter    }
947251881Speter  else
948251881Speter    *(args->len) = 0;
949251881Speter
950251881Speter  return SVN_NO_ERROR;
951251881Speter}
952251881Speter
953251881Speter
954251881Speterstatic svn_error_t *
955251881Speterrep_read_contents(void *baton, char *buf, apr_size_t *len)
956251881Speter{
957251881Speter  struct rep_read_baton *rb = baton;
958251881Speter  struct read_rep_args args;
959251881Speter
960251881Speter  /* Clear the scratch pool of the results of previous invocations. */
961251881Speter  svn_pool_clear(rb->scratch_pool);
962251881Speter
963251881Speter  args.rb = rb;
964251881Speter  args.buf = buf;
965251881Speter  args.len = len;
966251881Speter
967251881Speter  /* If we got a trail, use it; else make one. */
968251881Speter  if (rb->trail)
969251881Speter    SVN_ERR(txn_body_read_rep(&args, rb->trail));
970251881Speter  else
971251881Speter    {
972251881Speter      /* In the case of reading from the db, any returned data should
973251881Speter         live in our pre-allocated buffer, so the whole operation can
974251881Speter         happen within a single malloc/free cycle.  This prevents us
975251881Speter         from creating millions of unnecessary trail subpools when
976251881Speter         reading a big file.  */
977251881Speter      SVN_ERR(svn_fs_base__retry_txn(rb->fs,
978251881Speter                                     txn_body_read_rep,
979251881Speter                                     &args,
980251881Speter                                     TRUE,
981251881Speter                                     rb->scratch_pool));
982251881Speter    }
983251881Speter  return SVN_NO_ERROR;
984251881Speter}
985251881Speter
986251881Speter
987251881Speter/** Writing. **/
988251881Speter
989251881Speter
990251881Speterstruct rep_write_baton
991251881Speter{
992251881Speter  /* The FS in which we're writing. */
993251881Speter  svn_fs_t *fs;
994251881Speter
995251881Speter  /* The representation skel whose contents we want to write. */
996251881Speter  const char *rep_key;
997251881Speter
998251881Speter  /* The transaction id under which this write action will take
999251881Speter     place. */
1000251881Speter  const char *txn_id;
1001251881Speter
1002251881Speter  /* If present, do the write as part of this trail, and use trail's
1003251881Speter     pool.  Otherwise, see `pool' below.  */
1004251881Speter  trail_t *trail;
1005251881Speter
1006251881Speter  /* SHA1 and MD5 checksums.  Initialized when the baton is created,
1007251881Speter     updated as we write data, and finalized and stored when the
1008251881Speter     stream is closed. */
1009251881Speter  svn_checksum_ctx_t *md5_checksum_ctx;
1010251881Speter  svn_checksum_t *md5_checksum;
1011251881Speter  svn_checksum_ctx_t *sha1_checksum_ctx;
1012251881Speter  svn_checksum_t *sha1_checksum;
1013251881Speter  svn_boolean_t finalized;
1014251881Speter
1015251881Speter  /* Used for temporary allocations, iff `trail' (above) is null.  */
1016251881Speter  apr_pool_t *pool;
1017251881Speter
1018251881Speter};
1019251881Speter
1020251881Speter
1021251881Speterstatic struct rep_write_baton *
1022251881Speterrep_write_get_baton(svn_fs_t *fs,
1023251881Speter                    const char *rep_key,
1024251881Speter                    const char *txn_id,
1025251881Speter                    trail_t *trail,
1026251881Speter                    apr_pool_t *pool)
1027251881Speter{
1028251881Speter  struct rep_write_baton *b;
1029251881Speter
1030251881Speter  b = apr_pcalloc(pool, sizeof(*b));
1031251881Speter  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1032251881Speter  b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
1033251881Speter  b->fs = fs;
1034251881Speter  b->trail = trail;
1035251881Speter  b->pool = pool;
1036251881Speter  b->rep_key = rep_key;
1037251881Speter  b->txn_id = txn_id;
1038251881Speter  return b;
1039251881Speter}
1040251881Speter
1041251881Speter
1042251881Speter
1043251881Speter/* Write LEN bytes from BUF into the end of the string represented via
1044251881Speter   REP_KEY in FS, as part of TRAIL.  If the representation is not
1045251881Speter   mutable, return the error SVN_FS_REP_NOT_MUTABLE. */
1046251881Speterstatic svn_error_t *
1047251881Speterrep_write(svn_fs_t *fs,
1048251881Speter          const char *rep_key,
1049251881Speter          const char *buf,
1050251881Speter          apr_size_t len,
1051251881Speter          const char *txn_id,
1052251881Speter          trail_t *trail,
1053251881Speter          apr_pool_t *pool)
1054251881Speter{
1055251881Speter  representation_t *rep;
1056251881Speter
1057251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1058251881Speter
1059251881Speter  if (! rep_is_mutable(rep, txn_id))
1060251881Speter    return svn_error_createf
1061251881Speter      (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1062251881Speter       _("Rep '%s' is not mutable"), rep_key);
1063251881Speter
1064251881Speter  if (rep->kind == rep_kind_fulltext)
1065251881Speter    {
1066251881Speter      SVN_ERR(svn_fs_bdb__string_append
1067251881Speter              (fs, &(rep->contents.fulltext.string_key), len, buf,
1068251881Speter               trail, pool));
1069251881Speter    }
1070251881Speter  else if (rep->kind == rep_kind_delta)
1071251881Speter    {
1072251881Speter      /* There should never be a case when we have a mutable
1073251881Speter         non-fulltext rep.  The only code that creates mutable reps is
1074251881Speter         in this file, and it creates them fulltext. */
1075251881Speter      return svn_error_createf
1076251881Speter        (SVN_ERR_FS_CORRUPT, NULL,
1077251881Speter         _("Rep '%s' both mutable and non-fulltext"), rep_key);
1078251881Speter    }
1079251881Speter  else /* unknown kind */
1080251881Speter    return UNKNOWN_NODE_KIND(rep_key);
1081251881Speter
1082251881Speter  return SVN_NO_ERROR;
1083251881Speter}
1084251881Speter
1085251881Speter
1086251881Speterstruct write_rep_args
1087251881Speter{
1088251881Speter  struct rep_write_baton *wb;   /* Destination.       */
1089251881Speter  const char *buf;              /* Data.              */
1090251881Speter  apr_size_t len;               /* How much to write. */
1091251881Speter};
1092251881Speter
1093251881Speter
1094251881Speter/* BATON is of type `write_rep_args':
1095251881Speter   Append onto BATON->wb->rep_key's contents BATON->len bytes of
1096251881Speter   data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL.
1097251881Speter
1098251881Speter   If the representation is not mutable, return the error
1099251881Speter   SVN_FS_REP_NOT_MUTABLE.  */
1100251881Speterstatic svn_error_t *
1101251881Spetertxn_body_write_rep(void *baton, trail_t *trail)
1102251881Speter{
1103251881Speter  struct write_rep_args *args = baton;
1104251881Speter
1105251881Speter  SVN_ERR(rep_write(args->wb->fs,
1106251881Speter                    args->wb->rep_key,
1107251881Speter                    args->buf,
1108251881Speter                    args->len,
1109251881Speter                    args->wb->txn_id,
1110251881Speter                    trail,
1111251881Speter                    trail->pool));
1112251881Speter  SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx,
1113251881Speter                              args->buf, args->len));
1114251881Speter  SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx,
1115251881Speter                              args->buf, args->len));
1116251881Speter  return SVN_NO_ERROR;
1117251881Speter}
1118251881Speter
1119251881Speter
1120251881Speterstatic svn_error_t *
1121251881Speterrep_write_contents(void *baton,
1122251881Speter                   const char *buf,
1123251881Speter                   apr_size_t *len)
1124251881Speter{
1125251881Speter  struct rep_write_baton *wb = baton;
1126251881Speter  struct write_rep_args args;
1127251881Speter
1128251881Speter  /* We toss LEN's indirectness because if not all the bytes are
1129251881Speter     written, it's an error, so we wouldn't be reporting anything back
1130251881Speter     through *LEN anyway. */
1131251881Speter  args.wb = wb;
1132251881Speter  args.buf = buf;
1133251881Speter  args.len = *len;
1134251881Speter
1135251881Speter  /* If we got a trail, use it; else make one. */
1136251881Speter  if (wb->trail)
1137251881Speter    SVN_ERR(txn_body_write_rep(&args, wb->trail));
1138251881Speter  else
1139251881Speter    {
1140251881Speter      /* In the case of simply writing the rep to the db, we're
1141251881Speter         *certain* that there's no data coming back to us that needs
1142251881Speter         to be preserved... so the whole operation can happen within a
1143251881Speter         single malloc/free cycle.  This prevents us from creating
1144251881Speter         millions of unnecessary trail subpools when writing a big
1145251881Speter         file. */
1146251881Speter      SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1147251881Speter                                     txn_body_write_rep,
1148251881Speter                                     &args,
1149251881Speter                                     TRUE,
1150251881Speter                                     wb->pool));
1151251881Speter    }
1152251881Speter
1153251881Speter  return SVN_NO_ERROR;
1154251881Speter}
1155251881Speter
1156251881Speter
1157251881Speter/* Helper for rep_write_close_contents(); see that doc string for
1158251881Speter   more.  BATON is of type `struct rep_write_baton'. */
1159251881Speterstatic svn_error_t *
1160251881Spetertxn_body_write_close_rep(void *baton, trail_t *trail)
1161251881Speter{
1162251881Speter  struct rep_write_baton *wb = baton;
1163251881Speter  representation_t *rep;
1164251881Speter
1165251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key,
1166251881Speter                               trail, trail->pool));
1167251881Speter  rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool);
1168251881Speter  rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool);
1169251881Speter  return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep,
1170251881Speter                               trail, trail->pool);
1171251881Speter}
1172251881Speter
1173251881Speter
1174251881Speter/* BATON is of type `struct rep_write_baton'.
1175251881Speter *
1176251881Speter * Finalize BATON->md5_context and store the resulting digest under
1177251881Speter * BATON->rep_key.
1178251881Speter */
1179251881Speterstatic svn_error_t *
1180251881Speterrep_write_close_contents(void *baton)
1181251881Speter{
1182251881Speter  struct rep_write_baton *wb = baton;
1183251881Speter
1184251881Speter  /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1185251881Speter     digestification, then we wouldn't need a stream close function at
1186251881Speter     all -- instead, we could update the stored checksum each time a
1187251881Speter     write occurred, which would have the added advantage of making
1188251881Speter     interleaving reads and writes work.  Currently, they'd fail with
1189251881Speter     a checksum mismatch, it just happens that our code never tries to
1190251881Speter     do that anyway. */
1191251881Speter
1192251881Speter  if (! wb->finalized)
1193251881Speter    {
1194251881Speter      SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx,
1195251881Speter                                 wb->pool));
1196251881Speter      SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx,
1197251881Speter                                 wb->pool));
1198251881Speter      wb->finalized = TRUE;
1199251881Speter    }
1200251881Speter
1201251881Speter  /* If we got a trail, use it; else make one. */
1202251881Speter  if (wb->trail)
1203251881Speter    return txn_body_write_close_rep(wb, wb->trail);
1204251881Speter  else
1205251881Speter    /* We need to keep our trail pool around this time so the
1206251881Speter       checksums we've calculated survive. */
1207251881Speter    return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep,
1208251881Speter                                  wb, FALSE, wb->pool);
1209251881Speter}
1210251881Speter
1211251881Speter
1212251881Speter/** Public read and write stream constructors. **/
1213251881Speter
1214251881Spetersvn_error_t *
1215251881Spetersvn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p,
1216251881Speter                                      svn_fs_t *fs,
1217251881Speter                                      const char *rep_key,
1218251881Speter                                      svn_boolean_t use_trail_for_reads,
1219251881Speter                                      trail_t *trail,
1220251881Speter                                      apr_pool_t *pool)
1221251881Speter{
1222251881Speter  struct rep_read_baton *rb;
1223251881Speter
1224251881Speter  SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads,
1225251881Speter                             trail, pool));
1226251881Speter  *rs_p = svn_stream_create(rb, pool);
1227299742Sdim  svn_stream_set_read2(*rs_p, NULL /* only full read support */,
1228299742Sdim                       rep_read_contents);
1229251881Speter
1230251881Speter  return SVN_NO_ERROR;
1231251881Speter}
1232251881Speter
1233251881Speter
1234251881Speter/* Clear the contents of REP_KEY, so that it represents the empty
1235251881Speter   string, as part of TRAIL.  TXN_ID is the id of the Subversion
1236251881Speter   transaction under which this occurs.  If REP_KEY is not mutable,
1237251881Speter   return the error SVN_ERR_FS_REP_NOT_MUTABLE.  */
1238251881Speterstatic svn_error_t *
1239251881Speterrep_contents_clear(svn_fs_t *fs,
1240251881Speter                   const char *rep_key,
1241251881Speter                   const char *txn_id,
1242251881Speter                   trail_t *trail,
1243251881Speter                   apr_pool_t *pool)
1244251881Speter{
1245251881Speter  representation_t *rep;
1246251881Speter  const char *str_key;
1247251881Speter
1248251881Speter  SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1249251881Speter
1250251881Speter  /* Make sure it's mutable. */
1251251881Speter  if (! rep_is_mutable(rep, txn_id))
1252251881Speter    return svn_error_createf
1253251881Speter      (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1254251881Speter       _("Rep '%s' is not mutable"), rep_key);
1255251881Speter
1256251881Speter  SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext);
1257251881Speter
1258251881Speter  /* If rep has no string, just return success.  Else, clear the
1259251881Speter     underlying string.  */
1260251881Speter  str_key = rep->contents.fulltext.string_key;
1261251881Speter  if (str_key && *str_key)
1262251881Speter    {
1263251881Speter      SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool));
1264251881Speter      rep->md5_checksum = NULL;
1265251881Speter      rep->sha1_checksum = NULL;
1266251881Speter      SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool));
1267251881Speter    }
1268251881Speter  return SVN_NO_ERROR;
1269251881Speter}
1270251881Speter
1271251881Speter
1272251881Spetersvn_error_t *
1273251881Spetersvn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p,
1274251881Speter                                       svn_fs_t *fs,
1275251881Speter                                       const char *rep_key,
1276251881Speter                                       const char *txn_id,
1277251881Speter                                       svn_boolean_t use_trail_for_writes,
1278251881Speter                                       trail_t *trail,
1279251881Speter                                       apr_pool_t *pool)
1280251881Speter{
1281251881Speter  struct rep_write_baton *wb;
1282251881Speter
1283251881Speter  /* Clear the current rep contents (free mutability check!). */
1284251881Speter  SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool));
1285251881Speter
1286251881Speter  /* Now, generate the write baton and stream. */
1287251881Speter  wb = rep_write_get_baton(fs, rep_key, txn_id,
1288251881Speter                           use_trail_for_writes ? trail : NULL, pool);
1289251881Speter  *ws_p = svn_stream_create(wb, pool);
1290251881Speter  svn_stream_set_write(*ws_p, rep_write_contents);
1291251881Speter  svn_stream_set_close(*ws_p, rep_write_close_contents);
1292251881Speter
1293251881Speter  return SVN_NO_ERROR;
1294251881Speter}
1295251881Speter
1296251881Speter
1297251881Speter
1298251881Speter/*** Deltified storage. ***/
1299251881Speter
1300251881Speter/* Baton for svn_write_fn_t write_string_set(). */
1301251881Speterstruct write_svndiff_strings_baton
1302251881Speter{
1303251881Speter  /* The fs where lives the string we're writing. */
1304251881Speter  svn_fs_t *fs;
1305251881Speter
1306251881Speter  /* The key of the string we're writing to.  Typically this is
1307251881Speter     initialized to NULL, so svn_fs_base__string_append() can fill in a
1308251881Speter     value. */
1309251881Speter  const char *key;
1310251881Speter
1311251881Speter  /* The amount of txdelta data written to the current
1312251881Speter     string-in-progress. */
1313251881Speter  apr_size_t size;
1314251881Speter
1315251881Speter  /* The amount of svndiff header information we've written thus far
1316251881Speter     to the strings table. */
1317251881Speter  apr_size_t header_read;
1318251881Speter
1319251881Speter  /* The version number of the svndiff data written.  ### You'd better
1320251881Speter     not count on this being populated after the first chunk is sent
1321251881Speter     through the interface, since it lives at the 4th byte of the
1322251881Speter     stream. */
1323251881Speter  apr_byte_t version;
1324251881Speter
1325251881Speter  /* The trail we're writing in. */
1326251881Speter  trail_t *trail;
1327251881Speter
1328251881Speter};
1329251881Speter
1330251881Speter
1331251881Speter/* Function of type `svn_write_fn_t', for writing to a collection of
1332251881Speter   strings; BATON is `struct write_svndiff_strings_baton *'.
1333251881Speter
1334251881Speter   On the first call, BATON->key is null.  A new string key in
1335251881Speter   BATON->fs is chosen and stored in BATON->key; each call appends
1336251881Speter   *LEN bytes from DATA onto the string.  *LEN is never changed; if
1337251881Speter   the write fails to write all *LEN bytes, an error is returned.
1338251881Speter   BATON->size is used to track the total amount of data written via
1339251881Speter   this handler, and must be reset by the caller to 0 when appropriate.  */
1340251881Speterstatic svn_error_t *
1341251881Speterwrite_svndiff_strings(void *baton, const char *data, apr_size_t *len)
1342251881Speter{
1343251881Speter  struct write_svndiff_strings_baton *wb = baton;
1344251881Speter  const char *buf = data;
1345251881Speter  apr_size_t nheader = 0;
1346251881Speter
1347251881Speter  /* If we haven't stripped all the header information from this
1348251881Speter     stream yet, keep stripping.  If someone sends a first window
1349251881Speter     through here that's shorter than 4 bytes long, this will probably
1350251881Speter     cause a nuclear reactor meltdown somewhere in the American
1351251881Speter     midwest.  */
1352251881Speter  if (wb->header_read < 4)
1353251881Speter    {
1354251881Speter      nheader = 4 - wb->header_read;
1355251881Speter      *len -= nheader;
1356251881Speter      buf += nheader;
1357251881Speter      wb->header_read += nheader;
1358251881Speter
1359251881Speter      /* If we have *now* read the full 4-byte header, check that
1360251881Speter         least byte for the version number of the svndiff format. */
1361251881Speter      if (wb->header_read == 4)
1362251881Speter        wb->version = *(buf - 1);
1363251881Speter    }
1364251881Speter
1365251881Speter  /* Append to the current string we're writing (or create a new one
1366251881Speter     if WB->key is NULL). */
1367251881Speter  SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len,
1368251881Speter                                    buf, wb->trail, wb->trail->pool));
1369251881Speter
1370251881Speter  /* Make sure we (still) have a key. */
1371251881Speter  if (wb->key == NULL)
1372251881Speter    return svn_error_create(SVN_ERR_FS_GENERAL, NULL,
1373251881Speter                            _("Failed to get new string key"));
1374251881Speter
1375251881Speter  /* Restore *LEN to the value it *would* have been were it not for
1376251881Speter     header stripping. */
1377251881Speter  *len += nheader;
1378251881Speter
1379251881Speter  /* Increment our running total of bytes written to this string. */
1380251881Speter  wb->size += *len;
1381251881Speter
1382251881Speter  return SVN_NO_ERROR;
1383251881Speter}
1384251881Speter
1385251881Speter
1386251881Spetertypedef struct window_write_t
1387251881Speter{
1388251881Speter  const char *key; /* string key for this window */
1389251881Speter  apr_size_t svndiff_len; /* amount of svndiff data written to the string */
1390251881Speter  svn_filesize_t text_off; /* offset of fulltext represented by this window */
1391251881Speter  apr_size_t text_len; /* amount of fulltext data represented by this window */
1392251881Speter
1393251881Speter} window_write_t;
1394251881Speter
1395251881Speter
1396251881Spetersvn_error_t *
1397251881Spetersvn_fs_base__rep_deltify(svn_fs_t *fs,
1398251881Speter                         const char *target,
1399251881Speter                         const char *source,
1400251881Speter                         trail_t *trail,
1401251881Speter                         apr_pool_t *pool)
1402251881Speter{
1403251881Speter  base_fs_data_t *bfd = fs->fsap_data;
1404251881Speter  svn_stream_t *source_stream; /* stream to read the source */
1405251881Speter  svn_stream_t *target_stream; /* stream to read the target */
1406251881Speter  svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows  */
1407251881Speter
1408251881Speter  /* window-y things, and an array to track them */
1409251881Speter  window_write_t *ww;
1410251881Speter  apr_array_header_t *windows;
1411251881Speter
1412251881Speter  /* stream to write new (deltified) target data and its baton */
1413251881Speter  svn_stream_t *new_target_stream;
1414251881Speter  struct write_svndiff_strings_baton new_target_baton;
1415251881Speter
1416251881Speter  /* window handler/baton for writing to above stream */
1417251881Speter  svn_txdelta_window_handler_t new_target_handler;
1418251881Speter  void *new_target_handler_baton;
1419251881Speter
1420251881Speter  /* yes, we do windows */
1421251881Speter  svn_txdelta_window_t *window;
1422251881Speter
1423251881Speter  /* The current offset into the fulltext that our window is about to
1424251881Speter     write.  This doubles, after all windows are written, as the
1425251881Speter     total size of the svndiff data for the deltification process. */
1426251881Speter  svn_filesize_t tview_off = 0;
1427251881Speter
1428251881Speter  /* The total amount of diff data written while deltifying. */
1429251881Speter  svn_filesize_t diffsize = 0;
1430251881Speter
1431251881Speter  /* TARGET's original string keys */
1432251881Speter  apr_array_header_t *orig_str_keys;
1433251881Speter
1434251881Speter  /* The checksums for the representation's fulltext contents. */
1435251881Speter  svn_checksum_t *rep_md5_checksum;
1436251881Speter  svn_checksum_t *rep_sha1_checksum;
1437251881Speter
1438251881Speter  /* MD5 digest */
1439251881Speter  const unsigned char *digest;
1440251881Speter
1441251881Speter  /* pool for holding the windows */
1442251881Speter  apr_pool_t *wpool;
1443251881Speter
1444251881Speter  /* Paranoia: never allow a rep to be deltified against itself,
1445251881Speter     because then there would be no fulltext reachable in the delta
1446251881Speter     chain, and badness would ensue.  */
1447251881Speter  if (strcmp(target, source) == 0)
1448251881Speter    return svn_error_createf
1449251881Speter      (SVN_ERR_FS_CORRUPT, NULL,
1450251881Speter       _("Attempt to deltify '%s' against itself"),
1451251881Speter       target);
1452251881Speter
1453251881Speter  /* Set up a handler for the svndiff data, which will write each
1454251881Speter     window to its own string in the `strings' table. */
1455251881Speter  new_target_baton.fs = fs;
1456251881Speter  new_target_baton.trail = trail;
1457251881Speter  new_target_baton.header_read = FALSE;
1458251881Speter  new_target_stream = svn_stream_create(&new_target_baton, pool);
1459251881Speter  svn_stream_set_write(new_target_stream, write_svndiff_strings);
1460251881Speter
1461251881Speter  /* Get streams to our source and target text data. */
1462251881Speter  SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source,
1463251881Speter                                                TRUE, trail, pool));
1464251881Speter  SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target,
1465251881Speter                                                TRUE, trail, pool));
1466251881Speter
1467251881Speter  /* Setup a stream to convert the textdelta data into svndiff windows. */
1468251881Speter  svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool);
1469251881Speter
1470251881Speter  if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT)
1471251881Speter    svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1472251881Speter                            new_target_stream, 1,
1473251881Speter                            SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1474251881Speter  else
1475251881Speter    svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1476251881Speter                            new_target_stream, 0,
1477251881Speter                            SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1478251881Speter
1479251881Speter  /* subpool for the windows */
1480251881Speter  wpool = svn_pool_create(pool);
1481251881Speter
1482251881Speter  /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1483251881Speter  windows = apr_array_make(pool, 1, sizeof(ww));
1484251881Speter  do
1485251881Speter    {
1486251881Speter      /* Reset some baton variables. */
1487251881Speter      new_target_baton.size = 0;
1488251881Speter      new_target_baton.key = NULL;
1489251881Speter
1490251881Speter      /* Free the window. */
1491251881Speter      svn_pool_clear(wpool);
1492251881Speter
1493251881Speter      /* Fetch the next window of txdelta data. */
1494251881Speter      SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool));
1495251881Speter
1496251881Speter      /* Send off this package to be written as svndiff data. */
1497251881Speter      SVN_ERR(new_target_handler(window, new_target_handler_baton));
1498251881Speter      if (window)
1499251881Speter        {
1500251881Speter          /* Add a new window description to our array. */
1501251881Speter          ww = apr_pcalloc(pool, sizeof(*ww));
1502251881Speter          ww->key = new_target_baton.key;
1503251881Speter          ww->svndiff_len = new_target_baton.size;
1504251881Speter          ww->text_off = tview_off;
1505251881Speter          ww->text_len = window->tview_len;
1506251881Speter          APR_ARRAY_PUSH(windows, window_write_t *) = ww;
1507251881Speter
1508251881Speter          /* Update our recordkeeping variables. */
1509251881Speter          tview_off += window->tview_len;
1510251881Speter          diffsize += ww->svndiff_len;
1511251881Speter        }
1512251881Speter
1513251881Speter    } while (window);
1514251881Speter
1515251881Speter  svn_pool_destroy(wpool);
1516251881Speter
1517251881Speter  /* Having processed all the windows, we can query the MD5 digest
1518251881Speter     from the stream.  */
1519251881Speter  digest = svn_txdelta_md5_digest(txdelta_stream);
1520251881Speter  if (! digest)
1521251881Speter    return svn_error_createf
1522251881Speter      (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL,
1523251881Speter       _("Failed to calculate MD5 digest for '%s'"),
1524251881Speter       source);
1525251881Speter
1526251881Speter  /* Construct a list of the strings used by the old representation so
1527251881Speter     that we can delete them later.  While we are here, if the old
1528251881Speter     representation was a fulltext, check to make sure the delta we're
1529251881Speter     replacing it with is actually smaller.  (Don't perform this check
1530251881Speter     if we're replacing a delta; in that case, we're going for a time
1531251881Speter     optimization, not a space optimization.)  */
1532251881Speter  {
1533251881Speter    representation_t *old_rep;
1534251881Speter    const char *str_key;
1535251881Speter
1536251881Speter    SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool));
1537251881Speter    if (old_rep->kind == rep_kind_fulltext)
1538251881Speter      {
1539251881Speter        svn_filesize_t old_size = 0;
1540251881Speter
1541251881Speter        str_key = old_rep->contents.fulltext.string_key;
1542251881Speter        SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key,
1543251881Speter                                        trail, pool));
1544251881Speter        orig_str_keys = apr_array_make(pool, 1, sizeof(str_key));
1545251881Speter        APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key;
1546251881Speter
1547251881Speter        /* If the new data is NOT an space optimization, destroy the
1548251881Speter           string(s) we created, and get outta here. */
1549251881Speter        if (diffsize >= old_size)
1550251881Speter          {
1551251881Speter            int i;
1552251881Speter            for (i = 0; i < windows->nelts; i++)
1553251881Speter              {
1554251881Speter                ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1555251881Speter                SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool));
1556251881Speter              }
1557251881Speter            return SVN_NO_ERROR;
1558251881Speter          }
1559251881Speter      }
1560251881Speter    else if (old_rep->kind == rep_kind_delta)
1561251881Speter      SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool));
1562251881Speter    else /* unknown kind */
1563251881Speter      return UNKNOWN_NODE_KIND(target);
1564251881Speter
1565251881Speter    /* Save the checksums, since the new rep needs them. */
1566251881Speter    rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool);
1567251881Speter    rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool);
1568251881Speter  }
1569251881Speter
1570251881Speter  /* Hook the new strings we wrote into the rest of the filesystem by
1571251881Speter     building a new representation to replace our old one. */
1572251881Speter  {
1573251881Speter    representation_t new_rep;
1574251881Speter    rep_delta_chunk_t *chunk;
1575251881Speter    apr_array_header_t *chunks;
1576251881Speter    int i;
1577251881Speter
1578251881Speter    new_rep.kind = rep_kind_delta;
1579251881Speter    new_rep.txn_id = NULL;
1580251881Speter
1581251881Speter    /* Migrate the old rep's checksums to the new rep. */
1582251881Speter    new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool);
1583251881Speter    new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool);
1584251881Speter
1585251881Speter    chunks = apr_array_make(pool, windows->nelts, sizeof(chunk));
1586251881Speter
1587251881Speter    /* Loop through the windows we wrote, creating and adding new
1588251881Speter       chunks to the representation. */
1589251881Speter    for (i = 0; i < windows->nelts; i++)
1590251881Speter      {
1591251881Speter        ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1592251881Speter
1593251881Speter        /* Allocate a chunk and its window */
1594251881Speter        chunk = apr_palloc(pool, sizeof(*chunk));
1595251881Speter        chunk->offset = ww->text_off;
1596251881Speter
1597251881Speter        /* Populate the window */
1598251881Speter        chunk->version = new_target_baton.version;
1599251881Speter        chunk->string_key = ww->key;
1600251881Speter        chunk->size = ww->text_len;
1601251881Speter        chunk->rep_key = source;
1602251881Speter
1603251881Speter        /* Add this chunk to the array. */
1604251881Speter        APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk;
1605251881Speter      }
1606251881Speter
1607251881Speter    /* Put the chunks array into the representation. */
1608251881Speter    new_rep.contents.delta.chunks = chunks;
1609251881Speter
1610251881Speter    /* Write out the new representation. */
1611251881Speter    SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool));
1612251881Speter
1613251881Speter    /* Delete the original pre-deltified strings. */
1614251881Speter    SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool));
1615251881Speter  }
1616251881Speter
1617251881Speter  return SVN_NO_ERROR;
1618251881Speter}
1619