1/* cached_data.c --- cached (read) access to FSX data
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "cached_data.h"
24
25#include <assert.h>
26
27#include "svn_hash.h"
28#include "svn_ctype.h"
29#include "svn_sorts.h"
30
31#include "private/svn_io_private.h"
32#include "private/svn_sorts_private.h"
33#include "private/svn_subr_private.h"
34#include "private/svn_temp_serializer.h"
35
36#include "fs_x.h"
37#include "low_level.h"
38#include "util.h"
39#include "pack.h"
40#include "temp_serializer.h"
41#include "index.h"
42#include "changes.h"
43#include "noderevs.h"
44#include "reps.h"
45
46#include "../libsvn_fs/fs-loader.h"
47#include "../libsvn_delta/delta.h"  /* for SVN_DELTA_WINDOW_SIZE */
48
49#include "svn_private_config.h"
50
/* Forward declaration: block_read() is called by functions that precede
   its definition.  See the implementation for the docstring. */
static svn_error_t *
block_read(void **result,
           svn_fs_t *fs,
           const svn_fs_x__id_t *id,
           svn_fs_x__revision_file_t *revision_file,
           apr_pool_t *result_pool,
           apr_pool_t *scratch_pool);
59
60
/* Define this to enable access logging via dgb__log_access
62#define SVN_FS_X__LOG_ACCESS
63*/
64
65/* When SVN_FS_X__LOG_ACCESS has been defined, write a line to console
66 * showing where ID is located in FS and use ITEM to show details on it's
67 * contents if not NULL.  Use SCRATCH_POOL for temporary allocations.
68 */
69static svn_error_t *
70dgb__log_access(svn_fs_t *fs,
71                const svn_fs_x__id_t *id,
72                void *item,
73                apr_uint32_t item_type,
74                apr_pool_t *scratch_pool)
75{
76  /* no-op if this macro is not defined */
77#ifdef SVN_FS_X__LOG_ACCESS
78  svn_fs_x__data_t *ffd = fs->fsap_data;
79  apr_off_t offset = -1;
80  apr_off_t end_offset = 0;
81  apr_uint32_t sub_item = 0;
82  svn_fs_x__p2l_entry_t *entry = NULL;
83  static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
84                                "node ", "chgs ", "rep  ", "c:", "n:", "r:"};
85  const char *description = "";
86  const char *type = types[item_type];
87  const char *pack = "";
88  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
89
90  /* determine rev / pack file offset */
91  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, id, scratch_pool));
92
93  /* constructing the pack file description */
94  if (revision < ffd->min_unpacked_rev)
95    pack = apr_psprintf(scratch_pool, "%4ld|",
96                        revision / ffd->max_files_per_dir);
97
98  /* construct description if possible */
99  if (item_type == SVN_FS_X__ITEM_TYPE_NODEREV && item != NULL)
100    {
101      svn_fs_x__noderev_t *node = item;
102      const char *data_rep
103        = node->data_rep
104        ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
105                       svn_fs_x__get_revnum(node->data_rep->id.change_set),
106                       node->data_rep->id.number)
107        : "";
108      const char *prop_rep
109        = node->prop_rep
110        ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
111                       svn_fs_x__get_revnum(node->prop_rep->id.change_set),
112                       node->prop_rep->id.number)
113        : "";
114      description = apr_psprintf(scratch_pool, "%s   (pc=%d%s%s)",
115                                 node->created_path,
116                                 node->predecessor_count,
117                                 data_rep,
118                                 prop_rep);
119    }
120  else if (item_type == SVN_FS_X__ITEM_TYPE_ANY_REP)
121    {
122      svn_fs_x__rep_header_t *header = item;
123      if (header == NULL)
124        description = "  (txdelta window)";
125      else if (header->type == svn_fs_x__rep_self_delta)
126        description = "  DELTA";
127      else
128        description = apr_psprintf(scratch_pool,
129                                   "  DELTA against %ld/%" APR_UINT64_T_FMT,
130                                   header->base_revision,
131                                   header->base_item_index);
132    }
133  else if (item_type == SVN_FS_X__ITEM_TYPE_CHANGES && item != NULL)
134    {
135      apr_array_header_t *changes = item;
136      switch (changes->nelts)
137        {
138          case 0:  description = "  no change";
139                   break;
140          case 1:  description = "  1 change";
141                   break;
142          default: description = apr_psprintf(scratch_pool, "  %d changes",
143                                              changes->nelts);
144        }
145    }
146
147  /* reverse index lookup: get item description in ENTRY */
148  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, revision, offset,
149                                      scratch_pool));
150  if (entry)
151    {
152      /* more details */
153      end_offset = offset + entry->size;
154      type = types[entry->type];
155
156      /* merge the sub-item number with the container type */
157      if (   entry->type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT
158          || entry->type == SVN_FS_X__ITEM_TYPE_NODEREVS_CONT
159          || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT)
160        type = apr_psprintf(scratch_pool, "%s%-3d", type, sub_item);
161    }
162
163  /* line output */
164  printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT"   %s\n",
165          pack, (long)(offset / ffd->block_size),
166          (long)(offset % ffd->block_size),
167          (long)(end_offset / ffd->block_size),
168          (long)(end_offset % ffd->block_size),
169          type, revision, id->number, description);
170
171#endif
172
173  return SVN_NO_ERROR;
174}
175
176/* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
177   FS instead of a block size. */
178static svn_error_t *
179aligned_seek(svn_fs_t *fs,
180             apr_file_t *file,
181             apr_off_t *buffer_start,
182             apr_off_t offset,
183             apr_pool_t *scratch_pool)
184{
185  svn_fs_x__data_t *ffd = fs->fsap_data;
186  return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
187                                                  buffer_start, offset,
188                                                  scratch_pool));
189}
190
191/* Open the revision file for the item given by ID in filesystem FS and
192   store the newly opened file in FILE.  Seek to the item's location before
193   returning.
194
195   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
196static svn_error_t *
197open_and_seek_revision(svn_fs_x__revision_file_t **file,
198                       svn_fs_t *fs,
199                       const svn_fs_x__id_t *id,
200                       apr_pool_t *result_pool,
201                       apr_pool_t *scratch_pool)
202{
203  svn_fs_x__revision_file_t *rev_file;
204  apr_off_t offset = -1;
205  apr_uint32_t sub_item = 0;
206  svn_revnum_t rev = svn_fs_x__get_revnum(id->change_set);
207
208  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
209
210  SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, fs, rev, result_pool,
211                                          scratch_pool));
212  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, id,
213                                scratch_pool));
214  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
215
216  *file = rev_file;
217
218  return SVN_NO_ERROR;
219}
220
221/* Open the representation REP for a node-revision in filesystem FS, seek
222   to its position and store the newly opened file in FILE.
223
224   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
225static svn_error_t *
226open_and_seek_transaction(svn_fs_x__revision_file_t **file,
227                          svn_fs_t *fs,
228                          svn_fs_x__representation_t *rep,
229                          apr_pool_t *result_pool,
230                          apr_pool_t *scratch_pool)
231{
232  apr_off_t offset;
233  apr_uint32_t sub_item = 0;
234  apr_int64_t txn_id = svn_fs_x__get_txn_id(rep->id.change_set);
235
236  SVN_ERR(svn_fs_x__open_proto_rev_file(file, fs, txn_id, result_pool,
237                                        scratch_pool));
238
239  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, *file, &rep->id,
240                                scratch_pool));
241  SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, scratch_pool));
242
243  return SVN_NO_ERROR;
244}
245
246/* Given a node-id ID, and a representation REP in filesystem FS, open
247   the correct file and seek to the correction location.  Store this
248   file in *FILE_P.
249
250   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
251static svn_error_t *
252open_and_seek_representation(svn_fs_x__revision_file_t **file_p,
253                             svn_fs_t *fs,
254                             svn_fs_x__representation_t *rep,
255                             apr_pool_t *result_pool,
256                             apr_pool_t *scratch_pool)
257{
258  if (svn_fs_x__is_revision(rep->id.change_set))
259    return open_and_seek_revision(file_p, fs, &rep->id, result_pool,
260                                  scratch_pool);
261  else
262    return open_and_seek_transaction(file_p, fs, rep, result_pool,
263                                     scratch_pool);
264}
265
266
267
268static svn_error_t *
269err_dangling_id(svn_fs_t *fs,
270                const svn_fs_x__id_t *id)
271{
272  svn_string_t *id_str = svn_fs_x__id_unparse(id, fs->pool);
273  return svn_error_createf
274    (SVN_ERR_FS_ID_NOT_FOUND, 0,
275     _("Reference to non-existent node '%s' in filesystem '%s'"),
276     id_str->data, fs->path);
277}
278
279/* Get the node-revision for the node ID in FS.
280   Set *NODEREV_P to the new node-revision structure, allocated in POOL.
281   See svn_fs_x__get_node_revision, which wraps this and adds another
282   error. */
283static svn_error_t *
284get_node_revision_body(svn_fs_x__noderev_t **noderev_p,
285                       svn_fs_t *fs,
286                       const svn_fs_x__id_t *id,
287                       apr_pool_t *result_pool,
288                       apr_pool_t *scratch_pool)
289{
290  svn_error_t *err;
291  svn_boolean_t is_cached = FALSE;
292  svn_fs_x__data_t *ffd = fs->fsap_data;
293
294  if (svn_fs_x__is_txn(id->change_set))
295    {
296      apr_file_t *file;
297
298      /* This is a transaction node-rev.  Its storage logic is very
299         different from that of rev / pack files. */
300      err = svn_io_file_open(&file,
301                             svn_fs_x__path_txn_node_rev(fs, id,
302                                                         scratch_pool,
303                                                         scratch_pool),
304                             APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
305                             scratch_pool);
306      if (err)
307        {
308          if (APR_STATUS_IS_ENOENT(err->apr_err))
309            {
310              svn_error_clear(err);
311              return svn_error_trace(err_dangling_id(fs, id));
312            }
313
314          return svn_error_trace(err);
315        }
316
317      SVN_ERR(svn_fs_x__read_noderev(noderev_p,
318                                     svn_stream_from_aprfile2(file,
319                                                              FALSE,
320                                                              scratch_pool),
321                                     result_pool, scratch_pool));
322    }
323  else
324    {
325      svn_fs_x__revision_file_t *revision_file;
326
327      /* noderevs in rev / pack files can be cached */
328      svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
329      svn_fs_x__pair_cache_key_t key;
330
331      SVN_ERR(svn_fs_x__open_pack_or_rev_file(&revision_file, fs, revision,
332                                              scratch_pool, scratch_pool));
333
334      /* First, try a noderevs container cache lookup. */
335      if (   svn_fs_x__is_packed_rev(fs, revision)
336          && ffd->noderevs_container_cache)
337        {
338          apr_off_t offset;
339          apr_uint32_t sub_item;
340          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file,
341                                        id, scratch_pool));
342          key.revision = svn_fs_x__packed_base_rev(fs, revision);
343          key.second = offset;
344
345          SVN_ERR(svn_cache__get_partial((void **)noderev_p, &is_cached,
346                                         ffd->noderevs_container_cache, &key,
347                                         svn_fs_x__noderevs_get_func,
348                                         &sub_item, result_pool));
349          if (is_cached)
350            return SVN_NO_ERROR;
351        }
352
353      key.revision = revision;
354      key.second = id->number;
355
356      /* Not found or not applicable. Try a noderev cache lookup.
357       * If that succeeds, we are done here. */
358      if (ffd->node_revision_cache)
359        {
360          SVN_ERR(svn_cache__get((void **) noderev_p,
361                                 &is_cached,
362                                 ffd->node_revision_cache,
363                                 &key,
364                                 result_pool));
365          if (is_cached)
366            return SVN_NO_ERROR;
367        }
368
369      /* block-read will parse the whole block and will also return
370         the one noderev that we need right now. */
371      SVN_ERR(block_read((void **)noderev_p, fs,
372                         id,
373                         revision_file,
374                         result_pool,
375                         scratch_pool));
376      SVN_ERR(svn_fs_x__close_revision_file(revision_file));
377    }
378
379  return SVN_NO_ERROR;
380}
381
382svn_error_t *
383svn_fs_x__get_node_revision(svn_fs_x__noderev_t **noderev_p,
384                            svn_fs_t *fs,
385                            const svn_fs_x__id_t *id,
386                            apr_pool_t *result_pool,
387                            apr_pool_t *scratch_pool)
388{
389  svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
390                                            result_pool, scratch_pool);
391  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
392    {
393      svn_string_t *id_string = svn_fs_x__id_unparse(id, scratch_pool);
394      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
395                               "Corrupt node-revision '%s'",
396                               id_string->data);
397    }
398
399  SVN_ERR(dgb__log_access(fs, id, *noderev_p,
400                          SVN_FS_X__ITEM_TYPE_NODEREV, scratch_pool));
401
402  return svn_error_trace(err);
403}
404
405
406svn_error_t *
407svn_fs_x__get_mergeinfo_count(apr_int64_t *count,
408                              svn_fs_t *fs,
409                              const svn_fs_x__id_t *id,
410                              apr_pool_t *scratch_pool)
411{
412  svn_fs_x__noderev_t *noderev;
413
414  /* If we want a full acccess log, we need to provide full data and
415     cannot take shortcuts here. */
416#if !defined(SVN_FS_X__LOG_ACCESS)
417
418  /* First, try a noderevs container cache lookup. */
419  if (! svn_fs_x__is_txn(id->change_set))
420    {
421      /* noderevs in rev / pack files can be cached */
422      svn_fs_x__data_t *ffd = fs->fsap_data;
423      svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
424
425      svn_fs_x__revision_file_t *rev_file;
426      SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, fs, revision,
427                                              scratch_pool, scratch_pool));
428
429      if (   svn_fs_x__is_packed_rev(fs, revision)
430          && ffd->noderevs_container_cache)
431        {
432          svn_fs_x__pair_cache_key_t key;
433          apr_off_t offset;
434          apr_uint32_t sub_item;
435          svn_boolean_t is_cached;
436
437          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file,
438                                        id, scratch_pool));
439          key.revision = svn_fs_x__packed_base_rev(fs, revision);
440          key.second = offset;
441
442          SVN_ERR(svn_cache__get_partial((void **)count, &is_cached,
443                                         ffd->noderevs_container_cache, &key,
444                                         svn_fs_x__mergeinfo_count_get_func,
445                                         &sub_item, scratch_pool));
446          if (is_cached)
447            return SVN_NO_ERROR;
448        }
449    }
450#endif
451
452  /* fallback to the naive implementation handling all edge cases */
453  SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, id, scratch_pool,
454                                      scratch_pool));
455  *count = noderev->mergeinfo_count;
456
457  return SVN_NO_ERROR;
458}
459
/* Describes a lazily opened rev / pack file.  Instances will be shared
   between multiple instances of rep_state_t. */
typedef struct shared_file_t
{
  /* The opened file.  NULL while the file has not been opened, yet. */
  svn_fs_x__revision_file_t *rfile;

  /* file system to open the file in */
  svn_fs_t *fs;

  /* A revision contained in RFILE.  Since this file may be shared,
     that value may be different from the revision that the REP_ID of
     a rep_state_t using this file refers to. */
  svn_revnum_t revision;

  /* Pool to use when opening RFILE.  This guarantees that the file
     remains open / valid beyond the respective local context that required
     the file to be opened eventually. */
  apr_pool_t *pool;
} shared_file_t;
479
/* Represents where in the current svndiff data block each
   representation is. */
typedef struct rep_state_t
{
  /* Shared lazy-open rev / pack file structure. */
  shared_file_t *sfile;

  /* The txdelta window cache to use or NULL. */
  svn_cache__t *window_cache;

  /* Caches un-deltified windows.  May be NULL. */
  svn_cache__t *combined_cache;

  /* ID addressing the representation. */
  svn_fs_x__id_t rep_id;

  /* Length of the header at the start of the rep.
     0 iff this rep is stored in a container
     (i.e. does not have a header). */
  apr_size_t header_size;

  /* The starting offset for the raw svndiff data minus header.
     -1 if the offset is yet unknown. */
  apr_off_t start;

  /* Sub-item index in case the rep is containered. */
  apr_uint32_t sub_item;

  /* The current offset relative to START. */
  apr_off_t current;

  /* The on-disk size of the representation. */
  apr_off_t size;

  /* If a delta, what svndiff version?
     -1 for unknown delta version. */
  int ver;

  /* Number of the window to read. */
  int chunk_index;
} rep_state_t;
507
508/* Simple wrapper around svn_fs_x__get_file_offset to simplify callers. */
509static svn_error_t *
510get_file_offset(apr_off_t *offset,
511                rep_state_t *rs,
512                apr_pool_t *scratch_pool)
513{
514  return svn_error_trace(svn_fs_x__get_file_offset(offset,
515                                                   rs->sfile->rfile->file,
516                                                   scratch_pool));
517}
518
519/* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
520static svn_error_t *
521rs_aligned_seek(rep_state_t *rs,
522                apr_off_t *buffer_start,
523                apr_off_t offset,
524                apr_pool_t *scratch_pool)
525{
526  svn_fs_x__data_t *ffd = rs->sfile->fs->fsap_data;
527  return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
528                                                  ffd->block_size,
529                                                  buffer_start, offset,
530                                                  scratch_pool));
531}
532
533/* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
534static svn_error_t*
535auto_open_shared_file(shared_file_t *file)
536{
537  if (file->rfile == NULL)
538    SVN_ERR(svn_fs_x__open_pack_or_rev_file(&file->rfile, file->fs,
539                                            file->revision, file->pool,
540                                            file->pool));
541
542  return SVN_NO_ERROR;
543}
544
545/* Set RS->START to the begin of the representation raw in RS->SFILE->RFILE,
546   if that hasn't been done yet.  Use SCRATCH_POOL for temporary allocations.
547 */
548static svn_error_t*
549auto_set_start_offset(rep_state_t *rs,
550                      apr_pool_t *scratch_pool)
551{
552  if (rs->start == -1)
553    {
554      SVN_ERR(svn_fs_x__item_offset(&rs->start, &rs->sub_item,
555                                    rs->sfile->fs, rs->sfile->rfile,
556                                    &rs->rep_id, scratch_pool));
557      rs->start += rs->header_size;
558    }
559
560  return SVN_NO_ERROR;
561}
562
563/* Set RS->VER depending on what is found in the already open RS->FILE->FILE
564   if the diff version is still unknown.  Use SCRATCH_POOL for temporary
565   allocations.
566 */
567static svn_error_t*
568auto_read_diff_version(rep_state_t *rs,
569                       apr_pool_t *scratch_pool)
570{
571  if (rs->ver == -1)
572    {
573      char buf[4];
574      SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, scratch_pool));
575      SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
576                                     sizeof(buf), NULL, NULL, scratch_pool));
577
578      /* ### Layering violation */
579      if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
580        return svn_error_create
581          (SVN_ERR_FS_CORRUPT, NULL,
582           _("Malformed svndiff data in representation"));
583      rs->ver = buf[3];
584
585      rs->chunk_index = 0;
586      rs->current = 4;
587    }
588
589  return SVN_NO_ERROR;
590}
591
592/* See create_rep_state, which wraps this and adds another error. */
593static svn_error_t *
594create_rep_state_body(rep_state_t **rep_state,
595                      svn_fs_x__rep_header_t **rep_header,
596                      shared_file_t **shared_file,
597                      svn_fs_x__representation_t *rep,
598                      svn_fs_t *fs,
599                      apr_pool_t *result_pool,
600                      apr_pool_t *scratch_pool)
601{
602  svn_fs_x__data_t *ffd = fs->fsap_data;
603  rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
604  svn_fs_x__rep_header_t *rh;
605  svn_boolean_t is_cached = FALSE;
606  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
607  apr_uint64_t estimated_window_storage;
608
609  /* If the hint is
610   * - given,
611   * - refers to a valid revision,
612   * - refers to a packed revision,
613   * - as does the rep we want to read, and
614   * - refers to the same pack file as the rep
615   * we can re-use the same, already open file object
616   */
617  svn_boolean_t reuse_shared_file
618    =    shared_file && *shared_file && (*shared_file)->rfile
619      && SVN_IS_VALID_REVNUM((*shared_file)->revision)
620      && (*shared_file)->revision < ffd->min_unpacked_rev
621      && revision < ffd->min_unpacked_rev
622      && (   ((*shared_file)->revision / ffd->max_files_per_dir)
623          == (revision / ffd->max_files_per_dir));
624
625  svn_fs_x__representation_cache_key_t key = { 0 };
626  key.revision = revision;
627  key.is_packed = revision < ffd->min_unpacked_rev;
628  key.item_index = rep->id.number;
629
630  /* continue constructing RS and RA */
631  rs->size = rep->size;
632  rs->rep_id = rep->id;
633  rs->ver = -1;
634  rs->start = -1;
635
636  /* Very long files stored as self-delta will produce a huge number of
637     delta windows.  Don't cache them lest we don't thrash the cache.
638     Since we don't know the depth of the delta chain, let's assume, the
639     whole contents get rewritten 3 times.
640   */
641  estimated_window_storage
642    = 4 * (  (rep->expanded_size ? rep->expanded_size : rep->size)
643           + SVN_DELTA_WINDOW_SIZE);
644  estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
645
646  rs->window_cache =    ffd->txdelta_window_cache
647                     && svn_cache__is_cachable(ffd->txdelta_window_cache,
648                                       (apr_size_t)estimated_window_storage)
649                   ? ffd->txdelta_window_cache
650                   : NULL;
651  rs->combined_cache =    ffd->combined_window_cache
652                       && svn_cache__is_cachable(ffd->combined_window_cache,
653                                       (apr_size_t)estimated_window_storage)
654                     ? ffd->combined_window_cache
655                     : NULL;
656
657  /* cache lookup, i.e. skip reading the rep header if possible */
658  if (ffd->rep_header_cache && SVN_IS_VALID_REVNUM(revision))
659    SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
660                           ffd->rep_header_cache, &key, result_pool));
661
662  /* initialize the (shared) FILE member in RS */
663  if (reuse_shared_file)
664    {
665      rs->sfile = *shared_file;
666    }
667  else
668    {
669      shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
670      file->revision = revision;
671      file->pool = result_pool;
672      file->fs = fs;
673      rs->sfile = file;
674
675      /* remember the current file, if suggested by the caller */
676      if (shared_file)
677        *shared_file = file;
678    }
679
680  /* read rep header, if necessary */
681  if (!is_cached)
682    {
683      /* we will need the on-disk location for non-txn reps */
684      apr_off_t offset;
685      svn_boolean_t in_container = TRUE;
686
687      /* ensure file is open and navigate to the start of rep header */
688      if (reuse_shared_file)
689        {
690          /* ... we can re-use the same, already open file object.
691           * This implies that we don't read from a txn.
692           */
693          rs->sfile = *shared_file;
694          SVN_ERR(auto_open_shared_file(rs->sfile));
695        }
696      else
697        {
698          /* otherwise, create a new file object.  May or may not be
699           * an in-txn file.
700           */
701          SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
702                                               result_pool, scratch_pool));
703        }
704
705      if (SVN_IS_VALID_REVNUM(revision))
706        {
707          apr_uint32_t sub_item;
708
709          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs,
710                                        rs->sfile->rfile, &rep->id,
711                                        scratch_pool));
712
713          /* is rep stored in some star-deltified container? */
714          if (sub_item == 0)
715            {
716              svn_fs_x__p2l_entry_t *entry;
717              SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rs->sfile->rfile,
718                                                 revision, offset,
719                                                 scratch_pool, scratch_pool));
720              in_container = entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT;
721            }
722
723          if (in_container)
724            {
725              /* construct a container rep header */
726              *rep_header = apr_pcalloc(result_pool, sizeof(**rep_header));
727              (*rep_header)->type = svn_fs_x__rep_container;
728
729              /* exit to caller */
730              *rep_state = rs;
731              return SVN_NO_ERROR;
732            }
733
734          SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
735        }
736
737      SVN_ERR(svn_fs_x__read_rep_header(&rh, rs->sfile->rfile->stream,
738                                        result_pool, scratch_pool));
739      SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
740
741      /* populate the cache if appropriate */
742      if (SVN_IS_VALID_REVNUM(revision))
743        {
744          SVN_ERR(block_read(NULL, fs, &rs->rep_id, rs->sfile->rfile,
745                             result_pool, scratch_pool));
746          if (ffd->rep_header_cache)
747            SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
748                                   scratch_pool));
749        }
750    }
751
752  /* finalize */
753  SVN_ERR(dgb__log_access(fs, &rs->rep_id, rh, SVN_FS_X__ITEM_TYPE_ANY_REP,
754                          scratch_pool));
755
756  rs->header_size = rh->header_size;
757  *rep_state = rs;
758  *rep_header = rh;
759
760  rs->chunk_index = 0;
761
762  /* skip "SVNx" diff marker */
763  rs->current = 4;
764
765  return SVN_NO_ERROR;
766}
767
768/* Read the rep args for REP in filesystem FS and create a rep_state
769   for reading the representation.  Return the rep_state in *REP_STATE
770   and the rep args in *REP_ARGS, both allocated in POOL.
771
772   When reading multiple reps, i.e. a skip delta chain, you may provide
773   non-NULL SHARED_FILE.  (If SHARED_FILE is not NULL, in the first
774   call it should be a pointer to NULL.)  The function will use this
775   variable to store the previous call results and tries to re-use it.
776   This may result in significant savings in I/O for packed files and
777   number of open file handles.
778 */
779static svn_error_t *
780create_rep_state(rep_state_t **rep_state,
781                 svn_fs_x__rep_header_t **rep_header,
782                 shared_file_t **shared_file,
783                 svn_fs_x__representation_t *rep,
784                 svn_fs_t *fs,
785                 apr_pool_t *result_pool,
786                 apr_pool_t *scratch_pool)
787{
788  svn_error_t *err = create_rep_state_body(rep_state, rep_header,
789                                           shared_file, rep, fs,
790                                           result_pool, scratch_pool);
791  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
792    {
793      /* ### This always returns "-1" for transaction reps, because
794         ### this particular bit of code doesn't know if the rep is
795         ### stored in the protorev or in the mutable area (for props
796         ### or dir contents).  It is pretty rare for FSX to *read*
797         ### from the protorev file, though, so this is probably OK.
798         ### And anyone going to debug corruption errors is probably
799         ### going to jump straight to this comment anyway! */
800      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
801                               "Corrupt representation '%s'",
802                               rep
803                               ? svn_fs_x__unparse_representation
804                                   (rep, TRUE, scratch_pool,
805                                    scratch_pool)->data
806                               : "(null)");
807    }
808  /* ### Call representation_string() ? */
809  return svn_error_trace(err);
810}
811
812svn_error_t *
813svn_fs_x__check_rep(svn_fs_x__representation_t *rep,
814                    svn_fs_t *fs,
815                    apr_pool_t *scratch_pool)
816{
817  apr_off_t offset;
818  apr_uint32_t sub_item;
819  svn_fs_x__p2l_entry_t *entry;
820  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
821
822  svn_fs_x__revision_file_t *rev_file;
823  SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, fs, revision,
824                                          scratch_pool, scratch_pool));
825
826  /* Does REP->ID refer to an actual item? Which one is it? */
827  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, &rep->id,
828                                scratch_pool));
829
830  /* What is the type of that item? */
831  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset,
832                                     scratch_pool, scratch_pool));
833
834  /* Verify that we've got an item that is actually a representation. */
835  if (   entry == NULL
836      || (   entry->type != SVN_FS_X__ITEM_TYPE_FILE_REP
837          && entry->type != SVN_FS_X__ITEM_TYPE_DIR_REP
838          && entry->type != SVN_FS_X__ITEM_TYPE_FILE_PROPS
839          && entry->type != SVN_FS_X__ITEM_TYPE_DIR_PROPS
840          && entry->type != SVN_FS_X__ITEM_TYPE_REPS_CONT))
841    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
842                             _("No representation found at offset %s "
843                               "for item %s in revision %ld"),
844                             apr_off_t_toa(scratch_pool, offset),
845                             apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
846                                          rep->id.number),
847                             revision);
848
849  return SVN_NO_ERROR;
850}
851
852/* .
853   Do any allocations in POOL. */
854svn_error_t *
855svn_fs_x__rep_chain_length(int *chain_length,
856                           int *shard_count,
857                           svn_fs_x__representation_t *rep,
858                           svn_fs_t *fs,
859                           apr_pool_t *scratch_pool)
860{
861  svn_fs_x__data_t *ffd = fs->fsap_data;
862  svn_revnum_t shard_size = ffd->max_files_per_dir;
863  svn_boolean_t is_delta = FALSE;
864  int count = 0;
865  int shards = 1;
866  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
867  svn_revnum_t last_shard = revision / shard_size;
868
869  /* Note that this iteration pool will be used in a non-standard way.
870   * To reuse open file handles between iterations (e.g. while within the
871   * same pack file), we only clear this pool once in a while instead of
872   * at the start of each iteration. */
873  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
874
875  /* Check whether the length of the deltification chain is acceptable.
876   * Otherwise, shared reps may form a non-skipping delta chain in
877   * extreme cases. */
878  svn_fs_x__representation_t base_rep = *rep;
879
880  /* re-use open files between iterations */
881  shared_file_t *file_hint = NULL;
882
883  svn_fs_x__rep_header_t *header;
884
885  /* follow the delta chain towards the end but for at most
886   * MAX_CHAIN_LENGTH steps. */
887  do
888    {
889      rep_state_t *rep_state;
890      revision = svn_fs_x__get_revnum(base_rep.id.change_set);
891      if (revision / shard_size != last_shard)
892        {
893          last_shard = revision / shard_size;
894          ++shards;
895        }
896
897      SVN_ERR(create_rep_state_body(&rep_state,
898                                    &header,
899                                    &file_hint,
900                                    &base_rep,
901                                    fs,
902                                    iterpool,
903                                    iterpool));
904
905      base_rep.id.change_set
906        = svn_fs_x__change_set_by_rev(header->base_revision);
907      base_rep.id.number = header->base_item_index;
908      base_rep.size = header->base_length;
909      is_delta = header->type == svn_fs_x__rep_delta;
910
911      /* Clear it the ITERPOOL once in a while.  Doing it too frequently
912       * renders the FILE_HINT ineffective.  Doing too infrequently, may
913       * leave us with too many open file handles.
914       *
915       * Note that this is mostly about efficiency, with larger values
916       * being more efficient, and any non-zero value is legal here.  When
917       * reading deltified contents, we may keep 10s of rev files open at
918       * the same time and the system has to cope with that.  Thus, the
919       * limit of 16 chosen below is in the same ballpark.
920       */
921      ++count;
922      if (count % 16 == 0)
923        {
924          file_hint = NULL;
925          svn_pool_clear(iterpool);
926        }
927    }
928  while (is_delta && base_rep.id.change_set);
929
930  *chain_length = count;
931  *shard_count = shards;
932  svn_pool_destroy(iterpool);
933
934  return SVN_NO_ERROR;
935}
936
937
/* State kept while reading a single representation.  Instances are set
   up by rep_read_get_baton(); rep_read_contents_close() destroys the two
   pools stored in here. */
typedef struct rep_read_baton_t
{
  /* The FS from which we're reading. */
  svn_fs_t *fs;

  /* Representation to read. */
  svn_fs_x__representation_t rep;

  /* If not NULL, this is the base for the first delta window in rs_list */
  svn_stringbuf_t *base_window;

  /* The state of all prior delta representations. */
  apr_array_header_t *rs_list;

  /* The plaintext state, if there is a plaintext. */
  rep_state_t *src_state;

  /* The index of the current delta chunk, if we are reading a delta. */
  int chunk_index;

  /* The buffer where we store undeltified data.
     NOTE(review): BUF_POS / BUF_LEN presumably are the read position
     within and the length of BUF - confirm against the read function. */
  char *buf;
  apr_size_t buf_pos;
  apr_size_t buf_len;

  /* A checksum context for summing the data read in order to verify it.
     Note: we don't need to use the sha1 checksum because we're only doing
     data verification, for which md5 is perfectly safe.  */
  svn_checksum_ctx_t *md5_checksum_ctx;

  /* Starts out as FALSE.
     NOTE(review): presumably set once MD5_CHECKSUM_CTX has been
     finalized - confirm against the read function. */
  svn_boolean_t checksum_finalized;

  /* The stored checksum of the representation we are reading, its
     length, and the amount we've read so far.  Some of this
     information is redundant with rs_list and src_state, but it's
     convenient for the checksumming code to have it here. */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];

  /* LEN is initialized from the representation's expanded size,
     OFF starts at 0. */
  svn_filesize_t len;
  svn_filesize_t off;

  /* The key for the fulltext cache for this rep, if there is a
     fulltext cache. */
  svn_fs_x__pair_cache_key_t fulltext_cache_key;
  /* The text we've been reading, if we're going to cache it. */
  svn_stringbuf_t *current_fulltext;

  /* If not NULL, attempt to read the data from this cache.
     Once that lookup fails, reset it to NULL. */
  svn_cache__t *fulltext_cache;

  /* Bytes delivered from the FULLTEXT_CACHE so far.  If the next
     lookup fails, we need to skip that much data from the reconstructed
     window stream before we continue normal operation. */
  svn_filesize_t fulltext_delivered;

  /* Used for temporary allocations during the read. */
  apr_pool_t *scratch_pool;

  /* Pool used to store file handles and other data that is persistent
     for the entire stream read. */
  apr_pool_t *filehandle_pool;
} rep_read_baton_t;
1001
1002/* Set window key in *KEY to address the window described by RS.
1003   For convenience, return the KEY. */
1004static svn_fs_x__window_cache_key_t *
1005get_window_key(svn_fs_x__window_cache_key_t *key,
1006               rep_state_t *rs)
1007{
1008  svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
1009  assert(revision <= APR_UINT32_MAX);
1010
1011  key->revision = (apr_uint32_t)revision;
1012  key->item_index = rs->rep_id.number;
1013  key->chunk_index = rs->chunk_index;
1014
1015  return key;
1016}
1017
/* (This describes get_cached_window(), which is defined further below.)
 *
 * Read the WINDOW_P number CHUNK_INDEX for the representation given in
 * rep state RS from the current FSX session's cache.  This will be a
 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
 * If a cache is available IS_CACHED will inform the caller about the
 * success of the lookup. Allocations (of the window in particular) will
 * be made from RESULT_POOL.
 *
 * If the information could be found, put RS to CHUNK_INDEX.
 */
1027
/* Return data type for get_cached_window_sizes_func():  the sizes of a
 * single cached txdelta window.
 */
typedef struct window_sizes_t
{
  /* length of the txdelta window in its on-disk format, i.e. the
   * difference between the cached window's end and start offsets */
  svn_filesize_t packed_len;

  /* expanded (and combined) window length, i.e. the TVIEW_LEN of the
   * cached txdelta window */
  svn_filesize_t target_len;
} window_sizes_t;
1038
1039/* Implements svn_cache__partial_getter_func_t extracting the packed
1040 * and expanded window sizes from a cached window and return the size
1041 * info as a window_sizes_t* in *OUT.
1042 */
1043static svn_error_t *
1044get_cached_window_sizes_func(void **out,
1045                             const void *data,
1046                             apr_size_t data_len,
1047                             void *baton,
1048                             apr_pool_t *pool)
1049{
1050  const svn_fs_x__txdelta_cached_window_t *window = data;
1051  const svn_txdelta_window_t *txdelta_window
1052    = svn_temp_deserializer__ptr(window, (const void **)&window->window);
1053
1054  window_sizes_t *result = apr_palloc(pool, sizeof(*result));
1055  result->packed_len = window->end_offset - window->start_offset;
1056  result->target_len = txdelta_window->tview_len;
1057
1058  *out = result;
1059
1060  return SVN_NO_ERROR;
1061}
1062
1063/* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1064 * rep state RS from the current FSFS session's cache.  This will be a
1065 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1066 * If a cache is available IS_CACHED will inform the caller about the
1067 * success of the lookup. Allocations of the window in will be made
1068 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1069 *
1070 * If the information could be found, put RS to CHUNK_INDEX.
1071 */
1072static svn_error_t *
1073get_cached_window_sizes(window_sizes_t **sizes,
1074                        rep_state_t *rs,
1075                        svn_boolean_t *is_cached,
1076                        apr_pool_t *pool)
1077{
1078  if (! rs->window_cache)
1079    {
1080      /* txdelta window has not been enabled */
1081      *is_cached = FALSE;
1082    }
1083  else
1084    {
1085      svn_fs_x__window_cache_key_t key = { 0 };
1086      SVN_ERR(svn_cache__get_partial((void **)sizes,
1087                                     is_cached,
1088                                     rs->window_cache,
1089                                     get_window_key(&key, rs),
1090                                     get_cached_window_sizes_func,
1091                                     NULL,
1092                                     pool));
1093    }
1094
1095  return SVN_NO_ERROR;
1096}
1097
1098static svn_error_t *
1099get_cached_window(svn_txdelta_window_t **window_p,
1100                  rep_state_t *rs,
1101                  int chunk_index,
1102                  svn_boolean_t *is_cached,
1103                  apr_pool_t *result_pool,
1104                  apr_pool_t *scratch_pool)
1105{
1106  if (! rs->window_cache)
1107    {
1108      /* txdelta window has not been enabled */
1109      *is_cached = FALSE;
1110    }
1111  else
1112    {
1113      /* ask the cache for the desired txdelta window */
1114      svn_fs_x__txdelta_cached_window_t *cached_window;
1115      svn_fs_x__window_cache_key_t key = { 0 };
1116      get_window_key(&key, rs);
1117      key.chunk_index = chunk_index;
1118      SVN_ERR(svn_cache__get((void **) &cached_window,
1119                             is_cached,
1120                             rs->window_cache,
1121                             &key,
1122                             result_pool));
1123
1124      if (*is_cached)
1125        {
1126          /* found it. Pass it back to the caller. */
1127          *window_p = cached_window->window;
1128
1129          /* manipulate the RS as if we just read the data */
1130          rs->current = cached_window->end_offset;
1131          rs->chunk_index = chunk_index;
1132        }
1133    }
1134
1135  return SVN_NO_ERROR;
1136}
1137
1138/* Store the WINDOW read for the rep state RS with the given START_OFFSET
1139 * within the pack / rev file in the current FSX session's cache.  This
1140 * will be a no-op if no cache has been given.
1141 * Temporary allocations will be made from SCRATCH_POOL. */
1142static svn_error_t *
1143set_cached_window(svn_txdelta_window_t *window,
1144                  rep_state_t *rs,
1145                  apr_off_t start_offset,
1146                  apr_pool_t *scratch_pool)
1147{
1148  if (rs->window_cache)
1149    {
1150      /* store the window and the first offset _past_ it */
1151      svn_fs_x__txdelta_cached_window_t cached_window;
1152      svn_fs_x__window_cache_key_t key = {0};
1153
1154      cached_window.window = window;
1155      cached_window.start_offset = start_offset - rs->start;
1156      cached_window.end_offset = rs->current;
1157
1158      /* but key it with the start offset because that is the known state
1159       * when we will look it up */
1160      SVN_ERR(svn_cache__set(rs->window_cache,
1161                             get_window_key(&key, rs),
1162                             &cached_window,
1163                             scratch_pool));
1164    }
1165
1166  return SVN_NO_ERROR;
1167}
1168
1169/* Read the WINDOW_P for the rep state RS from the current FSX session's
1170 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1171 * cache has been given. If a cache is available IS_CACHED will inform
1172 * the caller about the success of the lookup. Allocations (of the window
1173 * in particular) will be made from POOL.
1174 */
1175static svn_error_t *
1176get_cached_combined_window(svn_stringbuf_t **window_p,
1177                           rep_state_t *rs,
1178                           svn_boolean_t *is_cached,
1179                           apr_pool_t *pool)
1180{
1181  if (! rs->combined_cache)
1182    {
1183      /* txdelta window has not been enabled */
1184      *is_cached = FALSE;
1185    }
1186  else
1187    {
1188      /* ask the cache for the desired txdelta window */
1189      svn_fs_x__window_cache_key_t key = { 0 };
1190      return svn_cache__get((void **)window_p,
1191                            is_cached,
1192                            rs->combined_cache,
1193                            get_window_key(&key, rs),
1194                            pool);
1195    }
1196
1197  return SVN_NO_ERROR;
1198}
1199
1200/* Store the WINDOW read for the rep state RS in the current FSX session's
1201 * cache. This will be a no-op if no cache has been given.
1202 * Temporary allocations will be made from SCRATCH_POOL. */
1203static svn_error_t *
1204set_cached_combined_window(svn_stringbuf_t *window,
1205                           rep_state_t *rs,
1206                           apr_pool_t *scratch_pool)
1207{
1208  if (rs->combined_cache)
1209    {
1210      /* but key it with the start offset because that is the known state
1211       * when we will look it up */
1212      svn_fs_x__window_cache_key_t key = { 0 };
1213      return svn_cache__set(rs->combined_cache,
1214                            get_window_key(&key, rs),
1215                            window,
1216                            scratch_pool);
1217    }
1218
1219  return SVN_NO_ERROR;
1220}
1221
/* Build an array of rep_state structures in *LIST giving the delta
   reps from FIRST_REP to a self-compressed rep.  Set *SRC_STATE to
   the container rep we find at the end of the chain, or to NULL if
   the final delta representation is self-compressed.
   The representation to start from is designated by filesystem FS
   and representation FIRST_REP.
   Also, set *WINDOW_P to the base window content for *LIST, if it
   could be found in cache.  In that case, the chain is not followed
   any further and *SRC_STATE is a pseudo rep_state spanning the full
   length of that window.  Otherwise, *LIST will contain the base
   representation for the whole delta chain.

   Allocate *LIST and the rep states in RESULT_POOL and use SCRATCH_POOL
   for temporary allocations.
 */
static svn_error_t *
build_rep_list(apr_array_header_t **list,
               svn_stringbuf_t **window_p,
               rep_state_t **src_state,
               svn_fs_t *fs,
               svn_fs_x__representation_t *first_rep,
               apr_pool_t *result_pool,
               apr_pool_t *scratch_pool)
{
  svn_fs_x__representation_t rep;
  rep_state_t *rs = NULL;
  svn_fs_x__rep_header_t *rep_header;
  svn_boolean_t is_cached = FALSE;
  shared_file_t *shared_file = NULL;
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);

  *list = apr_array_make(result_pool, 1, sizeof(rep_state_t *));
  rep = *first_rep;

  /* for the top-level rep, we need the rep_args */
  SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs,
                           result_pool, iterpool));

  while (1)
    {
      svn_pool_clear(iterpool);

      /* fetch state, if that has not been done already */
      if (!rs)
        SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
                                 &rep, fs, result_pool, iterpool));

      /* for txn reps and containered reps, there won't be a cached
       * combined window */
      if (svn_fs_x__is_revision(rep.id.change_set)
          && rep_header->type != svn_fs_x__rep_container)
        SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached,
                                           result_pool));

      if (is_cached)
        {
          /* We already have a reconstructed window in our cache.
             Write a pseudo rep_state with the full length. */
          rs->start = 0;
          rs->current = 0;
          rs->size = (*window_p)->len;
          *src_state = rs;
          break;
        }

      if (rep_header->type == svn_fs_x__rep_container)
        {
          /* This is a container item, so just return the current rep_state. */
          *src_state = rs;
          break;
        }

      /* Push this rep onto the list.  If it's self-compressed, we're done. */
      APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
      if (rep_header->type == svn_fs_x__rep_self_delta)
        {
          *src_state = NULL;
          break;
        }

      /* Continue with the delta base named in the header we just read. */
      rep.id.change_set
        = svn_fs_x__change_set_by_rev(rep_header->base_revision);
      rep.id.number = rep_header->base_item_index;
      rep.size = rep_header->base_length;

      /* Have the next iteration create the rep state for that base. */
      rs = NULL;
    }
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
1308
1309
1310/* Create a rep_read_baton structure for node revision NODEREV in
1311   filesystem FS and store it in *RB_P.  If FULLTEXT_CACHE_KEY is not
1312   NULL, it is the rep's key in the fulltext cache, and a stringbuf
1313   must be allocated to store the text.  If rep is mutable, it must be
1314   refer to file contents.
1315
1316   Allocate the result in RESULT_POOL.  This includes the pools within *RB_P.
1317 */
1318static svn_error_t *
1319rep_read_get_baton(rep_read_baton_t **rb_p,
1320                   svn_fs_t *fs,
1321                   svn_fs_x__representation_t *rep,
1322                   svn_fs_x__pair_cache_key_t fulltext_cache_key,
1323                   apr_pool_t *result_pool)
1324{
1325  rep_read_baton_t *b;
1326
1327  b = apr_pcalloc(result_pool, sizeof(*b));
1328  b->fs = fs;
1329  b->rep = *rep;
1330  b->base_window = NULL;
1331  b->chunk_index = 0;
1332  b->buf = NULL;
1333  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5,
1334                                                result_pool);
1335  b->checksum_finalized = FALSE;
1336  memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1337  b->len = rep->expanded_size;
1338  b->off = 0;
1339  b->fulltext_cache_key = fulltext_cache_key;
1340
1341  /* Clearable sub-pools.  Since they have to remain valid for as long as B
1342     lives, we can't take them from some scratch pool.  The caller of this
1343     function will have no control over how those subpools will be used. */
1344  b->scratch_pool = svn_pool_create(result_pool);
1345  b->filehandle_pool = svn_pool_create(result_pool);
1346  b->fulltext_cache = NULL;
1347  b->fulltext_delivered = 0;
1348  b->current_fulltext = NULL;
1349
1350  /* Save our output baton. */
1351  *rb_p = b;
1352
1353  return SVN_NO_ERROR;
1354}
1355
/* Skip forwards to THIS_CHUNK in rep state RS and then read the next delta
   window into *NWIN, allocated in RESULT_POOL.  The window is taken from
   the txdelta window cache if it can be found there; otherwise it is read
   from the file and, for revision (non-txn) data, put into that cache.

   RS->CHUNK_INDEX must not be larger than THIS_CHUNK.  RS->CURRENT and
   RS->CHUNK_INDEX get updated along the way.  Return SVN_ERR_FS_CORRUPT
   if skipping or reading a window takes us beyond the end of the
   representation.  Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
                  rep_state_t *rs, apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  svn_boolean_t is_cached;
  apr_off_t start_offset;
  apr_off_t end_offset;
  apr_pool_t *iterpool;

  /* We can only move forwards within the representation. */
  SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);

  SVN_ERR(dgb__log_access(rs->sfile->fs, &rs->rep_id, NULL,
                          SVN_FS_X__ITEM_TYPE_ANY_REP, scratch_pool));

  /* Read the next window.  But first, try to find it in the cache. */
  SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
                            result_pool, scratch_pool));
  if (is_cached)
    return SVN_NO_ERROR;

  /* someone has to actually read the data from file.  Open it */
  SVN_ERR(auto_open_shared_file(rs->sfile));

  /* invoke the 'block-read' feature for non-txn data.
     However, don't do that if we are in the middle of some representation,
     because the block is unlikely to contain other data. */
  if (   rs->chunk_index == 0
      && svn_fs_x__is_revision(rs->rep_id.change_set)
      && rs->window_cache)
    {
      SVN_ERR(block_read(NULL, rs->sfile->fs, &rs->rep_id,
                         rs->sfile->rfile, result_pool, scratch_pool));

      /* reading the whole block probably also provided us with the
         desired txdelta window */
      SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
                                result_pool, scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* data is still not cached -> we need to read it.
     Make sure we have all the necessary info. */
  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
  SVN_ERR(auto_read_diff_version(rs, scratch_pool));

  /* RS->FILE may be shared between RS instances -> make sure we point
   * to the right data. */
  start_offset = rs->start + rs->current;
  SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));

  /* Skip windows to reach the current chunk if we aren't there yet. */
  iterpool = svn_pool_create(scratch_pool);
  while (rs->chunk_index < this_chunk)
    {
      apr_file_t *file = rs->sfile->rfile->file;
      svn_pool_clear(iterpool);

      SVN_ERR(svn_txdelta_skip_svndiff_window(file, rs->ver, iterpool));
      rs->chunk_index++;
      SVN_ERR(svn_fs_x__get_file_offset(&start_offset, file, iterpool));

      /* START_OFFSET now is where the next window begins.  There must be
         at least one more window to read, so we may not be at the end of
         the representation, yet. */
      rs->current = start_offset - rs->start;
      if (rs->current >= rs->size)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Reading one svndiff window read "
                                  "beyond the end of the "
                                  "representation"));
    }
  svn_pool_destroy(iterpool);

  /* Actually read the next window. */
  SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
                                          rs->ver, result_pool));
  SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
  /* Unlike above, ending exactly at the end of the representation is
     fine here. */
  rs->current = end_offset - rs->start;
  if (rs->current > rs->size)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Reading one svndiff window read beyond "
                              "the end of the representation"));

  /* the window has not been cached before, thus cache it now
   * (if caching is used for them at all) */
  if (svn_fs_x__is_revision(rs->rep_id.change_set))
    SVN_ERR(set_cached_window(*nwin, rs, start_offset, scratch_pool));

  return SVN_NO_ERROR;
}
1447
1448/* Read the whole representation RS and return it in *NWIN. */
1449static svn_error_t *
1450read_container_window(svn_stringbuf_t **nwin,
1451                      rep_state_t *rs,
1452                      apr_size_t size,
1453                      apr_pool_t *result_pool,
1454                      apr_pool_t *scratch_pool)
1455{
1456  svn_fs_x__rep_extractor_t *extractor = NULL;
1457  svn_fs_t *fs = rs->sfile->fs;
1458  svn_fs_x__data_t *ffd = fs->fsap_data;
1459  svn_fs_x__pair_cache_key_t key;
1460  svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
1461
1462  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1463  key.revision = svn_fs_x__packed_base_rev(fs, revision);
1464  key.second = rs->start;
1465
1466  /* already in cache? */
1467  if (ffd->reps_container_cache)
1468    {
1469      svn_boolean_t is_cached = FALSE;
1470      svn_fs_x__reps_baton_t baton;
1471      baton.fs = fs;
1472      baton.idx = rs->sub_item;
1473
1474      SVN_ERR(svn_cache__get_partial((void**)&extractor, &is_cached,
1475                                     ffd->reps_container_cache, &key,
1476                                     svn_fs_x__reps_get_func, &baton,
1477                                     result_pool));
1478    }
1479
1480  /* read from disk, if necessary */
1481  if (extractor == NULL)
1482    {
1483      SVN_ERR(auto_open_shared_file(rs->sfile));
1484      SVN_ERR(block_read((void **)&extractor, fs, &rs->rep_id,
1485                         rs->sfile->rfile, result_pool, scratch_pool));
1486    }
1487
1488  SVN_ERR(svn_fs_x__extractor_drive(nwin, extractor, rs->current, size,
1489                                    result_pool, scratch_pool));
1490
1491  /* Update RS. */
1492  rs->current += (apr_off_t)size;
1493
1494  return SVN_NO_ERROR;
1495}
1496
/* Get the undeltified window that is a result of combining all deltas
   from the current desired representation identified in *RB with its
   base representation.  Store the window in *RESULT.

   The window read is number RB->CHUNK_INDEX of every rep state in
   RB->RS_LIST.  All pools used are sub-pools of RB->SCRATCH_POOL; *RESULT
   is allocated in one of them that is not destroyed by this function.
   Return SVN_ERR_FS_CORRUPT if applying a window does not produce the
   target length stored in it. */
static svn_error_t *
get_combined_window(svn_stringbuf_t **result,
                    rep_read_baton_t *rb)
{
  apr_pool_t *pool, *new_pool, *window_pool;
  int i;
  apr_array_header_t *windows;
  svn_stringbuf_t *source, *buf = rb->base_window;
  rep_state_t *rs;
  apr_pool_t *iterpool;

  /* Read all windows that we need to combine. This is fine because
     the size of each window is relatively small (100kB) and skip-
     delta limits the number of deltas in a chain to well under 100.
     Stop early if one of them does not depend on its predecessors. */
  window_pool = svn_pool_create(rb->scratch_pool);
  windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
  iterpool = svn_pool_create(rb->scratch_pool);
  for (i = 0; i < rb->rs_list->nelts; ++i)
    {
      svn_txdelta_window_t *window;

      svn_pool_clear(iterpool);

      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
      SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
                                iterpool));

      APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
      if (window->src_ops == 0)
        {
          /* Make I the number of windows read, just like it is when the
             loop terminates normally.  The loop below relies on that. */
          ++i;
          break;
        }
    }

  /* Combine in the windows from the other delta reps. */
  pool = svn_pool_create(rb->scratch_pool);
  /* Start with the last window read and work our way back to the first
     one, i.e. the one of the representation that we want to read. */
  for (--i; i >= 0; --i)
    {
      svn_txdelta_window_t *window;

      svn_pool_clear(iterpool);

      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
      window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);

      /* Maybe, we've got a start representation in a container.  If we do,
         read as much data from it as the needed for the txdelta window's
         source view.
         Note that BUF / SOURCE may only be NULL in the first iteration. */
      source = buf;
      if (source == NULL && rb->src_state != NULL)
        SVN_ERR(read_container_window(&source, rb->src_state,
                                      window->sview_len, pool, iterpool));

      /* Combine this window with the current one. */
      new_pool = svn_pool_create(rb->scratch_pool);
      buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
      buf->len = window->tview_len;

      svn_txdelta_apply_instructions(window, source ? source->data : NULL,
                                     buf->data, &buf->len);
      if (buf->len != window->tview_len)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("svndiff window length is "
                                  "corrupt"));

      /* Cache windows only if the whole rep content could be read as a
         single chunk.  Only then will no other chunk need a deeper RS
         list than the cached chunk. */
      if (   (rb->chunk_index == 0) && (rs->current == rs->size)
          && svn_fs_x__is_revision(rs->rep_id.change_set))
        SVN_ERR(set_cached_combined_window(buf, rs, new_pool));

      rs->chunk_index++;

      /* Cycle pools so that we only need to hold three windows at a time. */
      /* POOL holds the SOURCE that we are done with.  NEW_POOL holds BUF,
         which is either the next SOURCE or the final result. */
      svn_pool_destroy(pool);
      pool = new_pool;
    }
  svn_pool_destroy(iterpool);

  svn_pool_destroy(window_pool);

  *result = buf;
  return SVN_NO_ERROR;
}
1588
1589/* Returns whether or not the expanded fulltext of the file is cachable
1590 * based on its size SIZE.  The decision depends on the cache used by RB.
1591 */
1592static svn_boolean_t
1593fulltext_size_is_cachable(svn_fs_x__data_t *ffd,
1594                          svn_filesize_t size)
1595{
1596  return (size < APR_SIZE_MAX)
1597      && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1598}
1599
1600/* Close method used on streams returned by read_representation().
1601 */
1602static svn_error_t *
1603rep_read_contents_close(void *baton)
1604{
1605  rep_read_baton_t *rb = baton;
1606
1607  svn_pool_destroy(rb->scratch_pool);
1608  svn_pool_destroy(rb->filehandle_pool);
1609
1610  return SVN_NO_ERROR;
1611}
1612
1613/* Inialize the representation read state RS for the given REP_HEADER and
1614 * p2l index ENTRY.  If not NULL, assign FILE and STREAM to RS.
1615 * Allocate all sub-structures of RS in RESULT_POOL.
1616 */
1617static svn_error_t *
1618init_rep_state(rep_state_t *rs,
1619               svn_fs_x__rep_header_t *rep_header,
1620               svn_fs_t *fs,
1621               svn_fs_x__revision_file_t *rev_file,
1622               svn_fs_x__p2l_entry_t* entry,
1623               apr_pool_t *result_pool)
1624{
1625  svn_fs_x__data_t *ffd = fs->fsap_data;
1626  shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
1627
1628  /* this function does not apply to representation containers */
1629  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1630                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1631  SVN_ERR_ASSERT(entry->item_count == 1);
1632
1633  shared_file->rfile = rev_file;
1634  shared_file->fs = fs;
1635  shared_file->revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1636  shared_file->pool = result_pool;
1637
1638  rs->sfile = shared_file;
1639  rs->rep_id = entry->items[0];
1640  rs->header_size = rep_header->header_size;
1641  rs->start = entry->offset + rs->header_size;
1642  rs->current = 4;
1643  rs->size = entry->size - rep_header->header_size - 7;
1644  rs->ver = 1;
1645  rs->chunk_index = 0;
1646  rs->window_cache = ffd->txdelta_window_cache;
1647  rs->combined_cache = ffd->combined_window_cache;
1648
1649  return SVN_NO_ERROR;
1650}
1651
1652/* Walk through all windows in the representation addressed by RS in FS
1653 * (excluding the delta bases) and put those not already cached into the
1654 * window caches.  If MAX_OFFSET is not -1, don't read windows that start
1655 * at or beyond that offset.  As a side effect, return the total sum of all
1656 * expanded window sizes in *FULLTEXT_LEN.
1657 * Use SCRATCH_POOL for temporary allocations.
1658 */
1659static svn_error_t *
1660cache_windows(svn_filesize_t *fulltext_len,
1661              svn_fs_t *fs,
1662              rep_state_t *rs,
1663              apr_off_t max_offset,
1664              apr_pool_t *scratch_pool)
1665{
1666  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1667  *fulltext_len = 0;
1668
1669  while (rs->current < rs->size)
1670    {
1671      svn_boolean_t is_cached = FALSE;
1672      window_sizes_t *window_sizes;
1673
1674      svn_pool_clear(iterpool);
1675      if (max_offset != -1 && rs->start + rs->current >= max_offset)
1676        {
1677          svn_pool_destroy(iterpool);
1678          return SVN_NO_ERROR;
1679        }
1680
1681      /* efficiently skip windows that are still being cached instead
1682       * of fully decoding them */
1683      SVN_ERR(get_cached_window_sizes(&window_sizes, rs, &is_cached,
1684                                      iterpool));
1685      if (is_cached)
1686        {
1687          *fulltext_len += window_sizes->target_len;
1688          rs->current += window_sizes->packed_len;
1689        }
1690      else
1691        {
1692          svn_txdelta_window_t *window;
1693          apr_off_t start_offset = rs->start + rs->current;
1694          apr_off_t end_offset;
1695          apr_off_t block_start;
1696
1697          /* navigate to & read the current window */
1698          SVN_ERR(rs_aligned_seek(rs, &block_start, start_offset, iterpool));
1699          SVN_ERR(svn_txdelta_read_svndiff_window(&window,
1700                                                  rs->sfile->rfile->stream,
1701                                                  rs->ver, iterpool));
1702
1703          /* aggregate expanded window size */
1704          *fulltext_len += window->tview_len;
1705
1706          /* determine on-disk window size */
1707          SVN_ERR(svn_fs_x__get_file_offset(&end_offset,
1708                                            rs->sfile->rfile->file,
1709                                            iterpool));
1710          rs->current = end_offset - rs->start;
1711          if (rs->current > rs->size)
1712            return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1713                          _("Reading one svndiff window read beyond "
1714                                      "the end of the representation"));
1715
1716          /* if the window has not been cached before, cache it now
1717           * (if caching is used for them at all) */
1718          if (!is_cached)
1719            SVN_ERR(set_cached_window(window, rs, start_offset, iterpool));
1720        }
1721
1722      rs->chunk_index++;
1723    }
1724
1725  svn_pool_destroy(iterpool);
1726
1727  return SVN_NO_ERROR;
1728}
1729
1730/* Try to get the representation header identified by KEY from FS's cache.
1731 * If it has not been cached, read it from the current position in STREAM
1732 * and put it into the cache (if caching has been enabled for rep headers).
1733 * Return the result in *REP_HEADER.  Use POOL for allocations.
1734 */
1735static svn_error_t *
1736read_rep_header(svn_fs_x__rep_header_t **rep_header,
1737                svn_fs_t *fs,
1738                svn_stream_t *stream,
1739                svn_fs_x__representation_cache_key_t *key,
1740                apr_pool_t *pool)
1741{
1742  svn_fs_x__data_t *ffd = fs->fsap_data;
1743  svn_boolean_t is_cached = FALSE;
1744
1745  if (ffd->rep_header_cache)
1746    {
1747      SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
1748                             ffd->rep_header_cache, key, pool));
1749      if (is_cached)
1750        return SVN_NO_ERROR;
1751    }
1752
1753  SVN_ERR(svn_fs_x__read_rep_header(rep_header, stream, pool, pool));
1754
1755  if (ffd->rep_header_cache)
1756    SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, pool));
1757
1758  return SVN_NO_ERROR;
1759}
1760
1761svn_error_t *
1762svn_fs_x__get_representation_length(svn_filesize_t *packed_len,
1763                                    svn_filesize_t *expanded_len,
1764                                    svn_fs_t *fs,
1765                                    svn_fs_x__revision_file_t *rev_file,
1766                                    svn_fs_x__p2l_entry_t* entry,
1767                                    apr_pool_t *scratch_pool)
1768{
1769  svn_fs_x__representation_cache_key_t key = { 0 };
1770  rep_state_t rs = { 0 };
1771  svn_fs_x__rep_header_t *rep_header;
1772
1773  /* this function does not apply to representation containers */
1774  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1775                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1776  SVN_ERR_ASSERT(entry->item_count == 1);
1777
1778  /* get / read the representation header */
1779  key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1780  key.is_packed = svn_fs_x__is_packed_rev(fs, key.revision);
1781  key.item_index = entry->items[0].number;
1782  SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &key,
1783                          scratch_pool));
1784
1785  /* prepare representation reader state (rs) structure */
1786  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
1787                         scratch_pool));
1788
1789  /* RS->SFILE may be shared between RS instances -> make sure we point
1790   * to the right data. */
1791  *packed_len = rs.size;
1792  SVN_ERR(cache_windows(expanded_len, fs, &rs, -1, scratch_pool));
1793
1794  return SVN_NO_ERROR;
1795}
1796
1797/* Return the next *LEN bytes of the rep from our plain / delta windows
1798   and store them in *BUF. */
1799static svn_error_t *
1800get_contents_from_windows(rep_read_baton_t *rb,
1801                          char *buf,
1802                          apr_size_t *len)
1803{
1804  apr_size_t copy_len, remaining = *len;
1805  char *cur = buf;
1806  rep_state_t *rs;
1807
1808  /* Special case for when there are no delta reps, only a
1809     containered text. */
1810  if (rb->rs_list->nelts == 0 && rb->buf == NULL)
1811    {
1812      copy_len = remaining;
1813      rs = rb->src_state;
1814
1815      /* reps in containers don't have a header */
1816      if (rs->header_size == 0 && rb->base_window == NULL)
1817        {
1818          /* RS->SIZE is unreliable here because it is based upon
1819           * the delta rep size _before_ putting the data into a
1820           * a container. */
1821          SVN_ERR(read_container_window(&rb->base_window, rs, rb->len,
1822                                        rb->scratch_pool, rb->scratch_pool));
1823          rs->current -= rb->base_window->len;
1824        }
1825
1826      if (rb->base_window != NULL)
1827        {
1828          /* We got the desired rep directly from the cache.
1829             This is where we need the pseudo rep_state created
1830             by build_rep_list(). */
1831          apr_size_t offset = (apr_size_t)rs->current;
1832          if (copy_len + offset > rb->base_window->len)
1833            copy_len = offset < rb->base_window->len
1834                     ? rb->base_window->len - offset
1835                     : 0ul;
1836
1837          memcpy (cur, rb->base_window->data + offset, copy_len);
1838        }
1839
1840      rs->current += copy_len;
1841      *len = copy_len;
1842      return SVN_NO_ERROR;
1843    }
1844
1845  while (remaining > 0)
1846    {
1847      /* If we have buffered data from a previous chunk, use that. */
1848      if (rb->buf)
1849        {
1850          /* Determine how much to copy from the buffer. */
1851          copy_len = rb->buf_len - rb->buf_pos;
1852          if (copy_len > remaining)
1853            copy_len = remaining;
1854
1855          /* Actually copy the data. */
1856          memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1857          rb->buf_pos += copy_len;
1858          cur += copy_len;
1859          remaining -= copy_len;
1860
1861          /* If the buffer is all used up, clear it and empty the
1862             local pool. */
1863          if (rb->buf_pos == rb->buf_len)
1864            {
1865              svn_pool_clear(rb->scratch_pool);
1866              rb->buf = NULL;
1867            }
1868        }
1869      else
1870        {
1871          svn_stringbuf_t *sbuf = NULL;
1872
1873          rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1874          if (rs->current == rs->size)
1875            break;
1876
1877          /* Get more buffered data by evaluating a chunk. */
1878          SVN_ERR(get_combined_window(&sbuf, rb));
1879
1880          rb->chunk_index++;
1881          rb->buf_len = sbuf->len;
1882          rb->buf = sbuf->data;
1883          rb->buf_pos = 0;
1884        }
1885    }
1886
1887  *len = cur - buf;
1888
1889  return SVN_NO_ERROR;
1890}
1891
/* Baton type for get_fulltext_partial().  It describes the section of a
   cached fulltext to copy and receives the number of bytes copied.
   get_contents_from_fulltext() fills it in for each read request. */
typedef struct fulltext_baton_t
{
  /* Target buffer to write to; of at least LEN bytes. */
  char *buffer;

  /* Offset within the respective fulltext at which we shall start to
     copy data into BUFFER.  Offsets beyond the end of the fulltext get
     clipped to its length. */
  apr_size_t start;

  /* Number of bytes to copy.  The actual amount may be less in case
     the fulltext is short(er). */
  apr_size_t len;

  /* Number of bytes actually copied into BUFFER.  Set by
     get_fulltext_partial(). */
  apr_size_t read;
} fulltext_baton_t;
1909
1910/* Implement svn_cache__partial_getter_func_t for fulltext caches.
1911 * From the fulltext in DATA, we copy the range specified by the
1912 * fulltext_baton_t* BATON into the buffer provided by that baton.
1913 * OUT and RESULT_POOL are not used.
1914 */
1915static svn_error_t *
1916get_fulltext_partial(void **out,
1917                     const void *data,
1918                     apr_size_t data_len,
1919                     void *baton,
1920                     apr_pool_t *result_pool)
1921{
1922  fulltext_baton_t *fulltext_baton = baton;
1923
1924  /* We cached the fulltext with an NUL appended to it. */
1925  apr_size_t fulltext_len = data_len - 1;
1926
1927  /* Clip the copy range to what the fulltext size allows. */
1928  apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1929  fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1930
1931  /* Copy the data to the output buffer and be done. */
1932  memcpy(fulltext_baton->buffer, (const char *)data + start,
1933         fulltext_baton->read);
1934
1935  return SVN_NO_ERROR;
1936}
1937
1938/* Find the fulltext specified in BATON in the fulltext cache given
1939 * as well by BATON.  If that succeeds, set *CACHED to TRUE and copy
1940 * up to the next *LEN bytes into BUFFER.  Set *LEN to the actual
1941 * number of bytes copied.
1942 */
1943static svn_error_t *
1944get_contents_from_fulltext(svn_boolean_t *cached,
1945                           rep_read_baton_t *baton,
1946                           char *buffer,
1947                           apr_size_t *len)
1948{
1949  void *dummy;
1950  fulltext_baton_t fulltext_baton;
1951
1952  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1953                 == baton->fulltext_delivered);
1954  fulltext_baton.buffer = buffer;
1955  fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1956  fulltext_baton.len = *len;
1957  fulltext_baton.read = 0;
1958
1959  SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1960                                 &baton->fulltext_cache_key,
1961                                 get_fulltext_partial, &fulltext_baton,
1962                                 baton->scratch_pool));
1963
1964  if (*cached)
1965    {
1966      baton->fulltext_delivered += fulltext_baton.read;
1967      *len = fulltext_baton.read;
1968    }
1969
1970  return SVN_NO_ERROR;
1971}
1972
1973/* Determine the optimal size of a string buf that shall receive a
1974 * (full-) text of NEEDED bytes.
1975 *
1976 * The critical point is that those buffers may be very large and
1977 * can cause memory fragmentation.  We apply simple heuristics to
1978 * make fragmentation less likely.
1979 */
1980static apr_size_t
1981optimimal_allocation_size(apr_size_t needed)
1982{
1983  /* For all allocations, assume some overhead that is shared between
1984   * OS memory managemnt, APR memory management and svn_stringbuf_t. */
1985  const apr_size_t overhead = 0x400;
1986  apr_size_t optimal;
1987
1988  /* If an allocation size if safe for other ephemeral buffers, it should
1989   * be safe for ours. */
1990  if (needed <= SVN__STREAM_CHUNK_SIZE)
1991    return needed;
1992
1993  /* Paranoia edge case:
1994   * Skip our heuristics if they created arithmetical overflow.
1995   * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
1996  if (needed >= APR_SIZE_MAX / 2 - overhead)
1997    return needed;
1998
1999  /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
2000   * Since we know NEEDED to be larger than that, use it as the
2001   * starting point.
2002   *
2003   * Heuristics: Allocate a power-of-two number of bytes that fit
2004   *             NEEDED plus some OVERHEAD.  The APR allocator
2005   *             will round it up to the next full page size.
2006   */
2007  optimal = SVN__STREAM_CHUNK_SIZE;
2008  while (optimal - overhead < needed)
2009    optimal *= 2;
2010
2011  /* This is above or equal to NEEDED. */
2012  return optimal - overhead;
2013}
2014
2015/* After a fulltext cache lookup failure, we will continue to read from
2016 * combined delta or plain windows.  However, we must first make that data
2017 * stream in BATON catch up tho the position LEN already delivered from the
2018 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
2019 * want to cache it at the end.
2020 */
2021static svn_error_t *
2022skip_contents(rep_read_baton_t *baton,
2023              svn_filesize_t len)
2024{
2025  svn_error_t *err = SVN_NO_ERROR;
2026
2027  /* Do we want to cache the reconstructed fulltext? */
2028  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
2029    {
2030      char *buffer;
2031      svn_filesize_t to_alloc = MAX(len, baton->len);
2032
2033      /* This should only be happening if BATON->LEN and LEN are
2034       * cacheable, implying they fit into memory. */
2035      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
2036
2037      /* Allocate the fulltext buffer. */
2038      baton->current_fulltext = svn_stringbuf_create_ensure(
2039                        optimimal_allocation_size((apr_size_t)to_alloc),
2040                        baton->filehandle_pool);
2041
2042      /* Read LEN bytes from the window stream and store the data
2043       * in the fulltext buffer (will be filled by further reads later). */
2044      baton->current_fulltext->len = (apr_size_t)len;
2045      baton->current_fulltext->data[(apr_size_t)len] = 0;
2046
2047      buffer = baton->current_fulltext->data;
2048      while (len > 0 && !err)
2049        {
2050          apr_size_t to_read = (apr_size_t)len;
2051          err = get_contents_from_windows(baton, buffer, &to_read);
2052          len -= to_read;
2053          buffer += to_read;
2054        }
2055    }
2056  else if (len > 0)
2057    {
2058      /* Simply drain LEN bytes from the window stream. */
2059      apr_pool_t *subpool = svn_pool_create(baton->scratch_pool);
2060      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
2061
2062      while (len > 0 && !err)
2063        {
2064          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2065                            ? SVN__STREAM_CHUNK_SIZE
2066                            : (apr_size_t)len;
2067
2068          err = get_contents_from_windows(baton, buffer, &to_read);
2069          len -= to_read;
2070        }
2071
2072      svn_pool_destroy(subpool);
2073    }
2074
2075  return svn_error_trace(err);
2076}
2077
2078/* BATON is of type `rep_read_baton_t'; read the next *LEN bytes of the
2079   representation and store them in *BUF.  Sum as we read and verify
2080   the MD5 sum at the end. */
2081static svn_error_t *
2082rep_read_contents(void *baton,
2083                  char *buf,
2084                  apr_size_t *len)
2085{
2086  rep_read_baton_t *rb = baton;
2087
2088  /* Get data from the fulltext cache for as long as we can. */
2089  if (rb->fulltext_cache)
2090    {
2091      svn_boolean_t cached;
2092      SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2093      if (cached)
2094        return SVN_NO_ERROR;
2095
2096      /* Cache miss.  From now on, we will never read from the fulltext
2097       * cache for this representation anymore. */
2098      rb->fulltext_cache = NULL;
2099    }
2100
2101  /* No fulltext cache to help us.  We must read from the window stream. */
2102  if (!rb->rs_list)
2103    {
2104      /* Window stream not initialized, yet.  Do it now. */
2105      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2106                             &rb->src_state, rb->fs, &rb->rep,
2107                             rb->filehandle_pool, rb->scratch_pool));
2108
2109      /* In case we did read from the fulltext cache before, make the
2110       * window stream catch up.  Also, initialize the fulltext buffer
2111       * if we want to cache the fulltext at the end. */
2112      SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2113    }
2114
2115  /* Get the next block of data. */
2116  SVN_ERR(get_contents_from_windows(rb, buf, len));
2117
2118  if (rb->current_fulltext)
2119    svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2120
2121  /* Perform checksumming.  We want to check the checksum as soon as
2122     the last byte of data is read, in case the caller never performs
2123     a short read, but we don't want to finalize the MD5 context
2124     twice. */
2125  if (!rb->checksum_finalized)
2126    {
2127      SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2128      rb->off += *len;
2129      if (rb->off == rb->len)
2130        {
2131          svn_checksum_t *md5_checksum;
2132          svn_checksum_t expected;
2133          expected.kind = svn_checksum_md5;
2134          expected.digest = rb->md5_digest;
2135
2136          rb->checksum_finalized = TRUE;
2137          SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2138                                     rb->scratch_pool));
2139          if (!svn_checksum_match(md5_checksum, &expected))
2140            return svn_error_create(SVN_ERR_FS_CORRUPT,
2141                    svn_checksum_mismatch_err(&expected, md5_checksum,
2142                        rb->scratch_pool,
2143                        _("Checksum mismatch while reading representation")),
2144                    NULL);
2145        }
2146    }
2147
2148  if (rb->off == rb->len && rb->current_fulltext)
2149    {
2150      svn_fs_x__data_t *ffd = rb->fs->fsap_data;
2151      SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2152                             rb->current_fulltext, rb->scratch_pool));
2153      rb->current_fulltext = NULL;
2154    }
2155
2156  return SVN_NO_ERROR;
2157}
2158
2159svn_error_t *
2160svn_fs_x__get_contents(svn_stream_t **contents_p,
2161                       svn_fs_t *fs,
2162                       svn_fs_x__representation_t *rep,
2163                       svn_boolean_t cache_fulltext,
2164                       apr_pool_t *result_pool)
2165{
2166  if (! rep)
2167    {
2168      *contents_p = svn_stream_empty(result_pool);
2169    }
2170  else
2171    {
2172      svn_fs_x__data_t *ffd = fs->fsap_data;
2173      svn_filesize_t len = rep->expanded_size;
2174      rep_read_baton_t *rb;
2175      svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
2176
2177      svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2178      fulltext_cache_key.revision = revision;
2179      fulltext_cache_key.second = rep->id.number;
2180
2181      /* Initialize the reader baton.  Some members may added lazily
2182       * while reading from the stream */
2183      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key,
2184                                 result_pool));
2185
2186      /* Make the stream attempt fulltext cache lookups if the fulltext
2187       * is cacheable.  If it is not, then also don't try to buffer and
2188       * cache it. */
2189      if (ffd->fulltext_cache && cache_fulltext
2190          && SVN_IS_VALID_REVNUM(revision)
2191          && fulltext_size_is_cachable(ffd, len))
2192        {
2193          rb->fulltext_cache = ffd->fulltext_cache;
2194        }
2195      else
2196        {
2197          /* This will also prevent the reconstructed fulltext from being
2198             put into the cache. */
2199          rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2200        }
2201
2202      *contents_p = svn_stream_create(rb, result_pool);
2203      svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2204                           rep_read_contents);
2205      svn_stream_set_close(*contents_p, rep_read_contents_close);
2206    }
2207
2208  return SVN_NO_ERROR;
2209}
2210
2211
/* Baton for cache_access_wrapper. Wraps the original parameters of
 * svn_fs_x__try_process_file_contents().
 */
typedef struct cache_access_wrapper_baton_t
{
  /* The content processor to call with the cached fulltext. */
  svn_fs_process_contents_func_t func;

  /* The baton to pass to FUNC. */
  void* baton;
} cache_access_wrapper_baton_t;
2220
2221/* Wrapper to translate between svn_fs_process_contents_func_t and
2222 * svn_cache__partial_getter_func_t.
2223 */
2224static svn_error_t *
2225cache_access_wrapper(void **out,
2226                     const void *data,
2227                     apr_size_t data_len,
2228                     void *baton,
2229                     apr_pool_t *pool)
2230{
2231  cache_access_wrapper_baton_t *wrapper_baton = baton;
2232
2233  SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2234                              data_len - 1, /* cache adds terminating 0 */
2235                              wrapper_baton->baton,
2236                              pool));
2237
2238  /* non-NULL value to signal the calling cache that all went well */
2239  *out = baton;
2240
2241  return SVN_NO_ERROR;
2242}
2243
2244svn_error_t *
2245svn_fs_x__try_process_file_contents(svn_boolean_t *success,
2246                                    svn_fs_t *fs,
2247                                    svn_fs_x__noderev_t *noderev,
2248                                    svn_fs_process_contents_func_t processor,
2249                                    void* baton,
2250                                    apr_pool_t *scratch_pool)
2251{
2252  svn_fs_x__representation_t *rep = noderev->data_rep;
2253  if (rep)
2254    {
2255      svn_fs_x__data_t *ffd = fs->fsap_data;
2256      svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2257
2258      fulltext_cache_key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2259      fulltext_cache_key.second = rep->id.number;
2260      if (ffd->fulltext_cache
2261          && SVN_IS_VALID_REVNUM(fulltext_cache_key.revision)
2262          && fulltext_size_is_cachable(ffd, rep->expanded_size))
2263        {
2264          cache_access_wrapper_baton_t wrapper_baton;
2265          void *dummy = NULL;
2266
2267          wrapper_baton.func = processor;
2268          wrapper_baton.baton = baton;
2269          return svn_cache__get_partial(&dummy, success,
2270                                        ffd->fulltext_cache,
2271                                        &fulltext_cache_key,
2272                                        cache_access_wrapper,
2273                                        &wrapper_baton,
2274                                        scratch_pool);
2275        }
2276    }
2277
2278  *success = FALSE;
2279  return SVN_NO_ERROR;
2280}
2281
/* Baton used when reading delta windows.  Instances are created by
   get_storaged_delta_stream(). */
typedef struct delta_read_baton_t
{
  /* Read state of the representation whose windows are handed out by
     delta_read_next_window(). */
  struct rep_state_t *rs;

  /* The digest returned by delta_read_md5_digest().  It is a copy of
     the MD5 digest of the target's data representation. */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
} delta_read_baton_t;
2288
2289/* This implements the svn_txdelta_next_window_fn_t interface. */
2290static svn_error_t *
2291delta_read_next_window(svn_txdelta_window_t **window,
2292                       void *baton,
2293                       apr_pool_t *pool)
2294{
2295  delta_read_baton_t *drb = baton;
2296  apr_pool_t *scratch_pool = svn_pool_create(pool);
2297
2298  *window = NULL;
2299  if (drb->rs->current < drb->rs->size)
2300    {
2301      SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2302                                scratch_pool));
2303      drb->rs->chunk_index++;
2304    }
2305
2306  svn_pool_destroy(scratch_pool);
2307
2308  return SVN_NO_ERROR;
2309}
2310
2311/* This implements the svn_txdelta_md5_digest_fn_t interface. */
2312static const unsigned char *
2313delta_read_md5_digest(void *baton)
2314{
2315  delta_read_baton_t *drb = baton;
2316  return drb->md5_digest;
2317}
2318
2319/* Return a txdelta stream for on-disk representation REP_STATE
2320 * of TARGET.  Allocate the result in RESULT_POOL.
2321 */
2322static svn_txdelta_stream_t *
2323get_storaged_delta_stream(rep_state_t *rep_state,
2324                          svn_fs_x__noderev_t *target,
2325                          apr_pool_t *result_pool)
2326{
2327  /* Create the delta read baton. */
2328  delta_read_baton_t *drb = apr_pcalloc(result_pool, sizeof(*drb));
2329  drb->rs = rep_state;
2330  memcpy(drb->md5_digest, target->data_rep->md5_digest,
2331         sizeof(drb->md5_digest));
2332  return svn_txdelta_stream_create(drb, delta_read_next_window,
2333                                   delta_read_md5_digest, result_pool);
2334}
2335
2336svn_error_t *
2337svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2338                                svn_fs_t *fs,
2339                                svn_fs_x__noderev_t *source,
2340                                svn_fs_x__noderev_t *target,
2341                                apr_pool_t *result_pool,
2342                                apr_pool_t *scratch_pool)
2343{
2344  svn_stream_t *source_stream, *target_stream;
2345  rep_state_t *rep_state;
2346  svn_fs_x__rep_header_t *rep_header;
2347  svn_fs_x__data_t *ffd = fs->fsap_data;
2348
2349  /* Try a shortcut: if the target is stored as a delta against the source,
2350     then just use that delta.  However, prefer using the fulltext cache
2351     whenever that is available. */
2352  if (target->data_rep && (source || !ffd->fulltext_cache))
2353    {
2354      /* Read target's base rep if any. */
2355      SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2356                               target->data_rep, fs, result_pool,
2357                               scratch_pool));
2358
2359      /* Try a shortcut: if the target is stored as a delta against the source,
2360         then just use that delta. */
2361      if (source && source->data_rep && target->data_rep)
2362        {
2363          /* If that matches source, then use this delta as is.
2364             Note that we want an actual delta here.  E.g. a self-delta would
2365             not be good enough. */
2366          if (rep_header->type == svn_fs_x__rep_delta
2367              && rep_header->base_revision
2368                 == svn_fs_x__get_revnum(source->data_rep->id.change_set)
2369              && rep_header->base_item_index == source->data_rep->id.number)
2370            {
2371              *stream_p = get_storaged_delta_stream(rep_state, target,
2372                                                    result_pool);
2373              return SVN_NO_ERROR;
2374            }
2375        }
2376      else if (!source)
2377        {
2378          /* We want a self-delta. There is a fair chance that TARGET got
2379             added in this revision and is already stored in the requested
2380             format. */
2381          if (rep_header->type == svn_fs_x__rep_self_delta)
2382            {
2383              *stream_p = get_storaged_delta_stream(rep_state, target,
2384                                                    result_pool);
2385              return SVN_NO_ERROR;
2386            }
2387        }
2388
2389      /* Don't keep file handles open for longer than necessary. */
2390      if (rep_state->sfile->rfile)
2391        {
2392          SVN_ERR(svn_fs_x__close_revision_file(rep_state->sfile->rfile));
2393          rep_state->sfile->rfile = NULL;
2394        }
2395    }
2396
2397  /* Read both fulltexts and construct a delta. */
2398  if (source)
2399    SVN_ERR(svn_fs_x__get_contents(&source_stream, fs, source->data_rep,
2400                                   TRUE, result_pool));
2401  else
2402    source_stream = svn_stream_empty(result_pool);
2403
2404  SVN_ERR(svn_fs_x__get_contents(&target_stream, fs, target->data_rep,
2405                                 TRUE, result_pool));
2406
2407  /* Because source and target stream will already verify their content,
2408   * there is no need to do this once more.  In particular if the stream
2409   * content is being fetched from cache. */
2410  svn_txdelta2(stream_p, source_stream, target_stream, FALSE, result_pool);
2411
2412  return SVN_NO_ERROR;
2413}
2414
2415/* Return TRUE when all svn_fs_x__dirent_t* in ENTRIES are already sorted
2416   by their respective name. */
2417static svn_boolean_t
2418sorted(apr_array_header_t *entries)
2419{
2420  int i;
2421
2422  const svn_fs_x__dirent_t * const *dirents = (const void *)entries->elts;
2423  for (i = 0; i < entries->nelts-1; ++i)
2424    if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2425      return FALSE;
2426
2427  return TRUE;
2428}
2429
2430/* Compare the names of the two dirents given in **A and **B. */
2431static int
2432compare_dirents(const void *a,
2433                const void *b)
2434{
2435  const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2436  const svn_fs_x__dirent_t *rhs = *((const svn_fs_x__dirent_t * const *) b);
2437
2438  return strcmp(lhs->name, rhs->name);
2439}
2440
2441/* Compare the name of the dirents given in **A with the C string in *B. */
2442static int
2443compare_dirent_name(const void *a,
2444                    const void *b)
2445{
2446  const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2447  const char *rhs = b;
2448
2449  return strcmp(lhs->name, rhs);
2450}
2451
/* Into ENTRIES, read all directory entries from the key-value text in
 * STREAM.  If INCREMENTAL is TRUE, read until the end of the STREAM and
 * update the data.  ID is provided for nicer error messages.
 */
2456static svn_error_t *
2457read_dir_entries(apr_array_header_t *entries,
2458                 svn_stream_t *stream,
2459                 svn_boolean_t incremental,
2460                 const svn_fs_x__id_t *id,
2461                 apr_pool_t *result_pool,
2462                 apr_pool_t *scratch_pool)
2463{
2464  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2465  apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
2466  const char *terminator = SVN_HASH_TERMINATOR;
2467
2468  /* Read until the terminator (non-incremental) or the end of STREAM
2469     (incremental mode).  In the latter mode, we use a temporary HASH
2470     to make updating and removing entries cheaper. */
2471  while (1)
2472    {
2473      svn_hash__entry_t entry;
2474      svn_fs_x__dirent_t *dirent;
2475      char *str;
2476
2477      svn_pool_clear(iterpool);
2478      SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
2479                                   incremental, iterpool));
2480
2481      /* End of directory? */
2482      if (entry.key == NULL)
2483        {
2484          /* In incremental mode, we skip the terminator and read the
2485             increments following it until the end of the stream. */
2486          if (incremental && terminator)
2487            terminator = NULL;
2488          else
2489            break;
2490        }
2491
2492      /* Deleted entry? */
2493      if (entry.val == NULL)
2494        {
2495          /* We must be in incremental mode */
2496          assert(hash);
2497          apr_hash_set(hash, entry.key, entry.keylen, NULL);
2498          continue;
2499        }
2500
2501      /* Add a new directory entry. */
2502      dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2503      dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2504
2505      str = svn_cstring_tokenize(" ", &entry.val);
2506      if (str == NULL)
2507        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2508                      _("Directory entry corrupt in '%s'"),
2509                      svn_fs_x__id_unparse(id, scratch_pool)->data);
2510
2511      if (strcmp(str, SVN_FS_X__KIND_FILE) == 0)
2512        {
2513          dirent->kind = svn_node_file;
2514        }
2515      else if (strcmp(str, SVN_FS_X__KIND_DIR) == 0)
2516        {
2517          dirent->kind = svn_node_dir;
2518        }
2519      else
2520        {
2521          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2522                      _("Directory entry corrupt in '%s'"),
2523                      svn_fs_x__id_unparse(id, scratch_pool)->data);
2524        }
2525
2526      str = svn_cstring_tokenize(" ", &entry.val);
2527      if (str == NULL)
2528        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2529                      _("Directory entry corrupt in '%s'"),
2530                      svn_fs_x__id_unparse(id, scratch_pool)->data);
2531
2532      SVN_ERR(svn_fs_x__id_parse(&dirent->id, str));
2533
2534      /* In incremental mode, update the hash; otherwise, write to the
2535       * final array. */
2536      if (incremental)
2537        apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2538      else
2539        APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
2540    }
2541
2542  /* Convert container to a sorted array. */
2543  if (incremental)
2544    {
2545      apr_hash_index_t *hi;
2546      for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2547        APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi);
2548    }
2549
2550  if (!sorted(entries))
2551    svn_sort__array(entries, compare_dirents);
2552
2553  svn_pool_destroy(iterpool);
2554
2555  return SVN_NO_ERROR;
2556}
2557
2558/* Fetch the contents of a directory into ENTRIES.  Values are stored
2559   as filename to string mappings; further conversion is necessary to
2560   convert them into svn_fs_x__dirent_t values. */
2561static svn_error_t *
2562get_dir_contents(apr_array_header_t **entries,
2563                 svn_fs_t *fs,
2564                 svn_fs_x__noderev_t *noderev,
2565                 apr_pool_t *result_pool,
2566                 apr_pool_t *scratch_pool)
2567{
2568  svn_stream_t *contents;
2569  const svn_fs_x__id_t *id = &noderev->noderev_id;
2570
2571  *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_x__dirent_t *));
2572  if (noderev->data_rep
2573      && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set))
2574    {
2575      const char *filename
2576        = svn_fs_x__path_txn_node_children(fs, id, scratch_pool,
2577                                           scratch_pool);
2578
2579      /* The representation is mutable.  Read the old directory
2580         contents from the mutable children file, followed by the
2581         changes we've made in this transaction. */
2582      SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
2583                                       scratch_pool));
2584      SVN_ERR(read_dir_entries(*entries, contents, TRUE,  id,
2585                               result_pool, scratch_pool));
2586      SVN_ERR(svn_stream_close(contents));
2587    }
2588  else if (noderev->data_rep)
2589    {
2590      /* Undeltify content before parsing it. Otherwise, we could only
2591       * parse it byte-by-byte.
2592       */
2593      apr_size_t len = noderev->data_rep->expanded_size;
2594      svn_stringbuf_t *text;
2595
2596      /* The representation is immutable.  Read it normally. */
2597      SVN_ERR(svn_fs_x__get_contents(&contents, fs, noderev->data_rep,
2598                                     FALSE, scratch_pool));
2599      SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2600      SVN_ERR(svn_stream_close(contents));
2601
2602      /* de-serialize hash */
2603      contents = svn_stream_from_stringbuf(text, scratch_pool);
2604      SVN_ERR(read_dir_entries(*entries, contents, FALSE,  id,
2605                               result_pool, scratch_pool));
2606    }
2607
2608  return SVN_NO_ERROR;
2609}
2610
2611
2612/* Return the cache object in FS responsible to storing the directory the
2613 * NODEREV plus the corresponding pre-allocated *KEY.
2614 */
2615static svn_cache__t *
2616locate_dir_cache(svn_fs_t *fs,
2617                 svn_fs_x__id_t *key,
2618                 svn_fs_x__noderev_t *noderev)
2619{
2620  svn_fs_x__data_t *ffd = fs->fsap_data;
2621  if (svn_fs_x__is_txn(noderev->noderev_id.change_set))
2622    {
2623      /* data in txns must be addressed by ID since the representation has
2624         not been created, yet. */
2625      *key = noderev->noderev_id;
2626    }
2627  else
2628    {
2629      /* committed data can use simple rev,item pairs */
2630      if (noderev->data_rep)
2631        {
2632          *key = noderev->data_rep->id;
2633        }
2634      else
2635        {
2636          /* no data rep -> empty directory.
2637             Use a key that does definitely not clash with non-NULL reps. */
2638          key->change_set = SVN_FS_X__INVALID_CHANGE_SET;
2639          key->number = SVN_FS_X__ITEM_INDEX_UNUSED;
2640        }
2641    }
2642
2643  return ffd->dir_cache;
2644}
2645
2646svn_error_t *
2647svn_fs_x__rep_contents_dir(apr_array_header_t **entries_p,
2648                           svn_fs_t *fs,
2649                           svn_fs_x__noderev_t *noderev,
2650                           apr_pool_t *result_pool,
2651                           apr_pool_t *scratch_pool)
2652{
2653  svn_fs_x__id_t key;
2654
2655  /* find the cache we may use */
2656  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2657  if (cache)
2658    {
2659      svn_boolean_t found;
2660
2661      SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, &key,
2662                             result_pool));
2663      if (found)
2664        return SVN_NO_ERROR;
2665    }
2666
2667  /* Read in the directory contents. */
2668  SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool,
2669                           scratch_pool));
2670
2671  /* Update the cache, if we are to use one. */
2672  if (cache)
2673    SVN_ERR(svn_cache__set(cache, &key, *entries_p, scratch_pool));
2674
2675  return SVN_NO_ERROR;
2676}
2677
2678svn_fs_x__dirent_t *
2679svn_fs_x__find_dir_entry(apr_array_header_t *entries,
2680                         const char *name,
2681                         int *hint)
2682{
2683  svn_fs_x__dirent_t **result
2684    = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2685  return result ? *result : NULL;
2686}
2687
2688svn_error_t *
2689svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent,
2690                                 svn_fs_t *fs,
2691                                 svn_fs_x__noderev_t *noderev,
2692                                 const char *name,
2693                                 apr_size_t *hint,
2694                                 apr_pool_t *result_pool,
2695                                 apr_pool_t *scratch_pool)
2696{
2697  svn_boolean_t found = FALSE;
2698
2699  /* find the cache we may use */
2700  svn_fs_x__id_t key;
2701  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2702  if (cache)
2703    {
2704      svn_fs_x__ede_baton_t baton;
2705      baton.hint = *hint;
2706      baton.name = name;
2707
2708      /* Cache lookup. */
2709      SVN_ERR(svn_cache__get_partial((void **)dirent,
2710                                     &found,
2711                                     cache,
2712                                     &key,
2713                                     svn_fs_x__extract_dir_entry,
2714                                     &baton,
2715                                     result_pool));
2716
2717      /* Remember the new clue only if we found something at that spot. */
2718      if (found)
2719        *hint = baton.hint;
2720    }
2721
2722  /* fetch data from disk if we did not find it in the cache */
2723  if (! found)
2724    {
2725      apr_array_header_t *entries;
2726      svn_fs_x__dirent_t *entry;
2727      svn_fs_x__dirent_t *entry_copy = NULL;
2728
2729      /* read the dir from the file system. It will probably be put it
2730         into the cache for faster lookup in future calls. */
2731      SVN_ERR(svn_fs_x__rep_contents_dir(&entries, fs, noderev,
2732                                         scratch_pool, scratch_pool));
2733
2734      /* find desired entry and return a copy in POOL, if found */
2735      entry = svn_fs_x__find_dir_entry(entries, name, NULL);
2736      if (entry)
2737        {
2738          entry_copy = apr_pmemdup(result_pool, entry, sizeof(*entry_copy));
2739          entry_copy->name = apr_pstrdup(result_pool, entry->name);
2740        }
2741
2742      *dirent = entry_copy;
2743    }
2744
2745  return SVN_NO_ERROR;
2746}
2747
2748svn_error_t *
2749svn_fs_x__get_proplist(apr_hash_t **proplist_p,
2750                       svn_fs_t *fs,
2751                       svn_fs_x__noderev_t *noderev,
2752                       apr_pool_t *result_pool,
2753                       apr_pool_t *scratch_pool)
2754{
2755  apr_hash_t *proplist;
2756  svn_stream_t *stream;
2757  const svn_fs_x__id_t *noderev_id = &noderev->noderev_id;
2758
2759  if (noderev->prop_rep
2760      && !svn_fs_x__is_revision(noderev->prop_rep->id.change_set))
2761    {
2762      const char *filename = svn_fs_x__path_txn_node_props(fs, noderev_id,
2763                                                           scratch_pool,
2764                                                           scratch_pool);
2765      proplist = apr_hash_make(result_pool);
2766
2767      SVN_ERR(svn_stream_open_readonly(&stream, filename, scratch_pool,
2768                                       scratch_pool));
2769      SVN_ERR(svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR,
2770                             result_pool));
2771      SVN_ERR(svn_stream_close(stream));
2772    }
2773  else if (noderev->prop_rep)
2774    {
2775      svn_fs_x__data_t *ffd = fs->fsap_data;
2776      svn_fs_x__representation_t *rep = noderev->prop_rep;
2777      svn_fs_x__pair_cache_key_t key = { 0 };
2778
2779      key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2780      key.second = rep->id.number;
2781      if (ffd->properties_cache && SVN_IS_VALID_REVNUM(key.revision))
2782        {
2783          svn_boolean_t is_cached;
2784          SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
2785                                 ffd->properties_cache, &key, result_pool));
2786          if (is_cached)
2787            return SVN_NO_ERROR;
2788        }
2789
2790      proplist = apr_hash_make(result_pool);
2791      SVN_ERR(svn_fs_x__get_contents(&stream, fs, noderev->prop_rep, FALSE,
2792                                     scratch_pool));
2793      SVN_ERR(svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR,
2794                             result_pool));
2795      SVN_ERR(svn_stream_close(stream));
2796
2797      if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->id.change_set))
2798        SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist,
2799                               scratch_pool));
2800    }
2801  else
2802    {
2803      /* return an empty prop list if the node doesn't have any props */
2804      proplist = apr_hash_make(result_pool);
2805    }
2806
2807  *proplist_p = proplist;
2808
2809  return SVN_NO_ERROR;
2810}
2811
2812
2813
2814svn_error_t *
2815svn_fs_x__get_changes(apr_array_header_t **changes,
2816                      svn_fs_t *fs,
2817                      svn_revnum_t rev,
2818                      apr_pool_t *result_pool)
2819{
2820  svn_fs_x__revision_file_t *revision_file;
2821  svn_boolean_t found;
2822  svn_fs_x__data_t *ffd = fs->fsap_data;
2823  apr_pool_t *scratch_pool = svn_pool_create(result_pool);
2824
2825  svn_fs_x__id_t id;
2826  id.change_set = svn_fs_x__change_set_by_rev(rev);
2827  id.number = SVN_FS_X__ITEM_INDEX_CHANGES;
2828
2829  /* Provide revision file. */
2830
2831  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
2832  SVN_ERR(svn_fs_x__open_pack_or_rev_file(&revision_file, fs, rev,
2833                                          scratch_pool, scratch_pool));
2834
2835  /* try cache lookup first */
2836
2837  if (ffd->changes_container_cache && svn_fs_x__is_packed_rev(fs, rev))
2838    {
2839      apr_off_t offset;
2840      apr_uint32_t sub_item;
2841      svn_fs_x__pair_cache_key_t key;
2842
2843      SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file,
2844                                    &id, scratch_pool));
2845      key.revision = svn_fs_x__packed_base_rev(fs, rev);
2846      key.second = offset;
2847
2848      SVN_ERR(svn_cache__get_partial((void **)changes, &found,
2849                                     ffd->changes_container_cache, &key,
2850                                     svn_fs_x__changes_get_list_func,
2851                                     &sub_item, result_pool));
2852    }
2853  else if (ffd->changes_cache)
2854    {
2855      SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
2856                             &rev, result_pool));
2857    }
2858  else
2859    {
2860      found = FALSE;
2861    }
2862
2863  if (!found)
2864    {
2865      /* 'block-read' will also provide us with the desired data */
2866      SVN_ERR(block_read((void **)changes, fs, &id, revision_file,
2867                         result_pool, scratch_pool));
2868
2869      SVN_ERR(svn_fs_x__close_revision_file(revision_file));
2870    }
2871
2872  SVN_ERR(dgb__log_access(fs, &id, *changes, SVN_FS_X__ITEM_TYPE_CHANGES,
2873                          scratch_pool));
2874
2875  svn_pool_destroy(scratch_pool);
2876  return SVN_NO_ERROR;
2877}
2878
2879/* Fetch the representation data (header, txdelta / plain windows)
2880 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
2881 * Read the data from the already open FILE and the wrapping
2882 * STREAM object.  If MAX_OFFSET is not -1, don't read windows that start
2883 * at or beyond that offset.  Use SCRATCH_POOL for temporary allocations.
2884 */
2885static svn_error_t *
2886block_read_contents(svn_fs_t *fs,
2887                    svn_fs_x__revision_file_t *rev_file,
2888                    svn_fs_x__p2l_entry_t* entry,
2889                    svn_fs_x__pair_cache_key_t *key,
2890                    apr_off_t max_offset,
2891                    apr_pool_t *scratch_pool)
2892{
2893  svn_fs_x__data_t *ffd = fs->fsap_data;
2894  svn_fs_x__representation_cache_key_t header_key = { 0 };
2895  rep_state_t rs = { 0 };
2896  svn_filesize_t fulltext_len;
2897  svn_fs_x__rep_header_t *rep_header;
2898
2899  if (!ffd->txdelta_window_cache || !ffd->combined_window_cache)
2900    return SVN_NO_ERROR;
2901
2902  header_key.revision = (apr_int32_t)key->revision;
2903  header_key.is_packed = svn_fs_x__is_packed_rev(fs, header_key.revision);
2904  header_key.item_index = key->second;
2905
2906  SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
2907                          scratch_pool));
2908  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, scratch_pool));
2909  SVN_ERR(cache_windows(&fulltext_len, fs, &rs, max_offset, scratch_pool));
2910
2911  return SVN_NO_ERROR;
2912}
2913
2914/* For the given REV_FILE in FS, in *STREAM return a stream covering the
2915 * item specified by ENTRY.  Also, verify the item's content by low-level
2916 * checksum.  Allocate the result in POOL.
2917 */
2918static svn_error_t *
2919read_item(svn_stream_t **stream,
2920          svn_fs_t *fs,
2921          svn_fs_x__revision_file_t *rev_file,
2922          svn_fs_x__p2l_entry_t* entry,
2923          apr_pool_t *pool)
2924{
2925  apr_uint32_t digest;
2926  svn_checksum_t *expected, *actual;
2927  apr_uint32_t plain_digest;
2928
2929  /* Read item into string buffer. */
2930  svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
2931  text->len = entry->size;
2932  text->data[text->len] = 0;
2933  SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
2934                                 NULL, NULL, pool));
2935
2936  /* Return (construct, calculate) stream and checksum. */
2937  *stream = svn_stream_from_stringbuf(text, pool);
2938  digest = svn__fnv1a_32x4(text->data, text->len);
2939
2940  /* Checksums will match most of the time. */
2941  if (entry->fnv1_checksum == digest)
2942    return SVN_NO_ERROR;
2943
2944  /* Construct proper checksum objects from their digests to allow for
2945   * nice error messages. */
2946  plain_digest = htonl(entry->fnv1_checksum);
2947  expected = svn_checksum__from_digest_fnv1a_32x4(
2948                (const unsigned char *)&plain_digest, pool);
2949  plain_digest = htonl(digest);
2950  actual = svn_checksum__from_digest_fnv1a_32x4(
2951                (const unsigned char *)&plain_digest, pool);
2952
2953  /* Construct the full error message with all the info we have. */
2954  return svn_checksum_mismatch_err(expected, actual, pool,
2955                 _("Low-level checksum mismatch while reading\n"
2956                   "%s bytes of meta data at offset %s "),
2957                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
2958                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset));
2959}
2960
2961/* Read all txdelta / plain windows following REP_HEADER in FS as described
2962 * by ENTRY.  Read the data from the already open FILE and the wrapping
2963 * STREAM object.  If MAX_OFFSET is not -1, don't read windows that start
2964 * at or beyond that offset.  Use SCRATCH_POOL for temporary allocations.
2965 * If caching is not enabled, this is a no-op.
2966 */
2967static svn_error_t *
2968block_read_changes(apr_array_header_t **changes,
2969                   svn_fs_t *fs,
2970                   svn_fs_x__revision_file_t *rev_file,
2971                   svn_fs_x__p2l_entry_t* entry,
2972                   svn_boolean_t must_read,
2973                   apr_pool_t *result_pool,
2974                   apr_pool_t *scratch_pool)
2975{
2976  svn_fs_x__data_t *ffd = fs->fsap_data;
2977  svn_stream_t *stream;
2978  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
2979  if (!must_read && !ffd->changes_cache)
2980    return SVN_NO_ERROR;
2981
2982  /* we don't support containers, yet */
2983  SVN_ERR_ASSERT(entry->item_count == 1);
2984
2985  /* already in cache? */
2986  if (!must_read && ffd->changes_cache)
2987    {
2988      svn_boolean_t is_cached = FALSE;
2989      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &revision,
2990                                 scratch_pool));
2991      if (is_cached)
2992        return SVN_NO_ERROR;
2993    }
2994
2995  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
2996
2997  /* read changes from revision file */
2998
2999  SVN_ERR(svn_fs_x__read_changes(changes, stream, result_pool, scratch_pool));
3000
3001  /* cache for future reference */
3002
3003  if (ffd->changes_cache)
3004    {
3005      /* Guesstimate for the size of the in-cache representation. */
3006      apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts;
3007
3008      /* Don't even serialize data that probably won't fit into the
3009        * cache.  This often implies that either CHANGES is very
3010        * large, memory is scarce or both.  Having a huge temporary
3011        * copy would not be a good thing in either case. */
3012      if (svn_cache__is_cachable(ffd->changes_cache, estimated_size))
3013        SVN_ERR(svn_cache__set(ffd->changes_cache, &revision, *changes,
3014                               scratch_pool));
3015    }
3016
3017  return SVN_NO_ERROR;
3018}
3019
3020static svn_error_t *
3021block_read_changes_container(apr_array_header_t **changes,
3022                             svn_fs_t *fs,
3023                             svn_fs_x__revision_file_t *rev_file,
3024                             svn_fs_x__p2l_entry_t* entry,
3025                             apr_uint32_t sub_item,
3026                             svn_boolean_t must_read,
3027                             apr_pool_t *result_pool,
3028                             apr_pool_t *scratch_pool)
3029{
3030  svn_fs_x__data_t *ffd = fs->fsap_data;
3031  svn_fs_x__changes_t *container;
3032  svn_fs_x__pair_cache_key_t key;
3033  svn_stream_t *stream;
3034  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3035
3036  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3037  key.second = entry->offset;
3038
3039  /* already in cache? */
3040  if (!must_read && ffd->changes_container_cache)
3041    {
3042      svn_boolean_t is_cached = FALSE;
3043      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_container_cache,
3044                                 &key, scratch_pool));
3045      if (is_cached)
3046        return SVN_NO_ERROR;
3047    }
3048
3049  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3050
3051  /* read changes from revision file */
3052
3053  SVN_ERR(svn_fs_x__read_changes_container(&container, stream, scratch_pool,
3054                                           scratch_pool));
3055
3056  /* extract requested data */
3057
3058  if (must_read)
3059    SVN_ERR(svn_fs_x__changes_get_list(changes, container, sub_item,
3060                                       result_pool));
3061
3062  if (ffd->changes_container_cache)
3063    SVN_ERR(svn_cache__set(ffd->changes_container_cache, &key, container,
3064                           scratch_pool));
3065
3066  return SVN_NO_ERROR;
3067}
3068
3069static svn_error_t *
3070block_read_noderev(svn_fs_x__noderev_t **noderev_p,
3071                   svn_fs_t *fs,
3072                   svn_fs_x__revision_file_t *rev_file,
3073                   svn_fs_x__p2l_entry_t* entry,
3074                   svn_fs_x__pair_cache_key_t *key,
3075                   svn_boolean_t must_read,
3076                   apr_pool_t *result_pool,
3077                   apr_pool_t *scratch_pool)
3078{
3079  svn_fs_x__data_t *ffd = fs->fsap_data;
3080  svn_stream_t *stream;
3081  if (!must_read && !ffd->node_revision_cache)
3082    return SVN_NO_ERROR;
3083
3084  /* we don't support containers, yet */
3085  SVN_ERR_ASSERT(entry->item_count == 1);
3086
3087  /* already in cache? */
3088  if (!must_read && ffd->node_revision_cache)
3089    {
3090      svn_boolean_t is_cached = FALSE;
3091      SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, key,
3092                                 scratch_pool));
3093      if (is_cached)
3094        return SVN_NO_ERROR;
3095    }
3096
3097  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3098
3099  /* read node rev from revision file */
3100
3101  SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream, result_pool,
3102                                 scratch_pool));
3103  if (ffd->node_revision_cache)
3104    SVN_ERR(svn_cache__set(ffd->node_revision_cache, key, *noderev_p,
3105                           scratch_pool));
3106
3107  return SVN_NO_ERROR;
3108}
3109
3110static svn_error_t *
3111block_read_noderevs_container(svn_fs_x__noderev_t **noderev_p,
3112                              svn_fs_t *fs,
3113                              svn_fs_x__revision_file_t *rev_file,
3114                              svn_fs_x__p2l_entry_t* entry,
3115                              apr_uint32_t sub_item,
3116                              svn_boolean_t must_read,
3117                              apr_pool_t *result_pool,
3118                              apr_pool_t *scratch_pool)
3119{
3120  svn_fs_x__data_t *ffd = fs->fsap_data;
3121  svn_fs_x__noderevs_t *container;
3122  svn_stream_t *stream;
3123  svn_fs_x__pair_cache_key_t key;
3124  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3125
3126  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3127  key.second = entry->offset;
3128
3129  /* already in cache? */
3130  if (!must_read && ffd->noderevs_container_cache)
3131    {
3132      svn_boolean_t is_cached = FALSE;
3133      SVN_ERR(svn_cache__has_key(&is_cached, ffd->noderevs_container_cache,
3134                                 &key, scratch_pool));
3135      if (is_cached)
3136        return SVN_NO_ERROR;
3137    }
3138
3139  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3140
3141  /* read noderevs from revision file */
3142  SVN_ERR(svn_fs_x__read_noderevs_container(&container, stream, scratch_pool,
3143                                            scratch_pool));
3144
3145  /* extract requested data */
3146  if (must_read)
3147    SVN_ERR(svn_fs_x__noderevs_get(noderev_p, container, sub_item,
3148                                   result_pool));
3149
3150  if (ffd->noderevs_container_cache)
3151    SVN_ERR(svn_cache__set(ffd->noderevs_container_cache, &key, container,
3152                           scratch_pool));
3153
3154  return SVN_NO_ERROR;
3155}
3156
3157static svn_error_t *
3158block_read_reps_container(svn_fs_x__rep_extractor_t **extractor,
3159                          svn_fs_t *fs,
3160                          svn_fs_x__revision_file_t *rev_file,
3161                          svn_fs_x__p2l_entry_t* entry,
3162                          apr_uint32_t sub_item,
3163                          svn_boolean_t must_read,
3164                          apr_pool_t *result_pool,
3165                          apr_pool_t *scratch_pool)
3166{
3167  svn_fs_x__data_t *ffd = fs->fsap_data;
3168  svn_fs_x__reps_t *container;
3169  svn_stream_t *stream;
3170  svn_fs_x__pair_cache_key_t key;
3171  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3172
3173  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3174  key.second = entry->offset;
3175
3176  /* already in cache? */
3177  if (!must_read && ffd->reps_container_cache)
3178    {
3179      svn_boolean_t is_cached = FALSE;
3180      SVN_ERR(svn_cache__has_key(&is_cached, ffd->reps_container_cache,
3181                                 &key, scratch_pool));
3182      if (is_cached)
3183        return SVN_NO_ERROR;
3184    }
3185
3186  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3187
3188  /* read noderevs from revision file */
3189  SVN_ERR(svn_fs_x__read_reps_container(&container, stream, result_pool,
3190                                        scratch_pool));
3191
3192  /* extract requested data */
3193
3194  if (must_read)
3195    SVN_ERR(svn_fs_x__reps_get(extractor, fs, container, sub_item,
3196                               result_pool));
3197
3198  if (ffd->noderevs_container_cache)
3199    SVN_ERR(svn_cache__set(ffd->reps_container_cache, &key, container,
3200                           scratch_pool));
3201
3202  return SVN_NO_ERROR;
3203}
3204
3205static svn_error_t *
3206block_read(void **result,
3207           svn_fs_t *fs,
3208           const svn_fs_x__id_t *id,
3209           svn_fs_x__revision_file_t *revision_file,
3210           apr_pool_t *result_pool,
3211           apr_pool_t *scratch_pool)
3212{
3213  svn_fs_x__data_t *ffd = fs->fsap_data;
3214  apr_off_t offset, wanted_offset = 0;
3215  apr_off_t block_start = 0;
3216  apr_uint32_t wanted_sub_item = 0;
3217  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
3218  apr_array_header_t *entries;
3219  int run_count = 0;
3220  int i;
3221  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3222
3223  /* don't try this on transaction protorev files */
3224  SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3225
3226  /* index lookup: find the OFFSET of the item we *must* read plus (in the
3227   * "do-while" block) the list of items in the same block. */
3228  SVN_ERR(svn_fs_x__item_offset(&wanted_offset, &wanted_sub_item, fs,
3229                                revision_file, id, iterpool));
3230
3231  offset = wanted_offset;
3232  do
3233    {
3234      /* fetch list of items in the block surrounding OFFSET */
3235      SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3236                           iterpool));
3237      SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, fs, revision_file,
3238                                         revision, block_start,
3239                                         ffd->block_size, scratch_pool,
3240                                         scratch_pool));
3241
3242      /* read all items from the block */
3243      for (i = 0; i < entries->nelts; ++i)
3244        {
3245          svn_boolean_t is_result, is_wanted;
3246          apr_pool_t *pool;
3247
3248          svn_fs_x__p2l_entry_t* entry
3249            = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
3250
3251          /* skip empty sections */
3252          if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
3253            continue;
3254
3255          /* the item / container we were looking for? */
3256          is_wanted =    entry->offset == wanted_offset
3257                      && entry->item_count >= wanted_sub_item
3258                      && svn_fs_x__id_eq(entry->items + wanted_sub_item, id);
3259          is_result = result && is_wanted;
3260
3261          /* select the pool that we want the item to be allocated in */
3262          pool = is_result ? result_pool : iterpool;
3263
3264          /* handle all items that start within this block and are relatively
3265           * small (i.e. < block size).  Always read the item we need to return.
3266           */
3267          if (is_result || (   entry->offset >= block_start
3268                            && entry->size < ffd->block_size))
3269            {
3270              void *item = NULL;
3271              svn_fs_x__pair_cache_key_t key = { 0 };
3272              key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3273              key.second = entry->items[0].number;
3274
3275              SVN_ERR(svn_io_file_seek(revision_file->file, SEEK_SET,
3276                                       &entry->offset, iterpool));
3277              switch (entry->type)
3278                {
3279                  case SVN_FS_X__ITEM_TYPE_FILE_REP:
3280                  case SVN_FS_X__ITEM_TYPE_DIR_REP:
3281                  case SVN_FS_X__ITEM_TYPE_FILE_PROPS:
3282                  case SVN_FS_X__ITEM_TYPE_DIR_PROPS:
3283                    SVN_ERR(block_read_contents(fs, revision_file,
3284                                                entry, &key,
3285                                                is_wanted
3286                                                  ? -1
3287                                                  : block_start + ffd->block_size,
3288                                                iterpool));
3289                    break;
3290
3291                  case SVN_FS_X__ITEM_TYPE_NODEREV:
3292                    if (ffd->node_revision_cache || is_result)
3293                      SVN_ERR(block_read_noderev((svn_fs_x__noderev_t **)&item,
3294                                                 fs, revision_file,
3295                                                 entry, &key, is_result,
3296                                                 pool, iterpool));
3297                    break;
3298
3299                  case SVN_FS_X__ITEM_TYPE_CHANGES:
3300                    SVN_ERR(block_read_changes((apr_array_header_t **)&item,
3301                                               fs, revision_file,
3302                                               entry, is_result,
3303                                               pool, iterpool));
3304                    break;
3305
3306                  case SVN_FS_X__ITEM_TYPE_CHANGES_CONT:
3307                    SVN_ERR(block_read_changes_container
3308                                            ((apr_array_header_t **)&item,
3309                                             fs, revision_file,
3310                                             entry, wanted_sub_item,
3311                                             is_result, pool, iterpool));
3312                    break;
3313
3314                  case SVN_FS_X__ITEM_TYPE_NODEREVS_CONT:
3315                    SVN_ERR(block_read_noderevs_container
3316                                            ((svn_fs_x__noderev_t **)&item,
3317                                             fs, revision_file,
3318                                             entry, wanted_sub_item,
3319                                             is_result, pool, iterpool));
3320                    break;
3321
3322                  case SVN_FS_X__ITEM_TYPE_REPS_CONT:
3323                    SVN_ERR(block_read_reps_container
3324                                      ((svn_fs_x__rep_extractor_t **)&item,
3325                                       fs, revision_file,
3326                                       entry, wanted_sub_item,
3327                                       is_result, pool, iterpool));
3328                    break;
3329
3330                  default:
3331                    break;
3332                }
3333
3334              if (is_result)
3335                *result = item;
3336
3337              /* if we crossed a block boundary, read the remainder of
3338               * the last block as well */
3339              offset = entry->offset + entry->size;
3340              if (offset > block_start + ffd->block_size)
3341                ++run_count;
3342
3343              svn_pool_clear(iterpool);
3344            }
3345        }
3346    }
3347  while(run_count++ == 1); /* can only be true once and only if a block
3348                            * boundary got crossed */
3349
3350  /* if the caller requested a result, we must have provided one by now */
3351  assert(!result || *result);
3352  svn_pool_destroy(iterpool);
3353
3354  return SVN_NO_ERROR;
3355}
3356