cached_data.c revision 298845
1/* cached_data.c --- cached (read) access to FSFS data
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "cached_data.h"
24
25#include <assert.h>
26
27#include "svn_hash.h"
28#include "svn_ctype.h"
29#include "svn_sorts.h"
30#include "private/svn_delta_private.h"
31#include "private/svn_io_private.h"
32#include "private/svn_sorts_private.h"
33#include "private/svn_subr_private.h"
34#include "private/svn_temp_serializer.h"
35
36#include "fs_fs.h"
37#include "id.h"
38#include "index.h"
39#include "low_level.h"
40#include "pack.h"
41#include "util.h"
42#include "temp_serializer.h"
43
44#include "../libsvn_fs/fs-loader.h"
45#include "../libsvn_delta/delta.h"  /* for SVN_DELTA_WINDOW_SIZE */
46
47#include "svn_private_config.h"
48
49/* forward-declare. See implementation for the docstring */
50static svn_error_t *
51block_read(void **result,
52           svn_fs_t *fs,
53           svn_revnum_t revision,
54           apr_uint64_t item_index,
55           svn_fs_fs__revision_file_t *revision_file,
56           apr_pool_t *result_pool,
57           apr_pool_t *scratch_pool);
58
59
/* Define this to enable access logging via dbg_log_access
#define SVN_FS_FS__LOG_ACCESS
 */
63
64/* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
66 * show details on it's contents if not NULL.  To support format 6 and
67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
68 * Use SCRATCH_POOL for temporary allocations.
69 *
70 * For pre-format7 repos, the display will be restricted.
71 */
72static svn_error_t *
73dbg_log_access(svn_fs_t *fs,
74               svn_revnum_t revision,
75               apr_uint64_t item_index,
76               void *item,
77               apr_uint32_t item_type,
78               apr_pool_t *scratch_pool)
79{
80  /* no-op if this macro is not defined */
81#ifdef SVN_FS_FS__LOG_ACCESS
82  fs_fs_data_t *ffd = fs->fsap_data;
83  apr_off_t end_offset = 0;
84  svn_fs_fs__p2l_entry_t *entry = NULL;
85  static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86                                "node ", "chgs ", "rep  "};
87  const char *description = "";
88  const char *type = types[item_type];
89  const char *pack = "";
90  apr_off_t offset;
91  svn_fs_fs__revision_file_t *rev_file;
92
93  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
94                                           scratch_pool));
95
96  /* determine rev / pack file offset */
97  SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
98                                 item_index, scratch_pool));
99
100  /* constructing the pack file description */
101  if (revision < ffd->min_unpacked_rev)
102    pack = apr_psprintf(scratch_pool, "%4ld|",
103                        revision / ffd->max_files_per_dir);
104
105  /* construct description if possible */
106  if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
107    {
108      node_revision_t *node = item;
109      const char *data_rep
110        = node->data_rep
111        ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
112                       node->data_rep->revision,
113                       node->data_rep->item_index)
114        : "";
115      const char *prop_rep
116        = node->prop_rep
117        ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
118                       node->prop_rep->revision,
119                       node->prop_rep->item_index)
120        : "";
121      description = apr_psprintf(scratch_pool, "%s   (pc=%d%s%s)",
122                                 node->created_path,
123                                 node->predecessor_count,
124                                 data_rep,
125                                 prop_rep);
126    }
127  else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
128    {
129      svn_fs_fs__rep_header_t *header = item;
130      if (header == NULL)
131        description = "  (txdelta window)";
132      else if (header->type == svn_fs_fs__rep_plain)
133        description = "  PLAIN";
134      else if (header->type == svn_fs_fs__rep_self_delta)
135        description = "  DELTA";
136      else
137        description = apr_psprintf(scratch_pool,
138                                   "  DELTA against %ld/%" APR_UINT64_T_FMT,
139                                   header->base_revision,
140                                   header->base_item_index);
141    }
142  else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
143    {
144      apr_array_header_t *changes = item;
145      switch (changes->nelts)
146        {
147          case 0:  description = "  no change";
148                   break;
149          case 1:  description = "  1 change";
150                   break;
151          default: description = apr_psprintf(scratch_pool, "  %d changes",
152                                              changes->nelts);
153        }
154    }
155
156  /* some info is only available in format7 repos */
157  if (svn_fs_fs__use_log_addressing(fs))
158    {
159      /* reverse index lookup: get item description in ENTRY */
160      SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
161                                          offset, scratch_pool));
162      if (entry)
163        {
164          /* more details */
165          end_offset = offset + entry->size;
166          type = types[entry->type];
167        }
168
169      /* line output */
170      printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT"   %s\n",
171             pack, (long)(offset / ffd->block_size),
172             (long)(offset % ffd->block_size),
173             (long)(end_offset / ffd->block_size),
174             (long)(end_offset % ffd->block_size),
175             type, revision, item_index, description);
176    }
177  else
178    {
179      /* reduced logging for format 6 and earlier */
180      printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
181             "   %s\n",
182             pack, (apr_uint64_t)(offset), type, revision, item_index,
183             description);
184    }
185
186#endif
187
188  return SVN_NO_ERROR;
189}
190
191/* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
192   FS instead of a block size. */
193static svn_error_t *
194aligned_seek(svn_fs_t *fs,
195             apr_file_t *file,
196             apr_off_t *buffer_start,
197             apr_off_t offset,
198             apr_pool_t *pool)
199{
200  fs_fs_data_t *ffd = fs->fsap_data;
201  return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
202                                                  buffer_start, offset,
203                                                  pool));
204}
205
206/* Open the revision file for revision REV in filesystem FS and store
207   the newly opened file in FILE.  Seek to location OFFSET before
208   returning.  Perform temporary allocations in POOL. */
209static svn_error_t *
210open_and_seek_revision(svn_fs_fs__revision_file_t **file,
211                       svn_fs_t *fs,
212                       svn_revnum_t rev,
213                       apr_uint64_t item,
214                       apr_pool_t *pool)
215{
216  svn_fs_fs__revision_file_t *rev_file;
217  apr_off_t offset = -1;
218
219  SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
220
221  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
222  SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
223                                 pool));
224
225  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
226
227  *file = rev_file;
228
229  return SVN_NO_ERROR;
230}
231
232/* Open the representation REP for a node-revision in filesystem FS, seek
233   to its position and store the newly opened file in FILE.  Perform
234   temporary allocations in POOL. */
235static svn_error_t *
236open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
237                          svn_fs_t *fs,
238                          representation_t *rep,
239                          apr_pool_t *pool)
240{
241  apr_off_t offset;
242
243  SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
244
245  SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
246                                 &rep->txn_id, rep->item_index, pool));
247  SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
248
249  return SVN_NO_ERROR;
250}
251
252/* Given a node-id ID, and a representation REP in filesystem FS, open
253   the correct file and seek to the correction location.  Store this
254   file in *FILE_P.  Perform any allocations in POOL. */
255static svn_error_t *
256open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
257                             svn_fs_t *fs,
258                             representation_t *rep,
259                             apr_pool_t *pool)
260{
261  if (! svn_fs_fs__id_txn_used(&rep->txn_id))
262    return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
263                                  pool);
264  else
265    return open_and_seek_transaction(file_p, fs, rep, pool);
266}
267
268
269
270static svn_error_t *
271err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
272{
273  svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
274  return svn_error_createf
275    (SVN_ERR_FS_ID_NOT_FOUND, 0,
276     _("Reference to non-existent node '%s' in filesystem '%s'"),
277     id_str->data, fs->path);
278}
279
280/* Return TRUE, if FS is of a format that supports block-read and the
281   feature has been enabled. */
282static svn_boolean_t
283use_block_read(svn_fs_t *fs)
284{
285  fs_fs_data_t *ffd = fs->fsap_data;
286  return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
287}
288
289/* Get the node-revision for the node ID in FS.
290   Set *NODEREV_P to the new node-revision structure, allocated in POOL.
291   See svn_fs_fs__get_node_revision, which wraps this and adds another
292   error. */
293static svn_error_t *
294get_node_revision_body(node_revision_t **noderev_p,
295                       svn_fs_t *fs,
296                       const svn_fs_id_t *id,
297                       apr_pool_t *result_pool,
298                       apr_pool_t *scratch_pool)
299{
300  svn_error_t *err;
301  svn_boolean_t is_cached = FALSE;
302  fs_fs_data_t *ffd = fs->fsap_data;
303
304  if (svn_fs_fs__id_is_txn(id))
305    {
306      apr_file_t *file;
307
308      /* This is a transaction node-rev.  Its storage logic is very
309         different from that of rev / pack files. */
310      err = svn_io_file_open(&file,
311                             svn_fs_fs__path_txn_node_rev(fs, id,
312                             scratch_pool),
313                             APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
314                             scratch_pool);
315      if (err)
316        {
317          if (APR_STATUS_IS_ENOENT(err->apr_err))
318            {
319              svn_error_clear(err);
320              return svn_error_trace(err_dangling_id(fs, id));
321            }
322
323          return svn_error_trace(err);
324        }
325
326      SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
327                                      svn_stream_from_aprfile2(file,
328                                                               FALSE,
329                                                               scratch_pool),
330                                      result_pool, scratch_pool));
331    }
332  else
333    {
334      svn_fs_fs__revision_file_t *revision_file;
335
336      /* noderevs in rev / pack files can be cached */
337      const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
338      pair_cache_key_t key = { 0 };
339      key.revision = rev_item->revision;
340      key.second = rev_item->number;
341
342      /* Not found or not applicable. Try a noderev cache lookup.
343       * If that succeeds, we are done here. */
344      if (ffd->node_revision_cache)
345        {
346          SVN_ERR(svn_cache__get((void **) noderev_p,
347                                 &is_cached,
348                                 ffd->node_revision_cache,
349                                 &key,
350                                 result_pool));
351          if (is_cached)
352            return SVN_NO_ERROR;
353        }
354
355      /* read the data from disk */
356      SVN_ERR(open_and_seek_revision(&revision_file, fs,
357                                     rev_item->revision,
358                                     rev_item->number,
359                                     scratch_pool));
360
361      if (use_block_read(fs))
362        {
363          /* block-read will parse the whole block and will also return
364             the one noderev that we need right now. */
365          SVN_ERR(block_read((void **)noderev_p, fs,
366                             rev_item->revision,
367                             rev_item->number,
368                             revision_file,
369                             result_pool,
370                             scratch_pool));
371        }
372      else
373        {
374          /* physical addressing mode reading, parsing and caching */
375          SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
376                                          revision_file->stream,
377                                          result_pool,
378                                          scratch_pool));
379
380          /* Workaround issue #4031: is-fresh-txn-root in revision files. */
381          (*noderev_p)->is_fresh_txn_root = FALSE;
382
383          /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
384          if (ffd->node_revision_cache)
385            SVN_ERR(svn_cache__set(ffd->node_revision_cache,
386                                   &key,
387                                   *noderev_p,
388                                   scratch_pool));
389        }
390
391      SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
392    }
393
394  return SVN_NO_ERROR;
395}
396
397svn_error_t *
398svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
399                             svn_fs_t *fs,
400                             const svn_fs_id_t *id,
401                             apr_pool_t *result_pool,
402                             apr_pool_t *scratch_pool)
403{
404  const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
405
406  svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
407                                            result_pool, scratch_pool);
408  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
409    {
410      svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
411      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
412                               "Corrupt node-revision '%s'",
413                               id_string->data);
414    }
415
416  SVN_ERR(dbg_log_access(fs,
417                         rev_item->revision,
418                         rev_item->number,
419                         *noderev_p,
420                         SVN_FS_FS__ITEM_TYPE_NODEREV,
421                         scratch_pool));
422
423  return svn_error_trace(err);
424}
425
426
427/* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
428   of the header located at OFFSET and store it in *ID_P.  Allocate
429   temporary variables from POOL. */
430static svn_error_t *
431get_fs_id_at_offset(svn_fs_id_t **id_p,
432                    svn_fs_fs__revision_file_t *rev_file,
433                    svn_fs_t *fs,
434                    svn_revnum_t rev,
435                    apr_off_t offset,
436                    apr_pool_t *pool)
437{
438  node_revision_t *noderev;
439
440  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
441  SVN_ERR(svn_fs_fs__read_noderev(&noderev,
442                                  rev_file->stream,
443                                  pool, pool));
444
445  /* noderev->id is const, get rid of that */
446  *id_p = svn_fs_fs__id_copy(noderev->id, pool);
447
448  /* assert that the txn_id is REV
449   * (asserting on offset would be harder because we the rev_offset is not
450   * known here) */
451  assert(svn_fs_fs__id_rev(*id_p) == rev);
452
453  return SVN_NO_ERROR;
454}
455
456
457/* Given an open revision file REV_FILE in FS for REV, locate the trailer that
458   specifies the offset to the root node-id and to the changed path
459   information.  Store the root node offset in *ROOT_OFFSET and the
460   changed path offset in *CHANGES_OFFSET.  If either of these
461   pointers is NULL, do nothing with it.
462
463   Allocate temporary variables from POOL. */
464static svn_error_t *
465get_root_changes_offset(apr_off_t *root_offset,
466                        apr_off_t *changes_offset,
467                        svn_fs_fs__revision_file_t *rev_file,
468                        svn_fs_t *fs,
469                        svn_revnum_t rev,
470                        apr_pool_t *pool)
471{
472  fs_fs_data_t *ffd = fs->fsap_data;
473  apr_off_t rev_offset;
474  apr_seek_where_t seek_relative;
475  svn_stringbuf_t *trailer;
476  char buffer[64];
477  apr_off_t start;
478  apr_off_t end;
479  apr_size_t len;
480
481  /* Determine where to seek to in the file.
482
483     If we've got a pack file, we want to seek to the end of the desired
484     revision.  But we don't track that, so we seek to the beginning of the
485     next revision.
486
487     Unless the next revision is in a different file, in which case, we can
488     just seek to the end of the pack file -- just like we do in the
489     non-packed case. */
490  if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
491    {
492      SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
493      seek_relative = APR_SET;
494    }
495  else
496    {
497      seek_relative = APR_END;
498      end = 0;
499    }
500
501  /* Offset of the revision from the start of the pack file, if applicable. */
502  if (rev_file->is_packed)
503    SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
504  else
505    rev_offset = 0;
506
507  /* We will assume that the last line containing the two offsets
508     will never be longer than 64 characters. */
509  SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
510
511  if (end < sizeof(buffer))
512    {
513      len = (apr_size_t)end;
514      start = 0;
515    }
516  else
517    {
518      len = sizeof(buffer);
519      start = end - sizeof(buffer);
520    }
521
522  /* Read in this last block, from which we will identify the last line. */
523  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
524  SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
525                                 pool));
526
527  /* Parse the last line. */
528  trailer = svn_stringbuf_ncreate(buffer, len, pool);
529  SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
530                                            changes_offset,
531                                            trailer,
532                                            rev));
533
534  /* return absolute offsets */
535  if (root_offset)
536    *root_offset += rev_offset;
537  if (changes_offset)
538    *changes_offset += rev_offset;
539
540  return SVN_NO_ERROR;
541}
542
543svn_error_t *
544svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
545                        svn_fs_t *fs,
546                        svn_revnum_t rev,
547                        apr_pool_t *result_pool,
548                        apr_pool_t *scratch_pool)
549{
550  fs_fs_data_t *ffd = fs->fsap_data;
551  SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
552
553  if (svn_fs_fs__use_log_addressing(fs))
554    {
555      *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
556    }
557  else
558    {
559      svn_fs_fs__revision_file_t *revision_file;
560      apr_off_t root_offset;
561      svn_fs_id_t *root_id = NULL;
562      svn_boolean_t is_cached;
563
564      SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
565                            ffd->rev_root_id_cache, &rev, result_pool));
566      if (is_cached)
567        return SVN_NO_ERROR;
568
569      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
570                                               scratch_pool, scratch_pool));
571      SVN_ERR(get_root_changes_offset(&root_offset, NULL,
572                                      revision_file, fs, rev,
573                                      scratch_pool));
574
575      SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
576                                  root_offset, result_pool));
577
578      SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
579
580      SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
581                             scratch_pool));
582
583      *root_id_p = root_id;
584    }
585
586  return SVN_NO_ERROR;
587}
588
589/* Describes a lazily opened rev / pack file.  Instances will be shared
590   between multiple instances of rep_state_t. */
591typedef struct shared_file_t
592{
593  /* The opened file. NULL while file is not open, yet. */
594  svn_fs_fs__revision_file_t *rfile;
595
596  /* file system to open the file in */
597  svn_fs_t *fs;
598
599  /* a revision contained in the FILE.  Since this file may be shared,
600     that value may be different from REP_STATE_T->REVISION. */
601  svn_revnum_t revision;
602
603  /* pool to use when creating the FILE.  This guarantees that the file
604     remains open / valid beyond the respective local context that required
605     the file to be opened eventually. */
606  apr_pool_t *pool;
607} shared_file_t;
608
609/* Represents where in the current svndiff data block each
610   representation is. */
611typedef struct rep_state_t
612{
613                    /* shared lazy-open rev/pack file structure */
614  shared_file_t *sfile;
615                    /* The txdelta window cache to use or NULL. */
616  svn_cache__t *raw_window_cache;
617                    /* Caches raw (unparsed) windows. May be NULL. */
618  svn_cache__t *window_cache;
619                    /* Caches un-deltified windows. May be NULL. */
620  svn_cache__t *combined_cache;
621                    /* revision containing the representation */
622  svn_revnum_t revision;
623                    /* representation's item index in REVISION */
624  apr_uint64_t item_index;
625                    /* length of the header at the start of the rep.
626                       0 iff this is rep is stored in a container
627                       (i.e. does not have a header) */
628  apr_size_t header_size;
629  apr_off_t start;  /* The starting offset for the raw
630                       svndiff/plaintext data minus header.
631                       -1 if the offset is yet unknown. */
632  apr_off_t current;/* The current offset relative to START. */
633  apr_off_t size;   /* The on-disk size of the representation. */
634  int ver;          /* If a delta, what svndiff version?
635                       -1 for unknown delta version. */
636  int chunk_index;  /* number of the window to read */
637} rep_state_t;
638
639/* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */
640static svn_error_t *
641get_file_offset(apr_off_t *offset,
642                rep_state_t *rs,
643                apr_pool_t *pool)
644{
645  return svn_error_trace(svn_fs_fs__get_file_offset(offset,
646                                                    rs->sfile->rfile->file,
647                                                    pool));
648}
649
650/* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
651static svn_error_t *
652rs_aligned_seek(rep_state_t *rs,
653                apr_off_t *buffer_start,
654                apr_off_t offset,
655                apr_pool_t *pool)
656{
657  fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
658  return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
659                                                  ffd->block_size,
660                                                  buffer_start, offset,
661                                                  pool));
662}
663
664/* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
665static svn_error_t*
666auto_open_shared_file(shared_file_t *file)
667{
668  if (file->rfile == NULL)
669    SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
670                                             file->revision, file->pool,
671                                             file->pool));
672
673  return SVN_NO_ERROR;
674}
675
676/* Set RS->START to the begin of the representation raw in RS->FILE->FILE,
677   if that hasn't been done yet.  Use POOL for temporary allocations. */
678static svn_error_t*
679auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
680{
681  if (rs->start == -1)
682    {
683      SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
684                                     rs->sfile->rfile, rs->revision, NULL,
685                                     rs->item_index, pool));
686      rs->start += rs->header_size;
687    }
688
689  return SVN_NO_ERROR;
690}
691
692/* Set RS->VER depending on what is found in the already open RS->FILE->FILE
693   if the diff version is still unknown.  Use POOL for temporary allocations.
694 */
695static svn_error_t*
696auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
697{
698  if (rs->ver == -1)
699    {
700      char buf[4];
701      SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
702      SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
703                                     sizeof(buf), NULL, NULL, pool));
704
705      /* ### Layering violation */
706      if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
707        return svn_error_create
708          (SVN_ERR_FS_CORRUPT, NULL,
709           _("Malformed svndiff data in representation"));
710      rs->ver = buf[3];
711
712      rs->chunk_index = 0;
713      rs->current = 4;
714    }
715
716  return SVN_NO_ERROR;
717}
718
719/* See create_rep_state, which wraps this and adds another error. */
720static svn_error_t *
721create_rep_state_body(rep_state_t **rep_state,
722                      svn_fs_fs__rep_header_t **rep_header,
723                      shared_file_t **shared_file,
724                      representation_t *rep,
725                      svn_fs_t *fs,
726                      apr_pool_t *result_pool,
727                      apr_pool_t *scratch_pool)
728{
729  fs_fs_data_t *ffd = fs->fsap_data;
730  rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
731  svn_fs_fs__rep_header_t *rh;
732  svn_boolean_t is_cached = FALSE;
733  apr_uint64_t estimated_window_storage;
734
735  /* If the hint is
736   * - given,
737   * - refers to a valid revision,
738   * - refers to a packed revision,
739   * - as does the rep we want to read, and
740   * - refers to the same pack file as the rep
741   * we can re-use the same, already open file object
742   */
743  svn_boolean_t reuse_shared_file
744    =    shared_file && *shared_file && (*shared_file)->rfile
745      && SVN_IS_VALID_REVNUM((*shared_file)->revision)
746      && (*shared_file)->revision < ffd->min_unpacked_rev
747      && rep->revision < ffd->min_unpacked_rev
748      && (   ((*shared_file)->revision / ffd->max_files_per_dir)
749          == (rep->revision / ffd->max_files_per_dir));
750
751  pair_cache_key_t key;
752  key.revision = rep->revision;
753  key.second = rep->item_index;
754
755  /* continue constructing RS and RA */
756  rs->size = rep->size;
757  rs->revision = rep->revision;
758  rs->item_index = rep->item_index;
759  rs->raw_window_cache = ffd->raw_window_cache;
760  rs->ver = -1;
761  rs->start = -1;
762
763  /* Very long files stored as self-delta will produce a huge number of
764     delta windows.  Don't cache them lest we don't thrash the cache.
765     Since we don't know the depth of the delta chain, let's assume, the
766     whole contents get rewritten 3 times.
767   */
768  estimated_window_storage
769    = 4 * (  (rep->expanded_size ? rep->expanded_size : rep->size)
770           + SVN_DELTA_WINDOW_SIZE);
771  estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
772
773  rs->window_cache =    ffd->txdelta_window_cache
774                     && svn_cache__is_cachable(ffd->txdelta_window_cache,
775                                       (apr_size_t)estimated_window_storage)
776                   ? ffd->txdelta_window_cache
777                   : NULL;
778  rs->combined_cache =    ffd->combined_window_cache
779                       && svn_cache__is_cachable(ffd->combined_window_cache,
780                                       (apr_size_t)estimated_window_storage)
781                     ? ffd->combined_window_cache
782                     : NULL;
783
784  /* cache lookup, i.e. skip reading the rep header if possible */
785  if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
786    SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
787                           ffd->rep_header_cache, &key, result_pool));
788
789  /* initialize the (shared) FILE member in RS */
790  if (reuse_shared_file)
791    {
792      rs->sfile = *shared_file;
793    }
794  else
795    {
796      shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
797      file->revision = rep->revision;
798      file->pool = result_pool;
799      file->fs = fs;
800      rs->sfile = file;
801
802      /* remember the current file, if suggested by the caller */
803      if (shared_file)
804        *shared_file = file;
805    }
806
807  /* read rep header, if necessary */
808  if (!is_cached)
809    {
810      /* ensure file is open and navigate to the start of rep header */
811      if (reuse_shared_file)
812        {
813          apr_off_t offset;
814
815          /* ... we can re-use the same, already open file object.
816           * This implies that we don't read from a txn.
817           */
818          rs->sfile = *shared_file;
819          SVN_ERR(auto_open_shared_file(rs->sfile));
820          SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
821                                         rep->revision, NULL, rep->item_index,
822                                         scratch_pool));
823          SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
824        }
825      else
826        {
827          /* otherwise, create a new file object.  May or may not be
828           * an in-txn file.
829           */
830          SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
831                                               result_pool));
832        }
833
834      SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
835                                         result_pool, scratch_pool));
836      SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
837
838      /* populate the cache if appropriate */
839      if (! svn_fs_fs__id_txn_used(&rep->txn_id))
840        {
841          if (use_block_read(fs))
842            SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
843                               rs->sfile->rfile, result_pool, scratch_pool));
844          else
845            if (ffd->rep_header_cache)
846              SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
847                                     scratch_pool));
848        }
849    }
850
851  /* finalize */
852  SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
853                         SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
854
855  rs->header_size = rh->header_size;
856  *rep_state = rs;
857  *rep_header = rh;
858
859  if (rh->type == svn_fs_fs__rep_plain)
860    /* This is a plaintext, so just return the current rep_state. */
861    return SVN_NO_ERROR;
862
863  /* skip "SVNx" diff marker */
864  rs->current = 4;
865
866  return SVN_NO_ERROR;
867}
868
869/* Read the rep args for REP in filesystem FS and create a rep_state
870   for reading the representation.  Return the rep_state in *REP_STATE
871   and the rep header in *REP_HEADER, both allocated in POOL.
872
873   When reading multiple reps, i.e. a skip delta chain, you may provide
874   non-NULL SHARED_FILE.  (If SHARED_FILE is not NULL, in the first
875   call it should be a pointer to NULL.)  The function will use this
876   variable to store the previous call results and tries to re-use it.
877   This may result in significant savings in I/O for packed files and
878   number of open file handles.
879 */
880static svn_error_t *
881create_rep_state(rep_state_t **rep_state,
882                 svn_fs_fs__rep_header_t **rep_header,
883                 shared_file_t **shared_file,
884                 representation_t *rep,
885                 svn_fs_t *fs,
886                 apr_pool_t *result_pool,
887                 apr_pool_t *scratch_pool)
888{
889  svn_error_t *err = create_rep_state_body(rep_state, rep_header,
890                                           shared_file, rep, fs,
891                                           result_pool, scratch_pool);
892  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
893    {
894      fs_fs_data_t *ffd = fs->fsap_data;
895      const char *rep_str;
896
897      /* ### This always returns "-1" for transaction reps, because
898         ### this particular bit of code doesn't know if the rep is
899         ### stored in the protorev or in the mutable area (for props
900         ### or dir contents).  It is pretty rare for FSFS to *read*
901         ### from the protorev file, though, so this is probably OK.
902         ### And anyone going to debug corruption errors is probably
903         ### going to jump straight to this comment anyway! */
904      rep_str = rep
905              ? svn_fs_fs__unparse_representation
906                  (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
907              : "(null)";
908
909      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
910                               "Corrupt representation '%s'",
911                               rep_str);
912    }
913  /* ### Call representation_string() ? */
914  return svn_error_trace(err);
915}
916
917svn_error_t *
918svn_fs_fs__check_rep(representation_t *rep,
919                     svn_fs_t *fs,
920                     void **hint,
921                     apr_pool_t *scratch_pool)
922{
923  if (svn_fs_fs__use_log_addressing(fs))
924    {
925      apr_off_t offset;
926      svn_fs_fs__p2l_entry_t *entry;
927      svn_fs_fs__revision_file_t *rev_file = NULL;
928
929      /* Reuse the revision file provided by *HINT, if it is given and
930       * actually the rev / pack file that we want. */
931      svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
932      if (hint)
933        rev_file = *(svn_fs_fs__revision_file_t **)hint;
934
935      if (rev_file == NULL || rev_file->start_revision != start_rev)
936        SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
937                                                 scratch_pool, scratch_pool));
938
939      if (hint)
940        *hint = rev_file;
941
942      /* This will auto-retry if there was a background pack. */
943      SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
944                                     NULL, rep->item_index, scratch_pool));
945
946      /* This may fail if there is a background pack operation (can't auto-
947         retry because the item offset lookup has to be redone as well). */
948      SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
949                                          rep->revision, offset,
950                                          scratch_pool, scratch_pool));
951
952      if (   entry == NULL
953          || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
954          || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
955        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
956                                 _("No representation found at offset %s "
957                                   "for item %s in revision %ld"),
958                                 apr_off_t_toa(scratch_pool, offset),
959                                 apr_psprintf(scratch_pool,
960                                              "%" APR_UINT64_T_FMT,
961                                              rep->item_index),
962                                 rep->revision);
963    }
964  else
965    {
966      rep_state_t *rs;
967      svn_fs_fs__rep_header_t *rep_header;
968
969      /* ### Should this be using read_rep_line() directly? */
970      SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
971                               rep, fs, scratch_pool, scratch_pool));
972    }
973
974  return SVN_NO_ERROR;
975}
976
977svn_error_t *
978svn_fs_fs__rep_chain_length(int *chain_length,
979                            int *shard_count,
980                            representation_t *rep,
981                            svn_fs_t *fs,
982                            apr_pool_t *scratch_pool)
983{
984  fs_fs_data_t *ffd = fs->fsap_data;
985  svn_revnum_t shard_size = ffd->max_files_per_dir
986                          ? ffd->max_files_per_dir
987                          : 1;
988  apr_pool_t *subpool = svn_pool_create(scratch_pool);
989  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
990  svn_boolean_t is_delta = FALSE;
991  int count = 0;
992  int shards = 1;
993  svn_revnum_t last_shard = rep->revision / shard_size;
994
995  /* Check whether the length of the deltification chain is acceptable.
996   * Otherwise, shared reps may form a non-skipping delta chain in
997   * extreme cases. */
998  representation_t base_rep = *rep;
999
1000  /* re-use open files between iterations */
1001  shared_file_t *file_hint = NULL;
1002
1003  svn_fs_fs__rep_header_t *header;
1004
1005  /* follow the delta chain towards the end but for at most
1006   * MAX_CHAIN_LENGTH steps. */
1007  do
1008    {
1009      rep_state_t *rep_state;
1010
1011      svn_pool_clear(iterpool);
1012
1013      if (base_rep.revision / shard_size != last_shard)
1014        {
1015          last_shard = base_rep.revision / shard_size;
1016          ++shards;
1017        }
1018
1019      SVN_ERR(create_rep_state_body(&rep_state,
1020                                    &header,
1021                                    &file_hint,
1022                                    &base_rep,
1023                                    fs,
1024                                    subpool,
1025                                    iterpool));
1026
1027      base_rep.revision = header->base_revision;
1028      base_rep.item_index = header->base_item_index;
1029      base_rep.size = header->base_length;
1030      svn_fs_fs__id_txn_reset(&base_rep.txn_id);
1031      is_delta = header->type == svn_fs_fs__rep_delta;
1032
1033      /* Clear it the SUBPOOL once in a while.  Doing it too frequently
1034       * renders the FILE_HINT ineffective.  Doing too infrequently, may
1035       * leave us with too many open file handles.
1036       *
1037       * Note that this is mostly about efficiency, with larger values
1038       * being more efficient, and any non-zero value is legal here.  When
1039       * reading deltified contents, we may keep 10s of rev files open at
1040       * the same time and the system has to cope with that.  Thus, the
1041       * limit of 16 chosen below is in the same ballpark.
1042       */
1043      ++count;
1044      if (count % 16 == 0)
1045        {
1046          file_hint = NULL;
1047          svn_pool_clear(subpool);
1048        }
1049    }
1050  while (is_delta && base_rep.revision);
1051
1052  *chain_length = count;
1053  *shard_count = shards;
1054  svn_pool_destroy(subpool);
1055  svn_pool_destroy(iterpool);
1056
1057  return SVN_NO_ERROR;
1058}
1059
1060struct rep_read_baton
1061{
1062  /* The FS from which we're reading. */
1063  svn_fs_t *fs;
1064
1065  /* Representation to read. */
1066  representation_t rep;
1067
1068  /* If not NULL, this is the base for the first delta window in rs_list */
1069  svn_stringbuf_t *base_window;
1070
1071  /* The state of all prior delta representations. */
1072  apr_array_header_t *rs_list;
1073
1074  /* The plaintext state, if there is a plaintext. */
1075  rep_state_t *src_state;
1076
1077  /* The index of the current delta chunk, if we are reading a delta. */
1078  int chunk_index;
1079
1080  /* The buffer where we store undeltified data. */
1081  char *buf;
1082  apr_size_t buf_pos;
1083  apr_size_t buf_len;
1084
1085  /* A checksum context for summing the data read in order to verify it.
1086     Note: we don't need to use the sha1 checksum because we're only doing
1087     data verification, for which md5 is perfectly safe.  */
1088  svn_checksum_ctx_t *md5_checksum_ctx;
1089
1090  svn_boolean_t checksum_finalized;
1091
1092  /* The stored checksum of the representation we are reading, its
1093     length, and the amount we've read so far.  Some of this
1094     information is redundant with rs_list and src_state, but it's
1095     convenient for the checksumming code to have it here. */
1096  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
1097
1098  svn_filesize_t len;
1099  svn_filesize_t off;
1100
1101  /* The key for the fulltext cache for this rep, if there is a
1102     fulltext cache. */
1103  pair_cache_key_t fulltext_cache_key;
1104  /* The text we've been reading, if we're going to cache it. */
1105  svn_stringbuf_t *current_fulltext;
1106
1107  /* If not NULL, attempt to read the data from this cache.
1108     Once that lookup fails, reset it to NULL. */
1109  svn_cache__t *fulltext_cache;
1110
1111  /* Bytes delivered from the FULLTEXT_CACHE so far.  If the next
1112     lookup fails, we need to skip that much data from the reconstructed
1113     window stream before we continue normal operation. */
1114  svn_filesize_t fulltext_delivered;
1115
1116  /* Used for temporary allocations during the read. */
1117  apr_pool_t *pool;
1118
1119  /* Pool used to store file handles and other data that is persistant
1120     for the entire stream read. */
1121  apr_pool_t *filehandle_pool;
1122};
1123
1124/* Set window key in *KEY to address the window described by RS.
1125   For convenience, return the KEY. */
1126static window_cache_key_t *
1127get_window_key(window_cache_key_t *key, rep_state_t *rs)
1128{
1129  assert(rs->revision <= APR_UINT32_MAX);
1130  key->revision = (apr_uint32_t)rs->revision;
1131  key->item_index = rs->item_index;
1132  key->chunk_index = rs->chunk_index;
1133
1134  return key;
1135}
1136
1137/* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
1138 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
1139 */
1140static svn_error_t *
1141parse_raw_window(void **out,
1142                 const void *data,
1143                 apr_size_t data_len,
1144                 void *baton,
1145                 apr_pool_t *result_pool)
1146{
1147  svn_string_t raw_window;
1148  svn_stream_t *stream;
1149
1150  /* unparsed and parsed window */
1151  const svn_fs_fs__raw_cached_window_t *window
1152    = (const svn_fs_fs__raw_cached_window_t *)data;
1153  svn_fs_fs__txdelta_cached_window_t *result
1154    = apr_pcalloc(result_pool, sizeof(*result));
1155
1156  /* create a read stream taking the raw window as input */
1157  raw_window.data = svn_temp_deserializer__ptr(window,
1158                                (const void * const *)&window->window.data);
1159  raw_window.len = window->window.len;
1160  stream = svn_stream_from_string(&raw_window, result_pool);
1161
1162  /* parse it */
1163  SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1,
1164                                          result_pool));
1165
1166  /* complete the window and return it */
1167  result->end_offset = window->end_offset;
1168  *out = result;
1169
1170  return SVN_NO_ERROR;
1171}
1172
1173
1174/* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1175 * rep state RS from the current FSFS session's cache.  This will be a
1176 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1177 * If a cache is available IS_CACHED will inform the caller about the
1178 * success of the lookup. Allocations of the window in will be made
1179 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1180 *
1181 * If the information could be found, put RS to CHUNK_INDEX.
1182 */
1183static svn_error_t *
1184get_cached_window(svn_txdelta_window_t **window_p,
1185                  rep_state_t *rs,
1186                  int chunk_index,
1187                  svn_boolean_t *is_cached,
1188                  apr_pool_t *result_pool,
1189                  apr_pool_t *scratch_pool)
1190{
1191  if (! rs->window_cache)
1192    {
1193      /* txdelta window has not been enabled */
1194      *is_cached = FALSE;
1195    }
1196  else
1197    {
1198      /* ask the cache for the desired txdelta window */
1199      svn_fs_fs__txdelta_cached_window_t *cached_window;
1200      window_cache_key_t key = { 0 };
1201      get_window_key(&key, rs);
1202      key.chunk_index = chunk_index;
1203      SVN_ERR(svn_cache__get((void **) &cached_window,
1204                             is_cached,
1205                             rs->window_cache,
1206                             &key,
1207                             result_pool));
1208
1209      /* If we did not find a parsed txdelta window, we might have a raw
1210         version of it in our cache.  If so, read, parse and re-cache it. */
1211      if (!*is_cached && rs->raw_window_cache)
1212        {
1213          SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
1214                                         rs->raw_window_cache, &key,
1215                                         parse_raw_window, NULL, result_pool));
1216          if (*is_cached)
1217            SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
1218                                   scratch_pool));
1219        }
1220
1221      /* Return cached information. */
1222      if (*is_cached)
1223        {
1224          /* found it. Pass it back to the caller. */
1225          *window_p = cached_window->window;
1226
1227          /* manipulate the RS as if we just read the data */
1228          rs->current = cached_window->end_offset;
1229          rs->chunk_index = chunk_index;
1230        }
1231    }
1232
1233  return SVN_NO_ERROR;
1234}
1235
1236/* Store the WINDOW read for the rep state RS in the current FSFS
1237 * session's cache.  This will be a no-op if no cache has been given.
1238 * Temporary allocations will be made from SCRATCH_POOL. */
1239static svn_error_t *
1240set_cached_window(svn_txdelta_window_t *window,
1241                  rep_state_t *rs,
1242                  apr_pool_t *scratch_pool)
1243{
1244  if (rs->window_cache)
1245    {
1246      /* store the window and the first offset _past_ it */
1247      svn_fs_fs__txdelta_cached_window_t cached_window;
1248      window_cache_key_t key = {0};
1249
1250      cached_window.window = window;
1251      cached_window.end_offset = rs->current;
1252
1253      /* but key it with the start offset because that is the known state
1254       * when we will look it up */
1255      SVN_ERR(svn_cache__set(rs->window_cache,
1256                             get_window_key(&key, rs),
1257                             &cached_window,
1258                             scratch_pool));
1259    }
1260
1261  return SVN_NO_ERROR;
1262}
1263
1264/* Read the WINDOW_P for the rep state RS from the current FSFS session's
1265 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1266 * cache has been given. If a cache is available IS_CACHED will inform
1267 * the caller about the success of the lookup. Allocations (of the window
1268 * in particular) will be made from POOL.
1269 */
1270static svn_error_t *
1271get_cached_combined_window(svn_stringbuf_t **window_p,
1272                           rep_state_t *rs,
1273                           svn_boolean_t *is_cached,
1274                           apr_pool_t *pool)
1275{
1276  if (! rs->combined_cache)
1277    {
1278      /* txdelta window has not been enabled */
1279      *is_cached = FALSE;
1280    }
1281  else
1282    {
1283      /* ask the cache for the desired txdelta window */
1284      window_cache_key_t key = { 0 };
1285      return svn_cache__get((void **)window_p,
1286                            is_cached,
1287                            rs->combined_cache,
1288                            get_window_key(&key, rs),
1289                            pool);
1290    }
1291
1292  return SVN_NO_ERROR;
1293}
1294
1295/* Store the WINDOW read for the rep state RS in the current FSFS session's
1296 * cache. This will be a no-op if no cache has been given.
1297 * Temporary allocations will be made from SCRATCH_POOL. */
1298static svn_error_t *
1299set_cached_combined_window(svn_stringbuf_t *window,
1300                           rep_state_t *rs,
1301                           apr_pool_t *scratch_pool)
1302{
1303  if (rs->combined_cache)
1304    {
1305      /* but key it with the start offset because that is the known state
1306       * when we will look it up */
1307      window_cache_key_t key = { 0 };
1308      return svn_cache__set(rs->combined_cache,
1309                            get_window_key(&key, rs),
1310                            window,
1311                            scratch_pool);
1312    }
1313
1314  return SVN_NO_ERROR;
1315}
1316
1317/* Build an array of rep_state structures in *LIST giving the delta
1318   reps from first_rep to a plain-text or self-compressed rep.  Set
1319   *SRC_STATE to the plain-text rep we find at the end of the chain,
1320   or to NULL if the final delta representation is self-compressed.
1321   The representation to start from is designated by filesystem FS, id
1322   ID, and representation REP.
1323   Also, set *WINDOW_P to the base window content for *LIST, if it
1324   could be found in cache. Otherwise, *LIST will contain the base
1325   representation for the whole delta chain.
1326   Finally, return the expanded size of the representation in
1327   *EXPANDED_SIZE. It will take care of cases where only the on-disk
1328   size is known.  */
1329static svn_error_t *
1330build_rep_list(apr_array_header_t **list,
1331               svn_stringbuf_t **window_p,
1332               rep_state_t **src_state,
1333               svn_filesize_t *expanded_size,
1334               svn_fs_t *fs,
1335               representation_t *first_rep,
1336               apr_pool_t *pool)
1337{
1338  representation_t rep;
1339  rep_state_t *rs = NULL;
1340  svn_fs_fs__rep_header_t *rep_header;
1341  svn_boolean_t is_cached = FALSE;
1342  shared_file_t *shared_file = NULL;
1343  apr_pool_t *iterpool = svn_pool_create(pool);
1344
1345  *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
1346  rep = *first_rep;
1347
1348  /* The value as stored in the data struct.
1349     0 is either for unknown length or actually zero length. */
1350  *expanded_size = first_rep->expanded_size;
1351
1352  /* for the top-level rep, we need the rep_args */
1353  SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
1354                           iterpool));
1355
1356  /* Unknown size or empty representation?
1357     That implies the this being the first iteration.
1358     Usually size equals on-disk size, except for empty,
1359     compressed representations (delta, size = 4).
1360     Please note that for all non-empty deltas have
1361     a 4-byte header _plus_ some data. */
1362  if (*expanded_size == 0)
1363    if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4)
1364      *expanded_size = first_rep->size;
1365
1366  while (1)
1367    {
1368      svn_pool_clear(iterpool);
1369
1370      /* fetch state, if that has not been done already */
1371      if (!rs)
1372        SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1373                                 &rep, fs, pool, iterpool));
1374
1375      /* for txn reps, there won't be a cached combined window */
1376      if (!svn_fs_fs__id_txn_used(&rep.txn_id))
1377        SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
1378
1379      if (is_cached)
1380        {
1381          /* We already have a reconstructed window in our cache.
1382             Write a pseudo rep_state with the full length. */
1383          rs->start = 0;
1384          rs->current = 0;
1385          rs->size = (*window_p)->len;
1386          *src_state = rs;
1387          break;
1388        }
1389
1390      if (rep_header->type == svn_fs_fs__rep_plain)
1391        {
1392          /* This is a plaintext, so just return the current rep_state. */
1393          *src_state = rs;
1394          break;
1395        }
1396
1397      /* Push this rep onto the list.  If it's self-compressed, we're done. */
1398      APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1399      if (rep_header->type == svn_fs_fs__rep_self_delta)
1400        {
1401          *src_state = NULL;
1402          break;
1403        }
1404
1405      rep.revision = rep_header->base_revision;
1406      rep.item_index = rep_header->base_item_index;
1407      rep.size = rep_header->base_length;
1408      svn_fs_fs__id_txn_reset(&rep.txn_id);
1409
1410      rs = NULL;
1411    }
1412  svn_pool_destroy(iterpool);
1413
1414  return SVN_NO_ERROR;
1415}
1416
1417
1418/* Create a rep_read_baton structure for node revision NODEREV in
1419   filesystem FS and store it in *RB_P.  Perform all allocations in
1420   POOL.  If rep is mutable, it must be for file contents. */
1421static svn_error_t *
1422rep_read_get_baton(struct rep_read_baton **rb_p,
1423                   svn_fs_t *fs,
1424                   representation_t *rep,
1425                   pair_cache_key_t fulltext_cache_key,
1426                   apr_pool_t *pool)
1427{
1428  struct rep_read_baton *b;
1429
1430  b = apr_pcalloc(pool, sizeof(*b));
1431  b->fs = fs;
1432  b->rep = *rep;
1433  b->base_window = NULL;
1434  b->chunk_index = 0;
1435  b->buf = NULL;
1436  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1437  b->checksum_finalized = FALSE;
1438  memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1439  b->len = rep->expanded_size;
1440  b->off = 0;
1441  b->fulltext_cache_key = fulltext_cache_key;
1442  b->pool = svn_pool_create(pool);
1443  b->filehandle_pool = svn_pool_create(pool);
1444  b->fulltext_cache = NULL;
1445  b->fulltext_delivered = 0;
1446  b->current_fulltext = NULL;
1447
1448  /* Save our output baton. */
1449  *rb_p = b;
1450
1451  return SVN_NO_ERROR;
1452}
1453
1454/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1455   window into *NWIN.  Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
1456   than THIS_CHUNK + 1 when this function returns. */
1457static svn_error_t *
1458read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1459                  rep_state_t *rs, apr_pool_t *result_pool,
1460                  apr_pool_t *scratch_pool)
1461{
1462  svn_boolean_t is_cached;
1463  apr_off_t start_offset;
1464  apr_off_t end_offset;
1465  apr_pool_t *iterpool;
1466
1467  SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1468
1469  SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
1470                         NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
1471
1472  /* Read the next window.  But first, try to find it in the cache. */
1473  SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1474                            result_pool, scratch_pool));
1475  if (is_cached)
1476    return SVN_NO_ERROR;
1477
1478  /* someone has to actually read the data from file.  Open it */
1479  SVN_ERR(auto_open_shared_file(rs->sfile));
1480
1481  /* invoke the 'block-read' feature for non-txn data.
1482     However, don't do that if we are in the middle of some representation,
1483     because the block is unlikely to contain other data. */
1484  if (   rs->chunk_index == 0
1485      && SVN_IS_VALID_REVNUM(rs->revision)
1486      && use_block_read(rs->sfile->fs)
1487      && rs->raw_window_cache)
1488    {
1489      SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
1490                         rs->sfile->rfile, result_pool, scratch_pool));
1491
1492      /* reading the whole block probably also provided us with the
1493         desired txdelta window */
1494      SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1495                                result_pool, scratch_pool));
1496      if (is_cached)
1497        return SVN_NO_ERROR;
1498    }
1499
1500  /* data is still not cached -> we need to read it.
1501     Make sure we have all the necessary info. */
1502  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1503  SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1504
1505  /* RS->FILE may be shared between RS instances -> make sure we point
1506   * to the right data. */
1507  start_offset = rs->start + rs->current;
1508  SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
1509
1510  /* Skip windows to reach the current chunk if we aren't there yet. */
1511  iterpool = svn_pool_create(scratch_pool);
1512  while (rs->chunk_index < this_chunk)
1513    {
1514      svn_pool_clear(iterpool);
1515      SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
1516                                              rs->ver, iterpool));
1517      rs->chunk_index++;
1518      SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
1519      rs->current = start_offset - rs->start;
1520      if (rs->current >= rs->size)
1521        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1522                                _("Reading one svndiff window read "
1523                                  "beyond the end of the "
1524                                  "representation"));
1525    }
1526  svn_pool_destroy(iterpool);
1527
1528  /* Actually read the next window. */
1529  SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
1530                                          rs->ver, result_pool));
1531  SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
1532  rs->current = end_offset - rs->start;
1533  if (rs->current > rs->size)
1534    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1535                            _("Reading one svndiff window read beyond "
1536                              "the end of the representation"));
1537
1538  /* the window has not been cached before, thus cache it now
1539   * (if caching is used for them at all) */
1540  if (SVN_IS_VALID_REVNUM(rs->revision))
1541    SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
1542
1543  return SVN_NO_ERROR;
1544}
1545
1546/* Read SIZE bytes from the representation RS and return it in *NWIN. */
1547static svn_error_t *
1548read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
1549                  apr_size_t size, apr_pool_t *result_pool,
1550                  apr_pool_t *scratch_pool)
1551{
1552  apr_off_t offset;
1553
1554  /* RS->FILE may be shared between RS instances -> make sure we point
1555   * to the right data. */
1556  SVN_ERR(auto_open_shared_file(rs->sfile));
1557  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1558
1559  offset = rs->start + rs->current;
1560  SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
1561
1562  /* Read the plain data. */
1563  *nwin = svn_stringbuf_create_ensure(size, result_pool);
1564  SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
1565                                 NULL, NULL, result_pool));
1566  (*nwin)->data[size] = 0;
1567
1568  /* Update RS. */
1569  rs->current += (apr_off_t)size;
1570
1571  return SVN_NO_ERROR;
1572}
1573
1574/* Get the undeltified window that is a result of combining all deltas
1575   from the current desired representation identified in *RB with its
1576   base representation.  Store the window in *RESULT. */
1577static svn_error_t *
1578get_combined_window(svn_stringbuf_t **result,
1579                    struct rep_read_baton *rb)
1580{
1581  apr_pool_t *pool, *new_pool, *window_pool;
1582  int i;
1583  apr_array_header_t *windows;
1584  svn_stringbuf_t *source, *buf = rb->base_window;
1585  rep_state_t *rs;
1586  apr_pool_t *iterpool;
1587
1588  /* Read all windows that we need to combine. This is fine because
1589     the size of each window is relatively small (100kB) and skip-
1590     delta limits the number of deltas in a chain to well under 100.
1591     Stop early if one of them does not depend on its predecessors. */
1592  window_pool = svn_pool_create(rb->pool);
1593  windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1594  iterpool = svn_pool_create(rb->pool);
1595  for (i = 0; i < rb->rs_list->nelts; ++i)
1596    {
1597      svn_txdelta_window_t *window;
1598
1599      svn_pool_clear(iterpool);
1600
1601      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1602      SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1603                                iterpool));
1604
1605      APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1606      if (window->src_ops == 0)
1607        {
1608          ++i;
1609          break;
1610        }
1611    }
1612
1613  /* Combine in the windows from the other delta reps. */
1614  pool = svn_pool_create(rb->pool);
1615  for (--i; i >= 0; --i)
1616    {
1617      svn_txdelta_window_t *window;
1618
1619      svn_pool_clear(iterpool);
1620
1621      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1622      window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1623
1624      /* Maybe, we've got a PLAIN start representation.  If we do, read
1625         as much data from it as the needed for the txdelta window's source
1626         view.
1627         Note that BUF / SOURCE may only be NULL in the first iteration.
1628         Also note that we may have short-cut reading the delta chain --
1629         in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
1630      source = buf;
1631      if (source == NULL && rb->src_state != NULL && window->src_ops)
1632        SVN_ERR(read_plain_window(&source, rb->src_state, window->sview_len,
1633                                  pool, iterpool));
1634
1635      /* Combine this window with the current one. */
1636      new_pool = svn_pool_create(rb->pool);
1637      buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
1638      buf->len = window->tview_len;
1639
1640      svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1641                                     buf->data, &buf->len);
1642      if (buf->len != window->tview_len)
1643        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1644                                _("svndiff window length is "
1645                                  "corrupt"));
1646
1647      /* Cache windows only if the whole rep content could be read as a
1648         single chunk.  Only then will no other chunk need a deeper RS
1649         list than the cached chunk. */
1650      if (   (rb->chunk_index == 0) && (rs->current == rs->size)
1651          && SVN_IS_VALID_REVNUM(rs->revision))
1652        SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1653
1654      rs->chunk_index++;
1655
1656      /* Cycle pools so that we only need to hold three windows at a time. */
1657      svn_pool_destroy(pool);
1658      pool = new_pool;
1659    }
1660  svn_pool_destroy(iterpool);
1661
1662  svn_pool_destroy(window_pool);
1663
1664  *result = buf;
1665  return SVN_NO_ERROR;
1666}
1667
1668/* Returns whether or not the expanded fulltext of the file is cachable
1669 * based on its size SIZE.  The decision depends on the cache used by RB.
1670 */
1671static svn_boolean_t
1672fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
1673{
1674  return (size < APR_SIZE_MAX)
1675      && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1676}
1677
1678/* Close method used on streams returned by read_representation().
1679 */
1680static svn_error_t *
1681rep_read_contents_close(void *baton)
1682{
1683  struct rep_read_baton *rb = baton;
1684
1685  svn_pool_destroy(rb->pool);
1686  svn_pool_destroy(rb->filehandle_pool);
1687
1688  return SVN_NO_ERROR;
1689}
1690
1691/* Return the next *LEN bytes of the rep from our plain / delta windows
1692   and store them in *BUF. */
1693static svn_error_t *
1694get_contents_from_windows(struct rep_read_baton *rb,
1695                          char *buf,
1696                          apr_size_t *len)
1697{
1698  apr_size_t copy_len, remaining = *len;
1699  char *cur = buf;
1700  rep_state_t *rs;
1701
1702  /* Special case for when there are no delta reps, only a plain
1703     text. */
1704  if (rb->rs_list->nelts == 0)
1705    {
1706      copy_len = remaining;
1707      rs = rb->src_state;
1708
1709      if (rb->base_window != NULL)
1710        {
1711          /* We got the desired rep directly from the cache.
1712             This is where we need the pseudo rep_state created
1713             by build_rep_list(). */
1714          apr_size_t offset = (apr_size_t)rs->current;
1715          if (copy_len + offset > rb->base_window->len)
1716            copy_len = offset < rb->base_window->len
1717                     ? rb->base_window->len - offset
1718                     : 0ul;
1719
1720          memcpy (cur, rb->base_window->data + offset, copy_len);
1721        }
1722      else
1723        {
1724          apr_off_t offset;
1725          if (((apr_off_t) copy_len) > rs->size - rs->current)
1726            copy_len = (apr_size_t) (rs->size - rs->current);
1727
1728          SVN_ERR(auto_open_shared_file(rs->sfile));
1729          SVN_ERR(auto_set_start_offset(rs, rb->pool));
1730
1731          offset = rs->start + rs->current;
1732          SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
1733          SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
1734                                         copy_len, NULL, NULL, rb->pool));
1735        }
1736
1737      rs->current += copy_len;
1738      *len = copy_len;
1739      return SVN_NO_ERROR;
1740    }
1741
1742  while (remaining > 0)
1743    {
1744      /* If we have buffered data from a previous chunk, use that. */
1745      if (rb->buf)
1746        {
1747          /* Determine how much to copy from the buffer. */
1748          copy_len = rb->buf_len - rb->buf_pos;
1749          if (copy_len > remaining)
1750            copy_len = remaining;
1751
1752          /* Actually copy the data. */
1753          memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1754          rb->buf_pos += copy_len;
1755          cur += copy_len;
1756          remaining -= copy_len;
1757
1758          /* If the buffer is all used up, clear it and empty the
1759             local pool. */
1760          if (rb->buf_pos == rb->buf_len)
1761            {
1762              svn_pool_clear(rb->pool);
1763              rb->buf = NULL;
1764            }
1765        }
1766      else
1767        {
1768          svn_stringbuf_t *sbuf = NULL;
1769
1770          rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1771          if (rs->current == rs->size)
1772            break;
1773
1774          /* Get more buffered data by evaluating a chunk. */
1775          SVN_ERR(get_combined_window(&sbuf, rb));
1776
1777          rb->chunk_index++;
1778          rb->buf_len = sbuf->len;
1779          rb->buf = sbuf->data;
1780          rb->buf_pos = 0;
1781        }
1782    }
1783
1784  *len = cur - buf;
1785
1786  return SVN_NO_ERROR;
1787}
1788
1789/* Baton type for get_fulltext_partial. */
1790typedef struct fulltext_baton_t
1791{
1792  /* Target buffer to write to; of at least LEN bytes. */
1793  char *buffer;
1794
1795  /* Offset within the respective fulltext at which we shall start to
1796     copy data into BUFFER. */
1797  apr_size_t start;
1798
1799  /* Number of bytes to copy.  The actual amount may be less in case
1800     the fulltext is short(er). */
1801  apr_size_t len;
1802
1803  /* Number of bytes actually copied into BUFFER. */
1804  apr_size_t read;
1805} fulltext_baton_t;
1806
1807/* Implement svn_cache__partial_getter_func_t for fulltext caches.
1808 * From the fulltext in DATA, we copy the range specified by the
1809 * fulltext_baton_t* BATON into the buffer provided by that baton.
1810 * OUT and RESULT_POOL are not used.
1811 */
1812static svn_error_t *
1813get_fulltext_partial(void **out,
1814                     const void *data,
1815                     apr_size_t data_len,
1816                     void *baton,
1817                     apr_pool_t *result_pool)
1818{
1819  fulltext_baton_t *fulltext_baton = baton;
1820
1821  /* We cached the fulltext with an NUL appended to it. */
1822  apr_size_t fulltext_len = data_len - 1;
1823
1824  /* Clip the copy range to what the fulltext size allows. */
1825  apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1826  fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1827
1828  /* Copy the data to the output buffer and be done. */
1829  memcpy(fulltext_baton->buffer, (const char *)data + start,
1830         fulltext_baton->read);
1831
1832  return SVN_NO_ERROR;
1833}
1834
1835/* Find the fulltext specified in BATON in the fulltext cache given
1836 * as well by BATON.  If that succeeds, set *CACHED to TRUE and copy
1837 * up to the next *LEN bytes into BUFFER.  Set *LEN to the actual
1838 * number of bytes copied.
1839 */
1840static svn_error_t *
1841get_contents_from_fulltext(svn_boolean_t *cached,
1842                           struct rep_read_baton *baton,
1843                           char *buffer,
1844                           apr_size_t *len)
1845{
1846  void *dummy;
1847  fulltext_baton_t fulltext_baton;
1848
1849  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1850                 == baton->fulltext_delivered);
1851  fulltext_baton.buffer = buffer;
1852  fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1853  fulltext_baton.len = *len;
1854  fulltext_baton.read = 0;
1855
1856  SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1857                                 &baton->fulltext_cache_key,
1858                                 get_fulltext_partial, &fulltext_baton,
1859                                 baton->pool));
1860
1861  if (*cached)
1862    {
1863      baton->fulltext_delivered += fulltext_baton.read;
1864      *len = fulltext_baton.read;
1865    }
1866
1867  return SVN_NO_ERROR;
1868}
1869
1870/* Determine the optimal size of a string buf that shall receive a
1871 * (full-) text of NEEDED bytes.
1872 *
1873 * The critical point is that those buffers may be very large and
1874 * can cause memory fragmentation.  We apply simple heuristics to
1875 * make fragmentation less likely.
1876 */
1877static apr_size_t
1878optimimal_allocation_size(apr_size_t needed)
1879{
1880  /* For all allocations, assume some overhead that is shared between
1881   * OS memory managemnt, APR memory management and svn_stringbuf_t. */
1882  const apr_size_t overhead = 0x400;
1883  apr_size_t optimal;
1884
1885  /* If an allocation size if safe for other ephemeral buffers, it should
1886   * be safe for ours. */
1887  if (needed <= SVN__STREAM_CHUNK_SIZE)
1888    return needed;
1889
1890  /* Paranoia edge case:
1891   * Skip our heuristics if they created arithmetical overflow.
1892   * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
1893  if (needed >= APR_SIZE_MAX / 2 - overhead)
1894    return needed;
1895
1896  /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
1897   * Since we know NEEDED to be larger than that, use it as the
1898   * starting point.
1899   *
1900   * Heuristics: Allocate a power-of-two number of bytes that fit
1901   *             NEEDED plus some OVERHEAD.  The APR allocator
1902   *             will round it up to the next full page size.
1903   */
1904  optimal = SVN__STREAM_CHUNK_SIZE;
1905  while (optimal - overhead < needed)
1906    optimal *= 2;
1907
1908  /* This is above or equal to NEEDED. */
1909  return optimal - overhead;
1910}
1911
1912/* After a fulltext cache lookup failure, we will continue to read from
1913 * combined delta or plain windows.  However, we must first make that data
1914 * stream in BATON catch up tho the position LEN already delivered from the
1915 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
1916 * want to cache it at the end.
1917 */
1918static svn_error_t *
1919skip_contents(struct rep_read_baton *baton,
1920              svn_filesize_t len)
1921{
1922  svn_error_t *err = SVN_NO_ERROR;
1923
1924  /* Do we want to cache the reconstructed fulltext? */
1925  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
1926    {
1927      char *buffer;
1928      svn_filesize_t to_alloc = MAX(len, baton->len);
1929
1930      /* This should only be happening if BATON->LEN and LEN are
1931       * cacheable, implying they fit into memory. */
1932      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
1933
1934      /* Allocate the fulltext buffer. */
1935      baton->current_fulltext = svn_stringbuf_create_ensure(
1936                        optimimal_allocation_size((apr_size_t)to_alloc),
1937                        baton->filehandle_pool);
1938
1939      /* Read LEN bytes from the window stream and store the data
1940       * in the fulltext buffer (will be filled by further reads later). */
1941      baton->current_fulltext->len = (apr_size_t)len;
1942      baton->current_fulltext->data[(apr_size_t)len] = 0;
1943
1944      buffer = baton->current_fulltext->data;
1945      while (len > 0 && !err)
1946        {
1947          apr_size_t to_read = (apr_size_t)len;
1948          err = get_contents_from_windows(baton, buffer, &to_read);
1949          len -= to_read;
1950          buffer += to_read;
1951        }
1952    }
1953  else if (len > 0)
1954    {
1955      /* Simply drain LEN bytes from the window stream. */
1956      apr_pool_t *subpool = subpool = svn_pool_create(baton->pool);
1957      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
1958
1959      while (len > 0 && !err)
1960        {
1961          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
1962                            ? SVN__STREAM_CHUNK_SIZE
1963                            : (apr_size_t)len;
1964
1965          err = get_contents_from_windows(baton, buffer, &to_read);
1966          len -= to_read;
1967        }
1968
1969      svn_pool_destroy(subpool);
1970    }
1971
1972  return svn_error_trace(err);
1973}
1974
1975/* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
1976   representation and store them in *BUF.  Sum as we read and verify
1977   the MD5 sum at the end. */
1978static svn_error_t *
1979rep_read_contents(void *baton,
1980                  char *buf,
1981                  apr_size_t *len)
1982{
1983  struct rep_read_baton *rb = baton;
1984
1985  /* Get data from the fulltext cache for as long as we can. */
1986  if (rb->fulltext_cache)
1987    {
1988      svn_boolean_t cached;
1989      SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
1990      if (cached)
1991        return SVN_NO_ERROR;
1992
1993      /* Cache miss.  From now on, we will never read from the fulltext
1994       * cache for this representation anymore. */
1995      rb->fulltext_cache = NULL;
1996    }
1997
1998  /* No fulltext cache to help us.  We must read from the window stream. */
1999  if (!rb->rs_list)
2000    {
2001      /* Window stream not initialized, yet.  Do it now. */
2002      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2003                             &rb->src_state, &rb->len, rb->fs, &rb->rep,
2004                             rb->filehandle_pool));
2005
2006      /* In case we did read from the fulltext cache before, make the
2007       * window stream catch up.  Also, initialize the fulltext buffer
2008       * if we want to cache the fulltext at the end. */
2009      SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2010    }
2011
2012  /* Get the next block of data. */
2013  SVN_ERR(get_contents_from_windows(rb, buf, len));
2014
2015  if (rb->current_fulltext)
2016    svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2017
2018  /* Perform checksumming.  We want to check the checksum as soon as
2019     the last byte of data is read, in case the caller never performs
2020     a short read, but we don't want to finalize the MD5 context
2021     twice. */
2022  if (!rb->checksum_finalized)
2023    {
2024      SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2025      rb->off += *len;
2026      if (rb->off == rb->len)
2027        {
2028          svn_checksum_t *md5_checksum;
2029          svn_checksum_t expected;
2030          expected.kind = svn_checksum_md5;
2031          expected.digest = rb->md5_digest;
2032
2033          rb->checksum_finalized = TRUE;
2034          SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2035                                     rb->pool));
2036          if (!svn_checksum_match(md5_checksum, &expected))
2037            return svn_error_create(SVN_ERR_FS_CORRUPT,
2038                    svn_checksum_mismatch_err(&expected, md5_checksum,
2039                        rb->pool,
2040                        _("Checksum mismatch while reading representation")),
2041                    NULL);
2042        }
2043    }
2044
2045  if (rb->off == rb->len && rb->current_fulltext)
2046    {
2047      fs_fs_data_t *ffd = rb->fs->fsap_data;
2048      SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2049                             rb->current_fulltext, rb->pool));
2050      rb->current_fulltext = NULL;
2051    }
2052
2053  return SVN_NO_ERROR;
2054}
2055
2056svn_error_t *
2057svn_fs_fs__get_contents(svn_stream_t **contents_p,
2058                        svn_fs_t *fs,
2059                        representation_t *rep,
2060                        svn_boolean_t cache_fulltext,
2061                        apr_pool_t *pool)
2062{
2063  if (! rep)
2064    {
2065      *contents_p = svn_stream_empty(pool);
2066    }
2067  else
2068    {
2069      fs_fs_data_t *ffd = fs->fsap_data;
2070      svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size;
2071      struct rep_read_baton *rb;
2072
2073      pair_cache_key_t fulltext_cache_key = { 0 };
2074      fulltext_cache_key.revision = rep->revision;
2075      fulltext_cache_key.second = rep->item_index;
2076
2077      /* Initialize the reader baton.  Some members may added lazily
2078       * while reading from the stream */
2079      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2080
2081      /* Make the stream attempt fulltext cache lookups if the fulltext
2082       * is cacheable.  If it is not, then also don't try to buffer and
2083       * cache it. */
2084      if (ffd->fulltext_cache && cache_fulltext
2085          && SVN_IS_VALID_REVNUM(rep->revision)
2086          && fulltext_size_is_cachable(ffd, len))
2087        {
2088          rb->fulltext_cache = ffd->fulltext_cache;
2089        }
2090      else
2091        {
2092          /* This will also prevent the reconstructed fulltext from being
2093             put into the cache. */
2094          rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2095        }
2096
2097      *contents_p = svn_stream_create(rb, pool);
2098      svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2099                           rep_read_contents);
2100      svn_stream_set_close(*contents_p, rep_read_contents_close);
2101    }
2102
2103  return SVN_NO_ERROR;
2104}
2105
2106/* Baton for cache_access_wrapper. Wraps the original parameters of
2107 * svn_fs_fs__try_process_file_content().
2108 */
2109typedef struct cache_access_wrapper_baton_t
2110{
2111  svn_fs_process_contents_func_t func;
2112  void* baton;
2113} cache_access_wrapper_baton_t;
2114
2115/* Wrapper to translate between svn_fs_process_contents_func_t and
2116 * svn_cache__partial_getter_func_t.
2117 */
2118static svn_error_t *
2119cache_access_wrapper(void **out,
2120                     const void *data,
2121                     apr_size_t data_len,
2122                     void *baton,
2123                     apr_pool_t *pool)
2124{
2125  cache_access_wrapper_baton_t *wrapper_baton = baton;
2126
2127  SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2128                              data_len - 1, /* cache adds terminating 0 */
2129                              wrapper_baton->baton,
2130                              pool));
2131
2132  /* non-NULL value to signal the calling cache that all went well */
2133  *out = baton;
2134
2135  return SVN_NO_ERROR;
2136}
2137
2138svn_error_t *
2139svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
2140                                     svn_fs_t *fs,
2141                                     node_revision_t *noderev,
2142                                     svn_fs_process_contents_func_t processor,
2143                                     void* baton,
2144                                     apr_pool_t *pool)
2145{
2146  representation_t *rep = noderev->data_rep;
2147  if (rep)
2148    {
2149      fs_fs_data_t *ffd = fs->fsap_data;
2150      pair_cache_key_t fulltext_cache_key = { 0 };
2151
2152      fulltext_cache_key.revision = rep->revision;
2153      fulltext_cache_key.second = rep->item_index;
2154      if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
2155          && fulltext_size_is_cachable(ffd, rep->expanded_size))
2156        {
2157          cache_access_wrapper_baton_t wrapper_baton;
2158          void *dummy = NULL;
2159
2160          wrapper_baton.func = processor;
2161          wrapper_baton.baton = baton;
2162          return svn_cache__get_partial(&dummy, success,
2163                                        ffd->fulltext_cache,
2164                                        &fulltext_cache_key,
2165                                        cache_access_wrapper,
2166                                        &wrapper_baton,
2167                                        pool);
2168        }
2169    }
2170
2171  *success = FALSE;
2172  return SVN_NO_ERROR;
2173}
2174
2175
2176/* Baton used when reading delta windows. */
2177struct delta_read_baton
2178{
2179  rep_state_t *rs;
2180  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
2181};
2182
2183/* This implements the svn_txdelta_next_window_fn_t interface. */
2184static svn_error_t *
2185delta_read_next_window(svn_txdelta_window_t **window, void *baton,
2186                       apr_pool_t *pool)
2187{
2188  struct delta_read_baton *drb = baton;
2189  apr_pool_t *scratch_pool = svn_pool_create(pool);
2190
2191  *window = NULL;
2192  if (drb->rs->current < drb->rs->size)
2193    {
2194      SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2195                                scratch_pool));
2196      drb->rs->chunk_index++;
2197    }
2198
2199  svn_pool_destroy(scratch_pool);
2200
2201  return SVN_NO_ERROR;
2202}
2203
2204/* This implements the svn_txdelta_md5_digest_fn_t interface. */
2205static const unsigned char *
2206delta_read_md5_digest(void *baton)
2207{
2208  struct delta_read_baton *drb = baton;
2209  return drb->md5_digest;
2210}
2211
2212/* Return a txdelta stream for on-disk representation REP_STATE
2213 * of TARGET.  Allocate the result in POOL.
2214 */
2215static svn_txdelta_stream_t *
2216get_storaged_delta_stream(rep_state_t *rep_state,
2217                          node_revision_t *target,
2218                          apr_pool_t *pool)
2219{
2220  /* Create the delta read baton. */
2221  struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
2222  drb->rs = rep_state;
2223  memcpy(drb->md5_digest, target->data_rep->md5_digest,
2224         sizeof(drb->md5_digest));
2225  return svn_txdelta_stream_create(drb, delta_read_next_window,
2226                                   delta_read_md5_digest, pool);
2227}
2228
2229svn_error_t *
2230svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2231                                 svn_fs_t *fs,
2232                                 node_revision_t *source,
2233                                 node_revision_t *target,
2234                                 apr_pool_t *pool)
2235{
2236  svn_stream_t *source_stream, *target_stream;
2237  rep_state_t *rep_state;
2238  svn_fs_fs__rep_header_t *rep_header;
2239  fs_fs_data_t *ffd = fs->fsap_data;
2240
2241  /* Try a shortcut: if the target is stored as a delta against the source,
2242     then just use that delta.  However, prefer using the fulltext cache
2243     whenever that is available. */
2244  if (target->data_rep && (source || ! ffd->fulltext_cache))
2245    {
2246      /* Read target's base rep if any. */
2247      SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2248                                target->data_rep, fs, pool, pool));
2249
2250      if (source && source->data_rep && target->data_rep)
2251        {
2252          /* If that matches source, then use this delta as is.
2253             Note that we want an actual delta here.  E.g. a self-delta would
2254             not be good enough. */
2255          if (rep_header->type == svn_fs_fs__rep_delta
2256              && rep_header->base_revision == source->data_rep->revision
2257              && rep_header->base_item_index == source->data_rep->item_index)
2258            {
2259              *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2260              return SVN_NO_ERROR;
2261            }
2262        }
2263      else if (!source)
2264        {
2265          /* We want a self-delta. There is a fair chance that TARGET got
2266             added in this revision and is already stored in the requested
2267             format. */
2268          if (rep_header->type == svn_fs_fs__rep_self_delta)
2269            {
2270              *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2271              return SVN_NO_ERROR;
2272            }
2273        }
2274
2275      /* Don't keep file handles open for longer than necessary. */
2276      if (rep_state->sfile->rfile)
2277        {
2278          SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
2279          rep_state->sfile->rfile = NULL;
2280        }
2281    }
2282
2283  /* Read both fulltexts and construct a delta. */
2284  if (source)
2285    SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
2286                                    TRUE, pool));
2287  else
2288    source_stream = svn_stream_empty(pool);
2289  SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
2290                                  TRUE, pool));
2291
2292  /* Because source and target stream will already verify their content,
2293   * there is no need to do this once more.  In particular if the stream
2294   * content is being fetched from cache. */
2295  svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
2296
2297  return SVN_NO_ERROR;
2298}
2299
2300/* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
2301   by their respective name. */
2302static svn_boolean_t
2303sorted(apr_array_header_t *entries)
2304{
2305  int i;
2306
2307  const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
2308  for (i = 0; i < entries->nelts-1; ++i)
2309    if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2310      return FALSE;
2311
2312  return TRUE;
2313}
2314
2315/* Compare the names of the two dirents given in **A and **B. */
2316static int
2317compare_dirents(const void *a, const void *b)
2318{
2319  const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2320  const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
2321
2322  return strcmp(lhs->name, rhs->name);
2323}
2324
2325/* Compare the name of the dirents given in **A with the C string in *B. */
2326static int
2327compare_dirent_name(const void *a, const void *b)
2328{
2329  const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2330  const char *rhs = b;
2331
2332  return strcmp(lhs->name, rhs);
2333}
2334
2335/* Into ENTRIES, read all directories entries from the key-value text in
2336 * STREAM.  If INCREMENTAL is TRUE, read until the end of the STREAM and
2337 * update the data.  ID is provided for nicer error messages.
2338 */
2339static svn_error_t *
2340read_dir_entries(apr_array_header_t *entries,
2341                 svn_stream_t *stream,
2342                 svn_boolean_t incremental,
2343                 const svn_fs_id_t *id,
2344                 apr_pool_t *result_pool,
2345                 apr_pool_t *scratch_pool)
2346{
2347  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2348  apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
2349  const char *terminator = SVN_HASH_TERMINATOR;
2350
2351  /* Read until the terminator (non-incremental) or the end of STREAM
2352     (incremental mode).  In the latter mode, we use a temporary HASH
2353     to make updating and removing entries cheaper. */
2354  while (1)
2355    {
2356      svn_hash__entry_t entry;
2357      svn_fs_dirent_t *dirent;
2358      char *str;
2359
2360      svn_pool_clear(iterpool);
2361      SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
2362                                   incremental, iterpool));
2363
2364      /* End of directory? */
2365      if (entry.key == NULL)
2366        {
2367          /* In incremental mode, we skip the terminator and read the
2368             increments following it until the end of the stream. */
2369          if (incremental && terminator)
2370            terminator = NULL;
2371          else
2372            break;
2373        }
2374
2375      /* Deleted entry? */
2376      if (entry.val == NULL)
2377        {
2378          /* We must be in incremental mode */
2379          assert(hash);
2380          apr_hash_set(hash, entry.key, entry.keylen, NULL);
2381          continue;
2382        }
2383
2384      /* Add a new directory entry. */
2385      dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2386      dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2387
2388      str = svn_cstring_tokenize(" ", &entry.val);
2389      if (str == NULL)
2390        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2391                           _("Directory entry corrupt in '%s'"),
2392                           svn_fs_fs__id_unparse(id, scratch_pool)->data);
2393
2394      if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
2395        {
2396          dirent->kind = svn_node_file;
2397        }
2398      else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
2399        {
2400          dirent->kind = svn_node_dir;
2401        }
2402      else
2403        {
2404          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2405                           _("Directory entry corrupt in '%s'"),
2406                           svn_fs_fs__id_unparse(id, scratch_pool)->data);
2407        }
2408
2409      str = svn_cstring_tokenize(" ", &entry.val);
2410      if (str == NULL)
2411        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2412                           _("Directory entry corrupt in '%s'"),
2413                           svn_fs_fs__id_unparse(id, scratch_pool)->data);
2414
2415      SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
2416
2417      /* In incremental mode, update the hash; otherwise, write to the
2418       * final array.  Be sure to use hash keys that survive this iteration.
2419       */
2420      if (incremental)
2421        apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2422      else
2423        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
2424    }
2425
2426  /* Convert container to a sorted array. */
2427  if (incremental)
2428    {
2429      apr_hash_index_t *hi;
2430      for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2431        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
2432    }
2433
2434  if (!sorted(entries))
2435    svn_sort__array(entries, compare_dirents);
2436
2437  svn_pool_destroy(iterpool);
2438
2439  return SVN_NO_ERROR;
2440}
2441
2442/* Fetch the contents of a directory into ENTRIES.  Values are stored
2443   as filename to string mappings; further conversion is necessary to
2444   convert them into svn_fs_dirent_t values. */
2445static svn_error_t *
2446get_dir_contents(apr_array_header_t **entries,
2447                 svn_fs_t *fs,
2448                 node_revision_t *noderev,
2449                 apr_pool_t *result_pool,
2450                 apr_pool_t *scratch_pool)
2451{
2452  svn_stream_t *contents;
2453
2454  *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
2455  if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2456    {
2457      const char *filename
2458        = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool);
2459
2460      /* The representation is mutable.  Read the old directory
2461         contents from the mutable children file, followed by the
2462         changes we've made in this transaction. */
2463      SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
2464                                       scratch_pool));
2465      SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id,
2466                               result_pool, scratch_pool));
2467      SVN_ERR(svn_stream_close(contents));
2468    }
2469  else if (noderev->data_rep)
2470    {
2471      /* Undeltify content before parsing it. Otherwise, we could only
2472       * parse it byte-by-byte.
2473       */
2474      apr_size_t len = noderev->data_rep->expanded_size
2475                     ? (apr_size_t)noderev->data_rep->expanded_size
2476                     : (apr_size_t)noderev->data_rep->size;
2477      svn_stringbuf_t *text;
2478
2479      /* The representation is immutable.  Read it normally. */
2480      SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
2481                                      FALSE, scratch_pool));
2482      SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2483      SVN_ERR(svn_stream_close(contents));
2484
2485      /* de-serialize hash */
2486      contents = svn_stream_from_stringbuf(text, scratch_pool);
2487      SVN_ERR(read_dir_entries(*entries, contents, FALSE,  noderev->id,
2488                               result_pool, scratch_pool));
2489    }
2490
2491  return SVN_NO_ERROR;
2492}
2493
2494
2495/* Return the cache object in FS responsible to storing the directory the
2496 * NODEREV plus the corresponding *KEY.  If no cache exists, return NULL.
2497 * PAIR_KEY must point to some key struct, which does not need to be
2498 * initialized.  We use it to avoid dynamic allocation.
2499 */
2500static svn_cache__t *
2501locate_dir_cache(svn_fs_t *fs,
2502                 const void **key,
2503                 pair_cache_key_t *pair_key,
2504                 node_revision_t *noderev,
2505                 apr_pool_t *pool)
2506{
2507  fs_fs_data_t *ffd = fs->fsap_data;
2508  if (svn_fs_fs__id_is_txn(noderev->id))
2509    {
2510      /* data in txns requires the expensive fs_id-based addressing mode */
2511      *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
2512      return ffd->txn_dir_cache;
2513    }
2514  else
2515    {
2516      /* committed data can use simple rev,item pairs */
2517      if (noderev->data_rep)
2518        {
2519          pair_key->revision = noderev->data_rep->revision;
2520          pair_key->second = noderev->data_rep->item_index;
2521          *key = pair_key;
2522        }
2523      else
2524        {
2525          /* no data rep -> empty directory.
2526             A NULL key causes a cache miss. */
2527          *key = NULL;
2528        }
2529
2530      return ffd->dir_cache;
2531    }
2532}
2533
2534svn_error_t *
2535svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
2536                            svn_fs_t *fs,
2537                            node_revision_t *noderev,
2538                            apr_pool_t *result_pool,
2539                            apr_pool_t *scratch_pool)
2540{
2541  pair_cache_key_t pair_key = { 0 };
2542  const void *key;
2543
2544  /* find the cache we may use */
2545  svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2546                                         scratch_pool);
2547  if (cache)
2548    {
2549      svn_boolean_t found;
2550
2551      SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key,
2552                             result_pool));
2553      if (found)
2554        return SVN_NO_ERROR;
2555    }
2556
2557  /* Read in the directory contents. */
2558  SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool,
2559                           scratch_pool));
2560
2561  /* Update the cache, if we are to use one.
2562   *
2563   * Don't even attempt to serialize very large directories; it would cause
2564   * an unnecessary memory allocation peak.  150 bytes/entry is about right.
2565   */
2566  if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts))
2567    SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool));
2568
2569  return SVN_NO_ERROR;
2570}
2571
2572svn_fs_dirent_t *
2573svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
2574                          const char *name,
2575                          int *hint)
2576{
2577  svn_fs_dirent_t **result
2578    = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2579  return result ? *result : NULL;
2580}
2581
2582svn_error_t *
2583svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
2584                                  svn_fs_t *fs,
2585                                  node_revision_t *noderev,
2586                                  const char *name,
2587                                  apr_pool_t *result_pool,
2588                                  apr_pool_t *scratch_pool)
2589{
2590  svn_boolean_t found = FALSE;
2591
2592  /* find the cache we may use */
2593  pair_cache_key_t pair_key = { 0 };
2594  const void *key;
2595  svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2596                                         scratch_pool);
2597  if (cache)
2598    {
2599      /* Cache lookup. */
2600      SVN_ERR(svn_cache__get_partial((void **)dirent,
2601                                     &found,
2602                                     cache,
2603                                     key,
2604                                     svn_fs_fs__extract_dir_entry,
2605                                     (void*)name,
2606                                     result_pool));
2607    }
2608
2609  /* fetch data from disk if we did not find it in the cache */
2610  if (! found)
2611    {
2612      apr_array_header_t *entries;
2613      svn_fs_dirent_t *entry;
2614      svn_fs_dirent_t *entry_copy = NULL;
2615
2616      /* read the dir from the file system. It will probably be put it
2617         into the cache for faster lookup in future calls. */
2618      SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev,
2619                                          scratch_pool, scratch_pool));
2620
2621      /* find desired entry and return a copy in POOL, if found */
2622      entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
2623      if (entry)
2624        {
2625          entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
2626          entry_copy->name = apr_pstrdup(result_pool, entry->name);
2627          entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
2628          entry_copy->kind = entry->kind;
2629        }
2630
2631      *dirent = entry_copy;
2632    }
2633
2634  return SVN_NO_ERROR;
2635}
2636
2637svn_error_t *
2638svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
2639                        svn_fs_t *fs,
2640                        node_revision_t *noderev,
2641                        apr_pool_t *pool)
2642{
2643  apr_hash_t *proplist;
2644  svn_stream_t *stream;
2645
2646  if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
2647    {
2648      svn_error_t *err;
2649      const char *filename
2650        = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
2651      proplist = apr_hash_make(pool);
2652
2653      SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
2654      err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2655      if (err)
2656        {
2657          svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2658
2659          svn_error_clear(svn_stream_close(stream));
2660          return svn_error_quick_wrapf(err,
2661                   _("malformed property list for node-revision '%s' in '%s'"),
2662                   id_str->data, filename);
2663        }
2664      SVN_ERR(svn_stream_close(stream));
2665    }
2666  else if (noderev->prop_rep)
2667    {
2668      svn_error_t *err;
2669      fs_fs_data_t *ffd = fs->fsap_data;
2670      representation_t *rep = noderev->prop_rep;
2671      pair_cache_key_t key = { 0 };
2672
2673      key.revision = rep->revision;
2674      key.second = rep->item_index;
2675      if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
2676        {
2677          svn_boolean_t is_cached;
2678          SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
2679                                 ffd->properties_cache, &key, pool));
2680          if (is_cached)
2681            return SVN_NO_ERROR;
2682        }
2683
2684      proplist = apr_hash_make(pool);
2685      SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
2686                                      pool));
2687      err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2688      if (err)
2689        {
2690          svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2691
2692          svn_error_clear(svn_stream_close(stream));
2693          return svn_error_quick_wrapf(err,
2694                   _("malformed property list for node-revision '%s'"),
2695                   id_str->data);
2696        }
2697      SVN_ERR(svn_stream_close(stream));
2698
2699      if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
2700        SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
2701    }
2702  else
2703    {
2704      /* return an empty prop list if the node doesn't have any props */
2705      proplist = apr_hash_make(pool);
2706    }
2707
2708  *proplist_p = proplist;
2709
2710  return SVN_NO_ERROR;
2711}
2712
2713svn_error_t *
2714svn_fs_fs__get_changes(apr_array_header_t **changes,
2715                       svn_fs_t *fs,
2716                       svn_revnum_t rev,
2717                       apr_pool_t *result_pool)
2718{
2719  apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES;
2720  svn_fs_fs__revision_file_t *revision_file;
2721  svn_boolean_t found;
2722  fs_fs_data_t *ffd = fs->fsap_data;
2723  apr_pool_t *scratch_pool = svn_pool_create(result_pool);
2724
2725  /* try cache lookup first */
2726
2727  if (ffd->changes_cache)
2728    {
2729      SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
2730                             &rev, result_pool));
2731    }
2732  else
2733    {
2734      found = FALSE;
2735    }
2736
2737  if (!found)
2738    {
2739      /* read changes from revision file */
2740
2741      SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
2742      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
2743                                               scratch_pool, scratch_pool));
2744
2745      if (use_block_read(fs))
2746        {
2747          /* 'block-read' will also provide us with the desired data */
2748          SVN_ERR(block_read((void **)changes, fs,
2749                             rev, SVN_FS_FS__ITEM_INDEX_CHANGES,
2750                             revision_file, result_pool, scratch_pool));
2751        }
2752      else
2753        {
2754          /* Addressing is very different for old formats
2755           * (needs to read the revision trailer). */
2756          if (svn_fs_fs__use_log_addressing(fs))
2757            SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs,
2758                                           revision_file, rev, NULL,
2759                                           SVN_FS_FS__ITEM_INDEX_CHANGES,
2760                                           scratch_pool));
2761          else
2762            SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
2763                                            revision_file, fs, rev,
2764                                            scratch_pool));
2765
2766          /* Actual reading and parsing are the same, though. */
2767          SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset,
2768                               scratch_pool));
2769          SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream,
2770                                          result_pool, scratch_pool));
2771
2772          /* cache for future reference */
2773
2774          if (ffd->changes_cache)
2775            {
2776              /* Guesstimate for the size of the in-cache representation. */
2777              apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts;
2778
2779              /* Don't even serialize data that probably won't fit into the
2780               * cache.  This often implies that either CHANGES is very
2781               * large, memory is scarce or both.  Having a huge temporary
2782               * copy would not be a good thing in either case. */
2783              if (svn_cache__is_cachable(ffd->changes_cache, estimated_size))
2784                SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes,
2785                                       scratch_pool));
2786            }
2787        }
2788
2789      SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
2790    }
2791
2792  SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes,
2793                         SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
2794
2795  svn_pool_destroy(scratch_pool);
2796  return SVN_NO_ERROR;
2797}
2798
2799/* Inialize the representation read state RS for the given REP_HEADER and
2800 * p2l index ENTRY.  If not NULL, assign FILE and STREAM to RS.
2801 * Use RESULT_POOL for allocations.
2802 */
2803static svn_error_t *
2804init_rep_state(rep_state_t *rs,
2805               svn_fs_fs__rep_header_t *rep_header,
2806               svn_fs_t *fs,
2807               svn_fs_fs__revision_file_t *file,
2808               svn_fs_fs__p2l_entry_t* entry,
2809               apr_pool_t *result_pool)
2810{
2811  fs_fs_data_t *ffd = fs->fsap_data;
2812  shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
2813
2814  /* this function does not apply to representation containers */
2815  SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
2816                 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
2817
2818  shared_file->rfile = file;
2819  shared_file->fs = fs;
2820  shared_file->revision = entry->item.revision;
2821  shared_file->pool = result_pool;
2822
2823  rs->sfile = shared_file;
2824  rs->revision = entry->item.revision;
2825  rs->item_index = entry->item.number;
2826  rs->header_size = rep_header->header_size;
2827  rs->start = entry->offset + rs->header_size;
2828  rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
2829  rs->size = entry->size - rep_header->header_size - 7;
2830  rs->ver = 1;
2831  rs->chunk_index = 0;
2832  rs->raw_window_cache = ffd->raw_window_cache;
2833  rs->window_cache = ffd->txdelta_window_cache;
2834  rs->combined_cache = ffd->combined_window_cache;
2835
2836  return SVN_NO_ERROR;
2837}
2838
2839/* Implement svn_cache__partial_getter_func_t for txdelta windows.
2840 * Instead of the whole window data, return only END_OFFSET member.
2841 */
2842static svn_error_t *
2843get_txdelta_window_end(void **out,
2844                       const void *data,
2845                       apr_size_t data_len,
2846                       void *baton,
2847                       apr_pool_t *result_pool)
2848{
2849  const svn_fs_fs__txdelta_cached_window_t *window
2850    = (const svn_fs_fs__txdelta_cached_window_t *)data;
2851  *(apr_off_t*)out = window->end_offset;
2852
2853  return SVN_NO_ERROR;
2854}
2855
2856/* Implement svn_cache__partial_getter_func_t for raw windows.
2857 * Instead of the whole window data, return only END_OFFSET member.
2858 */
2859static svn_error_t *
2860get_raw_window_end(void **out,
2861                   const void *data,
2862                   apr_size_t data_len,
2863                   void *baton,
2864                   apr_pool_t *result_pool)
2865{
2866  const svn_fs_fs__raw_cached_window_t *window
2867    = (const svn_fs_fs__raw_cached_window_t *)data;
2868  *(apr_off_t*)out = window->end_offset;
2869
2870  return SVN_NO_ERROR;
2871}
2872
2873/* Walk through all windows in the representation addressed by RS in FS
2874 * (excluding the delta bases) and put those not already cached into the
2875 * window caches.  If MAX_OFFSET is not -1, don't read windows that start
2876 * at or beyond that offset.  Use POOL for temporary allocations.
2877 *
2878 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
2879 * be non-NULL.
2880 */
2881static svn_error_t *
2882cache_windows(svn_fs_t *fs,
2883              rep_state_t *rs,
2884              apr_off_t max_offset,
2885              apr_pool_t *pool)
2886{
2887  apr_pool_t *iterpool = svn_pool_create(pool);
2888  while (rs->current < rs->size)
2889    {
2890      apr_off_t end_offset;
2891      svn_boolean_t found = FALSE;
2892      window_cache_key_t key = { 0 };
2893
2894      svn_pool_clear(iterpool);
2895
2896      if (max_offset != -1 && rs->start + rs->current >= max_offset)
2897        {
2898          svn_pool_destroy(iterpool);
2899          return SVN_NO_ERROR;
2900        }
2901
2902      /* We don't need to read the data again if it is already in cache.
2903       * It might be cached as either raw or parsed window.
2904       */
2905      SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
2906                                     rs->raw_window_cache,
2907                                     get_window_key(&key, rs),
2908                                     get_raw_window_end, NULL,
2909                                     iterpool));
2910      if (! found)
2911        SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
2912                                       rs->window_cache, &key,
2913                                       get_txdelta_window_end, NULL,
2914                                       iterpool));
2915
2916      if (found)
2917        {
2918          rs->current = end_offset;
2919        }
2920      else
2921        {
2922          /* Read, decode and cache the window. */
2923          svn_fs_fs__raw_cached_window_t window;
2924          apr_off_t start_offset = rs->start + rs->current;
2925          apr_size_t window_len;
2926          char *buf;
2927
2928          /* navigate to the current window */
2929          SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
2930          SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
2931                                                   rs->sfile->rfile->stream,
2932                                                   iterpool));
2933
2934          /* Read the raw window. */
2935          buf = apr_palloc(iterpool, window_len + 1);
2936          SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
2937          SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
2938                                         window_len, NULL, NULL, iterpool));
2939          buf[window_len] = 0;
2940
2941          /* update relative offset in representation */
2942          rs->current += window_len;
2943
2944          /* Construct the cachable raw window object. */
2945          window.end_offset = rs->current;
2946          window.window.len = window_len;
2947          window.window.data = buf;
2948
2949          /* cache the window now */
2950          SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
2951                                 iterpool));
2952        }
2953
2954      if (rs->current > rs->size)
2955        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
2956                                _("Reading one svndiff window read beyond "
2957                                            "the end of the representation"));
2958
2959      rs->chunk_index++;
2960    }
2961
2962  svn_pool_destroy(iterpool);
2963  return SVN_NO_ERROR;
2964}
2965
2966/* Read all txdelta / plain windows following REP_HEADER in FS as described
2967 * by ENTRY.  Read the data from the already open FILE and the wrapping
2968 * STREAM object.  If MAX_OFFSET is not -1, don't read windows that start
2969 * at or beyond that offset.  Use SCRATCH_POOL for temporary allocations.
2970 * If caching is not enabled, this is a no-op.
2971 */
2972static svn_error_t *
2973block_read_windows(svn_fs_fs__rep_header_t *rep_header,
2974                   svn_fs_t *fs,
2975                   svn_fs_fs__revision_file_t *rev_file,
2976                   svn_fs_fs__p2l_entry_t* entry,
2977                   apr_off_t max_offset,
2978                   apr_pool_t *result_pool,
2979                   apr_pool_t *scratch_pool)
2980{
2981  fs_fs_data_t *ffd = fs->fsap_data;
2982  rep_state_t rs = { 0 };
2983  apr_off_t offset;
2984  window_cache_key_t key = { 0 };
2985
2986  if (   (rep_header->type != svn_fs_fs__rep_plain
2987          && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
2988      || (rep_header->type == svn_fs_fs__rep_plain
2989          && !ffd->combined_window_cache))
2990    return SVN_NO_ERROR;
2991
2992  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
2993                         result_pool));
2994
2995  /* RS->FILE may be shared between RS instances -> make sure we point
2996   * to the right data. */
2997  offset = rs.start + rs.current;
2998  if (rep_header->type == svn_fs_fs__rep_plain)
2999    {
3000      svn_stringbuf_t *plaintext;
3001      svn_boolean_t is_cached;
3002
3003      /* already in cache? */
3004      SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
3005                                 get_window_key(&key, &rs),
3006                                 scratch_pool));
3007      if (is_cached)
3008        return SVN_NO_ERROR;
3009
3010      /* for larger reps, the header may have crossed a block boundary.
3011       * make sure we still read blocks properly aligned, i.e. don't use
3012       * plain seek here. */
3013      SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
3014
3015      plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
3016      SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
3017                                     rs.size, &plaintext->len, NULL,
3018                                     result_pool));
3019      plaintext->data[plaintext->len] = 0;
3020      rs.current += rs.size;
3021
3022      SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
3023    }
3024  else
3025    {
3026      SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
3027    }
3028
3029  return SVN_NO_ERROR;
3030}
3031
3032/* Try to get the representation header identified by KEY from FS's cache.
3033 * If it has not been cached, read it from the current position in STREAM
3034 * and put it into the cache (if caching has been enabled for rep headers).
3035 * Return the result in *REP_HEADER.  Use POOL for allocations.
3036 */
3037static svn_error_t *
3038read_rep_header(svn_fs_fs__rep_header_t **rep_header,
3039                svn_fs_t *fs,
3040                svn_stream_t *stream,
3041                pair_cache_key_t *key,
3042                apr_pool_t *result_pool,
3043                apr_pool_t *scratch_pool)
3044{
3045  fs_fs_data_t *ffd = fs->fsap_data;
3046  svn_boolean_t is_cached = FALSE;
3047
3048  if (ffd->rep_header_cache)
3049    {
3050      SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
3051                             ffd->rep_header_cache, key,
3052                             result_pool));
3053      if (is_cached)
3054        return SVN_NO_ERROR;
3055    }
3056
3057  SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
3058                                     scratch_pool));
3059
3060  if (ffd->rep_header_cache)
3061    SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
3062                           scratch_pool));
3063
3064  return SVN_NO_ERROR;
3065}
3066
3067/* Fetch the representation data (header, txdelta / plain windows)
3068 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
3069 * Read the data from the already open FILE and the wrapping
3070 * STREAM object.  If MAX_OFFSET is not -1, don't read windows that start
3071 * at or beyond that offset.
3072 * Use SCRATCH_POOL for temporary allocations.
3073 */
3074static svn_error_t *
3075block_read_contents(svn_fs_t *fs,
3076                    svn_fs_fs__revision_file_t *rev_file,
3077                    svn_fs_fs__p2l_entry_t* entry,
3078                    apr_off_t max_offset,
3079                    apr_pool_t *result_pool,
3080                    apr_pool_t *scratch_pool)
3081{
3082  pair_cache_key_t header_key = { 0 };
3083  svn_fs_fs__rep_header_t *rep_header;
3084
3085  header_key.revision = (apr_int32_t)entry->item.revision;
3086  header_key.second = entry->item.number;
3087
3088  SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
3089                          result_pool, scratch_pool));
3090  SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
3091                             result_pool, scratch_pool));
3092
3093  return SVN_NO_ERROR;
3094}
3095
3096/* For the given REV_FILE in FS, in *STREAM return a stream covering the
3097 * item specified by ENTRY.  Also, verify the item's content by low-level
3098 * checksum.  Allocate the result in POOL.
3099 */
3100static svn_error_t *
3101read_item(svn_stream_t **stream,
3102          svn_fs_t *fs,
3103          svn_fs_fs__revision_file_t *rev_file,
3104          svn_fs_fs__p2l_entry_t* entry,
3105          apr_pool_t *pool)
3106{
3107  apr_uint32_t digest;
3108  svn_checksum_t *expected, *actual;
3109  apr_uint32_t plain_digest;
3110
3111  /* Read item into string buffer. */
3112  svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
3113  text->len = entry->size;
3114  text->data[text->len] = 0;
3115  SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
3116                                 NULL, NULL, pool));
3117
3118  /* Return (construct, calculate) stream and checksum. */
3119  *stream = svn_stream_from_stringbuf(text, pool);
3120  digest = svn__fnv1a_32x4(text->data, text->len);
3121
3122  /* Checksums will match most of the time. */
3123  if (entry->fnv1_checksum == digest)
3124    return SVN_NO_ERROR;
3125
3126  /* Construct proper checksum objects from their digests to allow for
3127   * nice error messages. */
3128  plain_digest = htonl(entry->fnv1_checksum);
3129  expected = svn_checksum__from_digest_fnv1a_32x4(
3130                (const unsigned char *)&plain_digest, pool);
3131  plain_digest = htonl(digest);
3132  actual = svn_checksum__from_digest_fnv1a_32x4(
3133                (const unsigned char *)&plain_digest, pool);
3134
3135  /* Construct the full error message with all the info we have. */
3136  return svn_checksum_mismatch_err(expected, actual, pool,
3137                 _("Low-level checksum mismatch while reading\n"
3138                   "%s bytes of meta data at offset %s "
3139                   "for item %s in revision %ld"),
3140                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
3141                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset),
3142                 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
3143                 entry->item.revision);
3144}
3145
3146/* If not already cached or if MUST_READ is set, read the changed paths
3147 * list addressed by ENTRY in FS and ret��rn it in *CHANGES.  Cache the
3148 * result if caching is enabled.  Read the data from the already open
3149 * FILE and wrapping FILE_STREAM.  Use POOL for allocations.
3150 */
3151static svn_error_t *
3152block_read_changes(apr_array_header_t **changes,
3153                   svn_fs_t *fs,
3154                   svn_fs_fs__revision_file_t *rev_file,
3155                   svn_fs_fs__p2l_entry_t *entry,
3156                   svn_boolean_t must_read,
3157                   apr_pool_t *result_pool,
3158                   apr_pool_t *scratch_pool)
3159{
3160  fs_fs_data_t *ffd = fs->fsap_data;
3161  svn_stream_t *stream;
3162  if (!must_read && !ffd->changes_cache)
3163    return SVN_NO_ERROR;
3164
3165  /* already in cache? */
3166  if (!must_read && ffd->changes_cache)
3167    {
3168      svn_boolean_t is_cached;
3169      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache,
3170                                 &entry->item.revision,
3171                                 scratch_pool));
3172      if (is_cached)
3173        return SVN_NO_ERROR;
3174    }
3175
3176  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3177
3178  /* read changes from revision file */
3179  SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool,
3180                                  scratch_pool));
3181
3182  /* cache for future reference */
3183  if (ffd->changes_cache)
3184    SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision,
3185                           *changes, scratch_pool));
3186
3187  return SVN_NO_ERROR;
3188}
3189
3190/* If not already cached or if MUST_READ is set, read the nod revision
3191 * addressed by ENTRY in FS and ret��rn it in *NODEREV_P.  Cache the
3192 * result if caching is enabled.  Read the data from the already open
3193 * FILE and wrapping FILE_STREAM. Use SCRATCH_POOL for temporary allocations.
3194 */
3195static svn_error_t *
3196block_read_noderev(node_revision_t **noderev_p,
3197                   svn_fs_t *fs,
3198                   svn_fs_fs__revision_file_t *rev_file,
3199                   svn_fs_fs__p2l_entry_t *entry,
3200                   svn_boolean_t must_read,
3201                   apr_pool_t *result_pool,
3202                   apr_pool_t *scratch_pool)
3203{
3204  fs_fs_data_t *ffd = fs->fsap_data;
3205  svn_stream_t *stream;
3206
3207  pair_cache_key_t key = { 0 };
3208  key.revision = entry->item.revision;
3209  key.second = entry->item.number;
3210
3211  if (!must_read && !ffd->node_revision_cache)
3212    return SVN_NO_ERROR;
3213
3214  /* already in cache? */
3215  if (!must_read && ffd->node_revision_cache)
3216    {
3217      svn_boolean_t is_cached;
3218      SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
3219                                 &key, scratch_pool));
3220      if (is_cached)
3221        return SVN_NO_ERROR;
3222    }
3223
3224  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3225
3226  /* read node rev from revision file */
3227  SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
3228                                  result_pool, scratch_pool));
3229
3230  /* Workaround issue #4031: is-fresh-txn-root in revision files. */
3231  (*noderev_p)->is_fresh_txn_root = FALSE;
3232
3233  if (ffd->node_revision_cache)
3234    SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
3235                           scratch_pool));
3236
3237  return SVN_NO_ERROR;
3238}
3239
3240/* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
3241 * and put all data into cache.  If necessary and depending on heuristics,
3242 * neighboring blocks may also get read.  The data is being read from
3243 * already open REVISION_FILE, which must be the correct rev / pack file
3244 * w.r.t. REVISION.
3245 *
 * For noderevs and changed path lists, the item fetched can be allocated
 * in RESULT_POOL and returned in *RESULT.  Otherwise, RESULT must be NULL.
3248 */
3249static svn_error_t *
3250block_read(void **result,
3251           svn_fs_t *fs,
3252           svn_revnum_t revision,
3253           apr_uint64_t item_index,
3254           svn_fs_fs__revision_file_t *revision_file,
3255           apr_pool_t *result_pool,
3256           apr_pool_t *scratch_pool)
3257{
3258  fs_fs_data_t *ffd = fs->fsap_data;
3259  apr_off_t offset, wanted_offset = 0;
3260  apr_off_t block_start = 0;
3261  apr_array_header_t *entries;
3262  int run_count = 0;
3263  int i;
3264  apr_pool_t *iterpool;
3265
3266  /* Block read is an optional feature. If the caller does not want anything
3267   * specific we may not have to read anything. */
3268  if (!result)
3269    return SVN_NO_ERROR;
3270
3271  iterpool = svn_pool_create(scratch_pool);
3272
3273  /* don't try this on transaction protorev files */
3274  SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3275
3276  /* index lookup: find the OFFSET of the item we *must* read plus (in the
3277   * "do-while" block) the list of items in the same block. */
3278  SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
3279                                 revision, NULL, item_index, iterpool));
3280
3281  offset = wanted_offset;
3282
3283  /* Heuristics:
3284   *
3285   * Read this block.  If the last item crosses the block boundary, read
3286   * the next block but stop there.  Because cross-boundary items cause
3287   * blocks to be read twice, this heuristics will limit this effect to
3288   * approx. 50% of blocks, probably less, while providing a sensible
3289   * amount of read-ahead.
3290   */
3291  do
3292    {
3293      /* fetch list of items in the block surrounding OFFSET */
3294      block_start = offset - (offset % ffd->block_size);
3295      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
3296                                          revision, block_start,
3297                                          ffd->block_size, scratch_pool,
3298                                          scratch_pool));
3299
3300      SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3301                           iterpool));
3302
3303      /* read all items from the block */
3304      for (i = 0; i < entries->nelts; ++i)
3305        {
3306          svn_boolean_t is_result, is_wanted;
3307          apr_pool_t *pool;
3308          svn_fs_fs__p2l_entry_t* entry;
3309
3310          svn_pool_clear(iterpool);
3311
3312          /* skip empty sections */
3313          entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
3314          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3315            continue;
3316
3317          /* the item / container we were looking for? */
3318          is_wanted =    entry->offset == wanted_offset
3319                      && entry->item.revision == revision
3320                      && entry->item.number == item_index;
3321          is_result = result && is_wanted;
3322
3323          /* select the pool that we want the item to be allocated in */
3324          pool = is_result ? result_pool : iterpool;
3325
3326          /* handle all items that start within this block and are relatively
3327           * small (i.e. < block size).  Always read the item we need to return.
3328           */
3329          if (is_result || (   entry->offset >= block_start
3330                            && entry->size < ffd->block_size))
3331            {
3332              void *item = NULL;
3333              SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
3334                                       &entry->offset, iterpool));
3335              switch (entry->type)
3336                {
3337                  case SVN_FS_FS__ITEM_TYPE_FILE_REP:
3338                  case SVN_FS_FS__ITEM_TYPE_DIR_REP:
3339                  case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
3340                  case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
3341                    SVN_ERR(block_read_contents(fs, revision_file, entry,
3342                                                is_wanted
3343                                                  ? -1
3344                                                  : block_start + ffd->block_size,
3345                                                pool, iterpool));
3346                    break;
3347
3348                  case SVN_FS_FS__ITEM_TYPE_NODEREV:
3349                    if (ffd->node_revision_cache || is_result)
3350                      SVN_ERR(block_read_noderev((node_revision_t **)&item,
3351                                                 fs, revision_file,
3352                                                 entry, is_result, pool,
3353                                                 iterpool));
3354                    break;
3355
3356                  case SVN_FS_FS__ITEM_TYPE_CHANGES:
3357                    SVN_ERR(block_read_changes((apr_array_header_t **)&item,
3358                                               fs, revision_file,
3359                                               entry, is_result,
3360                                               pool, iterpool));
3361                    break;
3362
3363                  default:
3364                    break;
3365                }
3366
3367              if (is_result)
3368                *result = item;
3369
3370              /* if we crossed a block boundary, read the remainder of
3371               * the last block as well */
3372              offset = entry->offset + entry->size;
3373              if (offset > block_start + ffd->block_size)
3374                ++run_count;
3375            }
3376        }
3377
3378    }
3379  while(run_count++ == 1); /* can only be true once and only if a block
3380                            * boundary got crossed */
3381
3382  /* if the caller requested a result, we must have provided one by now */
3383  assert(!result || *result);
3384  svn_pool_destroy(iterpool);
3385
3386  return SVN_NO_ERROR;
3387}
3388