dag_cache.c revision 362181
1/* dag_cache.c : DAG walker and node cache.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23
24/* The job of this layer is to take a filesystem with lots of node
25   sharing going on --- the real DAG filesystem as it appears in the
26   database --- and make it look and act like an ordinary tree
27   filesystem, with no sharing.
28
29   We do just-in-time cloning: you can walk from some unfinished
30   transaction's root down into directories and files shared with
31   committed revisions; as soon as you try to change something, the
32   appropriate nodes get cloned (and parent directory entries updated)
33   invisibly, behind your back.  Any other references you have to
34   nodes that have been cloned by other changes, even made by other
35   processes, are automatically updated to point to the right clones.  */
36
37
38#include <stdlib.h>
39#include <string.h>
40#include <assert.h>
41#include <apr_pools.h>
42#include <apr_hash.h>
43
44#include "svn_hash.h"
45#include "svn_private_config.h"
46#include "svn_pools.h"
47#include "svn_error.h"
48#include "svn_path.h"
49#include "svn_mergeinfo.h"
50#include "svn_fs.h"
51#include "svn_props.h"
52#include "svn_sorts.h"
53
54#include "fs.h"
55#include "dag.h"
56#include "dag_cache.h"
57#include "lock.h"
58#include "tree.h"
59#include "fs_x.h"
60#include "fs_id.h"
61#include "temp_serializer.h"
62#include "cached_data.h"
63#include "transaction.h"
64#include "pack.h"
65#include "util.h"
66
67#include "private/svn_mergeinfo_private.h"
68#include "private/svn_subr_private.h"
69#include "private/svn_fs_util.h"
70#include "private/svn_fspath.h"
71#include "../libsvn_fs/fs-loader.h"
72
73
74/*** Path handling ***/
75
76/* DAG caching uses "normalized" paths - which are a relaxed form of
77   canonical relpaths.  They omit the leading '/' of the abspath and trim
78   any trailing '/'.  Any sequences of '//' will be kept as the path walker
79   simply skips over them.
80
81   Non-canonical sections of the path will therefore only impact efficiency
82   (extra walker iterations and possibly duplicated entries in the cache)
83   but not correctness.
84
85   Another optimization is that we don't NUL-terminate the path but strictly
86   use its length info.  That way, it can be traversed easily without
87   chopping it up and patching it together again.  ultimately, however,
88   the path string is NUL-terminated because we wrapped a NUL-terminated
89   C string.
90 */
91
92/* Set *RESULT to a normalized version of PATH without actually copying any
93   string contents.
94
95   For convenience, return the RESULT pointer as the function value too. */
96static svn_string_t *
97normalize_path(svn_string_t *result,
98               const char *path)
99{
100  apr_size_t len;
101
102  if (path[0] == '/')
103    ++path;
104
105  len = strlen(path);
106  while (len && path[len-1] == '/')
107    --len;
108
109  result->data = path;
110  result->len = len;
111
112  return result;
113}
114
115/* Extend PATH, i.e. increase its LEN, to cover the next segment.  Skip
116   sequences of '/'.  Store the segment in ENTRY and return a pointer to
117   it C string representation.  If no segment has been found (end of path),
118   return NULL. */
119static const char *
120next_entry_name(svn_string_t *path,
121                svn_stringbuf_t *entry)
122{
123  const char *segment_start;
124  const char *segment_end;
125
126  /* Moving to the next segment, skip separators
127     (normalized does not imply canonical). */
128  segment_start = path->data + path->len;
129  while (*segment_start == '/')
130    ++segment_start;
131
132  /* End of path? */
133  if (*segment_start == '\0')
134    return NULL;
135
136  /* Find the end of this segment.  Note that strchr would not give us the
137     length of the last segment. */
138  segment_end = segment_start;
139  while (*segment_end != '/' && *segment_end != '\0')
140    ++segment_end;
141
142  /* Copy the segment into the result buffer. */
143  svn_stringbuf_setempty(entry);
144  svn_stringbuf_appendbytes(entry, segment_start,
145                            segment_end - segment_start);
146
147  /* Extend the "visible" part of the path to the end of that segment. */
148  path->len = segment_end - path->data;
149
150  /* Indicate that we found something. */
151  return entry->data;
152}
153
154/* Split the normalized PATH into its last segment the corresponding parent
155   directory.  Store them in ENTRY and DIRECTORY, respectively.
156
157   If PATH is empty, return FALSE and produce no output.
158   Otherwise, return TRUE.
159 */
160static svn_boolean_t
161extract_last_segment(const svn_string_t *path,
162                     svn_string_t *directory,
163                     svn_stringbuf_t *entry)
164{
165  const char *segment_start;
166  const char *parent_end;
167
168  /* Edge case.  We can't navigate in empty paths. */
169  if (path->len == 0)
170    return FALSE;
171
172  /* Find the start of the last segment.  Note that normalized paths never
173     start nor end with a '/'. */
174  segment_start = path->data + path->len - 1;
175  while (*segment_start != '/' && segment_start != path->data)
176    --segment_start;
177
178  /* At root level already, i.e. no parent? */
179  if (segment_start == path->data)
180    {
181      /* Construct result. */
182      directory->data = "";
183      directory->len = 0;
184
185      svn_stringbuf_setempty(entry);
186      svn_stringbuf_appendbytes(entry, path->data, path->len);
187
188      return TRUE;
189    }
190
191  /* Find the end of the parent directory. */
192  parent_end = segment_start;
193  while (parent_end[-1] == '/')
194    --parent_end;
195
196  /* Construct result. */
197  directory->data = path->data;
198  directory->len = parent_end - path->data;
199
200  ++segment_start; /* previously pointed to the last '/'. */
201  svn_stringbuf_setempty(entry);
202  svn_stringbuf_appendbytes(entry, segment_start,
203                            path->len - (segment_start - path->data));
204
205  return TRUE;
206}
207
208
209/*** Node Caching ***/
210
211/* 1st level cache */
212
213/* An entry in the first-level cache.  REVISION and PATH form the key that
214   will ultimately be matched.
215 */
216typedef struct cache_entry_t
217{
218  /* hash value derived from PATH, REVISION.
219     Used to short-circuit failed lookups. */
220  apr_uint32_t hash_value;
221
222  /* change set to which the NODE belongs */
223  svn_fs_x__change_set_t change_set;
224
225  /* path of the NODE */
226  char *path;
227
228  /* cached value of strlen(PATH). */
229  apr_size_t path_len;
230
231  /* the node allocated in the cache's pool. NULL for empty entries. */
232  dag_node_t *node;
233} cache_entry_t;
234
235/* Number of entries in the cache.  Keep this low to keep pressure on the
236   CPU caches low as well.  A binary value is most efficient.  If we walk
237   a directory tree, we want enough entries to store nodes for all files
238   without overwriting the nodes for the parent folder.  That way, there
239   will be no unnecessary misses (except for a few random ones caused by
240   hash collision).
241
242   The actual number of instances may be higher but entries that got
243   overwritten are no longer visible.
244 */
245enum { BUCKET_COUNT = 256 };
246
247/* The actual cache structure.  All nodes will be allocated in POOL.
248   When the number of INSERTIONS (i.e. objects created form that pool)
249   exceeds a certain threshold, the pool will be cleared and the cache
250   with it.
251 */
252struct svn_fs_x__dag_cache_t
253{
254  /* fixed number of (possibly empty) cache entries */
255  cache_entry_t buckets[BUCKET_COUNT];
256
257  /* pool used for all node allocation */
258  apr_pool_t *pool;
259
260  /* number of entries created from POOL since the last cleanup */
261  apr_size_t insertions;
262
263  /* Property lookups etc. have a very high locality (75% re-hit).
264     Thus, remember the last hit location for optimistic lookup. */
265  apr_size_t last_hit;
266
267  /* Position of the last bucket hit that actually had a DAG node in it.
268     LAST_HIT may refer to a bucket that matches path@rev but has not
269     its NODE element set, yet.
270     This value is a mere hint for optimistic lookup and any value is
271     valid (as long as it is < BUCKET_COUNT). */
272  apr_size_t last_non_empty;
273};
274
275svn_fs_x__dag_cache_t*
276svn_fs_x__create_dag_cache(apr_pool_t *result_pool)
277{
278  svn_fs_x__dag_cache_t *result = apr_pcalloc(result_pool, sizeof(*result));
279  result->pool = svn_pool_create(result_pool);
280
281  return result;
282}
283
284/* Clears the CACHE at regular intervals (destroying all cached nodes).
285 * Return TRUE if the cache got cleared and previously obtained references
286 * to cache contents have become invalid.
287 */
288static svn_boolean_t
289auto_clear_dag_cache(svn_fs_x__dag_cache_t* cache)
290{
291  if (cache->insertions <= BUCKET_COUNT)
292    return FALSE;
293
294  svn_pool_clear(cache->pool);
295
296  memset(cache->buckets, 0, sizeof(cache->buckets));
297  cache->insertions = 0;
298
299  return TRUE;
300}
301
302/* For the given CHANGE_SET and PATH, return the respective entry in CACHE.
303   If the entry is empty, its NODE member will be NULL and the caller
304   may then set it to the corresponding DAG node allocated in CACHE->POOL.
305 */
306static cache_entry_t *
307cache_lookup(svn_fs_x__dag_cache_t *cache,
308             svn_fs_x__change_set_t change_set,
309             const svn_string_t *path)
310{
311  apr_size_t i, bucket_index;
312  apr_size_t path_len = path->len;
313  apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;
314
315#if SVN_UNALIGNED_ACCESS_IS_OK
316  /* "randomizing" / distributing factor used in our hash function */
317  const apr_uint32_t factor = 0xd1f3da69;
318#endif
319
320  /* optimistic lookup: hit the same bucket again? */
321  cache_entry_t *result = &cache->buckets[cache->last_hit];
322  if (   (result->change_set == change_set)
323      && (result->path_len == path_len)
324      && !memcmp(result->path, path->data, path_len))
325    {
326      /* Remember the position of the last node we found in this cache. */
327      if (result->node)
328        cache->last_non_empty = cache->last_hit;
329
330      return result;
331    }
332
333  /* need to do a full lookup.  Calculate the hash value
334     (HASH_VALUE has been initialized to REVISION). */
335  i = 0;
336#if SVN_UNALIGNED_ACCESS_IS_OK
337  /* We relax the dependency chain between iterations by processing
338     two chunks from the input per hash_value self-multiplication.
339     The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
340     per 2 chunks instead of 1 chunk.
341   */
342  for (; i + 8 <= path_len; i += 8)
343    hash_value = hash_value * factor * factor
344               + (  *(const apr_uint32_t*)(path->data + i) * factor
345                  + *(const apr_uint32_t*)(path->data + i + 4));
346#endif
347
348  for (; i < path_len; ++i)
349    /* Help GCC to minimize the HASH_VALUE update latency by splitting the
350       MUL 33 of the naive implementation: h = h * 33 + path[i].  This
351       shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
352     */
353    hash_value = hash_value * 32 + (hash_value + (apr_byte_t)path->data[i]);
354
355  bucket_index = hash_value + (hash_value >> 16);
356  bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT;
357
358  /* access the corresponding bucket and remember its location */
359  result = &cache->buckets[bucket_index];
360  cache->last_hit = bucket_index;
361
362  /* if it is *NOT* a match,  clear the bucket, expect the caller to fill
363     in the node and count it as an insertion */
364  if (   (result->hash_value != hash_value)
365      || (result->change_set != change_set)
366      || (result->path_len != path_len)
367      || memcmp(result->path, path->data, path_len))
368    {
369      result->hash_value = hash_value;
370      result->change_set = change_set;
371
372      if (result->path_len < path_len || result->path_len == 0)
373        result->path = apr_palloc(cache->pool, path_len + 1);
374      result->path_len = path_len;
375
376      memcpy(result->path, path->data, path_len);
377      result->path[path_len] = 0;
378
379      result->node = NULL;
380
381      cache->insertions++;
382    }
383  else if (result->node)
384    {
385      /* This bucket is valid & has a suitable DAG node in it.
386         Remember its location. */
387      cache->last_non_empty = bucket_index;
388    }
389
390  return result;
391}
392
393/* Optimistic lookup using the last seen non-empty location in CACHE.
394   Return the node of that entry, if it is still in use and matches PATH.
395   Return NULL otherwise. */
396static dag_node_t *
397cache_lookup_last_path(svn_fs_x__dag_cache_t *cache,
398                       const svn_string_t *path)
399{
400  cache_entry_t *result = &cache->buckets[cache->last_non_empty];
401
402  if (   result->node
403      && (result->path_len == path->len)
404      && !memcmp(result->path, path->data, path->len))
405    {
406      return result->node;
407    }
408
409  return NULL;
410}
411
412/* Return the cached DAG node for PATH from ROOT's node cache, or NULL if
413   the node isn't cached.
414 */
415static dag_node_t *
416dag_node_cache_get(svn_fs_root_t *root,
417                   const svn_string_t *path)
418{
419  svn_fs_x__data_t *ffd = root->fs->fsap_data;
420  svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
421
422  auto_clear_dag_cache(ffd->dag_node_cache);
423  return cache_lookup(ffd->dag_node_cache, change_set, path)->node;
424}
425
426
427void
428svn_fs_x__update_dag_cache(dag_node_t *node)
429{
430  svn_fs_x__data_t *ffd = svn_fs_x__dag_get_fs(node)->fsap_data;
431  const char *path = svn_fs_x__dag_get_created_path(node);
432  svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
433
434  cache_entry_t *bucket;
435  svn_string_t normalized;
436
437  auto_clear_dag_cache(cache);
438  bucket = cache_lookup(cache, svn_fs_x__dag_get_id(node)->change_set,
439                        normalize_path(&normalized, path));
440  bucket->node = svn_fs_x__dag_dup(node, cache->pool);
441}
442
443void
444svn_fs_x__invalidate_dag_cache(svn_fs_root_t *root,
445                               const char *path)
446{
447  svn_fs_x__data_t *ffd = root->fs->fsap_data;
448  svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
449  svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
450
451  apr_size_t i;
452  for (i = 0; i < BUCKET_COUNT; ++i)
453    {
454      cache_entry_t *bucket = &cache->buckets[i];
455      if (bucket->change_set == change_set && bucket->node)
456        {
457          /* The call to svn_relpath_skip_ancestor() will require both
458             parameters to be canonical.  Since we allow for non-canonical
459             paths in our cache (unlikely to actually happen), we drop all
460             such entries.
461           */
462          if (!svn_relpath_is_canonical(bucket->path)
463              || svn_relpath_skip_ancestor(path + 1, bucket->path))
464            bucket->node = NULL;
465        }
466    }
467}
468
469
470/* Traversing directory paths.  */
471
472/* Try a short-cut for the open_path() function using the last node accessed.
473 * If that ROOT is that nodes's "created rev" and PATH matches its "created-
474 * path", return the node in *NODE_P.  Set it to NULL otherwise.
475 *
476 * This function is used to support ra_serf-style access patterns where we
477 * are first asked for path@rev and then for path@c_rev of the same node.
478 * The shortcut works by ignoring the "rev" part of the cache key and then
479 * checking whether we got lucky.  Lookup and verification are both quick
480 * plus there are many early outs for common types of mismatch.
481 */
482static svn_error_t *
483try_match_last_node(dag_node_t **node_p,
484                    svn_fs_root_t *root,
485                    const svn_string_t *path)
486{
487  svn_fs_x__data_t *ffd = root->fs->fsap_data;
488
489  /* Optimistic lookup: if the last node returned from the cache applied to
490     the same PATH, return it in NODE. */
491  dag_node_t *node = cache_lookup_last_path(ffd->dag_node_cache, path);
492
493  /* Did we get a bucket with a committed node? */
494  if (node && !svn_fs_x__dag_check_mutable(node))
495    {
496      /* Get the path&rev pair at which this node was created.
497         This is repository location for which this node is _known_ to be
498         the right lookup result irrespective of how we found it. */
499      const char *created_path
500        = svn_fs_x__dag_get_created_path(node) + 1;
501      svn_revnum_t revision = svn_fs_x__dag_get_revision(node);
502
503      /* Is it an exact match? */
504      if (   revision == root->rev
505          && strlen(created_path) == path->len
506          && memcmp(created_path, path->data, path->len) == 0)
507        {
508          svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
509
510          /* Insert NODE into the cache at a second location.
511             In a fraction of all calls, the auto-cleanup may
512             cause NODE to become invalid. */
513          if (!auto_clear_dag_cache(cache))
514            {
515              /* Cache it under its full path@rev access path. */
516              svn_fs_x__change_set_t change_set
517                = svn_fs_x__change_set_by_rev(revision);
518              cache_entry_t *bucket = cache_lookup(cache, change_set, path);
519              bucket->node = node;
520
521              *node_p = node;
522              return SVN_NO_ERROR;
523            }
524        }
525    }
526
527  *node_p = NULL;
528  return SVN_NO_ERROR;
529}
530
531
532/* From directory node PARENT, under ROOT, go one step down to the entry
533   NAME and return a reference to it in *CHILD_P.
534
535   PATH is the combination of PARENT's path and NAME and is provided by
536   the caller such that we don't have to construct it here ourselves.
537   Similarly, CHANGE_SET is redundant with ROOT.
538
539   If the directory entry cannot be found, instead of returning an error,
540   *CHILD_P will be set to NULL if ALLOW_EMPTY is TRUE.
541
542   NOTE: *NODE_P will live within the DAG cache and we merely return a
543   reference to it.  Hence, it will invalid upon the next cache insertion.
544   Callers must create a copy if they want a non-temporary object.
545*/
546static svn_error_t *
547dag_step(dag_node_t **child_p,
548         svn_fs_root_t *root,
549         dag_node_t *parent,
550         const char *name,
551         const svn_string_t *path,
552         svn_fs_x__change_set_t change_set,
553         svn_boolean_t allow_empty,
554         apr_pool_t *scratch_pool)
555{
556  svn_fs_t *fs = svn_fs_x__dag_get_fs(parent);
557  svn_fs_x__data_t *ffd = fs->fsap_data;
558  cache_entry_t *bucket;
559  svn_fs_x__id_t node_id;
560
561  /* Locate the corresponding cache entry.  We may need PARENT to remain
562     valid for later use, so don't call auto_clear_dag_cache() here. */
563  bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
564  if (bucket->node)
565    {
566      /* Already cached. Return a reference to the cached object. */
567      *child_p = bucket->node;
568      return SVN_NO_ERROR;
569    }
570
571  /* Get the ID of the node we are looking for.  The function call checks
572     for various error conditions such like PARENT not being a directory. */
573  SVN_ERR(svn_fs_x__dir_entry_id(&node_id, parent, name, scratch_pool));
574  if (! svn_fs_x__id_used(&node_id))
575    {
576      const char *dir;
577
578      /* No such directory entry.  Is a simple NULL result o.k.? */
579      if (allow_empty)
580        {
581          *child_p = NULL;
582          return SVN_NO_ERROR;
583        }
584
585      /* Produce an appropriate error message. */
586      dir = apr_pstrmemdup(scratch_pool, path->data, path->len);
587      dir = svn_fs__canonicalize_abspath(dir, scratch_pool);
588
589      return SVN_FS__NOT_FOUND(root, dir);
590    }
591
592  /* We are about to add a new entry to the cache.  Periodically clear it.
593     If we had to clear it just now (< 1% chance), re-add the entry for our
594     item. */
595  if (auto_clear_dag_cache(ffd->dag_node_cache))
596    bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
597
598  /* Construct the DAG node object for NODE_ID. Let it live in the cache. */
599  SVN_ERR(svn_fs_x__dag_get_node(&bucket->node, fs, &node_id,
600                                 ffd->dag_node_cache->pool,
601                                 scratch_pool));
602
603  /* Return a reference to the cached object. */
604  *child_p = bucket->node;
605  return SVN_NO_ERROR;
606}
607
608/* Return the CHANGE_SET's root node in *NODE_P.  ROOT is the FS API root
609   object for CHANGE_SET.  Use SCRATCH_POOL for temporary allocations.
610
611   NOTE: *NODE_P will live within the DAG cache and we merely return a
612   reference to it.  Hence, it will invalid upon the next cache insertion.
613   Callers must create a copy if they want a non-temporary object.
614 */
615static svn_error_t *
616get_root_node(dag_node_t **node_p,
617              svn_fs_root_t *root,
618              svn_fs_x__change_set_t change_set,
619              apr_pool_t *scratch_pool)
620{
621  svn_fs_t *fs = root->fs;
622  svn_fs_x__data_t *ffd = fs->fsap_data;
623  cache_entry_t *bucket;
624  const svn_string_t path = { "", 0 };
625
626  /* Auto-insert the node in the cache. */
627  auto_clear_dag_cache(ffd->dag_node_cache);
628  bucket = cache_lookup(ffd->dag_node_cache, change_set, &path);
629
630  /* If it is not already cached, construct the DAG node object for NODE_ID.
631     Let it live in the cache.  Sadly, we often can't reuse txn DAG nodes. */
632  if (bucket->node == NULL)
633    SVN_ERR(svn_fs_x__dag_root(&bucket->node, fs, change_set,
634                               ffd->dag_node_cache->pool, scratch_pool));
635
636  /* Return a reference to the cached object. */
637  *node_p = bucket->node;
638  return SVN_NO_ERROR;
639}
640
641/* Walk the DAG starting at ROOT, following PATH and return a reference to
642   the target node in *NODE_P.   Use SCRATCH_POOL for temporary allocations.
643
644   NOTE: *NODE_P will live within the DAG cache and we merely return a
645   reference to it.  Hence, it will invalid upon the next cache insertion.
646   Callers must create a copy if they want a non-temporary object.
647*/
648static svn_error_t *
649walk_dag_path(dag_node_t **node_p,
650              svn_fs_root_t *root,
651              svn_string_t *path,
652              apr_pool_t *scratch_pool)
653{
654  dag_node_t *here = NULL; /* The directory we're currently looking at.  */
655  apr_pool_t *iterpool;
656  svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
657  const char *entry;
658  svn_string_t directory;
659  svn_stringbuf_t *entry_buffer;
660
661  /* Special case: root directory.
662     We will later assume that all paths have at least one parent level,
663     so we must check here for those that don't. */
664  if (path->len == 0)
665    return svn_error_trace(get_root_node(node_p, root, change_set,
666                                         scratch_pool));
667
668  /* Callers often traverse the DAG in some path-based order or along the
669     history segments.  That allows us to try a few guesses about where to
670     find the next item.  This is only useful if the caller didn't request
671     the full parent chain. */
672
673  /* First attempt: Assume that we access the DAG for the same path as
674     in the last lookup but for a different revision that happens to be
675     the last revision that touched the respective node.  This is a
676     common pattern when e.g. checking out over ra_serf.  Note that this
677     will only work for committed data as the revision info for nodes in
678     txns is bogus.
679
680     This shortcut is quick and will exit this function upon success.
681     So, try it first. */
682  if (!root->is_txn_root)
683    {
684      SVN_ERR(try_match_last_node(node_p, root, path));
685
686      /* Did the shortcut work? */
687      if (*node_p)
688          return SVN_NO_ERROR;
689    }
690
691  /* Second attempt: Try starting the lookup immediately at the parent
692     node.  We will often have recently accessed either a sibling or
693     said parent directory itself for the same revision.  ENTRY will
694     point to the last '/' in PATH. */
695  entry_buffer = svn_stringbuf_create_ensure(64, scratch_pool);
696  if (extract_last_segment(path, &directory, entry_buffer))
697    {
698      here = dag_node_cache_get(root, &directory);
699
700      /* Did the shortcut work? */
701      if (here)
702        return svn_error_trace(dag_step(node_p, root, here,
703                                        entry_buffer->data, path,
704                                        change_set, FALSE, scratch_pool));
705    }
706
707  /* Now there is something to iterate over. Thus, create the ITERPOOL. */
708  iterpool = svn_pool_create(scratch_pool);
709
710  /* Make a parent_path item for the root node, using its own current
711     copy id.  */
712  SVN_ERR(get_root_node(&here, root, change_set, iterpool));
713  path->len = 0;
714
715  /* Walk the path segment by segment. */
716  for (entry = next_entry_name(path, entry_buffer);
717       entry;
718       entry = next_entry_name(path, entry_buffer))
719    {
720      svn_pool_clear(iterpool);
721
722      /* Note that HERE is allocated from the DAG node cache and will
723         therefore survive the ITERPOOL cleanup. */
724      SVN_ERR(dag_step(&here, root, here, entry, path, change_set, FALSE,
725                       iterpool));
726    }
727
728  svn_pool_destroy(iterpool);
729  *node_p = here;
730
731  return SVN_NO_ERROR;
732}
733
734
735/* Return a text string describing the absolute path of parent path
736   DAG_PATH.  It will be allocated in POOL. */
737static const char *
738parent_path_path(svn_fs_x__dag_path_t *dag_path,
739                 apr_pool_t *pool)
740{
741  const char *path_so_far = "/";
742  if (dag_path->parent)
743    path_so_far = parent_path_path(dag_path->parent, pool);
744  return dag_path->entry
745    ? svn_fspath__join(path_so_far, dag_path->entry, pool)
746    : path_so_far;
747}
748
749
750/* Choose a copy ID inheritance method *INHERIT_P to be used in the
751   event that immutable node CHILD in FS needs to be made mutable.  If
752   the inheritance method is copy_id_inherit_new, also return a
753   *COPY_SRC_PATH on which to base the new copy ID (else return NULL
754   for that path).  CHILD must have a parent (it cannot be the root
755   node).  Temporary allocations are taken from SCRATCH_POOL. */
756static svn_error_t *
757get_copy_inheritance(svn_fs_x__copy_id_inherit_t *inherit_p,
758                     const char **copy_src_path,
759                     svn_fs_t *fs,
760                     svn_fs_x__dag_path_t *child,
761                     apr_pool_t *scratch_pool)
762{
763  svn_fs_x__id_t child_copy_id, parent_copy_id;
764  const char *id_path = NULL;
765  svn_fs_root_t *copyroot_root;
766  dag_node_t *copyroot_node;
767  svn_revnum_t copyroot_rev;
768  const char *copyroot_path;
769
770  SVN_ERR_ASSERT(child && child->parent);
771
772  /* Initialize some convenience variables. */
773  child_copy_id = *svn_fs_x__dag_get_copy_id(child->node);
774  parent_copy_id = *svn_fs_x__dag_get_copy_id(child->parent->node);
775
776  /* By default, there is no copy source. */
777  *copy_src_path = NULL;
778
779  /* If this child is already mutable, we have nothing to do. */
780  if (svn_fs_x__dag_check_mutable(child->node))
781    {
782      *inherit_p = svn_fs_x__copy_id_inherit_self;
783      return SVN_NO_ERROR;
784    }
785
786  /* From this point on, we'll assume that the child will just take
787     its copy ID from its parent. */
788  *inherit_p = svn_fs_x__copy_id_inherit_parent;
789
790  /* Special case: if the child's copy ID is '0', use the parent's
791     copy ID. */
792  if (svn_fs_x__id_is_root(&child_copy_id))
793    return SVN_NO_ERROR;
794
795  /* Compare the copy IDs of the child and its parent.  If they are
796     the same, then the child is already on the same branch as the
797     parent, and should use the same mutability copy ID that the
798     parent will use. */
799  if (svn_fs_x__id_eq(&child_copy_id, &parent_copy_id))
800    return SVN_NO_ERROR;
801
802  /* If the child is on the same branch that the parent is on, the
803     child should just use the same copy ID that the parent would use.
804     Else, the child needs to generate a new copy ID to use should it
805     need to be made mutable.  We will claim that child is on the same
806     branch as its parent if the child itself is not a branch point,
807     or if it is a branch point that we are accessing via its original
808     copy destination path. */
809  svn_fs_x__dag_get_copyroot(&copyroot_rev, &copyroot_path, child->node);
810  SVN_ERR(svn_fs_x__revision_root(&copyroot_root, fs, copyroot_rev,
811                                  scratch_pool));
812  SVN_ERR(svn_fs_x__get_temp_dag_node(&copyroot_node, copyroot_root,
813                                      copyroot_path, scratch_pool));
814
815  if (!svn_fs_x__dag_related_node(copyroot_node, child->node))
816    return SVN_NO_ERROR;
817
818  /* Determine if we are looking at the child via its original path or
819     as a subtree item of a copied tree. */
820  id_path = svn_fs_x__dag_get_created_path(child->node);
821  if (strcmp(id_path, parent_path_path(child, scratch_pool)) == 0)
822    {
823      *inherit_p = svn_fs_x__copy_id_inherit_self;
824      return SVN_NO_ERROR;
825    }
826
827  /* We are pretty sure that the child node is an unedited nested
828     branched node.  When it needs to be made mutable, it should claim
829     a new copy ID. */
830  *inherit_p = svn_fs_x__copy_id_inherit_new;
831  *copy_src_path = id_path;
832  return SVN_NO_ERROR;
833}
834
835/* Allocate a new svn_fs_x__dag_path_t node from RESULT_POOL, containing
836   NODE, ENTRY and PARENT; NODE and ENTRY are copied into RESULT_POOL.  */
837static svn_fs_x__dag_path_t *
838make_parent_path(dag_node_t *node,
839                 const svn_stringbuf_t *entry,
840                 svn_fs_x__dag_path_t *parent,
841                 apr_pool_t *result_pool)
842{
843  svn_fs_x__dag_path_t *dag_path
844    = apr_pcalloc(result_pool, sizeof(*dag_path));
845  if (node)
846    dag_path->node = svn_fs_x__dag_dup(node, result_pool);
847  dag_path->entry = apr_pstrmemdup(result_pool, entry->data, entry->len);
848  dag_path->parent = parent;
849  dag_path->copy_inherit = svn_fs_x__copy_id_inherit_unknown;
850  dag_path->copy_src_path = NULL;
851  return dag_path;
852}
853
854svn_error_t *
855svn_fs_x__get_dag_path(svn_fs_x__dag_path_t **dag_path_p,
856                       svn_fs_root_t *root,
857                       const char *fs_path,
858                       int flags,
859                       svn_boolean_t is_txn_path,
860                       apr_pool_t *result_pool,
861                       apr_pool_t *scratch_pool)
862{
863  svn_fs_t *fs = root->fs;
864  dag_node_t *here = NULL; /* The directory we're currently looking at.  */
865  svn_fs_x__dag_path_t *dag_path; /* The path from HERE up to the root. */
866  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
867
868  svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
869  const char *entry;
870  svn_string_t path;
871  svn_stringbuf_t *entry_buffer = svn_stringbuf_create_ensure(64,
872                                                              scratch_pool);
873  apr_size_t path_len;
874
875  /* Normalize the FS_PATH to be compatible with our DAG walk utils. */
876  normalize_path(&path, fs_path); /* "" */
877
878  /* Make a DAG_PATH for the root node, using its own current copy id.  */
879  SVN_ERR(get_root_node(&here, root, change_set, iterpool));
880  dag_path = make_parent_path(here, entry_buffer, NULL, result_pool);
881  dag_path->copy_inherit = svn_fs_x__copy_id_inherit_self;
882
883  path_len = path.len;
884  path.len = 0;
885
886  /* Walk the path segment by segment.  Add to the DAG_PATH as we go. */
887  for (entry = next_entry_name(&path, entry_buffer);
888       entry;
889       entry = next_entry_name(&path, entry_buffer))
890    {
891      svn_pool_clear(iterpool);
892
893      /* If the current node is not a directory and we are just here to
894       * check for the path's existence, then that's o.k.
895       * Otherwise, non-dir nodes will cause an error in dag_step. */
896      if (   (flags & svn_fs_x__dag_path_allow_null)
897          && (svn_fs_x__dag_node_kind(dag_path->node) != svn_node_dir))
898        {
899          dag_path = NULL;
900          break;
901        }
902
903      /* Find the sub-node. */
904      SVN_ERR(dag_step(&here, root, dag_path->node, entry, &path, change_set,
905                       TRUE, iterpool));
906
907      /* "node not found" requires special handling.  */
908      if (here == NULL)
909        {
910          /* If this was the last path component, and the caller
911             said it was optional, then don't return an error;
912             just put a NULL node pointer in the path.
913           */
914          if ((flags & svn_fs_x__dag_path_last_optional)
915              && (path_len == path.len))
916            {
917              dag_path = make_parent_path(NULL, entry_buffer, dag_path,
918                                          result_pool);
919              break;
920            }
921          else if (flags & svn_fs_x__dag_path_allow_null)
922            {
923              dag_path = NULL;
924              break;
925            }
926          else
927            {
928              /* Build a better error message than svn_fs_x__dag_open
929                 can provide, giving the root and full path name.  */
930              return SVN_FS__NOT_FOUND(root, fs_path);
931            }
932        }
933
934      /* Now, make a parent_path item for CHILD. */
935      dag_path = make_parent_path(here, entry_buffer, dag_path, result_pool);
936      if (is_txn_path)
937        {
938          SVN_ERR(get_copy_inheritance(&dag_path->copy_inherit,
939                                       &dag_path->copy_src_path,
940                                       fs, dag_path, iterpool));
941        }
942    }
943
944  svn_pool_destroy(iterpool);
945  *dag_path_p = dag_path;
946  return SVN_NO_ERROR;
947}
948
949/* Set *NODE_P to a mutable root directory for ROOT, cloning if
950   necessary, allocating in RESULT_POOL.  ROOT must be a transaction root.
951   Use ERROR_PATH in error messages.  Use SCRATCH_POOL for temporaries.*/
952static svn_error_t *
953mutable_root_node(dag_node_t **node_p,
954                  svn_fs_root_t *root,
955                  const char *error_path,
956                  apr_pool_t *result_pool,
957                  apr_pool_t *scratch_pool)
958{
959  /* If it's not a transaction root, we can't change its contents.  */
960  if (!root->is_txn_root)
961    return SVN_FS__ERR_NOT_MUTABLE(root->fs, root->rev, error_path);
962
963  /* It's a transaction root.
964     Get the appropriate DAG root node and copy it into RESULT_POOL. */
965  SVN_ERR(get_root_node(node_p, root, svn_fs_x__root_change_set(root),
966                        scratch_pool));
967  *node_p = svn_fs_x__dag_dup(*node_p, result_pool);
968
969  return SVN_NO_ERROR;
970}
971
972svn_error_t *
973svn_fs_x__make_path_mutable(svn_fs_root_t *root,
974                            svn_fs_x__dag_path_t *parent_path,
975                            const char *error_path,
976                            apr_pool_t *result_pool,
977                            apr_pool_t *scratch_pool)
978{
979  dag_node_t *clone;
980  svn_fs_x__txn_id_t txn_id = svn_fs_x__root_txn_id(root);
981
982  /* Is the node mutable already?  */
983  if (svn_fs_x__dag_check_mutable(parent_path->node))
984    return SVN_NO_ERROR;
985
986  /* Are we trying to clone the root, or somebody's child node?  */
987  if (parent_path->parent)
988    {
989      svn_fs_x__id_t copy_id = { SVN_INVALID_REVNUM, 0 };
990      svn_fs_x__id_t *copy_id_ptr = &copy_id;
991      svn_fs_x__copy_id_inherit_t inherit = parent_path->copy_inherit;
992      const char *clone_path, *copyroot_path;
993      svn_revnum_t copyroot_rev;
994      svn_boolean_t is_parent_copyroot = FALSE;
995      svn_fs_root_t *copyroot_root;
996      dag_node_t *copyroot_node;
997      apr_pool_t *subpool;
998
999      /* We're trying to clone somebody's child.  Make sure our parent
1000         is mutable.  */
1001      SVN_ERR(svn_fs_x__make_path_mutable(root, parent_path->parent,
1002                                          error_path, result_pool,
1003                                          scratch_pool));
1004
1005      /* Allocate all temporaries in a sub-pool that we control locally.
1006         That way, we keep only the data of one level of recursion around
1007         at any time. */
1008      subpool = svn_pool_create(scratch_pool);
1009      switch (inherit)
1010        {
1011        case svn_fs_x__copy_id_inherit_parent:
1012          copy_id = *svn_fs_x__dag_get_copy_id(parent_path->parent->node);
1013          break;
1014
1015        case svn_fs_x__copy_id_inherit_new:
1016          SVN_ERR(svn_fs_x__reserve_copy_id(&copy_id, root->fs, txn_id,
1017                                            subpool));
1018          break;
1019
1020        case svn_fs_x__copy_id_inherit_self:
1021          copy_id_ptr = NULL;
1022          break;
1023
1024        case svn_fs_x__copy_id_inherit_unknown:
1025        default:
1026          SVN_ERR_MALFUNCTION(); /* uh-oh -- somebody didn't calculate copy-ID
1027                      inheritance data. */
1028        }
1029
1030      /* Determine what copyroot our new child node should use. */
1031      svn_fs_x__dag_get_copyroot(&copyroot_rev, &copyroot_path,
1032                                 parent_path->node);
1033      SVN_ERR(svn_fs_x__revision_root(&copyroot_root, root->fs,
1034                                      copyroot_rev, subpool));
1035      SVN_ERR(svn_fs_x__get_temp_dag_node(&copyroot_node, copyroot_root,
1036                                          copyroot_path, subpool));
1037
1038      if (!svn_fs_x__dag_related_node(copyroot_node, parent_path->node))
1039        is_parent_copyroot = TRUE;
1040
1041      /* Now make this node mutable.  */
1042      clone_path = parent_path_path(parent_path->parent, subpool);
1043      SVN_ERR(svn_fs_x__dag_clone_child(&clone,
1044                                        parent_path->parent->node,
1045                                        clone_path,
1046                                        parent_path->entry,
1047                                        copy_id_ptr, txn_id,
1048                                        is_parent_copyroot,
1049                                        result_pool,
1050                                        subpool));
1051
1052      /* Update the path cache. */
1053      svn_fs_x__update_dag_cache(clone);
1054      svn_pool_destroy(subpool);
1055    }
1056  else
1057    {
1058      /* We're trying to clone the root directory.  */
1059      SVN_ERR(mutable_root_node(&clone, root, error_path, result_pool,
1060                                scratch_pool));
1061    }
1062
1063  /* Update the PARENT_PATH link to refer to the clone.  */
1064  parent_path->node = clone;
1065
1066  return SVN_NO_ERROR;
1067}
1068
1069
1070svn_error_t *
1071svn_fs_x__get_temp_dag_node(dag_node_t **node_p,
1072                            svn_fs_root_t *root,
1073                            const char *path,
1074                            apr_pool_t *scratch_pool)
1075{
1076  svn_string_t normalized;
1077
1078  /* First we look for the DAG in our cache. */
1079  *node_p = dag_node_cache_get(root, normalize_path(&normalized, path));
1080
1081  /* If it is not there, walk the DAG and fill the cache. */
1082  if (! *node_p)
1083    SVN_ERR(walk_dag_path(node_p, root, &normalized, scratch_pool));
1084
1085  return SVN_NO_ERROR;
1086}
1087
1088
1089svn_error_t *
1090svn_fs_x__get_dag_node(dag_node_t **dag_node_p,
1091                       svn_fs_root_t *root,
1092                       const char *path,
1093                       apr_pool_t *result_pool,
1094                       apr_pool_t *scratch_pool)
1095{
1096  dag_node_t *node = NULL;
1097  SVN_ERR(svn_fs_x__get_temp_dag_node(&node, root, path, scratch_pool));
1098
1099  /* We want the returned node to live in POOL. */
1100  *dag_node_p = svn_fs_x__dag_dup(node, result_pool);
1101
1102  return SVN_NO_ERROR;
1103}
1104