dag_cache.c revision 362181
1/* dag_cache.c : DAG walker and node cache. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23 24/* The job of this layer is to take a filesystem with lots of node 25 sharing going on --- the real DAG filesystem as it appears in the 26 database --- and make it look and act like an ordinary tree 27 filesystem, with no sharing. 28 29 We do just-in-time cloning: you can walk from some unfinished 30 transaction's root down into directories and files shared with 31 committed revisions; as soon as you try to change something, the 32 appropriate nodes get cloned (and parent directory entries updated) 33 invisibly, behind your back. Any other references you have to 34 nodes that have been cloned by other changes, even made by other 35 processes, are automatically updated to point to the right clones. */ 36 37 38#include <stdlib.h> 39#include <string.h> 40#include <assert.h> 41#include <apr_pools.h> 42#include <apr_hash.h> 43 44#include "svn_hash.h" 45#include "svn_private_config.h" 46#include "svn_pools.h" 47#include "svn_error.h" 48#include "svn_path.h" 49#include "svn_mergeinfo.h" 50#include "svn_fs.h" 51#include "svn_props.h" 52#include "svn_sorts.h" 53 54#include "fs.h" 55#include "dag.h" 56#include "dag_cache.h" 57#include "lock.h" 58#include "tree.h" 59#include "fs_x.h" 60#include "fs_id.h" 61#include "temp_serializer.h" 62#include "cached_data.h" 63#include "transaction.h" 64#include "pack.h" 65#include "util.h" 66 67#include "private/svn_mergeinfo_private.h" 68#include "private/svn_subr_private.h" 69#include "private/svn_fs_util.h" 70#include "private/svn_fspath.h" 71#include "../libsvn_fs/fs-loader.h" 72 73 74/*** Path handling ***/ 75 76/* DAG caching uses "normalized" paths - which are a relaxed form of 77 canonical relpaths. They omit the leading '/' of the abspath and trim 78 any trailing '/'. Any sequences of '//' will be kept as the path walker 79 simply skips over them. 80 81 Non-canonical sections of the path will therefore only impact efficiency 82 (extra walker iterations and possibly duplicated entries in the cache) 83 but not correctness. 84 85 Another optimization is that we don't NUL-terminate the path but strictly 86 use its length info. That way, it can be traversed easily without 87 chopping it up and patching it together again. ultimately, however, 88 the path string is NUL-terminated because we wrapped a NUL-terminated 89 C string. 90 */ 91 92/* Set *RESULT to a normalized version of PATH without actually copying any 93 string contents. 94 95 For convenience, return the RESULT pointer as the function value too. */ 96static svn_string_t * 97normalize_path(svn_string_t *result, 98 const char *path) 99{ 100 apr_size_t len; 101 102 if (path[0] == '/') 103 ++path; 104 105 len = strlen(path); 106 while (len && path[len-1] == '/') 107 --len; 108 109 result->data = path; 110 result->len = len; 111 112 return result; 113} 114 115/* Extend PATH, i.e. increase its LEN, to cover the next segment. Skip 116 sequences of '/'. Store the segment in ENTRY and return a pointer to 117 it C string representation. If no segment has been found (end of path), 118 return NULL. */ 119static const char * 120next_entry_name(svn_string_t *path, 121 svn_stringbuf_t *entry) 122{ 123 const char *segment_start; 124 const char *segment_end; 125 126 /* Moving to the next segment, skip separators 127 (normalized does not imply canonical). */ 128 segment_start = path->data + path->len; 129 while (*segment_start == '/') 130 ++segment_start; 131 132 /* End of path? */ 133 if (*segment_start == '\0') 134 return NULL; 135 136 /* Find the end of this segment. Note that strchr would not give us the 137 length of the last segment. */ 138 segment_end = segment_start; 139 while (*segment_end != '/' && *segment_end != '\0') 140 ++segment_end; 141 142 /* Copy the segment into the result buffer. */ 143 svn_stringbuf_setempty(entry); 144 svn_stringbuf_appendbytes(entry, segment_start, 145 segment_end - segment_start); 146 147 /* Extend the "visible" part of the path to the end of that segment. */ 148 path->len = segment_end - path->data; 149 150 /* Indicate that we found something. */ 151 return entry->data; 152} 153 154/* Split the normalized PATH into its last segment the corresponding parent 155 directory. Store them in ENTRY and DIRECTORY, respectively. 156 157 If PATH is empty, return FALSE and produce no output. 158 Otherwise, return TRUE. 159 */ 160static svn_boolean_t 161extract_last_segment(const svn_string_t *path, 162 svn_string_t *directory, 163 svn_stringbuf_t *entry) 164{ 165 const char *segment_start; 166 const char *parent_end; 167 168 /* Edge case. We can't navigate in empty paths. */ 169 if (path->len == 0) 170 return FALSE; 171 172 /* Find the start of the last segment. Note that normalized paths never 173 start nor end with a '/'. */ 174 segment_start = path->data + path->len - 1; 175 while (*segment_start != '/' && segment_start != path->data) 176 --segment_start; 177 178 /* At root level already, i.e. no parent? */ 179 if (segment_start == path->data) 180 { 181 /* Construct result. */ 182 directory->data = ""; 183 directory->len = 0; 184 185 svn_stringbuf_setempty(entry); 186 svn_stringbuf_appendbytes(entry, path->data, path->len); 187 188 return TRUE; 189 } 190 191 /* Find the end of the parent directory. */ 192 parent_end = segment_start; 193 while (parent_end[-1] == '/') 194 --parent_end; 195 196 /* Construct result. */ 197 directory->data = path->data; 198 directory->len = parent_end - path->data; 199 200 ++segment_start; /* previously pointed to the last '/'. */ 201 svn_stringbuf_setempty(entry); 202 svn_stringbuf_appendbytes(entry, segment_start, 203 path->len - (segment_start - path->data)); 204 205 return TRUE; 206} 207 208 209/*** Node Caching ***/ 210 211/* 1st level cache */ 212 213/* An entry in the first-level cache. REVISION and PATH form the key that 214 will ultimately be matched. 215 */ 216typedef struct cache_entry_t 217{ 218 /* hash value derived from PATH, REVISION. 219 Used to short-circuit failed lookups. */ 220 apr_uint32_t hash_value; 221 222 /* change set to which the NODE belongs */ 223 svn_fs_x__change_set_t change_set; 224 225 /* path of the NODE */ 226 char *path; 227 228 /* cached value of strlen(PATH). */ 229 apr_size_t path_len; 230 231 /* the node allocated in the cache's pool. NULL for empty entries. */ 232 dag_node_t *node; 233} cache_entry_t; 234 235/* Number of entries in the cache. Keep this low to keep pressure on the 236 CPU caches low as well. A binary value is most efficient. If we walk 237 a directory tree, we want enough entries to store nodes for all files 238 without overwriting the nodes for the parent folder. That way, there 239 will be no unnecessary misses (except for a few random ones caused by 240 hash collision). 241 242 The actual number of instances may be higher but entries that got 243 overwritten are no longer visible. 244 */ 245enum { BUCKET_COUNT = 256 }; 246 247/* The actual cache structure. All nodes will be allocated in POOL. 248 When the number of INSERTIONS (i.e. objects created form that pool) 249 exceeds a certain threshold, the pool will be cleared and the cache 250 with it. 251 */ 252struct svn_fs_x__dag_cache_t 253{ 254 /* fixed number of (possibly empty) cache entries */ 255 cache_entry_t buckets[BUCKET_COUNT]; 256 257 /* pool used for all node allocation */ 258 apr_pool_t *pool; 259 260 /* number of entries created from POOL since the last cleanup */ 261 apr_size_t insertions; 262 263 /* Property lookups etc. have a very high locality (75% re-hit). 264 Thus, remember the last hit location for optimistic lookup. */ 265 apr_size_t last_hit; 266 267 /* Position of the last bucket hit that actually had a DAG node in it. 268 LAST_HIT may refer to a bucket that matches path@rev but has not 269 its NODE element set, yet. 270 This value is a mere hint for optimistic lookup and any value is 271 valid (as long as it is < BUCKET_COUNT). */ 272 apr_size_t last_non_empty; 273}; 274 275svn_fs_x__dag_cache_t* 276svn_fs_x__create_dag_cache(apr_pool_t *result_pool) 277{ 278 svn_fs_x__dag_cache_t *result = apr_pcalloc(result_pool, sizeof(*result)); 279 result->pool = svn_pool_create(result_pool); 280 281 return result; 282} 283 284/* Clears the CACHE at regular intervals (destroying all cached nodes). 285 * Return TRUE if the cache got cleared and previously obtained references 286 * to cache contents have become invalid. 287 */ 288static svn_boolean_t 289auto_clear_dag_cache(svn_fs_x__dag_cache_t* cache) 290{ 291 if (cache->insertions <= BUCKET_COUNT) 292 return FALSE; 293 294 svn_pool_clear(cache->pool); 295 296 memset(cache->buckets, 0, sizeof(cache->buckets)); 297 cache->insertions = 0; 298 299 return TRUE; 300} 301 302/* For the given CHANGE_SET and PATH, return the respective entry in CACHE. 303 If the entry is empty, its NODE member will be NULL and the caller 304 may then set it to the corresponding DAG node allocated in CACHE->POOL. 305 */ 306static cache_entry_t * 307cache_lookup(svn_fs_x__dag_cache_t *cache, 308 svn_fs_x__change_set_t change_set, 309 const svn_string_t *path) 310{ 311 apr_size_t i, bucket_index; 312 apr_size_t path_len = path->len; 313 apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set; 314 315#if SVN_UNALIGNED_ACCESS_IS_OK 316 /* "randomizing" / distributing factor used in our hash function */ 317 const apr_uint32_t factor = 0xd1f3da69; 318#endif 319 320 /* optimistic lookup: hit the same bucket again? */ 321 cache_entry_t *result = &cache->buckets[cache->last_hit]; 322 if ( (result->change_set == change_set) 323 && (result->path_len == path_len) 324 && !memcmp(result->path, path->data, path_len)) 325 { 326 /* Remember the position of the last node we found in this cache. */ 327 if (result->node) 328 cache->last_non_empty = cache->last_hit; 329 330 return result; 331 } 332 333 /* need to do a full lookup. Calculate the hash value 334 (HASH_VALUE has been initialized to REVISION). */ 335 i = 0; 336#if SVN_UNALIGNED_ACCESS_IS_OK 337 /* We relax the dependency chain between iterations by processing 338 two chunks from the input per hash_value self-multiplication. 339 The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency 340 per 2 chunks instead of 1 chunk. 341 */ 342 for (; i + 8 <= path_len; i += 8) 343 hash_value = hash_value * factor * factor 344 + ( *(const apr_uint32_t*)(path->data + i) * factor 345 + *(const apr_uint32_t*)(path->data + i + 4)); 346#endif 347 348 for (; i < path_len; ++i) 349 /* Help GCC to minimize the HASH_VALUE update latency by splitting the 350 MUL 33 of the naive implementation: h = h * 33 + path[i]. This 351 shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD. 352 */ 353 hash_value = hash_value * 32 + (hash_value + (apr_byte_t)path->data[i]); 354 355 bucket_index = hash_value + (hash_value >> 16); 356 bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT; 357 358 /* access the corresponding bucket and remember its location */ 359 result = &cache->buckets[bucket_index]; 360 cache->last_hit = bucket_index; 361 362 /* if it is *NOT* a match, clear the bucket, expect the caller to fill 363 in the node and count it as an insertion */ 364 if ( (result->hash_value != hash_value) 365 || (result->change_set != change_set) 366 || (result->path_len != path_len) 367 || memcmp(result->path, path->data, path_len)) 368 { 369 result->hash_value = hash_value; 370 result->change_set = change_set; 371 372 if (result->path_len < path_len || result->path_len == 0) 373 result->path = apr_palloc(cache->pool, path_len + 1); 374 result->path_len = path_len; 375 376 memcpy(result->path, path->data, path_len); 377 result->path[path_len] = 0; 378 379 result->node = NULL; 380 381 cache->insertions++; 382 } 383 else if (result->node) 384 { 385 /* This bucket is valid & has a suitable DAG node in it. 386 Remember its location. */ 387 cache->last_non_empty = bucket_index; 388 } 389 390 return result; 391} 392 393/* Optimistic lookup using the last seen non-empty location in CACHE. 394 Return the node of that entry, if it is still in use and matches PATH. 395 Return NULL otherwise. */ 396static dag_node_t * 397cache_lookup_last_path(svn_fs_x__dag_cache_t *cache, 398 const svn_string_t *path) 399{ 400 cache_entry_t *result = &cache->buckets[cache->last_non_empty]; 401 402 if ( result->node 403 && (result->path_len == path->len) 404 && !memcmp(result->path, path->data, path->len)) 405 { 406 return result->node; 407 } 408 409 return NULL; 410} 411 412/* Return the cached DAG node for PATH from ROOT's node cache, or NULL if 413 the node isn't cached. 414 */ 415static dag_node_t * 416dag_node_cache_get(svn_fs_root_t *root, 417 const svn_string_t *path) 418{ 419 svn_fs_x__data_t *ffd = root->fs->fsap_data; 420 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root); 421 422 auto_clear_dag_cache(ffd->dag_node_cache); 423 return cache_lookup(ffd->dag_node_cache, change_set, path)->node; 424} 425 426 427void 428svn_fs_x__update_dag_cache(dag_node_t *node) 429{ 430 svn_fs_x__data_t *ffd = svn_fs_x__dag_get_fs(node)->fsap_data; 431 const char *path = svn_fs_x__dag_get_created_path(node); 432 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache; 433 434 cache_entry_t *bucket; 435 svn_string_t normalized; 436 437 auto_clear_dag_cache(cache); 438 bucket = cache_lookup(cache, svn_fs_x__dag_get_id(node)->change_set, 439 normalize_path(&normalized, path)); 440 bucket->node = svn_fs_x__dag_dup(node, cache->pool); 441} 442 443void 444svn_fs_x__invalidate_dag_cache(svn_fs_root_t *root, 445 const char *path) 446{ 447 svn_fs_x__data_t *ffd = root->fs->fsap_data; 448 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache; 449 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root); 450 451 apr_size_t i; 452 for (i = 0; i < BUCKET_COUNT; ++i) 453 { 454 cache_entry_t *bucket = &cache->buckets[i]; 455 if (bucket->change_set == change_set && bucket->node) 456 { 457 /* The call to svn_relpath_skip_ancestor() will require both 458 parameters to be canonical. Since we allow for non-canonical 459 paths in our cache (unlikely to actually happen), we drop all 460 such entries. 461 */ 462 if (!svn_relpath_is_canonical(bucket->path) 463 || svn_relpath_skip_ancestor(path + 1, bucket->path)) 464 bucket->node = NULL; 465 } 466 } 467} 468 469 470/* Traversing directory paths. */ 471 472/* Try a short-cut for the open_path() function using the last node accessed. 473 * If that ROOT is that nodes's "created rev" and PATH matches its "created- 474 * path", return the node in *NODE_P. Set it to NULL otherwise. 475 * 476 * This function is used to support ra_serf-style access patterns where we 477 * are first asked for path@rev and then for path@c_rev of the same node. 478 * The shortcut works by ignoring the "rev" part of the cache key and then 479 * checking whether we got lucky. Lookup and verification are both quick 480 * plus there are many early outs for common types of mismatch. 481 */ 482static svn_error_t * 483try_match_last_node(dag_node_t **node_p, 484 svn_fs_root_t *root, 485 const svn_string_t *path) 486{ 487 svn_fs_x__data_t *ffd = root->fs->fsap_data; 488 489 /* Optimistic lookup: if the last node returned from the cache applied to 490 the same PATH, return it in NODE. */ 491 dag_node_t *node = cache_lookup_last_path(ffd->dag_node_cache, path); 492 493 /* Did we get a bucket with a committed node? */ 494 if (node && !svn_fs_x__dag_check_mutable(node)) 495 { 496 /* Get the path&rev pair at which this node was created. 497 This is repository location for which this node is _known_ to be 498 the right lookup result irrespective of how we found it. */ 499 const char *created_path 500 = svn_fs_x__dag_get_created_path(node) + 1; 501 svn_revnum_t revision = svn_fs_x__dag_get_revision(node); 502 503 /* Is it an exact match? */ 504 if ( revision == root->rev 505 && strlen(created_path) == path->len 506 && memcmp(created_path, path->data, path->len) == 0) 507 { 508 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache; 509 510 /* Insert NODE into the cache at a second location. 511 In a fraction of all calls, the auto-cleanup may 512 cause NODE to become invalid. */ 513 if (!auto_clear_dag_cache(cache)) 514 { 515 /* Cache it under its full path@rev access path. */ 516 svn_fs_x__change_set_t change_set 517 = svn_fs_x__change_set_by_rev(revision); 518 cache_entry_t *bucket = cache_lookup(cache, change_set, path); 519 bucket->node = node; 520 521 *node_p = node; 522 return SVN_NO_ERROR; 523 } 524 } 525 } 526 527 *node_p = NULL; 528 return SVN_NO_ERROR; 529} 530 531 532/* From directory node PARENT, under ROOT, go one step down to the entry 533 NAME and return a reference to it in *CHILD_P. 534 535 PATH is the combination of PARENT's path and NAME and is provided by 536 the caller such that we don't have to construct it here ourselves. 537 Similarly, CHANGE_SET is redundant with ROOT. 538 539 If the directory entry cannot be found, instead of returning an error, 540 *CHILD_P will be set to NULL if ALLOW_EMPTY is TRUE. 541 542 NOTE: *NODE_P will live within the DAG cache and we merely return a 543 reference to it. Hence, it will invalid upon the next cache insertion. 544 Callers must create a copy if they want a non-temporary object. 545*/ 546static svn_error_t * 547dag_step(dag_node_t **child_p, 548 svn_fs_root_t *root, 549 dag_node_t *parent, 550 const char *name, 551 const svn_string_t *path, 552 svn_fs_x__change_set_t change_set, 553 svn_boolean_t allow_empty, 554 apr_pool_t *scratch_pool) 555{ 556 svn_fs_t *fs = svn_fs_x__dag_get_fs(parent); 557 svn_fs_x__data_t *ffd = fs->fsap_data; 558 cache_entry_t *bucket; 559 svn_fs_x__id_t node_id; 560 561 /* Locate the corresponding cache entry. We may need PARENT to remain 562 valid for later use, so don't call auto_clear_dag_cache() here. */ 563 bucket = cache_lookup(ffd->dag_node_cache, change_set, path); 564 if (bucket->node) 565 { 566 /* Already cached. Return a reference to the cached object. */ 567 *child_p = bucket->node; 568 return SVN_NO_ERROR; 569 } 570 571 /* Get the ID of the node we are looking for. The function call checks 572 for various error conditions such like PARENT not being a directory. */ 573 SVN_ERR(svn_fs_x__dir_entry_id(&node_id, parent, name, scratch_pool)); 574 if (! svn_fs_x__id_used(&node_id)) 575 { 576 const char *dir; 577 578 /* No such directory entry. Is a simple NULL result o.k.? */ 579 if (allow_empty) 580 { 581 *child_p = NULL; 582 return SVN_NO_ERROR; 583 } 584 585 /* Produce an appropriate error message. */ 586 dir = apr_pstrmemdup(scratch_pool, path->data, path->len); 587 dir = svn_fs__canonicalize_abspath(dir, scratch_pool); 588 589 return SVN_FS__NOT_FOUND(root, dir); 590 } 591 592 /* We are about to add a new entry to the cache. Periodically clear it. 593 If we had to clear it just now (< 1% chance), re-add the entry for our 594 item. */ 595 if (auto_clear_dag_cache(ffd->dag_node_cache)) 596 bucket = cache_lookup(ffd->dag_node_cache, change_set, path); 597 598 /* Construct the DAG node object for NODE_ID. Let it live in the cache. */ 599 SVN_ERR(svn_fs_x__dag_get_node(&bucket->node, fs, &node_id, 600 ffd->dag_node_cache->pool, 601 scratch_pool)); 602 603 /* Return a reference to the cached object. */ 604 *child_p = bucket->node; 605 return SVN_NO_ERROR; 606} 607 608/* Return the CHANGE_SET's root node in *NODE_P. ROOT is the FS API root 609 object for CHANGE_SET. Use SCRATCH_POOL for temporary allocations. 610 611 NOTE: *NODE_P will live within the DAG cache and we merely return a 612 reference to it. Hence, it will invalid upon the next cache insertion. 613 Callers must create a copy if they want a non-temporary object. 614 */ 615static svn_error_t * 616get_root_node(dag_node_t **node_p, 617 svn_fs_root_t *root, 618 svn_fs_x__change_set_t change_set, 619 apr_pool_t *scratch_pool) 620{ 621 svn_fs_t *fs = root->fs; 622 svn_fs_x__data_t *ffd = fs->fsap_data; 623 cache_entry_t *bucket; 624 const svn_string_t path = { "", 0 }; 625 626 /* Auto-insert the node in the cache. */ 627 auto_clear_dag_cache(ffd->dag_node_cache); 628 bucket = cache_lookup(ffd->dag_node_cache, change_set, &path); 629 630 /* If it is not already cached, construct the DAG node object for NODE_ID. 631 Let it live in the cache. Sadly, we often can't reuse txn DAG nodes. */ 632 if (bucket->node == NULL) 633 SVN_ERR(svn_fs_x__dag_root(&bucket->node, fs, change_set, 634 ffd->dag_node_cache->pool, scratch_pool)); 635 636 /* Return a reference to the cached object. */ 637 *node_p = bucket->node; 638 return SVN_NO_ERROR; 639} 640 641/* Walk the DAG starting at ROOT, following PATH and return a reference to 642 the target node in *NODE_P. Use SCRATCH_POOL for temporary allocations. 643 644 NOTE: *NODE_P will live within the DAG cache and we merely return a 645 reference to it. Hence, it will invalid upon the next cache insertion. 646 Callers must create a copy if they want a non-temporary object. 647*/ 648static svn_error_t * 649walk_dag_path(dag_node_t **node_p, 650 svn_fs_root_t *root, 651 svn_string_t *path, 652 apr_pool_t *scratch_pool) 653{ 654 dag_node_t *here = NULL; /* The directory we're currently looking at. */ 655 apr_pool_t *iterpool; 656 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root); 657 const char *entry; 658 svn_string_t directory; 659 svn_stringbuf_t *entry_buffer; 660 661 /* Special case: root directory. 662 We will later assume that all paths have at least one parent level, 663 so we must check here for those that don't. */ 664 if (path->len == 0) 665 return svn_error_trace(get_root_node(node_p, root, change_set, 666 scratch_pool)); 667 668 /* Callers often traverse the DAG in some path-based order or along the 669 history segments. That allows us to try a few guesses about where to 670 find the next item. This is only useful if the caller didn't request 671 the full parent chain. */ 672 673 /* First attempt: Assume that we access the DAG for the same path as 674 in the last lookup but for a different revision that happens to be 675 the last revision that touched the respective node. This is a 676 common pattern when e.g. checking out over ra_serf. Note that this 677 will only work for committed data as the revision info for nodes in 678 txns is bogus. 679 680 This shortcut is quick and will exit this function upon success. 681 So, try it first. */ 682 if (!root->is_txn_root) 683 { 684 SVN_ERR(try_match_last_node(node_p, root, path)); 685 686 /* Did the shortcut work? */ 687 if (*node_p) 688 return SVN_NO_ERROR; 689 } 690 691 /* Second attempt: Try starting the lookup immediately at the parent 692 node. We will often have recently accessed either a sibling or 693 said parent directory itself for the same revision. ENTRY will 694 point to the last '/' in PATH. */ 695 entry_buffer = svn_stringbuf_create_ensure(64, scratch_pool); 696 if (extract_last_segment(path, &directory, entry_buffer)) 697 { 698 here = dag_node_cache_get(root, &directory); 699 700 /* Did the shortcut work? */ 701 if (here) 702 return svn_error_trace(dag_step(node_p, root, here, 703 entry_buffer->data, path, 704 change_set, FALSE, scratch_pool)); 705 } 706 707 /* Now there is something to iterate over. Thus, create the ITERPOOL. */ 708 iterpool = svn_pool_create(scratch_pool); 709 710 /* Make a parent_path item for the root node, using its own current 711 copy id. */ 712 SVN_ERR(get_root_node(&here, root, change_set, iterpool)); 713 path->len = 0; 714 715 /* Walk the path segment by segment. */ 716 for (entry = next_entry_name(path, entry_buffer); 717 entry; 718 entry = next_entry_name(path, entry_buffer)) 719 { 720 svn_pool_clear(iterpool); 721 722 /* Note that HERE is allocated from the DAG node cache and will 723 therefore survive the ITERPOOL cleanup. */ 724 SVN_ERR(dag_step(&here, root, here, entry, path, change_set, FALSE, 725 iterpool)); 726 } 727 728 svn_pool_destroy(iterpool); 729 *node_p = here; 730 731 return SVN_NO_ERROR; 732} 733 734 735/* Return a text string describing the absolute path of parent path 736 DAG_PATH. It will be allocated in POOL. */ 737static const char * 738parent_path_path(svn_fs_x__dag_path_t *dag_path, 739 apr_pool_t *pool) 740{ 741 const char *path_so_far = "/"; 742 if (dag_path->parent) 743 path_so_far = parent_path_path(dag_path->parent, pool); 744 return dag_path->entry 745 ? svn_fspath__join(path_so_far, dag_path->entry, pool) 746 : path_so_far; 747} 748 749 750/* Choose a copy ID inheritance method *INHERIT_P to be used in the 751 event that immutable node CHILD in FS needs to be made mutable. If 752 the inheritance method is copy_id_inherit_new, also return a 753 *COPY_SRC_PATH on which to base the new copy ID (else return NULL 754 for that path). CHILD must have a parent (it cannot be the root 755 node). Temporary allocations are taken from SCRATCH_POOL. */ 756static svn_error_t * 757get_copy_inheritance(svn_fs_x__copy_id_inherit_t *inherit_p, 758 const char **copy_src_path, 759 svn_fs_t *fs, 760 svn_fs_x__dag_path_t *child, 761 apr_pool_t *scratch_pool) 762{ 763 svn_fs_x__id_t child_copy_id, parent_copy_id; 764 const char *id_path = NULL; 765 svn_fs_root_t *copyroot_root; 766 dag_node_t *copyroot_node; 767 svn_revnum_t copyroot_rev; 768 const char *copyroot_path; 769 770 SVN_ERR_ASSERT(child && child->parent); 771 772 /* Initialize some convenience variables. */ 773 child_copy_id = *svn_fs_x__dag_get_copy_id(child->node); 774 parent_copy_id = *svn_fs_x__dag_get_copy_id(child->parent->node); 775 776 /* By default, there is no copy source. */ 777 *copy_src_path = NULL; 778 779 /* If this child is already mutable, we have nothing to do. */ 780 if (svn_fs_x__dag_check_mutable(child->node)) 781 { 782 *inherit_p = svn_fs_x__copy_id_inherit_self; 783 return SVN_NO_ERROR; 784 } 785 786 /* From this point on, we'll assume that the child will just take 787 its copy ID from its parent. */ 788 *inherit_p = svn_fs_x__copy_id_inherit_parent; 789 790 /* Special case: if the child's copy ID is '0', use the parent's 791 copy ID. */ 792 if (svn_fs_x__id_is_root(&child_copy_id)) 793 return SVN_NO_ERROR; 794 795 /* Compare the copy IDs of the child and its parent. If they are 796 the same, then the child is already on the same branch as the 797 parent, and should use the same mutability copy ID that the 798 parent will use. */ 799 if (svn_fs_x__id_eq(&child_copy_id, &parent_copy_id)) 800 return SVN_NO_ERROR; 801 802 /* If the child is on the same branch that the parent is on, the 803 child should just use the same copy ID that the parent would use. 804 Else, the child needs to generate a new copy ID to use should it 805 need to be made mutable. We will claim that child is on the same 806 branch as its parent if the child itself is not a branch point, 807 or if it is a branch point that we are accessing via its original 808 copy destination path. */ 809 svn_fs_x__dag_get_copyroot(©root_rev, ©root_path, child->node); 810 SVN_ERR(svn_fs_x__revision_root(©root_root, fs, copyroot_rev, 811 scratch_pool)); 812 SVN_ERR(svn_fs_x__get_temp_dag_node(©root_node, copyroot_root, 813 copyroot_path, scratch_pool)); 814 815 if (!svn_fs_x__dag_related_node(copyroot_node, child->node)) 816 return SVN_NO_ERROR; 817 818 /* Determine if we are looking at the child via its original path or 819 as a subtree item of a copied tree. */ 820 id_path = svn_fs_x__dag_get_created_path(child->node); 821 if (strcmp(id_path, parent_path_path(child, scratch_pool)) == 0) 822 { 823 *inherit_p = svn_fs_x__copy_id_inherit_self; 824 return SVN_NO_ERROR; 825 } 826 827 /* We are pretty sure that the child node is an unedited nested 828 branched node. When it needs to be made mutable, it should claim 829 a new copy ID. */ 830 *inherit_p = svn_fs_x__copy_id_inherit_new; 831 *copy_src_path = id_path; 832 return SVN_NO_ERROR; 833} 834 835/* Allocate a new svn_fs_x__dag_path_t node from RESULT_POOL, containing 836 NODE, ENTRY and PARENT; NODE and ENTRY are copied into RESULT_POOL. */ 837static svn_fs_x__dag_path_t * 838make_parent_path(dag_node_t *node, 839 const svn_stringbuf_t *entry, 840 svn_fs_x__dag_path_t *parent, 841 apr_pool_t *result_pool) 842{ 843 svn_fs_x__dag_path_t *dag_path 844 = apr_pcalloc(result_pool, sizeof(*dag_path)); 845 if (node) 846 dag_path->node = svn_fs_x__dag_dup(node, result_pool); 847 dag_path->entry = apr_pstrmemdup(result_pool, entry->data, entry->len); 848 dag_path->parent = parent; 849 dag_path->copy_inherit = svn_fs_x__copy_id_inherit_unknown; 850 dag_path->copy_src_path = NULL; 851 return dag_path; 852} 853 854svn_error_t * 855svn_fs_x__get_dag_path(svn_fs_x__dag_path_t **dag_path_p, 856 svn_fs_root_t *root, 857 const char *fs_path, 858 int flags, 859 svn_boolean_t is_txn_path, 860 apr_pool_t *result_pool, 861 apr_pool_t *scratch_pool) 862{ 863 svn_fs_t *fs = root->fs; 864 dag_node_t *here = NULL; /* The directory we're currently looking at. */ 865 svn_fs_x__dag_path_t *dag_path; /* The path from HERE up to the root. */ 866 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 867 868 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root); 869 const char *entry; 870 svn_string_t path; 871 svn_stringbuf_t *entry_buffer = svn_stringbuf_create_ensure(64, 872 scratch_pool); 873 apr_size_t path_len; 874 875 /* Normalize the FS_PATH to be compatible with our DAG walk utils. */ 876 normalize_path(&path, fs_path); /* "" */ 877 878 /* Make a DAG_PATH for the root node, using its own current copy id. */ 879 SVN_ERR(get_root_node(&here, root, change_set, iterpool)); 880 dag_path = make_parent_path(here, entry_buffer, NULL, result_pool); 881 dag_path->copy_inherit = svn_fs_x__copy_id_inherit_self; 882 883 path_len = path.len; 884 path.len = 0; 885 886 /* Walk the path segment by segment. Add to the DAG_PATH as we go. */ 887 for (entry = next_entry_name(&path, entry_buffer); 888 entry; 889 entry = next_entry_name(&path, entry_buffer)) 890 { 891 svn_pool_clear(iterpool); 892 893 /* If the current node is not a directory and we are just here to 894 * check for the path's existence, then that's o.k. 895 * Otherwise, non-dir nodes will cause an error in dag_step. */ 896 if ( (flags & svn_fs_x__dag_path_allow_null) 897 && (svn_fs_x__dag_node_kind(dag_path->node) != svn_node_dir)) 898 { 899 dag_path = NULL; 900 break; 901 } 902 903 /* Find the sub-node. */ 904 SVN_ERR(dag_step(&here, root, dag_path->node, entry, &path, change_set, 905 TRUE, iterpool)); 906 907 /* "node not found" requires special handling. */ 908 if (here == NULL) 909 { 910 /* If this was the last path component, and the caller 911 said it was optional, then don't return an error; 912 just put a NULL node pointer in the path. 913 */ 914 if ((flags & svn_fs_x__dag_path_last_optional) 915 && (path_len == path.len)) 916 { 917 dag_path = make_parent_path(NULL, entry_buffer, dag_path, 918 result_pool); 919 break; 920 } 921 else if (flags & svn_fs_x__dag_path_allow_null) 922 { 923 dag_path = NULL; 924 break; 925 } 926 else 927 { 928 /* Build a better error message than svn_fs_x__dag_open 929 can provide, giving the root and full path name. */ 930 return SVN_FS__NOT_FOUND(root, fs_path); 931 } 932 } 933 934 /* Now, make a parent_path item for CHILD. */ 935 dag_path = make_parent_path(here, entry_buffer, dag_path, result_pool); 936 if (is_txn_path) 937 { 938 SVN_ERR(get_copy_inheritance(&dag_path->copy_inherit, 939 &dag_path->copy_src_path, 940 fs, dag_path, iterpool)); 941 } 942 } 943 944 svn_pool_destroy(iterpool); 945 *dag_path_p = dag_path; 946 return SVN_NO_ERROR; 947} 948 949/* Set *NODE_P to a mutable root directory for ROOT, cloning if 950 necessary, allocating in RESULT_POOL. ROOT must be a transaction root. 951 Use ERROR_PATH in error messages. Use SCRATCH_POOL for temporaries.*/ 952static svn_error_t * 953mutable_root_node(dag_node_t **node_p, 954 svn_fs_root_t *root, 955 const char *error_path, 956 apr_pool_t *result_pool, 957 apr_pool_t *scratch_pool) 958{ 959 /* If it's not a transaction root, we can't change its contents. */ 960 if (!root->is_txn_root) 961 return SVN_FS__ERR_NOT_MUTABLE(root->fs, root->rev, error_path); 962 963 /* It's a transaction root. 964 Get the appropriate DAG root node and copy it into RESULT_POOL. */ 965 SVN_ERR(get_root_node(node_p, root, svn_fs_x__root_change_set(root), 966 scratch_pool)); 967 *node_p = svn_fs_x__dag_dup(*node_p, result_pool); 968 969 return SVN_NO_ERROR; 970} 971 972svn_error_t * 973svn_fs_x__make_path_mutable(svn_fs_root_t *root, 974 svn_fs_x__dag_path_t *parent_path, 975 const char *error_path, 976 apr_pool_t *result_pool, 977 apr_pool_t *scratch_pool) 978{ 979 dag_node_t *clone; 980 svn_fs_x__txn_id_t txn_id = svn_fs_x__root_txn_id(root); 981 982 /* Is the node mutable already? */ 983 if (svn_fs_x__dag_check_mutable(parent_path->node)) 984 return SVN_NO_ERROR; 985 986 /* Are we trying to clone the root, or somebody's child node? */ 987 if (parent_path->parent) 988 { 989 svn_fs_x__id_t copy_id = { SVN_INVALID_REVNUM, 0 }; 990 svn_fs_x__id_t *copy_id_ptr = ©_id; 991 svn_fs_x__copy_id_inherit_t inherit = parent_path->copy_inherit; 992 const char *clone_path, *copyroot_path; 993 svn_revnum_t copyroot_rev; 994 svn_boolean_t is_parent_copyroot = FALSE; 995 svn_fs_root_t *copyroot_root; 996 dag_node_t *copyroot_node; 997 apr_pool_t *subpool; 998 999 /* We're trying to clone somebody's child. Make sure our parent 1000 is mutable. */ 1001 SVN_ERR(svn_fs_x__make_path_mutable(root, parent_path->parent, 1002 error_path, result_pool, 1003 scratch_pool)); 1004 1005 /* Allocate all temporaries in a sub-pool that we control locally. 1006 That way, we keep only the data of one level of recursion around 1007 at any time. */ 1008 subpool = svn_pool_create(scratch_pool); 1009 switch (inherit) 1010 { 1011 case svn_fs_x__copy_id_inherit_parent: 1012 copy_id = *svn_fs_x__dag_get_copy_id(parent_path->parent->node); 1013 break; 1014 1015 case svn_fs_x__copy_id_inherit_new: 1016 SVN_ERR(svn_fs_x__reserve_copy_id(©_id, root->fs, txn_id, 1017 subpool)); 1018 break; 1019 1020 case svn_fs_x__copy_id_inherit_self: 1021 copy_id_ptr = NULL; 1022 break; 1023 1024 case svn_fs_x__copy_id_inherit_unknown: 1025 default: 1026 SVN_ERR_MALFUNCTION(); /* uh-oh -- somebody didn't calculate copy-ID 1027 inheritance data. */ 1028 } 1029 1030 /* Determine what copyroot our new child node should use. */ 1031 svn_fs_x__dag_get_copyroot(©root_rev, ©root_path, 1032 parent_path->node); 1033 SVN_ERR(svn_fs_x__revision_root(©root_root, root->fs, 1034 copyroot_rev, subpool)); 1035 SVN_ERR(svn_fs_x__get_temp_dag_node(©root_node, copyroot_root, 1036 copyroot_path, subpool)); 1037 1038 if (!svn_fs_x__dag_related_node(copyroot_node, parent_path->node)) 1039 is_parent_copyroot = TRUE; 1040 1041 /* Now make this node mutable. */ 1042 clone_path = parent_path_path(parent_path->parent, subpool); 1043 SVN_ERR(svn_fs_x__dag_clone_child(&clone, 1044 parent_path->parent->node, 1045 clone_path, 1046 parent_path->entry, 1047 copy_id_ptr, txn_id, 1048 is_parent_copyroot, 1049 result_pool, 1050 subpool)); 1051 1052 /* Update the path cache. */ 1053 svn_fs_x__update_dag_cache(clone); 1054 svn_pool_destroy(subpool); 1055 } 1056 else 1057 { 1058 /* We're trying to clone the root directory. */ 1059 SVN_ERR(mutable_root_node(&clone, root, error_path, result_pool, 1060 scratch_pool)); 1061 } 1062 1063 /* Update the PARENT_PATH link to refer to the clone. */ 1064 parent_path->node = clone; 1065 1066 return SVN_NO_ERROR; 1067} 1068 1069 1070svn_error_t * 1071svn_fs_x__get_temp_dag_node(dag_node_t **node_p, 1072 svn_fs_root_t *root, 1073 const char *path, 1074 apr_pool_t *scratch_pool) 1075{ 1076 svn_string_t normalized; 1077 1078 /* First we look for the DAG in our cache. */ 1079 *node_p = dag_node_cache_get(root, normalize_path(&normalized, path)); 1080 1081 /* If it is not there, walk the DAG and fill the cache. */ 1082 if (! *node_p) 1083 SVN_ERR(walk_dag_path(node_p, root, &normalized, scratch_pool)); 1084 1085 return SVN_NO_ERROR; 1086} 1087 1088 1089svn_error_t * 1090svn_fs_x__get_dag_node(dag_node_t **dag_node_p, 1091 svn_fs_root_t *root, 1092 const char *path, 1093 apr_pool_t *result_pool, 1094 apr_pool_t *scratch_pool) 1095{ 1096 dag_node_t *node = NULL; 1097 SVN_ERR(svn_fs_x__get_temp_dag_node(&node, root, path, scratch_pool)); 1098 1099 /* We want the returned node to live in POOL. */ 1100 *dag_node_p = svn_fs_x__dag_dup(node, result_pool); 1101 1102 return SVN_NO_ERROR; 1103} 1104