1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1993, 1995 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Poul-Henning Kamp of the FreeBSD Project. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * 65 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
72 */ 73#include <sys/param.h> 74#include <sys/systm.h> 75#include <sys/time.h> 76#include <sys/mount_internal.h> 77#include <sys/vnode_internal.h> 78#include <miscfs/specfs/specdev.h> 79#include <sys/namei.h> 80#include <sys/errno.h> 81#include <sys/malloc.h> 82#include <sys/kauth.h> 83#include <sys/user.h> 84#include <sys/paths.h> 85 86#if CONFIG_MACF 87#include <security/mac_framework.h> 88#endif 89 90/* 91 * Name caching works as follows: 92 * 93 * Names found by directory scans are retained in a cache 94 * for future reference. It is managed LRU, so frequently 95 * used names will hang around. Cache is indexed by hash value 96 * obtained from (vp, name) where vp refers to the directory 97 * containing name. 98 * 99 * If it is a "negative" entry, (i.e. for a name that is known NOT to 100 * exist) the vnode pointer will be NULL. 101 * 102 * Upon reaching the last segment of a path, if the reference 103 * is for DELETE, or NOCACHE is set (rewrite), and the 104 * name is located in the cache, it will be dropped. 105 */ 106 107/* 108 * Structures associated with name cacheing. 
 */

/*
 * Global name cache state.  The hash table is indexed by
 * (directory vnode id ^ name hash) -- see NCHHASH below.
 */
LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchashmask;
u_long	nchash;				/* size of hash table - 1 */
long	numcache;			/* number of cache entries allocated */
int	desiredNodes;			/* target number of name cache entries */
int	desiredNegNodes;		/* target number of negative entries */
int	ncs_negtotal;			/* current number of negative entries */
int	nc_disabled = 0;		/* non-zero disables name cache lookups */
TAILQ_HEAD(, namecache) nchead;		/* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;	/* chain of only negative cache entries */


#if COLLECT_STATS

struct	nchstats nchstats;		/* cache effectiveness statistics */

#define	NCHSTAT(v)	{		\
        nchstats.v++;			\
}
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
/*
 * NOTE(review): when stats are collected, the "shared" lock maps to the
 * exclusive lock -- presumably so the stat counters can be mutated safely
 * from lookup paths; confirm against name_cache_lock() implementation.
 */
#define	NAME_CACHE_LOCK_SHARED()	name_cache_lock()

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define	NAME_CACHE_LOCK_SHARED()	name_cache_lock_shared()

#endif


/* vars for name cache list lock */
lck_grp_t * namecache_lck_grp;
lck_grp_attr_t * namecache_lck_grp_attr;
lck_attr_t * namecache_lck_attr;

lck_grp_t * strcache_lck_grp;
lck_grp_attr_t * strcache_lck_grp_attr;
lck_attr_t * strcache_lck_attr;

lck_rw_t  * namecache_rw_lock;
lck_rw_t  * strtable_rw_lock;

/* number of mutexes protecting buckets of the string table */
#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];


static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void);
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);

#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif	/* DUMP_STRING_TABLE */

static void init_crc32(void);
static unsigned
int crc32tab[256];	/* CRC-32 table used for component-name hashing; filled by init_crc32() */


/* hash bucket for a (directory vnode, name hash) pair */
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])



/*
 * This function builds the path to a filename in "buff".  The
 * length of the buffer *INCLUDING* the trailing zero byte is
 * returned in outlen.  NOTE: the length includes the trailing
 * zero byte and thus the length is one greater than what strlen
 * would return.  This is important and lots of code elsewhere
 * in the kernel assumes this behavior.
 *
 * This function can call vnop in file system if the parent vnode
 * does not exist or when called for hardlinks via volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction.  ENOENT means that
 * one of the parents moved while we were building the path.  The
 * caller can special handle this case by calling build_path again.
 *
 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return path
 * that is relative to the nearest mount point, i.e. do not
 * cross over mount points during building the path.
 *
 * passed in vp must have a valid io_count reference
 *
 * The path is assembled back-to-front: names are copied backwards from
 * the end of "buff" while walking up the v_parent chain, then the result
 * is slid down to the front of the buffer before returning.
 */
int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
        vnode_t vp, tvp;
	vnode_t vp_with_iocount;	/* vnode we lazily hold an iocount on (never first_vp) */
        vnode_t proc_root_dir_vp;	/* the process's chroot directory, if any */
	char *end;			/* current write position; moves toward buff[0] */
	const char *str;
	int  len;
	int  ret = 0;
	int  fixhardlink;

	if (first_vp == NULLVP)
		return (EINVAL);

	if (buflen <= 1)
		return (ENOSPC);

	/*
	 * Grab the process fd so we can evaluate fd_rdir.
	 */
	if (vfs_context_proc(ctx)->p_fd)
		proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
	else
		proc_root_dir_vp = NULL;

	vp_with_iocount = NULLVP;
again:
	vp = first_vp;

	end = &buff[buflen-1];
	*end = '\0';

	/*
	 * holding the NAME_CACHE_LOCK in shared mode is
	 * sufficient to stabilize both the vp->v_parent chain
	 * and the 'vp->v_mount->mnt_vnodecovered' chain
	 *
	 * if we need to drop this lock, we must first grab the v_id
	 * from the vnode we're currently working with... if that
	 * vnode doesn't already have an io_count reference (the vp
	 * passed in comes with one), we must grab a reference
	 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
	 * deadlocks may result if you call vnode_get while holding
	 * the NAME_CACHE_LOCK... we lazily release the reference
	 * we pick up the next time we encounter a need to drop
	 * the NAME_CACHE_LOCK or before we return from this routine
	 */
	NAME_CACHE_LOCK_SHARED();

	/*
	 * Check if this is the root of a file system.
	 */
	while (vp && vp->v_flag & VROOT) {
	        if (vp->v_mount == NULL) {
			ret = EINVAL;
			goto out_unlock;
		}
	        if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
			/*
			 * It's the root of the root file system, so it's
			 * just "/".
			 */
		        *--end = '/';

			goto out_unlock;
		} else {
			/*
			 * This the root of the volume and the caller does not
			 * want to cross mount points.  Therefore just return
			 * '/' as the relative path.
			 */
			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				*--end = '/';
				goto out_unlock;
			} else {
				vp = vp->v_mount->mnt_vnodecovered;
			}
		}
	}

	while ((vp != NULLVP) && (vp->v_parent != vp)) {
		int  vid;

		/*
		 * For hardlinks the v_name may be stale, so if its OK
		 * to enter a file system, ask the file system for the
		 * name and parent (below).
		 */
		fixhardlink = (vp->v_flag & VISHARDLINK) &&
		    (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
		    !(flags & BUILDPATH_NO_FS_ENTER);

		if (!fixhardlink) {
			str = vp->v_name;

			if (str == NULL || *str == '\0') {
				if (vp->v_parent != NULL)
					ret = EINVAL;
				else
					ret = ENOENT;
				goto out_unlock;
			}
			len = strlen(str);
			/*
			 * Check that there's enough space (including space for the '/')
			 */
			if ((end - buff) < (len + 1)) {
				ret = ENOSPC;
				goto out_unlock;
			}
			/*
			 * Copy the name backwards.
			 */
			str += len;

			for (; len > 0; len--)
			       *--end = *--str;
			/*
			 * Add a path separator.
			 */
			*--end = '/';
		}

		/*
		 * Walk up the parent chain.
		 */
		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
		    (flags & BUILDPATH_NO_FS_ENTER)) {

			/*
			 * In this if () block we are not allowed to enter the filesystem
			 * to conclusively get the most accurate parent identifier.
			 * As a result, if 'vp' does not identify '/' and it
			 * does not have a valid v_parent, then error out
			 * and disallow further path construction
			 */
			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
				/* Only '/' is allowed to have a NULL parent pointer */
				ret = EINVAL;

				/* The code below will exit early if 'tvp = vp' == NULL */
			}
			vp = vp->v_parent;

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		} else {
			/*
			 * No parent, go get it if supported.
			 */
			struct vnode_attr  va;
			vnode_t  dvp;

			/*
			 * Make sure file system supports obtaining a path from id.
			 */
			if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
				ret = ENOENT;
				goto out_unlock;
			}
			/* capture v_id before dropping the lock so we can revalidate */
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				/* vnode was recycled while unlocked -- restart from scratch */
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_parentid);

			if (fixhardlink) {
				VATTR_WANTED(&va, va_name);
				MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
			} else {
				va.va_name = NULL;
			}
			/*
			 * Ask the file system for its parent id and for its name (optional).
			 */
			ret = vnode_getattr(vp, &va, ctx);

			if (fixhardlink) {
				if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
					str = va.va_name;
					vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
				} else if (vp->v_name) {
					/* fall back to the (possibly stale) cached name */
					str = vp->v_name;
					ret = 0;
				} else {
					ret = ENOENT;
					goto bad_news;
				}
				len = strlen(str);

				/*
				 * Check that there's enough space.
				 */
				if ((end - buff) < (len + 1)) {
					ret = ENOSPC;
				} else {
					/* Copy the name backwards. */
					str += len;

					for (; len > 0; len--) {
						*--end = *--str;
					}
					/*
					 * Add a path separator.
					 */
					*--end = '/';
				}
bad_news:
				FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
			}
			if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
				ret = ENOENT;
				goto out;
			}
			/*
			 * Ask the file system for the parent vnode.
			 */
			if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
				goto out;

			if (!fixhardlink && (vp->v_parent != dvp))
				vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);

			/* VFS_VGET returned dvp with an iocount; it becomes the new lazy hold */
			if (vp_with_iocount)
				vnode_put(vp_with_iocount);
			vp = dvp;
			vp_with_iocount = vp;

			NAME_CACHE_LOCK_SHARED();

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent)
				vp = vp->v_parent;
		}

		/*
		 * When a mount point is crossed switch the vp.
		 * Continue until we find the root or we find
		 * a vnode that's not the root of a mounted
		 * file system.
		 */
		tvp = vp;

		while (tvp) {
			if (tvp == proc_root_dir_vp)
				goto out_unlock;	/* encountered the root */

			if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
				break;			/* not the root of a mounted FS */

			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				/* Do not cross over mount points */
				tvp = NULL;
			} else {
				tvp = tvp->v_mount->mnt_vnodecovered;
			}
		}
		if (tvp == NULLVP)
			goto out_unlock;
		vp = tvp;

		if (vp && (flags & BUILDPATH_CHECKACCESS)) {
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
				goto out;	/* no peeking */

			NAME_CACHE_LOCK_SHARED();
		}
	}
out_unlock:
	NAME_CACHE_UNLOCK();
out:
	if (vp_with_iocount)
		vnode_put(vp_with_iocount);
	/*
	 * Slide the name down to the beginning of the buffer.
	 */
	memmove(buff, end, &buff[buflen] - end);

	/*
	 * length includes the trailing zero byte
	 */
	*outlen = &buff[buflen] - end;

	/* One of the parents was moved during path reconstruction.
	 * The caller is interested in knowing whether any of the
	 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
	 */
	if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
		ret = EAGAIN;
	}

	return (ret);
}


/*
 * return NULLVP if vp's parent doesn't
 * exist, or we can't get a valid iocount
 * else return the parent of vp
 */
vnode_t
vnode_getparent(vnode_t vp)
{
        vnode_t pvp = NULLVP;
	int  pvid;

	NAME_CACHE_LOCK_SHARED();
	/*
	 * v_parent is stable behind the name_cache lock
	 * however, the only thing we can really guarantee
	 * is that we've grabbed a valid iocount on the
	 * parent of 'vp' at the time we took the name_cache lock...
	 * once we drop the lock, vp could get re-parented
	 */
	if ( (pvp = vp->v_parent) != NULLVP ) {
	        pvid = pvp->v_id;

		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(pvp, pvid) != 0)
		        pvp = NULL;
	} else
	        NAME_CACHE_UNLOCK();
	return (pvp);
}

/*
 * Return a referenced copy of vp's name, or NULL if it has none.
 * The returned string holds a reference in the string cache and must
 * be released with vnode_putname().
 */
const char *
vnode_getname(vnode_t vp)
{
        const char *name = NULL;

	NAME_CACHE_LOCK_SHARED();

	if (vp->v_name)
	        name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
	NAME_CACHE_UNLOCK();

	return (name);
}

/*
 * Release a name reference obtained via vnode_getname()/vfs_addname().
 */
void
vnode_putname(const char *name)
{
	vfs_removename(name);
}

/* sentinel returned by vnode_getname_printable() when no name is available */
static const char unknown_vnodename[] = "(unknown vnode name)";

/*
 * Like vnode_getname(), but always returns something printable:
 * a real referenced name, a synthesized device name for VCHR/VBLK
 * vnodes, or a static placeholder.  Release with
 * vnode_putname_printable().
 */
const char *
vnode_getname_printable(vnode_t vp)
{
	const char *name = vnode_getname(vp);
	if (name != NULL)
		return name;

	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		{
		/*
		 * Create an artificial dev name from
		 * major and minor device number
		 */
		char dev_name[64];
		(void) snprintf(dev_name,
		    sizeof(dev_name),
		    "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
		    major(vp->v_rdev), minor(vp->v_rdev));
		/*
		 * Add the newly created dev name to the name
		 * cache to allow easier cleanup. Also,
		 * vfs_addname allocates memory for the new name
		 * and returns it.
		 */
		NAME_CACHE_LOCK_SHARED();
		name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
		NAME_CACHE_UNLOCK();
		return name;
		}
	default:
		return unknown_vnodename;
	}
}

/*
 * Release a name obtained from vnode_getname_printable().
 * The static sentinel is not reference counted and is skipped.
 */
void
vnode_putname_printable(const char *name)
{
	if (name == unknown_vnodename)
		return;
	vnode_putname(name);
}


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * it's new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 */
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
	struct	namecache *ncp;
        vnode_t	old_parentvp = NULLVP;
#if NAMEDSTREAMS
	int isstream = (vp->v_flag & VISNAMEDSTREAM);
	int kusecountbumped = 0;
#endif
	kauth_cred_t tcred = NULL;
	const char *vname = NULL;
	const char *tname = NULL;

	if (flags & VNODE_UPDATE_PARENT) {
	        if (dvp && vnode_ref(dvp) != 0) {
			/* couldn't take a usecount ref on the new parent; drop it */
			dvp = NULLVP;
		}
#if NAMEDSTREAMS
		/* Don't count a stream's parent ref during unmounts */
		if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
			vnode_lock_spin(dvp);
			++dvp->v_kusecount;
			kusecountbumped = 1;
			vnode_unlock(dvp);
		}
#endif
	} else {
	        dvp = NULLVP;
	}
	if ( (flags & VNODE_UPDATE_NAME) ) {
		if (name != vp->v_name) {
			if (name && *name) {
				if (name_len == 0)
					name_len = strlen(name);
			        tname = vfs_addname(name, name_len, name_hashval, 0);
			}
		} else
			/* name is unchanged -- nothing to swap below */
			flags &= ~VNODE_UPDATE_NAME;
	}
	if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {

	        NAME_CACHE_LOCK();

		if ( (flags & VNODE_UPDATE_PURGE) ) {

			if (vp->v_parent)
				vp->v_parent->v_nc_generation++;

			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);

			while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
				cache_delete(ncp, 1);

			/*
			 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
			 */
			tcred = vp->v_cred;
			vp->v_cred = NOCRED;
			vp->v_authorized_actions = 0;
		}
		if ( (flags & VNODE_UPDATE_NAME) ) {
			vname = vp->v_name;
		        vp->v_name = tname;
		}
		if (flags & VNODE_UPDATE_PARENT) {
			if (dvp != vp && dvp != vp->v_parent) {
				old_parentvp = vp->v_parent;
				vp->v_parent = dvp;
				dvp = NULLVP;

				if (old_parentvp)
					flags |= VNODE_UPDATE_CACHE;
			}
		}
		if (flags & VNODE_UPDATE_CACHE) {
			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);
		}
		NAME_CACHE_UNLOCK();

		if (vname != NULL)
			vfs_removename(vname);

		if (IS_VALID_CRED(tcred))
			kauth_cred_unref(&tcred);
	}
	if (dvp != NULLVP) {
#if NAMEDSTREAMS
		/* Back-out the ref we took if we lost a race for vp->v_parent. */
		if (kusecountbumped) {
			vnode_lock_spin(dvp);
			if (dvp->v_kusecount > 0)
				--dvp->v_kusecount;
			vnode_unlock(dvp);
		}
#endif
		vnode_rele(dvp);
	}
	if (old_parentvp) {
	        struct  uthread *ut;

#if NAMEDSTREAMS
		if (isstream) {
		        vnode_lock_spin(old_parentvp);
			if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
				--old_parentvp->v_kusecount;
			vnode_unlock(old_parentvp);
		}
#endif
	        ut = get_bsdthread_info(current_thread());

		/*
		 * indicated to vnode_rele that it shouldn't do a
		 * vnode_reclaim at this time... instead it will
		 * chain the vnode to the uu_vreclaims list...
		 * we'll be responsible for calling vnode_reclaim
		 * on each of the vnodes in this list...
		 */
		ut->uu_defer_reclaims = 1;
		ut->uu_vreclaims = NULLVP;

	        while ( (vp = old_parentvp) != NULLVP ) {

		        vnode_lock_spin(vp);
			vnode_rele_internal(vp, 0, 0, 1);

			/*
			 * check to see if the vnode is now in the state
			 * that would have triggered a vnode_reclaim in vnode_rele
			 * if it is, we save it's parent pointer and then NULL
			 * out the v_parent field... we'll drop the reference
			 * that was held on the next iteration of this loop...
			 * this short circuits a potential deep recursion if we
			 * have a long chain of parents in this state...
			 * we'll sit in this loop until we run into
			 * a parent in this chain that is not in this state
			 *
			 * make our check and the vnode_rele atomic
			 * with respect to the current vnode we're working on
			 * by holding the vnode lock
			 * if vnode_rele deferred the vnode_reclaim and has put
			 * this vnode on the list to be reaped by us, than
			 * it has left this vnode with an iocount == 1
			 */
			if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
			     ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
			        /*
				 * vnode_rele wanted to do a vnode_reclaim on this vnode
				 * it should be sitting on the head of the uu_vreclaims chain
				 * pull the parent pointer now so that when we do the
				 * vnode_reclaim for each of the vnodes in the uu_vreclaims
				 * list, we won't recurse back through here
				 *
				 * need to do a convert here in case vnode_rele_internal
				 * returns with the lock held in the spin mode... it
				 * can drop and retake the lock under certain circumstances
				 */
			        vnode_lock_convert(vp);

			        NAME_CACHE_LOCK();
				old_parentvp = vp->v_parent;
				vp->v_parent = NULLVP;
				NAME_CACHE_UNLOCK();
			} else {
			        /*
				 * we're done... we ran into a vnode that isn't
				 * being terminated
				 */
			        old_parentvp = NULLVP;
			}
			vnode_unlock(vp);
		}
		ut->uu_defer_reclaims = 0;

		while ( (vp = ut->uu_vreclaims) != NULLVP) {
		        ut->uu_vreclaims = vp->v_defer_reclaimlist;

			/*
			 * vnode_put will drive the vnode_reclaim if
			 * we are still the only reference on this vnode
			 */
			vnode_put(vp);
		}
	}
}


/*
 * Mark a vnode as having multiple hard links.  HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup.
 * Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void vnode_setmultipath(vnode_t vp)
{
	vnode_lock_spin(vp);

	/*
	 * In theory, we're changing the vnode's identity as far as the
	 * name cache is concerned, so we ought to grab the name cache lock
	 * here.  However, there is already a race, and grabbing the name
	 * cache lock only makes the race window slightly smaller.
	 *
	 * The race happens because the vnode already exists in the name
	 * cache, and could be found by one thread before another thread
	 * can set the hard link flag.
	 */

	vp->v_flag |= VISHARDLINK;

	vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void vnode_uncache_credentials(vnode_t vp)
{
        vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so its not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

/*
 * Remove 'action' from the set of cached-authorized rights on 'vp'.
 * KAUTH_INVALIDATE_CACHED_RIGHTS additionally drops the cached credential.
 */
void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
        kauth_cred_t tcred = NOCRED;

	NAME_CACHE_LOCK();

	vp->v_authorized_actions &= ~action;

	if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
	    IS_VALID_CRED(vp->v_cred)) {
	        /*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = NOCRED;
	}
	NAME_CACHE_UNLOCK();

	if (tcred != NOCRED)
		kauth_cred_unref(&tcred);
}


extern int bootarg_vnode_cache_defeat;	/* default = 0, from bsd_init.c */

/*
 * Return TRUE if 'action' on 'vp' is already authorized for the caller's
 * credential according to the per-vnode rights cache; FALSE means the
 * caller must perform a full authorization check.
 */
boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t	ucred;
	boolean_t	retval = FALSE;

	/* Boot argument to defeat rights caching */
	if (bootarg_vnode_cache_defeat)
		return FALSE;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
	        /*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
	        if (vp->v_mount->mnt_authcache_ttl) {
		        if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
			        /*
				 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
				 * we will only allow a SEARCH right on a directory to be cached...
				 * that cached right always has a default TTL associated with it
				 */
			        if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
				        vp = NULLVP;
			}
		        if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
			        vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
				vp = NULLVP;
			}
		} else
		        vp = NULLVP;
	}
	if (vp != NULLVP) {
	        ucred = vfs_context_ucred(ctx);

		NAME_CACHE_LOCK_SHARED();

		if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
		        retval = TRUE;

		NAME_CACHE_UNLOCK();
	}
	return retval;
}


/*
 * Record that 'action' has been authorized for the caller's credential on
 * 'vp', so that subsequent identical checks can be satisfied from the cache.
 */
void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;
	kauth_cred_t ucred;
	struct timeval tv;
	boolean_t ttl_active = FALSE;

	ucred = vfs_context_ucred(ctx);

	if (!IS_VALID_CRED(ucred) || action == 0)
	        return;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
	        /*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
	        if (vp->v_mount->mnt_authcache_ttl == 0)
		        return;

		if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
		        /*
			 * only cache SEARCH action for filesystems marked
			 * MNTK_AUTH_OPAQUE on VDIRs...
			 * the lookup_path code will time these out
			 */
		        if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
			        return;
		}
		ttl_active = TRUE;

		microuptime(&tv);
	}
	NAME_CACHE_LOCK();

	if (vp->v_cred != ucred) {
	        kauth_cred_ref(ucred);
	        /*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = ucred;
		vp->v_authorized_actions = 0;
	}
	if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
	        /*
		 * only reset the timestamp on the
		 * first authorization cached after the previous
		 * timer has expired or we're switching creds...
		 * 'vnode_cache_is_authorized' will clear the
		 * authorized actions if the TTL is active and
		 * it has expired
		 */
	        vp->v_cred_timestamp = tv.tv_sec;
	}
	vp->v_authorized_actions |= action;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}


/*
 * Return TRUE if the cached rights on 'vp' have outlived the mount's
 * authorization-cache TTL (compared against the monotonic uptime clock).
 */
boolean_t vnode_cache_is_stale(vnode_t vp)
{
	struct timeval	tv;
	boolean_t	retval;

	microuptime(&tv);

	if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
	        retval = TRUE;
	else
	        retval = FALSE;

	return retval;
}



/*
 * Returns:	0			Success
 *	ERECYCLE			vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
 *					This errno value should not be seen by anyone outside of the kernel.
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
		vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
	char	*cp;		/* pointer into pathname argument */
	int	vid;
	int	vvid = 0;	/* protected by vp != NULLVP */
	vnode_t	vp = NULLVP;
	vnode_t	tdp = NULLVP;
	kauth_cred_t ucred;
	boolean_t ttl_enabled = FALSE;
	struct timeval tv;
	mount_t mp;
	unsigned int hash;
	int error = 0;

#if CONFIG_TRIGGERS
	vnode_t trigger_vp;
#endif /* CONFIG_TRIGGERS */

	ucred = vfs_context_ucred(ctx);
	ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

	NAME_CACHE_LOCK_SHARED();

	if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		ttl_enabled = TRUE;
		microuptime(&tv);
	}
	for (;;) {
		/*
		 * Search a directory.
		 *
		 * The cn_hash value is for use by cache_lookup
		 * The last component of the filename is left accessible via
		 * cnp->cn_nameptr for callers that need the name.
		 */
		hash = 0;
		cp = cnp->cn_nameptr;

		while (*cp && (*cp != '/')) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
		/*
		 * the crc generator can legitimately generate
		 * a 0... however, 0 for us means that we
		 * haven't computed a hash, so use 1 instead
		 */
		if (hash == 0)
			hash = 1;
		cnp->cn_hash = hash;
		cnp->cn_namelen = cp - cnp->cn_nameptr;

		ndp->ni_pathlen -= cnp->cn_namelen;
		ndp->ni_next = cp;

		/*
		 * Replace multiple slashes by a single slash and trailing slashes
		 * by a null. This must be done before VNOP_LOOKUP() because some
		 * fs's don't know about trailing slashes. Remember if there were
		 * trailing slashes to handle symlinks, existing non-directories
		 * and non-existing files that won't be directories specially later.
		 */
		while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
			cp++;
			ndp->ni_pathlen--;

			if (*cp == '\0') {
				ndp->ni_flag |= NAMEI_TRAILINGSLASH;
				*ndp->ni_next = '\0';
			}
		}
		ndp->ni_next = cp;

		cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

		if (*cp == '\0')
			cnp->cn_flags |= ISLASTCN;

		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
			cnp->cn_flags |= ISDOTDOT;

		*dp_authorized = 0;
#if NAMEDRSRCFORK
		/*
		 * Process a request for a file's resource fork.
		 *
		 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
		 */
		if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
		    (cp[1] == '.' && cp[2] == '.') &&
		    bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
			/* Skip volfs file systems that don't support native streams. */
			if ((dp->v_mount != NULL) &&
			    (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
			    (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
				goto skiprsrcfork;
			}
			cnp->cn_flags |= CN_WANTSRSRCFORK;
			cnp->cn_flags |= ISLASTCN;
			ndp->ni_next[0] = '\0';
			ndp->ni_pathlen = 1;
		}
skiprsrcfork:
#endif

#if CONFIG_MACF

		/*
		 * Name cache provides authorization caching (see below)
		 * that will short circuit MAC checks in lookup().
		 * We must perform MAC check here. On denial
		 * dp_authorized will remain 0 and second check will
		 * be performed in lookup().
		 */
		if (!(cnp->cn_flags & DONOTAUTH)) {
			error = mac_vnode_check_lookup(ctx, dp, cnp);
			if (error) {
				NAME_CACHE_UNLOCK();
				goto errorout;
			}
		}
#endif /* MAC */
		/* bail to the slow path if the cached authorization has timed out */
		if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
			break;

		/*
		 * NAME_CACHE_LOCK holds these fields stable
		 */
		if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
		    !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE))
			break;

		/*
		 * indicate that we're allowed to traverse this directory...
		 * even if we fail the cache lookup or decide to bail for
		 * some other reason, this information is valid and is used
		 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
		 */
		*dp_authorized = 1;

		if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
			if (cnp->cn_nameiop != LOOKUP)
				break;
			if (cnp->cn_flags & LOCKPARENT)
				break;
			if (cnp->cn_flags & NOCACHE)
				break;
			if (cnp->cn_flags & ISDOTDOT) {
				/*
				 * Force directory hardlinks to go to
				 * file system for ".." requests.
				 */
				if (dp && (dp->v_flag & VISHARDLINK)) {
					break;
				}
				/*
				 * Quit here only if we can't use
				 * the parent directory pointer or
				 * don't have one.  Otherwise, we'll
				 * use it below.
				 */
				if ((dp->v_flag & VROOT) ||
				    dp == ndp->ni_rootdir ||
				    dp->v_parent == NULLVP)
					break;
			}
		}

		if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
			/*
			 * Force lookup to go to the filesystem with
			 * all cnp fields set up.
			 */
			break;
		}

		/*
		 * "." and ".." aren't supposed to be cached, so check
		 * for them before checking the cache.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
			vp = dp;
		else if ( (cnp->cn_flags & ISDOTDOT) )
			vp = dp->v_parent;
		else {
			if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
				break;

			if ( (vp->v_flag & VISHARDLINK) ) {
				/*
				 * The file system wants a VNOP_LOOKUP on this vnode
				 */
				vp = NULL;
				break;
			}
		}
		if ( (cnp->cn_flags & ISLASTCN) )
			break;

		if (vp->v_type != VDIR) {
			if (vp->v_type != VLNK)
				vp = NULL;
			break;
		}

		if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
			/* cross into the mounted file system, if its root is still valid */
			if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
			    mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
				break;
			vp = mp->mnt_realrootvp;
		}

#if CONFIG_TRIGGERS
		/*
		 * After traversing all mountpoints stacked here, if we have a
		 * trigger in hand, resolve it.  Note that we don't need to
		 * leave the fast path if the mount has already happened.
		 */
		if ((vp->v_resolve != NULL) &&
		    (vp->v_resolve->vr_resolve_func != NULL)) {
			break;
		}
#endif /* CONFIG_TRIGGERS */


		dp = vp;
		vp = NULLVP;

		/* advance to the next path component, skipping extra slashes */
		cnp->cn_nameptr = ndp->ni_next + 1;
		ndp->ni_pathlen--;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}
	/*
	 * snapshot the vnode ids under the lock so we can revalidate
	 * identity after dropping it (see vnode_getwithvid below)
	 */
	if (vp != NULLVP)
		vvid = vp->v_id;
	vid = dp->v_id;

	NAME_CACHE_UNLOCK();

	if ((vp != NULLVP) && (vp->v_type != VLNK) &&
	    ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
		/*
		 * if we've got a child and it's the last component, and
		 * the lookup doesn't need to return the parent then we
		 * can skip grabbing an iocount on the parent, since all
		 * we're going to do with it is a vnode_put just before
		 * we return from 'lookup'.  If it's a symbolic link,
		 * we need the parent in case the link happens to be
		 * a relative pathname.
		 */
		tdp = dp;
		dp = NULLVP;
	} else {
need_dp:
		/*
		 * return the last directory we looked at
		 * with an io reference held. If it was the one passed
		 * in as a result of the last iteration of VNOP_LOOKUP,
		 * it should already hold an io ref. No need to increase ref.
		 */
		if (last_dp != dp){

			if (dp == ndp->ni_usedvp) {
				/*
				 * if this vnode matches the one passed in via USEDVP
				 * then this context already holds an io_count... just
				 * use vnode_get to get an extra ref for lookup to play
				 * with... can't use the getwithvid variant here because
				 * it will block behind a vnode_drain which would result
				 * in a deadlock (since we already own an io_count that the
				 * vnode_drain is waiting on)... vnode_get grabs the io_count
				 * immediately w/o waiting... it always succeeds
				 */
				vnode_get(dp);
			} else if ((error = vnode_getwithvid_drainok(dp, vid))) {
				/*
				 * failure indicates the vnode
				 * changed identity or is being
				 * TERMINATED... in either case
				 * punt this lookup.
				 *
				 * don't necessarily return ENOENT, though, because
				 * we really want to go back to disk and make sure it's
				 * there or not if someone else is changing this
				 * vnode. That being said, the one case where we do want
				 * to return ENOENT is when the vnode's mount point is
				 * in the process of unmounting and we might cause a deadlock
				 * in our attempt to take an iocount. An ENODEV error return
				 * from vnode_get* is an indication of this, but we change
				 * that to ENOENT for upper layers.
				 */
				if (error == ENODEV) {
					error = ENOENT;
				} else {
					error = ERECYCLE;
				}
				goto errorout;
			}
		}
	}
	if (vp != NULLVP) {
		if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
			vp = NULLVP;

			/*
			 * can't get reference on the vp we'd like
			 * to return... if we didn't grab a reference
			 * on the directory (due to fast path bypass),
			 * then we need to do it now... we can't return
			 * with both ni_dvp and ni_vp NULL, and no
			 * error condition
			 */
			if (dp == NULLVP) {
				dp = tdp;
				goto need_dp;
			}
		}
	}

	ndp->ni_dvp = dp;
	ndp->ni_vp = vp;

#if CONFIG_TRIGGERS
	trigger_vp = vp ? vp : dp;
	if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
		error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
		if (error) {
			if (vp)
				vnode_put(vp);
			if (dp)
				vnode_put(dp);
			goto errorout;
		}
	}
#endif /* CONFIG_TRIGGERS */

errorout:
	/*
	 * If we came into cache_lookup_path after an iteration of the lookup loop that
	 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
	 * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
	 * when it is no longer needed.  If we get to this point, and last_dp is not NULL
	 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
	 * the case that we got to a subsequent path component and this previous vnode is
	 * no longer needed.  We can then drop the io ref on it.
	 */
	if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
		vnode_put(last_dp);
	}

	/* initialized to 0, should be the same if no error cases occurred. */
	return error;
}


/*
 * Search dvp's hash chain for the component in cnp.  Expects the name
 * cache lock to be held (shared or exclusive) by the caller -- see
 * cache_lookup_path() above.  Returns the cached vnode (which may be
 * NULL for a negative entry), or NULL on a miss.
 */
static vnode_t
cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval = cnp->cn_hash;

	if (nc_disabled) {
		return NULL;
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	if (ncp == 0) {
		/*
		 * We failed to find an entry
		 */
		NCHSTAT(ncs_miss);
		return (NULL);
	}
	NCHSTAT(ncs_goodhits);

	return (ncp->nc_vp);
}


unsigned int hash_string(const char *cp, int len);
//
// Have to take a len argument because we may only need to
// hash part of a componentname.  A len of 0 means "hash the
// entire NUL-terminated string".
//
unsigned int
hash_string(const char *cp, int len)
{
	unsigned hash = 0;

	if (len) {
		while (len--) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	} else {
		while (*cp != '\0') {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	}
	/*
	 * the crc generator can legitimately generate
	 * a 0... however, 0 for us means that we
	 * haven't computed a hash, so use 1 instead
	 */
	if (hash == 0)
		hash = 1;
	return hash;
}


/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.
 * If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval;
	boolean_t have_exclusive = FALSE;
	uint32_t vid;
	vnode_t	vp;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
	hashval = cnp->cn_hash;

	if (nc_disabled) {
		return 0;
	}

	NAME_CACHE_LOCK_SHARED();

relook:
	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	/* We failed to find an entry */
	if (ncp == 0) {
		NCHSTAT(ncs_miss);
		NAME_CACHE_UNLOCK();
		return (0);
	}

	/*
	 * We don't want to have an entry, so dump it.
	 * Deletion requires the exclusive lock; if we only hold the
	 * shared lock, upgrade (drop + reacquire) and re-run the search
	 * since the chain may have changed in between.
	 */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}
	vp = ncp->nc_vp;

	/* We found a "positive" match, return the vnode */
	if (vp) {
		NCHSTAT(ncs_goodhits);

		vid = vp->v_id;
		NAME_CACHE_UNLOCK();

		/* revalidate identity after dropping the lock */
		if (vnode_getwithvid(vp, vid)) {
#if COLLECT_STATS
			NAME_CACHE_LOCK();
			NCHSTAT(ncs_badvid);
			NAME_CACHE_UNLOCK();
#endif
			return (0);
		}
		*vpp = vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 */
	NCHSTAT(ncs_neghits);

	NAME_CACHE_UNLOCK();
	return (ENOENT);
}


/*
 * Enter (dvp, cnp) -> vp in the cache and return the interned name
 * string with an extra reference for the caller (used as v_name at
 * the vnode_create call point).
 */
const char *
cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 2 references on the string entered
	 * one for the cache_enter_locked to consume
	 * and the second to be consumed by v_name (vnode_create call point)
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();

	return (strname);
}


/*
 * Add an entry to the cache...
 * but first check to see if the directory
 * that this entry is to be associated with has
 * had any cache_purges applied since we took
 * our identity snapshot... this check needs to
 * be done behind the name cache lock
 */
void
cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
{

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	NAME_CACHE_LOCK();

	/* only enter if no cache_purge bumped the generation since 'gen' was sampled */
	if (dvp->v_nc_generation == gen)
		(void)cache_enter_locked(dvp, vp, cnp, NULL);

	NAME_CACHE_UNLOCK();
}


/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 1 reference on the string entered
	 * for the cache_enter_locked to consume
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();
}


/*
 * Common guts of the cache_enter*() entry points: insert a
 * (dvp, name) -> vp mapping, or a negative entry when vp is NULL.
 * Called with the name cache lock held exclusively (all callers above
 * take NAME_CACHE_LOCK first).  Consumes the 'strname' reference when
 * one is passed in; takes its own reference otherwise.
 */
static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
	struct namecache *ncp, *negp;
	struct nchashhead *ncpp;

	if (nc_disabled)
		return;

	/*
	 * if the entry is for -ve caching vp is null
	 */
	if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
		/*
		 * someone beat us to the punch..
		 * this vnode is already in the cache
		 */
		if (strname != NULL)
			vfs_removename(strname);
		return;
	}
	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the list is in use.
	 * Otherwise we use the one at the front of the list.
	 */
	if (numcache < desiredNodes &&
	    ((ncp = nchead.tqh_first) == NULL ||
	     ncp->nc_hash.le_prev != 0)) {
		/*
		 * Allocate one more entry
		 */
		ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
		numcache++;
	} else {
		/*
		 * reuse an old entry
		 */
		ncp = TAILQ_FIRST(&nchead);
		TAILQ_REMOVE(&nchead, ncp, nc_entry);

		if (ncp->nc_hash.le_prev != 0) {
			/*
			 * still in use... we need to
			 * delete it before re-using it
			 */
			NCHSTAT(ncs_stolen);
			cache_delete(ncp, 0);
		}
	}
	NCHSTAT(ncs_enters);

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_hashval = cnp->cn_hash;

	if (strname == NULL)
		ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
	else
		ncp->nc_name = strname;
	/*
	 * make us the newest entry in the cache
	 * i.e. we'll be the last to be stolen
	 */
	TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
			if (p == ncp)
				panic("cache_enter: duplicate");
	}
#endif
	/*
	 * make us available to be found via lookup
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

	if (vp) {
		/*
		 * add to the list of name cache entries
		 * that point at vp
		 */
		LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
	} else {
		/*
		 * this is a negative cache entry (vp == NULL)
		 * stick it on the negative cache list.
		 */
		TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

		ncs_negtotal++;

		if (ncs_negtotal > desiredNegNodes) {
			/*
			 * if we've reached our desired limit
			 * of negative cache entries, delete
			 * the oldest
			 */
			negp = TAILQ_FIRST(&neghead);
			cache_delete(negp, 1);
		}
	}
	/*
	 * add us to the list of name cache entries that
	 * are children of dvp
	 */
	LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
}


/*
 * Initialize CRC-32 remainder table.
 */
static void init_crc32(void)
{
	/*
	 * the CRC-32 generator polynomial is:
	 *    x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
	 *        + x^8  + x^7  + x^5  + x^4  + x^2  + x + 1
	 */
	unsigned int crc32_polynomial = 0x04c11db7;
	unsigned int i,j;

	/*
	 * pre-calculate the CRC-32 remainder for each possible octet encoding
	 */
	for (i = 0; i < 256; i++) {
		unsigned int crc_rem = i << 24;

		for (j = 0; j < 8; j++) {
			if (crc_rem & 0x80000000)
				crc_rem = (crc_rem << 1) ^ crc32_polynomial;
			else
				crc_rem = (crc_rem << 1);
		}
		crc32tab[i] = crc_rem;
	}
}


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int	i;

	desiredNegNodes = (desiredvnodes / 10);
	desiredNodes = desiredvnodes + desiredNegNodes;

	TAILQ_INIT(&nchead);
	TAILQ_INIT(&neghead);

	init_crc32();

	nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
	nchashmask = nchash;
	nchash++;

	init_string_table();

	/* Allocate name cache lock group attribute and group */
	namecache_lck_grp_attr= lck_grp_attr_alloc_init();

	namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);

	/* Allocate name cache lock attribute */
	namecache_lck_attr = lck_attr_alloc_init();

	/* Allocate name cache lock */
	namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);


	/* Allocate string cache lock group attribute and group */
	strcache_lck_grp_attr= lck_grp_attr_alloc_init();

	strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);

	/* Allocate string cache lock attribute */
	strcache_lck_attr = lck_attr_alloc_init();

	/* Allocate string cache lock */
	strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);

	/* per-bucket mutexes used by add_name_internal()/vfs_removename() */
	for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
		lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
}

void
name_cache_lock_shared(void)
{
	lck_rw_lock_shared(namecache_rw_lock);
}

void
name_cache_lock(void)
{
	lck_rw_lock_exclusive(namecache_rw_lock);
}

void
name_cache_unlock(void)
{
	lck_rw_done(namecache_rw_lock);
}


/*
 * Grow the name cache hash table to accommodate 'newsize' desired
 * vnodes.  Shrinking is not supported.  Returns 0 on success (or when
 * no growth is needed), ENOMEM if the new table cannot be allocated.
 */
int
resize_namecache(u_int newsize)
{
    struct nchashhead	*new_table;
    struct nchashhead	*old_table;
    struct nchashhead	*old_head, *head;
    struct namecache	*entry, *next;
    uint32_t		i, hashval;
    int			dNodes, dNegNodes;
    u_long		new_size, old_size;

    dNegNodes = (newsize / 10);
    dNodes = newsize + dNegNodes;

    // we don't support shrinking yet
    if (dNodes <= desiredNodes) {
	return 0;
    }
    new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
    new_size  = nchashmask + 1;

    if (new_table == NULL) {
	return ENOMEM;
    }

    NAME_CACHE_LOCK();
    // do the switch!
    old_table = nchashtbl;
    nchashtbl = new_table;
    old_size  = nchash;
    nchash    = new_size;

    // walk the old table and insert all the entries into
    // the new table
    //
    for(i=0; i < old_size; i++) {
	old_head = &old_table[i];
	for (entry=old_head->lh_first; entry != NULL; entry=next) {
	    //
	    // XXXdbg - Beware: this assumes that hash_string() does
	    //                  the same thing as what happens in
	    //                  lookup() over in vfs_lookup.c
	    hashval = hash_string(entry->nc_name, 0);
	    entry->nc_hashval = hashval;
	    head = NCHHASH(entry->nc_dvp, hashval);

	    // capture the next pointer before LIST_INSERT_HEAD rewrites it
	    next = entry->nc_hash.le_next;
	    LIST_INSERT_HEAD(head, entry, nc_hash);
	}
    }
    desiredNodes = dNodes;
    desiredNegNodes = dNegNodes;

    NAME_CACHE_UNLOCK();
    FREE(old_table, M_CACHE);

    return 0;
}

/*
 * Unlink 'ncp' from every list it is on (vnode links or negative list,
 * sibling list, hash chain) and release its name string.  Callers hold
 * the name cache lock exclusively (see cache_purge et al).  When
 * age_entry is set, the entry is moved to the head of the LRU so it is
 * the next one reused by cache_enter_locked().
 */
static void
cache_delete(struct namecache *ncp, int age_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	LIST_REMOVE(ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	if (age_entry) {
		/*
		 * make it the next one available
		 * for cache_enter's use
		 */
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
	}
	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
}


/*
 * purge the entry associated with the
 * specified vnode from the name cache
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	/* fast out: nothing cached for this vnode, no lock needed */
	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (LIST_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP))
		return;

	NAME_CACHE_LOCK();

	/* invalidate any pending cache_enter_with_gen() for the parent */
	if (vp->v_parent)
		vp->v_parent->v_nc_generation++;

	while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
		cache_delete(ncp, 1);

	while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
		cache_delete(ncp, 1);

	/*
	 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

/*
 * Purge all negative cache entries that are children of the
 * given vnode.  A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

	LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp)
		if (ncp->nc_vp == NULL)
			cache_delete(ncp , 1);

	NAME_CACHE_UNLOCK();
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
restart:
		/* restart the chain scan after each delete since links change */
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}



//
// String ref routines
//
// The string table interns name strings so that many vnodes and name
// cache entries can share one refcounted copy of each unique name.
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long   string_table_mask;
static uint32_t filled_buckets=0;	/* buckets with at least one entry */


typedef struct string_t {
	LIST_ENTRY(string_t)	hash_chain;
	const char		*str;		/* points just past this struct */
	uint32_t		refcount;
} string_t;


static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t          *entry, *next;
	uint32_t           i, hashval;
	u_long             new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively
	 * in order to grow the table... need to recheck
	 * the need to resize again after we've taken
	 * the lock exclusively in case some other thread
	 * beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			// capture the next pointer before LIST_INSERT_HEAD rewrites it
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


/*
 * Exported wrapper: intern a name and take one reference on it.
 * Release with vfs_removename().
 */
const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return (add_name_internal(name, len, hashval, FALSE, flags));
}


/*
 * Intern 'name' in the shared string table, returning the single
 * refcounted copy.  Takes one reference for the caller (two when
 * need_extra_ref is TRUE); each reference is dropped via
 * vfs_removename().  A hashval of 0 means "not computed yet".
 */
static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t           chain_len = 0;	/* counted but currently unused -- kept for debugging */
	uint32_t           hash_index;
	uint32_t           lock_index;
	char              *ptr;

	/*
	 * if the length already accounts for the null-byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len-1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * If the table gets more than 3/4 full, resize it
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	/* per-bucket mutex serializes chain mutation under the shared rw lock */
	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		/* may block in MALLOC below, so leave spin mode */
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		/* the string is stored immediately after the string_t header */
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE)
		entry->refcount++;

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}


/*
 * Drop one reference on an interned name string (from vfs_addname /
 * add_name_internal).  The entry is unhashed and freed when the last
 * reference goes away.  Returns 0 if the string was found, ENOENT
 * otherwise.
 */
int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t           hashval;
	uint32_t           hash_index;
	uint32_t           lock_index;
	int                retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		/* pointer identity comparison is sufficient: strings are interned */
		if (entry->str == nameref) {
			entry->refcount--;

			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				/* still referenced... signal "don't free" below */
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	/* non-NULL entry here means the last reference went away */
	if (entry != NULL)
		FREE(entry, M_TEMP);

	return retval;
}


#ifdef DUMP_STRING_TABLE
/*
 * Debug-only: print every interned string and its refcount.
 */
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t          *entry;
	u_long             i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
			printf("%6d - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif /* DUMP_STRING_TABLE */