1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1993, 1995 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Poul-Henning Kamp of the FreeBSD Project. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * 65 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
72 */ 73#include <sys/param.h> 74#include <sys/systm.h> 75#include <sys/time.h> 76#include <sys/mount_internal.h> 77#include <sys/vnode_internal.h> 78#include <miscfs/specfs/specdev.h> 79#include <sys/namei.h> 80#include <sys/errno.h> 81#include <sys/malloc.h> 82#include <sys/kauth.h> 83#include <sys/user.h> 84#include <sys/paths.h> 85 86#if CONFIG_MACF 87#include <security/mac_framework.h> 88#endif 89 90/* 91 * Name caching works as follows: 92 * 93 * Names found by directory scans are retained in a cache 94 * for future reference. It is managed LRU, so frequently 95 * used names will hang around. Cache is indexed by hash value 96 * obtained from (vp, name) where vp refers to the directory 97 * containing name. 98 * 99 * If it is a "negative" entry, (i.e. for a name that is known NOT to 100 * exist) the vnode pointer will be NULL. 101 * 102 * Upon reaching the last segment of a path, if the reference 103 * is for DELETE, or NOCACHE is set (rewrite), and the 104 * name is located in the cache, it will be dropped. 105 */ 106 107/* 108 * Structures associated with name cacheing. 
 */

/*
 * Global name-cache state.  All of the structures below are protected by
 * the name-cache lock (NAME_CACHE_LOCK / NAME_CACHE_LOCK_SHARED), except
 * where a comment says otherwise.
 */
LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchashmask;
u_long	nchash;				/* size of hash table - 1 */
long	numcache;			/* number of cache entries allocated */
int	desiredNodes;
int	desiredNegNodes;
int	ncs_negtotal;
int	nc_disabled = 0;
TAILQ_HEAD(, namecache) nchead;		/* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;	/* chain of only negative cache entries */


#if COLLECT_STATS

struct	nchstats nchstats;		/* cache effectiveness statistics */

#define	NCHSTAT(v) {		\
        nchstats.v++;		\
}
/*
 * NOTE(review): when COLLECT_STATS is on, the "shared" lock is taken
 * exclusive (name_cache_lock) so the statistics counters can be bumped
 * without a separate atomic — presumably intentional; confirm before
 * relying on shared-mode concurrency in stats builds.
 */
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define	NAME_CACHE_LOCK_SHARED()	name_cache_lock()

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define	NAME_CACHE_LOCK_SHARED()	name_cache_lock_shared()

#endif


/* vars for name cache list lock */
lck_grp_t * namecache_lck_grp;
lck_grp_attr_t * namecache_lck_grp_attr;
lck_attr_t * namecache_lck_attr;

lck_grp_t * strcache_lck_grp;
lck_grp_attr_t * strcache_lck_grp_attr;
lck_attr_t * strcache_lck_attr;

lck_rw_t  * namecache_rw_lock;
lck_rw_t  * strtable_rw_lock;

#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];


static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void);
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);

#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif	/* DUMP_STRING_TABLE */

static void init_crc32(void);
static unsigned int crc32tab[256];


/*
 * Hash a (directory vnode, name-hash) pair into a bucket of nchashtbl.
 * The directory's v_id is mixed in so identical names under different
 * directories land in different chains.
 */
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])



/*
 * This function builds the path to a filename in "buff".  The
 * length of the buffer *INCLUDING* the trailing zero byte is
 * returned in outlen.  NOTE: the length includes the trailing
 * zero byte and thus the length is one greater than what strlen
 * would return.  This is important and lots of code elsewhere
 * in the kernel assumes this behavior.
 *
 * This function can call vnop in file system if the parent vnode
 * does not exist or when called for hardlinks via volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction.  ENOENT means that
 * one of the parents moved while we were building the path.  The
 * caller can special handle this case by calling build_path again.
 *
 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return path
 * that is relative to the nearest mount point, i.e. do not
 * cross over mount points during building the path.
 *
 * passed in vp must have a valid io_count reference
 *
 * Returns 0 on success; EINVAL/ENOENT/ENOSPC/EAGAIN (see above) on failure.
 * The path is assembled right-to-left at the end of buff and slid down to
 * the front before returning.
 */
int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
	vnode_t vp, tvp;
	vnode_t vp_with_iocount;
	vnode_t proc_root_dir_vp;
	char *end;
	const char *str;
	int  len;
	int  ret = 0;
	int  fixhardlink;

	if (first_vp == NULLVP)
		return (EINVAL);

	/*
	 * Grab the process fd so we can evaluate fd_rdir.
	 * (fd_rdir is the per-process chroot root, if any.)
	 */
	if (vfs_context_proc(ctx)->p_fd)
		proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
	else
		proc_root_dir_vp = NULL;

	vp_with_iocount = NULLVP;
again:
	vp = first_vp;

	/* Build the path backwards, starting from the terminating NUL. */
	end = &buff[buflen-1];
	*end = '\0';

	/*
	 * holding the NAME_CACHE_LOCK in shared mode is
	 * sufficient to stabilize both the vp->v_parent chain
	 * and the 'vp->v_mount->mnt_vnodecovered' chain
	 *
	 * if we need to drop this lock, we must first grab the v_id
	 * from the vnode we're currently working with... if that
	 * vnode doesn't already have an io_count reference (the vp
	 * passed in comes with one), we must grab a reference
	 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
	 * deadlocks may result if you call vnode_get while holding
	 * the NAME_CACHE_LOCK... we lazily release the reference
	 * we pick up the next time we encounter a need to drop
	 * the NAME_CACHE_LOCK or before we return from this routine
	 */
	NAME_CACHE_LOCK_SHARED();

	/*
	 * Check if this is the root of a file system.
	 */
	while (vp && vp->v_flag & VROOT) {
		if (vp->v_mount == NULL) {
			ret = EINVAL;
			goto out_unlock;
		}
		if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
			/*
			 * It's the root of the root file system, so it's
			 * just "/".
			 */
			*--end = '/';

			goto out_unlock;
		} else {
			/*
			 * This is the root of the volume and the caller does not
			 * want to cross mount points.  Therefore just return
			 * '/' as the relative path.
			 */
			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				*--end = '/';
				goto out_unlock;
			} else {
				/* Hop over the mount point and keep climbing. */
				vp = vp->v_mount->mnt_vnodecovered;
			}
		}
	}

	while ((vp != NULLVP) && (vp->v_parent != vp)) {
		int  vid;

		/*
		 * For hardlinks the v_name may be stale, so if its OK
		 * to enter a file system, ask the file system for the
		 * name and parent (below).
		 */
		fixhardlink = (vp->v_flag & VISHARDLINK) &&
		              (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
		              !(flags & BUILDPATH_NO_FS_ENTER);

		if (!fixhardlink) {
			str = vp->v_name;

			if (str == NULL || *str == '\0') {
				if (vp->v_parent != NULL)
					ret = EINVAL;
				else
					ret = ENOENT;
				goto out_unlock;
			}
			len = strlen(str);
			/*
			 * Check that there's enough space (including space for the '/')
			 */
			if ((end - buff) < (len + 1)) {
				ret = ENOSPC;
				goto out_unlock;
			}
			/*
			 * Copy the name backwards.
			 */
			str += len;

			for (; len > 0; len--)
				*--end = *--str;
			/*
			 * Add a path separator.
			 */
			*--end = '/';
		}

		/*
		 * Walk up the parent chain.
		 */
		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
				(flags & BUILDPATH_NO_FS_ENTER)) {

			/*
			 * In this if () block we are not allowed to enter the filesystem
			 * to conclusively get the most accurate parent identifier.
			 * As a result, if 'vp' does not identify '/' and it
			 * does not have a valid v_parent, then error out
			 * and disallow further path construction
			 */
			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
				/* Only '/' is allowed to have a NULL parent pointer */
				ret = EINVAL;

				/* The code below will exit early if 'tvp = vp' == NULL */
			}
			vp = vp->v_parent;

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		} else {
			/*
			 * No parent, go get it if supported.
			 */
			struct vnode_attr  va;
			vnode_t  dvp;

			/*
			 * Make sure file system supports obtaining a path from id.
			 */
			if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
				ret = ENOENT;
				goto out_unlock;
			}
			/*
			 * We must drop the name-cache lock to call into the FS.
			 * Remember v_id so vnode_getwithvid can detect recycling.
			 */
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;	/* vnode was recycled; restart from first_vp */
				vp_with_iocount = vp;
			}
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_parentid);

			if (fixhardlink) {
				VATTR_WANTED(&va, va_name);
				MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
			} else {
				va.va_name = NULL;
			}
			/*
			 * Ask the file system for its parent id and for its name (optional).
			 */
			ret = vnode_getattr(vp, &va, ctx);

			if (fixhardlink) {
				if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
					str = va.va_name;
					/* Refresh the stale hardlink name in the vnode. */
					vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
				} else if (vp->v_name) {
					str = vp->v_name;
					ret = 0;
				} else {
					ret = ENOENT;
					goto bad_news;
				}
				len = strlen(str);

				/*
				 * Check that there's enough space.
				 */
				if ((end - buff) < (len + 1)) {
					ret = ENOSPC;
				} else {
					/* Copy the name backwards. */
					str += len;

					for (; len > 0; len--) {
						*--end = *--str;
					}
					/*
					 * Add a path separator.
					 */
					*--end = '/';
				}
bad_news:
				FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
			}
			if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
				ret = ENOENT;
				goto out;
			}
			/*
			 * Ask the file system for the parent vnode.
			 */
			if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
				goto out;

			if (!fixhardlink && (vp->v_parent != dvp))
				vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);

			/*
			 * VFS_VGET returned dvp with an iocount; it replaces any
			 * reference we were lazily holding.
			 */
			if (vp_with_iocount)
				vnode_put(vp_with_iocount);
			vp = dvp;
			vp_with_iocount = vp;

			NAME_CACHE_LOCK_SHARED();

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent)
				vp = vp->v_parent;
		}

		/*
		 * When a mount point is crossed switch the vp.
		 * Continue until we find the root or we find
		 * a vnode that's not the root of a mounted
		 * file system.
		 */
		tvp = vp;

		while (tvp) {
			if (tvp == proc_root_dir_vp)
				goto out_unlock;	/* encountered the root */

			if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
				break;			/* not the root of a mounted FS */

			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				/* Do not cross over mount points */
				tvp = NULL;
			} else {
				tvp = tvp->v_mount->mnt_vnodecovered;
			}
		}
		if (tvp == NULLVP)
			goto out_unlock;
		vp = tvp;

		if (vp && (flags & BUILDPATH_CHECKACCESS)) {
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;	/* vnode was recycled; restart */
				vp_with_iocount = vp;
			}
			if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
				goto out;	/* no peeking */

			NAME_CACHE_LOCK_SHARED();
		}
	}
out_unlock:
	NAME_CACHE_UNLOCK();
out:
	if (vp_with_iocount)
		vnode_put(vp_with_iocount);
	/*
	 * Slide the name down to the beginning of the buffer.
	 */
	memmove(buff, end, &buff[buflen] - end);

	/*
	 * length includes the trailing zero byte
	 */
	*outlen = &buff[buflen] - end;

	/* One of the parents was moved during path reconstruction.
	 * The caller is interested in knowing whether any of the
	 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
	 */
	if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
		ret = EAGAIN;
	}

	return (ret);
}


/*
 * return NULLVP if vp's parent doesn't
 * exist, or we can't get a valid iocount
 * else return the parent of vp
 * (the returned vnode carries an iocount; caller must vnode_put it)
 */
vnode_t
vnode_getparent(vnode_t vp)
{
	vnode_t pvp = NULLVP;
	int	pvid;

	NAME_CACHE_LOCK_SHARED();
	/*
	 * v_parent is stable behind the name_cache lock
	 * however, the only thing we can really guarantee
	 * is that we've grabbed a valid iocount on the
	 * parent of 'vp' at the time we took the name_cache lock...
	 * once we drop the lock, vp could get re-parented
	 */
	if ( (pvp = vp->v_parent) != NULLVP ) {
		pvid = pvp->v_id;

		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(pvp, pvid) != 0)
			pvp = NULL;
	} else
		NAME_CACHE_UNLOCK();
	return (pvp);
}

/*
 * Return a referenced copy of vp's name, or NULL if it has none.
 * The returned string holds a string-table reference; release it
 * with vnode_putname().
 */
const char *
vnode_getname(vnode_t vp)
{
	const char *name = NULL;

	NAME_CACHE_LOCK_SHARED();

	if (vp->v_name)
		name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
	NAME_CACHE_UNLOCK();

	return (name);
}

/*
 * Release a name obtained from vnode_getname()/vfs_addname().
 */
void
vnode_putname(const char *name)
{
	vfs_removename(name);
}

static const char unknown_vnodename[] = "(unknown vnode name)";

/*
 * Like vnode_getname(), but always returns something printable:
 * for nameless block/char devices a synthetic "b(maj, min)"/"c(maj, min)"
 * name is fabricated, otherwise a shared constant placeholder.
 * Pair with vnode_putname_printable(), which knows not to release
 * the placeholder.
 */
const char *
vnode_getname_printable(vnode_t vp)
{
	const char *name = vnode_getname(vp);
	if (name != NULL)
		return name;

	switch (vp->v_type) {
		case VCHR:
		case VBLK:
			{
			/*
			 * Create an artificial dev name from
			 * major and minor device number
			 */
			char dev_name[64];
			(void) snprintf(dev_name, sizeof(dev_name),
					"%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
					major(vp->v_rdev), minor(vp->v_rdev));
			/*
			 * Add the newly created dev name to the name
			 * cache to allow easier cleanup. Also,
			 * vfs_addname allocates memory for the new name
			 * and returns it.
			 */
			NAME_CACHE_LOCK_SHARED();
			name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
			NAME_CACHE_UNLOCK();
			return name;
			}
		default:
			return unknown_vnodename;
	}
}

/*
 * Release a name obtained from vnode_getname_printable().
 */
void
vnode_putname_printable(const char *name)
{
	if (name == unknown_vnodename)
		return;
	vnode_putname(name);
}


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * its new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 *
 * VNODE_UPDATE_PURGE additionally drops all name cache entries linking
 * to or from vp and invalidates its cached authorization state.
 */
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
	struct	namecache *ncp;
	vnode_t	old_parentvp = NULLVP;
#if NAMEDSTREAMS
	int isstream = (vp->v_flag & VISNAMEDSTREAM);
	int kusecountbumped = 0;
#endif
	kauth_cred_t tcred = NULL;
	const char *vname = NULL;
	const char *tname = NULL;

	if (flags & VNODE_UPDATE_PARENT) {
		if (dvp && vnode_ref(dvp) != 0) {
			/* couldn't take a usecount ref on the new parent; ignore it */
			dvp = NULLVP;
		}
#if NAMEDSTREAMS
		/* Don't count a stream's parent ref during unmounts */
		if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
			vnode_lock_spin(dvp);
			++dvp->v_kusecount;
			kusecountbumped = 1;
			vnode_unlock(dvp);
		}
#endif
	} else {
		dvp = NULLVP;
	}
	if ( (flags & VNODE_UPDATE_NAME) ) {
		if (name != vp->v_name) {
			if (name && *name) {
				if (name_len == 0)
					name_len = strlen(name);
				/* take the string-table ref before we grab the lock */
				tname = vfs_addname(name, name_len, name_hashval, 0);
			}
		} else
			flags &= ~VNODE_UPDATE_NAME;
	}
	if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {

		NAME_CACHE_LOCK();

		if ( (flags & VNODE_UPDATE_PURGE) ) {

			if (vp->v_parent)
				vp->v_parent->v_nc_generation++;

			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);

			while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
				cache_delete(ncp, 1);

			/*
			 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
			 */
			tcred = vp->v_cred;
			vp->v_cred = NOCRED;
			vp->v_authorized_actions = 0;
		}
		if ( (flags & VNODE_UPDATE_NAME) ) {
			vname = vp->v_name;
			vp->v_name = tname;
		}
		if (flags & VNODE_UPDATE_PARENT) {
			if (dvp != vp && dvp != vp->v_parent) {
				old_parentvp = vp->v_parent;
				vp->v_parent = dvp;
				dvp = NULLVP;	/* ownership of the ref moved to vp->v_parent */

				if (old_parentvp)
					flags |= VNODE_UPDATE_CACHE;
			}
		}
		if (flags & VNODE_UPDATE_CACHE) {
			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);
		}
		NAME_CACHE_UNLOCK();

		if (vname != NULL)
			vfs_removename(vname);

		if (IS_VALID_CRED(tcred))
			kauth_cred_unref(&tcred);
	}
	if (dvp != NULLVP) {
#if NAMEDSTREAMS
		/* Back-out the ref we took if we lost a race for vp->v_parent. */
		if (kusecountbumped) {
			vnode_lock_spin(dvp);
			if (dvp->v_kusecount > 0)
				--dvp->v_kusecount;
			vnode_unlock(dvp);
		}
#endif
		vnode_rele(dvp);
	}
	if (old_parentvp) {
		struct  uthread *ut;

#if NAMEDSTREAMS
		if (isstream) {
			vnode_lock_spin(old_parentvp);
			if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
				--old_parentvp->v_kusecount;
			vnode_unlock(old_parentvp);
		}
#endif
		ut = get_bsdthread_info(current_thread());

		/*
		 * indicate to vnode_rele that it shouldn't do a
		 * vnode_reclaim at this time... instead it will
		 * chain the vnode to the uu_vreclaims list...
		 * we'll be responsible for calling vnode_reclaim
		 * on each of the vnodes in this list...
		 */
		ut->uu_defer_reclaims = 1;
		ut->uu_vreclaims = NULLVP;

		while ( (vp = old_parentvp) != NULLVP ) {

			vnode_lock_spin(vp);
			vnode_rele_internal(vp, 0, 0, 1);

			/*
			 * check to see if the vnode is now in the state
			 * that would have triggered a vnode_reclaim in vnode_rele
			 * if it is, we save its parent pointer and then NULL
			 * out the v_parent field... we'll drop the reference
			 * that was held on the next iteration of this loop...
			 * this short circuits a potential deep recursion if we
			 * have a long chain of parents in this state...
			 * we'll sit in this loop until we run into
			 * a parent in this chain that is not in this state
			 *
			 * make our check and the vnode_rele atomic
			 * with respect to the current vnode we're working on
			 * by holding the vnode lock
			 * if vnode_rele deferred the vnode_reclaim and has put
			 * this vnode on the list to be reaped by us, then
			 * it has left this vnode with an iocount == 1
			 */
			if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
			     ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
				/*
				 * vnode_rele wanted to do a vnode_reclaim on this vnode
				 * it should be sitting on the head of the uu_vreclaims chain
				 * pull the parent pointer now so that when we do the
				 * vnode_reclaim for each of the vnodes in the uu_vreclaims
				 * list, we won't recurse back through here
				 *
				 * need to do a convert here in case vnode_rele_internal
				 * returns with the lock held in the spin mode... it
				 * can drop and retake the lock under certain circumstances
				 */
				vnode_lock_convert(vp);

				NAME_CACHE_LOCK();
				old_parentvp = vp->v_parent;
				vp->v_parent = NULLVP;
				NAME_CACHE_UNLOCK();
			} else {
				/*
				 * we're done... we ran into a vnode that isn't
				 * being terminated
				 */
				old_parentvp = NULLVP;
			}
			vnode_unlock(vp);
		}
		ut->uu_defer_reclaims = 0;

		while ( (vp = ut->uu_vreclaims) != NULLVP) {
			ut->uu_vreclaims = vp->v_defer_reclaimlist;

			/*
			 * vnode_put will drive the vnode_reclaim if
			 * we are still the only reference on this vnode
			 */
			vnode_put(vp);
		}
	}
}


/*
 * Mark a vnode as having multiple hard links.  HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup.  Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void vnode_setmultipath(vnode_t vp)
{
	vnode_lock_spin(vp);

	/*
	 * In theory, we're changing the vnode's identity as far as the
	 * name cache is concerned, so we ought to grab the name cache lock
	 * here.  However, there is already a race, and grabbing the name
	 * cache lock only makes the race window slightly smaller.
	 *
	 * The race happens because the vnode already exists in the name
	 * cache, and could be found by one thread before another thread
	 * can set the hard link flag.
	 */

	vp->v_flag |= VISHARDLINK;

	vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void vnode_uncache_credentials(vnode_t vp)
{
	vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so its not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;

	NAME_CACHE_LOCK();

	vp->v_authorized_actions &= ~action;

	if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
	    IS_VALID_CRED(vp->v_cred)) {
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = NOCRED;
	}
	NAME_CACHE_UNLOCK();

	if (tcred != NOCRED)
		kauth_cred_unref(&tcred);
}


extern int bootarg_vnode_cache_defeat;	/* default = 0, from bsd_init.c */

/*
 * Check whether 'action' is already authorized for the caller's
 * credential on vp, using only the cached rights (v_cred /
 * v_authorized_actions).  Honors the per-mount TTL rules and the
 * vnode_cache_defeat boot-arg.  Returns TRUE only on a cache hit;
 * FALSE means "unknown", not "denied".
 */
boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t	ucred;
	boolean_t	retval = FALSE;

	/* Boot argument to defeat rights caching */
	if (bootarg_vnode_cache_defeat)
		return FALSE;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl) {
			if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
				/*
				 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
				 * we will only allow a SEARCH right on a directory to be cached...
				 * that cached right always has a default TTL associated with it
				 */
				if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
					vp = NULLVP;
			}
			if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
				/* TTL expired: drop everything we had cached */
				vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
				vp = NULLVP;
			}
		} else
			vp = NULLVP;
	}
	if (vp != NULLVP) {
		ucred = vfs_context_ucred(ctx);

		NAME_CACHE_LOCK_SHARED();

		if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
			retval = TRUE;

		NAME_CACHE_UNLOCK();
	}
	return retval;
}


/*
 * Record that 'action' has been authorized for the caller's credential
 * on vp, so later lookups can skip re-authorization.  Subject to the
 * same per-mount TTL restrictions as vnode_cache_is_authorized().
 * Switching credentials resets the previously cached actions.
 */
void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;
	kauth_cred_t ucred;
	struct timeval tv;
	boolean_t ttl_active = FALSE;

	ucred = vfs_context_ucred(ctx);

	if (!IS_VALID_CRED(ucred) || action == 0)
		return;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl == 0)
			return;

		if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
			/*
			 * only cache SEARCH action for filesystems marked
			 * MNTK_AUTH_OPAQUE on VDIRs...
			 * the lookup_path code will time these out
			 */
			if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
				return;
		}
		ttl_active = TRUE;

		microuptime(&tv);
	}
	NAME_CACHE_LOCK();

	if (vp->v_cred != ucred) {
		kauth_cred_ref(ucred);
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = ucred;
		vp->v_authorized_actions = 0;
	}
	if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
		/*
		 * only reset the timestamp on the
		 * first authorization cached after the previous
		 * timer has expired or we're switching creds...
		 * 'vnode_cache_is_authorized' will clear the
		 * authorized actions if the TTL is active and
		 * it has expired
		 */
		vp->v_cred_timestamp = tv.tv_sec;
	}
	vp->v_authorized_actions |= action;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}


/*
 * Return TRUE if vp's cached-rights timestamp has outlived the
 * mount's auth-cache TTL (compared against the monotonic uptime clock).
 */
boolean_t vnode_cache_is_stale(vnode_t vp)
{
	struct timeval	tv;
	boolean_t	retval;

	microuptime(&tv);

	if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
		retval = TRUE;
	else
		retval = FALSE;

	return retval;
}



/*
 * Returns:	0			Success
 *	ERECYCLE	vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
 * 		This errno value should not be seen by anyone outside of the kernel.
 */
/*
 * cache_lookup_path:
 *
 * Fast-path component-by-component walk of the pathname in 'cnp'/'ndp'
 * entirely under the shared name cache lock.  For each component it
 * computes the CRC32 hash, normalizes slashes, checks the cached
 * per-directory authorization (setting *dp_authorized when the cached
 * credential/action grants KAUTH_VNODE_SEARCH), and follows cached
 * child entries, crossing mount points via mnt_realrootvp when safe.
 * It breaks out of the loop (falling back to the slow VNOP_LOOKUP path
 * in lookup()) whenever anything cannot be resolved purely from cache.
 * On return, ndp->ni_dvp/ni_vp carry iocounts per the rules documented
 * inline below.
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
    vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
	char *cp;		/* pointer into pathname argument */
	int vid;
	int vvid = 0;		/* protected by vp != NULLVP */
	vnode_t vp = NULLVP;
	vnode_t tdp = NULLVP;
	kauth_cred_t ucred;
	boolean_t ttl_enabled = FALSE;
	struct timeval tv;
	mount_t mp;
	unsigned int hash;
	int error = 0;

#if CONFIG_TRIGGERS
	vnode_t trigger_vp;
#endif /* CONFIG_TRIGGERS */

	ucred = vfs_context_ucred(ctx);
	ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

	NAME_CACHE_LOCK_SHARED();

	/*
	 * only sample the clock if the starting directory's mount requires
	 * TTL-based revalidation of cached authorizations
	 */
	if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		ttl_enabled = TRUE;
		microuptime(&tv);
	}
	for (;;) {
		/*
		 * Search a directory.
		 *
		 * The cn_hash value is for use by cache_lookup
		 * The last component of the filename is left accessible via
		 * cnp->cn_nameptr for callers that need the name.
		 */
		hash = 0;
		cp = cnp->cn_nameptr;

		while (*cp && (*cp != '/')) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
		/*
		 * the crc generator can legitimately generate
		 * a 0... however, 0 for us means that we
		 * haven't computed a hash, so use 1 instead
		 */
		if (hash == 0)
			hash = 1;
		cnp->cn_hash = hash;
		cnp->cn_namelen = cp - cnp->cn_nameptr;

		ndp->ni_pathlen -= cnp->cn_namelen;
		ndp->ni_next = cp;

		/*
		 * Replace multiple slashes by a single slash and trailing slashes
		 * by a null. This must be done before VNOP_LOOKUP() because some
		 * fs's don't know about trailing slashes. Remember if there were
		 * trailing slashes to handle symlinks, existing non-directories
		 * and non-existing files that won't be directories specially later.
		 */
		while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
			cp++;
			ndp->ni_pathlen--;

			if (*cp == '\0') {
				ndp->ni_flag |= NAMEI_TRAILINGSLASH;
				*ndp->ni_next = '\0';
			}
		}
		ndp->ni_next = cp;

		cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

		if (*cp == '\0')
			cnp->cn_flags |= ISLASTCN;

		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
			cnp->cn_flags |= ISDOTDOT;

		*dp_authorized = 0;
#if NAMEDRSRCFORK
		/*
		 * Process a request for a file's resource fork.
		 *
		 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
		 */
		if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
		    (cp[1] == '.' && cp[2] == '.') &&
		    bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
			/* Skip volfs file systems that don't support native streams. */
			if ((dp->v_mount != NULL) &&
			    (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
			    (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
				goto skiprsrcfork;
			}
			cnp->cn_flags |= CN_WANTSRSRCFORK;
			cnp->cn_flags |= ISLASTCN;
			ndp->ni_next[0] = '\0';
			ndp->ni_pathlen = 1;
		}
skiprsrcfork:
#endif

#if CONFIG_MACF

		/*
		 * Name cache provides authorization caching (see below)
		 * that will short circuit MAC checks in lookup().
		 * We must perform MAC check here. On denial
		 * dp_authorized will remain 0 and second check will
		 * be performed in lookup().
		 */
		if (!(cnp->cn_flags & DONOTAUTH)) {
			error = mac_vnode_check_lookup(ctx, dp, cnp);
			if (error) {
				NAME_CACHE_UNLOCK();
				goto errorout;
			}
		}
#endif /* MAC */
		/*
		 * cached authorization has timed out... bail to the
		 * slow path so it gets re-established
		 */
		if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
			break;

		/*
		 * NAME_CACHE_LOCK holds these fields stable
		 */
		if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
		    !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE))
			break;

		/*
		 * indicate that we're allowed to traverse this directory...
		 * even if we fail the cache lookup or decide to bail for
		 * some other reason, this information is valid and is used
		 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
		 */
		*dp_authorized = 1;

		if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
			if (cnp->cn_nameiop != LOOKUP)
				break;
			if (cnp->cn_flags & LOCKPARENT)
				break;
			if (cnp->cn_flags & NOCACHE)
				break;
			if (cnp->cn_flags & ISDOTDOT) {
				/*
				 * Force directory hardlinks to go to
				 * file system for ".." requests.
				 */
				if (dp && (dp->v_flag & VISHARDLINK)) {
					break;
				}
				/*
				 * Quit here only if we can't use
				 * the parent directory pointer or
				 * don't have one.  Otherwise, we'll
				 * use it below.
				 */
				if ((dp->v_flag & VROOT) ||
				    dp == ndp->ni_rootdir ||
				    dp->v_parent == NULLVP)
					break;
			}
		}

		if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
			/*
			 * Force lookup to go to the filesystem with
			 * all cnp fields set up.
			 */
			break;
		}

		/*
		 * "." and ".." aren't supposed to be cached, so check
		 * for them before checking the cache.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
			vp = dp;
		else if ( (cnp->cn_flags & ISDOTDOT) )
			vp = dp->v_parent;
		else {
			if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
				break;

			if ( (vp->v_flag & VISHARDLINK) ) {
				/*
				 * The file system wants a VNOP_LOOKUP on this vnode
				 */
				vp = NULL;
				break;
			}
		}
		if ( (cnp->cn_flags & ISLASTCN) )
			break;

		if (vp->v_type != VDIR) {
			if (vp->v_type != VLNK)
				vp = NULL;
			break;
		}

		if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
			/*
			 * cross the mount point only if the cached real-root
			 * snapshot is still valid for the current mount generation
			 */
			if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
			    mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
				break;
			vp = mp->mnt_realrootvp;
		}

#if CONFIG_TRIGGERS
		/*
		 * After traversing all mountpoints stacked here, if we have a
		 * trigger in hand, resolve it.  Note that we don't need to
		 * leave the fast path if the mount has already happened.
		 */
		if ((vp->v_resolve != NULL) &&
		    (vp->v_resolve->vr_resolve_func != NULL)) {
			break;
		}
#endif /* CONFIG_TRIGGERS */


		dp = vp;
		vp = NULLVP;

		/* advance to the next pathname component, skipping slashes */
		cnp->cn_nameptr = ndp->ni_next + 1;
		ndp->ni_pathlen--;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}
	if (vp != NULLVP)
		vvid = vp->v_id;
	vid = dp->v_id;

	NAME_CACHE_UNLOCK();

	if ((vp != NULLVP) && (vp->v_type != VLNK) &&
	    ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
		/*
		 * if we've got a child and it's the last component, and
		 * the lookup doesn't need to return the parent then we
		 * can skip grabbing an iocount on the parent, since all
		 * we're going to do with it is a vnode_put just before
		 * we return from 'lookup'.  If it's a symbolic link,
		 * we need the parent in case the link happens to be
		 * a relative pathname.
		 */
		tdp = dp;
		dp = NULLVP;
	} else {
need_dp:
		/*
		 * return the last directory we looked at
		 * with an io reference held.  If it was the one passed
		 * in as a result of the last iteration of VNOP_LOOKUP,
		 * it should already hold an io ref.  No need to increase ref.
		 */
		if (last_dp != dp){

			if (dp == ndp->ni_usedvp) {
				/*
				 * if this vnode matches the one passed in via USEDVP
				 * then this context already holds an io_count... just
				 * use vnode_get to get an extra ref for lookup to play
				 * with... can't use the getwithvid variant here because
				 * it will block behind a vnode_drain which would result
				 * in a deadlock (since we already own an io_count that the
				 * vnode_drain is waiting on)... vnode_get grabs the io_count
				 * immediately w/o waiting... it always succeeds
				 */
				vnode_get(dp);
			} else if ((error = vnode_getwithvid_drainok(dp, vid))) {
				/*
				 * failure indicates the vnode
				 * changed identity or is being
				 * TERMINATED... in either case
				 * punt this lookup.
				 *
				 * don't necessarily return ENOENT, though, because
				 * we really want to go back to disk and make sure it's
				 * there or not if someone else is changing this
				 * vnode. That being said, the one case where we do want
				 * to return ENOENT is when the vnode's mount point is
				 * in the process of unmounting and we might cause a deadlock
				 * in our attempt to take an iocount. An ENODEV error return
				 * from vnode_get* is an indication of this, but we change it
				 * to ENOENT for upper layers.
				 */
				if (error == ENODEV) {
					error = ENOENT;
				} else {
					error = ERECYCLE;
				}
				goto errorout;
			}
		}
	}
	if (vp != NULLVP) {
		if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
			vp = NULLVP;

			/*
			 * can't get reference on the vp we'd like
			 * to return... if we didn't grab a reference
			 * on the directory (due to fast path bypass),
			 * then we need to do it now... we can't return
			 * with both ni_dvp and ni_vp NULL, and no
			 * error condition
			 */
			if (dp == NULLVP) {
				dp = tdp;
				goto need_dp;
			}
		}
	}

	ndp->ni_dvp = dp;
	ndp->ni_vp = vp;

#if CONFIG_TRIGGERS
	trigger_vp = vp ? vp : dp;
	if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
		error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
		if (error) {
			if (vp)
				vnode_put(vp);
			if (dp)
				vnode_put(dp);
			goto errorout;
		}
	}
#endif /* CONFIG_TRIGGERS */

errorout:
	/*
	 * If we came into cache_lookup_path after an iteration of the lookup loop that
	 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
	 * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
	 * when it is no longer needed.  If we get to this point, and last_dp is not NULL
	 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
	 * the case that we got to a subsequent path component and this previous vnode is
	 * no longer needed.  We can then drop the io ref on it.
	 */
	if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
		vnode_put(last_dp);
	}

	//initialized to 0, should be the same if no error cases occurred.
	return error;
}


/*
 * Hash-chain search for 'cnp' in directory 'dvp'.
 * Caller must hold the name cache lock (shared is sufficient);
 * the returned vnode pointer is only stable while the lock is held.
 * Returns NULL on a miss or when the cache is disabled.
 */
static vnode_t
cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval = (cnp->cn_hash & NCHASHMASK);

	if (nc_disabled) {
		return NULL;
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			/* full string compare guards against hash collisions */
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	if (ncp == 0) {
		/*
		 * We failed to find an entry
		 */
		NCHSTAT(ncs_miss);
		return (NULL);
	}
	NCHSTAT(ncs_goodhits);

	return (ncp->nc_vp);
}


unsigned int hash_string(const char *cp, int len);
//
// CRC32-based string hash (shares crc32tab with the lookup fast path).
// Have to take a len argument because we may only need to
// hash part of a componentname; len == 0 means "hash up to the NUL".
// Never returns 0 -- 0 is reserved to mean "hash not yet computed".
//
unsigned int
hash_string(const char *cp, int len)
{
	unsigned hash = 0;

	if (len) {
		while (len--) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	} else {
		while (*cp != '\0') {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	}
	/*
	 * the crc generator can legitimately generate
	 * a 0... however, 0 for us means that we
	 * haven't computed a hash, so use 1 instead
	 */
	if (hash == 0)
		hash = 1;
	return hash;
}


/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.
If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval;
	boolean_t have_exclusive = FALSE;
	uint32_t vid;
	vnode_t vp;

	/* compute (and memoize) the component hash if the caller hasn't */
	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
	hashval = (cnp->cn_hash & NCHASHMASK);

	if (nc_disabled) {
		return 0;
	}

	NAME_CACHE_LOCK_SHARED();

	/*
	 * deletions below need the exclusive lock; on those paths we drop
	 * the shared lock, retake it exclusively, and re-run the search
	 * (the entry may have changed or vanished in the window).
	 */
relook:
	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	/* We failed to find an entry */
	if (ncp == 0) {
		NCHSTAT(ncs_miss);
		NAME_CACHE_UNLOCK();
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}
	vp = ncp->nc_vp;

	/* We found a "positive" match, return the vnode */
	if (vp) {
		NCHSTAT(ncs_goodhits);

		/* snapshot the vid before dropping the lock so we can
		 * detect identity change in vnode_getwithvid below */
		vid = vp->v_id;
		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(vp, vid)) {
#if COLLECT_STATS
			NAME_CACHE_LOCK();
			NCHSTAT(ncs_badvid);
			NAME_CACHE_UNLOCK();
#endif
			return (0);
		}
		*vpp = vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_whiteout field records whether this is a whiteout.
	 */
	NCHSTAT(ncs_neghits);

	if (ncp->nc_whiteout)
		cnp->cn_flags |= ISWHITEOUT;
	NAME_CACHE_UNLOCK();
	return (ENOENT);
}

/*
 * Enter a name into the cache on behalf of vnode creation, returning
 * the interned name string.  The caller hands the returned reference
 * to v_name (see comment below for the double-reference scheme).
 */
const char *
cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 2 references on the string entered
	 * one for the cache_enter_locked to consume
	 * and the second to be consumed by v_name (vnode_create call point)
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();

	return (strname);
}


/*
 * Add an entry to the cache...
 * but first check to see if the directory
 * that this entry is to be associated with has
 * had any cache_purges applied since we took
 * our identity snapshot... this check needs to
 * be done behind the name cache lock
 */
void
cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
{

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	NAME_CACHE_LOCK();

	/* only enter if no purge bumped the generation since 'gen' was read */
	if (dvp->v_nc_generation == gen)
		(void)cache_enter_locked(dvp, vp, cnp, NULL);

	NAME_CACHE_UNLOCK();
}


/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 1 reference on the string entered
	 * for the cache_enter_locked to consume
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();
}


/*
 * Guts of the cache-entry insertion; caller holds the name cache lock
 * exclusively.  'strname' is an interned name reference to consume, or
 * NULL to have this routine intern the name itself.  A NULL 'vp' makes
 * this a negative entry.
 */
static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
	struct namecache *ncp, *negp;
	struct nchashhead *ncpp;

	if (nc_disabled)
		return;

	/*
	 * if the entry is for -ve caching vp is null
	 */
	if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
		/*
		 * someone beat us to the punch..
		 * this vnode is already in the cache
		 */
		if (strname != NULL)
			vfs_removename(strname);
		return;
	}
	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the list is in use.
	 * Otherwise we use the one at the front of the list.
	 */
	if (numcache < desiredNodes &&
	    ((ncp = nchead.tqh_first) == NULL ||
	     ncp->nc_hash.le_prev != 0)) {
		/*
		 * Allocate one more entry
		 */
		ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
		numcache++;
	} else {
		/*
		 * reuse an old entry
		 */
		ncp = TAILQ_FIRST(&nchead);
		TAILQ_REMOVE(&nchead, ncp, nc_entry);

		if (ncp->nc_hash.le_prev != 0) {
			/*
			 * still in use... we need to
			 * delete it before re-using it
			 */
			NCHSTAT(ncs_stolen);
			cache_delete(ncp, 0);
		}
	}
	NCHSTAT(ncs_enters);

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_hashval = cnp->cn_hash;
	ncp->nc_whiteout = FALSE;

	if (strname == NULL)
		ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
	else
		ncp->nc_name = strname;
	/*
	 * make us the newest entry in the cache
	 * i.e. we'll be the last to be stolen
	 */
	TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
			if (p == ncp)
				panic("cache_enter: duplicate");
	}
#endif
	/*
	 * make us available to be found via lookup
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

	if (vp) {
		/*
		 * add to the list of name cache entries
		 * that point at vp
		 */
		LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
	} else {
		/*
		 * this is a negative cache entry (vp == NULL)
		 * stick it on the negative cache list
		 * and record the whiteout state
		 */
		TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

		if (cnp->cn_flags & ISWHITEOUT)
			ncp->nc_whiteout = TRUE;
		ncs_negtotal++;

		if (ncs_negtotal > desiredNegNodes) {
			/*
			 * if we've reached our desired limit
			 * of negative cache entries, delete
			 * the oldest
			 */
			negp = TAILQ_FIRST(&neghead);
			cache_delete(negp, 1);
		}
	}
	/*
	 * add us to the list of name cache entries that
	 * are children of dvp
	 */
	LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
}


/*
 * Initialize CRC-32 remainder table.
 */
static void init_crc32(void)
{
	/*
	 * the CRC-32 generator polynomial is:
	 *    x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
	 *         + x^8  + x^7  + x^5  + x^4  + x^2  + x + 1
	 */
	unsigned int crc32_polynomial = 0x04c11db7;
	unsigned int i,j;

	/*
	 * pre-calculate the CRC-32 remainder for each possible octet encoding
	 */
	for (i = 0; i < 256; i++) {
		unsigned int crc_rem = i << 24;

		for (j = 0; j < 8; j++) {
			if (crc_rem & 0x80000000)
				crc_rem = (crc_rem << 1) ^ crc32_polynomial;
			else
				crc_rem = (crc_rem << 1);
		}
		crc32tab[i] = crc_rem;
	}
}


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int i;

	desiredNegNodes = (desiredvnodes / 10);
	desiredNodes = desiredvnodes + desiredNegNodes;

	TAILQ_INIT(&nchead);
	TAILQ_INIT(&neghead);

	init_crc32();

	/*
	 * hashinit returns the mask in 'nchash'; we keep the mask in
	 * nchashmask and the table size (mask + 1) in nchash
	 */
	nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
	nchashmask = nchash;
	nchash++;

	init_string_table();

	/* Allocate name cache lock group attribute and group */
	namecache_lck_grp_attr= lck_grp_attr_alloc_init();

	namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);

	/* Allocate name cache lock attribute */
	namecache_lck_attr = lck_attr_alloc_init();

	/* Allocate name cache lock */
	namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);


	/* Allocate string cache lock group attribute and group */
	strcache_lck_grp_attr= lck_grp_attr_alloc_init();

	strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);

	/* Allocate string cache lock attribute */
	strcache_lck_attr = lck_attr_alloc_init();

	/* Allocate string cache lock */
	strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);

	for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
		lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
}

/* take the name cache rw lock shared (readers) */
void
name_cache_lock_shared(void)
{
	lck_rw_lock_shared(namecache_rw_lock);
}

/* take the name cache rw lock exclusive (writers) */
void
name_cache_lock(void)
{
	lck_rw_lock_exclusive(namecache_rw_lock);
}

/* release the name cache rw lock (shared or exclusive) */
void
name_cache_unlock(void)
{
	lck_rw_done(namecache_rw_lock);
}


/*
 * Grow the name cache hash table for 'newsize' desired vnodes and
 * rehash every existing entry into it.  Shrinking is not supported.
 * Returns 0 on success (or no-op), ENOMEM if the new table can't
 * be allocated.
 */
int
resize_namecache(u_int newsize)
{
	struct nchashhead *new_table;
	struct nchashhead *old_table;
	struct nchashhead *old_head, *head;
	struct namecache *entry, *next;
	uint32_t i, hashval;
	int dNodes, dNegNodes;
	u_long new_size, old_size;

	dNegNodes = (newsize / 10);
	dNodes = newsize + dNegNodes;

	// we don't support shrinking yet
	if (dNodes <= desiredNodes) {
		return 0;
	}
	// NOTE(review): nchashmask is overwritten by hashinit before the
	// NULL check below -- presumably hashinit leaves it untouched on
	// failure; verify against hashinit's contract.
	new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
	new_size = nchashmask + 1;

	if (new_table == NULL) {
		return ENOMEM;
	}

	NAME_CACHE_LOCK();
	// do the switch!
	old_table = nchashtbl;
	nchashtbl = new_table;
	old_size = nchash;
	nchash = new_size;

	// walk the old table and insert all the entries into
	// the new table
	//
	for(i=0; i < old_size; i++) {
		old_head = &old_table[i];
		for (entry=old_head->lh_first; entry != NULL; entry=next) {
			//
			// XXXdbg - Beware: this assumes that hash_string() does
			//          the same thing as what happens in
			//          lookup() over in vfs_lookup.c
			hashval = hash_string(entry->nc_name, 0);
			entry->nc_hashval = hashval;
			head = NCHHASH(entry->nc_dvp, hashval);

			next = entry->nc_hash.le_next;
			LIST_INSERT_HEAD(head, entry, nc_hash);
		}
	}
	desiredNodes = dNodes;
	desiredNegNodes = dNegNodes;

	NAME_CACHE_UNLOCK();
	FREE(old_table, M_CACHE);

	return 0;
}

/*
 * Unlink 'ncp' from every list it is on (vnode links or negative list,
 * child list, hash chain) and release its name string.  Caller holds
 * the name cache lock exclusively.  'age_entry' moves it to the head
 * of the reuse queue so it is the next entry stolen.
 */
static void
cache_delete(struct namecache *ncp, int age_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	LIST_REMOVE(ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	if (age_entry) {
		/*
		 * make it the next one available
		 * for cache_enter's use
		 */
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
	}
	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
}


/*
 * purge the entry associated with the
 * specified vnode from the name cache
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	/* fast out: nothing cached for this vnode at all */
	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (LIST_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP))
		return;

	NAME_CACHE_LOCK();

	/* invalidate any in-flight cache_enter_with_gen for the parent */
	if (vp->v_parent)
		vp->v_parent->v_nc_generation++;

	while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
		cache_delete(ncp, 1);

	while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
		cache_delete(ncp, 1);

	/*
	 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

/*
 * Purge all negative cache entries that are children of the
 * given vnode.  A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

	LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp)
		if (ncp->nc_vp == NULL)
			cache_delete(ncp , 1);

	NAME_CACHE_UNLOCK();
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
		/* restart the chain after each delete since cache_delete
		 * unlinks the entry we were iterating from */
restart:
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}



//
// String ref routines
//
// Interned, reference-counted name strings shared by the name cache
// and vnode v_name fields.  The hash table is guarded by
// strtable_rw_lock (shared for lookups, exclusive for resize) plus
// NUM_STRCACHE_LOCKS bucket mutexes for per-chain mutation.
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long   string_table_mask;
static uint32_t filled_buckets=0;


typedef struct string_t {
	LIST_ENTRY(string_t)  hash_chain;
	const char *str;       /* points at the chars allocated just past this struct */
	uint32_t    refcount;
} string_t;


/*
 * Double the string-ref hash table and rehash every entry into it.
 * Takes strtable_rw_lock exclusively; rechecks the fill factor after
 * acquiring it in case another thread already resized.
 */
static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t          *entry, *next;
	uint32_t           i, hashval;
	u_long             new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively
	 * in order to grow the table... need to recheck
	 * the need to resize again after we've taken
	 * the lock exclusively in case some other thread
	 * beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


/* one-time setup of the string-ref hash table (called from nchinit) */
static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


/*
 * Public wrapper: intern 'name', taking one reference on the
 * shared string.  Pass hashval == 0 to have it computed.
 */
const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return (add_name_internal(name, len, hashval, FALSE, flags));
}


/*
 * Intern a name string: find an existing entry and bump its refcount,
 * or allocate a new one.  'need_extra_ref' takes a second reference
 * (used by cache_enter_create for the v_name consumer).  Returns the
 * canonical shared string; release with vfs_removename().
 */
static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t           chain_len = 0;
	uint32_t           hash_index;
	uint32_t           lock_index;
	char              *ptr;

	/*
	 * if the length already accounts for the null-byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len-1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * If the table gets more than 3/4 full, resize it
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		/* about to allocate -- leave spin mode so we may block */
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		/* the characters live immediately after the string_t header */
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE)
		entry->refcount++;

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}


/*
 * Drop a reference on an interned name previously returned by
 * vfs_addname/add_name_internal; frees the entry when the count
 * reaches zero.  Returns 0 on success, ENOENT if the string was
 * not found in the table.
 */
int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t           hashval;
	uint32_t           hash_index;
	uint32_t           lock_index;
	int                retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		/* pointer identity is sufficient: strings are interned */
		if (entry->str == nameref) {
			entry->refcount--;

			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				/* still referenced -- don't free below */
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	/* defer the FREE until after the locks are dropped */
	if (entry != NULL)
		FREE(entry, M_TEMP);

	return retval;
}


#ifdef DUMP_STRING_TABLE
/* debug helper: print every interned string with its refcount */
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t          *entry;
	u_long             i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
			printf("%6d - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif /* DUMP_STRING_TABLE */