/* vfs_cache.c revision 89316 */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD: head/sys/kern/vfs_cache.c 89316 2002-01-13 21:37:49Z alfred $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.  Each entry lives on three lists at once:
 * the global hash chain (nc_hash), the parent directory's list of
 * children (nc_src), and the target vnode's list of names (nc_dst)
 * or, for negative entries, the global ncneg LRU list.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to; NULL
					 * for a negative entry */
	u_char nc_flag;			/* flag bits (NCF_*) */
	u_char nc_nlen;			/* length of name */
	char nc_name[0];		/* segment name; storage allocated
					 * inline past the struct */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU list of negative entries */
static u_long	nchash;			/* size of hash table - 1 (mask) */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct nchstats nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics, exported under vfs.cache.*.
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks;
STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");



static void cache_zap __P((struct namecache *ncp));

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1	/* negative entry corresponds to a whiteout */

/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

/*
 * Sysctl handler: emit one int per hash bucket giving that bucket's
 * chain length (debug.hashstat.rawnchash).
 */
static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");

/*
 * Sysctl handler: emit four ints summarizing the hash table --
 * bucket count, buckets used, longest chain, and utilization in
 * hundredths of a percent (debug.hashstat.nchash).
 */
static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	/* pct is percentage of buckets used, scaled by 100 */
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");

/*
 * Remove an entry from all the lists it is on (hash chain, source
 * vnode list, and destination/negative list), release the hold on the
 * parent vnode if this was its last cached child, update the counters,
 * and free the entry.
 *
 * NOTE: despite older comments, the entry is freed outright here, not
 * recycled onto an LRU list.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		/* last cached name under this directory: drop our hold */
		vdrop(ncp->nc_dvp);
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		/* negative entry: remove from the negative LRU */
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	free(ncp, M_VFSCACHE);
}

/*
 * cache_leaf_test()
 *
 *	Test whether this (directory) vnode's namei cache entry contains
 *	subdirectories or not.  Used to determine whether the directory is
 *	a leaf in the namei cache or not.  Note: the directory may still
 *	contain files in the namei cache.
 *
 *	Returns 0 if the directory is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *ncpc;

	/* Walk every cached child of vp looking for a directory. */
	for (ncpc = LIST_FIRST(&vp->v_cache_src);
	     ncpc != NULL;
	     ncpc = LIST_NEXT(ncpc, nc_src)
	) {
		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
			return(-1);
	}
	return(0);
}

/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		/* cache disabled: also tell namei not to add an entry */
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	numcalls++;

	/* "." and ".." are resolved without consulting the hash table. */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			/*
			 * v_dd/v_ddid is a soft (v_id-validated) pointer
			 * to the parent; if it is stale, fall back to
			 * the filesystem.
			 */
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				return (0);
			}
			*vpp = dvp->v_dd;
			return (-1);
		}
	}

	/* Hash is FNV over the name, then folded with the directory v_id. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_flag field (NCF_WHITE) records whether this is a whiteout.
	 * Move the entry to the tail of the negative LRU so it is the last
	 * candidate for reclamation.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	return (ENOENT);
}

/*
 * Add an entry to the cache.  A NULL vp makes a negative entry.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int len;

	if (!doingcache)
		return;

	/* "." is never cached; ".." just refreshes the parent pointer. */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	/* One allocation covers the struct plus the inline name. */
	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ?
		    NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		/* Cache the child directory's parent pointer too. */
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout.
	 * The whiteout flag is stored in the nc_flag field (NCF_WHITE),
	 * set above.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		/* first cached child: hold the directory vnode */
		vhold(dvp);
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/*
	 * Keep negative entries bounded to 1/ncnegfactor of the cache by
	 * reclaiming the oldest one.
	 */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)


/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnodes v_id individually instead of
 * XXX: using the global v_id.
 */

void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	/* Zap every entry where vp is the parent, then every name for vp. */
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	/* Pick a fresh capability number; skip 0 and the current id. */
	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		/* nnp is saved first because cache_zap frees ncp */
		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				cache_zap(ncp);
			}
		}
	}
}

#if 0

/*
 * Flush all directory entries with no child directories held in
 * the cache.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgeleafdirs(ndir)
	int ndir;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp, *ncpc, *nnpc;
	struct vnode *dvp;

	/* Scan hash tables for applicable entries; stop after ndir purges. */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp != 0) {
				/*
				 * Skip over if nc_dvp of this cache holds
				 * a child directory, or the hold count of
				 * nc_dvp is greater than 1 (in which case
				 * nc_dvp is likely to be the working
				 * directory of a process).
				 */
				if (ncp->nc_dvp->v_holdcnt > 1)
					continue;
				for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src);
				     ncpc != 0; ncpc = nnpc) {
					nnpc = LIST_NEXT(ncpc, nc_src);
					if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR)
						break;
				}
				if (ncpc == 0) {
					/*
					 * Zap all of this directory's children,
					 * held in ncp->nc_dvp->v_cache_src.
					 */
					dvp = ncp->nc_dvp;
					while (!LIST_EMPTY(&dvp->v_cache_src))
						cache_zap(LIST_FIRST(&dvp->v_cache_src));

					ndir--;

					/* Restart in case where nnp is reclaimed. */
					nnp = LIST_FIRST(ncpp);
					continue;
				}
			}
		}
	}
	numcachepl++;
}

#endif

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Refuse destructive ops on a read-only filesystem up front. */
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

	/* 0 == cache miss: go to the filesystem. */
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));

	/* ENOENT == cached negative hit. */
	if (error == ENOENT)
		return (error);

	/* -1 == positive hit; acquire the vnode lock per namei's flags. */
	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		/* Unlock child-before-parent to respect lock ordering. */
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE, td);
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		error = vget(vp, LK_EXCLUSIVE, td);
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* Stale hit: drop the vnode and fall through to a real lookup. */
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
}


#ifndef _SYS_SYSPROTO_H_
struct  __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
/*
 * __getcwd() syscall: reconstruct the current working directory path
 * purely from the name cache, building it backwards from the end of a
 * temporary buffer.  Fails (with the numcwdfail* counters recording
 * why) whenever any needed entry has fallen out of the cache.
 */
int
__getcwd(td, uap)
	struct thread *td;
	struct __getcwd_args *uap;
{
	char *bp, *buf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (uap->buflen < 2)
		return (EINVAL);
	if (uap->buflen > MAXPATHLEN)
		uap->buflen = MAXPATHLEN;
	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
	bp += uap->buflen - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	/* Walk up from the cwd until we reach the process or system root. */
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point downwards. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* Validate the soft parent pointer via v_id. */
		if (vp->v_dd->v_id !=
		    vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			/* No cached name for this vnode: cannot continue. */
			FILEDESC_UNLOCK(fdp);
			numcwdfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (ncp->nc_dvp != vp->v_dd) {
			/* Cached name disagrees with the parent pointer. */
			FILEDESC_UNLOCK(fdp);
			numcwdfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		/* Prepend this path component, watching for buffer overflow. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				FILEDESC_UNLOCK(fdp);
				numcwdfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
	}
	FILEDESC_UNLOCK(fdp);
	if (!slash_prefixed) {
		/* cwd was the root itself: the path is just "/". */
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
	&disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Reconstruct the full path of vnode 'vn' from the name cache, relative
 * to td's root directory.  On success *retbuf points at the path (inside
 * the MAXPATHLEN buffer returned in *freebuf, which the caller must
 * free(M_TEMP)).  Same walk-up algorithm as __getcwd(), except the
 * starting vnode itself is exempt from the v_dd/v_ddid validation
 * because its own entry supplies the first component.
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	/* Walk up from vn until we reach the process or system root. */
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point downwards. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* Validate the soft parent pointer (not for vn itself). */
		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			/* No cached name for this vnode: cannot continue. */
			FILEDESC_UNLOCK(fdp);
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
			/* Cached name disagrees with the parent pointer. */
			FILEDESC_UNLOCK(fdp);
			numfullpathfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		/* Prepend this path component, watching for buffer overflow. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				FILEDESC_UNLOCK(fdp);
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
	}
	if (!slash_prefixed) {
		/* vn was the root itself: the path is just "/". */
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}