/* vfs_cache.c revision 83000 */
1/* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 * @(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD: head/sys/kern/vfs_cache.c 83000 2001-09-04 19:03:47Z iedowse $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 *
 * An entry is simultaneously on three lists: the global hash chain
 * (nc_hash), the parent directory's list of children (nc_src, rooted
 * at dvp->v_cache_src), and either the target vnode's list of names
 * (nc_dst, rooted at vp->v_cache_dst) or, for negative entries, the
 * global ncneg LRU queue.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	/*
	 * Zero-length trailing array: the name bytes are allocated
	 * immediately after the struct (see cache_enter's
	 * malloc(sizeof *ncp + cn_namelen)).  nc_nlen is a u_char,
	 * so cached component names are limited to 255 bytes.
	 */
	char nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name cacheing.
 */
/* Map a 32-bit FNV hash to its chain head; nchash is a power-of-2 mask. */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU queue of negative entries */
static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
struct	nchstats nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
/* Declare a vfs.cache.<name> counter exported read-only (or per `mode'). */
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");

static void cache_zap __P((struct namecache *ncp));

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1

/*
 * Grab an atomic snapshot of the name cache hash chain lengths.
 *
 * NOTE(review): the chains are walked without any explicit lock here;
 * presumably this relies on the caller running under Giant in this
 * era of the kernel -- confirm before reusing elsewhere.
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

/*
 * Handler for debug.hashstat.rawnchash: emit one int per hash bucket,
 * each the length of that bucket's chain.  A size-probe request
 * (req->oldptr == NULL) is answered with the total byte count only.
 */
static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");

/*
 * Handler for debug.hashstat.nchash: emit four ints summarizing the
 * table -- bucket count, buckets in use, longest chain, and percent
 * utilization scaled by 100 (i.e. basis points of buckets used).
 */
static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;	/* restore: the loop counted it down to 0 */
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");

/*
 * Delete an entry from its hash list and move it to the front
 * of the LRU list for immediate reuse.
 *
 * Unlinks ncp from all three lists it lives on (hash chain, parent's
 * v_cache_src, and either the target's v_cache_dst or the ncneg queue),
 * drops the hold on the parent vnode when this was its last child
 * entry, updates the counters, and frees the entry.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		/* Last entry naming a child of nc_dvp: release the hold
		 * taken by cache_enter. */
		vdrop(ncp->nc_dvp);
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		/* Negative entry: it lives on the global LRU instead. */
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	free(ncp, M_VFSCACHE);
}

/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.
 * If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		/* Cache disabled: also prevent namei from entering a result. */
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	numcalls++;

	/*
	 * Fast paths for "." and "..": neither is stored in the hash
	 * table.  "." is the directory itself; ".." comes from the
	 * v_dd/v_ddid back-pointer, validated against the parent's
	 * current v_id capability number.
	 */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				/* Stale back-pointer (or caller asked us
				 * not to cache): invalidate and miss. */
				dvp->v_ddid = 0;
				return (0);
			}
			*vpp = dvp->v_dd;
			return (-1);
		}
	}

	/* Hash over (name, parent v_id) -- must match cache_enter. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_vpid field records whether this is a whiteout.
	 * Requeue the entry at the tail of ncneg so the LRU reclaim in
	 * cache_enter takes cold entries first.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	return (ENOENT);
}

/*
 * Add an entry to the cache.
 *
 * dvp is the directory, cnp the component name, and vp the vnode the
 * name resolves to -- or NULL to enter a negative ("does not exist")
 * entry.  "." is never entered; ".." only updates dvp's v_dd/v_ddid
 * back-pointer.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int len;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				/* Negative "..": point back at self with an
				 * invalid capability so lookups miss. */
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	/* Name bytes are stored in the trailing nc_name[] allocation. */
	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		/* Record the ".." back-pointer for the child directory. */
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout.
	 * the whiteout flag is stored in the nc_vpid field which is
	 * otherwise unused.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		/* First child entry for dvp: hold it (released in cache_zap). */
		vhold(dvp);
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/*
	 * Keep negative entries to at most 1/ncnegfactor of the cache by
	 * reclaiming the least-recently-used one (head of ncneg).
	 */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)


/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnodes v_id individually instead of
 * XXX: using the global v_id.
 */

void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	/* Drop every entry naming a child of vp, then every name for vp. */
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	/* Pick a fresh capability number, skipping 0 and the current id. */
	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		/* nnp is fetched before zapping so removal is safe. */
		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				cache_zap(ncp);
			}
		}
	}
}

/*
 * Flush all directory entries with no child directories held in
 * the cache.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 *
 * ndir bounds how many leaf directories are purged in one call.
 */
void
cache_purgeleafdirs(ndir)
	int ndir;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp, *ncpc, *nnpc;
	struct vnode *dvp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp != 0) {
				/*
				 * Skip over if nc_dvp of this cache holds
				 * a child directory, or the hold count of
				 * nc_dvp is greater than 1 (in which case
				 * nc_dvp is likely to be the working
				 * directory of a process).
				 */
				if (ncp->nc_dvp->v_holdcnt > 1)
					continue;
				/* Does this directory cache any child dirs? */
				for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src);
				     ncpc != 0; ncpc = nnpc) {
					nnpc = LIST_NEXT(ncpc, nc_src);
					if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR)
						break;
				}
				if (ncpc == 0) {
					/*
					 * Zap all of this directory's children,
					 * held in ncp->nc_dvp->v_cache_src.
					 */
					dvp = ncp->nc_dvp;
					while (!LIST_EMPTY(&dvp->v_cache_src))
						cache_zap(LIST_FIRST(&dvp->v_cache_src));

					ndir--;

					/* Restart in case where nnp is reclaimed. */
					nnp = LIST_FIRST(ncpp);
					continue;
				}
			}
		}
	}
	numcachepl++;
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 *
 * Implements VOP_LOOKUP for filesystems that use the namecache:
 * validates dvp, checks permissions, consults cache_lookup(), and on
 * a hit acquires the proper vnode locks per LOCKPARENT/ISLASTCN --
 * re-validating the hit via v_id after any blocking lock operation.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct proc *p = cnp->cn_proc;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Disallow destructive last-component ops on read-only mounts. */
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, p);

	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

	/* 0 = cache miss: fall through to the filesystem's real lookup. */
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));

	/* ENOENT = cached negative entry. */
	if (error == ENOENT)
		return (error);

	/* -1 = positive hit: *vpp is set but unlocked and unreferenced. */
	vp = *vpp;
	vpid = vp->v_id;	/* remember capability before we may sleep */
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		/* Lock order: must unlock child dvp before locking parent. */
		VOP_UNLOCK(dvp, 0, p);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE, p);
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		error = vget(vp, LK_EXCLUSIVE, p);
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* Stale hit: the vnode was recycled while we slept. */
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/* Relock the parent if needed, then retry via the filesystem. */
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, p);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
}


#ifndef _SYS_SYSPROTO_H_
struct  __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
/*
 * __getcwd() system call: reconstruct the current working directory
 * path purely from namecache state, walking v_dd parent links from
 * fd_cdir up to the process root and building the string backwards in
 * a temporary buffer.  Fails (rather than scanning directories) when
 * the cache lacks the needed entries.
 */
int
__getcwd(p, uap)
	struct proc *p;
	struct __getcwd_args *uap;
{
	char *bp, *buf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (uap->buflen < 2)
		return (EINVAL);
	if (uap->buflen > MAXPATHLEN)
		uap->buflen = MAXPATHLEN;
	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
	bp += uap->buflen - 1;
	*bp = '\0';	/* build the path right-to-left from here */
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point downward. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_dd->v_id != vp->v_ddid) {
			/* Stale ".." back-pointer. */
			numcwdfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			/* No cached name for this directory. */
			numcwdfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (ncp->nc_dvp != vp->v_dd) {
			numcwdfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		/* Prepend the component name, then a '/'. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;	/* user buffer too small */
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
	}
	if (!slash_prefixed) {
		/* cwd is the root itself: the path is just "/". */
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
/* Redefined: declare the counter variable and its sysctl in one go. */
#define STATNODE(name) \
	static u_int name; \
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Reconstruct the full path of a process's text (executable) vnode
 * from namecache state, walking nc_dvp parent links up to the
 * process root.  Mirrors __getcwd(), except the starting vnode is a
 * file rather than a directory, so the v_dd/v_ddid staleness checks
 * are skipped for the first (textvp) step.
 *
 * On success returns 0 with *retbuf pointing at the NUL-terminated
 * path, which lives inside the MAXPATHLEN allocation returned in
 * *retfreebuf; the caller owns that buffer and must free(*retfreebuf,
 * M_TEMP) when done.
 */
int
textvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) {
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp, *textvp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	textvp = p->p_textvp;
	if (textvp == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';	/* build the path right-to-left from here */
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point downward. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) {
			/* Stale ".." back-pointer. */
			numfullpathfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			/* No cached name for this vnode. */
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != textvp && ncp->nc_dvp != vp->v_dd) {
			numfullpathfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		/* Prepend the component name, then a '/'. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;	/* path exceeds MAXPATHLEN */
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*retfreebuf = buf;
	return (0);
}