vfs_cache.c revision 75654
1/* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 37 * $FreeBSD: head/sys/kern/vfs_cache.c 75654 2001-04-18 11:19:50Z tanimura $ 38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/sysctl.h> 44#include <sys/mount.h> 45#include <sys/vnode.h> 46#include <sys/namei.h> 47#include <sys/malloc.h> 48#include <sys/sysproto.h> 49#include <sys/proc.h> 50#include <sys/filedesc.h> 51#include <sys/fnv_hash.h> 52 53/* 54 * This structure describes the elements in the cache of recent 55 * names looked up by namei. 56 */ 57 58struct namecache { 59 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 60 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 61 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 62 struct vnode *nc_dvp; /* vnode of parent of name */ 63 struct vnode *nc_vp; /* vnode the name refers to */ 64 u_char nc_flag; /* flag bits */ 65 u_char nc_nlen; /* length of name */ 66 char nc_name[0]; /* segment name */ 67}; 68 69/* 70 * Name caching works as follows: 71 * 72 * Names found by directory scans are retained in a cache 73 * for future reference. It is managed LRU, so frequently 74 * used names will hang around. Cache is indexed by hash value 75 * obtained from (vp, name) where vp refers to the directory 76 * containing name. 77 * 78 * If it is a "negative" entry, (i.e. for a name that is known NOT to 79 * exist) the vnode pointer will be NULL. 80 * 81 * Upon reaching the last segment of a path, if the reference 82 * is for DELETE, or NOCACHE is set (rewrite), and the 83 * name is located in the cache, it will be dropped. 84 */ 85 86/* 87 * Structures associated with name cacheing. 88 */ 89#define NCHHASH(hash) \ 90 (&nchashtbl[(hash) & nchash]) 91static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 92static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 93static u_long nchash; /* size of hash table */ 94SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 95static u_long ncnegfactor = 16; /* ratio of negative entries */ 96SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 97static u_long numneg; /* number of cache entries allocated */ 98SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 99static u_long numcache; /* number of cache entries allocated */ 100SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 101static u_long numcachehv; /* number of cache entries with vnodes held */ 102SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 103static u_long numcachepl; /* number of cache purge for leaf entries */ 104SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 105struct nchstats nchstats; /* cache effectiveness statistics */ 106 107static int doingcache = 1; /* 1 => enable the cache */ 108SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 109SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 110SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 111 112/* 113 * The new name cache statistics 114 */ 115SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 116#define STATNODE(mode, name, var) \ 117 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 118STATNODE(CTLFLAG_RD, numneg, &numneg); 119STATNODE(CTLFLAG_RD, numcache, &numcache); 120static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 121static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 122static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 123static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 124static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 125static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 126static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 127static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 128static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 129static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 130 131SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 132 sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 133 134 135 136static void cache_zap __P((struct namecache *ncp)); 137 138static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 139 140/* 141 * Flags in namecache.nc_flag 142 */ 143#define NCF_WHITE 1 144 145/* 146 * Grab an atomic snapshot of the name cache hash chain lengths 147 */ 148SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 149 150static int 151sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 152{ 153 int error; 154 struct nchashhead *ncpp; 155 struct namecache *ncp; 156 int n_nchash; 157 int count; 158 159 n_nchash = nchash + 1; /* nchash is max index, not count */ 160 if (!req->oldptr) 161 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 162 163 /* Scan hash tables for applicable entries */ 164 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 165 count = 0; 166 LIST_FOREACH(ncp, ncpp, nc_hash) { 167 count++; 168 } 169 error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count)); 170 if (error) 171 return (error); 172 } 173 return (0); 174} 175SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 176 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 177 178static int 179sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 180{ 181 int error; 182 struct nchashhead *ncpp; 183 struct namecache *ncp; 184 int n_nchash; 185 int count, maxlength, used, pct; 186 187 if (!req->oldptr) 188 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 189 190 n_nchash = nchash + 1; /* nchash is max index, not count */ 191 used = 0; 192 maxlength = 0; 193 194 /* Scan hash tables for applicable entries */ 195 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 196 count = 0; 197 LIST_FOREACH(ncp, ncpp, nc_hash) { 198 count++; 199 } 200 if (count) 201 used++; 202 if (maxlength < count) 203 maxlength = count; 204 } 205 n_nchash = nchash + 1; 206 pct = (used * 100 * 100) / n_nchash; 207 error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash)); 208 if (error) 209 return (error); 210 error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used)); 211 if (error) 212 return (error); 213 error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength)); 214 if (error) 215 return (error); 216 error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct)); 217 if (error) 218 return (error); 219 return (0); 220} 221SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 222 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 223 224/* 225 * Delete an entry from its hash list and move it to the front 226 * of the LRU list for immediate reuse. 227 */ 228static void 229cache_zap(ncp) 230 struct namecache *ncp; 231{ 232 LIST_REMOVE(ncp, nc_hash); 233 LIST_REMOVE(ncp, nc_src); 234 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 235 vdrop(ncp->nc_dvp); 236 numcachehv--; 237 } 238 if (ncp->nc_vp) { 239 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 240 } else { 241 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 242 numneg--; 243 } 244 numcache--; 245 free(ncp, M_VFSCACHE); 246} 247 248/* 249 * Lookup an entry in the cache 250 * 251 * We don't do this if the segment name is long, simply so the cache 252 * can avoid holding long names (which would either waste space, or 253 * add greatly to the complexity). 254 * 255 * Lookup is called with dvp pointing to the directory to search, 256 * cnp pointing to the name of the entry being sought. If the lookup 257 * succeeds, the vnode is returned in *vpp, and a status of -1 is 258 * returned. If the lookup determines that the name does not exist 259 * (negative cacheing), a status of ENOENT is returned. If the lookup 260 * fails, a status of zero is returned. 261 */ 262 263int 264cache_lookup(dvp, vpp, cnp) 265 struct vnode *dvp; 266 struct vnode **vpp; 267 struct componentname *cnp; 268{ 269 struct namecache *ncp; 270 u_int32_t hash; 271 272 if (!doingcache) { 273 cnp->cn_flags &= ~MAKEENTRY; 274 return (0); 275 } 276 277 numcalls++; 278 279 if (cnp->cn_nameptr[0] == '.') { 280 if (cnp->cn_namelen == 1) { 281 *vpp = dvp; 282 dothits++; 283 return (-1); 284 } 285 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 286 dotdothits++; 287 if (dvp->v_dd->v_id != dvp->v_ddid || 288 (cnp->cn_flags & MAKEENTRY) == 0) { 289 dvp->v_ddid = 0; 290 return (0); 291 } 292 *vpp = dvp->v_dd; 293 return (-1); 294 } 295 } 296 297 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 298 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 299 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 300 numchecks++; 301 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 302 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 303 break; 304 } 305 306 /* We failed to find an entry */ 307 if (ncp == 0) { 308 if ((cnp->cn_flags & MAKEENTRY) == 0) { 309 nummisszap++; 310 } else { 311 nummiss++; 312 } 313 nchstats.ncs_miss++; 314 return (0); 315 } 316 317 /* We don't want to have an entry, so dump it */ 318 if ((cnp->cn_flags & MAKEENTRY) == 0) { 319 numposzaps++; 320 nchstats.ncs_badhits++; 321 cache_zap(ncp); 322 return (0); 323 } 324 325 /* We found a "positive" match, return the vnode */ 326 if (ncp->nc_vp) { 327 numposhits++; 328 nchstats.ncs_goodhits++; 329 *vpp = ncp->nc_vp; 330 return (-1); 331 } 332 333 /* We found a negative match, and want to create it, so purge */ 334 if (cnp->cn_nameiop == CREATE) { 335 numnegzaps++; 336 nchstats.ncs_badhits++; 337 cache_zap(ncp); 338 return (0); 339 } 340 341 numneghits++; 342 /* 343 * We found a "negative" match, ENOENT notifies client of this match. 344 * The nc_vpid field records whether this is a whiteout. 345 */ 346 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 347 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 348 nchstats.ncs_neghits++; 349 if (ncp->nc_flag & NCF_WHITE) 350 cnp->cn_flags |= ISWHITEOUT; 351 return (ENOENT); 352} 353 354/* 355 * Add an entry to the cache. 356 */ 357void 358cache_enter(dvp, vp, cnp) 359 struct vnode *dvp; 360 struct vnode *vp; 361 struct componentname *cnp; 362{ 363 struct namecache *ncp; 364 struct nchashhead *ncpp; 365 u_int32_t hash; 366 int len; 367 368 if (!doingcache) 369 return; 370 371 if (cnp->cn_nameptr[0] == '.') { 372 if (cnp->cn_namelen == 1) { 373 return; 374 } 375 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 376 if (vp) { 377 dvp->v_dd = vp; 378 dvp->v_ddid = vp->v_id; 379 } else { 380 dvp->v_dd = dvp; 381 dvp->v_ddid = 0; 382 } 383 return; 384 } 385 } 386 387 ncp = (struct namecache *) 388 malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 389 bzero((char *)ncp, sizeof *ncp); 390 numcache++; 391 if (!vp) { 392 numneg++; 393 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 394 } else if (vp->v_type == VDIR) { 395 vp->v_dd = dvp; 396 vp->v_ddid = dvp->v_id; 397 } 398 399 /* 400 * Fill in cache info, if vp is NULL this is a "negative" cache entry. 401 * For negative entries, we have to record whether it is a whiteout. 402 * the whiteout flag is stored in the nc_vpid field which is 403 * otherwise unused. 404 */ 405 ncp->nc_vp = vp; 406 ncp->nc_dvp = dvp; 407 len = ncp->nc_nlen = cnp->cn_namelen; 408 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 409 bcopy(cnp->cn_nameptr, ncp->nc_name, len); 410 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 411 ncpp = NCHHASH(hash); 412 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 413 if (LIST_EMPTY(&dvp->v_cache_src)) { 414 vhold(dvp); 415 numcachehv++; 416 } 417 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 418 if (vp) { 419 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 420 } else { 421 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 422 } 423 if (numneg * ncnegfactor > numcache) { 424 ncp = TAILQ_FIRST(&ncneg); 425 cache_zap(ncp); 426 } 427} 428 429/* 430 * Name cache initialization, from vfs_init() when we are booting 431 */ 432static void 433nchinit(void *dummy __unused) 434{ 435 436 TAILQ_INIT(&ncneg); 437 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 438} 439SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 440 441 442/* 443 * Invalidate all entries to a particular vnode. 444 * 445 * Remove all entries in the namecache relating to this vnode and 446 * change the v_id. We take the v_id from a global counter, since 447 * it becomes a handy sequence number in crash-dumps that way. 448 * No valid vnode will ever have (v_id == 0). 449 * 450 * XXX: Only time and the size of v_id prevents this from failing: 451 * XXX: In theory we should hunt down all (struct vnode*, v_id) 452 * XXX: soft references and nuke them, at least on the global 453 * XXX: v_id wraparound. The period of resistance can be extended 454 * XXX: by incrementing each vnodes v_id individually instead of 455 * XXX: using the global v_id. 456 */ 457 458void 459cache_purge(vp) 460 struct vnode *vp; 461{ 462 static u_long nextid; 463 464 while (!LIST_EMPTY(&vp->v_cache_src)) 465 cache_zap(LIST_FIRST(&vp->v_cache_src)); 466 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 467 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 468 469 do 470 nextid++; 471 while (nextid == vp->v_id || !nextid); 472 vp->v_id = nextid; 473 vp->v_dd = vp; 474 vp->v_ddid = 0; 475} 476 477/* 478 * Flush all entries referencing a particular filesystem. 479 * 480 * Since we need to check it anyway, we will flush all the invalid 481 * entries at the same time. 482 */ 483void 484cache_purgevfs(mp) 485 struct mount *mp; 486{ 487 struct nchashhead *ncpp; 488 struct namecache *ncp, *nnp; 489 490 /* Scan hash tables for applicable entries */ 491 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 492 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 493 nnp = LIST_NEXT(ncp, nc_hash); 494 if (ncp->nc_dvp->v_mount == mp) { 495 cache_zap(ncp); 496 } 497 } 498 } 499} 500 501/* 502 * Flush all dirctory entries with no child directories held in 503 * the cache. 504 * 505 * Since we need to check it anyway, we will flush all the invalid 506 * entries at the same time. 507 */ 508void 509cache_purgeleafdirs(ndir) 510 int ndir; 511{ 512 struct nchashhead *ncpp; 513 struct namecache *ncp, *nnp, *ncpc, *nnpc; 514 struct vnode *dvp; 515 516 /* Scan hash tables for applicable entries */ 517 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) { 518 for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) { 519 nnp = LIST_NEXT(ncp, nc_hash); 520 if (ncp->nc_dvp != 0) { 521 /* 522 * Skip over if nc_dvp of this cache holds 523 * a child directory, or the hold count of 524 * nc_dvp is greater than 1 (in which case 525 * nc_dvp is likely to be the working 526 * directory of a process). 527 */ 528 if (ncp->nc_dvp->v_holdcnt > 1) 529 continue; 530 for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src); 531 ncpc != 0; ncpc = nnpc) { 532 nnpc = LIST_NEXT(ncpc, nc_src); 533 if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR) 534 break; 535 } 536 if (ncpc == 0) { 537 /* 538 * Zap all of this directory's children, 539 * held in ncp->nc_dvp->v_cache_src. 540 */ 541 dvp = ncp->nc_dvp; 542 while (!LIST_EMPTY(&dvp->v_cache_src)) 543 cache_zap(LIST_FIRST(&dvp->v_cache_src)); 544 545 ndir--; 546 547 /* Restart in case where nnp is reclaimed. */ 548 nnp = LIST_FIRST(ncpp); 549 continue; 550 } 551 } 552 } 553 } 554 numcachepl++; 555} 556 557/* 558 * Perform canonical checks and cache lookup and pass on to filesystem 559 * through the vop_cachedlookup only if needed. 560 */ 561 562int 563vfs_cache_lookup(ap) 564 struct vop_lookup_args /* { 565 struct vnode *a_dvp; 566 struct vnode **a_vpp; 567 struct componentname *a_cnp; 568 } */ *ap; 569{ 570 struct vnode *dvp, *vp; 571 int lockparent; 572 int error; 573 struct vnode **vpp = ap->a_vpp; 574 struct componentname *cnp = ap->a_cnp; 575 struct ucred *cred = cnp->cn_cred; 576 int flags = cnp->cn_flags; 577 struct proc *p = cnp->cn_proc; 578 u_long vpid; /* capability number of vnode */ 579 580 *vpp = NULL; 581 dvp = ap->a_dvp; 582 lockparent = flags & LOCKPARENT; 583 584 if (dvp->v_type != VDIR) 585 return (ENOTDIR); 586 587 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 588 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 589 return (EROFS); 590 591 error = VOP_ACCESS(dvp, VEXEC, cred, p); 592 593 if (error) 594 return (error); 595 596 error = cache_lookup(dvp, vpp, cnp); 597 598 if (!error) 599 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 600 601 if (error == ENOENT) 602 return (error); 603 604 vp = *vpp; 605 vpid = vp->v_id; 606 cnp->cn_flags &= ~PDIRUNLOCK; 607 if (dvp == vp) { /* lookup on "." */ 608 VREF(vp); 609 error = 0; 610 } else if (flags & ISDOTDOT) { 611 VOP_UNLOCK(dvp, 0, p); 612 cnp->cn_flags |= PDIRUNLOCK; 613 error = vget(vp, LK_EXCLUSIVE, p); 614 if (!error && lockparent && (flags & ISLASTCN)) { 615 if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) == 0) 616 cnp->cn_flags &= ~PDIRUNLOCK; 617 } 618 } else { 619 error = vget(vp, LK_EXCLUSIVE, p); 620 if (!lockparent || error || !(flags & ISLASTCN)) { 621 VOP_UNLOCK(dvp, 0, p); 622 cnp->cn_flags |= PDIRUNLOCK; 623 } 624 } 625 /* 626 * Check that the capability number did not change 627 * while we were waiting for the lock. 628 */ 629 if (!error) { 630 if (vpid == vp->v_id) 631 return (0); 632 vput(vp); 633 if (lockparent && dvp != vp && (flags & ISLASTCN)) { 634 VOP_UNLOCK(dvp, 0, p); 635 cnp->cn_flags |= PDIRUNLOCK; 636 } 637 } 638 if (cnp->cn_flags & PDIRUNLOCK) { 639 error = vn_lock(dvp, LK_EXCLUSIVE, p); 640 if (error) 641 return (error); 642 cnp->cn_flags &= ~PDIRUNLOCK; 643 } 644 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 645} 646 647 648#ifndef _SYS_SYSPROTO_H_ 649struct __getcwd_args { 650 u_char *buf; 651 u_int buflen; 652}; 653#endif 654 655static int disablecwd; 656SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); 657 658static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 659static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 660static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 661static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 662static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 663static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 664int 665__getcwd(p, uap) 666 struct proc *p; 667 struct __getcwd_args *uap; 668{ 669 char *bp, *buf; 670 int error, i, slash_prefixed; 671 struct filedesc *fdp; 672 struct namecache *ncp; 673 struct vnode *vp; 674 675 numcwdcalls++; 676 if (disablecwd) 677 return (ENODEV); 678 if (uap->buflen < 2) 679 return (EINVAL); 680 if (uap->buflen > MAXPATHLEN) 681 uap->buflen = MAXPATHLEN; 682 buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 683 bp += uap->buflen - 1; 684 *bp = '\0'; 685 fdp = p->p_fd; 686 slash_prefixed = 0; 687 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 688 if (vp->v_flag & VROOT) { 689 if (vp->v_mount == NULL) /* forced unmount */ 690 return (EBADF); 691 vp = vp->v_mount->mnt_vnodecovered; 692 continue; 693 } 694 if (vp->v_dd->v_id != vp->v_ddid) { 695 numcwdfail1++; 696 free(buf, M_TEMP); 697 return (ENOTDIR); 698 } 699 ncp = TAILQ_FIRST(&vp->v_cache_dst); 700 if (!ncp) { 701 numcwdfail2++; 702 free(buf, M_TEMP); 703 return (ENOENT); 704 } 705 if (ncp->nc_dvp != vp->v_dd) { 706 numcwdfail3++; 707 free(buf, M_TEMP); 708 return (EBADF); 709 } 710 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 711 if (bp == buf) { 712 numcwdfail4++; 713 free(buf, M_TEMP); 714 return (ENOMEM); 715 } 716 *--bp = ncp->nc_name[i]; 717 } 718 if (bp == buf) { 719 numcwdfail4++; 720 free(buf, M_TEMP); 721 return (ENOMEM); 722 } 723 *--bp = '/'; 724 slash_prefixed = 1; 725 vp = vp->v_dd; 726 } 727 if (!slash_prefixed) { 728 if (bp == buf) { 729 numcwdfail4++; 730 free(buf, M_TEMP); 731 return (ENOMEM); 732 } 733 *--bp = '/'; 734 } 735 numcwdfound++; 736 error = copyout(bp, uap->buf, strlen(bp) + 1); 737 free(buf, M_TEMP); 738 return (error); 739} 740 741/* 742 * Thus begins the fullpath magic. 743 */ 744 745#undef STATNODE 746#define STATNODE(name) \ 747 static u_int name; \ 748 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 749 750static int disablefullpath; 751SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, 752 &disablefullpath, 0, ""); 753 754STATNODE(numfullpathcalls); 755STATNODE(numfullpathfail1); 756STATNODE(numfullpathfail2); 757STATNODE(numfullpathfail3); 758STATNODE(numfullpathfail4); 759STATNODE(numfullpathfound); 760 761int 762textvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) { 763 char *bp, *buf; 764 int i, slash_prefixed; 765 struct filedesc *fdp; 766 struct namecache *ncp; 767 struct vnode *vp, *textvp; 768 769 numfullpathcalls++; 770 if (disablefullpath) 771 return (ENODEV); 772 textvp = p->p_textvp; 773 if (textvp == NULL) 774 return (EINVAL); 775 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 776 bp = buf + MAXPATHLEN - 1; 777 *bp = '\0'; 778 fdp = p->p_fd; 779 slash_prefixed = 0; 780 for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) { 781 if (vp->v_flag & VROOT) { 782 if (vp->v_mount == NULL) { /* forced unmount */ 783 free(buf, M_TEMP); 784 return (EBADF); 785 } 786 vp = vp->v_mount->mnt_vnodecovered; 787 continue; 788 } 789 if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) { 790 numfullpathfail1++; 791 free(buf, M_TEMP); 792 return (ENOTDIR); 793 } 794 ncp = TAILQ_FIRST(&vp->v_cache_dst); 795 if (!ncp) { 796 numfullpathfail2++; 797 free(buf, M_TEMP); 798 return (ENOENT); 799 } 800 if (vp != textvp && ncp->nc_dvp != vp->v_dd) { 801 numfullpathfail3++; 802 free(buf, M_TEMP); 803 return (EBADF); 804 } 805 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 806 if (bp == buf) { 807 numfullpathfail4++; 808 free(buf, M_TEMP); 809 return (ENOMEM); 810 } 811 *--bp = ncp->nc_name[i]; 812 } 813 if (bp == buf) { 814 numfullpathfail4++; 815 free(buf, M_TEMP); 816 return (ENOMEM); 817 } 818 *--bp = '/'; 819 slash_prefixed = 1; 820 vp = ncp->nc_dvp; 821 } 822 if (!slash_prefixed) { 823 if (bp == buf) { 824 numfullpathfail4++; 825 free(buf, M_TEMP); 826 return (ENOMEM); 827 } 828 *--bp = '/'; 829 } 830 numfullpathfound++; 831 *retbuf = bp; 832 *retfreebuf = buf; 833 return (0); 834} 835